commit def352f4f85aa18994228c5b370f7d1a7d7ac185 Author: svcmobrel-release Date: Tue May 6 15:38:28 2025 -0700 Open source GPL/LGPL release diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..474db92ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,101 @@ +# +# NOTE! Don't add files that are generated in specific +# subdirectories here. Add them in the ".gitignore" file +# in that subdirectory instead. +# +# NOTE! Please use 'git ls-files -i --exclude-standard' +# command after changing this file, to see if there are +# any tracked files which get ignored after the change. +# +# Normal rules +# +.* +*.o +*.o.* +*.a +*.s +*.ko +*.so +*.so.dbg +*.mod.c +*.i +*.lst +*.symtypes +*.order +*.elf +*.bin +*.gz +*.bz2 +*.lzma +*.xz +*.lz4 +*.lzo +*.patch +*.gcno +modules.builtin +Module.symvers +*.dwo + +# +# Top-level generic files +# +/tags +/TAGS +/linux +/vmlinux +/vmlinuz +/System.map +/Module.markers + +# +# Debian directory (make deb-pkg) +# +/debian/ + +# +# git files that we don't want to ignore even it they are dot-files +# +!.gitignore +!.mailmap + +# +# Generated include files +# +include/config +include/generated +arch/*/include/generated + +# stgit generated dirs +patches-* + +# quilt's files +patches +series + +# cscope files +cscope.* +ncscope.* + +# gnu global files +GPATH +GRTAGS +GSYMS +GTAGS + +*.orig +*~ +\#*# + +# +# Leavings from module signing +# +extra_certificates +signing_key.priv +signing_key.x509 +x509.genkey + +# Kconfig presets +all.config + +# Source files we generate from arch for doxygen. +Doxyfile.sources.safety diff --git a/Makefile.umbrella.tmk b/Makefile.umbrella.tmk new file mode 100644 index 000000000..4c273379f --- /dev/null +++ b/Makefile.umbrella.tmk @@ -0,0 +1,172 @@ +################################### tell Emacs this is a -*- makefile-gmake -*- +# +# Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# tmake for SW Mobile +# +# Repository umbrella makefile fragment for "nvgpu" +# +############################################################################### + +# +# Components common to all builds +# +NV_REPOSITORY_COMPONENTS := + +ifneq ($(NV_BUILD_CONFIGURATION_OS_IS_INTEGRITY),1) + +ifeq ($(NV_BUILD_CONFIGURATION_OS_IS_QNX),1) +ifeq ($(NV_BUILD_CONFIGURATION_IS_SAFETY),1) +# On QNX, the unit tests are built only for safety profile. +build_nvgpu_ut := 1 +endif +else +# On L4T, the unit tests are built with safety profile forced. 
+build_nvgpu_ut := 1 +endif + +ifeq ($(build_nvgpu_ut),1) +NV_REPOSITORY_COMPONENTS := libs/igpu +NV_REPOSITORY_COMPONENTS += libs/dgpu +NV_REPOSITORY_COMPONENTS += userspace +NV_REPOSITORY_COMPONENTS += userspace/units/posix/bitops +NV_REPOSITORY_COMPONENTS += userspace/units/posix/env +NV_REPOSITORY_COMPONENTS += userspace/units/posix/fault-injection +NV_REPOSITORY_COMPONENTS += userspace/units/posix/bug +NV_REPOSITORY_COMPONENTS += userspace/units/posix/os_sched +NV_REPOSITORY_COMPONENTS += userspace/units/posix/sizes +NV_REPOSITORY_COMPONENTS += userspace/units/init +NV_REPOSITORY_COMPONENTS += userspace/units/posix/log2 +NV_REPOSITORY_COMPONENTS += userspace/units/posix/thread +NV_REPOSITORY_COMPONENTS += userspace/units/posix/cond +NV_REPOSITORY_COMPONENTS += userspace/units/posix/timers +NV_REPOSITORY_COMPONENTS += userspace/units/posix/kmem +NV_REPOSITORY_COMPONENTS += userspace/units/posix/rwsem +NV_REPOSITORY_COMPONENTS += userspace/units/posix/queue +NV_REPOSITORY_COMPONENTS += userspace/units/posix/utils +NV_REPOSITORY_COMPONENTS += userspace/units/posix/circ_buf +NV_REPOSITORY_COMPONENTS += userspace/units/interface/bit-utils +NV_REPOSITORY_COMPONENTS += userspace/units/interface/lock +NV_REPOSITORY_COMPONENTS += userspace/units/interface/nvgpu_gk20a +NV_REPOSITORY_COMPONENTS += userspace/units/interface/atomic +NV_REPOSITORY_COMPONENTS += userspace/units/interface/rbtree +NV_REPOSITORY_COMPONENTS += userspace/units/interface/static_analysis +NV_REPOSITORY_COMPONENTS += userspace/units/interface/string +NV_REPOSITORY_COMPONENTS += userspace/units/interface/worker +NV_REPOSITORY_COMPONENTS += userspace/units/interface/kref +NV_REPOSITORY_COMPONENTS += userspace/units/interface/list +NV_REPOSITORY_COMPONENTS += userspace/units/bus +NV_REPOSITORY_COMPONENTS += userspace/units/pramin +NV_REPOSITORY_COMPONENTS += userspace/units/priv_ring +NV_REPOSITORY_COMPONENTS += userspace/units/ptimer +NV_REPOSITORY_COMPONENTS += userspace/units/mc +NV_REPOSITORY_COMPONENTS += userspace/units/mm/nvgpu_sgt +NV_REPOSITORY_COMPONENTS += userspace/units/mm/nvgpu_mem +NV_REPOSITORY_COMPONENTS += userspace/units/mm/nvgpu_mem/dgpu +NV_REPOSITORY_COMPONENTS += userspace/units/mm/allocators/buddy_allocator +NV_REPOSITORY_COMPONENTS += userspace/units/mm/allocators/nvgpu_allocator +NV_REPOSITORY_COMPONENTS += userspace/units/mm/allocators/bitmap_allocator +NV_REPOSITORY_COMPONENTS += userspace/units/mm/allocators/page_allocator +NV_REPOSITORY_COMPONENTS += userspace/units/mm/as +NV_REPOSITORY_COMPONENTS += userspace/units/mm/dma +NV_REPOSITORY_COMPONENTS += userspace/units/mm/gmmu/pd_cache +NV_REPOSITORY_COMPONENTS += userspace/units/mm/gmmu/page_table +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/cache/flush_gk20a_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/cache/flush_gv11b_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/gmmu/gmmu_gk20a_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/gmmu/gmmu_gm20b_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/gmmu/gmmu_gp10b_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/gmmu/gmmu_gv11b_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/gp10b_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/gv11b_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/hal/mmu_fault/gv11b_fusa +NV_REPOSITORY_COMPONENTS += userspace/units/mm/mm +NV_REPOSITORY_COMPONENTS += userspace/units/mm/page_table_faults +NV_REPOSITORY_COMPONENTS += userspace/units/mm/vm +NV_REPOSITORY_COMPONENTS += userspace/units/netlist 
+NV_REPOSITORY_COMPONENTS += userspace/units/fb +NV_REPOSITORY_COMPONENTS += userspace/units/fbp +NV_REPOSITORY_COMPONENTS += userspace/units/fifo +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/channel +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/channel/gk20a +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/channel/gm20b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/channel/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/ctxsw_timeout/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/engine +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/engine/gm20b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/engine/gp10b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/engine/gv100 +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/engine/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/fifo +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/fifo/gk20a +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/fifo/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/pbdma +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/pbdma/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/pbdma/gm20b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/pbdma/gp10b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/preempt +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/preempt/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/ramfc/gp10b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/ramfc/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/ramin/gk20a +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/ramin/gm20b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/ramin/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/runlist +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/runlist/gk20a +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/runlist/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/tsg +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/tsg/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/userd/gk20a +NV_REPOSITORY_COMPONENTS += userspace/units/fifo/usermode/gv11b +NV_REPOSITORY_COMPONENTS += userspace/units/fuse +NV_REPOSITORY_COMPONENTS += userspace/units/ltc +NV_REPOSITORY_COMPONENTS += userspace/units/enabled +NV_REPOSITORY_COMPONENTS += userspace/units/falcon +NV_REPOSITORY_COMPONENTS += userspace/units/falcon/falcon_tests +NV_REPOSITORY_COMPONENTS += userspace/units/pmu +NV_REPOSITORY_COMPONENTS += userspace/units/therm +NV_REPOSITORY_COMPONENTS += userspace/units/top +NV_REPOSITORY_COMPONENTS += userspace/units/class +NV_REPOSITORY_COMPONENTS += userspace/units/gr +NV_REPOSITORY_COMPONENTS += userspace/units/gr/falcon +NV_REPOSITORY_COMPONENTS += userspace/units/gr/config +NV_REPOSITORY_COMPONENTS += userspace/units/gr/init +NV_REPOSITORY_COMPONENTS += userspace/units/gr/setup +NV_REPOSITORY_COMPONENTS += userspace/units/gr/fs_state +NV_REPOSITORY_COMPONENTS += userspace/units/gr/global_ctx +NV_REPOSITORY_COMPONENTS += userspace/units/gr/ctx +NV_REPOSITORY_COMPONENTS += userspace/units/gr/obj_ctx +NV_REPOSITORY_COMPONENTS += userspace/units/gr/intr +NV_REPOSITORY_COMPONENTS += userspace/units/acr +NV_REPOSITORY_COMPONENTS += userspace/units/ce +NV_REPOSITORY_COMPONENTS += userspace/units/cg +NV_REPOSITORY_COMPONENTS += userspace/units/rc +NV_REPOSITORY_COMPONENTS += userspace/units/sync +NV_REPOSITORY_COMPONENTS += userspace/units/ecc +NV_REPOSITORY_COMPONENTS += userspace/units/io +endif +endif + +# Local Variables: +# indent-tabs-mode: t +# tab-width: 8 +# End: +# vi: set tabstop=8 noexpandtab: diff --git a/NVIDIA-REVIEWERS 
b/NVIDIA-REVIEWERS new file mode 100644 index 000000000..55dcaa70c --- /dev/null +++ b/NVIDIA-REVIEWERS @@ -0,0 +1,65 @@ +This is a reviewers file that can be parsed by get_nv_reviewers.py that +internally uses scripts/get_maintainer.pl from kernel. + +See the MAINTAINERS file in the Linux kernel source tree for details of the +file format. The file format is defined by the upstream Linux kernel community, +so don't modify it without upstreaming any changes to get_maintainer.pl. + +Descriptions of section entries (copied from MAINTAINERS): + + P: Person (obsolete) + M: Mail patches to: FullName + L: Mailing list that is relevant to this area + B: NvBugs Module Name + W: Web-page with status/info + Q: Patchwork web based patch tracking system site + T: SCM tree type and location. Type is one of: git, hg, quilt, stgit, topgit. + S: Status, one of the following: + Supported: Someone is actually paid to look after this. + Maintained: Someone actually looks after it. + Odd Fixes: It has a maintainer but they don't have time to do + much other than throw the odd patch in. See below.. + Orphan: No current maintainer [but maybe you could take the + role as you write your new code]. + Obsolete: Old code. Something tagged obsolete generally means + it has been replaced by a better system and you + should be using that. + F: Files and directories with wildcard patterns. + A trailing slash includes all files and subdirectory files. + F: drivers/net/ all files in and below drivers/net + F: drivers/net/* all files in drivers/net, but not below + F: */net/* all files in "any top level directory"/net + One pattern per line. Multiple F: lines acceptable. + X: Files and directories that are NOT maintained, same rules as F: + Files exclusions are tested before file matches. + Can be useful for excluding a specific subdirectory, for instance: + F: net/ + X: net/ipv6/ + matches all files in and below net excluding net/ipv6/ + K: Keyword perl extended regex pattern to match content in a + patch or file. For instance: + K: of_get_profile + matches patches or files that contain "of_get_profile" + K: \b(printk|pr_(info|err))\b + matches patches or files that contain one or more of the words + printk, pr_info or pr_err + One regex pattern per line. Multiple K: lines acceptable. + +Note: For the hard of thinking, this list is meant to remain in alphabetical +order. If you could add yourselves to it in alphabetical order that would be +so much easier [Ed] + +Maintainers List (try to look for most precise areas first) + +---------------------------------------------------------------------- +GPU +M: Vijayakumar Subbu +M: Terje Bergstrom +L: sw-mobile-nvgpu-core@exchange.nvidia.com +B: Mobile_Android_Kernel +B: Mobile_Linux_Kernel +B: Embedded Resource Manager - Linux +S: Supported +F: drivers/gpu/nvgpu/* +F: include/* +F: ../../gpu-firmware-private/ diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml new file mode 100644 index 000000000..165f9eea5 --- /dev/null +++ b/arch/nvgpu-common.yaml @@ -0,0 +1,1078 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. +# +# Common elements and units in nvgpu. +# + +## +## Top level common units. +## + +# This isn't really a unit per say but I needed some place to put these +# files. Perhaps it could become a unit some day? 
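+#
+# For orientation: each top-level key in this file describes one unit. The
+# fields used throughout are "safe" (yes/no, whether the unit is intended for
+# the safety build), "gpu" (igpu, dgpu, or both), "owner", "sources", and
+# optionally "deps", "tags", and nested "children". A purely illustrative
+# sketch of an entry (the unit name, owner, and paths below are hypothetical,
+# not real nvgpu files) looks like:
+#
+#   example_unit:
+#     safe: yes
+#     gpu: both
+#     owner: Some Owner
+#     sources: [ common/example/example.c,
+#                include/nvgpu/example.h ]
+#     deps: [ ]
+#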
+nvgpu: + safe: yes + owner: Alex W + sources: [ include/nvgpu/gk20a.h, + include/nvgpu/nvgpu_common.h, + include/nvgpu/cov_whitelist.h, + include/nvgpu/static_analysis.h ] + +bios: + safe: yes + gpu: dgpu + owner: Tejal K + sources: [ common/vbios/bios.c, + common/vbios/bios_sw_gv100.c, + common/vbios/bios_sw_gv100.h, + common/vbios/bios_sw_tu104.c, + common/vbios/bios_sw_tu104.h, + common/vbios/nvlink_bios.c, + include/nvgpu/bios.h, + include/nvgpu/nvlink_bios.h, + include/nvgpu/gops/bios.h, + include/nvgpu/gops/xve.h ] + +ce: + safe: yes + owner: Thomas F + sources: [ common/ce/ce.c, + include/nvgpu/ce.h, + include/nvgpu/gops/ce.h ] + deps: + +ce_app: + safe: no + gpu: dgpu + owner: Thomas F + sources: [ common/ce/ce_app.c, + common/ce/ce_priv.h, + include/nvgpu/ce_app.h ] + deps: + +debug: + safe: yes + gpu: both + sources: [ include/nvgpu/debug.h ] + +debugger: + safe: no + owner: Deepak N + sources: [ common/debugger.c, + include/nvgpu/debugger.h, + include/nvgpu/gops/debugger.h ] + deps: + +profiler: + safe: no + owner: Deepak N + sources: [ common/profiler/profiler.c, + include/nvgpu/profiler.h, + common/profiler/pm_reservation.c, + include/nvgpu/pm_reservation.h, + include/nvgpu/gops/profiler.h ] + +defaults: + safe: yes + sources: [ include/nvgpu/defaults.h ] + +ecc: + safe: yes + owner: Antony C + sources: [ common/ecc.c, + include/nvgpu/ecc.h, + include/nvgpu/gops/ecc.h ] + deps: + +log: + safe: no + owner: Vedashree V + sources: [ common/log_common.c, + include/nvgpu/trace.h ] + deps: + +# Pretty sure this can be marked as not-safe since we plan to use +# usermode submits in the safety build. +fence: + safe: no + owner: Seema K + sources: [ common/fence/fence.c, + common/fence/fence_syncpt.c, + common/fence/fence_sema.c, + common/fence/fence_priv.h, + include/nvgpu/fence.h, + include/nvgpu/fence_syncpt.h, + include/nvgpu/fence_sema.h, + include/nvgpu/user_fence.h ] + +io: + safe: yes + owner: Vinod G + sources: [ common/io/io.c, + include/nvgpu/gops/func.h ] + deps: + +ltc: + safe: yes + owner: Seshendra G + sources: [ common/ltc/ltc.c, + include/nvgpu/gops/ltc.h, + include/nvgpu/ltc.h ] + +cbc: + safe: no + owner: Seshendra G + sources: [ common/cbc/cbc.c, + include/nvgpu/cbc.h, + include/nvgpu/gops/cbc.h ] + +regops: + safe: no + owner: Deepak N + sources: [ common/regops/regops.c, + include/nvgpu/regops.h, + include/nvgpu/regops_allowlist.h ] + +mc: + safe: yes + owner: Seema K + sources: [ common/mc/mc.c, + include/nvgpu/mc.h, + include/nvgpu/gops/mc.h ] +class: + safe: yes + owner: Seshendra G + sources: [ include/nvgpu/class.h, + include/nvgpu/gops/class.h ] + +netlist: + safe: yes + gpu: both + owner: Seshendra G + sources: [ common/netlist/netlist.c, + common/netlist/netlist_priv.h, + common/netlist/netlist_defs.h, + include/nvgpu/netlist.h ] + +nvlink: + safe: yes + owner: Tejal K + gpu: dgpu + sources: [ common/nvlink/nvlink.c, + common/nvlink/minion.c, + common/nvlink/probe.c, + common/nvlink/link_mode_transitions.c, + common/nvlink/init/device_reginit.c, + common/nvlink/init/device_reginit_gv100.c, + common/nvlink/init/device_reginit_gv100.h, + include/nvgpu/nvlink.h, + include/nvgpu/gops/nvlink.h, + include/nvgpu/nvlink_device_reginit.h, + include/nvgpu/nvlink_link_mode_transitions.h, + include/nvgpu/nvlink_minion.h, + include/nvgpu/nvlink_probe.h ] + +nvgpu_err: + safe: yes + owner: Unknown + sources: [ include/nvgpu/nvgpu_err.h, + include/nvgpu/nvgpu_err_info.h] + +pramin: + safe: yes + gpu: dgpu + owner: Terje B + sources: [ common/pramin.c, + 
include/nvgpu/pramin.h, + include/nvgpu/gops/pramin.h ] + deps: + +device: + safe: yes + owner: Alex W + sources: [ common/device.c, + include/nvgpu/device.h ] + +ptimer: + safe: yes + owner: Terje B + sources: [ common/ptimer/ptimer.c, + include/nvgpu/ptimer.h, + include/nvgpu/gops/ptimer.h ] + deps: + +sched: + safe: no + owner: Thomas F + sources: [ include/nvgpu/sched.h ] + +semaphore: + safe: no + owner: Alex W + children: + semaphore: + sources: [ common/semaphore/semaphore.c, + common/semaphore/semaphore_priv.h, + include/nvgpu/semaphore.h ] + deps: [ ] + semaphore_hw: + sources: [ common/semaphore/semaphore_hw.c ] + deps: [ ] + semaphore_pool: + sources: [ common/semaphore/semaphore_pool.c ] + deps: [ ] + semaphore_sea: + sources: [ common/semaphore/semaphore_sea.c ] + deps: [ ] +sim: + safe: no + gpu: igpu + owner: Seshendra G + sources: [ common/sim/sim.c, + common/sim/sim_pci.c, + common/sim/sim_netlist.c, + include/nvgpu/hw_sim.h, + include/nvgpu/hw_sim_pci.h, + include/nvgpu/sim.h ] + +utils: + safe: yes + owner: Alex W + sources: [ include/nvgpu/utils.h, + include/nvgpu/worker.h, + include/nvgpu/rbtree.h, + include/nvgpu/enabled.h, + include/nvgpu/errata.h, + common/utils/string.c, + common/utils/worker.c, + common/utils/rbtree.c, + common/utils/enabled.c, + common/utils/errata.c ] + +## +## Common elements. +## + +acr_fusa: + safe: yes + owner: Mahantesh K + sources: [ common/acr/acr.c, + common/acr/acr_blob_alloc.c, + common/acr/acr_blob_alloc.h, + common/acr/acr_blob_construct.c, + common/acr/acr_blob_construct.h, + common/acr/acr_bootstrap.c, + common/acr/acr_bootstrap.h, + common/acr/acr_priv.h, + common/acr/acr_wpr.c, + common/acr/acr_wpr.h, + common/acr/acr_sw_gv11b.c, + common/acr/acr_sw_gv11b.h, + common/acr/nvgpu_acr_interface.h, + include/nvgpu/gops/acr.h, + include/nvgpu/acr.h ] + +acr: + safe: no + owner: Mahantesh K + sources: [ common/acr/acr_blob_construct_v0.c, + common/acr/acr_blob_construct_v0.h, + common/acr/acr_sw_gp10b.c, + common/acr/acr_sw_gp10b.h, + common/acr/acr_sw_gm20b.c, + common/acr/acr_sw_gm20b.h, + common/acr/acr_sw_tu104.c, + common/acr/acr_sw_tu104.h ] + +sbr: + safe: yes + owner: Ramesh M + gpu: dgpu + sources: [ common/sbr/sbr.c, + common/sbr/sbr.h, + include/nvgpu/sbr.h, + include/nvgpu/gops/sbr.h ] + +engine_queues: + owner: Sagar K + children: + mem_queues: + children: + mem_queue: + safe: no + gpu: both + sources: [ common/engine_queues/engine_mem_queue.c, + common/engine_queues/engine_mem_queue_priv.h, + include/nvgpu/engine_mem_queue.h, + include/nvgpu/engine_queue.h ] + deps: [ ] + tags: unit-testable + dmem_queue: + safe: no + gpu: igpu + sources: [ common/engine_queues/engine_dmem_queue.c, + common/engine_queues/engine_dmem_queue.h ] + deps: [ ] + tags: unit-testable + emem_queue: + safe: no + gpu: dgpu + sources: [ common/engine_queues/engine_emem_queue.c, + common/engine_queues/engine_emem_queue.h ] + deps: [ ] + tags: unit-testable + fb_queue: + safe: yes + gpu: dgpu + sources: [ common/engine_queues/engine_fb_queue.c, + common/engine_queues/engine_fb_queue_priv.h, + include/nvgpu/engine_fb_queue.h ] + deps: [ ] + tags: unit-testable + +falcon_fusa: + owner: Sagar K + safe: yes + gpu: both + sources: [ common/falcon/falcon.c, + common/falcon/falcon_sw_gk20a.c, + common/falcon/falcon_sw_gk20a.h, + include/nvgpu/gops/falcon.h, + include/nvgpu/falcon.h, + include/nvgpu/flcnif_cmn.h ] + deps: [ ] + tags: + +falcon: + owner: Sagar K + safe: no + gpu: dgpu + sources: [ common/falcon/falcon_sw_tu104.c, + 
common/falcon/falcon_sw_tu104.h, + include/nvgpu/gops/gsp.h, + include/nvgpu/gops/nvdec.h ] + deps: [ ] + tags: + +rc: + safe: yes + owner: Seema K + sources: [ common/rc/rc.c, + include/nvgpu/rc.h ] + +fifo: + safe: yes + owner: Seema K + children: + channel: + safe: yes + sources: [ common/fifo/channel.c, + common/fifo/watchdog.c, + common/fifo/channel_wdt.c, + common/fifo/channel_wdt.h, + common/fifo/channel_worker.c, + common/fifo/channel_worker.h, + include/nvgpu/channel.h, + include/nvgpu/watchdog.h, + include/nvgpu/gops/channel.h, + include/nvgpu/gops/ramfc.h, + include/nvgpu/gops/ramin.h, + include/nvgpu/gops/sync.h, + include/nvgpu/error_notifier.h ] + deps: [ ] + tsg: + safe: yes + sources: [ common/fifo/tsg.c, + include/nvgpu/gops/tsg.h, + include/nvgpu/tsg.h ] + deps: [ ] + submit: + safe: yes + sources: [ common/fifo/submit.c, + common/fifo/priv_cmdbuf.c, + common/fifo/job.c, + include/nvgpu/priv_cmdbuf.h, + include/nvgpu/job.h ] + deps: [ ] + runlist: + safe: yes + sources: [ common/fifo/runlist.c, + include/nvgpu/gops/runlist.h, + include/nvgpu/runlist.h ] + deps: [ ] + userd: + safe: no + sources: [ common/fifo/userd.c, + include/nvgpu/gops/userd.h, + include/nvgpu/fifo/userd.h ] + deps: [ ] + pbdma: + safe: yes + sources: [ common/fifo/pbdma.c, + include/nvgpu/gops/pbdma.h, + include/nvgpu/pbdma.h ] + deps: [ ] + pbdma_status: + safe: yes + sources: [ common/fifo/pbdma_status.c, + include/nvgpu/pbdma_status.h ] + deps: [ ] + engine_status: + safe: yes + sources: [ common/fifo/engine_status.c, + include/nvgpu/engine_status.h ] + deps: [] + engines: + safe: yes + sources: [ common/fifo/engines.c, + include/nvgpu/gops/engine.h, + include/nvgpu/engines.h ] + deps: [] + + preempt: + safe: yes + sources: [ common/fifo/preempt.c, + include/nvgpu/preempt.h ] + deps: [] + + fifo: + safe: yes + sources: [ common/fifo/fifo.c, + include/nvgpu/gops/fifo.h, + include/nvgpu/fifo.h ] + + usermode: + safe: yes + sources: [ include/nvgpu/gops/usermode.h ] + + sync: + children: + sync: + safe: yes + sources: [ common/sync/channel_sync.c, + common/sync/channel_sync_priv.h, + include/nvgpu/channel_sync.h ] + syncpt: + safe: yes + sources: [ common/sync/channel_sync_syncpt.c, + include/nvgpu/channel_sync_syncpt.h ] + syncsema: + safe: no + sources: [ common/sync/channel_sync_semaphore.c, + include/nvgpu/channel_sync_semaphore.h ] + user_syncpt: + safe: yes + sources: [ common/sync/channel_user_syncpt.c, + common/sync/channel_user_syncpt_priv.h, + include/nvgpu/channel_user_syncpt.h ] + +gr: + safe: yes + owner: Deepak N + children: + gr: + safe: yes + sources: [ common/gr/gr.c, + common/gr/gr_priv.h, + common/gr/gr_utils.c, + include/nvgpu/gr/gr_instances.h, + include/nvgpu/gr/gr_utils.h, + include/nvgpu/gops/gr.h, + include/nvgpu/gr/gr.h ] + global_ctx: + safe: yes + sources: [ common/gr/global_ctx.c, + common/gr/global_ctx_priv.h, + include/nvgpu/gr/global_ctx.h ] + ctx: + safe: yes + sources: [ common/gr/ctx.c, + common/gr/ctx_priv.h, + include/nvgpu/gr/ctx.h ] + obj_ctx: + safe: yes + sources: [ common/gr/obj_ctx.c, + common/gr/obj_ctx_priv.h, + include/nvgpu/gr/obj_ctx.h ] + subctx: + safe: yes + sources: [ common/gr/subctx.c, + common/gr/subctx_priv.h, + include/nvgpu/gr/subctx.h ] + fs_state: + safe: yes + sources: [ common/gr/fs_state.c, + include/nvgpu/gr/fs_state.h ] + config: + safe: yes + sources: [ common/gr/gr_config.c, + common/gr/gr_config_priv.h, + include/nvgpu/gr/config.h ] + fecs_trace: + safe: no + sources: [ common/gr/fecs_trace.c, + include/nvgpu/gr/fecs_trace.h ] + zbc: 
+ safe: no + sources: [ common/gr/zbc.c, + common/gr/zbc_priv.h, + include/nvgpu/gr/zbc.h ] + zcull: + safe: no + sources: [ common/gr/zcull.c, + common/gr/zcull_priv.h, + include/nvgpu/gr/zcull.h ] + hwpm_map: + safe: no + sources: [ common/gr/hwpm_map.c, + include/nvgpu/gr/hwpm_map.h ] + falcon: + safe: yes + sources: [ common/gr/gr_falcon.c, + common/gr/gr_falcon_priv.h, + include/nvgpu/gr/gr_falcon.h ] + intr: + safe: yes + sources: [ common/gr/gr_intr.c, + common/gr/gr_intr_priv.h, + include/nvgpu/interrupts.h, + include/nvgpu/gr/gr_intr.h ] + setup: + safe: yes + sources: [ common/gr/gr_setup.c, + include/nvgpu/gr/setup.h ] + ecc: + safe: yes + sources: [ common/gr/gr_ecc.c, + include/nvgpu/gr/gr_ecc.h ] + +fb: + safe: yes + owner: Vedashree V + sources: [ common/fb/fb.c, + include/nvgpu/fb.h ] + +fbp: + safe: yes + owner: Deepak N + sources: [ common/fbp/fbp.c, + common/fbp/fbp_priv.h, + include/nvgpu/fbp.h, + include/nvgpu/gops/fbp.h ] + +init: + safe: yes + owner: Terje B + children: + nvgpu: + safe: yes + sources: [ common/init/nvgpu_init.c, + include/nvgpu/nvgpu_init.h, + include/nvgpu/gpu_ops.h ] + +mm: + owner: Alex W + children: + as: + safe: yes + sources: [ common/mm/as.c, + include/nvgpu/as.h ] + comptags: + safe: no + gpu: igpu + sources: [ common/mm/comptags.c, + include/nvgpu/comptags.h ] + mmu_fault: + safe: yes + sources: [ include/nvgpu/mmu_fault.h ] + deps: [ ] + + gmmu: + safe: yes + children: + pd_cache: + safe: yes + sources: [ common/mm/gmmu/pd_cache.c, + common/mm/gmmu/pd_cache_priv.h, + include/nvgpu/pd_cache.h ] + deps: [ nvgpu.interface.kmem ] + tags: M4, unit-testable + page_table: + safe: yes + sources: [ common/mm/gmmu/page_table.c, + common/mm/gmmu/pte.c, + include/nvgpu/gmmu.h ] + deps: [ nvgpu.interface.kmem ] + tags: M4, unit-testable + allocators: + safe: yes + children: + nvgpu: + safe: yes + sources: [ common/mm/allocators/nvgpu_allocator.c, + include/nvgpu/allocator.h ] + deps: [ ] + tags: unit-testable + bitmap: + safe: yes + sources: [ common/mm/allocators/bitmap_allocator.c, + common/mm/allocators/bitmap_allocator_priv.h ] + deps: [ ] + tags: unit-testable + buddy: + safe: yes + sources: [ common/mm/allocators/buddy_allocator.c, + common/mm/allocators/buddy_allocator_priv.h ] + deps: [ ] + tags: unit-testable + page: + safe: yes + gpu: dgpu + sources: [ common/mm/allocators/page_allocator.c, + include/nvgpu/page_allocator.h ] + deps: [ ] + dma: + safe: yes + sources: [ common/mm/dma.c ] + deps: [ ] + tags: unit-testable + mm: + safe: yes + sources: [ common/mm/mm.c, + include/nvgpu/mm.h, + include/nvgpu/gops/mm.h ] + deps: [ ] + nvgpu_mem: + safe: yes + sources: [ common/mm/nvgpu_mem.c, + include/nvgpu/nvgpu_mem.h ] + deps: [ ] + tags: unit-testable + nvgpu_sgt: + safe: yes + sources: [ common/mm/nvgpu_sgt.c, + include/nvgpu/nvgpu_sgt.h ] + deps: [ ] + vidmem: + safe: yes + gpu: dgpu + sources: [ common/mm/vidmem.c, + include/nvgpu/vidmem.h ] + deps: [ ] + vm_area: + safe: yes + sources: [ common/mm/vm_area.c, + include/nvgpu/vm_area.h ] + deps: [ ] + tags: unit-testable + vm: + safe: yes + sources: [ common/mm/vm.c, + include/nvgpu/vm.h ] + deps: [ ] + tags: M4, unit-testable + +fuse: + safe: yes + owner: Seema K + sources: [ include/nvgpu/gops/fuse.h ] + +perf: + safe: no + owner: Deepak N + children: + perfbuf: + safe: no + sources: [ common/perf/perfbuf.c, + include/nvgpu/perfbuf.h ] + cyclestats_snapshot: + safe: no + sources: [ common/perf/cyclestats_snapshot.c, + include/nvgpu/cyclestats_snapshot.h ] + +cyclestats: + safe: no + owner: 
Deepak N + sources: [ common/cyclestats/cyclestats.c, + common/cyclestats/cyclestats_priv.h, + include/nvgpu/cyclestats.h, + include/nvgpu/gops/cyclestats.h ] + +clk_arb: + safe: yes + gpu: dgpu + sources: [ include/nvgpu/clk_arb.h, + include/nvgpu/gops/clk_arb.h, + common/clk_arb/clk_arb.c, + common/clk_arb/clk_arb_gp10b.c, + common/clk_arb/clk_arb_gp10b.h, + common/clk_arb/clk_arb_gv100.c, + common/clk_arb/clk_arb_gv100.h ] + +therm: + safe: yes + owner: Seshendra G + sources: [ common/therm/therm.c, + include/nvgpu/therm.h, + include/nvgpu/gops/therm.h ] + +pmu: + children: + pmuif: + safe: yes + owner: Sagar K + # Subset of each interfaces should be distributed to individual units. + # TODO! + sources: [ include/nvgpu/pmu/pmuif/ctrlpmgr.h, + include/nvgpu/pmu/pmuif/acr.h, + include/nvgpu/pmu/pmuif/ap.h, + include/nvgpu/pmu/pmuif/cmn.h, + include/nvgpu/pmu/pmuif/perfmon.h, + include/nvgpu/pmu/pmuif/pg.h, + include/nvgpu/pmu/pmuif/pg_rppg.h, + include/nvgpu/pmu/pmuif/init.h, + include/nvgpu/pmu/pmuif/bios.h, + include/nvgpu/pmu/pmuif/pmgr.h, + include/nvgpu/pmu/pmuif/seq.h, + include/nvgpu/pmu/pmuif/rpc.h, + include/nvgpu/pmu/pmuif/nvgpu_cmdif.h ] + boardobj: + safe: yes + owner: Mahantesh K + children: + boardobj: + safe: yes + gpu: dgpu + sources: [ common/pmu/boardobj/boardobj.c, + common/pmu/boardobj/boardobj.h, + common/pmu/boardobj/ucode_boardobj_inf.h ] + boardobjgrp: + safe: yes + gpu: dgpu + sources: [ common/pmu/boardobj/boardobjgrp.c, + common/pmu/boardobj/boardobjgrp_e255.c, + common/pmu/boardobj/boardobjgrp_e32.c, + include/nvgpu/boardobjgrp.h, + include/nvgpu/boardobjgrp_e255.h, + include/nvgpu/boardobjgrp_e32.h, + include/nvgpu/pmu/boardobjgrp_classes.h ] + boardobjgrpmask: + safe: yes + gpu: dgpu + sources: [ common/pmu/boardobj/boardobjgrpmask.c, + include/nvgpu/boardobjgrpmask.h ] + pmu: + safe: yes + owner: Mahantesh K + sources: [ common/pmu/pmu.c, + include/nvgpu/gops/pmu.h, + include/nvgpu/pmu.h ] + + pmu_rtos_init: + safe: no + owner: Mahantesh K + sources: [ common/pmu/pmu_rtos_init.c ] + + pmu_pstate: + safe: yes + gpu: dgpu + owner: Mahantesh K + sources: [ common/pmu/pmu_pstate.c, + include/nvgpu/pmu/pmu_pstate.h ] + + fw: + safe: yes + gpu: dgpu + sources: [ common/pmu/fw/fw.c, + common/pmu/fw/fw_ns_bootstrap.c, + common/pmu/fw/fw_ver_ops.c, + include/nvgpu/pmu/fw.h ] + + lsfm: + safe: yes + gpu: dgpu + sources: [ common/pmu/lsfm/lsfm.c, + common/pmu/lsfm/lsfm_sw_gm20b.c, + common/pmu/lsfm/lsfm_sw_gm20b.h, + common/pmu/lsfm/lsfm_sw_gp10b.c, + common/pmu/lsfm/lsfm_sw_gp10b.h, + common/pmu/lsfm/lsfm_sw_gv100.c, + common/pmu/lsfm/lsfm_sw_gv100.h, + common/pmu/lsfm/lsfm_sw_tu104.c, + common/pmu/lsfm/lsfm_sw_tu104.h, + include/nvgpu/pmu/lsfm.h ] + + perf: + safe: yes + gpu: dgpu + owner: Abdul S + sources: [ common/pmu/perf/change_seq.c, + common/pmu/perf/change_seq.h, + common/pmu/perf/ucode_perf_change_seq_inf.h, + common/pmu/perf/vfe_equ.c, + common/pmu/perf/vfe_equ.h, + common/pmu/perf/vfe_var.c, + common/pmu/perf/vfe_var.h, + common/pmu/perf/ucode_perf_vfe_inf.h, + common/pmu/perf/perf.c, + common/pmu/perf/perf.h, + common/pmu/perf/pstate.c, + common/pmu/perf/pstate.h, + common/pmu/perf/ucode_perf_pstate_inf.h, + include/nvgpu/pmu/perf.h ] + + super_surface: + safe: yes + gpu: dgpu + sources: [ common/pmu/super_surface/super_surface.c, + common/pmu/super_surface/super_surface_priv.h, + include/nvgpu/pmu/super_surface.h ] + + perfmon: + safe: no + sources: [ common/pmu/perfmon/pmu_perfmon.c, + common/pmu/perfmon/pmu_perfmon_sw_gm20b.c, + 
common/pmu/perfmon/pmu_perfmon_sw_gm20b.h, + common/pmu/perfmon/pmu_perfmon_sw_gv11b.c, + common/pmu/perfmon/pmu_perfmon_sw_gv11b.h, + include/nvgpu/pmu/pmu_perfmon.h ] + + clk: + safe: yes + owner: Ramesh M + children: + clk: + safe: yes + gpu: dgpu + sources: [ common/pmu/clk/clk.c, + common/pmu/clk/ucode_clk_inf.h, + common/pmu/clk/clk_domain.c, + common/pmu/clk/clk_domain.h, + common/pmu/clk/clk_fll.c, + common/pmu/clk/clk_fll.h, + common/pmu/clk/clk_prog.c, + common/pmu/clk/clk_prog.h, + common/pmu/clk/clk_vf_point.c, + common/pmu/clk/clk_vf_point.h, + common/pmu/clk/clk_vin.c, + common/pmu/clk/clk_vin.h, + common/pmu/clk/clk.h, + include/nvgpu/gops/clk.h, + include/nvgpu/pmu/clk/clk.h] + ipc: + safe: yes + owner: Sagar K + gpu: dgpu + children: + command: + sources: [ common/pmu/ipc/pmu_cmd.c, + include/nvgpu/pmu/cmd.h ] + + message: + sources: [ common/pmu/ipc/pmu_msg.c, + include/nvgpu/pmu/msg.h ] + + queues: + sources: [ common/pmu/ipc/pmu_queue.c, + include/nvgpu/pmu/queue.h ] + + sequences: + sources: [ common/pmu/ipc/pmu_seq.c, + include/nvgpu/pmu/seq.h ] + + lpwr: + safe: no + gpu: igpu + owner: Divya S + sources: [ common/pmu/lpwr/lpwr.c, + common/pmu/lpwr/rppg.c, + common/pmu/lpwr/lpwr.h, + include/nvgpu/pmu/lpwr.h ] + + pg: + safe: no + gpu: igpu + owner: Divya S + sources: [ common/pmu/pg/pg_sw_gm20b.c, + common/pmu/pg/pg_sw_gm20b.h, + common/pmu/pg/pg_sw_gp106.c, + common/pmu/pg/pg_sw_gp106.h, + common/pmu/pg/pg_sw_gp10b.c, + common/pmu/pg/pg_sw_gp10b.h, + common/pmu/pg/pg_sw_gv11b.c, + common/pmu/pg/pg_sw_gv11b.h, + common/pmu/pg/pmu_aelpg.c, + common/pmu/pg/pmu_pg.c, + common/pmu/pg/pmu_pg.h, + include/nvgpu/pmu/pmu_pg.h ] + + pmgr: + safe: yes + gpu: dgpu + owner: Abdul S + sources: [ common/pmu/pmgr/pmgr.c, + common/pmu/pmgr/pmgrpmu.c, + common/pmu/pmgr/pwrdev.c, + common/pmu/pmgr/pwrmonitor.c, + common/pmu/pmgr/pwrpolicy.c, + common/pmu/pmgr/pmgr.h, + common/pmu/pmgr/pmgrpmu.h, + common/pmu/pmgr/pwrdev.h, + common/pmu/pmgr/pwrmonitor.h, + common/pmu/pmgr/pwrpolicy.h, + include/nvgpu/pmu/pmgr.h ] + + debug: + safe: yes + gpu: dgpu + owner: Sagar K + sources: [ common/pmu/pmu_debug.c, + include/nvgpu/pmu/debug.h ] + + + allocator: + safe: yes + owner: Sagar K + gpu: dgpu + sources: [ common/pmu/allocator.c, + include/nvgpu/pmu/allocator.h ] + + mutex: + safe: yes + owner: Sagar K + gpu: dgpu + sources: [ common/pmu/pmu_mutex.c, + include/nvgpu/pmu/mutex.h ] + + therm: + safe: yes + gpu: dgpu + owner: Abdul S + sources: [ common/pmu/therm/thrm.c, + common/pmu/therm/thrm.h, + common/pmu/therm/therm_channel.c, + common/pmu/therm/therm_channel.h, + common/pmu/therm/therm_dev.c, + common/pmu/therm/therm_dev.h, + common/pmu/therm/ucode_therm_inf.h, + include/nvgpu/pmu/therm.h ] + + volt: + safe: yes + gpu: dgpu + owner: Mahantesh K + sources: [ common/pmu/volt/volt.c, + common/pmu/volt/volt.h, + common/pmu/volt/volt_dev.c, + common/pmu/volt/volt_policy.c, + common/pmu/volt/volt_rail.c, + common/pmu/volt/volt_dev.h, + common/pmu/volt/volt_policy.h, + common/pmu/volt/volt_rail.h, + common/pmu/volt/ucode_volt_inf.h, + include/nvgpu/pmu/volt.h ] + +sec2: + safe: yes + owner: Sagar K + children: + sec2: + safe: yes + gpu: dgpu + sources: [ common/sec2/sec2.c, + include/nvgpu/sec2/sec2.h, + include/nvgpu/sec2/sec2_cmn.h, + include/nvgpu/gops/sec2.h ] + ipc: + safe: yes + owner: Sagar K + gpu: dgpu + children: + command: + sources: [ common/sec2/ipc/sec2_cmd.c, + include/nvgpu/sec2/cmd.h ] + message: + sources: [ common/sec2/ipc/sec2_msg.c, + include/nvgpu/sec2/msg.h, + 
include/nvgpu/sec2/sec2_cmn.h ] + queues: + sources: [ common/sec2/ipc/sec2_queue.c, + include/nvgpu/sec2/queue.h ] + sequences: + sources: [ common/sec2/ipc/sec2_seq.c, + include/nvgpu/sec2/seq.h ] + allocator: + safe: yes + owner: Sagar K + gpu: dgpu + sources: [ common/sec2/sec2_allocator.c, + include/nvgpu/sec2/allocator.h ] + lsfm: + safe: yes + owner: Sagar K + gpu: dgpu + sources: [ common/sec2/sec2_lsfm.c, + include/nvgpu/sec2/lsfm.h ] + +power_features: + owner: Seema K + children: + power_features: + safe: no + sources: [ common/power_features/power_features.c, + include/nvgpu/power_features/power_features.h ] + cg: + safe: yes + sources: [ common/power_features/cg/cg.c, + include/nvgpu/power_features/cg.h, + include/nvgpu/gops/cg.h ] + pg: + safe: no + sources: [ common/power_features/pg/pg.c, + include/nvgpu/power_features/pg.h ] + +floorsweep: + owner: Divya S + safe: no + sources: [ include/nvgpu/gops/floorsweep.h ] + +swdebug: + owner: Alex W + safe: no + sources: [ common/swdebug/profile.c, + include/nvgpu/swprofile.h, + include/nvgpu/fifo/swprofile.h ] + +grmgr: + owner: Lakshmanan M + safe: no + sources: [ common/grmgr/grmgr.c, + include/nvgpu/grmgr.h, + include/nvgpu/mig.h, + include/nvgpu/gops/grmgr.h ] + +cic: + owner: Tejal K + safe: yes + sources: [ common/cic/cic.c, + common/cic/cic_intr.c, + common/cic/ce_cic.c, + common/cic/ctxsw_cic.c, + common/cic/msg_cic.c, + common/cic/ecc_cic.c, + common/cic/host_cic.c, + common/cic/gr_cic.c, + common/cic/pri_cic.c, + common/cic/pmu_cic.c, + common/cic/mmu_cic.c, + common/cic/cic_priv.h, + include/nvgpu/gops/cic.h, + include/nvgpu/cic.h ] + +## +## HAL units. Currently they are under common but this needs to change. +## We are moving these to a top level directory. +## +hal: + safe: no + children: + !include nvgpu-hal.yaml diff --git a/arch/nvgpu-gpu_hw.yaml b/arch/nvgpu-gpu_hw.yaml new file mode 100644 index 000000000..7871451d0 --- /dev/null +++ b/arch/nvgpu-gpu_hw.yaml @@ -0,0 +1,224 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All Rights Reserved. +# +# Define meta elements and units for describing GPU HW interactions in +# nvgpu. 
+# + +headers: + safe: yes + owner: Terje B + sources: [ include/nvgpu/hw/gk20a/hw_bus_gk20a.h, + include/nvgpu/hw/gk20a/hw_ccsr_gk20a.h, + include/nvgpu/hw/gk20a/hw_ce2_gk20a.h, + include/nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h, + include/nvgpu/hw/gk20a/hw_falcon_gk20a.h, + include/nvgpu/hw/gk20a/hw_fb_gk20a.h, + include/nvgpu/hw/gk20a/hw_fifo_gk20a.h, + include/nvgpu/hw/gk20a/hw_flush_gk20a.h, + include/nvgpu/hw/gk20a/hw_gmmu_gk20a.h, + include/nvgpu/hw/gk20a/hw_gr_gk20a.h, + include/nvgpu/hw/gk20a/hw_ltc_gk20a.h, + include/nvgpu/hw/gk20a/hw_mc_gk20a.h, + include/nvgpu/hw/gk20a/hw_pbdma_gk20a.h, + include/nvgpu/hw/gk20a/hw_perf_gk20a.h, + include/nvgpu/hw/gk20a/hw_pram_gk20a.h, + include/nvgpu/hw/gk20a/hw_pri_ringmaster_gk20a.h, + include/nvgpu/hw/gk20a/hw_pri_ringstation_gpc_gk20a.h, + include/nvgpu/hw/gk20a/hw_pri_ringstation_sys_gk20a.h, + include/nvgpu/hw/gk20a/hw_proj_gk20a.h, + include/nvgpu/hw/gk20a/hw_pwr_gk20a.h, + include/nvgpu/hw/gk20a/hw_ram_gk20a.h, + include/nvgpu/hw/gk20a/hw_therm_gk20a.h, + include/nvgpu/hw/gk20a/hw_timer_gk20a.h, + include/nvgpu/hw/gk20a/hw_top_gk20a.h, + include/nvgpu/hw/gk20a/hw_trim_gk20a.h, + include/nvgpu/hw/gm20b/hw_bus_gm20b.h, + include/nvgpu/hw/gm20b/hw_ccsr_gm20b.h, + include/nvgpu/hw/gm20b/hw_ce2_gm20b.h, + include/nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h, + include/nvgpu/hw/gm20b/hw_falcon_gm20b.h, + include/nvgpu/hw/gm20b/hw_fb_gm20b.h, + include/nvgpu/hw/gm20b/hw_fifo_gm20b.h, + include/nvgpu/hw/gm20b/hw_flush_gm20b.h, + include/nvgpu/hw/gm20b/hw_fuse_gm20b.h, + include/nvgpu/hw/gm20b/hw_gmmu_gm20b.h, + include/nvgpu/hw/gm20b/hw_gr_gm20b.h, + include/nvgpu/hw/gm20b/hw_ltc_gm20b.h, + include/nvgpu/hw/gm20b/hw_mc_gm20b.h, + include/nvgpu/hw/gm20b/hw_pbdma_gm20b.h, + include/nvgpu/hw/gm20b/hw_perf_gm20b.h, + include/nvgpu/hw/gm20b/hw_pram_gm20b.h, + include/nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h, + include/nvgpu/hw/gm20b/hw_pri_ringstation_gpc_gm20b.h, + include/nvgpu/hw/gm20b/hw_pri_ringstation_sys_gm20b.h, + include/nvgpu/hw/gm20b/hw_proj_gm20b.h, + include/nvgpu/hw/gm20b/hw_pwr_gm20b.h, + include/nvgpu/hw/gm20b/hw_ram_gm20b.h, + include/nvgpu/hw/gm20b/hw_therm_gm20b.h, + include/nvgpu/hw/gm20b/hw_timer_gm20b.h, + include/nvgpu/hw/gm20b/hw_top_gm20b.h, + include/nvgpu/hw/gm20b/hw_trim_gm20b.h, + include/nvgpu/hw/gp106/hw_bus_gp106.h, + include/nvgpu/hw/gp106/hw_ccsr_gp106.h, + include/nvgpu/hw/gp106/hw_ce_gp106.h, + include/nvgpu/hw/gp106/hw_ctxsw_prog_gp106.h, + include/nvgpu/hw/gp106/hw_falcon_gp106.h, + include/nvgpu/hw/gp106/hw_fb_gp106.h, + include/nvgpu/hw/gp106/hw_fbpa_gp106.h, + include/nvgpu/hw/gp106/hw_fifo_gp106.h, + include/nvgpu/hw/gp106/hw_flush_gp106.h, + include/nvgpu/hw/gp106/hw_fuse_gp106.h, + include/nvgpu/hw/gp106/hw_gmmu_gp106.h, + include/nvgpu/hw/gp106/hw_gr_gp106.h, + include/nvgpu/hw/gp106/hw_ltc_gp106.h, + include/nvgpu/hw/gp106/hw_mc_gp106.h, + include/nvgpu/hw/gp106/hw_pbdma_gp106.h, + include/nvgpu/hw/gp106/hw_perf_gp106.h, + include/nvgpu/hw/gp106/hw_pnvdec_gp106.h, + include/nvgpu/hw/gp106/hw_pram_gp106.h, + include/nvgpu/hw/gp106/hw_pri_ringmaster_gp106.h, + include/nvgpu/hw/gp106/hw_pri_ringstation_gpc_gp106.h, + include/nvgpu/hw/gp106/hw_pri_ringstation_sys_gp106.h, + include/nvgpu/hw/gp106/hw_proj_gp106.h, + include/nvgpu/hw/gp106/hw_psec_gp106.h, + include/nvgpu/hw/gp106/hw_pwr_gp106.h, + include/nvgpu/hw/gp106/hw_ram_gp106.h, + include/nvgpu/hw/gp106/hw_therm_gp106.h, + include/nvgpu/hw/gp106/hw_timer_gp106.h, + include/nvgpu/hw/gp106/hw_top_gp106.h, + include/nvgpu/hw/gp106/hw_trim_gp106.h, + 
include/nvgpu/hw/gp106/hw_xp_gp106.h, + include/nvgpu/hw/gp106/hw_xve_gp106.h, + include/nvgpu/hw/gp10b/hw_bus_gp10b.h, + include/nvgpu/hw/gp10b/hw_ccsr_gp10b.h, + include/nvgpu/hw/gp10b/hw_ce_gp10b.h, + include/nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h, + include/nvgpu/hw/gp10b/hw_falcon_gp10b.h, + include/nvgpu/hw/gp10b/hw_fb_gp10b.h, + include/nvgpu/hw/gp10b/hw_fifo_gp10b.h, + include/nvgpu/hw/gp10b/hw_flush_gp10b.h, + include/nvgpu/hw/gp10b/hw_fuse_gp10b.h, + include/nvgpu/hw/gp10b/hw_gmmu_gp10b.h, + include/nvgpu/hw/gp10b/hw_gr_gp10b.h, + include/nvgpu/hw/gp10b/hw_ltc_gp10b.h, + include/nvgpu/hw/gp10b/hw_mc_gp10b.h, + include/nvgpu/hw/gp10b/hw_pbdma_gp10b.h, + include/nvgpu/hw/gp10b/hw_perf_gp10b.h, + include/nvgpu/hw/gp10b/hw_pram_gp10b.h, + include/nvgpu/hw/gp10b/hw_pri_ringmaster_gp10b.h, + include/nvgpu/hw/gp10b/hw_pri_ringstation_gpc_gp10b.h, + include/nvgpu/hw/gp10b/hw_pri_ringstation_sys_gp10b.h, + include/nvgpu/hw/gp10b/hw_proj_gp10b.h, + include/nvgpu/hw/gp10b/hw_pwr_gp10b.h, + include/nvgpu/hw/gp10b/hw_ram_gp10b.h, + include/nvgpu/hw/gp10b/hw_therm_gp10b.h, + include/nvgpu/hw/gp10b/hw_timer_gp10b.h, + include/nvgpu/hw/gp10b/hw_top_gp10b.h, + include/nvgpu/hw/gv100/hw_bus_gv100.h, + include/nvgpu/hw/gv100/hw_ccsr_gv100.h, + include/nvgpu/hw/gv100/hw_ce_gv100.h, + include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h, + include/nvgpu/hw/gv100/hw_falcon_gv100.h, + include/nvgpu/hw/gv100/hw_fb_gv100.h, + include/nvgpu/hw/gv100/hw_fifo_gv100.h, + include/nvgpu/hw/gv100/hw_flush_gv100.h, + include/nvgpu/hw/gv100/hw_fuse_gv100.h, + include/nvgpu/hw/gv100/hw_gmmu_gv100.h, + include/nvgpu/hw/gv100/hw_gr_gv100.h, + include/nvgpu/hw/gv100/hw_ioctrl_gv100.h, + include/nvgpu/hw/gv100/hw_ioctrlmif_gv100.h, + include/nvgpu/hw/gv100/hw_ltc_gv100.h, + include/nvgpu/hw/gv100/hw_mc_gv100.h, + include/nvgpu/hw/gv100/hw_minion_gv100.h, + include/nvgpu/hw/gv100/hw_nvl_gv100.h, + include/nvgpu/hw/gv100/hw_nvlinkip_discovery_gv100.h, + include/nvgpu/hw/gv100/hw_nvlipt_gv100.h, + include/nvgpu/hw/gv100/hw_nvtlc_gv100.h, + include/nvgpu/hw/gv100/hw_pbdma_gv100.h, + include/nvgpu/hw/gv100/hw_perf_gv100.h, + include/nvgpu/hw/gv100/hw_pgsp_gv100.h, + include/nvgpu/hw/gv100/hw_pram_gv100.h, + include/nvgpu/hw/gv100/hw_pri_ringmaster_gv100.h, + include/nvgpu/hw/gv100/hw_pri_ringstation_gpc_gv100.h, + include/nvgpu/hw/gv100/hw_pri_ringstation_sys_gv100.h, + include/nvgpu/hw/gv100/hw_proj_gv100.h, + include/nvgpu/hw/gv100/hw_pwr_gv100.h, + include/nvgpu/hw/gv100/hw_ram_gv100.h, + include/nvgpu/hw/gv100/hw_therm_gv100.h, + include/nvgpu/hw/gv100/hw_timer_gv100.h, + include/nvgpu/hw/gv100/hw_top_gv100.h, + include/nvgpu/hw/gv100/hw_trim_gv100.h, + include/nvgpu/hw/gv100/hw_usermode_gv100.h, + include/nvgpu/hw/gv100/hw_xp_gv100.h, + include/nvgpu/hw/gv100/hw_xve_gv100.h, + include/nvgpu/hw/gv11b/hw_bus_gv11b.h, + include/nvgpu/hw/gv11b/hw_ccsr_gv11b.h, + include/nvgpu/hw/gv11b/hw_ce_gv11b.h, + include/nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h, + include/nvgpu/hw/gv11b/hw_falcon_gv11b.h, + include/nvgpu/hw/gv11b/hw_fb_gv11b.h, + include/nvgpu/hw/gv11b/hw_fifo_gv11b.h, + include/nvgpu/hw/gv11b/hw_flush_gv11b.h, + include/nvgpu/hw/gv11b/hw_fuse_gv11b.h, + include/nvgpu/hw/gv11b/hw_gmmu_gv11b.h, + include/nvgpu/hw/gv11b/hw_gr_gv11b.h, + include/nvgpu/hw/gv11b/hw_ltc_gv11b.h, + include/nvgpu/hw/gv11b/hw_mc_gv11b.h, + include/nvgpu/hw/gv11b/hw_pbdma_gv11b.h, + include/nvgpu/hw/gv11b/hw_perf_gv11b.h, + include/nvgpu/hw/gv11b/hw_pram_gv11b.h, + include/nvgpu/hw/gv11b/hw_pri_ringmaster_gv11b.h, + 
include/nvgpu/hw/gv11b/hw_pri_ringstation_gpc_gv11b.h, + include/nvgpu/hw/gv11b/hw_pri_ringstation_sys_gv11b.h, + include/nvgpu/hw/gv11b/hw_pri_ringstation_fbp_gv11b.h, + include/nvgpu/hw/gv11b/hw_proj_gv11b.h, + include/nvgpu/hw/gv11b/hw_pwr_gv11b.h, + include/nvgpu/hw/gv11b/hw_ram_gv11b.h, + include/nvgpu/hw/gv11b/hw_therm_gv11b.h, + include/nvgpu/hw/gv11b/hw_timer_gv11b.h, + include/nvgpu/hw/gv11b/hw_top_gv11b.h, + include/nvgpu/hw/gv11b/hw_usermode_gv11b.h, + include/nvgpu/hw/tu104/hw_bus_tu104.h, + include/nvgpu/hw/tu104/hw_ccsr_tu104.h, + include/nvgpu/hw/tu104/hw_ce_tu104.h, + include/nvgpu/hw/tu104/hw_ctrl_tu104.h, + include/nvgpu/hw/tu104/hw_ctxsw_prog_tu104.h, + include/nvgpu/hw/tu104/hw_falcon_tu104.h, + include/nvgpu/hw/tu104/hw_fb_tu104.h, + include/nvgpu/hw/tu104/hw_fbpa_tu104.h, + include/nvgpu/hw/tu104/hw_fifo_tu104.h, + include/nvgpu/hw/tu104/hw_flush_tu104.h, + include/nvgpu/hw/tu104/hw_func_tu104.h, + include/nvgpu/hw/tu104/hw_fuse_tu104.h, + include/nvgpu/hw/tu104/hw_gc6_tu104.h, + include/nvgpu/hw/tu104/hw_gmmu_tu104.h, + include/nvgpu/hw/tu104/hw_gr_tu104.h, + include/nvgpu/hw/tu104/hw_ioctrl_tu104.h, + include/nvgpu/hw/tu104/hw_ioctrlmif_tu104.h, + include/nvgpu/hw/tu104/hw_ltc_tu104.h, + include/nvgpu/hw/tu104/hw_mc_tu104.h, + include/nvgpu/hw/tu104/hw_minion_tu104.h, + include/nvgpu/hw/tu104/hw_nvl_tu104.h, + include/nvgpu/hw/tu104/hw_nvlinkip_discovery_tu104.h, + include/nvgpu/hw/tu104/hw_nvlipt_tu104.h, + include/nvgpu/hw/tu104/hw_nvtlc_tu104.h, + include/nvgpu/hw/tu104/hw_pbdma_tu104.h, + include/nvgpu/hw/tu104/hw_perf_tu104.h, + include/nvgpu/hw/tu104/hw_pgsp_tu104.h, + include/nvgpu/hw/tu104/hw_pnvdec_tu104.h, + include/nvgpu/hw/tu104/hw_pram_tu104.h, + include/nvgpu/hw/tu104/hw_pri_ringmaster_tu104.h, + include/nvgpu/hw/tu104/hw_pri_ringstation_gpc_tu104.h, + include/nvgpu/hw/tu104/hw_pri_ringstation_sys_tu104.h, + include/nvgpu/hw/tu104/hw_pri_ringstation_fbp_tu104.h, + include/nvgpu/hw/tu104/hw_proj_tu104.h, + include/nvgpu/hw/tu104/hw_psec_tu104.h, + include/nvgpu/hw/tu104/hw_pwr_tu104.h, + include/nvgpu/hw/tu104/hw_ram_tu104.h, + include/nvgpu/hw/tu104/hw_therm_tu104.h, + include/nvgpu/hw/tu104/hw_timer_tu104.h, + include/nvgpu/hw/tu104/hw_top_tu104.h, + include/nvgpu/hw/tu104/hw_trim_tu104.h, + include/nvgpu/hw/tu104/hw_usermode_tu104.h, + include/nvgpu/hw/tu104/hw_xp_tu104.h, + include/nvgpu/hw/tu104/hw_xve_tu104.h ] diff --git a/arch/nvgpu-hal-new.yaml b/arch/nvgpu-hal-new.yaml new file mode 100644 index 000000000..979451eec --- /dev/null +++ b/arch/nvgpu-hal-new.yaml @@ -0,0 +1,836 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. +# +# HAL units. These are the units that have access to HW. 
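+#
+# As in arch/nvgpu-common.yaml, units here are frequently split into pairs:
+# the "_fusa" entry carries safe: yes, and its unsuffixed counterpart carries
+# safe: no; units without a "_fusa" counterpart set "safe" individually.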
+# + +bus_fusa: + safe: yes + owner: Terje B + sources: [ include/nvgpu/gops/bus.h, + hal/bus/bus_gk20a_fusa.c, + hal/bus/bus_gk20a.h, + hal/bus/bus_gm20b_fusa.c, hal/bus/bus_gm20b.h, + hal/bus/bus_gp10b_fusa.c, hal/bus/bus_gp10b.h, + hal/bus/bus_gv11b_fusa.c, hal/bus/bus_gv11b.h ] + +bus: + safe: no + owner: Terje B + sources: [ hal/bus/bus_gk20a.c, + hal/bus/bus_gv100.c, hal/bus/bus_gv100.h, + hal/bus/bus_tu104.c, hal/bus/bus_tu104.h ] + +ltc: + owner: Seshendra G + children: + ltc_fusa: + safe: yes + sources: [ hal/ltc/ltc_gm20b_fusa.c, + hal/ltc/ltc_gm20b.h, + hal/ltc/ltc_gp10b_fusa.c, + hal/ltc/ltc_gp10b.h, + hal/ltc/ltc_gv11b_fusa.c, + hal/ltc/ltc_gv11b.h ] + ltc: + safe: no + sources: [ hal/ltc/ltc_gm20b.c, + hal/ltc/ltc_gm20b_dbg.c, + hal/ltc/ltc_gp10b.c, + hal/ltc/ltc_tu104.c, + hal/ltc/ltc_tu104.h ] + intr_fusa: + safe: yes + sources: [ hal/ltc/intr/ltc_intr_gp10b_fusa.c, + hal/ltc/intr/ltc_intr_gp10b.h, + hal/ltc/intr/ltc_intr_gv11b_fusa.c, + hal/ltc/intr/ltc_intr_gv11b.h ] + intr: + safe: no + sources: [ hal/ltc/intr/ltc_intr_gm20b.c, + hal/ltc/intr/ltc_intr_gm20b.h, + hal/ltc/intr/ltc_intr_gp10b.c ] + +init_fusa: + safe: yes + owner: Philip E + sources: [ hal/init/hal_gv11b.c, + hal/init/hal_gv11b.h, + hal/init/hal_init.c, + hal/init/hal_gv11b_litter.c, + hal/init/hal_gv11b_litter.h ] + +init: + safe: no + owner: Philip E + sources: [ hal/init/hal_gm20b.c, + hal/init/hal_gm20b.h, + hal/init/hal_gp10b.c, + hal/init/hal_gp10b.h, + hal/init/hal_tu104.c, + hal/init/hal_tu104.h, + hal/init/hal_gm20b_litter.c, + hal/init/hal_gm20b_litter.h, + hal/init/hal_gp10b_litter.c, + hal/init/hal_gp10b_litter.h, + hal/init/hal_tu104_litter.c, + hal/init/hal_tu104_litter.h ] + +priv_ring_fusa: + safe: yes + owner: Seema K + sources: [ include/nvgpu/gops/priv_ring.h, + hal/priv_ring/priv_ring_gm20b_fusa.c, + hal/priv_ring/priv_ring_gm20b.h, + hal/priv_ring/priv_ring_gp10b_fusa.c, + hal/priv_ring/priv_ring_gp10b.h ] + +priv_ring: + safe: no + owner: Seema K + sources: [ hal/priv_ring/priv_ring_gm20b.c, + hal/priv_ring/priv_ring_gv11b.c, + hal/priv_ring/priv_ring_gv11b.h ] + +ptimer_fusa: + safe: yes + owner: Terje B + sources: [ hal/ptimer/ptimer_gk20a_fusa.c, + hal/ptimer/ptimer_gk20a.h ] + +ptimer: + safe: no + owner: Deepak N + sources: [ hal/ptimer/ptimer_gp10b.c, + hal/ptimer/ptimer_gp10b.h, + hal/ptimer/ptimer_gv11b.c, + hal/ptimer/ptimer_gv11b.h ] + +cg_fusa: + safe: yes + owner: Seema K + sources: [ hal/power_features/cg/gating_reglist.h, + hal/power_features/cg/gv11b_gating_reglist.c, + hal/power_features/cg/gv11b_gating_reglist.h ] + +cg: + safe: no + owner: Seema K + sources: [ hal/power_features/cg/gm20b_gating_reglist.c, + hal/power_features/cg/gm20b_gating_reglist.h, + hal/power_features/cg/gp10b_gating_reglist.c, + hal/power_features/cg/gp10b_gating_reglist.h, + hal/power_features/cg/tu104_gating_reglist.c, + hal/power_features/cg/tu104_gating_reglist.h ] + +rc: + safe: no + owner: Seema K + sources: [ hal/rc/rc_gk20a.c, + hal/rc/rc_gk20a.h, + hal/rc/rc_gv11b.c, + hal/rc/rc_gv11b.h ] + +fbpa: + safe: yes + owner: Seshendra G + gpu: dgpu + sources: [ hal/fbpa/fbpa_tu104.c, hal/fbpa/fbpa_tu104.h ] + +clk: + safe: yes + owner: Ramesh M + gpu: dgpu + sources: [ hal/clk/clk_tu104.c, + hal/clk/clk_tu104.h ] + +clk_mon: + safe: yes + owner: Ramesh M + gpu: dgpu + sources: [ hal/clk/clk_mon_tu104.c, + hal/clk/clk_mon_tu104.h, + include/nvgpu/clk_mon.h] + +clk_igpu: + safe: no + owner: Ramesh M + gpu: igpu + sources: [ hal/clk/clk_gk20a.h, + hal/clk/clk_gm20b.c, + hal/clk/clk_gm20b.h ] + 
+fifo: + safe: yes + owner: Seema K + children: + userd: + safe: no + sources: [ hal/fifo/userd_gk20a.c, + hal/fifo/userd_gk20a.h, + hal/fifo/userd_gv11b.c, + hal/fifo/userd_gv11b.h ] + ramfc_fusa: + safe: yes + sources: [ hal/fifo/ramin_gk20a_fusa.c, + hal/fifo/ramfc_gp10b_fusa.c, + hal/fifo/ramfc_gp10b.h, + hal/fifo/ramfc_gv11b_fusa.c, + hal/fifo/ramfc_gv11b.h ] + + ramfc: + safe: no + sources: [ hal/fifo/ramfc_gk20a.c, + hal/fifo/ramfc_gk20a.h, + hal/fifo/ramfc_gp10b.c, + hal/fifo/ramfc_tu104.c, + hal/fifo/ramfc_tu104.h ] + + ramin_fusa: + safe: yes + sources: [ hal/fifo/ramin_gv11b.h, + hal/fifo/ramin_gm20b.h, + hal/fifo/ramin_gv11b_fusa.c, + hal/fifo/ramin_gm20b_fusa.c ] + + ramin: + safe: no + sources: [ hal/fifo/ramin_gk20a.h, + hal/fifo/ramin_gp10b.h, + hal/fifo/ramin_gk20a.c, + hal/fifo/ramin_gp10b.c, + hal/fifo/ramin_tu104.c, + hal/fifo/ramin_tu104.h ] + + runlist_fusa: + safe: yes + sources: [ hal/fifo/runlist_fifo_gk20a.c, + hal/fifo/runlist_fifo_gk20a_fusa.c, + hal/fifo/runlist_fifo_gk20a.h, + hal/fifo/runlist_fifo_gv11b.c, + hal/fifo/runlist_fifo_gv11b_fusa.c, + hal/fifo/runlist_fifo_gv11b.h, + hal/fifo/runlist_ram_gv11b_fusa.c, + hal/fifo/runlist_ram_gv11b.h ] + + runlist: + safe: no + sources: [ hal/fifo/runlist_fifo_gv100.c, + hal/fifo/runlist_fifo_gv100.h, + hal/fifo/runlist_fifo_tu104.c, + hal/fifo/runlist_fifo_tu104.h, + hal/fifo/runlist_ram_gk20a.c, + hal/fifo/runlist_ram_gk20a.h, + hal/fifo/runlist_ram_tu104.c, + hal/fifo/runlist_ram_tu104.h ] + + channel_fusa: + safe: yes + sources: [ hal/fifo/channel_gk20a_fusa.c, + hal/fifo/channel_gk20a.h, + hal/fifo/channel_gm20b_fusa.c, + hal/fifo/channel_gm20b.h, + hal/fifo/channel_gv11b_fusa.c, + hal/fifo/channel_gv11b.h ] + channel: + safe: no + sources: [ hal/fifo/channel_gk20a.c, + hal/fifo/channel_gm20b.c, + hal/fifo/channel_gv100.c, + hal/fifo/channel_gv100.h ] + tsg_fusa: + safe: yes + sources: [ hal/fifo/tsg_gk20a.h, + hal/fifo/tsg_gk20a_fusa.c, + hal/fifo/tsg_gv11b.h, + hal/fifo/tsg_gv11b_fusa.c ] + + tsg: + safe: no + sources: [ hal/fifo/tsg_gk20a.c ] + + fifo_fusa: + safe: yes + sources: [ hal/fifo/fifo_intr_gk20a_fusa.c, + hal/fifo/fifo_intr_gk20a.h, + hal/fifo/fifo_intr_gv11b_fusa.c, + hal/fifo/fifo_intr_gv11b.h, + hal/fifo/ctxsw_timeout_gv11b_fusa.c, + hal/fifo/ctxsw_timeout_gv11b.h, + hal/fifo/fifo_gk20a_fusa.c, + hal/fifo/fifo_gk20a.h, + hal/fifo/fifo_gv11b_fusa.c, + hal/fifo/fifo_gv11b.h ] + fifo: + safe: no + sources: [ hal/fifo/fifo_intr_gk20a.c, + hal/fifo/fifo_intr_gv100.c, + hal/fifo/fifo_intr_gv100.h, + hal/fifo/ctxsw_timeout_gk20a.c, + hal/fifo/ctxsw_timeout_gk20a.h, + hal/fifo/mmu_fault_gk20a.c, + hal/fifo/mmu_fault_gk20a.h, + hal/fifo/mmu_fault_gm20b.c, + hal/fifo/mmu_fault_gm20b.h, + hal/fifo/mmu_fault_gp10b.c, + hal/fifo/mmu_fault_gp10b.h, + hal/fifo/fifo_gk20a.c, + hal/fifo/fifo_tu104.c, + hal/fifo/fifo_tu104.h ] + + engine_status_fusa: + safe: yes + sources: [ hal/fifo/engine_status_gm20b_fusa.c, + hal/fifo/engine_status_gm20b.h, + hal/fifo/engine_status_gv100_fusa.c, + hal/fifo/engine_status_gv100.h ] + engine_status: + safe: no + sources: [ hal/fifo/engine_status_gm20b.c ] + + engines_fusa: + safe: yes + sources: [ hal/fifo/engines_gp10b_fusa.c, + hal/fifo/engines_gp10b.h, + hal/fifo/engines_gv11b_fusa.c, + hal/fifo/engines_gv11b.h ] + + engines: + safe: no + sources: [ hal/fifo/engines_gm20b.c, + hal/fifo/engines_gm20b.h ] + + pbdma_status: + safe: yes + sources: [ hal/fifo/pbdma_status_gm20b_fusa.c, + hal/fifo/pbdma_status_gm20b.h ] + + pbdma_fusa: + safe: yes + sources: [ 
hal/fifo/pbdma_gm20b_fusa.c, + hal/fifo/pbdma_gm20b.h, + hal/fifo/pbdma_gp10b_fusa.c, + hal/fifo/pbdma_gp10b.h, + hal/fifo/pbdma_gv11b_fusa.c, + hal/fifo/pbdma_gv11b.h ] + + pbdma: + safe: no + sources: [ hal/fifo/pbdma_gm20b.c, + hal/fifo/pbdma_gp10b.c, + hal/fifo/pbdma_tu104.c, + hal/fifo/pbdma_tu104.h ] + + preempt_fusa: + safe: yes + sources: [ hal/fifo/preempt_gv11b_fusa.c, + hal/fifo/preempt_gv11b.h ] + + preempt: + safe: no + sources: [ hal/fifo/preempt_gk20a.c, + hal/fifo/preempt_gk20a.h ] + + usermode_fusa: + safe: yes + sources: [ hal/fifo/usermode_gv11b_fusa.c, + hal/fifo/usermode_gv11b.h ] + + usermode: + safe: no + sources: [ hal/fifo/usermode_tu104.c, + hal/fifo/usermode_tu104.h ] + +fuse_fusa: + safe: yes + owner: Seema K + sources: [ hal/fuse/fuse_gm20b_fusa.c, + hal/fuse/fuse_gm20b.h, + hal/fuse/fuse_gp10b_fusa.c, + hal/fuse/fuse_gp10b.h ] + +fuse: + safe: no + owner: Seema K + sources: [ hal/fuse/fuse_gm20b.c, + hal/fuse/fuse_gp106.c, + hal/fuse/fuse_gp106.h, + hal/fuse/fuse_tu104.c, + hal/fuse/fuse_tu104.h ] + +gsp: + safe: no + sources: [ hal/gsp/gsp_tu104.c, + hal/gsp/gsp_tu104.h ] + +mm: + safe: yes + owner: Alex W + children: + gmmu_fusa: + safe: yes + sources: [ hal/mm/gmmu/gmmu_gk20a_fusa.c, + hal/mm/gmmu/gmmu_gk20a.h, + hal/mm/gmmu/gmmu_gm20b_fusa.c, + hal/mm/gmmu/gmmu_gm20b.h, + hal/mm/gmmu/gmmu_gp10b_fusa.c, + hal/mm/gmmu/gmmu_gp10b.h, + hal/mm/gmmu/gmmu_gv11b_fusa.c, + hal/mm/gmmu/gmmu_gv11b.h ] + gmmu: + safe: no + sources: [ hal/mm/gmmu/gmmu_gk20a.c, + hal/mm/gmmu/gmmu_gm20b.c] + cache_fusa: + safe: yes + sources: [ hal/mm/cache/flush_gk20a_fusa.c, + hal/mm/cache/flush_gk20a.h, + hal/mm/cache/flush_gv11b_fusa.c, + hal/mm/cache/flush_gv11b.h ] + cache: + safe: no + sources: [ hal/mm/cache/flush_gk20a.c ] + mmu_fault: + safe: yes + sources: [ hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c, + hal/mm/mmu_fault/mmu_fault_gv11b.h ] + mm_fusa: + safe: yes + sources: [ hal/mm/mm_gp10b_fusa.c, + hal/mm/mm_gp10b.h, + hal/mm/mm_gv11b_fusa.c, + hal/mm/mm_gv11b.h] + mm: + safe: no + sources: [ hal/mm/mm_gk20a.c, + hal/mm/mm_gk20a.h, + hal/mm/mm_gm20b.c, + hal/mm/mm_gm20b.h, + hal/mm/mm_gv100.c, + hal/mm/mm_gv100.h, + hal/mm/mm_tu104.c, + hal/mm/mm_tu104.h] + +sync: + safe: yes + owner: Thomas F + children: + sema: + safe: no + sources: [ hal/sync/sema_cmdbuf_gk20a.c, + hal/sync/sema_cmdbuf_gk20a.h, + hal/sync/sema_cmdbuf_gv11b.c, + hal/sync/sema_cmdbuf_gv11b.h ] + syncpt: + safe: no + sources: [ hal/sync/syncpt_cmdbuf_gk20a.c, + hal/sync/syncpt_cmdbuf_gk20a.h, + hal/sync/syncpt_cmdbuf_gv11b.c] + syncpt_fusa: + safe: yes + sources: [ hal/sync/syncpt_cmdbuf_gv11b_fusa.c, + hal/sync/syncpt_cmdbuf_gv11b.h ] + + +therm_fusa: + safe: yes + owner: Seshendra G + sources: [ hal/therm/therm_gv11b_fusa.c, + hal/therm/therm_gv11b.h ] + +therm: + safe: no + owner: Seshendra G + sources: [ hal/therm/therm_gm20b.c, + hal/therm/therm_gm20b.h, + hal/therm/therm_tu104.c, + hal/therm/therm_tu104.h, + hal/therm/therm_gp10b.c, + hal/therm/therm_gp10b.h ] + +cbc: + safe: no + owner: Seshendra G + sources: [ hal/cbc/cbc_gm20b.c, + hal/cbc/cbc_gm20b.h, + hal/cbc/cbc_gp10b.c, + hal/cbc/cbc_gp10b.h, + hal/cbc/cbc_gv11b.c, + hal/cbc/cbc_gv11b.h, + hal/cbc/cbc_tu104.c, + hal/cbc/cbc_tu104.h ] + +ce_fusa: + safe: yes + owner: Thomas F + sources: [ hal/ce/ce_gp10b_fusa.c, + hal/ce/ce_gp10b.h, + hal/ce/ce_gv11b_fusa.c, + hal/ce/ce_gv11b.h ] + +ce: + safe: no + owner: Thomas F + sources: [ hal/ce/ce2_gk20a.c, + hal/ce/ce2_gk20a.h, + hal/ce/ce_tu104.c, + hal/ce/ce_tu104.h ] + +gr: + safe: yes + owner: Deepak N 
+ children: + ecc_fusa: + safe: yes + sources: [hal/gr/ecc/ecc_gv11b_fusa.c, + hal/gr/ecc/ecc_gv11b.h ] + ecc: + safe: no + sources: [hal/gr/ecc/ecc_gv11b.c, + hal/gr/ecc/ecc_gp10b.c, + hal/gr/ecc/ecc_gp10b.h ] + ctxsw_prog_fusa: + safe: yes + sources: [ hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c, + hal/gr/ctxsw_prog/ctxsw_prog_gm20b.h, + hal/gr/ctxsw_prog/ctxsw_prog_gp10b_fusa.c, + hal/gr/ctxsw_prog/ctxsw_prog_gp10b.h, + hal/gr/ctxsw_prog/ctxsw_prog_gv11b_fusa.c, + hal/gr/ctxsw_prog/ctxsw_prog_gv11b.h ] + ctxsw_prog: + safe: no + sources: [ hal/gr/ctxsw_prog/ctxsw_prog_gm20b.c, + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c, + hal/gr/ctxsw_prog/ctxsw_prog_gp10b.c, + hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c] + config_fusa: + safe: yes + sources: [ hal/gr/config/gr_config_gm20b_fusa.c, + hal/gr/config/gr_config_gm20b.h, + hal/gr/config/gr_config_gv100_fusa.c, + hal/gr/config/gr_config_gv100.h ] + + config: + safe: no + sources: [ hal/gr/config/gr_config_gm20b.c ] + + init_fusa: + safe: yes + sources: [ hal/gr/init/gr_init_gm20b_fusa.c, + hal/gr/init/gr_init_gm20b.h, + hal/gr/init/gr_init_gp10b_fusa.c, + hal/gr/init/gr_init_gp10b.h, + hal/gr/init/gr_init_gv11b_fusa.c, + hal/gr/init/gr_init_gv11b.h ] + init: + safe: no + sources: [ hal/gr/init/gr_init_gm20b.c, + hal/gr/init/gr_init_gp10b.c, + hal/gr/init/gr_init_gv100.c, + hal/gr/init/gr_init_gv100.h, + hal/gr/init/gr_init_gv11b.c, + hal/gr/init/gr_init_tu104.c, + hal/gr/init/gr_init_tu104.h ] + intr_fusa: + safe: yes + sources: [ hal/gr/intr/gr_intr_gm20b_fusa.c, + hal/gr/intr/gr_intr_gm20b.h, + hal/gr/intr/gr_intr_gp10b_fusa.c, + hal/gr/intr/gr_intr_gp10b.h, + hal/gr/intr/gr_intr_gv11b_fusa.c, + hal/gr/intr/gr_intr_gv11b.h ] + intr: + safe: no + sources: [ hal/gr/intr/gr_intr_gm20b.c, + hal/gr/intr/gr_intr_gp10b.c, + hal/gr/intr/gr_intr_tu104.c, + hal/gr/intr/gr_intr_tu104.h ] + falcon_fusa: + safe: yes + sources: [ hal/gr/falcon/gr_falcon_gm20b_fusa.c, + hal/gr/falcon/gr_falcon_gm20b.h, + hal/gr/falcon/gr_falcon_gp10b_fusa.c, + hal/gr/falcon/gr_falcon_gp10b.h, + hal/gr/falcon/gr_falcon_gv11b_fusa.c, + hal/gr/falcon/gr_falcon_gv11b.h ] + falcon: + safe: no + sources: [ hal/gr/falcon/gr_falcon_gm20b.c, + hal/gr/falcon/gr_falcon_tu104.c, + hal/gr/falcon/gr_falcon_tu104.h ] + fecs_trace: + safe: no + sources: [ hal/gr/fecs_trace/fecs_trace_gm20b.c, + hal/gr/fecs_trace/fecs_trace_gm20b.h, + hal/gr/fecs_trace/fecs_trace_gm20b.h, + hal/gr/fecs_trace/fecs_trace_gv11b.c, + hal/gr/fecs_trace/fecs_trace_gv11b.h ] + hwpm_map: + safe: no + sources: [ hal/gr/hwpm_map/hwpm_map_gv100.c, + hal/gr/hwpm_map/hwpm_map_gv100.h ] + zbc: + safe: no + sources: [ hal/gr/zbc/zbc_gm20b.c, + hal/gr/zbc/zbc_gm20b.h, + hal/gr/zbc/zbc_gp10b.c, + hal/gr/zbc/zbc_gp10b.h, + hal/gr/zbc/zbc_gv11b.c, + hal/gr/zbc/zbc_gv11b.h ] + zcull: + safe: no + sources: [ hal/gr/zcull/zcull_gm20b.c, + hal/gr/zcull/zcull_gm20b.h, + hal/gr/zcull/zcull_gv11b.c, + hal/gr/zcull/zcull_gv11b.h ] + gr: + safe: no + sources: [ hal/gr/gr/gr_gk20a.c, hal/gr/gr/gr_gk20a.h, + hal/gr/gr/gr_gm20b.c, hal/gr/gr/gr_gm20b.h, + hal/gr/gr/gr_gp10b.c, hal/gr/gr/gr_gp10b.h, + hal/gr/gr/gr_gv100.c, hal/gr/gr/gr_gv100.h, + hal/gr/gr/gr_gv11b.c, hal/gr/gr/gr_gv11b.h, + hal/gr/gr/gr_tu104.c, hal/gr/gr/gr_tu104.h, + include/nvgpu/gr/warpstate.h, + hal/gr/gr/gr_pri_gk20a.h, + hal/gr/gr/gr_pri_gv11b.h ] + +regops: + safe: no + owner: Deepak N + sources: [ hal/regops/regops_gm20b.c, + hal/regops/regops_gm20b.h, + hal/regops/regops_gp10b.c, + hal/regops/regops_gp10b.h, + hal/regops/regops_gv11b.c, + hal/regops/regops_gv11b.h, + 
hal/regops/regops_tu104.c, + hal/regops/regops_tu104.h, + hal/regops/allowlist_gv11b.c, + hal/regops/allowlist_gv11b.h, + hal/regops/allowlist_tu104.c, + hal/regops/allowlist_tu104.h ] + +falcon_fusa: + safe: yes + owner: Sagar K + sources: [ hal/falcon/falcon_gk20a_fusa.c, + hal/falcon/falcon_gk20a.h ] + +falcon: + safe: no + owner: Sagar K + sources: [ hal/falcon/falcon_gk20a.c ] + +mc_fusa: + safe: yes + owner: Seema K + sources: [ hal/mc/mc_gm20b_fusa.c, + hal/mc/mc_gm20b.h, + hal/mc/mc_gp10b_fusa.c, + hal/mc/mc_gp10b.h, + hal/mc/mc_gv11b_fusa.c, + hal/mc/mc_gv11b.h ] + +mc: + safe: no + owner: Seema K + sources: [ hal/mc/mc_gm20b.c, + hal/mc/mc_gv100.c, + hal/mc/mc_gv100.h, + hal/mc/mc_tu104.c, + hal/mc/mc_tu104.h ] + +fb_fusa: + safe: yes + owner: Seshendra G + sources: [ include/nvgpu/gops/fb.h, + hal/fb/fb_gm20b_fusa.c, + hal/fb/fb_gm20b.h, + hal/fb/fb_gv11b_fusa.c, + hal/fb/fb_gv11b.h, + hal/fb/ecc/fb_ecc_gv11b.h, hal/fb/ecc/fb_ecc_gv11b_fusa.c, + hal/fb/intr/fb_intr_gv11b.h, hal/fb/intr/fb_intr_gv11b_fusa.c, + hal/fb/fb_mmu_fault_gv11b.h, hal/fb/fb_mmu_fault_gv11b_fusa.c, + hal/fb/intr/fb_intr_ecc_gv11b.h, hal/fb/intr/fb_intr_ecc_gv11b_fusa.c ] + +fb: + safe: no + owner: Seshendra G + sources: [ hal/fb/fb_gm20b.c, + hal/fb/fb_gp106.c, hal/fb/fb_gp106.h, + hal/fb/fb_gp10b.c, hal/fb/fb_gp10b.h, + hal/fb/fb_gv100.c, hal/fb/fb_gv100.h, + hal/fb/fb_gv11b.c, + hal/fb/fb_tu104.c, hal/fb/fb_tu104.h, + hal/fb/intr/fb_intr_gv100.h, hal/fb/intr/fb_intr_gv100.c, + hal/fb/fb_mmu_fault_tu104.h, hal/fb/fb_mmu_fault_tu104.c, + hal/fb/intr/fb_intr_tu104.c, hal/fb/intr/fb_intr_tu104.h, + hal/fb/intr/fb_intr_ecc_gv11b.c ] + +pmu_fusa: + safe: yes + owner: Mahantesh K + sources: [ hal/pmu/pmu_gk20a_fusa.c, + hal/pmu/pmu_gk20a.h, + hal/pmu/pmu_gv11b_fusa.c, + hal/pmu/pmu_gv11b.h ] + +pmu: + safe: no + owner: Mahantesh K + sources: [ hal/pmu/pmu_gk20a.c, + hal/pmu/pmu_gm20b.c, + hal/pmu/pmu_gm20b.h, + hal/pmu/pmu_gp10b.c, + hal/pmu/pmu_gp10b.h, + hal/pmu/pmu_gv11b.c, + hal/pmu/pmu_tu104.c, + hal/pmu/pmu_tu104.h ] + +nvlink: + safe: yes + gpu: dgpu + sources: [ hal/nvlink/link_mode_transitions_gv100.c, + hal/nvlink/link_mode_transitions_gv100.h, + hal/nvlink/link_mode_transitions_tu104.c, + hal/nvlink/link_mode_transitions_tu104.h, + hal/nvlink/intr_and_err_handling_tu104.c, + hal/nvlink/intr_and_err_handling_tu104.h, + hal/nvlink/nvlink_gv100.c, + hal/nvlink/nvlink_gv100.h, + hal/nvlink/nvlink_tu104.c, + hal/nvlink/nvlink_tu104.h, + hal/nvlink/minion_gv100.c, + hal/nvlink/minion_gv100.h, + hal/nvlink/minion_tu104.c, + hal/nvlink/minion_tu104.h ] + +sec2: + safe: yes + owner: Sagar K + gpu: dgpu + sources: [ hal/sec2/sec2_tu104.c, + hal/sec2/sec2_tu104.h ] + +netlist_fusa: + safe: yes + owner: Seshendra G + gpu: both + sources: [ include/nvgpu/gops/netlist.h, + hal/netlist/netlist_gv11b_fusa.c, + hal/netlist/netlist_gv11b.h ] + +netlist: + safe: no + owner: Seshendra G + gpu: both + sources: [ hal/netlist/netlist_gm20b.c, + hal/netlist/netlist_gm20b.h, + hal/netlist/netlist_gp10b.c, + hal/netlist/netlist_gp10b.h, + hal/netlist/netlist_gv100.c, + hal/netlist/netlist_gv100.h, + hal/netlist/netlist_tu104.c, + hal/netlist/netlist_tu104.h ] + +nvdec: + safe: no + sources: [ hal/nvdec/nvdec_gp106.c, + hal/nvdec/nvdec_gp106.h, + hal/nvdec/nvdec_tu104.c, + hal/nvdec/nvdec_tu104.h ] + +perf: + safe: no + owner: Deepak N + sources: [ hal/perf/perf_gm20b.c, + hal/perf/perf_gm20b.h, + hal/perf/perf_gv11b.c, + hal/perf/perf_gv11b.h, + hal/perf/perf_tu104.c, + hal/perf/perf_tu104.h ] + +pramin: + safe: yes + gpu: 
dgpu + sources: [ hal/pramin/pramin_gp10b.c, + hal/pramin/pramin_gp10b.h, + hal/pramin/pramin_gv100.c, + hal/pramin/pramin_gv100.h, + hal/pramin/pramin_init.c, + hal/pramin/pramin_init.h, + hal/pramin/pramin_tu104.c, + hal/pramin/pramin_tu104.h ] + +class_fusa: + safe: yes + owner: Seshendra G + sources: [ hal/class/class_gm20b.h, + hal/class/class_gp10b.h, + hal/class/class_gv11b_fusa.c, + hal/class/class_gv11b.h ] + +class: + safe: no + owner: Seshendra G + sources: [ hal/class/class_gm20b.c, + hal/class/class_gp10b.c, + hal/class/class_tu104.c, + hal/class/class_tu104.h ] + +func: + safe: yes + owner: Terje B + gpu: dgpu + sources: [ hal/func/func_tu104.c, + hal/func/func_tu104.h ] + +top_fusa: + safe: yes + owner: Tejal K + sources: [ include/nvgpu/gops/top.h, + hal/top/top_gm20b_fusa.c, + hal/top/top_gm20b.h, + hal/top/top_gp10b.h, + hal/top/top_gv11b_fusa.c, + hal/top/top_gv11b.h ] + +top: + safe: no + owner: Tejal K + sources: [ hal/top/top_gm20b.c, + hal/top/top_gp106.c, + hal/top/top_gp106.h, + hal/top/top_gp10b.c, + hal/top/top_gv100.c, + hal/top/top_gv100.h ] + +bios: + safe: yes + owner: Tejal + gpu: dgpu + sources: [ hal/bios/bios_tu104.c, + hal/bios/bios_tu104.h ] + +xve: + safe: yes + owner: Alex W + gpu: dgpu + sources: [ include/nvgpu/xve.h, + hal/xve/xve_gp106.c, + hal/xve/xve_gp106.h, + hal/xve/xve_tu104.c, + hal/xve/xve_tu104.h ] + +tpc: + safe: no + owner: Divya S + sources: [ hal/tpc/tpc_gv11b.c, + hal/tpc/tpc_gv11b.h ] + +cic: + safe: yes + owner: Tejal K + sources: [ hal/cic/cic_gv11b_fusa.c, + hal/cic/cic_lut_gv11b_fusa.c, + hal/cic/cic_gv11b.h ] diff --git a/arch/nvgpu-hal-vgpu.yaml b/arch/nvgpu-hal-vgpu.yaml new file mode 100644 index 000000000..67577c733 --- /dev/null +++ b/arch/nvgpu-hal-vgpu.yaml @@ -0,0 +1,29 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. +# +# VGPU HAL units. +# + +init: + safe: no + owner: Aparna D + sources: [ hal/vgpu/init/init_hal_vgpu.c, + hal/vgpu/init/init_hal_vgpu.h, + hal/vgpu/init/vgpu_hal_gv11b.c, + hal/vgpu/init/vgpu_hal_gv11b.h ] + +fifo: + safe : no + owner: Aparna D + children: + fifo: + safe: no + sources: [ hal/vgpu/fifo/fifo_gv11b_vgpu.c, + hal/vgpu/fifo/fifo_gv11b_vgpu.h ] + +sync: + safe: no + owner: Aparna D + children: + syncpt: + sources: [ hal/vgpu/sync/syncpt_cmdbuf_gv11b_vgpu.c, + hal/vgpu/sync/syncpt_cmdbuf_gv11b_vgpu.h ] diff --git a/arch/nvgpu-hal.yaml b/arch/nvgpu-hal.yaml new file mode 100644 index 000000000..d2753fd19 --- /dev/null +++ b/arch/nvgpu-hal.yaml @@ -0,0 +1,10 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. All Rights Reserved. +# +# HAL units. These are the units that have access to HW. +# + +init: + safe: yes + owner: Terje B + sources: [ include/nvgpu/hal_init.h ] + diff --git a/arch/nvgpu-interface.yaml b/arch/nvgpu-interface.yaml new file mode 100644 index 000000000..b5a25be9a --- /dev/null +++ b/arch/nvgpu-interface.yaml @@ -0,0 +1,132 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All Rights Reserved. +# +# OS interface units and utilities. Often represented by simply a header file. 
+# + +atomic: + safe: yes + sources: [ include/nvgpu/atomic.h ] + +barrier: + safe: yes + sources: [ include/nvgpu/barrier.h ] + +bitops: + safe: yes + sources: [ include/nvgpu/bitops.h ] + +bsearch: + safe: yes + sources: [ include/nvgpu/bsearch.h ] + +bug: + safe: yes + sources: [ include/nvgpu/bug.h ] + +circ_buf: + safe: yes + sources: [ include/nvgpu/circ_buf.h ] + +cond: + safe: yes + sources: [ include/nvgpu/cond.h ] + +dma: + safe: yes + sources: [ include/nvgpu/dma.h ] + +# This one is likely not structured correctly. +dt: + safe: yes + sources: [ include/nvgpu/dt.h ] + +# Also a problem. +errno: + safe: yes + sources: [ include/nvgpu/errno.h ] + +firmware: + safe: yes + sources: [ include/nvgpu/firmware.h ] + +fuse: + safe: yes + sources: [ include/nvgpu/fuse.h ] + +io: + safe: yes + sources: [ include/nvgpu/io.h, include/nvgpu/io_usermode.h ] + +kmem: + safe: yes + sources: [ include/nvgpu/kmem.h ] + +kref: + safe: yes + sources: [ include/nvgpu/kref.h ] + +list: + safe: yes + sources: [ include/nvgpu/list.h ] + tags: unit-testable + +lock: + safe: yes + sources: [ include/nvgpu/lock.h ] + +log: + safe: yes + sources: [ include/nvgpu/log.h, include/nvgpu/log_common.h ] + +log2: + safe: yes + sources: [ include/nvgpu/log2.h ] + +rwsem: + safe: yes + sources: [ include/nvgpu/rwsem.h ] + +sizes: + safe: yes + sources: [ include/nvgpu/sizes.h ] + +soc: + safe: yes + sources: [ include/nvgpu/soc.h ] + +sort: + safe: yes + sources: [ include/nvgpu/sort.h ] + +string: + safe: yes + sources: [ include/nvgpu/string.h ] + tags: unit-testable + +thread: + safe: yes + sources: [ include/nvgpu/thread.h ] + +timers: + safe: yes + sources: [ include/nvgpu/timers.h ] + +types: + safe: yes + sources: [ include/nvgpu/types.h ] + +nvgpu_sgt: + safe: yes + sources: [ include/nvgpu/nvgpu_sgt_os.h ] + +os_sched: + safe: yes + sources: [ include/nvgpu/os_sched.h ] + +nvhost: + safe: yes + sources: [ include/nvgpu/nvhost.h ] + +vpr: + safe: no + sources: [ include/nvgpu/vpr.h ] diff --git a/arch/nvgpu-linux.yaml b/arch/nvgpu-linux.yaml new file mode 100644 index 000000000..8beff0201 --- /dev/null +++ b/arch/nvgpu-linux.yaml @@ -0,0 +1,256 @@ +# +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. +# +# Linux elements and units in nvgpu. +# +# The safe: tag is ommited through out since all Linux units are by definition +# not safe. +# +# I also have not put a huge amount of thought into this since none of this +# code is "safe" code. Nor are we planning on spending a lot of effort to +# clean this up. At least not yet. 
+ +bsearch: + sources: [ os/linux/bsearch.c ] + +channel: + sources: [ os/linux/linux-channel.c, os/linux/channel.h ] + +vpr: + sources: [ os/linux/vpr.c ] + +clk: + sources: [ os/linux/clk.c, os/linux/clk.h ] + +cde: + sources: [ os/linux/cde.c, os/linux/cde.h, + os/linux/cde_gm20b.c, os/linux/cde_gm20b.h, + os/linux/cde_gp10b.c, os/linux/cde_gp10b.h ] + +comptags: + sources: [ os/linux/comptags.c ] + +cond: + sources: [ os/linux/cond.c ] + +dma: + sources: [ os/linux/linux-dma.c ] + +dmabuf: + sources: [ os/linux/dmabuf_vidmem.c, os/linux/dmabuf_vidmem.h, + os/linux/dmabuf_priv.c, os/linux/dmabuf_priv.h ] + +driver_common: + sources: [ os/linux/driver_common.c, os/linux/driver_common.h ] + +dt: + sources: [ os/linux/dt.c ] + +debug: + sources: [ os/linux/debug.c, + os/linux/debug_allocator.c, + os/linux/debug_allocator.h, + os/linux/debug_bios.c, + os/linux/debug_bios.h, + os/linux/debug_cde.c, + os/linux/debug_cde.h, + os/linux/debug_ce.c, + os/linux/debug_ce.h, + os/linux/debug_clk_gm20b.c, + os/linux/debug_clk_gm20b.h, + os/linux/debug_clk_tu104.c, + os/linux/debug_clk_tu104.h, + os/linux/debug_fecs_trace.c, + os/linux/debug_fecs_trace.h, + os/linux/debug_fifo.c, + os/linux/debug_fifo.h, + os/linux/debug_gr.c, + os/linux/debug_gr.h, + os/linux/debug_hal.c, + os/linux/debug_hal.h, + os/linux/debug_kmem.c, + os/linux/debug_kmem.h, + os/linux/debug_ltc.c, + os/linux/debug_ltc.h, + os/linux/debug_pmgr.c, + os/linux/debug_pmgr.h, + os/linux/debug_pmu.c, + os/linux/debug_pmu.h, + os/linux/debug_sched.c, + os/linux/debug_sched.h, + os/linux/debug_therm_tu104.c, + os/linux/debug_therm_tu104.h, + os/linux/debug_xve.c, + os/linux/debug_xve.h, + os/linux/debug_s_param.c, + os/linux/debug_s_param.h, + os/linux/debug_volt.c, + os/linux/debug_volt.h, + os/linux/swprofile_debugfs.c, + os/linux/swprofile_debugfs.h, + os/linux/fecs_trace_linux.c, + os/linux/fecs_trace_linux.h, + os/linux/nvlink_probe.c ] + +firmware: + sources: [ os/linux/firmware.c ] + +fuse: + sources: [ os/linux/fuse.c ] + +intr: + sources: [ os/linux/intr.c ] + +io: + sources: [ os/linux/io_usermode.c, + os/linux/linux-io.c ] + +ioctl: + sources: [ os/linux/ioctl.c, + os/linux/ioctl.h, + os/linux/ioctl_as.c, + os/linux/ioctl_as.h, + os/linux/ioctl_channel.c, + os/linux/ioctl_channel.h, + os/linux/ioctl_clk_arb.c, + os/linux/ioctl_ctrl.c, + os/linux/ioctl_ctrl.h, + os/linux/ioctl_dbg.c, + os/linux/ioctl_dbg.h, + os/linux/ioctl_prof.c, + os/linux/ioctl_prof.h, + os/linux/ioctl_tsg.c, + os/linux/ioctl_tsg.h, + os/linux/power_ops.c, + os/linux/power_ops.h ] + +kmem: + sources: [ os/linux/kmem.c, os/linux/kmem_priv.h ] + +log: + sources: [ os/linux/log.c ] + +module: + sources: [ os/linux/module.c, os/linux/module.h, + os/linux/module_usermode.c, os/linux/module_usermode.h ] + +nvgpu_mem: + sources: [ os/linux/nvgpu_mem.c ] + +nvhost: + sources: [ os/linux/nvhost.c, + os/linux/nvhost_common.c, + os/linux/nvhost_host1x.c, + os/linux/nvhost_priv.h ] + +nvlink: + sources: [ os/linux/nvlink.c, + os/linux/nvlink.h ] + +fence: + sources: [ os/linux/os_fence_android.c, + os/linux/os_fence_android_sema.c, + os/linux/os_fence_dma.c, + os/linux/os_fence_dma_sema.c, + os/linux/os_fence_syncpt.c, + os/linux/os_fence_priv.h, + os/linux/sync_sema_android.c, + os/linux/sync_sema_android.h, + os/linux/sync_sema_dma.c, + os/linux/sync_sema_dma.h, + include/nvgpu/os_fence.h, + include/nvgpu/os_fence_semas.h, + include/nvgpu/os_fence_syncpts.h ] + +ops: + sources: [ os/linux/os_ops.c, os/linux/os_ops.h, + os/linux/os_ops_gm20b.c, os/linux/os_ops_gm20b.h, 
+ os/linux/os_ops_gp10b.c, os/linux/os_ops_gp10b.h, + os/linux/os_ops_gv100.c, os/linux/os_ops_gv100.h, + os/linux/os_ops_gv11b.c, os/linux/os_ops_gv11b.h, + os/linux/os_ops_tu104.c, os/linux/os_ops_tu104.h ] + +pci: + sources: [ os/linux/pci.c, + os/linux/pci.h, + os/linux/pci_power.c, + os/linux/pci_power.h, + include/nvgpu/pci.h ] + +platform: + sources: [ os/linux/platform_gk20a.h, + os/linux/platform_gk20a_tegra.c, + os/linux/platform_gk20a_tegra.h, + os/linux/platform_gp10b.h, + os/linux/platform_gp10b_tegra.c, + os/linux/platform_gv11b_tegra.c ] + +rwsem: + sources: [ os/linux/rwsem.c ] + +scale: + sources: [os/linux/scale.c, os/linux/scale.h ] + +sched: + sources: [ os/linux/os_sched.c, + os/linux/sched.c, + os/linux/sched.h ] + +sim: + sources: [ os/linux/sim.c, os/linux/sim_pci.c ] + +soc: + sources: [ os/linux/soc.c ] + +sysfs: + sources: [ os/linux/sysfs.c, os/linux/sysfs.h, + os/linux/ecc_sysfs.c ] + +thread: + sources: [ os/linux/thread.c ] + +timers: + sources: [ os/linux/timers.c ] + +vgpu: + sources: [ os/linux/vgpu/fecs_trace_vgpu_linux.c, + os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c, + os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.h, + os/linux/vgpu/platform_vgpu_tegra.c, + os/linux/vgpu/platform_vgpu_tegra.h, + os/linux/vgpu/sysfs_vgpu.c, + os/linux/vgpu/vgpu_ivc.c, + os/linux/vgpu/vgpu_ivm.c, + os/linux/vgpu/vgpu_linux.c, + os/linux/vgpu/vgpu_linux.h ] + +vm: + sources: [ os/linux/vm.c ] + +cic: + sources: [ os/linux/cic/cic_stub.c ] + +# Group all the Linux headers for now. +headers: + sources: [ include/nvgpu/linux/atomic.h, + include/nvgpu/linux/barrier.h, + include/nvgpu/linux/cond.h, + include/nvgpu/linux/dma.h, + include/nvgpu/linux/kmem.h, + include/nvgpu/linux/lock.h, + include/nvgpu/linux/bitops.h, + include/nvgpu/linux/nvgpu_mem.h, + include/nvgpu/linux/os_fence_android.h, + include/nvgpu/linux/os_fence_dma.h, + include/nvgpu/linux/rwsem.h, + include/nvgpu/linux/sim.h, + include/nvgpu/linux/sim_pci.h, + include/nvgpu/linux/soc_fuse.h, + include/nvgpu/linux/thread.h, + include/nvgpu/linux/log.h, + include/nvgpu/linux/utils.h, + include/nvgpu/linux/vm.h ] + +# An extra unit to lump all the unclassified Linux files. +extra: + sources: [ os/linux/os_linux.h ] diff --git a/arch/nvgpu-posix.yaml b/arch/nvgpu-posix.yaml new file mode 100644 index 000000000..2755be068 --- /dev/null +++ b/arch/nvgpu-posix.yaml @@ -0,0 +1,171 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. +# +# POSIX elements and units in nvgpu. +# + +# TODO: break this up into individual units. Will be critical for re-use in +# QNX. 
+all: + safe: no + owner: Alex W + sources: [ os/posix/error_notifier.c, + os/posix/firmware.c, + os/posix/fuse.c, + os/posix/gk20a.c, + os/posix/log.c, + os/posix/nvgpu.c, + os/posix/os_posix.h, + os/posix/posix-channel.c, + os/posix/posix-comptags.c, + os/posix/posix-dma.c, + os/posix/posix-fault-injection.c, + os/posix/posix-nvgpu_mem.c, + os/posix/posix-tsg.c, + os/posix/posix-vm.c, + os/posix/soc.c, + os/posix/bsearch.c, + os/posix/posix-clk_arb.c, + os/posix/posix-dt.c, + os/posix/posix-io.c, + os/posix/posix-nvhost.c, + os/posix/posix-nvlink.c, + os/posix/posix-vgpu.c, + os/posix/posix-vidmem.c, + os/posix/fecs_trace_posix.c, + os/posix/stubs.c, + os/posix/posix-vpr.c, + os/posix/mock-registers.c ] + +headers: + safe: no + owner: Alex W + sources: [ include/nvgpu/posix/barrier.h, + include/nvgpu/posix/dma.h, + include/nvgpu/posix/io.h, + include/nvgpu/posix/mock-regs.h, + include/nvgpu/posix/log.h, + include/nvgpu/posix/nvgpu_mem.h, + include/nvgpu/posix/pci.h, + include/nvgpu/posix/posix-fault-injection.h, + include/nvgpu/posix/posix-channel.h, + include/nvgpu/posix/probe.h, + include/nvgpu/posix/soc_fuse.h, + include/nvgpu/posix/vm.h, + include/nvgpu/posix/posix_vidmem.h, + include/nvgpu/posix/posix-nvhost.h, + include/nvgpu/posix/trace_gk20a.h ] + +queue: + safe: yes + owner: Rajesh D + sources: [ os/posix/queue.c, + include/nvgpu/posix/queue.h ] + +sort: + safe: no + owner: Ajesh K + sources: [ include/nvgpu/posix/sort.h ] + deps: + +bug: + safe: yes + owner: Ajesh K + sources: [ os/posix/bug.c, + include/nvgpu/posix/bug.h ] + deps: + +lock: + safe: yes + owner: Ajesh K + sources: [ os/posix/lock.c, + include/nvgpu/posix/lock.h ] + +rwsem: + safe: yes + owner: Ajesh K + sources: [ os/posix/rwsem.c, + include/nvgpu/posix/rwsem.h ] + deps: + +size: + safe: yes + owner: Ajesh K + sources: [ include/nvgpu/posix/sizes.h ] + deps: + +cond: + safe: yes + owner: Ajesh K + sources: [ os/posix/cond.c, + include/nvgpu/posix/cond.h ] + deps: + +threads: + safe: yes + owner: Ajesh K + sources: [ os/posix/thread.c, + include/nvgpu/posix/thread.h ] + deps: + +timers: + safe: yes + owner: Ajesh K + sources: [ os/posix/timers.c, + include/nvgpu/posix/timers.h ] + deps: + +atomic: + safe: yes + owner: Ajesh K + sources: [ include/nvgpu/posix/atomic.h ] + deps: + +os_sched: + safe: yes + owner: Ajesh K + sources: [ os/posix/os_sched.c ] + +kmem: + safe: yes + owner: Ajesh K + sources: [ os/posix/kmem.c, + include/nvgpu/posix/kmem.h ] + deps: + +types: + safe: yes + owner: Ajesh K + sources: [ include/nvgpu/posix/types.h ] + deps: + +utils: + safe: yes + owner: Ajesh K + sources: [ include/nvgpu/posix/utils.h ] + deps: + +bitops: + safe: yes + owner: Ajesh K + sources: [ os/posix/bitmap.c, + include/nvgpu/posix/bitops.h ] + deps: + +file_ops: + safe: yes + owner: Prateek S + sources: [ os/posix/file_ops.c, + include/nvgpu/posix/file_ops.h ] + deps: + +log2: + safe: yes + owner: Ajesh K + sources: [ include/nvgpu/posix/log2.h ] + deps: + +circ_buf: + safe: yes + owner: Ajesh K + sources: [ include/nvgpu/posix/circ_buf.h ] + deps: diff --git a/arch/nvgpu-vgpu.yaml b/arch/nvgpu-vgpu.yaml new file mode 100644 index 000000000..c52f10029 --- /dev/null +++ b/arch/nvgpu-vgpu.yaml @@ -0,0 +1,78 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. +# +# vGPU architecture: currently there hasn't been much work done on +# decomposing the vGPU architecture so all of the vGPU files are simply +# grouped into one super unit. +# +# TODO: Fix this !?!? 
Seems rather important given that this is the +# target for safety! +# + +all: + safe: no + owner: Aparna D + sources: [ common/vgpu/cbc/cbc_vgpu.c, + common/vgpu/cbc/cbc_vgpu.h, + common/vgpu/ce_vgpu.c, + common/vgpu/clk_vgpu.c, + common/vgpu/clk_vgpu.h, + common/vgpu/debugger_vgpu.c, + common/vgpu/debugger_vgpu.h, + common/vgpu/pm_reservation_vgpu.c, + common/vgpu/pm_reservation_vgpu.h, + common/vgpu/ecc_vgpu.c, + common/vgpu/ecc_vgpu.h, + common/vgpu/top/top_vgpu.c, + common/vgpu/top/top_vgpu.h, + common/vgpu/fifo/fifo_vgpu.c, + common/vgpu/fifo/fifo_vgpu.h, + common/vgpu/fifo/channel_vgpu.c, + common/vgpu/fifo/channel_vgpu.h, + common/vgpu/fifo/tsg_vgpu.c, + common/vgpu/fifo/tsg_vgpu.h, + common/vgpu/fifo/preempt_vgpu.c, + common/vgpu/fifo/preempt_vgpu.h, + common/vgpu/fifo/ramfc_vgpu.c, + common/vgpu/fifo/ramfc_vgpu.h, + common/vgpu/fifo/runlist_vgpu.c, + common/vgpu/fifo/runlist_vgpu.h, + common/vgpu/fifo/userd_vgpu.c, + common/vgpu/fifo/userd_vgpu.h, + common/vgpu/gr/ctx_vgpu.c, + common/vgpu/gr/ctx_vgpu.h, + common/vgpu/gr/fecs_trace_vgpu.c, + common/vgpu/gr/fecs_trace_vgpu.h, + common/vgpu/gr/gr_vgpu.c, + common/vgpu/gr/gr_vgpu.h, + common/vgpu/gr/subctx_vgpu.c, + common/vgpu/gr/subctx_vgpu.h, + common/vgpu/ltc/ltc_vgpu.c, + common/vgpu/ltc/ltc_vgpu.h, + common/vgpu/mm/mm_vgpu.c, + common/vgpu/mm/mm_vgpu.h, + common/vgpu/mm/vm_vgpu.c, + common/vgpu/perf/cyclestats_snapshot_vgpu.c, + common/vgpu/perf/cyclestats_snapshot_vgpu.h, + common/vgpu/perf/perf_vgpu.c, + common/vgpu/perf/perf_vgpu.h, + common/vgpu/fbp/fbp_vgpu.c, + common/vgpu/fbp/fbp_vgpu.h, + common/vgpu/fb/fb_vgpu.c, + common/vgpu/fb/fb_vgpu.h, + common/vgpu/intr/intr_vgpu.c, + common/vgpu/intr/intr_vgpu.h, + common/vgpu/ivc/comm_vgpu.c, + common/vgpu/ivc/comm_vgpu.h, + common/vgpu/ptimer/ptimer_vgpu.c, + common/vgpu/ptimer/ptimer_vgpu.h, + common/vgpu/init/init_vgpu.c, + common/vgpu/init/init_vgpu.h, + common/vgpu/profiler/profiler_vgpu.c, + common/vgpu/profiler/profiler_vgpu.h, + include/nvgpu/vgpu/tegra_vgpu.h, + include/nvgpu/vgpu/vgpu.h, + include/nvgpu/vgpu/vgpu_ivc.h, + include/nvgpu/vgpu/vgpu_ivm.h, + include/nvgpu/vgpu/ce_vgpu.h, + include/nvgpu/vgpu/vm_vgpu.h, + include/nvgpu/vgpu/os_init_hal_vgpu.h ] diff --git a/arch/nvgpu.yaml b/arch/nvgpu.yaml new file mode 100644 index 000000000..2dca53bac --- /dev/null +++ b/arch/nvgpu.yaml @@ -0,0 +1,69 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. All Rights Reserved. +# +# Top level NVGPU architecure description in YAML. +# +# The format of this document is structured by element and unit. Though only +# units may have source files associated with them. All units must exist in an +# element. +# + +nvgpu: + safe: no + children: + # The common code element. This has gr, mm, etc. + common: + safe: no + children: + !include nvgpu-common.yaml nvgpu-next-common.yaml + + # HAL units - Hardware Abstraction Layer. + hal: + safe: no + children: + !include nvgpu-hal-new.yaml nvgpu-next-hal.yaml + + # The QNX OS layer implementation units. + qnx: + safe: yes + children: + !include nvgpu-qnx.yaml + + # And the Linux version of the OS implementation units. + linux: + safe: no + children: + !include nvgpu-linux.yaml nvgpu-next-linux.yaml + + # POSIX units for implementing the OS layer for unit testing. + posix: + children: + !include nvgpu-posix.yaml + + + # Inteface units - these provide interfaces for NVGPU to the underlying + # OS or CPU. + interface: + safe: yes + owner: Alex W + children: + !include nvgpu-interface.yaml + + # Virtualization code. 
+  vgpu:
+    safe: yes
+    children:
+      !include nvgpu-vgpu.yaml
+
+  # Virtualization HAL code.
+  hal-vgpu:
+    safe: yes
+    children:
+      !include nvgpu-hal-vgpu.yaml nvgpu-next-hal-vgpu.yaml
+
+  # A meta-element for the GPU HW. A good example of this is the HW headers.
+  # This is not code we write in nvgpu, but we import it from the GPU HW
+  # tree (with a little transformation - the gen reg script).
+  gpu_hw:
+    safe: no
+    children:
+      !include nvgpu-gpu_hw.yaml nvgpu-next-gpu_hw.yaml
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
new file mode 100644
index 000000000..1a13b0983
--- /dev/null
+++ b/drivers/gpu/Makefile
@@ -0,0 +1,2 @@
+# Choose this option if you have an SoC with integrated Nvidia GPU IP.
+obj-m += nvgpu/
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
new file mode 100644
index 000000000..56522b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/Makefile
@@ -0,0 +1,788 @@
+GCOV_PROFILE := y
+
+# When building NVGPU as an external module, srctree.nvgpu is not
+# defined. Given that NVGPU knows where its header files are located
+# relative to this Makefile, we can use the path information of this
+# Makefile to set srctree.nvgpu if it is not already defined. Please note
+# that 'lastword $(MAKEFILE_LIST)' refers to this Makefile.
+srctree.nvgpu ?= $(abspath $(shell dirname $(lastword $(MAKEFILE_LIST)))/../../..)
+
+ccflags-y += -I$(srctree.nvgpu)/drivers/gpu/nvgpu/include
+ccflags-y += -I$(srctree.nvgpu)/drivers/gpu/nvgpu
+ccflags-y += -I$(srctree.nvgpu)/include
+ccflags-y += -I$(srctree.nvgpu)/include/uapi
+
+ccflags-y += -I$(srctree)/drivers/devfreq
+
+ccflags-y += -Wframe-larger-than=2048
+ccflags-y += -Wno-multichar
+ccflags-y += -Werror
+ccflags-y += -Wno-error=cpp
+ifeq ($(VERSION),4)
+ccflags-y += -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
+endif
+
+# Turn off when this is fixed upstream, if ever.
+ccflags-y += -D__NVGPU_PREVENT_UNTRUSTED_SPECULATION
+
+ifneq ($(srctree.nvgpu),)
+include $(srctree.nvgpu)/drivers/gpu/nvgpu/Makefile.linux.configs
+else
+include Makefile.linux.configs
+endif
+
+# When using the upstream host1x driver, the Makefile must define the
+# srctree.host1x path in order to find the necessary header files for
+# the upstream host1x driver.
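+#
+# For example (an illustrative sketch only; the kernel and host1x paths are
+# placeholders that depend on the local tree layout), srctree.host1x can be
+# passed on the make command line when nvgpu is built as an external module:
+#
+#   make -C /path/to/kernel-src M=/path/to/nvgpu/drivers/gpu/nvgpu \
+#        srctree.host1x=/path/to/host1x-driver modules
+#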
+ifeq ($(CONFIG_TEGRA_GK20A_NVHOST_HOST1X),y) +ccflags-y += -I$(srctree.host1x)/include +endif + +ifeq ($(CONFIG_NVGPU_DEBUGGER),y) +ccflags-y += -DCONFIG_NVGPU_DEBUGGER +ccflags-y += -DCONFIG_NVGPU_ENGINE_RESET +endif + +ccflags-y += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS +ccflags-y += -DCONFIG_NVGPU_TPC_POWERGATE +ccflags-y += -DCONFIG_NVGPU_ACR_LEGACY +ccflags-y += -DCONFIG_NVGPU_ENGINE_QUEUE +ccflags-y += -DCONFIG_NVGPU_FIFO_ENGINE_ACTIVITY +ccflags-y += -DCONFIG_NVGPU_USERD +ccflags-y += -DCONFIG_NVGPU_CHANNEL_WDT +ccflags-y += -DCONFIG_NVGPU_LS_PMU +ccflags-y += -DCONFIG_NVGPU_CILP +ccflags-y += -DCONFIG_NVGPU_GFXP +ccflags-y += -DCONFIG_NVGPU_GRAPHICS +ccflags-y += -DCONFIG_NVGPU_REPLAYABLE_FAULT +ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_SCHEDULING +ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL +ccflags-y += -DCONFIG_NVGPU_POWER_PG +ccflags-y += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT +ccflags-y += -DCONFIG_NVGPU_SIM +ccflags-y += -DCONFIG_NVGPU_TRACE +ccflags-y += -DCONFIG_NVGPU_SYSFS +ccflags-y += -DCONFIG_NVGPU_CLK_ARB +ccflags-y += -DCONFIG_NVGPU_FALCON_DEBUG +ccflags-y += -DCONFIG_NVGPU_FALCON_NON_FUSA +ccflags-y += -DCONFIG_NVGPU_IOCTL_NON_FUSA +ccflags-y += -DCONFIG_NVGPU_NON_FUSA +ccflags-y += -DCONFIG_NVGPU_INJECT_HWERR +ccflags-y += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT +ccflags-y += -DCONFIG_NVGPU_SET_FALCON_ACCESS_MAP +ccflags-y += -DCONFIG_NVGPU_SW_SEMAPHORE +ccflags-y += -DCONFIG_NVGPU_FENCE +ccflags-y += -DCONFIG_NVGPU_PROFILER + +ifeq ($(CONFIG_NVGPU_LOGGING),y) +ccflags-y += -DCONFIG_NVGPU_LOGGING=1 +endif + +ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y) +ccflags-y += -DCONFIG_NVGPU_HAL_NON_FUSA +endif + +ifeq ($(CONFIG_NVGPU_RECOVERY),y) +ccflags-y += -DCONFIG_NVGPU_RECOVERY +ccflags-y += -DCONFIG_NVGPU_ENGINE_RESET +nvgpu-y += \ + hal/rc/rc_gk20a.o \ + hal/rc/rc_gv11b.o +endif + +obj-$(CONFIG_GK20A) := nvgpu.o + +# OS independent parts of nvgpu. The work to collect files here +# is in progress. 
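+
+# (kbuild note: an entry such as "nvgpu-$(CONFIG_FOO) += bar.o" links bar.o
+# into nvgpu.ko only when the corresponding option evaluates to y; when the
+# option is unset or set to n, the object is simply skipped. CONFIG_FOO and
+# bar.o are placeholders; the real options are defined in
+# Makefile.linux.configs.)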
+ +ifeq ($(CONFIG_NVGPU_DGPU),y) +nvgpu-$(CONFIG_NVGPU_DGPU) += \ + os/linux/pci.o \ + os/linux/pci_power.o \ + os/linux/dmabuf_vidmem.o \ + os/linux/os_ops_gv100.o \ + os/linux/os_ops_tu104.o \ + common/sec2/sec2.o \ + common/sec2/sec2_allocator.o \ + common/sec2/sec2_lsfm.o \ + common/sec2/ipc/sec2_cmd.o \ + common/sec2/ipc/sec2_msg.o \ + common/sec2/ipc/sec2_queue.o \ + common/sec2/ipc/sec2_seq.o \ + common/vbios/bios_sw_gv100.o \ + common/vbios/bios_sw_tu104.o \ + common/falcon/falcon_sw_tu104.o \ + common/acr/acr_sw_tu104.o \ + common/mm/allocators/page_allocator.o \ + common/mm/vidmem.o \ + common/pramin.o \ + common/ce/ce_app.o \ + common/clk_arb/clk_arb_gv100.o \ + common/engine_queues/engine_emem_queue.o \ + hal/mm/mm_gv100.o \ + hal/mm/mm_tu104.o \ + hal/mc/mc_gv100.o \ + hal/mc/mc_tu104.o \ + hal/bus/bus_gv100.o \ + hal/bus/bus_tu104.o \ + hal/class/class_tu104.o \ + hal/clk/clk_tu104.o \ + hal/clk/clk_mon_tu104.o \ + hal/gr/init/gr_init_gv100.o \ + hal/gr/init/gr_init_tu104.o \ + hal/gr/intr/gr_intr_tu104.o \ + hal/gr/falcon/gr_falcon_tu104.o \ + hal/fbpa/fbpa_tu104.o \ + hal/init/hal_tu104.o \ + hal/init/hal_tu104_litter.o \ + hal/power_features/cg/tu104_gating_reglist.o \ + hal/ltc/ltc_tu104.o \ + hal/fb/fb_gv100.o \ + hal/fb/fb_tu104.o \ + hal/fb/fb_mmu_fault_tu104.o \ + hal/fb/intr/fb_intr_gv100.o \ + hal/fb/intr/fb_intr_tu104.o \ + hal/func/func_tu104.o \ + hal/fifo/fifo_tu104.o \ + hal/fifo/usermode_tu104.o \ + hal/fifo/pbdma_tu104.o \ + hal/fifo/ramfc_tu104.o \ + hal/fifo/ramin_tu104.o \ + hal/fifo/channel_gv100.o \ + hal/fifo/runlist_ram_tu104.o \ + hal/fifo/runlist_fifo_gv100.o \ + hal/fifo/runlist_fifo_tu104.o \ + hal/fifo/fifo_intr_gv100.o \ + hal/fuse/fuse_gp106.o \ + hal/fuse/fuse_tu104.o \ + hal/netlist/netlist_gv100.o \ + hal/netlist/netlist_tu104.o \ + hal/nvdec/nvdec_gp106.o \ + hal/nvdec/nvdec_tu104.o \ + hal/gsp/gsp_tu104.o \ + hal/sec2/sec2_tu104.o \ + hal/pramin/pramin_gp10b.o \ + hal/pramin/pramin_gv100.o \ + hal/pramin/pramin_init.o \ + hal/pramin/pramin_tu104.o \ + hal/bios/bios_tu104.o \ + hal/top/top_gv100.o \ + hal/xve/xve_gp106.o \ + hal/xve/xve_tu104.o + +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug_therm_tu104.o \ + os/linux/debug_bios.o \ + os/linux/debug_xve.o \ + os/linux/debug_clk_tu104.o +endif + +# nvlink sources are not conditionally compiled. nvlink probe and +# public functions return -ENODEV when not supported. 
+ +nvgpu-y += \ + common/vbios/nvlink_bios.o \ + common/nvlink/probe.o \ + common/nvlink/init/device_reginit.o \ + common/nvlink/init/device_reginit_gv100.o \ + common/nvlink/minion.o \ + common/nvlink/link_mode_transitions.o \ + common/nvlink/nvlink.o \ + os/linux/nvlink_probe.o \ + os/linux/nvlink.o \ + hal/nvlink/minion_gv100.o \ + hal/nvlink/minion_tu104.o \ + hal/nvlink/nvlink_gv100.o \ + hal/nvlink/nvlink_tu104.o \ + hal/nvlink/intr_and_err_handling_tu104.o \ + hal/nvlink/link_mode_transitions_gv100.o \ + hal/nvlink/link_mode_transitions_tu104.o + +nvgpu-y += \ + common/device.o \ + common/utils/enabled.o \ + common/utils/errata.o \ + common/utils/rbtree.o \ + common/utils/string.o \ + common/utils/worker.o \ + common/swdebug/profile.o \ + common/ptimer/ptimer.o \ + common/perf/perfbuf.o \ + common/therm/therm.o \ + common/mc/mc.o \ + common/sync/channel_sync.o \ + common/sync/channel_sync_semaphore.o \ + common/semaphore/semaphore_sea.o \ + common/semaphore/semaphore_pool.o \ + common/semaphore/semaphore_hw.o \ + common/semaphore/semaphore.o \ + common/regops/regops.o \ + common/ltc/ltc.o \ + common/fb/fb.o \ + common/fbp/fbp.o \ + common/gr/gr_utils.o \ + common/gr/gr.o \ + common/gr/gr_intr.o \ + common/gr/global_ctx.o \ + common/gr/ctx.o \ + common/gr/gr_falcon.o \ + common/gr/subctx.o \ + common/gr/zcull.o \ + common/gr/gr_config.o \ + common/gr/zbc.o \ + common/gr/gr_setup.o \ + common/gr/hwpm_map.o \ + common/gr/obj_ctx.o \ + common/gr/fs_state.o \ + common/gr/gr_ecc.o \ + common/netlist/netlist.o \ + common/init/nvgpu_init.o \ + common/pmu/pmu.o \ + common/pmu/allocator.o \ + common/pmu/pmu_mutex.o \ + common/pmu/fw/fw.o \ + common/pmu/fw/fw_ver_ops.o \ + common/pmu/fw/fw_ns_bootstrap.o \ + common/pmu/pg/pmu_pg.o \ + common/pmu/pg/pmu_aelpg.o \ + common/pmu/perfmon/pmu_perfmon.o \ + common/pmu/perfmon/pmu_perfmon_sw_gm20b.o \ + common/pmu/perfmon/pmu_perfmon_sw_gv11b.o \ + common/pmu/pmu_debug.o \ + common/pmu/pg/pg_sw_gm20b.o \ + common/pmu/pg/pg_sw_gp10b.o \ + common/pmu/pg/pg_sw_gp106.o \ + common/pmu/pg/pg_sw_gv11b.o \ + common/pmu/ipc/pmu_cmd.o \ + common/pmu/ipc/pmu_msg.o \ + common/pmu/ipc/pmu_queue.o \ + common/pmu/ipc/pmu_seq.o \ + common/acr/acr.o \ + common/acr/acr_wpr.o \ + common/acr/acr_blob_alloc.o \ + common/acr/acr_blob_construct_v0.o \ + common/acr/acr_blob_construct.o \ + common/acr/acr_bootstrap.o \ + common/acr/acr_sw_gm20b.o \ + common/acr/acr_sw_gp10b.o \ + common/acr/acr_sw_gv11b.o \ + common/sbr/sbr.o \ + common/pmu/super_surface/super_surface.o \ + common/pmu/lsfm/lsfm.o \ + common/pmu/lsfm/lsfm_sw_gm20b.o \ + common/pmu/lsfm/lsfm_sw_gp10b.o \ + common/pmu/lsfm/lsfm_sw_gv100.o \ + common/pmu/lsfm/lsfm_sw_tu104.o \ + common/pmu/perf/vfe_var.o \ + common/pmu/perf/vfe_equ.o \ + common/pmu/perf/perf.o \ + common/pmu/perf/change_seq.o \ + common/pmu/perf/pstate.o \ + common/pmu/pmgr/pwrdev.o \ + common/pmu/pmgr/pmgr.o \ + common/pmu/pmgr/pmgrpmu.o \ + common/pmu/pmgr/pwrmonitor.o \ + common/pmu/pmgr/pwrpolicy.o \ + common/pmu/volt/volt.o \ + common/pmu/volt/volt_rail.o \ + common/pmu/volt/volt_dev.o \ + common/pmu/volt/volt_policy.o \ + common/pmu/therm/thrm.o \ + common/pmu/therm/therm_dev.o \ + common/pmu/therm/therm_channel.o \ + common/pmu/lpwr/rppg.o \ + common/pmu/pmu_pstate.o \ + common/pmu/pmu_rtos_init.o \ + common/pmu/clk/clk_vin.o \ + common/pmu/clk/clk_fll.o \ + common/pmu/clk/clk_domain.o \ + common/pmu/clk/clk_prog.o \ + common/pmu/clk/clk_vf_point.o \ + common/pmu/clk/clk.o \ + common/pmu/boardobj/boardobj.o \ + 
common/pmu/boardobj/boardobjgrp.o \ + common/pmu/boardobj/boardobjgrpmask.o \ + common/pmu/boardobj/boardobjgrp_e255.o \ + common/pmu/boardobj/boardobjgrp_e32.o \ + common/clk_arb/clk_arb.o \ + common/clk_arb/clk_arb_gp10b.o \ + common/rc/rc.o \ + common/grmgr/grmgr.o \ + common/cic/cic.o \ + common/cic/cic_intr.o \ + common/cic/ce_cic.o \ + common/cic/ctxsw_cic.o \ + common/cic/ecc_cic.o \ + common/cic/host_cic.o \ + common/cic/gr_cic.o \ + common/cic/pri_cic.o \ + common/cic/pmu_cic.o \ + common/cic/mmu_cic.o \ + common/cic/msg_cic.o \ + hal/bus/bus_gk20a.o \ + hal/class/class_gm20b.o \ + hal/class/class_gp10b.o \ + hal/clk/clk_gm20b.o \ + hal/gr/ecc/ecc_gp10b.o \ + hal/gr/ecc/ecc_gv11b.o \ + hal/gr/zcull/zcull_gm20b.o \ + hal/gr/zcull/zcull_gv11b.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gp10b.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gv11b.o \ + hal/gr/init/gr_init_gm20b.o \ + hal/gr/init/gr_init_gp10b.o \ + hal/gr/init/gr_init_gv11b.o \ + hal/gr/intr/gr_intr_gm20b.o \ + hal/gr/intr/gr_intr_gp10b.o \ + hal/gr/hwpm_map/hwpm_map_gv100.o \ + hal/gr/zbc/zbc_gm20b.o \ + hal/gr/zbc/zbc_gp10b.o \ + hal/gr/zbc/zbc_gv11b.o \ + hal/gr/gr/gr_gk20a.o \ + hal/gr/gr/gr_gm20b.o \ + hal/gr/gr/gr_gp10b.o \ + hal/gr/gr/gr_gv100.o \ + hal/gr/gr/gr_gv11b.o \ + hal/gr/gr/gr_tu104.o \ + hal/init/hal_gv11b.o \ + hal/init/hal_gv11b_litter.o \ + hal/init/hal_init.o \ + hal/perf/perf_gv11b.o \ + hal/perf/perf_tu104.o \ + hal/power_features/cg/gp10b_gating_reglist.o \ + hal/power_features/cg/gv11b_gating_reglist.o \ + hal/regops/regops_gv11b.o \ + hal/regops/allowlist_gv11b.o \ + hal/ce/ce2_gk20a.o \ + hal/therm/therm_gp10b.o \ + hal/therm/therm_tu104.o \ + hal/gr/falcon/gr_falcon_gm20b.o \ + hal/ltc/ltc_gp10b.o \ + hal/ltc/intr/ltc_intr_gm20b.o \ + hal/ltc/intr/ltc_intr_gp10b.o \ + hal/fb/fb_gm20b.o \ + hal/fb/fb_gp10b.o \ + hal/fb/fb_gp106.o \ + hal/fb/fb_gv11b.o \ + hal/fb/intr/fb_intr_ecc_gv11b.o \ + hal/fuse/fuse_gm20b.o \ + hal/fifo/fifo_gk20a.o \ + hal/fifo/preempt_gk20a.o \ + hal/fifo/ramfc_gk20a.o \ + hal/fifo/ramfc_gp10b.o \ + hal/fifo/ramin_gk20a.o \ + hal/fifo/ramin_gp10b.o \ + hal/fifo/runlist_fifo_gv11b.o \ + hal/fifo/channel_gk20a.o \ + hal/fifo/channel_gm20b.o \ + hal/fifo/tsg_gk20a.o \ + hal/fifo/userd_gk20a.o \ + hal/fifo/userd_gv11b.o \ + hal/fifo/fifo_intr_gk20a.o \ + hal/fifo/ctxsw_timeout_gk20a.o \ + hal/netlist/netlist_gp10b.o \ + hal/sync/sema_cmdbuf_gk20a.o \ + hal/sync/sema_cmdbuf_gv11b.o \ + hal/pmu/pmu_gk20a.o \ + hal/pmu/pmu_gm20b.o \ + hal/pmu/pmu_gp10b.o \ + hal/pmu/pmu_gv11b.o \ + hal/pmu/pmu_tu104.o \ + hal/top/top_gp106.o \ + hal/top/top_gp10b.o \ + hal/tpc/tpc_gv11b.o \ + hal/priv_ring/priv_ring_gv11b.o \ + hal/cic/cic_gv11b_fusa.o \ + hal/cic/cic_lut_gv11b_fusa.o + +# Linux specific parts of nvgpu. 
+nvgpu-y += \ + os/linux/os_ops.o \ + os/linux/os_ops_gm20b.o \ + os/linux/os_ops_gp10b.o \ + os/linux/os_ops_gv11b.o \ + os/linux/kmem.o \ + os/linux/timers.o \ + os/linux/ioctl.o \ + os/linux/ioctl_ctrl.o \ + os/linux/ioctl_as.o \ + os/linux/ioctl_channel.o \ + os/linux/ioctl_tsg.o \ + os/linux/ioctl_dbg.o \ + os/linux/ioctl_prof.o \ + os/linux/ioctl_clk_arb.o \ + os/linux/cond.o \ + os/linux/nvgpu_mem.o \ + os/linux/linux-dma.o \ + os/linux/driver_common.o \ + os/linux/firmware.o \ + os/linux/thread.o \ + os/linux/vm.o \ + os/linux/intr.o \ + os/linux/sysfs.o \ + os/linux/linux-io.o \ + os/linux/io_usermode.o \ + os/linux/rwsem.o \ + os/linux/sched.o \ + os/linux/linux-channel.o \ + os/linux/sim.o \ + os/linux/sim_pci.o \ + os/linux/os_sched.o \ + os/linux/dt.o \ + os/linux/ecc_sysfs.o \ + os/linux/bsearch.o \ + os/linux/cic/cic_stub.o \ + os/linux/dmabuf_priv.o \ + os/linux/power_ops.o + +nvgpu-$(CONFIG_NVGPU_VPR) += os/linux/vpr.o + +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug.o \ + os/linux/debug_gr.o \ + os/linux/debug_fifo.o \ + os/linux/debug_ce.o \ + os/linux/debug_pmu.o \ + os/linux/debug_pmgr.o \ + os/linux/debug_sched.o \ + os/linux/debug_allocator.o \ + os/linux/debug_hal.o \ + os/linux/debug_clk_gm20b.o \ + os/linux/debug_ltc.o \ + os/linux/debug_volt.o \ + os/linux/debug_s_param.o \ + os/linux/swprofile_debugfs.o + +nvgpu-$(CONFIG_NVGPU_LOGGING) += os/linux/log.o + +ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y) +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug_kmem.o +endif + +nvgpu-$(CONFIG_NVGPU_FECS_TRACE) += \ + common/gr/fecs_trace.o \ + hal/gr/fecs_trace/fecs_trace_gm20b.o \ + hal/gr/fecs_trace/fecs_trace_gv11b.o \ + os/linux/fecs_trace_linux.o + +ifeq ($(CONFIG_NVGPU_FECS_TRACE),y) +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug_fecs_trace.o +endif + +nvgpu-$(CONFIG_TEGRA_GK20A) += \ + os/linux/fuse.o \ + os/linux/module.o \ + os/linux/module_usermode.o \ + os/linux/platform_gk20a_tegra.o \ + os/linux/platform_gp10b_tegra.o \ + os/linux/platform_gv11b_tegra.o + +ifeq ($(CONFIG_TEGRA_GK20A),y) +nvgpu-$(CONFIG_NVGPU_TEGRA_FUSE) += os/linux/soc.o +endif + +nvgpu-$(CONFIG_NVGPU_SYNCFD_ANDROID) += \ + os/linux/sync_sema_android.o \ + os/linux/os_fence_android.o \ + os/linux/os_fence_android_sema.o + +nvgpu-$(CONFIG_NVGPU_SYNCFD_STABLE) += \ + os/linux/sync_sema_dma.o \ + os/linux/os_fence_dma.o \ + os/linux/os_fence_dma_sema.o + +nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \ + common/sync/channel_sync_syncpt.o \ + common/fence/fence_syncpt.o +ifneq ($(CONFIG_NVGPU_SYNCFD_NONE),y) +nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \ + os/linux/os_fence_syncpt.o +endif + +ifeq ($(CONFIG_TEGRA_GK20A_NVHOST_HOST1X),y) +nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \ + os/linux/nvhost_host1x.o +else +nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \ + os/linux/nvhost.o +endif + +nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \ + os/linux/nvhost_common.o \ + hal/sync/syncpt_cmdbuf_gk20a.o \ + hal/sync/syncpt_cmdbuf_gv11b.o \ + hal/sync/syncpt_cmdbuf_gv11b_fusa.o \ + common/sync/channel_user_syncpt.o + +nvgpu-$(CONFIG_NVGPU_GR_VIRTUALIZATION) += \ + os/linux/vgpu/platform_vgpu_tegra.o \ + os/linux/vgpu/sysfs_vgpu.o \ + os/linux/vgpu/vgpu_ivc.o \ + os/linux/vgpu/vgpu_ivm.o \ + os/linux/vgpu/vgpu_linux.o \ + os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.o + +ifeq ($(CONFIG_NVGPU_FECS_TRACE),y) +nvgpu-$(CONFIG_NVGPU_GR_VIRTUALIZATION) += \ + os/linux/vgpu/fecs_trace_vgpu_linux.o +endif + +nvgpu-$(CONFIG_COMMON_CLK) += \ + os/linux/clk.o + +nvgpu-$(CONFIG_GK20A_DEVFREQ) += \ + os/linux/scale.o + +nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) 
+= \ + os/linux/cde.o \ + os/linux/cde_gm20b.o \ + os/linux/cde_gp10b.o + +ifeq ($(CONFIG_DEBUG_FS),y) +nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \ + os/linux/debug_cde.o +endif + +nvgpu-y += \ + common/mm/allocators/nvgpu_allocator.o \ + common/mm/allocators/bitmap_allocator.o \ + common/mm/allocators/buddy_allocator.o \ + common/mm/gmmu/page_table.o \ + common/mm/gmmu/pd_cache.o \ + common/mm/gmmu/pte.o \ + common/mm/as.o \ + common/mm/vm.o \ + common/mm/vm_area.o \ + common/mm/nvgpu_mem.o \ + common/mm/nvgpu_sgt.o \ + common/mm/mm.o \ + common/mm/dma.o \ + common/vbios/bios.o \ + common/falcon/falcon.o \ + common/falcon/falcon_sw_gk20a.o \ + common/engine_queues/engine_mem_queue.o \ + common/engine_queues/engine_dmem_queue.o \ + common/engine_queues/engine_fb_queue.o \ + common/io/io.o \ + common/power_features/power_features.o \ + common/power_features/cg/cg.o \ + common/power_features/pg/pg.o \ + common/sim/sim.o \ + common/sim/sim_pci.o \ + common/sim/sim_netlist.o \ + common/fifo/fifo.o \ + common/fifo/preempt.o \ + common/fifo/channel.o \ + common/fifo/channel_wdt.o \ + common/fifo/channel_worker.o \ + common/fifo/pbdma.o \ + common/fifo/submit.o \ + common/fifo/job.o \ + common/fifo/priv_cmdbuf.o \ + common/fifo/tsg.o \ + common/fifo/runlist.o \ + common/fifo/engine_status.o \ + common/fifo/engines.o \ + common/fifo/pbdma_status.o \ + common/fifo/userd.o \ + common/fifo/watchdog.o \ + common/fence/fence.o \ + common/fence/fence_sema.o \ + common/ecc.o \ + common/log_common.o \ + common/ce/ce.o \ + common/debugger.o \ + common/profiler/profiler.o \ + common/profiler/pm_reservation.o + +nvgpu-$(CONFIG_NVGPU_GR_VIRTUALIZATION) += \ + common/vgpu/ltc/ltc_vgpu.o \ + common/vgpu/fbp/fbp_vgpu.o \ + common/vgpu/fb/fb_vgpu.o \ + common/vgpu/gr/gr_vgpu.o \ + common/vgpu/gr/ctx_vgpu.o \ + common/vgpu/gr/subctx_vgpu.o \ + common/vgpu/top/top_vgpu.o \ + common/vgpu/fifo/fifo_vgpu.o \ + common/vgpu/fifo/channel_vgpu.o \ + common/vgpu/fifo/tsg_vgpu.o \ + common/vgpu/fifo/preempt_vgpu.o \ + common/vgpu/fifo/runlist_vgpu.o \ + common/vgpu/fifo/ramfc_vgpu.o \ + common/vgpu/fifo/userd_vgpu.o \ + common/vgpu/ce_vgpu.o \ + common/vgpu/mm/mm_vgpu.o \ + common/vgpu/mm/vm_vgpu.o \ + common/vgpu/init/init_vgpu.o \ + common/vgpu/ivc/comm_vgpu.o \ + common/vgpu/intr/intr_vgpu.o \ + common/vgpu/ptimer/ptimer_vgpu.o \ + common/vgpu/debugger_vgpu.o \ + common/vgpu/pm_reservation_vgpu.o \ + common/vgpu/perf/perf_vgpu.o \ + common/vgpu/profiler/profiler_vgpu.o \ + common/vgpu/ecc_vgpu.o \ + common/vgpu/clk_vgpu.o \ + common/vgpu/gr/fecs_trace_vgpu.o \ + hal/vgpu/init/init_hal_vgpu.o \ + hal/vgpu/fifo/fifo_gv11b_vgpu.o \ + hal/vgpu/sync/syncpt_cmdbuf_gv11b_vgpu.o + +nvgpu-$(CONFIG_NVGPU_CYCLESTATS) += \ + common/perf/cyclestats_snapshot.o \ + common/cyclestats/cyclestats.o + +ifeq ($(CONFIG_NVGPU_GR_VIRTUALIZATION),y) +nvgpu-$(CONFIG_NVGPU_CYCLESTATS) += \ + common/vgpu/perf/cyclestats_snapshot_vgpu.o +nvgpu-$(CONFIG_NVGPU_COMPRESSION) += \ + common/vgpu/cbc/cbc_vgpu.o +endif + +nvgpu-$(CONFIG_NVGPU_COMPRESSION) += \ + os/linux/comptags.o \ + common/mm/comptags.o \ + common/cbc/cbc.o \ + hal/cbc/cbc_gm20b.o \ + hal/cbc/cbc_gp10b.o \ + hal/cbc/cbc_gv11b.o \ + hal/cbc/cbc_tu104.o \ + +# FUSA (Functionally Safe) HAL source files +nvgpu-y += \ + hal/mm/mm_gv11b_fusa.o \ + hal/mm/mm_gp10b_fusa.o \ + hal/mm/gmmu/gmmu_gv11b_fusa.o \ + hal/mm/gmmu/gmmu_gp10b_fusa.o \ + hal/mm/gmmu/gmmu_gk20a_fusa.o \ + hal/mm/gmmu/gmmu_gm20b_fusa.o \ + hal/mm/cache/flush_gk20a_fusa.o \ + hal/mm/cache/flush_gv11b_fusa.o \ + 
hal/mm/mmu_fault/mmu_fault_gv11b_fusa.o \ + hal/ltc/intr/ltc_intr_gp10b_fusa.o \ + hal/ltc/intr/ltc_intr_gv11b_fusa.o \ + hal/bus/bus_gk20a_fusa.o \ + hal/bus/bus_gm20b_fusa.o \ + hal/bus/bus_gp10b_fusa.o \ + hal/bus/bus_gv11b_fusa.o \ + hal/ce/ce_gp10b_fusa.o \ + hal/ce/ce_gv11b_fusa.o \ + hal/class/class_gv11b_fusa.o \ + hal/falcon/falcon_gk20a_fusa.o \ + hal/fb/fb_gm20b_fusa.o \ + hal/fb/fb_gv11b_fusa.o \ + hal/fb/fb_mmu_fault_gv11b_fusa.o \ + hal/fb/ecc/fb_ecc_gv11b_fusa.o \ + hal/fb/intr/fb_intr_ecc_gv11b_fusa.o \ + hal/fb/intr/fb_intr_gv11b_fusa.o \ + hal/fifo/channel_gk20a_fusa.o \ + hal/fifo/channel_gm20b_fusa.o \ + hal/fifo/channel_gv11b_fusa.o \ + hal/fifo/ctxsw_timeout_gv11b_fusa.o \ + hal/fifo/engine_status_gm20b_fusa.o \ + hal/fifo/engine_status_gv100_fusa.o \ + hal/fifo/engines_gp10b_fusa.o \ + hal/fifo/engines_gv11b_fusa.o \ + hal/fifo/fifo_gk20a_fusa.o \ + hal/fifo/fifo_gv11b_fusa.o \ + hal/fifo/fifo_intr_gk20a_fusa.o \ + hal/fifo/fifo_intr_gv11b_fusa.o \ + hal/fifo/pbdma_gm20b_fusa.o \ + hal/fifo/pbdma_gp10b_fusa.o \ + hal/fifo/pbdma_gv11b_fusa.o \ + hal/fifo/pbdma_status_gm20b_fusa.o \ + hal/fifo/preempt_gv11b_fusa.o \ + hal/fifo/ramfc_gp10b_fusa.o \ + hal/fifo/ramfc_gv11b_fusa.o \ + hal/fifo/ramin_gk20a_fusa.o \ + hal/fifo/ramin_gm20b_fusa.o \ + hal/fifo/ramin_gv11b_fusa.o \ + hal/fifo/runlist_fifo_gk20a_fusa.o \ + hal/fifo/runlist_fifo_gv11b_fusa.o \ + hal/fifo/runlist_ram_gv11b_fusa.o \ + hal/fifo/tsg_gk20a_fusa.o \ + hal/fifo/tsg_gv11b_fusa.o \ + hal/fifo/usermode_gv11b_fusa.o \ + hal/fuse/fuse_gm20b_fusa.o \ + hal/fuse/fuse_gp10b_fusa.o \ + hal/gr/config/gr_config_gm20b_fusa.o \ + hal/gr/config/gr_config_gv100_fusa.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gp10b_fusa.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gv11b_fusa.o \ + hal/gr/ecc/ecc_gv11b_fusa.o \ + hal/gr/falcon/gr_falcon_gm20b_fusa.o \ + hal/gr/falcon/gr_falcon_gp10b_fusa.o \ + hal/gr/falcon/gr_falcon_gv11b_fusa.o \ + hal/gr/init/gr_init_gm20b_fusa.o \ + hal/gr/init/gr_init_gp10b_fusa.o \ + hal/gr/init/gr_init_gv11b_fusa.o \ + hal/gr/intr/gr_intr_gm20b_fusa.o \ + hal/gr/intr/gr_intr_gp10b_fusa.o \ + hal/gr/intr/gr_intr_gv11b_fusa.o \ + hal/ltc/ltc_gm20b_fusa.o \ + hal/ltc/ltc_gp10b_fusa.o \ + hal/ltc/ltc_gv11b_fusa.o \ + hal/mc/mc_gm20b_fusa.o \ + hal/mc/mc_gp10b_fusa.o \ + hal/mc/mc_gv11b_fusa.o \ + hal/netlist/netlist_gv11b_fusa.o \ + hal/pmu/pmu_gk20a_fusa.o \ + hal/pmu/pmu_gv11b_fusa.o \ + hal/priv_ring/priv_ring_gm20b_fusa.o \ + hal/priv_ring/priv_ring_gp10b_fusa.o \ + hal/ptimer/ptimer_gk20a_fusa.o \ + hal/ptimer/ptimer_gp10b.o \ + hal/ptimer/ptimer_gv11b.o \ + hal/therm/therm_gv11b_fusa.o \ + hal/top/top_gm20b_fusa.o \ + hal/top/top_gv11b_fusa.o + +nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \ + hal/ce/ce_tu104.o \ + hal/clk/clk_gm20b.o \ + hal/init/hal_gp10b.o \ + hal/init/hal_gp10b_litter.o \ + hal/init/hal_gm20b.o \ + hal/init/hal_gm20b_litter.o \ + hal/fifo/engine_status_gm20b.o \ + hal/fifo/engines_gm20b.o \ + hal/fifo/pbdma_gm20b.o \ + hal/fifo/pbdma_gp10b.o \ + hal/fifo/mmu_fault_gk20a.o \ + hal/fifo/mmu_fault_gm20b.o \ + hal/fifo/mmu_fault_gp10b.o \ + hal/fifo/runlist_fifo_gk20a.o \ + hal/fifo/runlist_ram_gk20a.o \ + hal/gr/config/gr_config_gm20b.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.o \ + hal/gr/gr/gr_gk20a.o \ + hal/gr/gr/gr_gm20b.o \ + hal/gr/gr/gr_gp10b.o \ + hal/ltc/ltc_gm20b.o \ + hal/ltc/ltc_gm20b_dbg.o \ + hal/mc/mc_gm20b.o \ + hal/mm/cache/flush_gk20a.o \ + hal/mm/mm_gm20b.o \ + hal/mm/mm_gk20a.o \ + 
hal/mm/gmmu/gmmu_gk20a.o \ + hal/mm/gmmu/gmmu_gm20b.o \ + hal/falcon/falcon_gk20a.o \ + hal/netlist/netlist_gm20b.o \ + hal/perf/perf_gm20b.o \ + hal/power_features/cg/gm20b_gating_reglist.o \ + hal/priv_ring/priv_ring_gm20b.o \ + hal/regops/regops_gm20b.o \ + hal/regops/regops_gp10b.o \ + hal/regops/regops_tu104.o \ + hal/regops/allowlist_tu104.o \ + hal/therm/therm_gm20b.o \ + hal/top/top_gm20b.o + +ifeq ($(CONFIG_NVGPU_GR_VIRTUALIZATION),y) +nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \ + hal/vgpu/init/vgpu_hal_gv11b.o +endif diff --git a/drivers/gpu/nvgpu/Makefile.doxygen b/drivers/gpu/nvgpu/Makefile.doxygen new file mode 100644 index 000000000..3c77cb5c2 --- /dev/null +++ b/drivers/gpu/nvgpu/Makefile.doxygen @@ -0,0 +1,53 @@ +##################### tell Emacs this is a -*- makefile-gmake -*- +# +# Copyright (c) 2019-2020 NVIDIA CORPORATION. All Rights Reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# +# Build the doxygen output. +# +# Some assumptions: this is a local build only. You need installed: +# +# doxygen +# python2 +# +# You also must have $TOP/$TEGRA_TOP set; For L4T: +# +# $ export TOP=$TEGRA_TOP +# +# Should work. +# + +PYTHON2 = python2.7 +ARCH = $(TEGRA_TOP)/core-private/tools/arch/arch.py +NVGPU_ARCH = $(TOP)/kernel/nvgpu/arch +ARCH_YAML = $(NVGPU_ARCH)/*.yaml + +# Build the doxygen output. But make sure the sources file is generated +# first. +all: doxygen/sources + doxygen doxygen/Doxyfile.safety + +# Generate the sources file. This has a list of files that we shall include +# in the doxygen output. +doxygen/sources: $(ARCH) $(ARCH_YAML) + $(ARCH) --arch-file $(NVGPU_ARCH)/nvgpu.yaml files \ + --safe --gpu igpu --gpu both \ + --prefix 'INPUT += ' > doxygen/Doxyfile.sources.safety + @if [ ! -z "$(EXTRA_PATH)" ] ; then \ + $(ARCH) --arch-file $(NVGPU_ARCH)/nvgpu.yaml \ + --include-path $(EXTRA_PATH) files \ + --safe --gpu igpu --gpu both \ + --prefix 'INPUT += $(EXTRA_PATH)/' \ + >> doxygen/Doxyfile.sources.safety ; \ + cat $(EXTRA_PATH)/unit-tests/SWUTS.sources \ + >> doxygen/Doxyfile.sources.safety ; \ + fi + +clean: + rm -rf doxygen/Doxyfile.sources.safety + rm -rf html diff --git a/drivers/gpu/nvgpu/Makefile.linux.configs b/drivers/gpu/nvgpu/Makefile.linux.configs new file mode 100644 index 000000000..05d3934a9 --- /dev/null +++ b/drivers/gpu/nvgpu/Makefile.linux.configs @@ -0,0 +1,225 @@ +# Turn off all other configs, if CONFIG_GK20A is not set +CONFIG_GK20A := m + +# Enable GK20A PMU features. +CONFIG_GK20A_PMU := y + +# Enable support for the GK20A graphics engine on Tegra +# by adding a Tegra platfrom interface to the GK20A driver. +CONFIG_TEGRA_GK20A := y + +# Enable Support for Loading High Secure binary, and using +# Write Protected Regions (WPR) for storing ucodes, and bootstrap +# PMU, FECS and GPCCS in Low Secure mode. 
+CONFIG_TEGRA_ACR := y + +# Support for debugger APIs +CONFIG_NVGPU_DEBUGGER := y + +# Support for iGPU LS PMU enable/disable +CONFIG_NVGPU_LS_PMU := y + +# Enable/Disable NVGPU logging +CONFIG_NVGPU_LOGGING := y + +# Enable/Disable the support of HALs from chips that do not have functional +# safety certification +CONFIG_NVGPU_HAL_NON_FUSA := y + +# Support recovery on failure (which may involve engine reset) +CONFIG_NVGPU_RECOVERY := y + +# Support for compression +CONFIG_NVGPU_COMPRESSION := y + +# Enable support for extraction of comptags for CDE. +ifeq ($(CONFIG_NVGPU_COMPRESSION),y) +CONFIG_NVGPU_SUPPORT_CDE := y +endif + +ifeq ($(CONFIG_COMMON_CLK),y) +ifeq ($(CONFIG_PM_DEVFREQ),y) +# Select this entry to enable gk20a scaling +CONFIG_GK20A_DEVFREQ := y + +# Disable support to pass PM_QOS constraints to devfreq based scaling. +CONFIG_GK20A_PM_QOS := n + +endif +endif + +# Say Y here to allow nvgpu to track and keep statistics on +# the system memory used by the driver. This does recreate +# some of the kmem_leak tracking but this is also applicable +# to other OSes which do not have Linux' kmem_leak. +#CONFIG_NVGPU_TRACK_MEM_USAGE := n + +# Enable the cycle stats debugging features. +CONFIG_NVGPU_CYCLESTATS := y + +# Enable support for the NVGPU Context Switch Tracing. In this mode, +# FECS collects timestamps for contexts loaded on GR engine. This +# allows tracking context switches on GR engine, as well as +# identifying processes that submitted work. +CONFIG_NVGPU_FECS_TRACE := y + +# Enable support in GK20A for the nvhost (host1x) dma engine hardware +# that includes things like hardware syncpts. This requires +# TEGRA_GRHOST +ifdef CONFIG_TEGRA_GRHOST +CONFIG_TEGRA_GK20A_NVHOST := y +endif +ifdef CONFIG_TEGRA_HOST1X_NEXT +CONFIG_TEGRA_GK20A_NVHOST := y +CONFIG_TEGRA_GK20A_NVHOST_HOST1X := y +endif + +# Enable support for GPUs on PCIe bus. +ifeq ($(CONFIG_PCI),y) +# Support for NVGPU DGPU +CONFIG_NVGPU_DGPU := y +endif + +# Enable nvgpu debug facility to redirect debug spew to ftrace. This +# affects kernel memory use, so should not be enabled by default. +ifeq ($(CONFIG_TRACING),y) +#CONFIG_GK20A_TRACE_PRINTK := y +endif + +# Use tegra_alloc_fd() for allocating dma_buf fds. This allocates +# the fds above 1024 which exempts them from counting against process +# fd limit. +ifeq ($(CONFIG_NV_TEGRA_MC),y) +CONFIG_NVGPU_USE_TEGRA_ALLOC_FD := y +endif + +# Support Nvlink +ifeq ($(CONFIG_TEGRA_NVLINK),y) +CONFIG_NVGPU_NVLINK := y +endif + +# Support NVGPU Virtualization +ifeq ($(CONFIG_TEGRA_GR_VIRTUALIZATION),y) +CONFIG_NVGPU_GR_VIRTUALIZATION := y +endif + +# Support for NVGPU VPR +ifeq ($(CONFIG_TEGRA_VPR),y) +CONFIG_NVGPU_VPR := y +endif + +# Support Tegra fuse +ifeq ($(CONFIG_TEGRA_KFUSE),y) +CONFIG_NVGPU_TEGRA_FUSE := y +endif + +# GPU job synchronization (fences before and after submits) can use raw +# syncpoints if available and sync fds if chosen. Without syncpoints, +# nvgpu also provides semaphore-backed sync fds to userspace. +# +# Select which kernel-provided API is used for sync fds. Matching +# support is required for the userspace drivers too. +ifeq ($(CONFIG_SYNC),y) +CONFIG_NVGPU_SYNCFD_ANDROID := y +else ifeq ($(CONFIG_SYNC_FILE), y) +CONFIG_NVGPU_SYNCFD_STABLE := y +else +CONFIG_NVGPU_SYNCFD_NONE := y +endif + +# Below check indicates the build is invoked from Nvidia's +# internal build system. 
+ifneq ($(NV_BUILD_KERNEL_OPTIONS),) + +# Disable the below configs for kstable +ifneq ($(findstring stable,$(NV_BUILD_KERNEL_OPTIONS)),) +CONFIG_GK20A_DEVFREQ := n +CONFIG_GK20A_PM_QOS := n +else ifneq ($(filter 4.9 4.14,$(patsubst -,$(space),$(NV_BUILD_KERNEL_OPTIONS))),) +# Enable support to pass PM_QOS constraints to devfreq based scaling. +CONFIG_GK20A_PM_QOS := y +endif + +endif + +ifeq ($(CONFIG_GK20A_PMU),y) +ccflags-y += -DCONFIG_GK20A_PMU +endif +ifeq ($(CONFIG_TEGRA_GK20A),y) +ccflags-y += -DCONFIG_TEGRA_GK20A +endif +ifeq ($(CONFIG_TEGRA_ACR),y) +ccflags-y += -DCONFIG_TEGRA_ACR +endif +ifeq ($(CONFIG_NVGPU_DEBUGGER),y) +ccflags-y += -DCONFIG_NVGPU_DEBUGGER +endif +ifeq ($(CONFIG_NVGPU_LS_PMU),y) +ccflags-y += -DCONFIG_NVGPU_LS_PMU +endif +ifeq ($(CONFIG_NVGPU_LOGGING),y) +ccflags-y += -DCONFIG_NVGPU_LOGGING +endif +ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y) +ccflags-y += -DCONFIG_NVGPU_HAL_NON_FUSA +endif +ifeq ($(CONFIG_NVGPU_RECOVERY),y) +ccflags-y += -DCONFIG_NVGPU_RECOVERY +endif +ifeq ($(CONFIG_NVGPU_COMPRESSION),y) +ccflags-y += -DCONFIG_NVGPU_COMPRESSION +endif +ifeq ($(CONFIG_NVGPU_SUPPORT_CDE),y) +ccflags-y += -DCONFIG_NVGPU_SUPPORT_CDE +endif +ifeq ($(CONFIG_GK20A_DEVFREQ),y) +ccflags-y += -DCONFIG_GK20A_DEVFREQ +endif +ifeq ($(CONFIG_GK20A_PM_QOS),y) +ccflags-y += -DCONFIG_GK20A_PM_QOS +endif +ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y) +ccflags-y += -DCONFIG_NVGPU_TRACK_MEM_USAGE +endif +ifeq ($(CONFIG_NVGPU_CYCLESTATS),y) +ccflags-y += -DCONFIG_NVGPU_CYCLESTATS +endif +ifeq ($(CONFIG_NVGPU_FECS_TRACE),y) +ccflags-y += -DCONFIG_NVGPU_FECS_TRACE +endif +ifeq ($(CONFIG_TEGRA_GK20A_NVHOST),y) +ccflags-y += -DCONFIG_TEGRA_GK20A_NVHOST +endif +ifeq ($(CONFIG_TEGRA_GK20A_NVHOST_HOST1X),y) +ccflags-y += -DCONFIG_TEGRA_GK20A_NVHOST_HOST1X +endif +ifeq ($(CONFIG_NVGPU_DGPU),y) +ccflags-y += -DCONFIG_NVGPU_DGPU +endif +ifeq ($(CONFIG_GK20A_TRACE_PRINTK),y) +ccflags-y += -DCONFIG_GK20A_TRACE_PRINTK +endif +ifeq ($(CONFIG_NVGPU_USE_TEGRA_ALLOC_FD),y) +ccflags-y += -DCONFIG_NVGPU_USE_TEGRA_ALLOC_FD +endif +ifeq ($(CONFIG_NVGPU_NVLINK),y) +ccflags-y += -DCONFIG_NVGPU_NVLINK +endif +ifeq ($(CONFIG_NVGPU_GR_VIRTUALIZATION),y) +ccflags-y += -DCONFIG_NVGPU_GR_VIRTUALIZATION +endif +ifeq ($(CONFIG_NVGPU_VPR),y) +ccflags-y += -DCONFIG_NVGPU_VPR +endif +ifeq ($(CONFIG_NVGPU_TEGRA_FUSE),y) +ccflags-y += -DCONFIG_NVGPU_TEGRA_FUSE +endif +ifeq ($(CONFIG_NVGPU_SYNCFD_ANDROID),y) +ccflags-y += -DCONFIG_NVGPU_SYNCFD_ANDROID +endif +ifeq ($(CONFIG_NVGPU_SYNCFD_STABLE),y) +ccflags-y += -DCONFIG_NVGPU_SYNCFD_STABLE +endif +ifeq ($(CONFIG_NVGPU_SYNCFD_NONE),y) +ccflags-y += -DCONFIG_NVGPU_SYNCFD_NONE +endif diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs new file mode 100644 index 000000000..000c32532 --- /dev/null +++ b/drivers/gpu/nvgpu/Makefile.shared.configs @@ -0,0 +1,319 @@ +# +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+
+# This file defines the make variables and cflags applicable to the nvgpu
+# common code shared by the QNX, nvgpu userspace, and test builds. Note that
+# cflags are added to the NVGPU_COMMON_CFLAGS variable, which the parent
+# Makefile must use to update its own cflags variable.
+
+# Default is the regular profile. That can be overridden if necessary by
+# setting NVGPU_FORCE_SAFETY_PROFILE. This is a useful hack while we
+# wait for the userspace tmake build to make its way into a proper safety
+# profile build.
+profile := default
+
+# Decide whether to use the safety release, safety debug or the regular profile.
+ifeq ($(NV_BUILD_CONFIGURATION_IS_SAFETY),1)
+profile := safety_release
+ifeq ($(NV_BUILD_CONFIGURATION_IS_DEBUG),1)
+profile := safety_debug
+endif
+endif
+
+ifeq ($(NVGPU_FORCE_SAFETY_PROFILE),1)
+profile := safety_release
+ifeq ($(NVGPU_FORCE_DEBUG_PROFILE),1)
+profile := safety_debug
+endif
+endif
+
+NVGPU_COMMON_CFLAGS :=
+
+#
+# Flags always enabled regardless of build profile.
+#
+
+NVGPU_COMMON_CFLAGS += \
+	-DCONFIG_TEGRA_GK20A_PMU=1 \
+	-DCONFIG_TEGRA_ACR=1 \
+	-DCONFIG_NVGPU_GR_VIRTUALIZATION \
+	-DCONFIG_PCI_MSI
+
+CONFIG_NVGPU_LOGGING := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LOGGING
+
+# Syncpoint support provided by nvhost is expected to exist.
+CONFIG_TEGRA_GK20A_NVHOST := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_TEGRA_GK20A_NVHOST
+
+# Syncfds are a Linux feature.
+CONFIG_NVGPU_SYNCFD_NONE := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SYNCFD_NONE
+
+CONFIG_NVGPU_GRAPHICS := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GRAPHICS
+
+ifeq ($(profile),$(filter $(profile),safety_debug safety_release))
+
+# Enable golden context verification only for safety debug/release build
+NVGPU_COMMON_CFLAGS += \
+	-DCONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION \
+	-DCONFIG_NVGPU_BUILD_CONFIGURATION_IS_SAFETY
+
+## For testing CTXSW FW error codes manually, enable the below configs in a
+## safety build. Enable only one config at a time, because only one error can
+## be tested at a time.
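+## For example (hypothetical usage): to exercise only the watchdog error path,
+## uncomment just the CONFIG_NVGPU_CTXSW_FW_ERROR_WDT_TESTING block below and
+## leave the other two blocks commented out.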
+# NVGPU_COMMON_CFLAGS += \ +# -DCONFIG_NVGPU_CTXSW_FW_ERROR_WDT_TESTING +# +# NVGPU_COMMON_CFLAGS += \ +# -DCONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING +# +# NVGPU_COMMON_CFLAGS += \ +# -DCONFIG_NVGPU_CTXSW_FW_ERROR_HEADER_TESTING + +ifeq ($(CONFIG_NVGPU_DGPU),1) + +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DGPU + +CONFIG_NVGPU_NVLINK := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NVLINK + +# used by sec2 code +CONFIG_NVGPU_ENGINE_QUEUE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_QUEUE + +# used in ce_app +CONFIG_NVGPU_FENCE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FENCE + +# ce_app uses syncpt (nvgpu_nvhost_syncpt_wait_timeout_ext) +CONFIG_NVGPU_KERNEL_MODE_SUBMIT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT + +CONFIG_NVGPU_FALCON_NON_FUSA := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA + +CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT + +CONFIG_NVGPU_SM_DIVERSITY := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SM_DIVERSITY + +CONFIG_NVGPU_USE_3LSS_ERR_INJECTION := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USE_3LSS_ERR_INJECTION + +CONFIG_NVGPU_LS_PMU := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LS_PMU + +CONFIG_NVGPU_CLK_ARB := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CLK_ARB +endif + +endif + +CONFIG_NVGPU_TEGRA_FUSE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TEGRA_FUSE + +# +# Flags enabled only for safety debug and regular build profile. +# +ifneq ($(profile),safety_release) + +CONFIG_NVGPU_TRACE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TRACE + +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_DEBUG + +# +# Flags enabled only for regular build profile. +# +ifneq ($(profile),safety_debug) + +CONFIG_NVGPU_SYSFS := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SYSFS + +# ACR feature to enable old tegra ACR profile support +CONFIG_NVGPU_ACR_LEGACY := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ACR_LEGACY + +CONFIG_NVGPU_ENGINE_QUEUE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_QUEUE + +CONFIG_NVGPU_DEBUGGER := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DEBUGGER + +CONFIG_NVGPU_PROFILER := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_PROFILER + +CONFIG_NVGPU_RECOVERY := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_RECOVERY + +CONFIG_NVGPU_CILP := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CILP + +CONFIG_NVGPU_GFXP := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GFXP + +CONFIG_NVGPU_CYCLESTATS := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CYCLESTATS + +CONFIG_NVGPU_FECS_TRACE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FECS_TRACE + +ifneq ($(CONFIG_NVGPU_DGPU),1) +CONFIG_NVGPU_IGPU_VIRT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_IGPU_VIRT +endif + +# Enable the usage of 3LSS error injection features. +CONFIG_NVGPU_USE_3LSS_ERR_INJECTION := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USE_3LSS_ERR_INJECTION + +# Enable nvlink support for normal build. +CONFIG_NVGPU_NVLINK := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NVLINK + +# Enable tpc_powergate support for normal build. +CONFIG_NVGPU_TPC_POWERGATE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TPC_POWERGATE + +# Enable mssnvlink0 reset control for normal build +CONFIG_MSSNVLINK0_RST_CONTROL := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_MSSNVLINK0_RST_CONTROL + +# Enable dgpu support for normal build. 
+CONFIG_NVGPU_DGPU := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DGPU + +# Enable nvgpu_next for normal build +ifneq ($(NV_BUILD_CONFIGURATION_IS_EXTERNAL), 1) +CONFIG_NVGPU_NEXT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NEXT +endif + +CONFIG_NVGPU_VPR := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_VPR + +CONFIG_NVGPU_REPLAYABLE_FAULT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_REPLAYABLE_FAULT + +# Enable LS PMU support for normal build +CONFIG_NVGPU_LS_PMU := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LS_PMU + +# Enable elpg support for normal build +CONFIG_NVGPU_POWER_PG := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_POWER_PG + +# Enable sim support for normal build +CONFIG_NVGPU_SIM := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SIM + +CONFIG_NVGPU_COMPRESSION := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_COMPRESSION + +# Enable non FUSA HALs for normal build +CONFIG_NVGPU_HAL_NON_FUSA := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_HAL_NON_FUSA + +# Enable non FUSA common code for normal build +CONFIG_NVGPU_NON_FUSA := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NON_FUSA + +CONFIG_NVGPU_CLK_ARB := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CLK_ARB + +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA + +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_IOCTL_NON_FUSA +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS + +CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT + +CONFIG_NVGPU_SET_FALCON_ACCESS_MAP := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SET_FALCON_ACCESS_MAP + +# Enable SW Semaphore for normal build +CONFIG_NVGPU_SW_SEMAPHORE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SW_SEMAPHORE + +# Enable Channel WDT for safety build until we switch to user mode submits only +CONFIG_NVGPU_CHANNEL_WDT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_WDT + +# Enable Kernel Mode submit for safety build until we switch to user mode +# submits only +CONFIG_NVGPU_KERNEL_MODE_SUBMIT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT + +# Enable fences for safety build till until we switch to user mode submits only +CONFIG_NVGPU_FENCE := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FENCE + +# Enable powergate lib for normal build +CONFIG_NVGPU_USE_POWERGATE_LIB := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USE_POWERGATE_LIB + +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FIFO_ENGINE_ACTIVITY +# Enable dynamic busy/idle support +CONFIG_NVGPU_DYNAMIC_BUSY_IDLE_SUPPORT := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DYNAMIC_BUSY_IDLE_SUPPORT + +# Enable HW based error injection support +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_INJECT_HWERR + +# Enable Channel/TSG Scheduling +CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_TSG_SCHEDULING + +# Enable Channel/TSG Control +CONFIG_NVGPU_CHANNEL_TSG_CONTROL := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL + +# Enable Virtualization server for normal build +NVGPU_COMMON_CFLAGS += -DCONFIG_TEGRA_GR_VIRTUALIZATION_SERVER + +# Enable SM diversity support for normal build +CONFIG_NVGPU_SM_DIVERSITY := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SM_DIVERSITY + +# Enable Multi Instance GPU support for normal build +CONFIG_NVGPU_MIG := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_MIG + +endif +endif + +# Enable USERD only if kernel mode submit is supported +ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1) +CONFIG_NVGPU_USERD := 1 +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USERD +endif + +ifeq ($(CONFIG_NVGPU_DEBUGGER),1) +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_RESET +endif + +ifeq 
($(CONFIG_NVGPU_RECOVERY),1) +NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_RESET +endif diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources new file mode 100644 index 000000000..c9d4a3df3 --- /dev/null +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -0,0 +1,711 @@ +# -*- mode: makefile -*- +# +# Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +srcs := + +ifdef NVGPU_POSIX +srcs += os/posix/nvgpu.c \ + os/posix/posix-io.c \ + os/posix/mock-registers.c \ + os/posix/posix-nvgpu_mem.c \ + os/posix/posix-dma.c \ + os/posix/posix-vm.c \ + os/posix/firmware.c \ + os/posix/soc.c \ + os/posix/error_notifier.c \ + os/posix/posix-channel.c \ + os/posix/posix-tsg.c \ + os/posix/stubs.c \ + os/posix/posix-nvhost.c \ + os/posix/posix-vgpu.c \ + os/posix/posix-dt.c \ + os/posix/fuse.c + +ifdef CONFIG_NVGPU_VPR +srcs += os/posix/posix-vpr.c +endif + +ifdef CONFIG_NVGPU_FECS_TRACE +srcs += os/posix/fecs_trace_posix.c +endif + +ifeq ($(CONFIG_NVGPU_CLK_ARB),1) +srcs += os/posix/posix-clk_arb.c +endif + +ifdef CONFIG_NVGPU_NVLINK +srcs += os/posix/posix-nvlink.c +endif + +ifeq ($(CONFIG_NVGPU_COMPRESSION),1) +srcs += os/posix/posix-comptags.c +endif + +ifeq ($(CONFIG_NVGPU_LOGGING),1) +srcs += os/posix/log.c +endif + +ifeq ($(CONFIG_NVGPU_DGPU),1) +srcs += os/posix/posix-vidmem.c +endif +endif + +# POSIX sources shared between the POSIX and QNX builds. 
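+# (Unlike the NVGPU_POSIX-only block above, the files below are compiled for
+# every POSIX-like target, including the QNX build.)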
+srcs += os/posix/bug.c \ + os/posix/rwsem.c \ + os/posix/timers.c \ + os/posix/cond.c \ + os/posix/lock.c \ + os/posix/thread.c \ + os/posix/os_sched.c \ + os/posix/bitmap.c \ + os/posix/kmem.c \ + os/posix/file_ops.c \ + os/posix/queue.c + +ifeq ($(NV_BUILD_CONFIGURATION_IS_SAFETY),0) +srcs += os/posix/bsearch.c +endif + +srcs += common/device.c \ + common/utils/enabled.c \ + common/utils/errata.c \ + common/utils/rbtree.c \ + common/utils/string.c \ + common/utils/worker.c \ + common/swdebug/profile.c \ + common/init/nvgpu_init.c \ + common/mm/allocators/nvgpu_allocator.c \ + common/mm/allocators/bitmap_allocator.c \ + common/mm/allocators/buddy_allocator.c \ + common/mm/gmmu/page_table.c \ + common/mm/gmmu/pd_cache.c \ + common/mm/gmmu/pte.c \ + common/mm/as.c \ + common/mm/vm.c \ + common/mm/vm_area.c \ + common/mm/nvgpu_mem.c \ + common/mm/nvgpu_sgt.c \ + common/mm/mm.c \ + common/mm/dma.c \ + common/therm/therm.c \ + common/ltc/ltc.c \ + common/fb/fb.c \ + common/fbp/fbp.c \ + common/io/io.c \ + common/ecc.c \ + common/falcon/falcon.c \ + common/falcon/falcon_sw_gk20a.c \ + common/gr/gr.c \ + common/gr/gr_utils.c \ + common/gr/gr_intr.c \ + common/gr/global_ctx.c \ + common/gr/subctx.c \ + common/gr/ctx.c \ + common/gr/gr_falcon.c \ + common/gr/gr_config.c \ + common/gr/gr_setup.c \ + common/gr/obj_ctx.c \ + common/gr/fs_state.c \ + common/gr/gr_ecc.c \ + common/netlist/netlist.c \ + common/pmu/pmu.c \ + common/acr/acr.c \ + common/acr/acr_wpr.c \ + common/acr/acr_blob_alloc.c \ + common/acr/acr_blob_construct.c \ + common/acr/acr_bootstrap.c \ + common/acr/acr_sw_gv11b.c \ + common/ptimer/ptimer.c \ + common/power_features/cg/cg.c \ + common/sync/channel_user_syncpt.c \ + common/fifo/preempt.c \ + common/fifo/channel.c \ + common/fifo/fifo.c \ + common/fifo/pbdma.c \ + common/fifo/tsg.c \ + common/fifo/runlist.c \ + common/fifo/engine_status.c \ + common/fifo/engines.c \ + common/fifo/pbdma_status.c \ + common/mc/mc.c \ + common/rc/rc.c \ + common/ce/ce.c \ + common/grmgr/grmgr.c \ + common/cic/cic.c \ + common/cic/cic_intr.c \ + common/cic/ce_cic.c \ + common/cic/ctxsw_cic.c \ + common/cic/ecc_cic.c \ + common/cic/host_cic.c \ + common/cic/gr_cic.c \ + common/cic/pri_cic.c \ + common/cic/pmu_cic.c \ + common/cic/mmu_cic.c \ + common/cic/msg_cic.c \ + hal/init/hal_gv11b.c \ + hal/init/hal_gv11b_litter.c \ + hal/init/hal_init.c \ + hal/power_features/cg/gv11b_gating_reglist.c \ + hal/fifo/runlist_fifo_gv11b.c \ + hal/fifo/userd_gk20a.c \ + hal/sync/syncpt_cmdbuf_gv11b.c + +# Source files below are functionaly safe (FuSa) and must always be included. 
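+# (The _fusa suffix in the file names below appears to mark HAL units that
+# belong to the safety-certified set.)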
+srcs += hal/mm/mm_gv11b_fusa.c \ + hal/mm/mm_gp10b_fusa.c \ + hal/mm/gmmu/gmmu_gv11b_fusa.c \ + hal/mm/gmmu/gmmu_gp10b_fusa.c \ + hal/mm/gmmu/gmmu_gk20a_fusa.c \ + hal/mm/gmmu/gmmu_gm20b_fusa.c \ + hal/mm/cache/flush_gk20a_fusa.c \ + hal/mm/cache/flush_gv11b_fusa.c \ + hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c \ + hal/ltc/intr/ltc_intr_gp10b_fusa.c \ + hal/ltc/intr/ltc_intr_gv11b_fusa.c \ + hal/bus/bus_gk20a_fusa.c \ + hal/bus/bus_gm20b_fusa.c \ + hal/bus/bus_gp10b_fusa.c \ + hal/bus/bus_gv11b_fusa.c \ + hal/ce/ce_gp10b_fusa.c \ + hal/ce/ce_gv11b_fusa.c \ + hal/class/class_gv11b_fusa.c \ + hal/falcon/falcon_gk20a_fusa.c \ + hal/fb/fb_gm20b_fusa.c \ + hal/fb/fb_gv11b_fusa.c \ + hal/fb/fb_mmu_fault_gv11b_fusa.c \ + hal/fb/ecc/fb_ecc_gv11b_fusa.c \ + hal/fb/intr/fb_intr_ecc_gv11b_fusa.c \ + hal/fb/intr/fb_intr_gv11b_fusa.c \ + hal/fifo/channel_gk20a_fusa.c \ + hal/fifo/channel_gm20b_fusa.c \ + hal/fifo/channel_gv11b_fusa.c \ + hal/fifo/ctxsw_timeout_gv11b_fusa.c \ + hal/fifo/engine_status_gm20b_fusa.c \ + hal/fifo/engine_status_gv100_fusa.c \ + hal/fifo/engines_gp10b_fusa.c \ + hal/fifo/engines_gv11b_fusa.c \ + hal/fifo/fifo_gk20a_fusa.c \ + hal/fifo/fifo_gv11b_fusa.c \ + hal/fifo/fifo_intr_gk20a_fusa.c \ + hal/fifo/fifo_intr_gv11b_fusa.c \ + hal/fifo/pbdma_gm20b_fusa.c \ + hal/fifo/pbdma_gp10b_fusa.c \ + hal/fifo/pbdma_gv11b_fusa.c \ + hal/fifo/pbdma_status_gm20b_fusa.c \ + hal/fifo/preempt_gv11b_fusa.c \ + hal/fifo/ramfc_gp10b_fusa.c \ + hal/fifo/ramfc_gv11b_fusa.c \ + hal/fifo/ramin_gk20a_fusa.c \ + hal/fifo/ramin_gm20b_fusa.c \ + hal/fifo/ramin_gv11b_fusa.c \ + hal/fifo/runlist_fifo_gk20a_fusa.c \ + hal/fifo/runlist_fifo_gv11b_fusa.c \ + hal/fifo/runlist_ram_gv11b_fusa.c \ + hal/fifo/tsg_gk20a_fusa.c \ + hal/fifo/tsg_gv11b_fusa.c \ + hal/fifo/usermode_gv11b_fusa.c \ + hal/fuse/fuse_gm20b_fusa.c \ + hal/fuse/fuse_gp10b_fusa.c \ + hal/gr/config/gr_config_gm20b_fusa.c \ + hal/gr/config/gr_config_gv100_fusa.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gp10b_fusa.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gv11b_fusa.c \ + hal/gr/ecc/ecc_gv11b_fusa.c \ + hal/gr/falcon/gr_falcon_gm20b_fusa.c \ + hal/gr/falcon/gr_falcon_gp10b_fusa.c \ + hal/gr/falcon/gr_falcon_gv11b_fusa.c \ + hal/gr/init/gr_init_gm20b_fusa.c \ + hal/gr/init/gr_init_gp10b_fusa.c \ + hal/gr/init/gr_init_gv11b_fusa.c \ + hal/gr/intr/gr_intr_gm20b_fusa.c \ + hal/gr/intr/gr_intr_gp10b_fusa.c \ + hal/gr/intr/gr_intr_gv11b_fusa.c \ + hal/ltc/ltc_gm20b_fusa.c \ + hal/ltc/ltc_gp10b_fusa.c \ + hal/ltc/ltc_gv11b_fusa.c \ + hal/mc/mc_gm20b_fusa.c \ + hal/mc/mc_gp10b_fusa.c \ + hal/mc/mc_gv11b_fusa.c \ + hal/netlist/netlist_gv11b_fusa.c \ + hal/pmu/pmu_gk20a_fusa.c \ + hal/pmu/pmu_gv11b_fusa.c \ + hal/priv_ring/priv_ring_gm20b_fusa.c \ + hal/priv_ring/priv_ring_gp10b_fusa.c \ + hal/ptimer/ptimer_gk20a_fusa.c \ + hal/sync/syncpt_cmdbuf_gv11b_fusa.c \ + hal/therm/therm_gv11b_fusa.c \ + hal/top/top_gm20b_fusa.c \ + hal/top/top_gv11b_fusa.c \ + hal/cic/cic_gv11b_fusa.c \ + hal/cic/cic_lut_gv11b_fusa.c + +# Source files below are not guaranteed to be functionaly safe (FuSa) and are +# only included in the normal build. 
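+# (CONFIG_NVGPU_HAL_NON_FUSA is set for the regular profile in
+# Makefile.shared.configs; the safety profiles instead fall through to the
+# else branch below, which pulls in only the non-FuSa files needed for dGPU.)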
+ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1) +srcs += hal/init/hal_gp10b.c \ + hal/init/hal_gp10b_litter.c \ + hal/init/hal_gm20b.c \ + hal/init/hal_gm20b_litter.c \ + hal/mm/cache/flush_gk20a.c \ + hal/mm/mm_gm20b.c \ + hal/mm/mm_gk20a.c \ + hal/mm/gmmu/gmmu_gk20a.c \ + hal/mm/gmmu/gmmu_gm20b.c \ + hal/mc/mc_gm20b.c \ + hal/bus/bus_gk20a.c \ + hal/class/class_gm20b.c \ + hal/class/class_gp10b.c \ + hal/clk/clk_gm20b.c \ + hal/falcon/falcon_gk20a.c \ + hal/gr/config/gr_config_gm20b.c \ + hal/gr/ecc/ecc_gp10b.c \ + hal/gr/ecc/ecc_gv11b.c \ + hal/gr/init/gr_init_gm20b.c \ + hal/gr/init/gr_init_gp10b.c \ + hal/gr/init/gr_init_gv11b.c \ + hal/gr/intr/gr_intr_gm20b.c \ + hal/gr/intr/gr_intr_gp10b.c \ + hal/gr/falcon/gr_falcon_gm20b.c \ + hal/priv_ring/priv_ring_gm20b.c \ + hal/power_features/cg/gm20b_gating_reglist.c \ + hal/power_features/cg/gp10b_gating_reglist.c \ + hal/ce/ce2_gk20a.c \ + hal/therm/therm_gm20b.c \ + hal/therm/therm_gp10b.c \ + hal/ltc/ltc_gm20b.c \ + hal/ltc/ltc_gp10b.c \ + hal/ltc/intr/ltc_intr_gm20b.c \ + hal/ltc/intr/ltc_intr_gp10b.c \ + hal/fb/fb_gp10b.c \ + hal/fb/fb_gp106.c \ + hal/fb/fb_gm20b.c \ + hal/fb/fb_gv11b.c \ + hal/fb/intr/fb_intr_ecc_gv11b.c \ + hal/fuse/fuse_gm20b.c \ + hal/fifo/fifo_gk20a.c \ + hal/fifo/preempt_gk20a.c \ + hal/fifo/engines_gm20b.c \ + hal/fifo/pbdma_gm20b.c \ + hal/fifo/pbdma_gp10b.c \ + hal/fifo/engine_status_gm20b.c \ + hal/fifo/ramfc_gk20a.c \ + hal/fifo/ramfc_gp10b.c \ + hal/fifo/ramin_gk20a.c \ + hal/fifo/ramin_gp10b.c \ + hal/fifo/channel_gk20a.c \ + hal/fifo/channel_gm20b.c \ + hal/fifo/tsg_gk20a.c \ + hal/fifo/fifo_intr_gk20a.c \ + hal/fifo/mmu_fault_gk20a.c \ + hal/fifo/mmu_fault_gm20b.c \ + hal/fifo/mmu_fault_gp10b.c \ + hal/fifo/ctxsw_timeout_gk20a.c \ + hal/fifo/runlist_fifo_gk20a.c \ + hal/fifo/runlist_ram_gk20a.c \ + hal/netlist/netlist_gm20b.c \ + hal/netlist/netlist_gp10b.c \ + hal/sync/syncpt_cmdbuf_gk20a.c \ + hal/pmu/pmu_gv11b.c \ + hal/top/top_gm20b.c \ + hal/top/top_gp106.c \ + hal/top/top_gp10b.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gp10b.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c +else +ifeq ($(CONFIG_NVGPU_DGPU),1) +# non-FUSA files needed to build dGPU in safety +srcs += hal/gr/falcon/gr_falcon_gm20b.c \ + hal/fuse/fuse_gm20b.c \ + hal/fb/fb_gp106.c \ + hal/falcon/falcon_gk20a.c \ + hal/bus/bus_gk20a.c \ + hal/pmu/pmu_gv11b.c +endif +endif + +ifeq ($(CONFIG_NVGPU_CLK_ARB),1) +srcs += \ + common/clk_arb/clk_arb.c \ + common/clk_arb/clk_arb_gp10b.c +endif + +ifeq ($(CONFIG_NVGPU_ACR_LEGACY),1) +srcs += \ + common/acr/acr_blob_construct_v0.c \ + common/acr/acr_sw_gm20b.c \ + common/acr/acr_sw_gp10b.c +endif + +ifeq ($(CONFIG_NVGPU_ENGINE_QUEUE),1) +srcs += common/engine_queues/engine_mem_queue.c \ + common/engine_queues/engine_dmem_queue.c \ + common/engine_queues/engine_emem_queue.c \ + common/engine_queues/engine_fb_queue.c +endif + +ifeq ($(CONFIG_NVGPU_GRAPHICS),1) +srcs += common/gr/zbc.c \ + common/gr/zcull.c \ + hal/gr/zbc/zbc_gm20b.c \ + hal/gr/zbc/zbc_gp10b.c \ + hal/gr/zbc/zbc_gv11b.c \ + hal/gr/zcull/zcull_gm20b.c \ + hal/gr/zcull/zcull_gv11b.c +endif + +ifeq ($(CONFIG_NVGPU_DEBUGGER),1) +srcs += common/debugger.c \ + common/regops/regops.c \ + common/gr/hwpm_map.c \ + common/perf/perfbuf.c \ + hal/regops/regops_gv11b.c \ + hal/regops/allowlist_gv11b.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c \ + hal/gr/hwpm_map/hwpm_map_gv100.c \ + hal/ltc/ltc_gm20b_dbg.c \ + hal/ptimer/ptimer_gp10b.c \ + hal/perf/perf_gv11b.c \ + hal/perf/perf_tu104.c \ + hal/gr/gr/gr_gk20a.c \ + 
hal/gr/gr/gr_gm20b.c \ + hal/gr/gr/gr_gp10b.c \ + hal/gr/gr/gr_gv11b.c \ + hal/gr/gr/gr_gv100.c \ + hal/gr/gr/gr_tu104.c +ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1) +srcs += hal/regops/regops_gm20b.c \ + hal/regops/regops_gp10b.c \ + hal/regops/regops_tu104.c \ + hal/regops/allowlist_tu104.c \ + hal/perf/perf_gm20b.c +endif +endif + +ifeq ($(CONFIG_NVGPU_PROFILER),1) +srcs += common/profiler/profiler.c \ + common/profiler/pm_reservation.c \ + hal/priv_ring/priv_ring_gv11b.c \ + hal/ptimer/ptimer_gv11b.c +endif + +ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1) +srcs += common/fifo/submit.c \ + common/fifo/priv_cmdbuf.c \ + common/fifo/job.c \ + common/fifo/channel_worker.c \ + common/sync/channel_sync.c \ + common/sync/channel_sync_syncpt.c +endif + +ifeq ($(CONFIG_NVGPU_CHANNEL_WDT),1) +srcs += common/fifo/watchdog.c \ + common/fifo/channel_wdt.c +endif + +ifeq ($(CONFIG_NVGPU_SW_SEMAPHORE),1) +srcs += common/semaphore/semaphore_sea.c \ + common/semaphore/semaphore_pool.c \ + common/semaphore/semaphore_hw.c \ + common/semaphore/semaphore.c \ + common/sync/channel_sync_semaphore.c \ + hal/sync/sema_cmdbuf_gk20a.c \ + hal/sync/sema_cmdbuf_gv11b.c +endif + +ifeq ($(CONFIG_NVGPU_USERD),1) +srcs += common/fifo/userd.c \ + hal/fifo/userd_gv11b.c +endif + +ifeq ($(CONFIG_NVGPU_RECOVERY),1) +srcs += hal/rc/rc_gv11b.c +ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1) +srcs += hal/rc/rc_gk20a.c +endif +endif + +ifeq ($(CONFIG_NVGPU_FENCE),1) +srcs += common/fence/fence.c +ifeq ($(CONFIG_TEGRA_GK20A_NVHOST),1) +srcs += common/fence/fence_syncpt.c +endif +ifeq ($(CONFIG_NVGPU_SW_SEMAPHORE),1) +srcs += common/fence/fence_sema.c +endif +endif + +ifeq ($(CONFIG_NVGPU_FECS_TRACE),1) +srcs += common/gr/fecs_trace.c \ + hal/gr/fecs_trace/fecs_trace_gm20b.c \ + hal/gr/fecs_trace/fecs_trace_gv11b.c +ifeq ($(CONFIG_NVGPU_IGPU_VIRT),1) +srcs += common/vgpu/gr/fecs_trace_vgpu.c +endif +endif + +ifeq ($(CONFIG_NVGPU_CYCLESTATS),1) +srcs += common/perf/cyclestats_snapshot.c \ + common/cyclestats/cyclestats.c +ifeq ($(CONFIG_NVGPU_IGPU_VIRT),1) +srcs += common/vgpu/perf/cyclestats_snapshot_vgpu.c +endif +endif + +# POSIX file used for unit testing for both qnx and linux +ifdef NVGPU_FAULT_INJECTION_ENABLEMENT +srcs += os/posix/posix-fault-injection.c +endif + +ifeq ($(CONFIG_NVGPU_LS_PMU),1) +# Add LS PMU files which are required for normal build +srcs += \ + common/pmu/boardobj/boardobj.c \ + common/pmu/boardobj/boardobjgrp.c \ + common/pmu/boardobj/boardobjgrpmask.c \ + common/pmu/boardobj/boardobjgrp_e255.c \ + common/pmu/boardobj/boardobjgrp_e32.c \ + common/pmu/clk/clk.c \ + common/pmu/volt/volt.c \ + common/pmu/clk/clk_domain.c \ + common/pmu/clk/clk_fll.c \ + common/pmu/clk/clk_prog.c \ + common/pmu/clk/clk_vf_point.c \ + common/pmu/clk/clk_vin.c \ + common/pmu/fw/fw.c \ + common/pmu/fw/fw_ver_ops.c \ + common/pmu/fw/fw_ns_bootstrap.c \ + common/pmu/ipc/pmu_cmd.c \ + common/pmu/ipc/pmu_msg.c \ + common/pmu/ipc/pmu_queue.c \ + common/pmu/ipc/pmu_seq.c \ + common/pmu/lpwr/rppg.c \ + common/pmu/lsfm/lsfm.c \ + common/pmu/lsfm/lsfm_sw_gm20b.c \ + common/pmu/lsfm/lsfm_sw_gp10b.c \ + common/pmu/lsfm/lsfm_sw_gv100.c \ + common/pmu/lsfm/lsfm_sw_tu104.c \ + common/pmu/perf/vfe_equ.c \ + common/pmu/perf/vfe_var.c \ + common/pmu/perf/perf.c \ + common/pmu/perf/pstate.c \ + common/pmu/perf/change_seq.c \ + common/pmu/perfmon/pmu_perfmon.c \ + common/pmu/perfmon/pmu_perfmon_sw_gm20b.c \ + common/pmu/perfmon/pmu_perfmon_sw_gv11b.c \ + common/pmu/pmgr/pmgr.c \ + common/pmu/pmgr/pmgrpmu.c \ + common/pmu/pmgr/pwrdev.c \ + 
common/pmu/pmgr/pwrmonitor.c \ + common/pmu/pmgr/pwrpolicy.c \ + common/pmu/super_surface/super_surface.c \ + common/pmu/therm/thrm.c \ + common/pmu/therm/therm_channel.c \ + common/pmu/therm/therm_dev.c \ + common/pmu/volt/volt_dev.c \ + common/pmu/volt/volt_policy.c \ + common/pmu/volt/volt_rail.c \ + common/pmu/allocator.c \ + common/pmu/pmu_debug.c \ + common/pmu/pmu_mutex.c \ + common/pmu/pmu_pstate.c \ + common/pmu/pmu_rtos_init.c \ + hal/therm/therm_tu104.c \ + hal/pmu/pmu_gk20a.c \ + hal/pmu/pmu_gm20b.c \ + hal/pmu/pmu_gp10b.c \ + hal/pmu/pmu_tu104.c + +ifeq ($(CONFIG_NVGPU_POWER_PG),1) +srcs += common/pmu/pg/pg_sw_gm20b.c \ + common/pmu/pg/pg_sw_gp10b.c \ + common/pmu/pg/pg_sw_gp106.c \ + common/pmu/pg/pg_sw_gv11b.c \ + common/pmu/pg/pmu_pg.c \ + common/pmu/pg/pmu_aelpg.c +endif + +ifeq ($(CONFIG_NVGPU_CLK_ARB),1) +srcs += common/clk_arb/clk_arb_gv100.c +endif + +endif + +ifeq ($(CONFIG_NVGPU_POWER_PG),1) +srcs += common/power_features/pg/pg.c +endif + +ifeq ($(CONFIG_NVGPU_IGPU_VIRT),1) +srcs += common/vgpu/init/init_vgpu.c \ + common/vgpu/ivc/comm_vgpu.c \ + common/vgpu/intr/intr_vgpu.c \ + common/vgpu/ptimer/ptimer_vgpu.c \ + common/vgpu/top/top_vgpu.c \ + common/vgpu/fifo/fifo_vgpu.c \ + common/vgpu/fifo/channel_vgpu.c \ + common/vgpu/fifo/tsg_vgpu.c \ + common/vgpu/fifo/preempt_vgpu.c \ + common/vgpu/fifo/runlist_vgpu.c \ + common/vgpu/fifo/ramfc_vgpu.c \ + common/vgpu/perf/perf_vgpu.c \ + common/vgpu/profiler/profiler_vgpu.c \ + common/vgpu/mm/mm_vgpu.c \ + common/vgpu/mm/vm_vgpu.c \ + common/vgpu/gr/gr_vgpu.c \ + common/vgpu/fb/fb_vgpu.c \ + common/vgpu/gr/ctx_vgpu.c \ + common/vgpu/gr/subctx_vgpu.c \ + common/vgpu/clk_vgpu.c \ + common/vgpu/debugger_vgpu.c \ + common/vgpu/pm_reservation_vgpu.c \ + common/vgpu/ltc/ltc_vgpu.c \ + common/vgpu/fbp/fbp_vgpu.c \ + common/vgpu/ce_vgpu.c \ + hal/vgpu/init/init_hal_vgpu.c \ + hal/vgpu/init/vgpu_hal_gv11b.c \ + hal/vgpu/fifo/fifo_gv11b_vgpu.c \ + hal/vgpu/sync/syncpt_cmdbuf_gv11b_vgpu.c + +ifeq ($(CONFIG_NVGPU_USERD),1) +srcs += common/vgpu/fifo/userd_vgpu.c +endif + +ifeq ($(CONFIG_NVGPU_COMPRESSION),1) +srcs += common/vgpu/cbc/cbc_vgpu.c +endif +endif + +ifeq ($(CONFIG_NVGPU_COMPRESSION),1) +srcs += common/mm/comptags.c \ + common/cbc/cbc.c \ + hal/cbc/cbc_gm20b.c \ + hal/cbc/cbc_gp10b.c \ + hal/cbc/cbc_gv11b.c +endif + +ifeq ($(CONFIG_NVGPU_NVLINK),1) +srcs += common/vbios/nvlink_bios.c \ + common/nvlink/probe.c \ + common/nvlink/init/device_reginit.c \ + common/nvlink/init/device_reginit_gv100.c \ + common/nvlink/minion.c \ + common/nvlink/link_mode_transitions.c \ + common/nvlink/nvlink.c \ + hal/nvlink/minion_gv100.c \ + hal/nvlink/minion_tu104.c \ + hal/nvlink/nvlink_gv100.c \ + hal/nvlink/nvlink_tu104.c \ + hal/nvlink/intr_and_err_handling_tu104.c \ + hal/nvlink/link_mode_transitions_gv100.c \ + hal/nvlink/link_mode_transitions_tu104.c +endif + +ifeq ($(CONFIG_NVGPU_DGPU),1) +srcs += common/sec2/sec2.c \ + common/sec2/sec2_allocator.c \ + common/sec2/sec2_lsfm.c \ + common/sec2/ipc/sec2_cmd.c \ + common/sec2/ipc/sec2_msg.c \ + common/sec2/ipc/sec2_queue.c \ + common/sec2/ipc/sec2_seq.c \ + common/vbios/bios.c \ + common/vbios/bios_sw_gv100.c \ + common/vbios/bios_sw_tu104.c \ + common/falcon/falcon_sw_tu104.c \ + common/acr/acr_sw_tu104.c \ + common/mm/allocators/page_allocator.c \ + common/mm/vidmem.c \ + common/pramin.c \ + common/ce/ce_app.c \ + common/sbr/sbr.c \ + hal/mm/mm_gv100.c \ + hal/mm/mm_tu104.c \ + hal/mc/mc_gv100.c \ + hal/mc/mc_tu104.c \ + hal/bus/bus_gv100.c \ + hal/bus/bus_tu104.c \ + hal/ce/ce_tu104.c \ + 
hal/class/class_tu104.c \ + hal/clk/clk_tu104.c \ + hal/clk/clk_mon_tu104.c \ + hal/gr/init/gr_init_gv100.c \ + hal/gr/init/gr_init_tu104.c \ + hal/gr/intr/gr_intr_tu104.c \ + hal/gr/falcon/gr_falcon_tu104.c \ + hal/fbpa/fbpa_tu104.c \ + hal/init/hal_tu104.c \ + hal/init/hal_tu104_litter.c \ + hal/power_features/cg/tu104_gating_reglist.c \ + hal/ltc/ltc_tu104.c \ + hal/fb/fb_gv100.c \ + hal/fb/fb_tu104.c \ + hal/fb/fb_mmu_fault_tu104.c \ + hal/fb/intr/fb_intr_gv100.c \ + hal/fb/intr/fb_intr_tu104.c \ + hal/func/func_tu104.c \ + hal/fifo/fifo_tu104.c \ + hal/fifo/usermode_tu104.c \ + hal/fifo/pbdma_tu104.c \ + hal/fifo/ramfc_tu104.c \ + hal/fifo/ramin_tu104.c \ + hal/fifo/channel_gv100.c \ + hal/fifo/runlist_ram_tu104.c \ + hal/fifo/runlist_fifo_gv100.c \ + hal/fifo/runlist_fifo_tu104.c \ + hal/fifo/fifo_intr_gv100.c \ + hal/fuse/fuse_gp106.c \ + hal/fuse/fuse_tu104.c \ + hal/netlist/netlist_gv100.c \ + hal/netlist/netlist_tu104.c \ + hal/nvdec/nvdec_gp106.c \ + hal/nvdec/nvdec_tu104.c \ + hal/gsp/gsp_tu104.c \ + hal/sec2/sec2_tu104.c \ + hal/pramin/pramin_gp10b.c \ + hal/pramin/pramin_gv100.c \ + hal/pramin/pramin_init.c \ + hal/pramin/pramin_tu104.c \ + hal/bios/bios_tu104.c \ + hal/top/top_gv100.c \ + hal/xve/xve_gp106.c \ + hal/xve/xve_tu104.c + +ifeq ($(CONFIG_NVGPU_COMPRESSION),1) +srcs += hal/cbc/cbc_tu104.c +endif +endif + +ifeq ($(CONFIG_NVGPU_SIM),1) +srcs += common/sim/sim.c \ + common/sim/sim_pci.c \ + common/sim/sim_netlist.c +endif + +ifeq ($(CONFIG_NVGPU_NON_FUSA),1) +srcs += common/power_features/power_features.c +endif + +ifeq ($(CONFIG_NVGPU_TPC_POWERGATE),1) +srcs += hal/tpc/tpc_gv11b.c +endif diff --git a/drivers/gpu/nvgpu/common/acr/acr.c b/drivers/gpu/nvgpu/common/acr/acr.c new file mode 100644 index 000000000..d3a58d211 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "acr_priv.h" +#ifdef CONFIG_NVGPU_ACR_LEGACY +#include "acr_sw_gm20b.h" +#include "acr_sw_gp10b.h" +#endif +#include "acr_sw_gv11b.h" +#ifdef CONFIG_NVGPU_DGPU +#include "acr_sw_tu104.h" +#endif + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) +#include "nvgpu_next_gpuid.h" +#endif + +/* ACR public API's */ +bool nvgpu_acr_is_lsf_lazy_bootstrap(struct gk20a *g, struct nvgpu_acr *acr, + u32 falcon_id) +{ + if (acr == NULL) { + return false; + } + + if ((falcon_id == FALCON_ID_FECS) || (falcon_id == FALCON_ID_PMU) || + (falcon_id == FALCON_ID_GPCCS)) { + return acr->lsf[falcon_id].is_lazy_bootstrap; + } else { + nvgpu_err(g, "Invalid falcon id\n"); + return false; + } +} + +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_acr_alloc_blob_prerequisite(struct gk20a *g, struct nvgpu_acr *acr, + size_t size) +{ + if (acr == NULL) { + return -EINVAL; + } + + return acr->alloc_blob_space(g, size, &acr->ucode_blob); +} +#endif + +/* ACR blob construct & bootstrap */ +int nvgpu_acr_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr) +{ + int err = 0; + + if (acr == NULL) { + return -EINVAL; + } + + err = acr->bootstrap_hs_acr(g, acr); + if (err != 0) { + nvgpu_err(g, "ACR bootstrap failed"); + } + + nvgpu_log(g, gpu_dbg_gr, "ACR bootstrap Done"); + return err; +} + +int nvgpu_acr_construct_execute(struct gk20a *g) +{ + int err = 0; + + if (g->acr == NULL) { + return -EINVAL; + } + + err = g->acr->prepare_ucode_blob(g); + if (err != 0) { + nvgpu_err(g, "ACR ucode blob prepare failed"); + goto done; + } + + err = nvgpu_acr_bootstrap_hs_acr(g, g->acr); + if (err != 0) { + nvgpu_err(g, "Bootstrap HS ACR failed"); + } + +done: + return err; +} + +/* ACR init */ +int nvgpu_acr_init(struct gk20a *g) +{ + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, + g->params.gpu_impl); + int err = 0; + + if (g->acr != NULL) { + /* + * Recovery/unrailgate case, we do not need to do ACR init as ACR is + * set during cold boot & doesn't execute ACR clean up as part off + * sequence, so reuse to perform faster boot. + */ + return err; + } + + g->acr = (struct nvgpu_acr *)nvgpu_kzalloc(g, sizeof(struct nvgpu_acr)); + if (g->acr == NULL) { + err = -ENOMEM; + goto done; + } + + switch (ver) { +#ifdef CONFIG_NVGPU_ACR_LEGACY + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + nvgpu_gm20b_acr_sw_init(g, g->acr); + break; + case NVGPU_GPUID_GP10B: + nvgpu_gp10b_acr_sw_init(g, g->acr); + break; +#endif + case NVGPU_GPUID_GV11B: + nvgpu_gv11b_acr_sw_init(g, g->acr); + break; +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_GPUID: + nvgpu_next_acr_sw_init(g, g->acr); + break; +#endif +#ifdef CONFIG_NVGPU_DGPU + case NVGPU_GPUID_TU104: + nvgpu_tu104_acr_sw_init(g, g->acr); + break; +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_DGPU_GPUID: + nvgpu_next_dgpu_acr_sw_init(g, g->acr); + break; +#endif +#endif + default: + nvgpu_kfree(g, g->acr); + err = -EINVAL; + nvgpu_err(g, "no support for GPUID %x", ver); + break; + } + +done: + return err; +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_alloc.c b/drivers/gpu/nvgpu/common/acr/acr_blob_alloc.c new file mode 100644 index 000000000..8799750bb --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_alloc.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "acr_wpr.h" +#include "acr_priv.h" +#include "acr_blob_alloc.h" + +int nvgpu_acr_alloc_blob_space_sys(struct gk20a *g, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_PHYSICALLY_ADDRESSED, + size, mem); +} +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_acr_alloc_blob_space_vid(struct gk20a *g, size_t size, + struct nvgpu_mem *mem) +{ + struct wpr_carveout_info wpr_inf; + int err; + + if (mem->size != 0ULL) { + return 0; + } + + g->acr->get_wpr_info(g, &wpr_inf); + + /* + * Even though this mem_desc wouldn't be used, the wpr region needs to + * be reserved in the allocator. + */ + err = nvgpu_dma_alloc_vid_at(g, wpr_inf.size, + &g->acr->wpr_dummy, wpr_inf.wpr_base); + if (err != 0) { + return err; + } + + return nvgpu_dma_alloc_vid_at(g, wpr_inf.size, mem, + wpr_inf.nonwpr_base); +} +#endif diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_alloc.h b/drivers/gpu/nvgpu/common/acr/acr_blob_alloc.h new file mode 100644 index 000000000..d91a8c02b --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_alloc.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef ACR_BLOB_ALLOC_H +#define ACR_BLOB_ALLOC_H + +struct gk20a; +struct nvgpu_mem; + +int nvgpu_acr_alloc_blob_space_sys(struct gk20a *g, size_t size, + struct nvgpu_mem *mem); +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_acr_alloc_blob_space_vid(struct gk20a *g, size_t size, + struct nvgpu_mem *mem); +#endif + +#endif /* ACR_BLOB_ALLOC_H */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c new file mode 100644 index 000000000..1285d77e1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c @@ -0,0 +1,1159 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "nvgpu_acr_interface.h" +#include "acr_blob_construct.h" +#include "acr_wpr.h" +#include "acr_priv.h" + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) +#include "nvgpu_next_gpuid.h" +#endif + + +#define APP_IMEM_OFFSET (0) +#define APP_IMEM_ENTRY (0) +#define APP_DMEM_OFFSET (0) +#define APP_RESIDENT_CODE_OFFSET (0) +#define MEMSET_VALUE (0) +#define LSB_HDR_DATA_SIZE (0) +#define BL_START_OFFSET (0) + +#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU) +#define UCODE_PARAMS (1) +#define UCODE_DESC_TOOL_VERSION 0x4U +#else +#define UCODE_PARAMS (0) +#endif + +#ifdef CONFIG_NVGPU_LS_PMU +#if defined(CONFIG_NVGPU_NEXT) +#define PMU_NVRISCV_WPR_RSVD_BYTES (0x8000) +#endif + +int nvgpu_acr_lsf_pmu_ucode_details(struct gk20a *g, void *lsf_ucode_img) +{ + struct lsf_ucode_desc *lsf_desc; + struct nvgpu_firmware *fw_sig; + struct nvgpu_firmware *fw_desc; + struct nvgpu_firmware *fw_image; + struct flcn_ucode_img *p_img = + (struct flcn_ucode_img *)lsf_ucode_img; + struct ls_falcon_ucode_desc_v1 tmp_desc_v1; + int err = 0; + + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto exit; + } + + fw_sig = nvgpu_pmu_fw_sig_desc(g, g->pmu); + fw_desc = nvgpu_pmu_fw_desc_desc(g, g->pmu); + fw_image = nvgpu_pmu_fw_image_desc(g, g->pmu); + + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fw_sig->data, + min_t(size_t, sizeof(*lsf_desc), fw_sig->size)); + + lsf_desc->falcon_id = FALCON_ID_PMU; + + p_img->desc = (struct ls_falcon_ucode_desc *)(void *)fw_desc->data; + if (p_img->desc->tools_version >= UCODE_DESC_TOOL_VERSION) { + (void) memset((u8 *)&tmp_desc_v1, 0, + sizeof(struct ls_falcon_ucode_desc_v1)); + + 
nvgpu_memcpy((u8 *)&tmp_desc_v1, (u8 *)fw_desc->data, + sizeof(struct ls_falcon_ucode_desc_v1)); + + nvgpu_memcpy((u8 *)&p_img->desc->bootloader_start_offset, + (u8 *)&tmp_desc_v1.bootloader_start_offset, + sizeof(struct ls_falcon_ucode_desc) - + offsetof(struct ls_falcon_ucode_desc, + bootloader_start_offset)); + } + + p_img->data = (u32 *)(void *)fw_image->data; + p_img->data_size = p_img->desc->app_start_offset + p_img->desc->app_size; + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + +exit: + return err; +} +#if defined(CONFIG_NVGPU_NEXT) +s32 nvgpu_acr_lsf_pmu_ncore_ucode_details(struct gk20a *g, void *lsf_ucode_img) +{ + struct lsf_ucode_desc *lsf_desc; + struct nvgpu_firmware *fw_sig; + struct nvgpu_firmware *fw_desc; + struct nvgpu_firmware *fw_image; + struct flcn_ucode_img *p_img = + (struct flcn_ucode_img *)lsf_ucode_img; + s32 err = 0; + + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto exit; + } + + fw_sig = nvgpu_pmu_fw_sig_desc(g, g->pmu); + fw_desc = nvgpu_pmu_fw_desc_desc(g, g->pmu); + fw_image = nvgpu_pmu_fw_image_desc(g, g->pmu); + + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fw_sig->data, + min_t(size_t, sizeof(*lsf_desc), fw_sig->size)); + + lsf_desc->falcon_id = FALCON_ID_PMU_NEXT_CORE; + + p_img->ndesc = (struct falcon_next_core_ucode_desc *)(void *)fw_desc->data; + + p_img->data = (u32 *)(void *)fw_image->data; + p_img->data_size = U32(fw_image->size); + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + + p_img->is_next_core_img = true; + +exit: + return err; +} +#endif +#endif + +int nvgpu_acr_lsf_fecs_ucode_details(struct gk20a *g, void *lsf_ucode_img) +{ + u32 tmp_size; + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, + g->params.gpu_impl); + struct lsf_ucode_desc *lsf_desc; + struct nvgpu_firmware *fecs_sig = NULL; + struct flcn_ucode_img *p_img = + (struct flcn_ucode_img *)lsf_ucode_img; + struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g); + struct nvgpu_ctxsw_ucode_segments *fecs = + nvgpu_gr_falcon_get_fecs_ucode_segments(gr_falcon); + int err; + + switch (ver) { + case NVGPU_GPUID_GV11B: +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_GPUID: +#endif + fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, + NVGPU_REQUEST_FIRMWARE_NO_WARN); + break; +#ifdef CONFIG_NVGPU_DGPU + case NVGPU_GPUID_TU104: + fecs_sig = nvgpu_request_firmware(g, TU104_FECS_UCODE_SIG, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + break; +#endif +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_DGPU_GPUID: + fecs_sig = nvgpu_request_firmware(g, NEXT_DGPU_FECS_UCODE_SIG, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + break; +#endif + + default: + nvgpu_err(g, "no support for GPUID %x", ver); + break; + } + + if (fecs_sig == NULL) { + nvgpu_err(g, "failed to load fecs sig"); + return -ENOENT; + } + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto rel_sig; + } + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fecs_sig->data, + min_t(size_t, sizeof(*lsf_desc), fecs_sig->size)); + + lsf_desc->falcon_id = FALCON_ID_FECS; + + p_img->desc = nvgpu_kzalloc(g, sizeof(struct ls_falcon_ucode_desc)); + if (p_img->desc == NULL) { + err = -ENOMEM; + goto free_lsf_desc; + } + + p_img->desc->bootloader_start_offset = fecs->boot.offset; + p_img->desc->bootloader_size = NVGPU_ALIGN(fecs->boot.size, + LSF_DATA_SIZE_ALIGNMENT); + p_img->desc->bootloader_imem_offset = fecs->boot_imem_offset; + p_img->desc->bootloader_entry_point = fecs->boot_entry; + + tmp_size = 
nvgpu_safe_add_u32(NVGPU_ALIGN(fecs->boot.size, + LSF_DATA_SIZE_ALIGNMENT), + NVGPU_ALIGN(fecs->code.size, + LSF_DATA_SIZE_ALIGNMENT)); + p_img->desc->image_size = nvgpu_safe_add_u32(tmp_size, + NVGPU_ALIGN(fecs->data.size, + LSF_DATA_SIZE_ALIGNMENT)); + p_img->desc->app_size = nvgpu_safe_add_u32(NVGPU_ALIGN(fecs->code.size, + LSF_DATA_SIZE_ALIGNMENT), + NVGPU_ALIGN(fecs->data.size, + LSF_DATA_SIZE_ALIGNMENT)); + p_img->desc->app_start_offset = fecs->code.offset; + p_img->desc->app_imem_offset = APP_IMEM_OFFSET; + p_img->desc->app_imem_entry = APP_IMEM_ENTRY; + p_img->desc->app_dmem_offset = APP_DMEM_OFFSET; + p_img->desc->app_resident_code_offset = APP_RESIDENT_CODE_OFFSET; + p_img->desc->app_resident_code_size = fecs->code.size; + p_img->desc->app_resident_data_offset = + nvgpu_safe_sub_u32(fecs->data.offset, fecs->code.offset); + p_img->desc->app_resident_data_size = fecs->data.size; + p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(gr_falcon); + p_img->data_size = p_img->desc->image_size; + + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + + nvgpu_acr_dbg(g, "fecs fw loaded\n"); + + nvgpu_release_firmware(g, fecs_sig); + + return 0; +free_lsf_desc: + nvgpu_kfree(g, lsf_desc); +rel_sig: + nvgpu_release_firmware(g, fecs_sig); + return err; +} + +int nvgpu_acr_lsf_gpccs_ucode_details(struct gk20a *g, void *lsf_ucode_img) +{ + u32 tmp_size; + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl); + struct lsf_ucode_desc *lsf_desc; + struct nvgpu_firmware *gpccs_sig = NULL; + struct flcn_ucode_img *p_img = + (struct flcn_ucode_img *)lsf_ucode_img; + struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g); + struct nvgpu_ctxsw_ucode_segments *gpccs = + nvgpu_gr_falcon_get_gpccs_ucode_segments(gr_falcon); + int err; + + if ((gpccs == NULL) || (gr_falcon == NULL)) { + return -EINVAL; + } + + if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + return -ENOENT; + } + + switch (ver) { + case NVGPU_GPUID_GV11B: +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_GPUID: +#endif + gpccs_sig = nvgpu_request_firmware(g, T18x_GPCCS_UCODE_SIG, + NVGPU_REQUEST_FIRMWARE_NO_WARN); + break; +#ifdef CONFIG_NVGPU_DGPU + case NVGPU_GPUID_TU104: + gpccs_sig = nvgpu_request_firmware(g, TU104_GPCCS_UCODE_SIG, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + break; +#endif +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_DGPU_GPUID: + gpccs_sig = nvgpu_request_firmware(g, NEXT_DGPU_GPCCS_UCODE_SIG, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + break; +#endif + + default: + nvgpu_err(g, "no support for GPUID %x", ver); + break; + } + + if (gpccs_sig == NULL) { + nvgpu_err(g, "failed to load gpccs sig"); + return -ENOENT; + } + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto rel_sig; + } + nvgpu_memcpy((u8 *)lsf_desc, gpccs_sig->data, + min_t(size_t, sizeof(*lsf_desc), gpccs_sig->size)); + lsf_desc->falcon_id = FALCON_ID_GPCCS; + + p_img->desc = nvgpu_kzalloc(g, sizeof(struct ls_falcon_ucode_desc)); + if (p_img->desc == NULL) { + err = -ENOMEM; + goto free_lsf_desc; + } + + p_img->desc->bootloader_start_offset = BL_START_OFFSET; + p_img->desc->bootloader_size = NVGPU_ALIGN(gpccs->boot.size, + LSF_DATA_SIZE_ALIGNMENT); + p_img->desc->bootloader_imem_offset = gpccs->boot_imem_offset; + p_img->desc->bootloader_entry_point = gpccs->boot_entry; + + tmp_size = nvgpu_safe_add_u32(NVGPU_ALIGN(gpccs->boot.size, + LSF_DATA_SIZE_ALIGNMENT), + NVGPU_ALIGN(gpccs->code.size, + LSF_DATA_SIZE_ALIGNMENT)); + + p_img->desc->image_size = 
nvgpu_safe_add_u32(tmp_size, + NVGPU_ALIGN(gpccs->data.size, + LSF_DATA_SIZE_ALIGNMENT)); + p_img->desc->app_size = + nvgpu_safe_add_u32(NVGPU_ALIGN(gpccs->code.size, + LSF_DATA_SIZE_ALIGNMENT), + NVGPU_ALIGN(gpccs->data.size, + LSF_DATA_SIZE_ALIGNMENT)); + p_img->desc->app_start_offset = p_img->desc->bootloader_size; + p_img->desc->app_imem_offset = APP_IMEM_OFFSET; + p_img->desc->app_imem_entry = APP_IMEM_ENTRY; + p_img->desc->app_dmem_offset = APP_DMEM_OFFSET; + p_img->desc->app_resident_code_offset = APP_RESIDENT_CODE_OFFSET; + p_img->desc->app_resident_code_size = NVGPU_ALIGN(gpccs->code.size, + LSF_DATA_SIZE_ALIGNMENT); + p_img->desc->app_resident_data_offset = + nvgpu_safe_sub_u32(NVGPU_ALIGN(gpccs->data.offset, + LSF_DATA_SIZE_ALIGNMENT), + NVGPU_ALIGN(gpccs->code.offset, + LSF_DATA_SIZE_ALIGNMENT)); + p_img->desc->app_resident_data_size = NVGPU_ALIGN(gpccs->data.size, + LSF_DATA_SIZE_ALIGNMENT); + p_img->data = (u32 *) + (void *)((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(gr_falcon) + + gpccs->boot.offset); + p_img->data_size = NVGPU_ALIGN(p_img->desc->image_size, + LSF_DATA_SIZE_ALIGNMENT); + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + + nvgpu_acr_dbg(g, "gpccs fw loaded\n"); + + nvgpu_release_firmware(g, gpccs_sig); + + return 0; +free_lsf_desc: + nvgpu_kfree(g, lsf_desc); +rel_sig: + nvgpu_release_firmware(g, gpccs_sig); + return err; +} + +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_acr_lsf_sec2_ucode_details(struct gk20a *g, void *lsf_ucode_img) +{ + struct nvgpu_firmware *sec2_fw, *sec2_desc, *sec2_sig; + struct ls_falcon_ucode_desc *desc; + struct lsf_ucode_desc *lsf_desc; + struct flcn_ucode_img *p_img = + (struct flcn_ucode_img *)lsf_ucode_img; + u32 *ucode_image; + int err = 0; + + nvgpu_acr_dbg(g, "requesting SEC2 ucode in %s", g->name); + + if (g->is_fusa_sku) { + sec2_fw = nvgpu_request_firmware(g, + LSF_SEC2_UCODE_IMAGE_FUSA_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } else { + sec2_fw = nvgpu_request_firmware(g, + LSF_SEC2_UCODE_IMAGE_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } + + if (sec2_fw == NULL) { + nvgpu_err(g, "failed to load sec2 ucode!!"); + return -ENOENT; + } + + ucode_image = (u32 *)sec2_fw->data; + + nvgpu_acr_dbg(g, "requesting SEC2 ucode desc in %s", g->name); + if (g->is_fusa_sku) { + sec2_desc = nvgpu_request_firmware(g, + LSF_SEC2_UCODE_DESC_FUSA_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } else { + sec2_desc = nvgpu_request_firmware(g, + LSF_SEC2_UCODE_DESC_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } + + if (sec2_desc == NULL) { + nvgpu_err(g, "failed to load SEC2 ucode desc!!"); + err = -ENOENT; + goto release_img_fw; + } + + desc = (struct ls_falcon_ucode_desc *)sec2_desc->data; + + if (g->is_fusa_sku) { + sec2_sig = nvgpu_request_firmware(g, + LSF_SEC2_UCODE_SIG_FUSA_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } else { + sec2_sig = nvgpu_request_firmware(g, + LSF_SEC2_UCODE_SIG_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } + if (sec2_sig == NULL) { + nvgpu_err(g, "failed to load SEC2 sig!!"); + err = -ENOENT; + goto release_desc; + } + + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto release_sig; + } + + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)sec2_sig->data, + min_t(size_t, sizeof(*lsf_desc), sec2_sig->size)); + + lsf_desc->falcon_id = FALCON_ID_SEC2; + + p_img->desc = desc; + p_img->data = ucode_image; + p_img->data_size = desc->app_start_offset + desc->app_size; + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + + nvgpu_acr_dbg(g, "requesting SEC2 ucode in %s done", g->name); + + 
return err;
+release_sig:
+	nvgpu_release_firmware(g, sec2_sig);
+release_desc:
+	nvgpu_release_firmware(g, sec2_desc);
+release_img_fw:
+	nvgpu_release_firmware(g, sec2_fw);
+	return err;
+}
+#endif
+
+/* Populate static LSB header information using the provided ucode image */
+static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
+	u32 falcon_id, struct lsfm_managed_ucode_img *pnode)
+{
+	u32 full_app_size = 0;
+	u32 data = 0;
+
+	if (pnode->ucode_img.lsf_desc != NULL) {
+		nvgpu_memcpy((u8 *)&pnode->lsb_header.signature,
+			(u8 *)pnode->ucode_img.lsf_desc,
+			sizeof(struct lsf_ucode_desc));
+	}
+
+	if (!pnode->ucode_img.is_next_core_img) {
+		pnode->lsb_header.ucode_size = pnode->ucode_img.data_size;
+
+		/* Uses a loader, i.e. it has a desc */
+		pnode->lsb_header.data_size = LSB_HDR_DATA_SIZE;
+
+		/*
+		 * The loader code size is already aligned (padded) such that
+		 * the code following it is aligned, but the size in the image
+		 * desc is not, bloat it up to be on a 256 byte alignment.
+		 */
+		pnode->lsb_header.bl_code_size = NVGPU_ALIGN(
+			pnode->ucode_img.desc->bootloader_size,
+			LSF_BL_CODE_SIZE_ALIGNMENT);
+		full_app_size = nvgpu_safe_add_u32(
+			NVGPU_ALIGN(pnode->ucode_img.desc->app_size,
+			LSF_BL_CODE_SIZE_ALIGNMENT),
+			pnode->lsb_header.bl_code_size);
+
+		pnode->lsb_header.ucode_size = nvgpu_safe_add_u32(NVGPU_ALIGN(
+			pnode->ucode_img.desc->app_resident_data_offset,
+			LSF_BL_CODE_SIZE_ALIGNMENT),
+			pnode->lsb_header.bl_code_size);
+
+		pnode->lsb_header.data_size = nvgpu_safe_sub_u32(full_app_size,
+			pnode->lsb_header.ucode_size);
+		/*
+		 * Though the BL is located at the 0th offset of the image, the
+		 * VA is different to make sure that it doesn't collide with
+		 * the actual OS VA range
+		 */
+		pnode->lsb_header.bl_imem_off =
+			pnode->ucode_img.desc->bootloader_imem_offset;
+
+		pnode->lsb_header.flags = NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE;
+
+		if (falcon_id == FALCON_ID_PMU) {
+			data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
+			pnode->lsb_header.flags = data;
+		}
+
+		if (g->acr->lsf[falcon_id].is_priv_load) {
+			pnode->lsb_header.flags |=
+				NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE;
+		}
+
+	} else {
+		pnode->lsb_header.ucode_size = 0;
+		pnode->lsb_header.data_size = 0;
+		pnode->lsb_header.bl_code_size = 0;
+		pnode->lsb_header.bl_imem_off = 0;
+		pnode->lsb_header.bl_data_size = 0;
+		pnode->lsb_header.bl_data_off = 0;
+	}
+}
+
+/* Adds a ucode image to the list of managed ucode images.
*/ +static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, + struct flcn_ucode_img *ucode_image, u32 falcon_id) +{ + struct lsfm_managed_ucode_img *pnode; + + pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_managed_ucode_img)); + if (pnode == NULL) { + return -ENOMEM; + } + + /* Keep a copy of the ucode image info locally */ + nvgpu_memcpy((u8 *)&pnode->ucode_img, (u8 *)ucode_image, + sizeof(struct flcn_ucode_img)); + + /* Fill in static WPR header info*/ + pnode->wpr_header.falcon_id = falcon_id; + pnode->wpr_header.bootstrap_owner = g->acr->bootstrap_owner; + pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY; + + pnode->wpr_header.lazy_bootstrap = + nvgpu_safe_cast_bool_to_u32( + g->acr->lsf[falcon_id].is_lazy_bootstrap); + + /* Fill in static LSB header info elsewhere */ + lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode); + pnode->wpr_header.bin_version = pnode->lsb_header.signature.version; + pnode->next = plsfm->ucode_img_list; + plsfm->ucode_img_list = pnode; + + return 0; +} + +static int lsfm_check_and_add_ucode_image(struct gk20a *g, + struct ls_flcn_mgr *plsfm, u32 lsf_index) +{ + struct flcn_ucode_img ucode_img; + struct nvgpu_acr *acr = g->acr; + u32 falcon_id; + int err = 0; + + if (!nvgpu_test_bit(lsf_index, (void *)&acr->lsf_enable_mask)) { + return err; + } + + if (acr->lsf[lsf_index].get_lsf_ucode_details == NULL) { + nvgpu_err(g, "LS falcon-%d ucode fetch details not initialized", + lsf_index); + return -ENOENT; + } + + (void) memset(&ucode_img, MEMSET_VALUE, sizeof(ucode_img)); + + err = acr->lsf[lsf_index].get_lsf_ucode_details(g, + (void *)&ucode_img); + if (err != 0) { + nvgpu_err(g, "LS falcon-%d ucode get failed", lsf_index); + return err; + } + + falcon_id = ucode_img.lsf_desc->falcon_id; + err = lsfm_add_ucode_img(g, plsfm, &ucode_img, falcon_id); + if (err != 0) { + nvgpu_err(g, " Failed to add falcon-%d to LSFM ", falcon_id); + return err; + } + + plsfm->managed_flcn_cnt++; + + return err; +} + +/* Discover all managed falcon ucode images */ +static int lsfm_discover_ucode_images(struct gk20a *g, + struct ls_flcn_mgr *plsfm) +{ + u32 i; + int err = 0; + +#ifdef CONFIG_NVGPU_DGPU + err = lsfm_check_and_add_ucode_image(g, plsfm, FALCON_ID_SEC2); + if (err != 0) { + return err; + } +#endif + /* + * Enumerate all constructed falcon objects, as we need the ucode + * image info and total falcon count + */ + for (i = 0U; i < FALCON_ID_END; i++) { +#ifdef CONFIG_NVGPU_DGPU + if (i == FALCON_ID_SEC2) { + continue; + } +#endif + err = lsfm_check_and_add_ucode_image(g, plsfm, i); + if (err != 0) { + return err; + } + } + + return err; +} + +#ifdef CONFIG_NVGPU_DGPU +/* Discover all supported shared data falcon SUB WPRs */ +static int lsfm_discover_and_add_sub_wprs(struct gk20a *g, + struct ls_flcn_mgr *plsfm) +{ + struct lsfm_sub_wpr *pnode; + u32 size_4K = 0; + u32 sub_wpr_index; + + for (sub_wpr_index = 1; + sub_wpr_index <= (u32) LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX; + sub_wpr_index++) { + + switch (sub_wpr_index) { + case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA: + size_4K = LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K; + break; + default: + size_4K = 0; /* subWpr not supported */ + break; + } + + if (size_4K != 0U) { + pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_sub_wpr)); + if (pnode == NULL) { + return -ENOMEM; + } + + pnode->sub_wpr_header.use_case_id = sub_wpr_index; + pnode->sub_wpr_header.size_4K = size_4K; + + pnode->pnext = plsfm->psub_wpr_list; + plsfm->psub_wpr_list = pnode; + + plsfm->managed_sub_wpr_count = + 
nvgpu_safe_cast_u32_to_u16(nvgpu_safe_add_u32( + plsfm->managed_sub_wpr_count, 1U)); + } + } + + return 0; +} +#endif + +/* Generate WPR requirements for ACR allocation request */ +static int lsf_gen_wpr_requirements(struct gk20a *g, + struct ls_flcn_mgr *plsfm) +{ + struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; +#ifdef CONFIG_NVGPU_DGPU + struct lsfm_sub_wpr *pnode_sub_wpr = plsfm->psub_wpr_list; + u32 sub_wpr_header; +#endif + u32 wpr_offset; + u32 flcn_cnt; + + /* + * Start with an array of WPR headers at the base of the WPR. + * The expectation here is that the secure falcon will do a single DMA + * read of this array and cache it internally so it's OK to pack these. + * Also, we add 1 to the falcon count to indicate the end of the array. + */ + flcn_cnt = U32(plsfm->managed_flcn_cnt); + wpr_offset = nvgpu_safe_mult_u32(U32(sizeof(struct lsf_wpr_header)), + nvgpu_safe_add_u32(flcn_cnt, U32(1))); + +#ifdef CONFIG_NVGPU_DGPU + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) { + wpr_offset = ALIGN_UP(wpr_offset, LSF_WPR_HEADERS_TOTAL_SIZE_MAX); + /* + * SUB WPR header is appended after lsf_wpr_header_v0 in WPR blob. + * The size is allocated as per the managed SUB WPR count. + */ + wpr_offset = ALIGN_UP(wpr_offset, LSF_SUB_WPR_HEADER_ALIGNMENT); + sub_wpr_header = nvgpu_safe_mult_u32( + U32(sizeof(struct lsf_shared_sub_wpr_header)), + nvgpu_safe_add_u32(U32(plsfm->managed_sub_wpr_count), + U32(1))); + wpr_offset = nvgpu_safe_add_u32(wpr_offset, sub_wpr_header); + } +#endif + + /* + * Walk the managed falcons, accounting for the LSB structs + * as well as the ucode images. + */ + while (pnode != NULL) { + /* Align, save off, and include an LSB header size */ + wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_LSB_HEADER_ALIGNMENT); + pnode->wpr_header.lsb_offset = wpr_offset; + wpr_offset = nvgpu_safe_add_u32(wpr_offset, + (u32)sizeof(struct lsf_lsb_header)); + + /* + * Align, save off, and include the original (static)ucode + * image size + */ + wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_UCODE_DATA_ALIGNMENT); + pnode->lsb_header.ucode_off = wpr_offset; + wpr_offset = nvgpu_safe_add_u32(wpr_offset, + pnode->ucode_img.data_size); + + /* + * For falcons that use a boot loader (BL), we append a loader + * desc structure on the end of the ucode image and consider this + * the boot loader data. The host will then copy the loader desc + * args to this space within the WPR region (before locking down) + * and the HS bin will then copy them to DMEM 0 for the loader. + */ + /* + * Track the size for LSB details filled in later + * Note that at this point we don't know what kind of + * boot loader desc, so we just take the size of the + * generic one, which is the largest it will will ever be. 
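+ * (Here that generic size is sizeof(pnode->bl_gen_desc), and it is padded + * to LSF_BL_DATA_SIZE_ALIGNMENT just below.)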
+ */ + /* Align (size bloat) and save off generic descriptor size*/ + pnode->lsb_header.bl_data_size = NVGPU_ALIGN( + nvgpu_safe_cast_u64_to_u32( + sizeof(pnode->bl_gen_desc)), + LSF_BL_DATA_SIZE_ALIGNMENT); + + /*Align, save off, and include the additional BL data*/ + wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_BL_DATA_ALIGNMENT); + pnode->lsb_header.bl_data_off = wpr_offset; + wpr_offset = nvgpu_safe_add_u32(wpr_offset, + pnode->lsb_header.bl_data_size); + + /* Finally, update ucode surface size to include updates */ + pnode->full_ucode_size = wpr_offset - + pnode->lsb_header.ucode_off; + if (pnode->wpr_header.falcon_id != FALCON_ID_PMU && + pnode->wpr_header.falcon_id != FALCON_ID_PMU_NEXT_CORE) { + pnode->lsb_header.app_code_off = + pnode->lsb_header.bl_code_size; + pnode->lsb_header.app_code_size = + pnode->lsb_header.ucode_size - + pnode->lsb_header.bl_code_size; + pnode->lsb_header.app_data_off = + pnode->lsb_header.ucode_size; + pnode->lsb_header.app_data_size = + pnode->lsb_header.data_size; + } +#if defined(CONFIG_NVGPU_NEXT) + /* Falcon image is cleanly partitioned between a code and + * data section where we don't need extra reserved space. + * NVRISCV image has no clear partition for code and data + * section, so need to reserve wpr space. + */ + if (pnode->wpr_header.falcon_id == FALCON_ID_PMU_NEXT_CORE) { + wpr_offset = nvgpu_safe_add_u32(wpr_offset, + (u32)PMU_NVRISCV_WPR_RSVD_BYTES); + } +#endif + pnode = pnode->next; + } + +#ifdef CONFIG_NVGPU_DGPU + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) { + /* + * Walk through the sub wpr headers to accommodate + * sub wprs in WPR request + */ + while (pnode_sub_wpr != NULL) { + wpr_offset = ALIGN_UP(wpr_offset, + SUB_WPR_SIZE_ALIGNMENT); + pnode_sub_wpr->sub_wpr_header.start_addr = wpr_offset; + wpr_offset = wpr_offset + + (pnode_sub_wpr->sub_wpr_header.size_4K + << SHIFT_4KB); + pnode_sub_wpr = pnode_sub_wpr->pnext; + } + wpr_offset = ALIGN_UP(wpr_offset, SUB_WPR_SIZE_ALIGNMENT); + } +#endif + + plsfm->wpr_size = wpr_offset; + return 0; +} + +/* Initialize WPR contents */ +static int lsfm_populate_flcn_bl_dmem_desc(struct gk20a *g, + void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid) +{ + struct wpr_carveout_info wpr_inf; + struct lsfm_managed_ucode_img *p_lsfm = + (struct lsfm_managed_ucode_img *)lsfm; + struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img); + struct flcn_bl_dmem_desc *ldr_cfg = + &(p_lsfm->bl_gen_desc); + u64 addr_base; + struct ls_falcon_ucode_desc *desc; + u64 addr_code, addr_data; + + if (p_img->desc == NULL) { + /* + * This means its a header based ucode, + * and so we do not fill BL gen desc structure + */ + return -EINVAL; + } + desc = p_img->desc; + + /* + * Calculate physical and virtual addresses for various portions of + * the PMU ucode image + * Calculate the 32-bit addresses for the application code, application + * data, and bootloader code. These values are all based on IM_BASE. + * The 32-bit addresses will be the upper 32-bits of the virtual or + * physical addresses of each respective segment. 
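+ * In this path the resulting code/data addresses are programmed as full + * DMA bases via flcn64_set_dma() below rather than as 32-bit upper halves.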
+ */ + addr_base = p_lsfm->lsb_header.ucode_off; + g->acr->get_wpr_info(g, &wpr_inf); + addr_base = nvgpu_safe_add_u64(addr_base, wpr_inf.wpr_base); + + nvgpu_acr_dbg(g, "falcon ID %x", p_lsfm->wpr_header.falcon_id); + nvgpu_acr_dbg(g, "gen loader cfg addrbase %llx ", addr_base); + addr_code = nvgpu_safe_add_u64(addr_base, desc->app_start_offset); + addr_data = nvgpu_safe_add_u64(addr_code, + desc->app_resident_data_offset); + + nvgpu_acr_dbg(g, "gen cfg addrcode %llx data %llx load offset %x", + addr_code, addr_data, desc->bootloader_start_offset); + + /* Populate the LOADER_CONFIG state */ + (void) memset((void *) ldr_cfg, MEMSET_VALUE, + sizeof(struct flcn_bl_dmem_desc)); + + ldr_cfg->ctx_dma = g->acr->lsf[falconid].falcon_dma_idx; + flcn64_set_dma(&ldr_cfg->code_dma_base, addr_code); + ldr_cfg->non_sec_code_off = desc->app_resident_code_offset; + ldr_cfg->non_sec_code_size = desc->app_resident_code_size; + flcn64_set_dma(&ldr_cfg->data_dma_base, addr_data); + ldr_cfg->data_size = desc->app_resident_data_size; + ldr_cfg->code_entry_point = desc->app_imem_entry; + + +#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU) + /* Update the argc/argv members*/ + ldr_cfg->argc = UCODE_PARAMS; + if (g->acr->lsf[falconid].get_cmd_line_args_offset != NULL) { + g->acr->lsf[falconid].get_cmd_line_args_offset(g, + &ldr_cfg->argv); + } +#else + /* Update the argc/argv members*/ + ldr_cfg->argc = UCODE_PARAMS; + +#endif + *p_bl_gen_desc_size = (u32)sizeof(struct flcn_bl_dmem_desc); + return 0; +} + +/* Populate falcon boot loader generic desc.*/ +static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, + struct lsfm_managed_ucode_img *pnode) +{ + return lsfm_populate_flcn_bl_dmem_desc(g, pnode, + &pnode->bl_gen_desc_size, + pnode->wpr_header.falcon_id); +} + +#ifdef CONFIG_NVGPU_DGPU +static void lsfm_init_sub_wpr_contents(struct gk20a *g, + struct ls_flcn_mgr *plsfm, struct nvgpu_mem *ucode) +{ + struct lsfm_sub_wpr *psub_wpr_node; + struct lsf_shared_sub_wpr_header last_sub_wpr_header; + u32 temp_size = (u32)sizeof(struct lsf_shared_sub_wpr_header); + u32 sub_wpr_header_offset = 0; + u32 i = 0; + + /* SubWpr headers are placed after WPR headers */ + sub_wpr_header_offset = LSF_WPR_HEADERS_TOTAL_SIZE_MAX; + + /* + * Walk through the managed shared subWPRs headers + * and flush them to FB + */ + psub_wpr_node = plsfm->psub_wpr_list; + i = 0; + while (psub_wpr_node != NULL) { + nvgpu_mem_wr_n(g, ucode, + nvgpu_safe_add_u32(sub_wpr_header_offset, + nvgpu_safe_mult_u32(i, temp_size)), + &psub_wpr_node->sub_wpr_header, temp_size); + + psub_wpr_node = psub_wpr_node->pnext; + i = nvgpu_safe_add_u32(i, 1U); + } + last_sub_wpr_header.use_case_id = + LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID; + nvgpu_mem_wr_n(g, ucode, nvgpu_safe_add_u32(sub_wpr_header_offset, + nvgpu_safe_mult_u32(plsfm->managed_sub_wpr_count, temp_size)), + &last_sub_wpr_header, temp_size); +} +#endif + +static int lsfm_init_wpr_contents(struct gk20a *g, + struct ls_flcn_mgr *plsfm, struct nvgpu_mem *ucode) +{ + struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; + struct lsf_wpr_header last_wpr_hdr; + u32 i = 0; + u64 tmp; + int err = 0; + + /* The WPR array is at the base of the WPR */ + pnode = plsfm->ucode_img_list; + (void) memset(&last_wpr_hdr, MEMSET_VALUE, sizeof(struct lsf_wpr_header)); + +#ifdef CONFIG_NVGPU_DGPU + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) { + lsfm_init_sub_wpr_contents(g, plsfm, ucode); + } +#endif + + /* + * Walk the managed falcons, flush WPR and LSB headers to FB. 
+ * flush any bl args to the storage area relative to the + * ucode image (appended on the end as a DMEM area). + */ + while (pnode != NULL) { + /* Flush WPR header to memory*/ + nvgpu_mem_wr_n(g, ucode, + nvgpu_safe_mult_u32(i, + nvgpu_safe_cast_u64_to_u32(sizeof( + pnode->wpr_header))), &pnode->wpr_header, + nvgpu_safe_cast_u64_to_u32(sizeof(pnode->wpr_header))); + + nvgpu_acr_dbg(g, "wpr header"); + nvgpu_acr_dbg(g, "falconid :%d", + pnode->wpr_header.falcon_id); + nvgpu_acr_dbg(g, "lsb_offset :%x", + pnode->wpr_header.lsb_offset); + nvgpu_acr_dbg(g, "bootstrap_owner :%d", + pnode->wpr_header.bootstrap_owner); + nvgpu_acr_dbg(g, "lazy_bootstrap :%d", + pnode->wpr_header.lazy_bootstrap); + nvgpu_acr_dbg(g, "status :%d", + pnode->wpr_header.status); + + /*Flush LSB header to memory*/ + nvgpu_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset, + &pnode->lsb_header, + nvgpu_safe_cast_u64_to_u32( + sizeof(pnode->lsb_header))); + + nvgpu_acr_dbg(g, "lsb header"); + nvgpu_acr_dbg(g, "ucode_off :%x", + pnode->lsb_header.ucode_off); + nvgpu_acr_dbg(g, "ucode_size :%x", + pnode->lsb_header.ucode_size); + nvgpu_acr_dbg(g, "data_size :%x", + pnode->lsb_header.data_size); + nvgpu_acr_dbg(g, "bl_code_size :%x", + pnode->lsb_header.bl_code_size); + nvgpu_acr_dbg(g, "bl_imem_off :%x", + pnode->lsb_header.bl_imem_off); + nvgpu_acr_dbg(g, "bl_data_off :%x", + pnode->lsb_header.bl_data_off); + nvgpu_acr_dbg(g, "bl_data_size :%x", + pnode->lsb_header.bl_data_size); + nvgpu_acr_dbg(g, "app_code_off :%x", + pnode->lsb_header.app_code_off); + nvgpu_acr_dbg(g, "app_code_size :%x", + pnode->lsb_header.app_code_size); + nvgpu_acr_dbg(g, "app_data_off :%x", + pnode->lsb_header.app_data_off); + nvgpu_acr_dbg(g, "app_data_size :%x", + pnode->lsb_header.app_data_size); + nvgpu_acr_dbg(g, "flags :%x", + pnode->lsb_header.flags); + + if (!pnode->ucode_img.is_next_core_img) { + /* + * If this falcon has a boot loader and related args, + * flush them. + */ + /* Populate gen bl and flush to memory */ + err = lsfm_fill_flcn_bl_gen_desc(g, pnode); + if (err != 0) { + nvgpu_err(g, "bl_gen_desc failed err=%d", err); + return err; + } + nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.bl_data_off, + &pnode->bl_gen_desc, pnode->bl_gen_desc_size); + } + + /* Copying of ucode */ + nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off, + pnode->ucode_img.data, pnode->ucode_img.data_size); + + pnode = pnode->next; + i = nvgpu_safe_add_u32(i, 1U); + } + + /* Tag the terminator WPR header with an invalid falcon ID. */ + last_wpr_hdr.falcon_id = FALCON_ID_INVALID; + tmp = nvgpu_safe_mult_u32(plsfm->managed_flcn_cnt, + (u32)sizeof(struct lsf_wpr_header)); + nvgpu_assert(tmp <= U32_MAX); + nvgpu_mem_wr_n(g, ucode, (u32)tmp, &last_wpr_hdr, + (u32)sizeof(struct lsf_wpr_header)); + + return err; +} + +/* Free any ucode image structure resources. 
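+ * Only the LSF descriptor is freed here; non-PMU images also own their + * ucode descriptor and are released via lsfm_free_nonpmu_ucode_img_res() below.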
*/ +static void lsfm_free_ucode_img_res(struct gk20a *g, + struct flcn_ucode_img *p_img) +{ + if (p_img->lsf_desc != NULL) { + nvgpu_kfree(g, p_img->lsf_desc); + p_img->lsf_desc = NULL; + } +} + +static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g, + struct flcn_ucode_img *p_img) +{ + if (p_img->lsf_desc != NULL) { + nvgpu_kfree(g, p_img->lsf_desc); + p_img->lsf_desc = NULL; + } + if (p_img->desc != NULL) { + nvgpu_kfree(g, p_img->desc); + p_img->desc = NULL; + } +} + +static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm) +{ + u32 cnt = plsfm->managed_flcn_cnt; + struct lsfm_managed_ucode_img *mg_ucode_img; + + while (cnt != 0U) { + mg_ucode_img = plsfm->ucode_img_list; + if (mg_ucode_img->ucode_img.lsf_desc->falcon_id == + FALCON_ID_PMU) { + lsfm_free_ucode_img_res(g, &mg_ucode_img->ucode_img); + } else { + lsfm_free_nonpmu_ucode_img_res(g, + &mg_ucode_img->ucode_img); + } + plsfm->ucode_img_list = mg_ucode_img->next; + nvgpu_kfree(g, mg_ucode_img); + cnt--; + } +} + +int nvgpu_acr_prepare_ucode_blob(struct gk20a *g) +{ + int err = 0; + struct ls_flcn_mgr lsfm_l, *plsfm; + + struct wpr_carveout_info wpr_inf; + struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g); + + /* Recovery case, we do not need to form non WPR blob of ucodes */ + if (g->acr->ucode_blob.cpu_va != NULL) { + return err; + } + + + plsfm = &lsfm_l; + (void) memset((void *)plsfm, MEMSET_VALUE, sizeof(struct ls_flcn_mgr)); + err = nvgpu_gr_falcon_init_ctxsw_ucode(g, gr_falcon); + if (err != 0) { + nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err); + return err; + } + + g->acr->get_wpr_info(g, &wpr_inf); + nvgpu_acr_dbg(g, "wpr carveout base:%llx\n", (wpr_inf.wpr_base)); + nvgpu_acr_dbg(g, "wpr carveout size :%llx\n", wpr_inf.size); + + /* Discover all managed falcons */ + err = lsfm_discover_ucode_images(g, plsfm); + nvgpu_acr_dbg(g, " Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt); + if (err != 0) { + goto cleanup_exit; + } + +#ifdef CONFIG_NVGPU_DGPU + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) { + err = lsfm_discover_and_add_sub_wprs(g, plsfm); + if (err != 0) { + goto cleanup_exit; + } + } +#endif + + if ((plsfm->managed_flcn_cnt != 0U) && + (g->acr->ucode_blob.cpu_va == NULL)) { + /* Generate WPR requirements */ + err = lsf_gen_wpr_requirements(g, plsfm); + if (err != 0) { + goto cleanup_exit; + } + + /* Alloc memory to hold ucode blob contents */ + err = g->acr->alloc_blob_space(g, plsfm->wpr_size, + &g->acr->ucode_blob); + if (err != 0) { + goto cleanup_exit; + } + + nvgpu_acr_dbg(g, "managed LS falcon %d, WPR size %d bytes.\n", + plsfm->managed_flcn_cnt, plsfm->wpr_size); + + err = lsfm_init_wpr_contents(g, plsfm, &g->acr->ucode_blob); + if (err != 0) { + nvgpu_kfree(g, &g->acr->ucode_blob); + goto cleanup_exit; + } + } else { + nvgpu_acr_dbg(g, "LSFM is managing no falcons.\n"); + } + nvgpu_acr_dbg(g, "prepare ucode blob return 0\n"); + +cleanup_exit: + free_acr_resources(g, plsfm); + return err; +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h new file mode 100644 index 000000000..c6beddcac --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_BLOB_CONSTRUCT_H +#define ACR_BLOB_CONSTRUCT_H + +#include +#include +#include + +#include "nvgpu_acr_interface.h" + +#define UCODE_NB_MAX_DATE_LENGTH 64U +struct ls_falcon_ucode_desc { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[UCODE_NB_MAX_DATE_LENGTH]; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset; + u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; + u32 app_resident_code_size; + u32 app_resident_data_offset; + u32 app_resident_data_size; + u32 nb_imem_overlays; + u32 nb_dmem_overlays; + struct {u32 start; u32 size; } load_ovl[UCODE_NB_MAX_DATE_LENGTH]; + u32 compressed; +}; + +struct ls_falcon_ucode_desc_v1 { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[UCODE_NB_MAX_DATE_LENGTH]; + u32 secure_bootloader; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset; + u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; + u32 app_resident_code_size; + u32 app_resident_data_offset; + u32 app_resident_data_size; + u32 nb_imem_overlays; + u32 nb_dmem_overlays; + struct {u32 start; u32 size; } load_ovl[64]; + u32 compressed; +}; + +struct flcn_ucode_img { + u32 *data; + struct ls_falcon_ucode_desc *desc; + u32 data_size; + struct lsf_ucode_desc *lsf_desc; + bool is_next_core_img; +#if defined(CONFIG_NVGPU_NEXT) + struct falcon_next_core_ucode_desc *ndesc; +#endif +}; + +struct lsfm_managed_ucode_img { + struct lsfm_managed_ucode_img *next; + struct lsf_wpr_header wpr_header; + struct lsf_lsb_header lsb_header; + struct flcn_bl_dmem_desc bl_gen_desc; + u32 bl_gen_desc_size; + u32 full_ucode_size; + struct flcn_ucode_img ucode_img; +}; + +#ifdef CONFIG_NVGPU_DGPU +/* + * LSF shared SubWpr Header + * + * use_case_id - Shared SubWpr use case ID (updated by nvgpu) + * start_addr - start address of subWpr (updated by nvgpu) + * size_4K - size of subWpr in 4K (updated by nvgpu) + */ +struct lsf_shared_sub_wpr_header { + u32 use_case_id; + u32 start_addr; + u32 size_4K; +}; + +/* + * LSFM SUB WPRs struct + * pnext : Next entry in the list, NULL if last + * sub_wpr_header : SubWpr Header struct + */ +struct lsfm_sub_wpr { + 
struct lsfm_sub_wpr *pnext; + struct lsf_shared_sub_wpr_header sub_wpr_header; +}; +#endif + +struct ls_flcn_mgr { + u16 managed_flcn_cnt; + u32 wpr_size; + struct lsfm_managed_ucode_img *ucode_img_list; +#ifdef CONFIG_NVGPU_DGPU + u16 managed_sub_wpr_count; + struct lsfm_sub_wpr *psub_wpr_list; +#endif +}; + +int nvgpu_acr_prepare_ucode_blob(struct gk20a *g); +#ifdef CONFIG_NVGPU_LS_PMU +int nvgpu_acr_lsf_pmu_ucode_details(struct gk20a *g, void *lsf_ucode_img); +#if defined(CONFIG_NVGPU_NEXT) +s32 nvgpu_acr_lsf_pmu_ncore_ucode_details(struct gk20a *g, void *lsf_ucode_img); +#endif +#endif +int nvgpu_acr_lsf_fecs_ucode_details(struct gk20a *g, void *lsf_ucode_img); +int nvgpu_acr_lsf_gpccs_ucode_details(struct gk20a *g, void *lsf_ucode_img); +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_acr_lsf_sec2_ucode_details(struct gk20a *g, void *lsf_ucode_img); +#endif + +#endif /* ACR_BLOB_CONSTRUCT_H */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c new file mode 100644 index 000000000..848d4c0ca --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c @@ -0,0 +1,801 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acr_blob_construct_v0.h" +#include "acr_wpr.h" +#include "acr_priv.h" + +#ifdef CONFIG_NVGPU_LS_PMU +int nvgpu_acr_lsf_pmu_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img) +{ + struct lsf_ucode_desc_v0 *lsf_desc; + struct nvgpu_firmware *fw_sig; + struct nvgpu_firmware *fw_desc; + struct nvgpu_firmware *fw_image; + struct flcn_ucode_img_v0 *p_img = (struct flcn_ucode_img_v0 *)lsf_ucode_img; + int err = 0; + + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc_v0)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto exit; + } + + fw_sig = nvgpu_pmu_fw_sig_desc(g, g->pmu); + fw_desc = nvgpu_pmu_fw_desc_desc(g, g->pmu); + fw_image = nvgpu_pmu_fw_image_desc(g, g->pmu); + + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fw_sig->data, + min_t(size_t, sizeof(*lsf_desc), fw_sig->size)); + + lsf_desc->falcon_id = FALCON_ID_PMU; + + p_img->desc = (struct pmu_ucode_desc *)(void *)fw_desc->data; + p_img->data = (u32 *)(void *)fw_image->data; + p_img->data_size = p_img->desc->image_size; + p_img->lsf_desc = (struct lsf_ucode_desc_v0 *)lsf_desc; + +exit: + return err; +} +#endif + +int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img) +{ + struct lsf_ucode_desc_v0 *lsf_desc; + struct nvgpu_firmware *fecs_sig; + struct flcn_ucode_img_v0 *p_img = (struct flcn_ucode_img_v0 *)lsf_ucode_img; + struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g); + struct nvgpu_ctxsw_ucode_segments *fecs = + nvgpu_gr_falcon_get_fecs_ucode_segments(gr_falcon); + int err; + + fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0); + if (fecs_sig == NULL) { + nvgpu_err(g, "failed to load fecs sig"); + return -ENOENT; + } + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc_v0)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto rel_sig; + } + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fecs_sig->data, + min_t(size_t, sizeof(*lsf_desc), fecs_sig->size)); + + lsf_desc->falcon_id = FALCON_ID_FECS; + + p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc)); + if (p_img->desc == NULL) { + err = -ENOMEM; + goto free_lsf_desc; + } + + p_img->desc->bootloader_start_offset = fecs->boot.offset; + p_img->desc->bootloader_size = NVGPU_ALIGN(fecs->boot.size, 256U); + p_img->desc->bootloader_imem_offset = fecs->boot_imem_offset; + p_img->desc->bootloader_entry_point = fecs->boot_entry; + + p_img->desc->image_size = NVGPU_ALIGN(fecs->boot.size, 256U) + + NVGPU_ALIGN(fecs->code.size, 256U) + NVGPU_ALIGN(fecs->data.size, 256U); + p_img->desc->app_size = NVGPU_ALIGN(fecs->code.size, 256U) + + NVGPU_ALIGN(fecs->data.size, 256U); + p_img->desc->app_start_offset = fecs->code.offset; + p_img->desc->app_imem_offset = 0; + p_img->desc->app_imem_entry = 0; + p_img->desc->app_dmem_offset = 0; + p_img->desc->app_resident_code_offset = 0; + p_img->desc->app_resident_code_size = fecs->code.size; + p_img->desc->app_resident_data_offset = + fecs->data.offset - fecs->code.offset; + p_img->desc->app_resident_data_size = fecs->data.size; + p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(gr_falcon); + p_img->data_size = p_img->desc->image_size; + + p_img->lsf_desc = (struct lsf_ucode_desc_v0 *)lsf_desc; + nvgpu_acr_dbg(g, "fecs fw loaded\n"); + nvgpu_release_firmware(g, fecs_sig); + return 0; +free_lsf_desc: + nvgpu_kfree(g, lsf_desc); +rel_sig: + nvgpu_release_firmware(g, fecs_sig); + return err; +} + +int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img) +{ + struct 
lsf_ucode_desc_v0 *lsf_desc; + struct nvgpu_firmware *gpccs_sig; + struct flcn_ucode_img_v0 *p_img = (struct flcn_ucode_img_v0 *)lsf_ucode_img; + struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g); + struct nvgpu_ctxsw_ucode_segments *gpccs = + nvgpu_gr_falcon_get_gpccs_ucode_segments(gr_falcon); + int err; + + if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + return -ENOENT; + } + + gpccs_sig = nvgpu_request_firmware(g, T18x_GPCCS_UCODE_SIG, 0); + if (gpccs_sig == NULL) { + nvgpu_err(g, "failed to load gpccs sig"); + return -ENOENT; + } + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc_v0)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto rel_sig; + } + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)gpccs_sig->data, + min_t(size_t, sizeof(*lsf_desc), gpccs_sig->size)); + lsf_desc->falcon_id = FALCON_ID_GPCCS; + + p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc)); + if (p_img->desc == NULL) { + err = -ENOMEM; + goto free_lsf_desc; + } + + p_img->desc->bootloader_start_offset = + 0; + p_img->desc->bootloader_size = NVGPU_ALIGN(gpccs->boot.size, 256U); + p_img->desc->bootloader_imem_offset = gpccs->boot_imem_offset; + p_img->desc->bootloader_entry_point = gpccs->boot_entry; + + p_img->desc->image_size = NVGPU_ALIGN(gpccs->boot.size, 256U) + + NVGPU_ALIGN(gpccs->code.size, 256U) + + NVGPU_ALIGN(gpccs->data.size, 256U); + p_img->desc->app_size = NVGPU_ALIGN(gpccs->code.size, 256U) + + NVGPU_ALIGN(gpccs->data.size, 256U); + p_img->desc->app_start_offset = p_img->desc->bootloader_size; + p_img->desc->app_imem_offset = 0; + p_img->desc->app_imem_entry = 0; + p_img->desc->app_dmem_offset = 0; + p_img->desc->app_resident_code_offset = 0; + p_img->desc->app_resident_code_size = NVGPU_ALIGN(gpccs->code.size, 256U); + p_img->desc->app_resident_data_offset = + NVGPU_ALIGN(gpccs->data.offset, 256U) - + NVGPU_ALIGN(gpccs->code.offset, 256U); + p_img->desc->app_resident_data_size = NVGPU_ALIGN(gpccs->data.size, 256U); + p_img->data = (u32 *) + ((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(gr_falcon) + + gpccs->boot.offset); + p_img->data_size = NVGPU_ALIGN(p_img->desc->image_size, 256U); + p_img->lsf_desc = (struct lsf_ucode_desc_v0 *)lsf_desc; + nvgpu_acr_dbg(g, "gpccs fw loaded\n"); + nvgpu_release_firmware(g, gpccs_sig); + return 0; +free_lsf_desc: + nvgpu_kfree(g, lsf_desc); +rel_sig: + nvgpu_release_firmware(g, gpccs_sig); + return err; +} + +/* + * @brief lsfm_fill_static_lsb_hdr_info + * Populate static LSB header information using the provided ucode image + */ +static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g, + u32 falcon_id, struct lsfm_managed_ucode_img_v0 *pnode) +{ + u32 full_app_size = 0; + u32 data = 0; + + if (pnode->ucode_img.lsf_desc != NULL) { + nvgpu_memcpy((u8 *)&pnode->lsb_header.signature, + (u8 *)pnode->ucode_img.lsf_desc, + sizeof(struct lsf_ucode_desc_v0)); + } + pnode->lsb_header.ucode_size = pnode->ucode_img.data_size; + + /* Uses a loader, i.e. it has a descriptor. */ + pnode->lsb_header.data_size = 0; + + /* + * The loader code size is already aligned (padded) such that + * the code following it is aligned, but the size in the image + * desc is not, bloat it up to be on a 256 byte alignment.
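+ * For example, a 0x190 byte boot loader is accounted as 0x200 bytes here, + * since LSF_BL_CODE_SIZE_ALIGNMENT is 256 bytes.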
+ */ + pnode->lsb_header.bl_code_size = NVGPU_ALIGN( + pnode->ucode_img.desc->bootloader_size, + LSF_BL_CODE_SIZE_ALIGNMENT); + full_app_size = NVGPU_ALIGN(pnode->ucode_img.desc->app_size, + LSF_BL_CODE_SIZE_ALIGNMENT) + + pnode->lsb_header.bl_code_size; + pnode->lsb_header.ucode_size = NVGPU_ALIGN( + pnode->ucode_img.desc->app_resident_data_offset, + LSF_BL_CODE_SIZE_ALIGNMENT) + + pnode->lsb_header.bl_code_size; + pnode->lsb_header.data_size = full_app_size - + pnode->lsb_header.ucode_size; + /* + * Though the BL is located at 0th offset of the image, the VA + * is different to make sure that it doesn't collide with the actual + * OS VA range + */ + pnode->lsb_header.bl_imem_off = + pnode->ucode_img.desc->bootloader_imem_offset; + + pnode->lsb_header.flags = 0; + + if (falcon_id == FALCON_ID_PMU) { + data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; + pnode->lsb_header.flags = data; + } + + if (g->acr->lsf[falcon_id].is_priv_load) { + pnode->lsb_header.flags |= + NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE; + } +} + +/* Adds a ucode image to the list of managed ucode images. */ +static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm, + struct flcn_ucode_img_v0 *ucode_image, u32 falcon_id) +{ + + struct lsfm_managed_ucode_img_v0 *pnode; + + pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_managed_ucode_img_v0)); + if (pnode == NULL) { + return -ENOMEM; + } + + /* Keep a copy of the ucode image info locally */ + nvgpu_memcpy((u8 *)&pnode->ucode_img, (u8 *)ucode_image, + sizeof(struct flcn_ucode_img_v0)); + + /* Fill in static WPR header info*/ + pnode->wpr_header.falcon_id = falcon_id; + pnode->wpr_header.bootstrap_owner = g->acr->bootstrap_owner; + pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY; + + pnode->wpr_header.lazy_bootstrap = + (u32)g->acr->lsf[falcon_id].is_lazy_bootstrap; + + /* Fill in static LSB header info elsewhere */ + lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode); + pnode->next = plsfm->ucode_img_list; + plsfm->ucode_img_list = pnode; + return 0; +} + +/* Discover all managed falcon ucode images */ +static int lsfm_discover_ucode_images(struct gk20a *g, + struct ls_flcn_mgr_v0 *plsfm) +{ + struct flcn_ucode_img_v0 ucode_img; + struct nvgpu_acr *acr = g->acr; + u32 falcon_id; + u32 i; + int err = 0; + + /* + * Enumerate all constructed falcon objects, as we need the ucode + * image info and total falcon count + */ + for (i = 0U; i < FALCON_ID_END; i++) { + if (nvgpu_test_bit(i, (void *)&acr->lsf_enable_mask) && + acr->lsf[i].get_lsf_ucode_details != NULL) { + + (void) memset(&ucode_img, 0, sizeof(ucode_img)); + + if (acr->lsf[i].get_lsf_ucode_details(g, + (void *)&ucode_img) != 0) { + nvgpu_err(g, "LS falcon-%d ucode get failed", i); + goto exit; + } + + if (ucode_img.lsf_desc != NULL) { + /* + * falcon_id is formed by grabbing the static + * base falcon_id from the image and adding the + * engine-designated falcon instance. + */ + falcon_id = ucode_img.lsf_desc->falcon_id; + + err = lsfm_add_ucode_img(g, plsfm, &ucode_img, + falcon_id); + if (err != 0) { + nvgpu_err(g, " Failed to add falcon-%d to LSFM ", + falcon_id); + goto exit; + } + + plsfm->managed_flcn_cnt++; + } + } + } + +exit: + return err; +} + +/* Generate WPR requirements for ACR allocation request */ +static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm) +{ + struct lsfm_managed_ucode_img_v0 *pnode = plsfm->ucode_img_list; + u32 wpr_offset; + + /* + * Start with an array of WPR headers at the base of the WPR.
+ * The expectation here is that the secure falcon will do a single DMA + * read of this array and cache it internally so it's OK to pack these. + * Also, we add 1 to the falcon count to indicate the end of the array. + */ + wpr_offset = U32(sizeof(struct lsf_wpr_header_v0)) * + (U32(plsfm->managed_flcn_cnt) + U32(1)); + + /* + * Walk the managed falcons, accounting for the LSB structs + * as well as the ucode images. + */ + while (pnode != NULL) { + /* Align, save off, and include an LSB header size */ + wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_LSB_HEADER_ALIGNMENT); + pnode->wpr_header.lsb_offset = wpr_offset; + wpr_offset += (u32)sizeof(struct lsf_lsb_header_v0); + + /* + * Align, save off, and include the original (static) + * ucode image size + */ + wpr_offset = NVGPU_ALIGN(wpr_offset, + LSF_UCODE_DATA_ALIGNMENT); + pnode->lsb_header.ucode_off = wpr_offset; + wpr_offset += pnode->ucode_img.data_size; + + /* + * For falcons that use a boot loader (BL), we append a loader + * desc structure on the end of the ucode image and consider this + * the boot loader data. The host will then copy the loader desc + * args to this space within the WPR region (before locking down) + * and the HS bin will then copy them to DMEM 0 for the loader. + */ + /* + * Track the size for LSB details filled in later + * Note that at this point we don't know what kind of + * boot loader desc, so we just take the size of the + * generic one, which is the largest it will will ever be. + */ + /* Align (size bloat) and save off generic descriptor size */ + pnode->lsb_header.bl_data_size = NVGPU_ALIGN( + (u32)sizeof(pnode->bl_gen_desc), + LSF_BL_DATA_SIZE_ALIGNMENT); + + /* Align, save off, and include the additional BL data */ + wpr_offset = NVGPU_ALIGN(wpr_offset, + LSF_BL_DATA_ALIGNMENT); + pnode->lsb_header.bl_data_off = wpr_offset; + wpr_offset += pnode->lsb_header.bl_data_size; + + /* Finally, update ucode surface size to include updates */ + pnode->full_ucode_size = wpr_offset - + pnode->lsb_header.ucode_off; + if (pnode->wpr_header.falcon_id != FALCON_ID_PMU) { + pnode->lsb_header.app_code_off = + pnode->lsb_header.bl_code_size; + pnode->lsb_header.app_code_size = + pnode->lsb_header.ucode_size - + pnode->lsb_header.bl_code_size; + pnode->lsb_header.app_data_off = + pnode->lsb_header.ucode_size; + pnode->lsb_header.app_data_size = + pnode->lsb_header.data_size; + } + pnode = pnode->next; + } + plsfm->wpr_size = wpr_offset; + return 0; +} + +/* Initialize WPR contents */ +static int gm20b_pmu_populate_loader_cfg(struct gk20a *g, + void *lsfm, u32 *p_bl_gen_desc_size) +{ + struct wpr_carveout_info wpr_inf; + struct lsfm_managed_ucode_img_v0 *p_lsfm = + (struct lsfm_managed_ucode_img_v0 *)lsfm; + struct flcn_ucode_img_v0 *p_img = &(p_lsfm->ucode_img); + struct loader_config *ldr_cfg = &(p_lsfm->bl_gen_desc.loader_cfg); + u64 addr_base; + struct pmu_ucode_desc *desc; + u64 tmp; + u32 addr_code, addr_data; + + if (p_img->desc == NULL) { + /* + * This means its a header based ucode, + * and so we do not fill BL gen desc structure + */ + return -EINVAL; + } + desc = p_img->desc; + /* + * Calculate physical and virtual addresses for various portions of + * the PMU ucode image + * Calculate the 32-bit addresses for the application code, application + * data, and bootloader code. These values are all based on IM_BASE. + * The 32-bit addresses will be the upper 32-bits of the virtual or + * physical addresses of each respective segment. 
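+ * In this legacy PMU loader path the DMA bases are expressed in 256-byte + * blocks, hence the '>> 8' shifts below.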
+ */ + addr_base = p_lsfm->lsb_header.ucode_off; + g->acr->get_wpr_info(g, &wpr_inf); + addr_base += wpr_inf.wpr_base; + nvgpu_acr_dbg(g, "pmu loader cfg u32 addrbase %x\n", (u32)addr_base); + /*From linux*/ + tmp = (addr_base + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8; + nvgpu_assert(tmp <= U32_MAX); + addr_code = u64_lo32(tmp); + nvgpu_acr_dbg(g, "app start %d app res code off %d\n", + desc->app_start_offset, desc->app_resident_code_offset); + tmp = (addr_base + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8; + nvgpu_assert(tmp <= U32_MAX); + addr_data = u64_lo32(tmp); + nvgpu_acr_dbg(g, "app res data offset%d\n", + desc->app_resident_data_offset); + nvgpu_acr_dbg(g, "bl start off %d\n", desc->bootloader_start_offset); + + /* Populate the loader_config state*/ + ldr_cfg->dma_idx = g->acr->lsf[FALCON_ID_PMU].falcon_dma_idx; + ldr_cfg->code_dma_base = addr_code; + ldr_cfg->code_dma_base1 = 0x0; + ldr_cfg->code_size_total = desc->app_size; + ldr_cfg->code_size_to_load = desc->app_resident_code_size; + ldr_cfg->code_entry_point = desc->app_imem_entry; + ldr_cfg->data_dma_base = addr_data; + ldr_cfg->data_dma_base1 = 0; + ldr_cfg->data_size = desc->app_resident_data_size; + ldr_cfg->overlay_dma_base = addr_code; + ldr_cfg->overlay_dma_base1 = 0x0; + + /* Update the argc/argv members*/ + ldr_cfg->argc = 1; +#ifdef CONFIG_NVGPU_LS_PMU + nvgpu_pmu_fw_get_cmd_line_args_offset(g, &ldr_cfg->argv); +#endif + *p_bl_gen_desc_size = (u32)sizeof(struct loader_config); + return 0; +} + +static int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g, + void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid) +{ + struct wpr_carveout_info wpr_inf; + struct lsfm_managed_ucode_img_v0 *p_lsfm = + (struct lsfm_managed_ucode_img_v0 *)lsfm; + struct flcn_ucode_img_v0 *p_img = &(p_lsfm->ucode_img); + struct flcn_bl_dmem_desc_v0 *ldr_cfg = + &(p_lsfm->bl_gen_desc.bl_dmem_desc); + u64 addr_base; + struct pmu_ucode_desc *desc; + u32 addr_code, addr_data; + u64 tmp; + + if (p_img->desc == NULL) { + /* + * This means its a header based ucode, + * and so we do not fill BL gen desc structure + */ + return -EINVAL; + } + desc = p_img->desc; + + /* + * Calculate physical and virtual addresses for various portions of + * the PMU ucode image + * Calculate the 32-bit addresses for the application code, application + * data, and bootloader code. These values are all based on IM_BASE. + * The 32-bit addresses will be the upper 32-bits of the virtual or + * physical addresses of each respective segment. 
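+ * As in the PMU loader config above, the code/data DMA bases below are + * shifted right by 8 so they are expressed in 256-byte blocks.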
+ */ + addr_base = p_lsfm->lsb_header.ucode_off; + g->acr->get_wpr_info(g, &wpr_inf); + addr_base += wpr_inf.wpr_base; + + nvgpu_acr_dbg(g, "gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base, + p_lsfm->wpr_header.falcon_id); + tmp = (addr_base + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8; + nvgpu_assert(tmp <= U32_MAX); + addr_code = u64_lo32(tmp); + tmp = (addr_base + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8; + nvgpu_assert(tmp <= U32_MAX); + addr_data = u64_lo32(tmp); + + nvgpu_acr_dbg(g, "gen cfg %x u32 addrcode %x & data %x load offset %xID\n", + (u32)addr_code, (u32)addr_data, desc->bootloader_start_offset, + p_lsfm->wpr_header.falcon_id); + + /* Populate the LOADER_CONFIG state */ + (void) memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc_v0)); + ldr_cfg->ctx_dma = g->acr->lsf[falconid].falcon_dma_idx; + ldr_cfg->code_dma_base = addr_code; + ldr_cfg->non_sec_code_size = desc->app_resident_code_size; + ldr_cfg->data_dma_base = addr_data; + ldr_cfg->data_size = desc->app_resident_data_size; + ldr_cfg->code_entry_point = desc->app_imem_entry; + *p_bl_gen_desc_size = (u32)sizeof(struct flcn_bl_dmem_desc_v0); + return 0; +} + +/* Populate falcon boot loader generic desc.*/ +static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, + struct lsfm_managed_ucode_img_v0 *pnode) +{ + int err = -ENOENT; + + if (pnode->wpr_header.falcon_id != FALCON_ID_PMU) { + nvgpu_acr_dbg(g, "non pmu. write flcn bl gen desc\n"); + err = gm20b_flcn_populate_bl_dmem_desc(g, + pnode, &pnode->bl_gen_desc_size, + pnode->wpr_header.falcon_id); + if (err != 0) { + nvgpu_err(g, "flcn_populate_bl_dmem_desc failed=%d", + err); + } + return err; + } + + if (pnode->wpr_header.falcon_id == FALCON_ID_PMU) { + nvgpu_acr_dbg(g, "pmu write flcn bl gen desc\n"); + err = gm20b_pmu_populate_loader_cfg(g, pnode, + &pnode->bl_gen_desc_size); + if (err != 0) { + nvgpu_err(g, "pmu_populate_loader_cfg failed=%d", + err); + } + return err; + } + + /* Failed to find the falcon requested. */ + return err; +} + +static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm, + struct nvgpu_mem *ucode) +{ + struct lsfm_managed_ucode_img_v0 *pnode = plsfm->ucode_img_list; + struct lsf_wpr_header_v0 last_wpr_hdr; + u32 i; + int err = 0; + + /* The WPR array is at the base of the WPR */ + pnode = plsfm->ucode_img_list; + (void) memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header_v0)); + i = 0; + + /* + * Walk the managed falcons, flush WPR and LSB headers to FB. + * flush any bl args to the storage area relative to the + * ucode image (appended on the end as a DMEM area). 
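+ * A terminator WPR header tagged FALCON_ID_INVALID is written after the + * last managed entry once this loop completes.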
+ */ + while (pnode != NULL) { + /* Flush WPR header to memory*/ + nvgpu_mem_wr_n(g, ucode, i * (u32)sizeof(pnode->wpr_header), + &pnode->wpr_header, + (u32)sizeof(pnode->wpr_header)); + + nvgpu_acr_dbg(g, "wpr header"); + nvgpu_acr_dbg(g, "falconid :%d", + pnode->wpr_header.falcon_id); + nvgpu_acr_dbg(g, "lsb_offset :%x", + pnode->wpr_header.lsb_offset); + nvgpu_acr_dbg(g, "bootstrap_owner :%d", + pnode->wpr_header.bootstrap_owner); + nvgpu_acr_dbg(g, "lazy_bootstrap :%d", + pnode->wpr_header.lazy_bootstrap); + nvgpu_acr_dbg(g, "status :%d", + pnode->wpr_header.status); + + /*Flush LSB header to memory*/ + nvgpu_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset, + &pnode->lsb_header, + (u32)sizeof(pnode->lsb_header)); + + nvgpu_acr_dbg(g, "lsb header"); + nvgpu_acr_dbg(g, "ucode_off :%x", + pnode->lsb_header.ucode_off); + nvgpu_acr_dbg(g, "ucode_size :%x", + pnode->lsb_header.ucode_size); + nvgpu_acr_dbg(g, "data_size :%x", + pnode->lsb_header.data_size); + nvgpu_acr_dbg(g, "bl_code_size :%x", + pnode->lsb_header.bl_code_size); + nvgpu_acr_dbg(g, "bl_imem_off :%x", + pnode->lsb_header.bl_imem_off); + nvgpu_acr_dbg(g, "bl_data_off :%x", + pnode->lsb_header.bl_data_off); + nvgpu_acr_dbg(g, "bl_data_size :%x", + pnode->lsb_header.bl_data_size); + nvgpu_acr_dbg(g, "app_code_off :%x", + pnode->lsb_header.app_code_off); + nvgpu_acr_dbg(g, "app_code_size :%x", + pnode->lsb_header.app_code_size); + nvgpu_acr_dbg(g, "app_data_off :%x", + pnode->lsb_header.app_data_off); + nvgpu_acr_dbg(g, "app_data_size :%x", + pnode->lsb_header.app_data_size); + nvgpu_acr_dbg(g, "flags :%x", + pnode->lsb_header.flags); + + /* this falcon has a boot loader and related args, flush them */ + /* Populate gen bl and flush to memory */ + err = lsfm_fill_flcn_bl_gen_desc(g, pnode); + if (err != 0) { + nvgpu_err(g, "bl_gen_desc failed err=%d", err); + return err; + } + nvgpu_mem_wr_n(g, ucode, + pnode->lsb_header.bl_data_off, + &pnode->bl_gen_desc, + pnode->bl_gen_desc_size); + + /* Copying of ucode */ + nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off, + pnode->ucode_img.data, + pnode->ucode_img.data_size); + pnode = pnode->next; + i++; + } + + /* Tag the terminator WPR header with an invalid falcon ID. */ + last_wpr_hdr.falcon_id = FALCON_ID_INVALID; + nvgpu_mem_wr_n(g, ucode, + (u32)plsfm->managed_flcn_cnt * + (u32)sizeof(struct lsf_wpr_header_v0), + &last_wpr_hdr, + (u32)sizeof(struct lsf_wpr_header_v0)); + return err; +} + +/* Free any ucode image structure resources. */ +static void lsfm_free_ucode_img_res(struct gk20a *g, + struct flcn_ucode_img_v0 *p_img) +{ + if (p_img->lsf_desc != NULL) { + nvgpu_kfree(g, p_img->lsf_desc); + p_img->lsf_desc = NULL; + } +} + +/* Free any ucode image structure resources. 
*/ +static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g, + struct flcn_ucode_img_v0 *p_img) +{ + if (p_img->lsf_desc != NULL) { + nvgpu_kfree(g, p_img->lsf_desc); + p_img->lsf_desc = NULL; + } + if (p_img->desc != NULL) { + nvgpu_kfree(g, p_img->desc); + p_img->desc = NULL; + } +} + +static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm) +{ + u32 cnt = plsfm->managed_flcn_cnt; + struct lsfm_managed_ucode_img_v0 *mg_ucode_img; + while (cnt != 0U) { + mg_ucode_img = plsfm->ucode_img_list; + if (mg_ucode_img->ucode_img.lsf_desc->falcon_id == + FALCON_ID_PMU) { + lsfm_free_ucode_img_res(g, &mg_ucode_img->ucode_img); + } else { + lsfm_free_nonpmu_ucode_img_res(g, + &mg_ucode_img->ucode_img); + } + plsfm->ucode_img_list = mg_ucode_img->next; + nvgpu_kfree(g, mg_ucode_img); + cnt--; + } +} + +int nvgpu_acr_prepare_ucode_blob_v0(struct gk20a *g) +{ + int err = 0; + struct ls_flcn_mgr_v0 lsfm_l, *plsfm; + struct wpr_carveout_info wpr_inf; + struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g); + + if (g->acr->ucode_blob.cpu_va != NULL) { + /* Recovery case, we do not need to form non WPR blob */ + return err; + } + plsfm = &lsfm_l; + (void) memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr_v0)); + nvgpu_acr_dbg(g, "fetching GMMU regs\n"); + err = g->ops.fb.vpr_info_fetch(g); + if (err != 0) { + nvgpu_err(g, "fb.vpr_info_fetch failed err=%d", err); + return err; + } + + err = nvgpu_gr_falcon_init_ctxsw_ucode(g, gr_falcon); + if (err != 0) { + nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err); + return err; + } + + g->acr->get_wpr_info(g, &wpr_inf); + nvgpu_acr_dbg(g, "wpr carveout base:%llx\n", wpr_inf.wpr_base); + nvgpu_acr_dbg(g, "wpr carveout size :%llx\n", wpr_inf.size); + + /* Discover all managed falcons*/ + err = lsfm_discover_ucode_images(g, plsfm); + nvgpu_acr_dbg(g, " Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt); + if (err != 0) { + goto exit_err; + } + + if ((plsfm->managed_flcn_cnt != 0U) && + (g->acr->ucode_blob.cpu_va == NULL)) { + /* Generate WPR requirements */ + err = lsf_gen_wpr_requirements(g, plsfm); + if (err != 0) { + goto exit_err; + } + + /* Alloc memory to hold ucode blob contents */ + err = g->acr->alloc_blob_space(g, plsfm->wpr_size + , &g->acr->ucode_blob); + if (err != 0) { + goto exit_err; + } + + nvgpu_acr_dbg(g, "managed LS falcon %d, WPR size %d bytes.\n", + plsfm->managed_flcn_cnt, plsfm->wpr_size); + err = lsfm_init_wpr_contents(g, plsfm, &g->acr->ucode_blob); + if (err != 0) { + nvgpu_kfree(g, &g->acr->ucode_blob); + goto free_acr; + } + } else { + nvgpu_acr_dbg(g, "LSFM is managing no falcons.\n"); + } + nvgpu_acr_dbg(g, "prepare ucode blob return 0\n"); + +free_acr: + free_acr_resources(g, plsfm); +exit_err: + return err; +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.h b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.h new file mode 100644 index 000000000..f79260e75 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_BLOB_CONSTRUCT_V0_H +#define ACR_BLOB_CONSTRUCT_V0_H + +#include +#include + +/* + * Light Secure WPR Content Alignments + */ +#define LSF_WPR_HEADER_ALIGNMENT (256U) +#define LSF_SUB_WPR_HEADER_ALIGNMENT (256U) +#define LSF_LSB_HEADER_ALIGNMENT (256U) +#define LSF_BL_DATA_ALIGNMENT (256U) +#define LSF_BL_DATA_SIZE_ALIGNMENT (256U) +#define LSF_BL_CODE_SIZE_ALIGNMENT (256U) +#define LSF_DATA_SIZE_ALIGNMENT (256U) +#define LSF_CODE_SIZE_ALIGNMENT (256U) + +#define LSF_UCODE_DATA_ALIGNMENT 4096U + + +/* Defined for 1MB alignment */ +#define SHIFT_1MB (20U) +#define SHIFT_4KB (12U) + +/*Light Secure Bootstrap header related defines*/ +#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0U +#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE BIT32(0) +#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0U +#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE BIT32(2) +#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE BIT32(3) +#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE (0U) + +/* + * Image Status Defines + */ +#define LSF_IMAGE_STATUS_NONE (0U) +#define LSF_IMAGE_STATUS_COPY (1U) +#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED (2U) +#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED (3U) +#define LSF_IMAGE_STATUS_VALIDATION_DONE (4U) +#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED (5U) +#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (6U) + +/* + * Light Secure WPR Header + * Defines state allowing Light Secure Falcon bootstrapping. + */ +struct lsf_wpr_header_v0 { + u32 falcon_id; + u32 lsb_offset; + u32 bootstrap_owner; + u32 lazy_bootstrap; + u32 status; +}; + +/* + * Light Secure Falcon Ucode Description Defines + * This structure is prelim and may change as the ucode signing flow evolves. + */ +struct lsf_ucode_desc_v0 { + u8 prd_keys[2][16]; + u8 dbg_keys[2][16]; + u32 b_prd_present; + u32 b_dbg_present; + u32 falcon_id; +}; + +/* + * Light Secure Bootstrap Header + * Defines state allowing Light Secure Falcon bootstrapping. + */ +struct lsf_lsb_header_v0 { + struct lsf_ucode_desc_v0 signature; + u32 ucode_off; + u32 ucode_size; + u32 data_size; + u32 bl_code_size; + u32 bl_imem_off; + u32 bl_data_off; + u32 bl_data_size; + u32 app_code_off; + u32 app_code_size; + u32 app_data_off; + u32 app_data_size; + u32 flags; +}; + +/* + * Union of all supported structures used by bootloaders. + */ +/* Falcon BL interfaces */ +/* + * Structure used by the boot-loader to load the rest of the code. 
This has + * to be filled by NVGPU and copied into DMEM at offset provided in the + * hsflcn_bl_desc.bl_desc_dmem_load_off. + */ +struct flcn_bl_dmem_desc_v0 { + u32 reserved[4]; /*Should be the first element..*/ + u32 signature[4]; /*Should be the first element..*/ + u32 ctx_dma; + u32 code_dma_base; + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 sec_code_off; + u32 sec_code_size; + u32 code_entry_point; + u32 data_dma_base; + u32 data_size; + u32 code_dma_base1; + u32 data_dma_base1; +}; + +/* + * Legacy structure used by the current PMU bootloader. + */ +struct loader_config { + u32 dma_idx; + u32 code_dma_base; /* upper 32-bits of 40-bit dma address */ + u32 code_size_total; + u32 code_size_to_load; + u32 code_entry_point; + u32 data_dma_base; /* upper 32-bits of 40-bit dma address */ + u32 data_size; /* initialized data of the application */ + u32 overlay_dma_base; /* upper 32-bits of the 40-bit dma address */ + u32 argc; + u32 argv; + u16 code_dma_base1; /* upper 7 bits of 47-bit dma address */ + u16 data_dma_base1; /* upper 7 bits of 47-bit dma address */ + u16 overlay_dma_base1; /* upper 7 bits of the 47-bit dma address */ +}; + +union flcn_bl_generic_desc { + struct flcn_bl_dmem_desc_v0 bl_dmem_desc; + struct loader_config loader_cfg; +}; + +struct flcn_ucode_img_v0 { + u32 *data; + struct pmu_ucode_desc *desc; /* only some falcons have descriptor */ + u32 data_size; + /* NULL if not a light secure falcon. */ + struct lsf_ucode_desc_v0 *lsf_desc; + /* True if there are resources to be freed by the client. */ +}; + +/* + * LSFM Managed Ucode Image + * next : Next image in the list, NULL if last. + * wpr_header : WPR header for this ucode image + * lsb_header : LSB header for this ucode image + * bl_gen_desc : Bootloader generic desc structure for this ucode image + * bl_gen_desc_size : Size of bootloader desc structure for this ucode image + * full_ucode_size : Surface size required for final ucode image + * ucode_img : Ucode image info + */ +struct lsfm_managed_ucode_img_v0 { + struct lsfm_managed_ucode_img_v0 *next; + struct lsf_wpr_header_v0 wpr_header; + struct lsf_lsb_header_v0 lsb_header; + union flcn_bl_generic_desc bl_gen_desc; + u32 bl_gen_desc_size; + u32 full_ucode_size; + struct flcn_ucode_img_v0 ucode_img; +}; + +/* + * Defines the structure used to contain all generic information related to + * the LSFM. + * + * Contains the Light Secure Falcon Manager (LSFM) feature related data. + */ +struct ls_flcn_mgr_v0 { + u16 managed_flcn_cnt; + u32 wpr_size; + struct lsfm_managed_ucode_img_v0 *ucode_img_list; +}; + +int nvgpu_acr_lsf_pmu_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img); +int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img); +int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img); + +int nvgpu_acr_prepare_ucode_blob_v0(struct gk20a *g); + +#endif /* ACR_BLOB_CONSTRUCT_V0_H */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c new file mode 100644 index 000000000..85fcbfafe --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acr_bootstrap.h" +#include "acr_priv.h" + +int nvgpu_acr_wait_for_completion(struct gk20a *g, struct hs_acr *acr_desc, + u32 timeout) +{ + u32 flcn_id; +#ifdef CONFIG_NVGPU_FALCON_NON_FUSA + u32 sctl, cpuctl; +#endif + int completion = 0; + u32 data = 0; + u32 bar0_status = 0; + u32 error_type; + + nvgpu_log_fn(g, " "); + + flcn_id = nvgpu_falcon_get_id(acr_desc->acr_flcn); + + completion = nvgpu_falcon_wait_for_halt(acr_desc->acr_flcn, timeout); + if (completion != 0) { + nvgpu_err(g, "flcn-%d: HS ucode boot timed out, limit: %d ms", + flcn_id, timeout); + error_type = ACR_BOOT_TIMEDOUT; + goto exit; + } + + if (acr_desc->acr_engine_bus_err_status != NULL) { + completion = acr_desc->acr_engine_bus_err_status(g, + &bar0_status, &error_type); + if (completion != 0) { + nvgpu_err(g, "flcn-%d: ACR engine bus error", flcn_id); + goto exit; + } + } + + data = nvgpu_falcon_mailbox_read(acr_desc->acr_flcn, FALCON_MAILBOX_0); + if (data != 0U) { + nvgpu_err(g, "flcn-%d: HS ucode boot failed, err %x", flcn_id, + data); + nvgpu_err(g, "flcn-%d: Mailbox-1 : 0x%x", flcn_id, + nvgpu_falcon_mailbox_read(acr_desc->acr_flcn, + FALCON_MAILBOX_1)); + completion = -EAGAIN; + error_type = ACR_BOOT_FAILED; + goto exit; + } + + /* + * When engine-falcon is used for ACR bootstrap, validate the integrity + * of falcon IMEM and DMEM. 
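+ * A failed integrity check is treated like a failed boot: completion is + * set to -EAGAIN and the error is reported as ACR_BOOT_FAILED.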
+ */ + if (acr_desc->acr_validate_mem_integrity != NULL) { + if (!acr_desc->acr_validate_mem_integrity(g)) { + nvgpu_err(g, "flcn-%d: memcheck failed", flcn_id); + completion = -EAGAIN; + error_type = ACR_BOOT_FAILED; + } + } + +exit: + +#ifdef CONFIG_NVGPU_FALCON_NON_FUSA + nvgpu_falcon_get_ctls(acr_desc->acr_flcn, &sctl, &cpuctl); + + nvgpu_acr_dbg(g, "flcn-%d: sctl reg %x cpuctl reg %x", + flcn_id, sctl, cpuctl); +#endif + + if (completion != 0) { +#ifdef CONFIG_NVGPU_FALCON_DEBUG + nvgpu_falcon_dump_stats(acr_desc->acr_flcn); +#endif + if (acr_desc->report_acr_engine_bus_err_status != NULL) { + acr_desc->report_acr_engine_bus_err_status(g, + bar0_status, error_type); + } + } + + return completion; +} + +/* + * Patch signatures into ucode image + */ +static void acr_ucode_patch_sig(struct gk20a *g, + unsigned int *p_img, unsigned int *p_prod_sig, + unsigned int *p_dbg_sig, unsigned int *p_patch_loc, + unsigned int *p_patch_ind, u32 sig_size) +{ +#if defined(CONFIG_NVGPU_NEXT) + struct nvgpu_acr *acr = g->acr; +#endif + unsigned int i, j, *p_sig; + const u32 dmem_word_size = 4U; + nvgpu_acr_dbg(g, " "); + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + p_sig = p_prod_sig; + nvgpu_acr_dbg(g, "PRODUCTION MODE\n"); + } else { + p_sig = p_dbg_sig; + nvgpu_info(g, "DEBUG MODE\n"); + } + +#if defined(CONFIG_NVGPU_NEXT) + if (acr->get_versioned_sig != NULL) { + p_sig = acr->get_versioned_sig(g, acr, p_sig, &sig_size); + } +#endif + + /* Patching logic:*/ + sig_size = sig_size / dmem_word_size; + for (i = 0U; i < (sizeof(*p_patch_loc) / dmem_word_size); i++) { + for (j = 0U; j < sig_size; j++) { + p_img[nvgpu_safe_add_u32( + (p_patch_loc[i] / dmem_word_size), j)] = + p_sig[nvgpu_safe_add_u32( + (p_patch_ind[i] * dmem_word_size), j)]; + } + } +} + +/* + * Loads ACR bin to SYSMEM/FB and bootstraps ACR with bootloader code + * start and end are addresses of ucode blob in non-WPR region + */ +int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr, + struct hs_acr *acr_desc) +{ + struct nvgpu_firmware *acr_fw = acr_desc->acr_fw; + struct bin_hdr *hs_bin_hdr = NULL; + struct acr_fw_header *fw_hdr = NULL; + u32 *ucode_header = NULL; + u32 *ucode = NULL; + u32 timeout = 0; + int err = 0; + + nvgpu_acr_dbg(g, "ACR TYPE %x ", acr_desc->acr_type); + + if (acr_fw != NULL) { + err = acr->patch_wpr_info_to_ucode(g, acr, acr_desc, true); + if (err != 0) { + nvgpu_err(g, "Falcon ucode patch wpr info failed"); + return err; + } + } else { + /* Firmware is stored in soc specific path in FMODEL + * Hence NVGPU_REQUEST_FIRMWARE_NO_WARN is used instead + * of NVGPU_REQUEST_FIRMWARE_NO_SOC + */ +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + acr_fw = nvgpu_request_firmware(g, + acr_desc->acr_fw_name, + NVGPU_REQUEST_FIRMWARE_NO_WARN); + } else +#endif + { + acr_fw = nvgpu_request_firmware(g, + acr_desc->acr_fw_name, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } + if (acr_fw == NULL) { + nvgpu_err(g, "%s ucode get fail for %s", + acr_desc->acr_fw_name, g->name); + return -ENOENT; + } + + acr_desc->acr_fw = acr_fw; + + err = acr->patch_wpr_info_to_ucode(g, acr, acr_desc, false); + if (err != 0) { + nvgpu_err(g, "Falcon ucode patch wpr info failed"); + goto err_free_ucode; + } + } + + + hs_bin_hdr = (struct bin_hdr *)(void *)acr_fw->data; + fw_hdr = (struct acr_fw_header *)(void *)(acr_fw->data + + hs_bin_hdr->header_offset); + ucode_header = (u32 *)(void *)(acr_fw->data + fw_hdr->hdr_offset); + ucode = (u32 *)(void *)(acr_fw->data + hs_bin_hdr->data_offset); + + /* Patch Ucode 
signatures */ + acr_ucode_patch_sig(g, ucode, + (u32 *)(void *)(acr_fw->data + fw_hdr->sig_prod_offset), + (u32 *)(void *)(acr_fw->data + fw_hdr->sig_dbg_offset), + (u32 *)(void *)(acr_fw->data + fw_hdr->patch_loc), + (u32 *)(void *)(acr_fw->data + fw_hdr->patch_sig), + fw_hdr->sig_dbg_size); + + err = nvgpu_falcon_hs_ucode_load_bootstrap(acr_desc->acr_flcn, + ucode, ucode_header); + if (err != 0) { + nvgpu_err(g, "HS ucode load & bootstrap failed"); + goto err_free_ucode; + } + + /* wait for complete & halt */ + if (nvgpu_platform_is_silicon(g)) { + timeout = ACR_COMPLETION_TIMEOUT_SILICON_MS; + } else { + timeout = ACR_COMPLETION_TIMEOUT_NON_SILICON_MS; + } + err = nvgpu_acr_wait_for_completion(g, acr_desc, timeout); + + if (err != 0) { + nvgpu_err(g, "HS ucode completion err %d", err); + goto err_free_ucode; + } + + return 0; + +err_free_ucode: + nvgpu_release_firmware(g, acr_fw); + acr_desc->acr_fw = NULL; + return err; +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h new file mode 100644 index 000000000..4b116cf2e --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef ACR_BOOTSTRAP_H +#define ACR_BOOTSTRAP_H + +#include "nvgpu_acr_interface.h" +#ifdef CONFIG_NVGPU_NEXT +#include "common/acr/nvgpu_next_acr_bootstrap.h" +#endif + +struct gk20a; +struct nvgpu_acr; + +struct flcn_acr_region_prop_v0 { + u32 start_addr; + u32 end_addr; + u32 region_id; + u32 read_mask; + u32 write_mask; + u32 client_mask; +}; + +struct flcn_acr_regions_v0 { + u32 no_regions; + struct flcn_acr_region_prop_v0 region_props[NVGPU_FLCN_ACR_MAX_REGIONS]; +}; + +struct flcn_acr_desc_v0 { + union { + u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)]; + u32 signatures[4]; + } ucode_reserved_space; + /*Always 1st*/ + u32 wpr_region_id; + u32 wpr_offset; + u32 mmu_mem_range; + struct flcn_acr_regions_v0 regions; + u32 nonwpr_ucode_blob_size; + u64 nonwpr_ucode_blob_start; +}; + +struct bin_hdr { + /* 0x10de */ + u32 bin_magic; + /* versioning of bin format */ + u32 bin_ver; + /* Entire image size including this header */ + u32 bin_size; + /* + * Header offset of executable binary metadata, + * start @ offset- 0x100 * + */ + u32 header_offset; + /* + * Start of executable binary data, start @ + * offset- 0x200 + */ + u32 data_offset; + /* Size of executable binary */ + u32 data_size; +}; + +struct acr_fw_header { + u32 sig_dbg_offset; + u32 sig_dbg_size; + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 hdr_offset; /* This header points to acr_ucode_header_t210_load */ + u32 hdr_size; /* Size of above header */ +}; + +/* ACR Falcon descriptor's */ +struct hs_acr { +#define ACR_DEFAULT 0U +#define ACR_AHESASC_NON_FUSA 1U +#define ACR_ASB_NON_FUSA 2U +#define ACR_AHESASC_FUSA 3U +#define ACR_ASB_FUSA 4U + u32 acr_type; + + /* ACR ucode */ + const char *acr_fw_name; + const char *acr_code_name; + const char *acr_data_name; + const char *acr_manifest_name; + struct nvgpu_firmware *code_fw; + struct nvgpu_firmware *data_fw; + struct nvgpu_firmware *manifest_fw; + struct nvgpu_firmware *acr_fw; + + union{ + struct flcn_acr_desc_v0 *acr_dmem_desc_v0; + struct flcn_acr_desc *acr_dmem_desc; + }; + +#if defined(CONFIG_NVGPU_NEXT) + struct nvgpu_mem acr_falcon2_sysmem_desc; + struct flcn2_acr_desc acr_sysmem_desc; + struct nvgpu_mem ls_pmu_desc; +#endif + + /* Falcon used to execute ACR ucode */ + struct nvgpu_falcon *acr_flcn; + + void (*report_acr_engine_bus_err_status)(struct gk20a *g, + u32 bar0_status, u32 error_type); + int (*acr_engine_bus_err_status)(struct gk20a *g, u32 *bar0_status, + u32 *error_type); + bool (*acr_validate_mem_integrity)(struct gk20a *g); +}; + +int nvgpu_acr_wait_for_completion(struct gk20a *g, struct hs_acr *acr_desc, + u32 timeout); +int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr, + struct hs_acr *acr_desc); + +#endif /* ACR_BOOTSTRAP_H */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_priv.h b/drivers/gpu/nvgpu/common/acr/acr_priv.h new file mode 100644 index 000000000..378f85795 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_priv.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_H +#define ACR_H + +#include "acr_bootstrap.h" +#ifdef CONFIG_NVGPU_ACR_LEGACY +#include "acr_blob_construct_v0.h" +#endif +#include "acr_blob_construct.h" + +struct gk20a; +struct nvgpu_acr; +struct wpr_carveout_info; + +#define nvgpu_acr_dbg(g, fmt, args...) \ + nvgpu_log(g, gpu_dbg_pmu, fmt, ##args) + +/* + * Falcon UCODE header index. + */ +#define FLCN_NL_UCODE_HDR_OS_CODE_OFF_IND (0U) +#define FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND (1U) +#define FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND (2U) +#define FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND (3U) +#define FLCN_NL_UCODE_HDR_NUM_APPS_IND (4U) + +/* + * There are total N number of Apps with code and offset defined in UCODE header + * This macro provides the CODE and DATA offset and size of Ath application. 
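+ *
+ * As an illustration (derived from the macros below), with N = 2 apps the
+ * header words are laid out as:
+ *   words  0..3  - OS code offset/size, OS data offset/size
+ *   word   4     - number of apps (N)
+ *   words  5..8  - app 0 and app 1 code offset/size pairs
+ *   words  9..12 - app 0 and app 1 data offset/size pairs
+ *   words 13,14  - OS overlay offset and size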
+ */ +#define FLCN_NL_UCODE_HDR_APP_CODE_START_IND (5U) +#define FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + ((A)*2U)) +#define FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + ((A)*2U) + 1U) +#define FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + ((N)*2U) - 1U) + +#define FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) + 1U) +#define FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + ((A)*2U)) +#define FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + ((A)*2U) + 1U) +#define FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + ((N)*2U) - 1U) + +#define FLCN_NL_UCODE_HDR_OS_OVL_OFF_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 1U) +#define FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 2U) + +#define GM20B_HSBIN_ACR_PROD_UCODE "nv_acr_ucode_prod.bin" +#define GM20B_HSBIN_ACR_DBG_UCODE "nv_acr_ucode_dbg.bin" +#define HSBIN_ACR_BL_UCODE_IMAGE "pmu_bl.bin" +#define HSBIN_ACR_PROD_UCODE "acr_ucode_prod.bin" +#define HSBIN_ACR_DBG_UCODE "acr_ucode_dbg.bin" +#define HSBIN_ACR_AHESASC_NON_FUSA_PROD_UCODE "acr_ahesasc_prod_ucode.bin" +#define HSBIN_ACR_ASB_NON_FUSA_PROD_UCODE "acr_asb_prod_ucode.bin" +#define HSBIN_ACR_AHESASC_NON_FUSA_DBG_UCODE "acr_ahesasc_dbg_ucode.bin" +#define HSBIN_ACR_ASB_NON_FUSA_DBG_UCODE "acr_asb_dbg_ucode.bin" + +#define HSBIN_ACR_AHESASC_FUSA_PROD_UCODE "acr_ahesasc_fusa_prod_ucode.bin" +#define HSBIN_ACR_ASB_FUSA_PROD_UCODE "acr_asb_fusa_prod_ucode.bin" +#define HSBIN_ACR_AHESASC_FUSA_DBG_UCODE "acr_ahesasc_fusa_dbg_ucode.bin" +#define HSBIN_ACR_ASB_FUSA_DBG_UCODE "acr_asb_fusa_dbg_ucode.bin" + +#define GM20B_FECS_UCODE_SIG "fecs_sig.bin" +#define T18x_GPCCS_UCODE_SIG "gpccs_sig.bin" + +#define TU104_FECS_UCODE_SIG "tu104/fecs_sig.bin" +#define TU104_GPCCS_UCODE_SIG "tu104/gpccs_sig.bin" + +#define LSF_SEC2_UCODE_IMAGE_BIN "sec2_ucode_image.bin" +#define LSF_SEC2_UCODE_DESC_BIN "sec2_ucode_desc.bin" +#define LSF_SEC2_UCODE_SIG_BIN "sec2_sig.bin" + +#define LSF_SEC2_UCODE_IMAGE_FUSA_BIN "sec2_ucode_fusa_image.bin" +#define LSF_SEC2_UCODE_DESC_FUSA_BIN "sec2_ucode_fusa_desc.bin" +#define LSF_SEC2_UCODE_SIG_FUSA_BIN "sec2_fusa_sig.bin" + +#define ACR_COMPLETION_TIMEOUT_NON_SILICON_MS 10000U /*in msec */ +#define ACR_COMPLETION_TIMEOUT_SILICON_MS 100 /*in msec */ + +struct acr_lsf_config { + u32 falcon_id; + u32 falcon_dma_idx; + bool is_lazy_bootstrap; + bool is_priv_load; + + int (*get_lsf_ucode_details)(struct gk20a *g, void *lsf_ucode_img); + void (*get_cmd_line_args_offset)(struct gk20a *g, u32 *args_offset); +}; + +struct nvgpu_acr { + struct gk20a *g; + + u32 bootstrap_owner; + u32 num_of_sig; + + /* LSF properties */ + u64 lsf_enable_mask; + struct acr_lsf_config lsf[FALCON_ID_END]; + + /* + * non-wpr space to hold LSF ucodes, + * ACR does copy ucode from non-wpr to wpr + */ + struct nvgpu_mem ucode_blob; + /* + * Even though this mem_desc wouldn't be used, + * the wpr region needs to be reserved in the + * allocator in dGPU case. 
+ */ + struct nvgpu_mem wpr_dummy; + + /* ACR member for different types of ucode */ + /* For older dgpu/tegra ACR cuode */ + struct hs_acr acr; + /* ACR load split feature support */ + struct hs_acr acr_ahesasc; + struct hs_acr acr_asb; + + /* ACR load split feature support for iGPU*/ + struct hs_acr acr_alsb; + struct hs_acr acr_asc; + + int (*prepare_ucode_blob)(struct gk20a *g); + int (*alloc_blob_space)(struct gk20a *g, size_t size, + struct nvgpu_mem *mem); + int (*patch_wpr_info_to_ucode)(struct gk20a *g, struct nvgpu_acr *acr, + struct hs_acr *acr_desc, bool is_recovery); + int (*bootstrap_hs_acr)(struct gk20a *g, struct nvgpu_acr *acr); + + void (*get_wpr_info)(struct gk20a *g, struct wpr_carveout_info *inf); + u32* (*get_versioned_sig)(struct gk20a *g, struct nvgpu_acr *acr, + u32 *sig, u32 *sig_size); +}; + +#endif /* ACR_H */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.c b/drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.c new file mode 100644 index 000000000..22cc471fc --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "acr_wpr.h" +#include "acr_priv.h" +#include "acr_sw_gm20b.h" +#include "acr_blob_alloc.h" +#include "acr_bootstrap.h" +#include "acr_blob_construct_v0.h" + +static int gm20b_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr); + if (err != 0) { + nvgpu_err(g, "ACR bootstrap failed"); + } + + return err; +} + +static int gm20b_acr_patch_wpr_info_to_ucode(struct gk20a *g, + struct nvgpu_acr *acr, struct hs_acr *acr_desc, bool is_recovery) +{ + struct nvgpu_firmware *acr_fw = acr_desc->acr_fw; + struct acr_fw_header *acr_fw_hdr = NULL; + struct bin_hdr *acr_fw_bin_hdr = NULL; + struct flcn_acr_desc_v0 *acr_dmem_desc; + u32 *acr_ucode_header = NULL; + u32 *acr_ucode_data = NULL; + + nvgpu_log_fn(g, " "); + + if (is_recovery) { + acr_desc->acr_dmem_desc_v0->nonwpr_ucode_blob_size = 0U; + } else { + acr_fw_bin_hdr = (struct bin_hdr *)acr_fw->data; + acr_fw_hdr = (struct acr_fw_header *) + (acr_fw->data + acr_fw_bin_hdr->header_offset); + + acr_ucode_data = (u32 *)(acr_fw->data + + acr_fw_bin_hdr->data_offset); + + acr_ucode_header = (u32 *)(acr_fw->data + + acr_fw_hdr->hdr_offset); + + /* Patch WPR info to ucode */ + acr_dmem_desc = (struct flcn_acr_desc_v0 *) + &(((u8 *)acr_ucode_data)[acr_ucode_header[2U]]); + + acr_desc->acr_dmem_desc_v0 = acr_dmem_desc; + + acr_dmem_desc->nonwpr_ucode_blob_start = + nvgpu_mem_get_addr(g, &g->acr->ucode_blob); + nvgpu_assert(g->acr->ucode_blob.size <= U32_MAX); + acr_dmem_desc->nonwpr_ucode_blob_size = + (u32)g->acr->ucode_blob.size; + acr_dmem_desc->regions.no_regions = 1U; + acr_dmem_desc->wpr_offset = 0U; + } + + return 0; +} + +/* LSF static config functions */ +static u32 gm20b_acr_lsf_pmu(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* PMU LS falcon info */ + lsf->falcon_id = FALCON_ID_PMU; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = false; + lsf->is_priv_load = false; +#ifdef CONFIG_NVGPU_LS_PMU + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_pmu_ucode_details_v0; + lsf->get_cmd_line_args_offset = nvgpu_pmu_fw_get_cmd_line_args_offset; +#endif + return BIT32(lsf->falcon_id); +} + +static u32 gm20b_acr_lsf_fecs(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* FECS LS falcon info */ + lsf->falcon_id = FALCON_ID_FECS; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = false; + lsf->is_priv_load = false; + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_fecs_ucode_details_v0; + lsf->get_cmd_line_args_offset = NULL; + + return BIT32(lsf->falcon_id); +} + +static u32 gm20b_acr_lsf_conifg(struct gk20a *g, + struct nvgpu_acr *acr) +{ + u32 lsf_enable_mask = 0; + + lsf_enable_mask |= gm20b_acr_lsf_pmu(g, &acr->lsf[FALCON_ID_PMU]); + lsf_enable_mask |= gm20b_acr_lsf_fecs(g, &acr->lsf[FALCON_ID_FECS]); + + return lsf_enable_mask; +} + +static void gm20b_acr_default_sw_init(struct gk20a *g, struct hs_acr *hs_acr) +{ + nvgpu_log_fn(g, " "); + + /* ACR HS ucode type & f/w name*/ + hs_acr->acr_type = ACR_DEFAULT; + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + hs_acr->acr_fw_name = GM20B_HSBIN_ACR_PROD_UCODE; + } else { + hs_acr->acr_fw_name = GM20B_HSBIN_ACR_DBG_UCODE; + } + + /* set on which falcon ACR need to execute*/ + hs_acr->acr_flcn = g->pmu->flcn; + hs_acr->acr_engine_bus_err_status = + g->ops.pmu.bar0_error_status; +} + +void nvgpu_gm20b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr) +{ + 
nvgpu_log_fn(g, " "); + + acr->g = g; + + acr->bootstrap_owner = FALCON_ID_PMU; + + acr->lsf_enable_mask = gm20b_acr_lsf_conifg(g, acr); + + gm20b_acr_default_sw_init(g, &acr->acr); + + acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob_v0; + acr->get_wpr_info = nvgpu_acr_wpr_info_sys; + acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_sys; + acr->bootstrap_hs_acr = gm20b_bootstrap_hs_acr; + acr->patch_wpr_info_to_ucode = + gm20b_acr_patch_wpr_info_to_ucode; +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.h b/drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.h new file mode 100644 index 000000000..f4164eabd --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.h @@ -0,0 +1,33 @@ +/* + * GM20B ACR + * + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_SW_GM20B_H +#define ACR_SW_GM20B_H + +struct gk20a; +struct nvgpu_acr; + +void nvgpu_gm20b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr); + +#endif /*ACR_SW_GM20B_H*/ diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.c b/drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.c new file mode 100644 index 000000000..315f37fb0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "acr_sw_gp10b.h" + +#include +#include +#include + +#include "acr_blob_construct_v0.h" +#include "acr_priv.h" + +#include "acr_sw_gm20b.h" +#include "acr_sw_gp10b.h" + +/* LSF static config functions */ +static u32 gp10b_acr_lsf_gpccs(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* GPCCS LS falcon info */ + lsf->falcon_id = FALCON_ID_GPCCS; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = true; + lsf->is_priv_load = true; + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_gpccs_ucode_details_v0; + lsf->get_cmd_line_args_offset = NULL; + + return BIT32(lsf->falcon_id); +} + +static void gp10b_acr_default_sw_init(struct gk20a *g, struct hs_acr *hs_acr) +{ + nvgpu_log_fn(g, " "); + + /* ACR HS ucode type & f/w name*/ + hs_acr->acr_type = ACR_DEFAULT; + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + hs_acr->acr_fw_name = HSBIN_ACR_PROD_UCODE; + } else { + hs_acr->acr_fw_name = HSBIN_ACR_DBG_UCODE; + } + + /* set on which falcon ACR need to execute*/ + hs_acr->acr_flcn = g->pmu->flcn; + hs_acr->acr_engine_bus_err_status = + g->ops.pmu.bar0_error_status; +} + +void nvgpu_gp10b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr) +{ + nvgpu_log_fn(g, " "); + + /* inherit the gm20b config data */ + nvgpu_gm20b_acr_sw_init(g, acr); + gp10b_acr_default_sw_init(g, &acr->acr); + + /* gp10b supports LSF gpccs bootstrap */ + acr->lsf_enable_mask |= gp10b_acr_lsf_gpccs(g, + &acr->lsf[FALCON_ID_GPCCS]); +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.h b/drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.h new file mode 100644 index 000000000..cdd7d2982 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_SW_GP10B_H +#define ACR_SW_GP10B_H + +struct gk20a; +struct nvgpu_acr; + +void nvgpu_gp10b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr); + +#endif /* ACR_SW_GP10B_H */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c b/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c new file mode 100644 index 000000000..0e258c3ac --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#endif + +#include "acr_wpr.h" +#include "acr_priv.h" +#include "acr_blob_alloc.h" +#include "acr_blob_construct.h" +#include "acr_bootstrap.h" +#include "acr_sw_gv11b.h" + +#define RECOVERY_UCODE_BLOB_SIZE (0U) +#define WPR_OFFSET (0U) +#define ACR_REGIONS (1U) + +static int gv11b_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr); + if (err != 0) { + nvgpu_err(g, "ACR bootstrap failed"); + } + + return err; +} + +static int gv11b_acr_patch_wpr_info_to_ucode(struct gk20a *g, + struct nvgpu_acr *acr, struct hs_acr *acr_desc, bool is_recovery) +{ + struct nvgpu_firmware *acr_fw = acr_desc->acr_fw; + struct acr_fw_header *acr_fw_hdr = NULL; + struct bin_hdr *acr_fw_bin_hdr = NULL; + struct flcn_acr_desc *acr_dmem_desc; + u32 *acr_ucode_header = NULL; + u32 *acr_ucode_data = NULL; + const u32 acr_desc_offset = 2U; + + nvgpu_log_fn(g, " "); +#ifdef CONFIG_NVGPU_NON_FUSA + if (is_recovery) { + acr_desc->acr_dmem_desc->nonwpr_ucode_blob_size = + RECOVERY_UCODE_BLOB_SIZE; + } else +#endif + { + acr_fw_bin_hdr = (struct bin_hdr *)(void *)acr_fw->data; + acr_fw_hdr = (struct acr_fw_header *)(void *) + (acr_fw->data + acr_fw_bin_hdr->header_offset); + + acr_ucode_data = (u32 *)(void *)(acr_fw->data + + acr_fw_bin_hdr->data_offset); + acr_ucode_header = (u32 *)(void *)(acr_fw->data + + acr_fw_hdr->hdr_offset); + + /* Patch WPR info to ucode */ + acr_dmem_desc = (struct flcn_acr_desc *)(void *) + &(((u8 *)acr_ucode_data)[acr_ucode_header[acr_desc_offset]]); + + acr_desc->acr_dmem_desc = acr_dmem_desc; + + acr_dmem_desc->nonwpr_ucode_blob_start = + nvgpu_mem_get_addr(g, &g->acr->ucode_blob); + nvgpu_assert(g->acr->ucode_blob.size <= U32_MAX); + acr_dmem_desc->nonwpr_ucode_blob_size = + (u32)g->acr->ucode_blob.size; + acr_dmem_desc->regions.no_regions = ACR_REGIONS; + acr_dmem_desc->wpr_offset = WPR_OFFSET; + } + + return 0; +} + +/* LSF static config functions */ +#ifdef CONFIG_NVGPU_LS_PMU +static u32 gv11b_acr_lsf_pmu(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + if (!g->support_ls_pmu) { + /* skip adding LS PMU ucode to ACR blob */ + return 0; + } + + /* PMU LS falcon info */ + lsf->falcon_id = FALCON_ID_PMU; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = false; + lsf->is_priv_load = false; + 
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_pmu_ucode_details; + lsf->get_cmd_line_args_offset = nvgpu_pmu_fw_get_cmd_line_args_offset; + + return BIT32(lsf->falcon_id); +} +#endif + +/* LSF init */ +static u32 gv11b_acr_lsf_fecs(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* FECS LS falcon info */ + lsf->falcon_id = FALCON_ID_FECS; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + /* + * FECS LSF cold/recovery bootstrap is handled by ACR when LS PMU + * not present + */ + lsf->is_lazy_bootstrap = g->support_ls_pmu ? true : false; + lsf->is_priv_load = false; + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_fecs_ucode_details; + lsf->get_cmd_line_args_offset = NULL; + + return BIT32(lsf->falcon_id); +} + +static u32 gv11b_acr_lsf_gpccs(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* GPCCS LS falcon info */ + lsf->falcon_id = FALCON_ID_GPCCS; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + /* + * GPCCS LSF cold/recovery bootstrap is handled by ACR when LS PMU + * not present + */ + lsf->is_lazy_bootstrap = g->support_ls_pmu ? true : false; + lsf->is_priv_load = true; + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_gpccs_ucode_details; + lsf->get_cmd_line_args_offset = NULL; + + return BIT32(lsf->falcon_id); +} + +u32 gv11b_acr_lsf_config(struct gk20a *g, + struct nvgpu_acr *acr) +{ + u32 lsf_enable_mask = 0; +#ifdef CONFIG_NVGPU_LS_PMU + lsf_enable_mask |= gv11b_acr_lsf_pmu(g, &acr->lsf[FALCON_ID_PMU]); +#endif + lsf_enable_mask |= gv11b_acr_lsf_fecs(g, &acr->lsf[FALCON_ID_FECS]); + lsf_enable_mask |= gv11b_acr_lsf_gpccs(g, &acr->lsf[FALCON_ID_GPCCS]); + + return lsf_enable_mask; +} + +static void gv11b_acr_default_sw_init(struct gk20a *g, struct hs_acr *acr_desc) +{ + nvgpu_log_fn(g, " "); + + acr_desc->acr_type = ACR_DEFAULT; + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + acr_desc->acr_fw_name = HSBIN_ACR_PROD_UCODE; + } else { + acr_desc->acr_fw_name = HSBIN_ACR_DBG_UCODE; + } + + acr_desc->acr_flcn = g->pmu->flcn; + acr_desc->report_acr_engine_bus_err_status = + nvgpu_pmu_report_bar0_pri_err_status; + acr_desc->acr_engine_bus_err_status = + g->ops.pmu.bar0_error_status; + acr_desc->acr_validate_mem_integrity = g->ops.pmu.validate_mem_integrity; +} + +void nvgpu_gv11b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr) +{ + nvgpu_log_fn(g, " "); + + acr->g = g; + + acr->bootstrap_owner = FALCON_ID_PMU; + + acr->lsf_enable_mask = gv11b_acr_lsf_config(g, acr); + + gv11b_acr_default_sw_init(g, &acr->acr); + + acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob; + acr->get_wpr_info = nvgpu_acr_wpr_info_sys; + acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_sys; + acr->bootstrap_hs_acr = gv11b_bootstrap_hs_acr; + acr->patch_wpr_info_to_ucode = gv11b_acr_patch_wpr_info_to_ucode; +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.h b/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.h new file mode 100644 index 000000000..a3b4ec63c --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_SW_GV11B_H +#define ACR_SW_GV11B_H + +struct gk20a; +struct nvgpu_acr; +struct hs_acr; + +void nvgpu_gv11b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr); +u32 gv11b_acr_lsf_config(struct gk20a *g, struct nvgpu_acr *acr); + +#endif /* ACR_SW_GV11B_H */ + diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_tu104.c b/drivers/gpu/nvgpu/common/acr/acr_sw_tu104.c new file mode 100644 index 000000000..8d3dbc619 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_tu104.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "acr_sw_tu104.h" + +#include +#include + +#include "acr_wpr.h" +#include "acr_priv.h" +#include "acr_blob_alloc.h" +#include "acr_bootstrap.h" +#include "acr_blob_construct.h" +#include "acr_sw_gv11b.h" +#include "acr_sw_tu104.h" + +static int tu104_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr_ahesasc); + if (err != 0) { + nvgpu_err(g, "ACR AHESASC bootstrap failed"); + goto exit; + } + err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr_asb); + if (err != 0) { + nvgpu_err(g, "ACR ASB bootstrap failed"); + goto exit; + } + +exit: + return err; +} + +/* WPR info update */ +static int tu104_acr_patch_wpr_info_to_ucode(struct gk20a *g, + struct nvgpu_acr *acr, struct hs_acr *acr_desc, + bool is_recovery) +{ + struct nvgpu_firmware *acr_fw = acr_desc->acr_fw; + struct acr_fw_header *acr_fw_hdr = NULL; + struct bin_hdr *acr_fw_bin_hdr = NULL; + struct flcn_acr_desc *acr_dmem_desc; + struct wpr_carveout_info wpr_inf; + u32 *acr_ucode_header = NULL; + u32 *acr_ucode_data = NULL; + u64 tmp_addr; + + nvgpu_log_fn(g, " "); + + acr_fw_bin_hdr = (struct bin_hdr *)acr_fw->data; + acr_fw_hdr = (struct acr_fw_header *) + (acr_fw->data + acr_fw_bin_hdr->header_offset); + + acr_ucode_data = (u32 *)(acr_fw->data + acr_fw_bin_hdr->data_offset); + acr_ucode_header = (u32 *)(acr_fw->data + acr_fw_hdr->hdr_offset); + + acr->get_wpr_info(g, &wpr_inf); + + acr_dmem_desc = (struct flcn_acr_desc *) + &(((u8 *)acr_ucode_data)[acr_ucode_header[2U]]); + + acr_dmem_desc->nonwpr_ucode_blob_start = wpr_inf.nonwpr_base; + nvgpu_assert(wpr_inf.size <= U32_MAX); + acr_dmem_desc->nonwpr_ucode_blob_size = (u32)wpr_inf.size; + acr_dmem_desc->regions.no_regions = 1U; + acr_dmem_desc->wpr_offset = 0U; + + acr_dmem_desc->wpr_region_id = 1U; + acr_dmem_desc->regions.region_props[0U].region_id = 1U; + + tmp_addr = (wpr_inf.wpr_base) >> 8U; + nvgpu_assert(u64_hi32(tmp_addr) == 0U); + acr_dmem_desc->regions.region_props[0U].start_addr = U32(tmp_addr); + + tmp_addr = ((wpr_inf.wpr_base) + wpr_inf.size) >> 8U; + nvgpu_assert(u64_hi32(tmp_addr) == 0U); + acr_dmem_desc->regions.region_props[0U].end_addr = U32(tmp_addr); + + tmp_addr = wpr_inf.nonwpr_base >> 8U; + nvgpu_assert(u64_hi32(tmp_addr) == 0U); + acr_dmem_desc->regions.region_props[0U].shadowmMem_startaddress = + U32(tmp_addr); + + return 0; +} + +/* LSF init */ +static u32 tu104_acr_lsf_sec2(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* SEC2 LS falcon info */ + lsf->falcon_id = FALCON_ID_SEC2; + lsf->falcon_dma_idx = NV_SEC2_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = false; + lsf->is_priv_load = false; + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_sec2_ucode_details; + lsf->get_cmd_line_args_offset = NULL; + + return BIT32(lsf->falcon_id); +} + +static u32 tu104_acr_lsf_pmu(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* PMU support not required until PSTATE support is enabled */ + if (!g->support_ls_pmu) { + /* skip adding LS PMU ucode to ACR blob */ + return 0; + } + + /* PMU LS falcon info */ + lsf->falcon_id = FALCON_ID_PMU; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = false; + lsf->is_priv_load = false; +#ifdef CONFIG_NVGPU_LS_PMU + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_pmu_ucode_details; + lsf->get_cmd_line_args_offset = nvgpu_pmu_fw_get_cmd_line_args_offset; +#endif + return BIT32(lsf->falcon_id); +} + +static u32 tu104_acr_lsf_fecs(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + 
/* FECS LS falcon info */ + lsf->falcon_id = FALCON_ID_FECS; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = true; + lsf->is_priv_load = true; + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_fecs_ucode_details; + lsf->get_cmd_line_args_offset = NULL; + + return BIT32(lsf->falcon_id); +} + +static u32 tu104_acr_lsf_gpccs(struct gk20a *g, + struct acr_lsf_config *lsf) +{ + /* FECS LS falcon info */ + lsf->falcon_id = FALCON_ID_GPCCS; + lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE; + lsf->is_lazy_bootstrap = true; + lsf->is_priv_load = true; + lsf->get_lsf_ucode_details = nvgpu_acr_lsf_gpccs_ucode_details; + lsf->get_cmd_line_args_offset = NULL; + + return BIT32(lsf->falcon_id); +} + +static u32 tu104_acr_lsf_conifg(struct gk20a *g, + struct nvgpu_acr *acr) +{ + u32 lsf_enable_mask = 0; + lsf_enable_mask |= tu104_acr_lsf_pmu(g, &acr->lsf[FALCON_ID_PMU]); + lsf_enable_mask |= tu104_acr_lsf_fecs(g, &acr->lsf[FALCON_ID_FECS]); + lsf_enable_mask |= tu104_acr_lsf_gpccs(g, &acr->lsf[FALCON_ID_GPCCS]); + lsf_enable_mask |= tu104_acr_lsf_sec2(g, &acr->lsf[FALCON_ID_SEC2]); + + return lsf_enable_mask; +} + +/* fusa signing enable check */ +static bool tu104_acr_is_fusa_enabled(struct gk20a *g) +{ + return g->is_fusa_sku; +} + +/* ACR-AHESASC(ACR hub encryption setter and signature checker) init*/ +static void tu104_acr_ahesasc_v0_ucode_select(struct gk20a *g, + struct hs_acr *acr_ahesasc) +{ + acr_ahesasc->acr_type = ACR_AHESASC_NON_FUSA; + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_NON_FUSA_PROD_UCODE; + } else { + acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_NON_FUSA_DBG_UCODE; + } + +} + +static void tu104_acr_ahesasc_fusa_ucode_select(struct gk20a *g, + struct hs_acr *acr_ahesasc) +{ + acr_ahesasc->acr_type = ACR_AHESASC_FUSA; + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_FUSA_PROD_UCODE; + } else { + acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_FUSA_DBG_UCODE; + } +} + +static void tu104_acr_ahesasc_sw_init(struct gk20a *g, + struct hs_acr *acr_ahesasc) +{ + if (tu104_acr_is_fusa_enabled(g)) { + tu104_acr_ahesasc_fusa_ucode_select(g, acr_ahesasc); + } else { + tu104_acr_ahesasc_v0_ucode_select(g, acr_ahesasc); + } + + acr_ahesasc->acr_flcn = &g->sec2.flcn; +} + +/* ACR-ASB(ACR SEC2 booter) init*/ +static void tu104_acr_asb_v0_ucode_select(struct gk20a *g, + struct hs_acr *acr_asb) +{ + acr_asb->acr_type = ACR_ASB_NON_FUSA; + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + acr_asb->acr_fw_name = HSBIN_ACR_ASB_NON_FUSA_PROD_UCODE; + } else { + acr_asb->acr_fw_name = HSBIN_ACR_ASB_NON_FUSA_DBG_UCODE; + } +} + +static void tu104_acr_asb_fusa_ucode_select(struct gk20a *g, + struct hs_acr *acr_asb) +{ + acr_asb->acr_type = ACR_ASB_FUSA; + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + acr_asb->acr_fw_name = HSBIN_ACR_ASB_FUSA_PROD_UCODE; + } else { + acr_asb->acr_fw_name = HSBIN_ACR_ASB_FUSA_DBG_UCODE; + } +} + +static void tu104_acr_asb_sw_init(struct gk20a *g, + struct hs_acr *acr_asb) +{ + if (tu104_acr_is_fusa_enabled(g)) { + tu104_acr_asb_fusa_ucode_select(g, acr_asb); + } else { + tu104_acr_asb_v0_ucode_select(g, acr_asb); + } + + acr_asb->acr_flcn = &g->gsp_flcn; +} + +void nvgpu_tu104_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr) +{ + nvgpu_log_fn(g, " "); + + acr->lsf_enable_mask = tu104_acr_lsf_conifg(g, acr); + + acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob; + acr->get_wpr_info = nvgpu_acr_wpr_info_vid; + acr->alloc_blob_space = 
nvgpu_acr_alloc_blob_space_vid; + acr->bootstrap_owner = FALCON_ID_GSPLITE; + acr->bootstrap_hs_acr = tu104_bootstrap_hs_acr; + acr->patch_wpr_info_to_ucode = tu104_acr_patch_wpr_info_to_ucode; + + /* Init ACR-AHESASC */ + tu104_acr_ahesasc_sw_init(g, &acr->acr_ahesasc); + + /* Init ACR-ASB*/ + tu104_acr_asb_sw_init(g, &acr->acr_asb); +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_tu104.h b/drivers/gpu/nvgpu/common/acr/acr_sw_tu104.h new file mode 100644 index 000000000..59e990586 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_tu104.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_SW_TU104_H +#define ACR_SW_TU104_H + +struct gk20a; +struct nvgpu_acr; + +void nvgpu_tu104_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr); + +#endif /*ACR_SW_TU104_H*/ diff --git a/drivers/gpu/nvgpu/common/acr/acr_wpr.c b/drivers/gpu/nvgpu/common/acr/acr_wpr.c new file mode 100644 index 000000000..2205900d5 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_wpr.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include "acr_wpr.h" + +/* Both size and address of WPR need to be 128K-aligned */ +#define DGPU_WPR_SIZE 0x200000U + +void nvgpu_acr_wpr_info_sys(struct gk20a *g, struct wpr_carveout_info *inf) +{ + g->ops.fb.read_wpr_info(g, &inf->wpr_base, &inf->size); +} +#ifdef CONFIG_NVGPU_DGPU +void nvgpu_acr_wpr_info_vid(struct gk20a *g, struct wpr_carveout_info *inf) +{ + inf->wpr_base = g->mm.vidmem.bootstrap_base; + inf->nonwpr_base = inf->wpr_base + DGPU_WPR_SIZE; + inf->size = DGPU_WPR_SIZE; +} +#endif diff --git a/drivers/gpu/nvgpu/common/acr/acr_wpr.h b/drivers/gpu/nvgpu/common/acr/acr_wpr.h new file mode 100644 index 000000000..aa247d5a9 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_wpr.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_WPR_H +#define ACR_WPR_H + +struct gk20a; +struct wpr_carveout_info; + +struct wpr_carveout_info { + u64 wpr_base; + u64 nonwpr_base; + u64 size; +}; + +void nvgpu_acr_wpr_info_sys(struct gk20a *g, struct wpr_carveout_info *inf); +#ifdef CONFIG_NVGPU_DGPU +void nvgpu_acr_wpr_info_vid(struct gk20a *g, struct wpr_carveout_info *inf); +#endif + +#endif /* NVGPU_ACR_WPR_H */ diff --git a/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h new file mode 100644 index 000000000..47e3d26d7 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h @@ -0,0 +1,609 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_ACR_INTERFACE_H +#define NVGPU_ACR_INTERFACE_H + +/** + * @defgroup NVGPURM_BLOB_CONSTRUCT blob construct + * + * Blob construct interfaces: + * NVGPU creates LS ucode blob in system/FB's non-WPR memory. LS ucodes + * will be read from filesystem and added to blob for the detected chip. + * Below are the structs that need to be filled by NvGPU for each LS Falcon + * ucode supported for the detected chip. After filling structures successfully, + * NvGPU should copy below structs along with ucode to the non-WPR blob + * in below mentioned pattern. LS ucodes blob is required by the ACR HS + * ucode to authenticate & load LS ucode on to respective engine's LS Falcon. + * + * + WPR header struct #lsf_wpr_header. + * + LSB header struct #lsf_lsb_header. + * + Boot loader struct #flcn_bl_dmem_desc. + * + ucode image. + * + * + BLOB Pattern: + * --------------------------------------------- + * | LSF WPR HDR | LSF LSB HDR | BL desc | ucode | + * --------------------------------------------- + */ + +/** + * @ingroup NVGPURM_BLOB_CONSTRUCT + */ +/** @{*/ + +/** + * Light Secure WPR Content Alignments + */ +/** WPR header should be aligned to 256 bytes */ +#define LSF_WPR_HEADER_ALIGNMENT (256U) +/** SUB WPR header should be aligned to 256 bytes */ +#define LSF_SUB_WPR_HEADER_ALIGNMENT (256U) +/** LSB header should be aligned to 256 bytes */ +#define LSF_LSB_HEADER_ALIGNMENT (256U) +/** BL DATA should be aligned to 256 bytes */ +#define LSF_BL_DATA_ALIGNMENT (256U) +/** BL DATA size should be aligned to 256 bytes */ +#define LSF_BL_DATA_SIZE_ALIGNMENT (256U) +/** BL CODE size should be aligned to 256 bytes */ +#define LSF_BL_CODE_SIZE_ALIGNMENT (256U) +/** LSF DATA size should be aligned to 256 bytes */ +#define LSF_DATA_SIZE_ALIGNMENT (256U) +/** LSF CODE size should be aligned to 256 bytes */ +#define LSF_CODE_SIZE_ALIGNMENT (256U) + +/** UCODE surface should be aligned to 4k PAGE_SIZE */ +#define LSF_UCODE_DATA_ALIGNMENT 4096U + +/** + * Maximum WPR Header size + */ +#define LSF_WPR_HEADERS_TOTAL_SIZE_MAX \ + (ALIGN_UP(((u32)sizeof(struct lsf_wpr_header) * FALCON_ID_END), \ + LSF_WPR_HEADER_ALIGNMENT)) +#define LSF_LSB_HEADER_TOTAL_SIZE_MAX (\ + ALIGN_UP(sizeof(struct lsf_lsb_header), LSF_LSB_HEADER_ALIGNMENT)) + +/** @} */ + +#ifdef CONFIG_NVGPU_DGPU +/* Maximum SUB WPR header size */ +#define LSF_SUB_WPR_HEADERS_TOTAL_SIZE_MAX (ALIGN_UP( \ + (sizeof(struct lsf_shared_sub_wpr_header) * \ + LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX), \ + LSF_SUB_WPR_HEADER_ALIGNMENT)) + +/* MMU excepts sub_wpr sizes in units of 4K */ +#define SUB_WPR_SIZE_ALIGNMENT (4096U) + +/* Defined for 1MB alignment */ +#define SHIFT_4KB (12U) + +/* shared sub_wpr use case IDs */ +enum { + LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_FRTS_VBIOS_TABLES = 1, + LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA = 2 +}; + +#define LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX \ + LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA + +#define LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID (0xFFFFFFFFU) + +#define MAX_SUPPORTED_SHARED_SUB_WPR_USE_CASES \ + LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX + +/* Static sizes of shared subWPRs */ +/* Minimum granularity supported is 4K */ +/* 1MB in 4K */ +#define 
LSF_SHARED_DATA_SUB_WPR_FRTS_VBIOS_TABLES_SIZE_IN_4K (0x100U) +/* 4K */ +#define LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K (0x1U) +#endif + +/** + * @ingroup NVGPURM_BLOB_CONSTRUCT + */ +/** @{*/ + +/** + * Image status updated by ACR HS ucode to know the LS + * Falcon ucode status. + */ +/** IMAGE copied from NON-WPR to WPR BLOB*/ +#define LSF_IMAGE_STATUS_COPY (1U) +/** LS Falcon ucode verification failed*/ +#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED (2U) +/** LS Falcon data verification failed*/ +#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED (3U) +/** Both ucode and data validation passed */ +#define LSF_IMAGE_STATUS_VALIDATION_DONE (4U) +/** + * LS Falcons such as FECS and GPCCS does not have signatures for binaries in + * debug environment(fmodel). + */ +#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED (5U) +/** LS Falcon validation passed & ready for bootstrap */ +#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (6U) + +/** + * Light Secure WPR Header + * Defines state allowing Light Secure Falcon bootstrapping. + */ +struct lsf_wpr_header { + /** + * LS Falcon ID + * FALCON_ID_FECS - 2 + * FALCON_ID_GPCCS - 3 + */ + u32 falcon_id; + /** + * LS Falcon LSB header offset from non-WPR base, below equation used + * to get LSB header offset for each managed LS falcon. + * Offset = Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT + + * ((#LSF_UCODE_DATA_ALIGNMENT + #LSF_BL_DATA_ALIGNMENT) * + * LS Falcon index) + * + */ + u32 lsb_offset; + /** + * LS Falcon bootstrap owner, which performs bootstrapping of + * supported LS Falcon from ACR HS ucode. Below are the bootstrapping + * supporting Falcon owners. + * + Falcon #FALCON_ID_PMU + * + * On GV11B, bootstrap_owner set to #FALCON_ID_PMU as ACR HS ucode + * runs on PMU Engine Falcon. + * + */ + u32 bootstrap_owner; + /** + * Skip bootstrapping by ACR HS ucode, + * 1 - skip LS Falcon bootstrapping by ACR HS ucode. + * 0 - LS Falcon bootstrapping is done by ACR HS ucode. + * + * On GV11B, always set 0. + */ + u32 lazy_bootstrap; + /** LS ucode bin version*/ + u32 bin_version; + /** + * Bootstrapping status updated by ACR HS ucode to know the LS + * Falcon ucode status. + */ + u32 status; +}; + +/** @} */ + +/** + * @ingroup NVGPURM_BLOB_CONSTRUCT + */ +/** @{*/ +/** + * Size in entries of the ucode descriptor's dependency map. + */ +#define LSF_FALCON_DEPMAP_SIZE (11U) + +/** + * Code/data signature details of LS falcon + */ +struct lsf_ucode_desc { + /** ucode's production signature */ + u8 prd_keys[2][16]; + /** ucode's debug signature */ + u8 dbg_keys[2][16]; + /** + * production signature present status, + * 1 - production signature present + * 0 - production signature not present + */ + u32 b_prd_present; + /** + * debug signature present + * 1 - debug signature present + * 0 - debug signature not present + */ + u32 b_dbg_present; + /** + * LS Falcon ID + * FALCON_ID_FECS - 2 + * FALCON_ID_GPCCS - 3 + */ + u32 falcon_id; + /** + * include version in signature calculation if supported + * 1 - supported + * 0 - not supported + */ + u32 bsupports_versioning; + /** version to include it in signature calculation if supported */ + u32 version; + /** valid dependency map data to consider from dep_map array member */ + u32 dep_map_count; + /** + * packed dependency map used to compute the DM hashes on the code and + * data. 
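+	 * With LSF_FALCON_DEPMAP_SIZE defined as 11 above, the array below
+	 * works out to 11 * 2 * 4 = 88 bytes.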
+ */ + u8 dep_map[LSF_FALCON_DEPMAP_SIZE * 2 * 4]; + /** Message used to derive key */ + u8 kdf[16]; +}; + +/** @} */ + +/** + * @ingroup NVGPURM_BLOB_CONSTRUCT + */ +/** @{*/ + +/** + * Light Secure Bootstrap Header + * Defines state allowing Light Secure Falcon bootstrapping. + */ +/** Load BL at 0th IMEM offset */ +#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0U +#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE BIT32(0) +/** This falcon requires a ctx before issuing DMAs. */ +#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0U +#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE BIT32(2) +/** Use priv loading method instead of bootloader/DMAs */ +#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE BIT32(3) +#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE (0U) +struct lsf_lsb_header { + /** Code/data signature details of each LS falcon */ + struct lsf_ucode_desc signature; + /** + * Offset from non-WPR base where UCODE is located, + * Offset = Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT + + * #LSF_UCODE_DATA_ALIGNMENT + ( #LSF_BL_DATA_ALIGNMENT * + * LS Falcon index) + */ + u32 ucode_off; + /** + * Size of LS Falcon ucode, required to perform signature verification + * of LS Falcon ucode by ACR HS. + */ + u32 ucode_size; + /** + * Size of LS Falcon ucode data, required to perform signature + * verification of LS Falcon ucode data by ACR HS. + */ + u32 data_size; + /** + * Size of bootloader that needs to be loaded by bootstrap owner. + * + * On GV11B, respective LS Falcon BL code size should not exceed + * below mentioned size. + * FALCON_ID_FECS IMEM size - 32k + * FALCON_ID_GPCCS IMEM size - 16k + */ + u32 bl_code_size; + /** BL starting virtual address. Need for tagging */ + u32 bl_imem_off; + /** + * Offset from non-WPR base holding the BL data + * Offset = (Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT + + * #LSF_UCODE_DATA_ALIGNMENT + #LSF_BL_DATA_ALIGNMENT) * + * #LS Falcon index + */ + u32 bl_data_off; + /** + * Size of BL data, BL data will be copied to LS Falcon DMEM of + * bl data size + * + * On GV11B, respective LS Falcon BL data size should not exceed + * below mentioned size. + * FALCON_ID_FECS DMEM size - 8k + * FALCON_ID_GPCCS DMEM size - 5k + */ + u32 bl_data_size; + /** + * Offset from non-WPR base address where UCODE Application code is + * located. + */ + u32 app_code_off; + /** + * Size of UCODE Application code. + * + * On GV11B, FECS/GPCCS LS Falcon app code size should not exceed + * below mentioned size. + * FALCON_ID_FECS IMEM size - 32k + * FALCON_ID_GPCCS IMEM size - 16k + */ + u32 app_code_size; + /** + * Offset from non-WPR base address where UCODE Application data + * is located + */ + u32 app_data_off; + /** + * Size of UCODE Application data. + * + * On GV11B, respective LS Falcon app data size should not exceed + * below mentioned size. + * FALCON_ID_FECS DMEM size - 8k + * FALCON_ID_GPCCS DMEM size - 5k + */ + u32 app_data_size; + /** + * NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0 - Load BL at 0th IMEM offset + * NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX - This falcon requires a ctx + * before issuing DMAs. + * NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD - Use priv loading method + * instead of bootloader/DMAs + */ + u32 flags; +}; + +#define FLCN_SIG_SIZE (4U) +/** @} */ + +/** + * @ingroup NVGPURM_BLOB_CONSTRUCT + */ +/** @{*/ +/** + * Structure used by the boot-loader to load the rest of the LS Falcon code. + * + * This has to be filled by the GPU driver and copied into WPR region offset + * holding the BL data. 
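+ * This corresponds to the "BL desc" element of the non-WPR blob pattern
+ * described in the blob construct group above.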
+ */ +struct flcn_bl_dmem_desc { + /** Should be always first element */ + u32 reserved[FLCN_SIG_SIZE]; + /** + * Signature should follow reserved 16B signature for secure code. + * 0s if no secure code + */ + u32 signature[FLCN_SIG_SIZE]; + /** + * Type of memory-aperture DMA index used by the bootloader + * while loading code/data. + */ + u32 ctx_dma; + /** + * 256B aligned physical sysmem(iGPU)/FB(dGPU) address where code + * is located. + */ + struct falc_u64 code_dma_base; + /** + * Offset from code_dma_base where the nonSecure code is located. + * The offset must be multiple of 256 to help performance. + */ + u32 non_sec_code_off; + /** + * The size of the non-secure code part. + * + * On GV11B, FECS/GPCCS LS Falcon non-secure + secure code size + * should not exceed below mentioned size. + * FALCON_ID_FECS IMEM size - 32k + * FALCON_ID_GPCCS IMEM size - 16k + */ + u32 non_sec_code_size; + /** + * Offset from code_dma_base where the secure code is located. + * The offset must be multiple of 256 to help performance. + */ + u32 sec_code_off; + /** + * The size of the secure code part. + * + * On GV11B, FECS/GPCCS LS Falcon non-secure + secure code size + * should not exceed below mentioned size. + * FALCON_ID_FECS IMEM size - 32k + * FALCON_ID_GPCCS IMEM size - 16k + */ + u32 sec_code_size; + /** + * Code entry point which will be invoked by BL after code is + * loaded. + */ + u32 code_entry_point; + /** + * 256B aligned Physical sysmem(iGPU)/FB(dGPU) Address where data + * is located. + */ + struct falc_u64 data_dma_base; + /** + * Size of data block. Should be multiple of 256B. + * + * On GV11B, respective LS Falcon data size should not exceed + * below mentioned size. + * FALCON_ID_FECS DMEM size - 8k + * FALCON_ID_GPCCS DMEM size - 5k + */ + u32 data_size; + /** Arguments to be passed to the target firmware being loaded. */ + u32 argc; + /** + * Number of arguments to be passed to the target firmware + * being loaded. + */ + u32 argv; +}; + +/** @} */ + +/** + * @defgroup NVGPURM_ACR_HS_LOAD_BOOTSTRAP ACR HS ucode load & bootstrap + * + * ACR HS ucode load & bootstrap interfaces: + * ACR HS ucode is read from the filesystem based on the chip-id by the ACR + * unit. Read ACR HS ucode will be update with below structs by patching at + * offset present in struct #struct acr_fw_header member hdr_offset. Read + * ACR HS ucode is loaded onto PMU/SEC2/GSP engines Falcon to bootstrap + * ACR HS ucode. ACR HS ucode does self-authentication using H/W based + * HS authentication methodology. Once authenticated the ACR HS ucode + * starts executing on the falcon. + */ + +/** + * @ingroup NVGPURM_ACR_HS_LOAD_BOOTSTRAP + */ +/** @{*/ + +/** + * Supporting maximum of 2 regions. + * This is needed to pre-allocate space in DMEM + */ +#define NVGPU_FLCN_ACR_MAX_REGIONS (2U) +/** Reserve 512 bytes for bootstrap owner LS ucode data */ +#define LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE (0x200U) + +/** + * The descriptor used by ACR HS ucode to figure out properties of individual + * WPR regions. + * + * On GV11B, this struct members are set to 0x0 by default, reason + * to fetch WPR1 details from H/W. + */ +struct flcn_acr_region_prop { + /** Starting address of WPR region */ + u32 start_addr; + /** Ending address of WPR region */ + u32 end_addr; + /** The ID of the WPR region. 
0 for WPR1 and 1 for WPR2 */ + u32 region_id; + /** Read mask associated with this region */ + u32 read_mask; + /** Write mask associated with this region */ + u32 write_mask; + /** Bit map of all clients currently using this region */ + u32 client_mask; + /** + * sysmem(iGPU)/FB(dGPU) location from where contents need to + * be copied to startAddress + */ + u32 shadowmMem_startaddress; +}; + +/** + * The descriptor used by ACR HS ucode to figure out supporting regions & + * its properties. + */ +struct flcn_acr_regions { + /** + * Number of regions used by NVGPU from the total number of ACR + * regions supported in chip. + * + * On GV11B, 1 ACR region supported and should always be greater + * than 0. + */ + u32 no_regions; + /** Region properties */ + struct flcn_acr_region_prop region_props[NVGPU_FLCN_ACR_MAX_REGIONS]; +}; + +#define DMEM_WORD_SIZE 4U +#define DUMMY_SPACE_SIZE 4U +/** + * The descriptor used by ACR HS ucode to figure out the + * WPR & non-WPR blob details. + */ +struct flcn_acr_desc { + /* + * The bootstrap owner needs to switch into LS mode when bootstrapping + * other LS Falcons is completed. It needs to have its own actual + * DMEM image copied into DMEM as part of LS setup. If ACR desc is + * at location 0, it will definitely get overwritten causing data + * corruption. Hence need to reserve 0x200 bytes to give room for + * any loading data. + * NOTE: This has to be the first member always. + */ + union { + u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/DMEM_WORD_SIZE)]; + } ucode_reserved_space; + /** Signature of ACR ucode. */ + u32 signatures[FLCN_SIG_SIZE]; + /** + * WPR Region ID holding the WPR header and its details + * + * on GV11B, wpr_region_id set to 0x0 by default to indicate + * to ACR HS ucode to fetch WPR region details from H/W & + * updating WPR start_addr, end_addr, read_mask & write_mask + * of struct #flcn_acr_region_prop. + */ + u32 wpr_region_id; + /** Offset from the non-WPR base holding the wpr header */ + u32 wpr_offset; + /** usable memory ranges, on GV11B it is not set */ + u32 mmu_mem_range; + /** + * WPR Region descriptors to provide info about WPR. + * on GV11B, no_regions set to 1 & region properties value to 0x0 + * to indicate to ACR HS ucode to fetch WPR region details from H/W. + */ + struct flcn_acr_regions regions; + /** + * stores the size of the ucode blob. + * + * On GV11B, size is calculated at runtime & aligned to 256 bytes. + * Size varies based on number of LS falcon supports. + */ + u32 nonwpr_ucode_blob_size; + /** + * stores sysmem(iGPU)/FB's(dGPU) non-WPR start address where + * kernel stores ucode blob + */ + u64 nonwpr_ucode_blob_start; + /** dummy space, not used by iGPU */ + u32 dummy[DUMMY_SPACE_SIZE]; +}; + +struct flcn2_acr_desc { + /** + * WPR Region ID holding the WPR header and its details + * + * on GPUID_NEXT, wpr_region_id set to 0x0 by default to indicate + * to ACR HS ucode to fetch WPR region details from H/W & + * updating WPR start_addr, end_addr, read_mask & write_mask + * of struct #flcn_acr_region_prop. + */ + u32 wpr_region_id; + /** Offset from the non-WPR base holding the wpr header */ + u32 wpr_offset; + /** + * WPR Region descriptors to provide info about WPR. + * on GPUID_NEXT, no_regions set to 1 & region properties value to 0x0 + * to indicate to ACR HS ucode to fetch WPR region details from H/W. + */ + struct flcn_acr_regions regions; + /** + * stores the size of the ucode blob. + * + * On GPUID_NEXT, size is calculated at runtime & aligned to 256 bytes. 
+ * Size varies based on number of LS falcon supports. + */ + u32 nonwpr_ucode_blob_size; + /** + * stores sysmem(iGPU)/FB's(dGPU) non-WPR start address where + * kernel stores ucode blob + */ + u64 nonwpr_ucode_blob_start; + + u64 ls_pmu_desc; +}; + +/** @} */ + +#endif /* NVGPU_ACR_INTERFACE_H */ diff --git a/drivers/gpu/nvgpu/common/cbc/cbc.c b/drivers/gpu/nvgpu/common/cbc/cbc.c new file mode 100644 index 000000000..272c41f93 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cbc/cbc.c @@ -0,0 +1,122 @@ +/* + * CBC + * + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include +#include +#include +#include +#include +#include + +void nvgpu_cbc_remove_support(struct gk20a *g) +{ + struct nvgpu_cbc *cbc = g->cbc; + + nvgpu_log_fn(g, " "); + + if (cbc == NULL) { + return; + } + + if (nvgpu_mem_is_valid(&cbc->compbit_store.mem)) { + nvgpu_dma_free(g, &cbc->compbit_store.mem); + (void) memset(&cbc->compbit_store, 0, + sizeof(struct compbit_store_desc)); + } + gk20a_comptag_allocator_destroy(g, &cbc->comp_tags); + + nvgpu_kfree(g, cbc); + g->cbc = NULL; +} + +/* + * This function is triggered during finalize_poweron multiple times. + * This function should not return if cbc is not NULL. + * cbc.init(), which re-writes HW registers that are reset during suspend, + * should be allowed to execute each time. 
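+ *
+ * Rough call pattern across power cycles (illustrative only):
+ *   first finalize_poweron : allocate g->cbc, alloc_comptags(), cbc.init()
+ *   later finalize_poweron : reuse existing g->cbc, run cbc.init() only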
+ */ +int nvgpu_cbc_init_support(struct gk20a *g) +{ + int err = 0; + struct nvgpu_cbc *cbc = g->cbc; + + nvgpu_log_fn(g, " "); + + if (cbc == NULL) { + cbc = nvgpu_kzalloc(g, sizeof(*cbc)); + if (cbc == NULL) { + return -ENOMEM; + } + g->cbc = cbc; + + if (g->ops.cbc.alloc_comptags != NULL) { + err = g->ops.cbc.alloc_comptags(g, g->cbc); + if (err != 0) { + nvgpu_err(g, "Failed to allocate comptags"); + nvgpu_kfree(g, cbc); + g->cbc = NULL; + return err; + } + } + } + + if (g->ops.cbc.init != NULL) { + g->ops.cbc.init(g, g->cbc); + } + + return err; +} + +int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, + bool vidmem_alloc) +{ + struct nvgpu_cbc *cbc = g->cbc; + + if (nvgpu_mem_is_valid(&cbc->compbit_store.mem) != 0) { + return 0; + } + +#ifdef CONFIG_NVGPU_DGPU + if (vidmem_alloc == true) { + /* + * Backing store MUST be physically contiguous and allocated in + * one chunk + * Vidmem allocation API does not support FORCE_CONTIGUOUS like + * flag to allocate contiguous memory + * But this allocation will happen in vidmem bootstrap allocator + * which always allocates contiguous memory + */ + return nvgpu_dma_alloc_vid(g, + compbit_backing_size, + &cbc->compbit_store.mem); + } else +#endif + { + return nvgpu_dma_alloc_flags_sys(g, + NVGPU_DMA_PHYSICALLY_ADDRESSED, + compbit_backing_size, + &cbc->compbit_store.mem); + } +} diff --git a/drivers/gpu/nvgpu/common/ce/ce.c b/drivers/gpu/nvgpu/common/ce/ce.c new file mode 100644 index 000000000..1bf9e2570 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ce/ce.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +int nvgpu_ce_init_support(struct gk20a *g) +{ + int err = 0; + + if (g->ops.ce.set_pce2lce_mapping != NULL) { + g->ops.ce.set_pce2lce_mapping(g); + } + + err = nvgpu_mc_reset_devtype(g, NVGPU_DEVTYPE_LCE); + if (err != 0) { + nvgpu_err(g, "NVGPU_DEVTYPE_LCE reset failed"); + return err; + } + + nvgpu_cg_slcg_ce2_load_enable(g); + + nvgpu_cg_blcg_ce_load_enable(g); + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_cg_elcg_ce_load_enable(g); +#endif + + if (g->ops.ce.init_prod_values != NULL) { + g->ops.ce.init_prod_values(g); + } + + if (g->ops.ce.init_hw != NULL) { + g->ops.ce.init_hw(g); + } + + if (g->ops.ce.intr_enable != NULL) { + g->ops.ce.intr_enable(g, true); + } + + /** Enable interrupts at MC level */ + nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_CE, NVGPU_CIC_INTR_ENABLE); + nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_CE, NVGPU_CIC_INTR_ENABLE); + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/ce/ce_app.c b/drivers/gpu/nvgpu/common/ce/ce_app.c new file mode 100644 index 000000000..ecf1d0411 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ce/ce_app.c @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/ce/ce_priv.h" + +static inline u32 nvgpu_ce_get_valid_launch_flags(struct gk20a *g, + u32 launch_flags) +{ +#ifdef CONFIG_NVGPU_DGPU + /* + * there is no local memory available, + * don't allow local memory related CE flags + */ + if (g->mm.vidmem.size == 0ULL) { + launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | + NVGPU_CE_DST_LOCATION_LOCAL_FB); + } +#endif + return launch_flags; +} + +int nvgpu_ce_execute_ops(struct gk20a *g, + u32 ce_ctx_id, + u64 src_paddr, + u64 dst_paddr, + u64 size, + u32 payload, + u32 launch_flags, + u32 request_operation, + u32 submit_flags, + struct nvgpu_fence_type **fence_out) +{ + int ret = -EPERM; + struct nvgpu_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save; + bool found = false; + u32 *cmd_buf_cpu_va; + u64 cmd_buf_gpu_va = 0UL; + u32 method_size; + u32 cmd_buf_read_offset; + u32 dma_copy_class; + struct nvgpu_gpfifo_entry gpfifo; + struct nvgpu_channel_fence fence = {0U, 0U}; + struct nvgpu_fence_type *ce_cmd_buf_fence_out = NULL; + + if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) { + goto end; + } + + /* This shouldn't happen */ + if (size == 0ULL) { + ret = -EINVAL; + goto end; + } + + if (request_operation != NVGPU_CE_PHYS_MODE_TRANSFER && + request_operation != NVGPU_CE_MEMSET) { + ret = -EINVAL; + goto end; + } + + if (src_paddr > NVGPU_CE_MAX_ADDRESS) { + ret = -EINVAL; + goto end; + } + + if (dst_paddr > NVGPU_CE_MAX_ADDRESS) { + ret = -EINVAL; + goto end; + } + + nvgpu_mutex_acquire(&ce_app->app_mutex); + + nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, + &ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) { + if (ce_ctx->ctx_id == ce_ctx_id) { + found = true; + break; + } + } + + nvgpu_mutex_release(&ce_app->app_mutex); + + if (!found) { + ret = -EINVAL; + goto end; + } + + if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { + ret = -ENODEV; + goto end; + } + + nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); + + ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; + + cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * + (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_SUBMIT / + U32(sizeof(u32)))); + + cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; + + if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] != NULL) { + struct nvgpu_fence_type **prev_post_fence = + &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; + + ret = nvgpu_fence_wait(g, *prev_post_fence, + nvgpu_get_poll_timeout(g)); + + nvgpu_fence_put(*prev_post_fence); + *prev_post_fence = NULL; + if (ret != 0) { + goto noop; + } + } + + cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + + (u64)(cmd_buf_read_offset * sizeof(u32))); + + dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); + method_size = nvgpu_ce_prepare_submit(src_paddr, + dst_paddr, + size, + &cmd_buf_cpu_va[cmd_buf_read_offset], + payload, + nvgpu_ce_get_valid_launch_flags(g, launch_flags), + request_operation, + dma_copy_class); + nvgpu_assert(method_size <= NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_SUBMIT); + + if (method_size != 0U) { + /* store the element into gpfifo */ + g->ops.pbdma.format_gpfifo_entry(g, &gpfifo, + cmd_buf_gpu_va, method_size); + + /* + * take always the postfence as it is needed for protecting the + * ce context + */ + submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; + + nvgpu_smp_wmb(); + + ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, + 1, submit_flags, 
&fence, &ce_cmd_buf_fence_out); + + if (ret == 0) { + ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = + ce_cmd_buf_fence_out; + if (fence_out != NULL) { + nvgpu_fence_get(ce_cmd_buf_fence_out); + *fence_out = ce_cmd_buf_fence_out; + } + + /* Next available command buffer queue Index */ + ++ce_ctx->cmd_buf_read_queue_offset; + } + } else { + ret = -ENOMEM; + } +noop: + nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); +end: + return ret; +} + +/* static CE app api */ +static void nvgpu_ce_put_fences(struct nvgpu_ce_gpu_ctx *ce_ctx) +{ + u32 i; + + for (i = 0U; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) { + struct nvgpu_fence_type **fence = &ce_ctx->postfences[i]; + + if (*fence != NULL) { + nvgpu_fence_put(*fence); + } + *fence = NULL; + } +} + +/* caller must hold ce_app->app_mutex */ +static void nvgpu_ce_delete_gpu_context_locked(struct nvgpu_ce_gpu_ctx *ce_ctx) +{ + struct nvgpu_list_node *list = &ce_ctx->list; + + ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED; + ce_ctx->tsg->abortable = true; + + nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); + + if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) { + nvgpu_ce_put_fences(ce_ctx); + nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem); + } + + /* + * free the channel + * nvgpu_channel_close() will also unbind the channel from TSG + */ + nvgpu_channel_close(ce_ctx->ch); + nvgpu_ref_put(&ce_ctx->tsg->refcount, nvgpu_tsg_release); + + /* housekeeping on app */ + if ((list->prev != NULL) && (list->next != NULL)) { + nvgpu_list_del(list); + } + + nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); + nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex); + + nvgpu_kfree(ce_ctx->g, ce_ctx); +} + +static u32 nvgpu_prepare_ce_op(u32 *cmd_buf_cpu_va, + u64 src_paddr, u64 dst_paddr, + u32 width, u32 height, u32 payload, + bool mode_transfer, u32 launch_flags) +{ + u32 launch = 0U; + u32 methodSize = 0U; + + if (mode_transfer) { + /* setup the source */ + cmd_buf_cpu_va[methodSize++] = 0x20028100; + cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_paddr) & + NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK); + cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_paddr) & + NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK); + + cmd_buf_cpu_va[methodSize++] = 0x20018098; + if ((launch_flags & + NVGPU_CE_SRC_LOCATION_LOCAL_FB) != 0U) { + cmd_buf_cpu_va[methodSize++] = 0x00000000; + } else if ((launch_flags & + NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) != 0U) { + cmd_buf_cpu_va[methodSize++] = 0x00000002; + } else { + cmd_buf_cpu_va[methodSize++] = 0x00000001; + } + + launch |= 0x00001000U; + } else { /* memset */ + /* Remap from component A on 1 byte wide pixels */ + cmd_buf_cpu_va[methodSize++] = 0x200181c2; + cmd_buf_cpu_va[methodSize++] = 0x00000004; + + cmd_buf_cpu_va[methodSize++] = 0x200181c0; + cmd_buf_cpu_va[methodSize++] = payload; + + launch |= 0x00000400U; + } + + /* setup the destination/output */ + cmd_buf_cpu_va[methodSize++] = 0x20068102; + cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_paddr) & + NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK); + cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_paddr) & + NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK); + /* Pitch in/out */ + cmd_buf_cpu_va[methodSize++] = width; + cmd_buf_cpu_va[methodSize++] = width; + /* width and line count */ + cmd_buf_cpu_va[methodSize++] = width; + cmd_buf_cpu_va[methodSize++] = height; + + cmd_buf_cpu_va[methodSize++] = 0x20018099; + if ((launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) != 0U) { + cmd_buf_cpu_va[methodSize++] = 0x00000000; + } else if ((launch_flags & + NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) != 0U) { + cmd_buf_cpu_va[methodSize++] = 
0x00000002; + } else { + cmd_buf_cpu_va[methodSize++] = 0x00000001; + } + + launch |= 0x00002005U; + + if ((launch_flags & + NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR) != 0U) { + launch |= 0x00000000U; + } else { + launch |= 0x00000080U; + } + + if ((launch_flags & + NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR) != 0U) { + launch |= 0x00000000U; + } else { + launch |= 0x00000100U; + } + + cmd_buf_cpu_va[methodSize++] = 0x200180c0; + cmd_buf_cpu_va[methodSize++] = launch; + + return methodSize; +} + +u32 nvgpu_ce_prepare_submit(u64 src_paddr, + u64 dst_paddr, + u64 size, + u32 *cmd_buf_cpu_va, + u32 payload, + u32 launch_flags, + u32 request_operation, + u32 dma_copy_class) +{ + u32 methodSize = 0; + u64 low, hi; + bool mode_transfer = (request_operation == NVGPU_CE_PHYS_MODE_TRANSFER); + + /* set the channel object */ + cmd_buf_cpu_va[methodSize++] = 0x20018000; + cmd_buf_cpu_va[methodSize++] = dma_copy_class; + + /* + * The CE can work with 2D rectangles of at most 0xffffffff or 4G-1 + * pixels per line. Exactly 2G is a more round number, so we'll use + * that as the base unit to clear large amounts of memory. If the + * requested size is not a multiple of 2G, we'll do one clear first to + * deal with the low bits, followed by another in units of 2G. + * + * We'll use 1 bytes per pixel to do byte aligned sets/copies. The + * maximum number of lines is also 4G-1, so (4G-1) * 2 GB is enough for + * whole vidmem. + */ + + /* Lower 2GB */ + low = size & 0x7fffffffULL; + /* Over 2GB */ + hi = size >> 31U; + + /* + * Unable to fit this in one submit, but no device should have this + * much memory anyway. + */ + if (hi > 0xffffffffULL) { + /* zero size means error */ + return 0; + } + + if (low != 0U) { + /* do the low bytes in one long line */ + methodSize += nvgpu_prepare_ce_op(&cmd_buf_cpu_va[methodSize], + src_paddr, dst_paddr, + nvgpu_safe_cast_u64_to_u32(low), 1, + payload, mode_transfer, launch_flags); + } + if (hi != 0U) { + /* do the high bytes in many 2G lines */ + methodSize += nvgpu_prepare_ce_op(&cmd_buf_cpu_va[methodSize], + src_paddr + low, dst_paddr + low, + 0x80000000ULL, nvgpu_safe_cast_u64_to_u32(hi), + payload, mode_transfer, launch_flags); + } + + return methodSize; +} + +/* global CE app related apis */ +int nvgpu_ce_app_init_support(struct gk20a *g) +{ + struct nvgpu_ce_app *ce_app = g->ce_app; + + if (unlikely(ce_app == NULL)) { + ce_app = nvgpu_kzalloc(g, sizeof(*ce_app)); + if (ce_app == NULL) { + return -ENOMEM; + } + g->ce_app = ce_app; + } + + if (ce_app->initialised) { + /* assume this happen during poweron/poweroff GPU sequence */ + ce_app->app_state = NVGPU_CE_ACTIVE; + return 0; + } + + nvgpu_log(g, gpu_dbg_fn, "ce: init"); + + nvgpu_mutex_init(&ce_app->app_mutex); + + nvgpu_mutex_acquire(&ce_app->app_mutex); + + nvgpu_init_list_node(&ce_app->allocated_contexts); + ce_app->ctx_count = 0; + ce_app->next_ctx_id = 0; + ce_app->initialised = true; + ce_app->app_state = NVGPU_CE_ACTIVE; + + nvgpu_mutex_release(&ce_app->app_mutex); + + nvgpu_log(g, gpu_dbg_cde_ctx, "ce: init finished"); + + return 0; +} + +void nvgpu_ce_app_destroy(struct gk20a *g) +{ + struct nvgpu_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save; + + if (ce_app == NULL) { + return; + } + + if (ce_app->initialised == false) { + goto free; + } + + ce_app->app_state = NVGPU_CE_SUSPEND; + ce_app->initialised = false; + + nvgpu_mutex_acquire(&ce_app->app_mutex); + + nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, + &ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) { + 
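/*
+ * app_mutex is held across this walk, so the _locked variant can
+ * safely unlink each context from allocated_contexts as it goes.
+ */
+ 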
nvgpu_ce_delete_gpu_context_locked(ce_ctx); + } + + nvgpu_init_list_node(&ce_app->allocated_contexts); + ce_app->ctx_count = 0; + ce_app->next_ctx_id = 0; + + nvgpu_mutex_release(&ce_app->app_mutex); + + nvgpu_mutex_destroy(&ce_app->app_mutex); +free: + nvgpu_kfree(g, ce_app); + g->ce_app = NULL; +} + +void nvgpu_ce_app_suspend(struct gk20a *g) +{ + struct nvgpu_ce_app *ce_app = g->ce_app; + + if (ce_app == NULL || !ce_app->initialised) { + return; + } + + ce_app->app_state = NVGPU_CE_SUSPEND; +} + +/* CE app utility functions */ +u32 nvgpu_ce_app_create_context(struct gk20a *g, + u32 runlist_id, + int timeslice, + int runlist_level) +{ + struct nvgpu_ce_gpu_ctx *ce_ctx; + struct nvgpu_ce_app *ce_app = g->ce_app; + struct nvgpu_setup_bind_args setup_bind_args; + u32 ctx_id = NVGPU_CE_INVAL_CTX_ID; + int err = 0; + + if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) { + return ctx_id; + } + + ce_ctx = nvgpu_kzalloc(g, sizeof(*ce_ctx)); + if (ce_ctx == NULL) { + return ctx_id; + } + + nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex); + + ce_ctx->g = g; + ce_ctx->cmd_buf_read_queue_offset = 0; + ce_ctx->vm = g->mm.ce.vm; + + /* allocate a tsg if needed */ + ce_ctx->tsg = nvgpu_tsg_open(g, nvgpu_current_pid(g)); + if (ce_ctx->tsg == NULL) { + nvgpu_err(g, "ce: gk20a tsg not available"); + goto end; + } + + /* this TSG should never be aborted */ + ce_ctx->tsg->abortable = false; + + /* always kernel client needs privileged channel */ + ce_ctx->ch = nvgpu_channel_open_new(g, runlist_id, true, + nvgpu_current_pid(g), nvgpu_current_tid(g)); + if (ce_ctx->ch == NULL) { + nvgpu_err(g, "ce: gk20a channel not available"); + goto end; + } + + nvgpu_channel_wdt_disable(ce_ctx->ch->wdt); + + /* bind the channel to the vm */ + err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch); + if (err != 0) { + nvgpu_err(g, "ce: could not bind vm"); + goto end; + } + + err = nvgpu_tsg_bind_channel(ce_ctx->tsg, ce_ctx->ch); + if (err != 0) { + nvgpu_err(g, "ce: unable to bind to tsg"); + goto end; + } + + setup_bind_args.num_gpfifo_entries = 1024; + setup_bind_args.num_inflight_jobs = 0; + setup_bind_args.flags = 0; + err = nvgpu_channel_setup_bind(ce_ctx->ch, &setup_bind_args); + if (err != 0) { + nvgpu_err(g, "ce: unable to setup and bind channel"); + goto end; + } + + /* allocate command buffer from sysmem */ + err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, + NVGPU_CE_MAX_INFLIGHT_JOBS * + NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_SUBMIT, + &ce_ctx->cmd_buf_mem); + if (err != 0) { + nvgpu_err(g, + "ce: alloc command buffer failed"); + goto end; + } + + (void) memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, + ce_ctx->cmd_buf_mem.size); + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING + /* -1 means default channel timeslice value */ + if (timeslice != -1) { + err = g->ops.tsg.set_timeslice(ce_ctx->tsg, timeslice); + if (err != 0) { + nvgpu_err(g, "ce: set timesliced failed for CE context"); + goto end; + } + } + + /* -1 means default channel runlist level */ + if (runlist_level != -1) { + err = nvgpu_tsg_set_interleave(ce_ctx->tsg, runlist_level); + if (err != 0) { + nvgpu_err(g, "ce: set runlist interleave failed"); + goto end; + } + } +#endif + + nvgpu_mutex_acquire(&ce_app->app_mutex); + ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id; + nvgpu_list_add(&ce_ctx->list, &ce_app->allocated_contexts); + ++ce_app->next_ctx_id; + ++ce_app->ctx_count; + nvgpu_mutex_release(&ce_app->app_mutex); + + ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED; + +end: + if (ctx_id == NVGPU_CE_INVAL_CTX_ID) { + 
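/*
+ * Any failure above leaves ctx_id at NVGPU_CE_INVAL_CTX_ID, so tear
+ * down the partially constructed context under app_mutex.
+ */
+ 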
nvgpu_mutex_acquire(&ce_app->app_mutex); + nvgpu_ce_delete_gpu_context_locked(ce_ctx); + nvgpu_mutex_release(&ce_app->app_mutex); + } + return ctx_id; + +} + +void nvgpu_ce_app_delete_context(struct gk20a *g, + u32 ce_ctx_id) +{ + struct nvgpu_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save; + + if (ce_app == NULL || !ce_app->initialised || + ce_app->app_state != NVGPU_CE_ACTIVE) { + return; + } + + nvgpu_mutex_acquire(&ce_app->app_mutex); + + nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, + &ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) { + if (ce_ctx->ctx_id == ce_ctx_id) { + nvgpu_ce_delete_gpu_context_locked(ce_ctx); + --ce_app->ctx_count; + break; + } + } + + nvgpu_mutex_release(&ce_app->app_mutex); +} diff --git a/drivers/gpu/nvgpu/common/ce/ce_priv.h b/drivers/gpu/nvgpu/common/ce/ce_priv.h new file mode 100644 index 000000000..682c4c836 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ce/ce_priv.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_CE_PRIV_H +#define NVGPU_CE_PRIV_H + +#include +#include +#include +#include + +struct gk20a; + +/* ce context db */ +struct nvgpu_ce_gpu_ctx { + struct gk20a *g; + u32 ctx_id; + struct nvgpu_mutex gpu_ctx_mutex; + int gpu_ctx_state; + + /* tsg related data */ + struct nvgpu_tsg *tsg; + + /* channel related data */ + struct nvgpu_channel *ch; + struct vm_gk20a *vm; + + /* cmd buf mem_desc */ + struct nvgpu_mem cmd_buf_mem; + struct nvgpu_fence_type *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS]; + + struct nvgpu_list_node list; + + u32 cmd_buf_read_queue_offset; +}; + +/* global ce app db */ +struct nvgpu_ce_app { + bool initialised; + struct nvgpu_mutex app_mutex; + int app_state; + + struct nvgpu_list_node allocated_contexts; + u32 ctx_count; + u32 next_ctx_id; +}; + +static inline struct nvgpu_ce_gpu_ctx * +nvgpu_ce_gpu_ctx_from_list(struct nvgpu_list_node *node) +{ + return (struct nvgpu_ce_gpu_ctx *) + ((uintptr_t)node - offsetof(struct nvgpu_ce_gpu_ctx, list)); +}; + +u32 nvgpu_ce_prepare_submit(u64 src_paddr, + u64 dst_paddr, + u64 size, + u32 *cmd_buf_cpu_va, + u32 payload, + u32 launch_flags, + u32 request_operation, + u32 dma_copy_class); + +#endif /*NVGPU_CE_PRIV_H*/ diff --git a/drivers/gpu/nvgpu/common/cic/ce_cic.c b/drivers/gpu/nvgpu/common/cic/ce_cic.c new file mode 100644 index 000000000..d608ea76b --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/ce_cic.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +void nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + if (hw_unit != NVGPU_ERR_MODULE_CE) { + nvgpu_err(g, "invalid hw module (%u)", hw_unit); + err = -EINVAL; + goto handle_report_failure; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for " + "err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_ce_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_info.ce_info.header.sub_unit_id = inst; + err_pkt.err_desc = err_desc; + /* sub_err_type can be decoded using intr_info by referring + * to the interrupt status register definition corresponding + * to the error that is being reported. + */ + err_pkt.err_info.ce_info.header.sub_err_type = intr_info; + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8( + sizeof(err_pkt.err_info.ce_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + nvgpu_err(g, "Failed to report CE error: " + "inst=%u err_id=%u intr_info=%u", + inst, err_id, intr_info); + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type) +{ + nvgpu_report_ce_err(g, hw_unit, 0U, err_index, sub_err_type); +} diff --git a/drivers/gpu/nvgpu/common/cic/cic.c b/drivers/gpu/nvgpu/common/cic/cic.c new file mode 100644 index 000000000..12d3877aa --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/cic.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +int nvgpu_cic_init_common(struct gk20a *g) +{ + struct nvgpu_cic *cic; + int err = 0; + + if (g->cic != NULL) { + cic_dbg(g, "CIC unit already initialized"); + return 0; + } + + cic = nvgpu_kzalloc(g, sizeof(*cic)); + if (cic == NULL) { + nvgpu_err(g, "Failed to allocate memory " + "for struct nvgpu_cic"); + return -ENOMEM; + } + + if (g->ops.cic.init != NULL) { + err = g->ops.cic.init(g, cic); + if (err != 0) { + nvgpu_err(g, "CIC chip specific " + "initialization failed."); + goto cleanup; + } + } else { + cic->err_lut = NULL; + cic->num_hw_modules = 0; + } + + g->cic = cic; + cic_dbg(g, "CIC unit initialization done."); + return 0; + +cleanup: + if (cic != NULL) { + nvgpu_kfree(g, cic); + } + return err; +} + +int nvgpu_cic_deinit_common(struct gk20a *g) +{ + struct nvgpu_cic *cic; + + cic = g->cic; + + if (cic == NULL) { + cic_dbg(g, "CIC unit already deinitialized"); + return 0; + } + + cic->err_lut = NULL; + cic->num_hw_modules = 0; + + nvgpu_kfree(g, cic); + g->cic = NULL; + + return 0; +} + +int nvgpu_cic_check_hw_unit_id(struct gk20a *g, u32 hw_unit_id) +{ + if (g->cic == NULL) { + nvgpu_err(g, "CIC is not initialized"); + return -EINVAL; + } + + if (g->cic->num_hw_modules == 0U) { + cic_dbg(g, "LUT not initialized."); + return -EINVAL; + } + + if (hw_unit_id >= g->cic->num_hw_modules) { + cic_dbg(g, "Invalid input HW unit ID."); + return -EINVAL; + } + + return 0; +} + +int nvgpu_cic_check_err_id(struct gk20a *g, u32 hw_unit_id, + u32 err_id) +{ + int err = 0; + + if ((g->cic == NULL) || (g->cic->err_lut == NULL)) { + cic_dbg(g, "CIC/LUT not initialized."); + return -EINVAL; + } + + err = nvgpu_cic_check_hw_unit_id(g, hw_unit_id); + if (err != 0) { + return err; + } + + if (err_id >= g->cic->err_lut[hw_unit_id].num_errs) { + err = -EINVAL; + } + + return err; +} + +int nvgpu_cic_get_err_desc(struct gk20a *g, u32 hw_unit_id, + u32 err_id, struct nvgpu_err_desc **err_desc) +{ + int err = 0; + + /* if (g->cic != NULL) and (g->cic->err_lut != NULL) check + * can be skipped here as it checked as part of + * nvgpu_cic_check_err_id() called below. + */ + + err = nvgpu_cic_check_err_id(g, hw_unit_id, err_id); + if (err != 0) { + return err; + } + + *err_desc = &(g->cic->err_lut[hw_unit_id].errs[err_id]); + + return err; +} + +int nvgpu_cic_get_num_hw_modules(struct gk20a *g) +{ + if (g->cic == NULL) { + nvgpu_err(g, "CIC is not initialized"); + return -EINVAL; + } + + return g->cic->num_hw_modules; +} diff --git a/drivers/gpu/nvgpu/common/cic/cic_intr.c b/drivers/gpu/nvgpu/common/cic/cic_intr.c new file mode 100644 index 000000000..612291df0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/cic_intr.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +int nvgpu_cic_wait_for_stall_interrupts(struct gk20a *g, u32 timeout) +{ + /* wait until all stalling irqs are handled */ + return NVGPU_COND_WAIT(&g->mc.sw_irq_stall_last_handled_cond, + nvgpu_atomic_read(&g->mc.sw_irq_stall_pending) == 0, + timeout); +} + +int nvgpu_cic_wait_for_nonstall_interrupts(struct gk20a *g, u32 timeout) +{ + /* wait until all non-stalling irqs are handled */ + return NVGPU_COND_WAIT(&g->mc.sw_irq_nonstall_last_handled_cond, + nvgpu_atomic_read(&g->mc.sw_irq_nonstall_pending) == 0, + timeout); +} + +void nvgpu_cic_wait_for_deferred_interrupts(struct gk20a *g) +{ + int ret; + + ret = nvgpu_cic_wait_for_stall_interrupts(g, 0U); + if (ret != 0) { + nvgpu_err(g, "wait for stall interrupts failed %d", ret); + } + + ret = nvgpu_cic_wait_for_nonstall_interrupts(g, 0U); + if (ret != 0) { + nvgpu_err(g, "wait for nonstall interrupts failed %d", ret); + } +} + +void nvgpu_cic_intr_mask(struct gk20a *g) +{ + unsigned long flags = 0; + + if (g->ops.mc.intr_mask != NULL) { + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + g->ops.mc.intr_mask(g); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); + } +} + +#ifdef CONFIG_NVGPU_NON_FUSA +void nvgpu_cic_log_pending_intrs(struct gk20a *g) +{ + if (g->ops.mc.log_pending_intrs != NULL) { + g->ops.mc.log_pending_intrs(g); + } +} + +void nvgpu_cic_intr_enable(struct gk20a *g) +{ + unsigned long flags = 0; + + if (g->ops.mc.intr_enable != NULL) { + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + g->ops.mc.intr_enable(g); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); + } +} +#endif + +void nvgpu_cic_intr_stall_unit_config(struct gk20a *g, u32 unit, bool enable) +{ + unsigned long flags = 0; + + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + g->ops.mc.intr_stall_unit_config(g, unit, enable); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); +} + +void nvgpu_cic_intr_nonstall_unit_config(struct gk20a *g, u32 unit, bool enable) +{ + unsigned long flags = 0; + + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + g->ops.mc.intr_nonstall_unit_config(g, unit, enable); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); +} + +void nvgpu_cic_intr_stall_pause(struct gk20a *g) +{ + unsigned long flags = 0; + + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + g->ops.mc.intr_stall_pause(g); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); +} + +void nvgpu_cic_intr_stall_resume(struct gk20a *g) +{ + unsigned long flags = 0; + + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + g->ops.mc.intr_stall_resume(g); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); +} + +void nvgpu_cic_intr_nonstall_pause(struct gk20a *g) +{ + unsigned long flags = 0; + + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + g->ops.mc.intr_nonstall_pause(g); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); +} + +void nvgpu_cic_intr_nonstall_resume(struct gk20a *g) +{ + unsigned long flags = 0; + + nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags); + 
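/*
+ * Re-enable the non-stall interrupt tree via the MC HAL; intr_lock
+ * serializes this with the other interrupt config and pause paths.
+ */
+ 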
g->ops.mc.intr_nonstall_resume(g); + nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags); +} + +static void nvgpu_cic_intr_nonstall_work(struct gk20a *g, u32 work_ops) +{ + bool semaphore_wakeup, post_events; + + semaphore_wakeup = + (((work_ops & NVGPU_CIC_NONSTALL_OPS_WAKEUP_SEMAPHORE) != 0U) ? + true : false); + post_events = (((work_ops & NVGPU_CIC_NONSTALL_OPS_POST_EVENTS) != 0U) ? + true : false); + + if (semaphore_wakeup) { + g->ops.semaphore_wakeup(g, post_events); + } +} + +u32 nvgpu_cic_intr_nonstall_isr(struct gk20a *g) +{ + u32 non_stall_intr_val = 0U; + + if (nvgpu_is_powered_off(g)) { + return NVGPU_CIC_INTR_UNMASK; + } + + /* not from gpu when sharing irq with others */ + non_stall_intr_val = g->ops.mc.intr_nonstall(g); + if (non_stall_intr_val == 0U) { + return NVGPU_CIC_INTR_NONE; + } + + nvgpu_cic_intr_nonstall_pause(g); + if (g->sw_quiesce_pending) { + return NVGPU_CIC_INTR_QUIESCE_PENDING; + } + + nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 1); + return NVGPU_CIC_INTR_HANDLE; +} + +void nvgpu_cic_intr_nonstall_handle(struct gk20a *g) +{ + int err; + u32 nonstall_ops = 0; + + nonstall_ops = g->ops.mc.isr_nonstall(g); + if (nonstall_ops != 0U) { + nvgpu_cic_intr_nonstall_work(g, nonstall_ops); + } + + /* sync handled irq counter before re-enabling interrupts */ + nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 0); + + nvgpu_cic_intr_nonstall_resume(g); + + err = nvgpu_cond_broadcast(&g->mc.sw_irq_nonstall_last_handled_cond); + if (err != 0) { + nvgpu_err(g, "nvgpu_cond_broadcast failed err=%d", err); + } +} + +u32 nvgpu_cic_intr_stall_isr(struct gk20a *g) +{ + u32 mc_intr_0 = 0U; + + nvgpu_trace_intr_stall_start(g); + + if (nvgpu_is_powered_off(g)) { + return NVGPU_CIC_INTR_UNMASK; + } + + /* not from gpu when sharing irq with others */ + mc_intr_0 = g->ops.mc.intr_stall(g); + if (mc_intr_0 == 0U) { + return NVGPU_CIC_INTR_NONE; + } + + nvgpu_cic_intr_stall_pause(g); + + if (g->sw_quiesce_pending) { + return NVGPU_CIC_INTR_QUIESCE_PENDING; + } + + nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 1); + + nvgpu_trace_intr_stall_done(g); + + return NVGPU_CIC_INTR_HANDLE; +} + +void nvgpu_cic_intr_stall_handle(struct gk20a *g) +{ + int err; + + nvgpu_trace_intr_thread_stall_start(g); + + g->ops.mc.isr_stall(g); + + nvgpu_trace_intr_thread_stall_done(g); + + /* sync handled irq counter before re-enabling interrupts */ + nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 0); + nvgpu_cic_intr_stall_resume(g); + + err = nvgpu_cond_broadcast(&g->mc.sw_irq_stall_last_handled_cond); + if (err != 0) { + nvgpu_err(g, "nvgpu_cond_broadcast failed err=%d", err); + } +} diff --git a/drivers/gpu/nvgpu/common/cic/cic_priv.h b/drivers/gpu/nvgpu/common/cic/cic_priv.h new file mode 100644 index 000000000..526d7b461 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/cic_priv.h @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CIC_PRIV_H +#define CIC_PRIV_H + +#include + +struct gk20a; +struct nvgpu_err_hw_module; +struct nvgpu_err_msg; +struct gpu_err_header; + +/* + * @file + * + * Declare CIC's private structure to store error-policy LUT and + * other data and ops needed during error reporting. + */ + +#define ERR_INJECT_TEST_PATTERN 0xA5 + +/* + * This struct contains members related to error-policy look-up table, + * number of units reporting errors. + */ +struct nvgpu_cic { + /** Pointer for error look-up table. */ + struct nvgpu_err_hw_module *err_lut; + + /** Total number of GPU HW modules considered in CIC. */ + u32 num_hw_modules; + +}; + +/** + * @brief Inject ECC error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param inst [in] - Instance ID. + * + * - Sets values for error address and error count. + * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 inst); + +/** + * @brief Inject HOST error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param sub_err_type [in] - Sub error type. + * + * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type); + +/** + * @brief Inject GR error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param sub_err_type [in] - Sub error type. + * + * - Sets values for GR exception and SM machine check error information. + * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type); + +/** + * @brief Inject CE error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param sub_err_type [in] - Sub error type. + * + * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type); + +/** + * @brief Inject CE error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param err_code [in] - Error code. + * + * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 err_code); + +/** + * @brief Inject PMU error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param sub_err_type [in] - Sub error type. + * + * - Sets values for error info. 
+ * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type); + +/** + * @brief Inject CTXSW error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param inst [in] - Instance ID. + * + * - Sets values for error info. + * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 inst); + +/** + * @brief Inject MMU error. + * + * @param g [in] - The GPU driver struct. + * @param hw_unit [in] - Index of HW unit. + * @param err_index [in] - Error index. + * @param sub_err_type [in] - Sub error type. + * + * - Sets values for mmu page fault info. + * - Invokes error reporting API with the required set of inputs. + * + * @return None + */ +void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type); + +/** + * @brief Initialize error message header. + * + * @param header [in] - Error message header. + * + * This is used to initialize error message header. + * + * @return None + */ +void nvgpu_init_err_msg_header(struct gpu_err_header *header); + +/** + * @brief Initialize error message. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is common + * for all HW units. + * + * @return None + */ +void nvgpu_init_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize error message for HOST unit. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to HOST unit. + * + * @return None + */ +void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize ECC error message. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to ECC errors. + * + * @return None + */ +void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize error message for PRI unit. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to PRI unit. + * + * @return None + */ +void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize error message for CE unit. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to CE unit. + * + * @return None + */ +void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize error message for PMU unit. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to PMU unit. + * + * @return None + */ +void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize error message for GR unit. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to GR unit. + * + * @return None + */ +void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize error message for CTXSW. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to CTXSW. + * + * @return None + */ +void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg); + +/** + * @brief Initialize error message for MMU unit. + * + * @param msg [in] - Error message. + * + * This is used to initialize error message that is specific to MMU unit. 
+ * + * @return None + */ +void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg); + +#endif /* CIC_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/cic/ctxsw_cic.c b/drivers/gpu/nvgpu/common/cic/ctxsw_cic.c new file mode 100644 index 000000000..bb6a75652 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/ctxsw_cic.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +void nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id, + void *data) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + u32 inst = 0; + struct ctxsw_err_info *err_info = (struct ctxsw_err_info *)data; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + if (hw_unit != NVGPU_ERR_MODULE_FECS) { + nvgpu_err(g, "invalid hw module (%u)", hw_unit); + err = -EINVAL; + goto handle_report_failure; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for" + " err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_ctxsw_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_info.ctxsw_info.header.sub_unit_id = inst; + err_pkt.err_info.ctxsw_info.curr_ctx = err_info->curr_ctx; + err_pkt.err_info.ctxsw_info.chid = err_info->chid; + err_pkt.err_info.ctxsw_info.ctxsw_status0 = err_info->ctxsw_status0; + err_pkt.err_info.ctxsw_info.ctxsw_status1 = err_info->ctxsw_status1; + err_pkt.err_info.ctxsw_info.mailbox_value = err_info->mailbox_value; + err_pkt.err_desc = err_desc; + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8( + sizeof(err_pkt.err_info.ctxsw_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + nvgpu_err(g, "Failed to report CTXSW error: " + "err_id=%u, mailbox_val=%u", + err_id, err_info->mailbox_value); + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 inst) +{ + struct ctxsw_err_info err_info; + + (void)memset(&err_info, ERR_INJECT_TEST_PATTERN, sizeof(err_info)); + + 
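/*
+ * err_info is filled with the 0xA5 test pattern above so that the
+ * injected CTXSW error report carries recognizable, non-zero values.
+ */
+ 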
nvgpu_report_ctxsw_err(g, hw_unit, err_index, (void *)&err_info); +} diff --git a/drivers/gpu/nvgpu/common/cic/ecc_cic.c b/drivers/gpu/nvgpu/common/cic/ecc_cic.c new file mode 100644 index 000000000..728fc8fe2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/ecc_cic.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_id, u64 err_addr, u64 err_count) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for " + "err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_ecc_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_info.ecc_info.header.sub_unit_id = inst; + err_pkt.err_info.ecc_info.header.address = err_addr; + err_pkt.err_info.ecc_info.err_cnt = err_count; + err_pkt.err_desc = err_desc; + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8( + sizeof(err_pkt.err_info.ecc_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + nvgpu_err(g, "Failed to report ECC error: hw_unit=%u, inst=%u, " + "err_id=%u, err_addr=%llu, err_count=%llu", + hw_unit, inst, err_id, err_addr, err_count); + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit, u32 err_index, + u32 inst) +{ + u64 err_addr, err_count; + + err_addr = (u64)ERR_INJECT_TEST_PATTERN; + err_count = (u64)ERR_INJECT_TEST_PATTERN; + + nvgpu_report_ecc_err(g, hw_unit, inst, err_index, err_addr, err_count); +} diff --git a/drivers/gpu/nvgpu/common/cic/gr_cic.c b/drivers/gpu/nvgpu/common/cic/gr_cic.c new file mode 100644 index 000000000..87269f6a9 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/gr_cic.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +static void nvpgu_report_fill_err_info(u32 hw_unit, + struct nvgpu_err_msg *err_pkt, struct gr_err_info *err_info) +{ + if (hw_unit == NVGPU_ERR_MODULE_SM) { + struct gr_sm_mcerr_info *info = err_info->sm_mcerr_info; + + err_pkt->err_info.sm_info.warp_esr_pc = + info->hww_warp_esr_pc; + err_pkt->err_info.sm_info.warp_esr_status = + info->hww_warp_esr_status; + err_pkt->err_info.sm_info.curr_ctx = + info->curr_ctx; + err_pkt->err_info.sm_info.chid = + info->chid; + err_pkt->err_info.sm_info.tsgid = + info->tsgid; + err_pkt->err_info.sm_info.gpc = + info->gpc; + err_pkt->err_info.sm_info.tpc = + info->tpc; + err_pkt->err_info.sm_info.sm = + info->sm; + } else { + struct gr_exception_info *info = err_info->exception_info; + + err_pkt->err_info.gr_info.curr_ctx = info->curr_ctx; + err_pkt->err_info.gr_info.chid = info->chid; + err_pkt->err_info.gr_info.tsgid = info->tsgid; + err_pkt->err_info.gr_info.status = info->status; + } +} + +void nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_id, struct gr_err_info *err_info, u32 sub_err_type) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + if ((hw_unit != NVGPU_ERR_MODULE_SM) && + (hw_unit != NVGPU_ERR_MODULE_PGRAPH)) { + nvgpu_err(g, "invalid hw module (%u)", hw_unit); + err = -EINVAL; + goto handle_report_failure; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for " + "err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_gr_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_desc = err_desc; + err_pkt.err_info.gr_info.header.sub_err_type = sub_err_type; + err_pkt.err_info.gr_info.header.sub_unit_id = inst; + nvpgu_report_fill_err_info(hw_unit, &err_pkt, err_info); + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(sizeof(err_pkt.err_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + if (hw_unit == NVGPU_ERR_MODULE_SM) { + nvgpu_err(g, "Failed to report SM exception" 
+ "gpc=%u, tpc=%u, sm=%u, esr_status=%x", + err_pkt.err_info.sm_info.gpc, + err_pkt.err_info.sm_info.tpc, + err_pkt.err_info.sm_info.sm, + err_pkt.err_info.sm_info.warp_esr_status); + } + if (hw_unit == NVGPU_ERR_MODULE_PGRAPH) { + nvgpu_err(g, "Failed to report PGRAPH" + "exception: inst=%u, err_id=%u, " + "status=%u", inst, err_id, + err_pkt.err_info.gr_info.status); + } + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type) +{ + struct gr_err_info err_info; + struct gr_exception_info gr_error_info; + struct gr_sm_mcerr_info sm_error_info; + int err = 0; + u32 inst = 0U; + + /* + * Fill fixed test pattern data for the error message + * payload. + */ + (void)memset(&gr_error_info, ERR_INJECT_TEST_PATTERN, sizeof(gr_error_info)); + (void)memset(&sm_error_info, ERR_INJECT_TEST_PATTERN, sizeof(sm_error_info)); + + switch (hw_unit) { + case NVGPU_ERR_MODULE_PGRAPH: + { + err_info.exception_info = &gr_error_info; + } + break; + + case NVGPU_ERR_MODULE_SM: + { + err_info.sm_mcerr_info = &sm_error_info; + } + break; + + default: + { + nvgpu_err(g, "unsupported hw_unit(%u)", hw_unit); + err = -EINVAL; + } + break; + } + if (err != 0) { + return; + } + + nvgpu_report_gr_err(g, hw_unit, inst, err_index, + &err_info, sub_err_type); +} diff --git a/drivers/gpu/nvgpu/common/cic/host_cic.c b/drivers/gpu/nvgpu/common/cic/host_cic.c new file mode 100644 index 000000000..44a64177a --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/host_cic.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +void nvgpu_report_host_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + if (hw_unit != NVGPU_ERR_MODULE_HOST) { + nvgpu_err(g, "invalid hw module (%u)", hw_unit); + err = -EINVAL; + goto handle_report_failure; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for " + "err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_host_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_info.host_info.header.sub_unit_id = inst; + err_pkt.err_desc = err_desc; + /* sub_err_type can be decoded using intr_info by referring + * to the interrupt status register definition corresponding + * to the error that is being reported. + */ + err_pkt.err_info.host_info.header.sub_err_type = intr_info; + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8( + sizeof(err_pkt.err_info.host_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + nvgpu_err(g, "Failed to report HOST error: " + "inst=%u, err_id=%u, intr_info=%u", + inst, err_id, intr_info); + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type) +{ + nvgpu_report_host_err(g, hw_unit, 0U, err_index, sub_err_type); +} diff --git a/drivers/gpu/nvgpu/common/cic/mmu_cic.c b/drivers/gpu/nvgpu/common/cic/mmu_cic.c new file mode 100644 index 000000000..d832e630c --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/mmu_cic.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit, u32 err_id, + struct mmu_fault_info *fault_info, u32 status, u32 sub_err_type) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + if (hw_unit != NVGPU_ERR_MODULE_HUBMMU) { + nvgpu_err(g, "invalid hw module (%u)", hw_unit); + err = -EINVAL; + goto handle_report_failure; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for " + "err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_mmu_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_info.mmu_info.header.sub_err_type = sub_err_type; + err_pkt.err_info.mmu_info.status = status; + /* Copy contents of mmu_fault_info */ + if (fault_info != NULL) { + err_pkt.err_info.mmu_info.info.inst_ptr = fault_info->inst_ptr; + err_pkt.err_info.mmu_info.info.inst_aperture + = fault_info->inst_aperture; + err_pkt.err_info.mmu_info.info.fault_addr + = fault_info->fault_addr; + err_pkt.err_info.mmu_info.info.fault_addr_aperture + = fault_info->fault_addr_aperture; + err_pkt.err_info.mmu_info.info.timestamp_lo + = fault_info->timestamp_lo; + err_pkt.err_info.mmu_info.info.timestamp_hi + = fault_info->timestamp_hi; + err_pkt.err_info.mmu_info.info.mmu_engine_id + = fault_info->mmu_engine_id; + err_pkt.err_info.mmu_info.info.gpc_id = fault_info->gpc_id; + err_pkt.err_info.mmu_info.info.client_type + = fault_info->client_type; + err_pkt.err_info.mmu_info.info.client_id + = fault_info->client_id; + err_pkt.err_info.mmu_info.info.fault_type + = fault_info->fault_type; + err_pkt.err_info.mmu_info.info.access_type + = fault_info->access_type; + err_pkt.err_info.mmu_info.info.protected_mode + = fault_info->protected_mode; + err_pkt.err_info.mmu_info.info.replayable_fault + = fault_info->replayable_fault; + err_pkt.err_info.mmu_info.info.replay_fault_en + = fault_info->replay_fault_en; + err_pkt.err_info.mmu_info.info.valid = fault_info->valid; + err_pkt.err_info.mmu_info.info.faulted_pbdma = + fault_info->faulted_pbdma; + err_pkt.err_info.mmu_info.info.faulted_engine = + fault_info->faulted_engine; + err_pkt.err_info.mmu_info.info.faulted_subid = + fault_info->faulted_subid; + err_pkt.err_info.mmu_info.info.chid = fault_info->chid; + } + err_pkt.err_desc = err_desc; + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8( + sizeof(err_pkt.err_info.mmu_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + nvgpu_err(g, "Failed to report MMU fault: hw_unit=%u, " + "err_id=%u, sub_err_type=%u, status=%u", + hw_unit, err_id, sub_err_type, status); + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit, u32 err_index, + u32 sub_err_type) +{ + u32 status = 0U; + struct mmu_fault_info fault_info; + + (void) memset(&fault_info, ERR_INJECT_TEST_PATTERN, sizeof(fault_info)); + nvgpu_report_mmu_err(g, hw_unit, err_index, + &fault_info, status, sub_err_type); +} diff --git a/drivers/gpu/nvgpu/common/cic/msg_cic.c b/drivers/gpu/nvgpu/common/cic/msg_cic.c new file 
mode 100644 index 000000000..ffb6c153d --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/msg_cic.c @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "cic_priv.h" + +void nvgpu_init_err_msg_header(struct gpu_err_header *header) +{ + header->version.major = (u16)1U; + header->version.minor = (u16)0U; + header->sub_err_type = 0U; + header->sub_unit_id = 0UL; + header->address = 0UL; + header->timestamp_ns = 0UL; +} + +void nvgpu_init_err_msg(struct nvgpu_err_msg *msg) +{ + (void) memset(msg, 0, sizeof(struct nvgpu_err_msg)); + msg->hw_unit_id = 0U; + msg->is_critical = false; + msg->err_id = (u8)0U; + msg->err_size = (u8)0U; +} + +void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.host_info.header); +} + +void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.ecc_info.header); + msg->err_info.ecc_info.err_cnt = 0UL; +} + +void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.pri_info.header); +} + +void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.ce_info.header); +} + +void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.pmu_err_info.header); + msg->err_info.pmu_err_info.status = 0U; +} + +void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.gr_info.header); + msg->err_info.gr_info.curr_ctx = 0U; + msg->err_info.gr_info.chid = 0U; + msg->err_info.gr_info.tsgid = 0U; + msg->err_info.gr_info.status = 0U; +} + +void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.ctxsw_info.header); + msg->err_info.ctxsw_info.curr_ctx = 0U; + msg->err_info.ctxsw_info.tsgid = 0U; + msg->err_info.ctxsw_info.chid = 0U; + msg->err_info.ctxsw_info.ctxsw_status0 = 0U; + msg->err_info.ctxsw_info.ctxsw_status1 = 0U; + msg->err_info.ctxsw_info.mailbox_value = 0U; +} + +void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg) +{ + nvgpu_init_err_msg(msg); + nvgpu_init_err_msg_header(&msg->err_info.mmu_info.header); + msg->err_info.mmu_info.info.inst_ptr = 0UL; 
+ msg->err_info.mmu_info.info.inst_aperture = 0U; + msg->err_info.mmu_info.info.fault_addr = 0UL; + msg->err_info.mmu_info.info.fault_addr_aperture = 0U; + msg->err_info.mmu_info.info.timestamp_lo = 0U; + msg->err_info.mmu_info.info.timestamp_hi = 0U; + msg->err_info.mmu_info.info.mmu_engine_id = 0U; + msg->err_info.mmu_info.info.gpc_id = 0U; + msg->err_info.mmu_info.info.client_type = 0U; + msg->err_info.mmu_info.info.client_id = 0U; + msg->err_info.mmu_info.info.fault_type = 0U; + msg->err_info.mmu_info.info.access_type = 0U; + msg->err_info.mmu_info.info.protected_mode = 0U; + msg->err_info.mmu_info.info.replayable_fault = false; + msg->err_info.mmu_info.info.replay_fault_en = 0U; + msg->err_info.mmu_info.info.valid = false; + msg->err_info.mmu_info.info.faulted_pbdma = 0U; + msg->err_info.mmu_info.info.faulted_engine = 0U; + msg->err_info.mmu_info.info.faulted_subid = 0U; + msg->err_info.mmu_info.info.chid = 0U; + msg->err_info.mmu_info.status = 0U; +} diff --git a/drivers/gpu/nvgpu/common/cic/pmu_cic.c b/drivers/gpu/nvgpu/common/cic/pmu_cic.c new file mode 100644 index 000000000..167c84d1d --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/pmu_cic.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +void nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id, + u32 sub_err_type, u32 status) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + if (hw_unit != NVGPU_ERR_MODULE_PMU) { + nvgpu_err(g, "invalid hw module (%u)", hw_unit); + err = -EINVAL; + goto handle_report_failure; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for " + "err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_pmu_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_info.pmu_err_info.status = status; + err_pkt.err_info.pmu_err_info.header.sub_err_type = sub_err_type; + err_pkt.err_desc = err_desc; + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8( + sizeof(err_pkt.err_info.pmu_err_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + nvgpu_err(g, "Failed to report PMU error: " + "err_id=%u, sub_err_type=%u, status=%u", + err_id, sub_err_type, status); + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 sub_err_type) +{ + u32 err_info; + + err_info = (u32)ERR_INJECT_TEST_PATTERN; + + nvgpu_report_pmu_err(g, hw_unit, err_index, sub_err_type, err_info); +} diff --git a/drivers/gpu/nvgpu/common/cic/pri_cic.c b/drivers/gpu/nvgpu/common/cic/pri_cic.c new file mode 100644 index 000000000..e5efd9c49 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cic/pri_cic.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "cic_priv.h" + +void nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_id, u32 err_addr, u32 err_code) +{ + int err = 0; + struct nvgpu_err_desc *err_desc = NULL; + struct nvgpu_err_msg err_pkt; + + if (g->ops.cic.report_err == NULL) { + cic_dbg(g, "CIC does not support reporting error " + "to safety services"); + return; + } + + if (hw_unit != NVGPU_ERR_MODULE_PRI) { + nvgpu_err(g, "invalid hw module (%u)", hw_unit); + err = -EINVAL; + goto handle_report_failure; + } + + err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc); + if (err != 0) { + nvgpu_err(g, "Failed to get err_desc for " + "err_id (%u) for hw module (%u)", + err_id, hw_unit); + goto handle_report_failure; + } + + nvgpu_init_pri_err_msg(&err_pkt); + err_pkt.hw_unit_id = hw_unit; + err_pkt.err_id = err_desc->error_id; + err_pkt.is_critical = err_desc->is_critical; + err_pkt.err_info.pri_info.header.sub_unit_id = inst; + err_pkt.err_info.pri_info.header.address = (u64) err_addr; + err_pkt.err_desc = err_desc; + /* sub_err_type can be decoded using err_code by referring + * to the FECS pri error codes. + */ + err_pkt.err_info.pri_info.header.sub_err_type = err_code; + err_pkt.err_size = nvgpu_safe_cast_u64_to_u8( + sizeof(err_pkt.err_info.pri_info)); + + if (g->ops.cic.report_err != NULL) { + err = g->ops.cic.report_err(g, (void *)&err_pkt, + sizeof(err_pkt), err_desc->is_critical); + if (err != 0) { + nvgpu_err(g, "Failed to report PRI error: " + "inst=%u, err_id=%u, err_code=%u", + inst, err_id, err_code); + } + } +handle_report_failure: + if (err != 0) { + nvgpu_sw_quiesce(g); + } +} + +void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit, + u32 err_index, u32 err_code) +{ + nvgpu_report_pri_err(g, hw_unit, 0U, err_index, 0U, err_code); +} diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c b/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c new file mode 100644 index 000000000..ed7d4c081 --- /dev/null +++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c @@ -0,0 +1,826 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#include +#include +#endif +#include + +int nvgpu_clk_notification_queue_alloc(struct gk20a *g, + struct nvgpu_clk_notification_queue *queue, + u32 events_number) { + queue->clk_q_notifications = nvgpu_kcalloc(g, events_number, + sizeof(struct nvgpu_clk_notification)); + if (queue->clk_q_notifications == NULL) { + return -ENOMEM; + } + queue->size = events_number; + + nvgpu_atomic_set(&queue->head, 0); + nvgpu_atomic_set(&queue->tail, 0); + + return 0; +} + +void nvgpu_clk_notification_queue_free(struct gk20a *g, + struct nvgpu_clk_notification_queue *queue) { + if (queue->size > 0U) { + nvgpu_kfree(g, queue->clk_q_notifications); + queue->size = 0; + nvgpu_atomic_set(&queue->head, 0); + nvgpu_atomic_set(&queue->tail, 0); + } +} + +static void nvgpu_clk_arb_queue_notification(struct gk20a *g, + struct nvgpu_clk_notification_queue *queue, + u32 alarm_mask) { + + u32 queue_index; + u64 timestamp = 0U; + + queue_index = U32(nvgpu_atomic_inc_return(&queue->tail)) % queue->size; + +#ifdef CONFIG_NVGPU_NON_FUSA + /* get current timestamp */ + timestamp = (u64) nvgpu_hr_timestamp(); +#endif + + queue->clk_q_notifications[queue_index].timestamp = timestamp; + queue->clk_q_notifications[queue_index].clk_notification = alarm_mask; + +} + +void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + u64 current_mask; + u32 refcnt; + u32 alarm_mask; + u64 new_mask; + + do { + current_mask = (u64)nvgpu_atomic64_read(&arb->alarm_mask); + /* atomic operations are strong so they do not need masks */ + + refcnt = ((u32) (current_mask >> 32)) + 1U; + alarm_mask = (u32) (current_mask & ~U32(0)) | alarm; + new_mask = ((u64) refcnt << 32) | alarm_mask; + + } while (unlikely(current_mask != + (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask, + (long int)current_mask, (long int)new_mask))); + + nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); +} + +#ifdef CONFIG_NVGPU_LS_PMU +int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) +{ + struct gk20a *g = arb->g; + struct nvgpu_clk_vf_table *table; + + u32 i, j; + int status = -EINVAL; + u16 clk_cur; + u32 num_points; + + struct nvgpu_pmu_perf_pstate_clk_info *p0_info; + + table = NV_READ_ONCE(arb->current_vf_table); + /* make flag visible when all data has resolved in the tables */ + nvgpu_smp_rmb(); + table = (table == &arb->vf_table_pool[0]) ? 
&arb->vf_table_pool[1] : + &arb->vf_table_pool[0]; + + /* Get allowed memory ranges */ + if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, + &arb->gpc2clk_min, + &arb->gpc2clk_max) < 0) { + nvgpu_err(g, "failed to fetch GPC2CLK range"); + goto exit_vf_table; + } + + if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK, + &arb->mclk_min, + &arb->mclk_max) < 0) { + nvgpu_err(g, "failed to fetch MCLK range"); + goto exit_vf_table; + } + + table->gpc2clk_num_points = MAX_F_POINTS; + table->mclk_num_points = MAX_F_POINTS; + if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPCCLK, + &table->gpc2clk_num_points, arb->gpc2clk_f_points)) { + nvgpu_err(g, "failed to fetch GPC2CLK frequency points"); + goto exit_vf_table; + } + if (!table->gpc2clk_num_points) { + nvgpu_err(g, "empty queries to f points gpc2clk %d", table->gpc2clk_num_points); + status = -EINVAL; + goto exit_vf_table; + } + + (void) memset(table->gpc2clk_points, 0, + table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point)); + + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_GPCCLK); + if (!p0_info) { + status = -EINVAL; + nvgpu_err(g, "failed to get GPC2CLK P0 info"); + goto exit_vf_table; + } + + /* GPC2CLK needs to be checked in two passes. The first determines the + * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the + * second verifies that the clocks minimum is satisfied and sets + * the voltages,the later part is done in nvgpu_pmu_perf_changeseq_set_clks + */ + j = 0; num_points = 0; clk_cur = 0; + for (i = 0; i < table->gpc2clk_num_points; i++) { + struct nvgpu_clk_slave_freq setfllclk; + + if ((arb->gpc2clk_f_points[i] >= arb->gpc2clk_min) && + (arb->gpc2clk_f_points[i] <= arb->gpc2clk_max) && + (arb->gpc2clk_f_points[i] != clk_cur)) { + + table->gpc2clk_points[j].gpc_mhz = + arb->gpc2clk_f_points[i]; + setfllclk.gpc_mhz = arb->gpc2clk_f_points[i]; + + status = clk_get_fll_clks_per_clk_domain(g, &setfllclk); + if (status < 0) { + nvgpu_err(g, + "failed to get GPC2CLK slave clocks"); + goto exit_vf_table; + } + + table->gpc2clk_points[j].sys_mhz = + setfllclk.sys_mhz; + table->gpc2clk_points[j].xbar_mhz = + setfllclk.xbar_mhz; + table->gpc2clk_points[j].nvd_mhz = + setfllclk.nvd_mhz; + table->gpc2clk_points[j].host_mhz = + setfllclk.host_mhz; + + clk_cur = table->gpc2clk_points[j].gpc_mhz; + + if ((clk_cur >= p0_info->min_mhz) && + (clk_cur <= p0_info->max_mhz)) { + VF_POINT_SET_PSTATE_SUPPORTED( + &table->gpc2clk_points[j], + CTRL_PERF_PSTATE_P0); + } + + j++; + num_points++; + } + } + table->gpc2clk_num_points = num_points; + + /* make table visible when all data has resolved in the tables */ + nvgpu_smp_wmb(); + arb->current_vf_table = table; + +exit_vf_table: + + if (status < 0) { + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_VF_TABLE_UPDATE_FAILED)); + } + nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); + + return status; +} + +static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb) +{ + struct gk20a *g = arb->g; + int err; + + /* get latest vf curve from pmu */ + err = nvgpu_clk_vf_point_cache(g); + if (err != 0) { + nvgpu_err(g, "failed to cache VF table"); + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_VF_TABLE_UPDATE_FAILED)); + nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); + + return; + } + nvgpu_clk_arb_update_vf_table(arb); +} +#endif +u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, + struct nvgpu_clk_arb_target *target, + u32 alarm) { + + struct nvgpu_clk_session 
*session = dev->session; + struct nvgpu_clk_arb *arb = session->g->clk_arb; + struct nvgpu_clk_notification *l_notification; + + u32 queue_alarm_mask = 0; + u32 enabled_mask = 0; + u32 new_alarms_reported = 0; + u32 poll_mask = 0; + u32 tail, head, index; + u32 queue_index; + size_t size; + + enabled_mask = (u32)nvgpu_atomic_read(&dev->enabled_mask); + size = arb->notification_queue.size; + + /* queue global arbiter notifications in buffer */ + do { + tail = (u32)nvgpu_atomic_read(&arb->notification_queue.tail); + /* copy items to the queue */ + queue_index = (u32)nvgpu_atomic_read(&dev->queue.tail); + head = dev->arb_queue_head; + head = (tail - head) < arb->notification_queue.size ? + head : tail - arb->notification_queue.size; + + for (index = head; WRAPGTEQ(tail, index); index++) { + u32 alarm_detected; + + l_notification = &arb->notification_queue. + clk_q_notifications[((u64)index + 1ULL) % size]; + alarm_detected = NV_READ_ONCE( + l_notification->clk_notification); + + if ((enabled_mask & alarm_detected) == 0U) { + continue; + } + + queue_index++; + dev->queue.clk_q_notifications[ + queue_index % dev->queue.size].timestamp = + NV_READ_ONCE(l_notification->timestamp); + + dev->queue.clk_q_notifications[queue_index % + dev->queue.size].clk_notification = + alarm_detected; + + queue_alarm_mask |= alarm_detected; + } + } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) != + (int)tail)); + + nvgpu_atomic_set(&dev->queue.tail, (int)queue_index); + /* update the last notification we processed from global queue */ + + dev->arb_queue_head = tail; + + /* Check if current session targets are met */ + if ((enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) != 0U) { + if ((target->gpc2clk < session->target->gpc2clk) + || (target->mclk < session->target->mclk)) { + + poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI); + nvgpu_clk_arb_queue_notification(arb->g, &dev->queue, + EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)); + } + } + + /* Check if there is a new VF update */ + if ((queue_alarm_mask & EVENT(VF_UPDATE)) != 0U) { + poll_mask |= (NVGPU_POLLIN | NVGPU_POLLRDNORM); + } + + /* Notify sticky alarms that were not reported on previous run*/ + new_alarms_reported = (queue_alarm_mask | + (alarm & ~dev->alarms_reported & queue_alarm_mask)); + + if ((new_alarms_reported & ~LOCAL_ALARM_MASK) != 0U) { + /* check that we are not re-reporting */ + if ((new_alarms_reported & EVENT(ALARM_GPU_LOST)) != 0U) { + poll_mask |= NVGPU_POLLHUP; + } + + poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI); + /* On next run do not report global alarms that were already + * reported, but report SHUTDOWN always + */ + dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK & + ~EVENT(ALARM_GPU_LOST); + } + + if (poll_mask != 0U) { + nvgpu_atomic_set(&dev->poll_mask, (int)poll_mask); + nvgpu_clk_arb_event_post_event(dev); + } + + return new_alarms_reported; +} + +void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + u64 current_mask; + u32 refcnt; + u32 alarm_mask; + u64 new_mask; + + do { + current_mask = (u64)nvgpu_atomic64_read(&arb->alarm_mask); + /* atomic operations are strong so they do not need masks */ + + refcnt = ((u32) (current_mask >> 32)) + 1U; + alarm_mask = (u32) ((u32)current_mask & ~alarm); + new_mask = ((u64) refcnt << 32) | alarm_mask; + + } while (unlikely(current_mask != + (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask, + (long int)current_mask, (long int)new_mask))); +} + +/* + * Process one scheduled work item. 
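+ *
+ * Two item types are dispatched below: CLK_ARB_WORK_UPDATE_VF_TABLE re-reads
+ * the VF curve from the PMU (only when CONFIG_NVGPU_LS_PMU is enabled), and
+ * CLK_ARB_WORK_UPDATE_ARB runs the chip-specific arbiter callback.  Producers
+ * schedule work by enqueuing one of the pre-initialised work items embedded
+ * in struct nvgpu_clk_arb, for example (illustrative, not an exhaustive list
+ * of callers):
+ *
+ *	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);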
+ */ +static void nvgpu_clk_arb_worker_poll_wakeup_process_item( + struct nvgpu_list_node *work_item) +{ + struct nvgpu_clk_arb_work_item *clk_arb_work_item = + nvgpu_clk_arb_work_item_from_worker_item(work_item); + + struct gk20a *g = clk_arb_work_item->arb->g; + + clk_arb_dbg(g, " "); + + if (clk_arb_work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE) { +#ifdef CONFIG_NVGPU_LS_PMU + nvgpu_clk_arb_run_vf_table_cb(clk_arb_work_item->arb); +#endif + } else { + if (clk_arb_work_item->item_type == CLK_ARB_WORK_UPDATE_ARB) { + g->ops.clk_arb.clk_arb_run_arbiter_cb( + clk_arb_work_item->arb); + } + } +} + +static void nvgpu_clk_arb_worker_poll_init(struct nvgpu_worker *worker) +{ + clk_arb_dbg(worker->g, " "); +} + +const struct nvgpu_worker_ops clk_arb_worker_ops = { + .pre_process = nvgpu_clk_arb_worker_poll_init, + .wakeup_early_exit = nvgpu_worker_should_stop, + .wakeup_post_process = NULL, + .wakeup_process_item = + nvgpu_clk_arb_worker_poll_wakeup_process_item, + .wakeup_condition = + nvgpu_worker_should_stop, + .wakeup_timeout = NULL, +}; + +/** + * Append a work item to the worker's list. + * + * This adds work item to the end of the list and wakes the worker + * up immediately. If the work item already existed in the list, it's not added, + * because in that case it has been scheduled already but has not yet been + * processed. + */ +void nvgpu_clk_arb_worker_enqueue(struct gk20a *g, + struct nvgpu_clk_arb_work_item *work_item) +{ + clk_arb_dbg(g, " "); + + (void)nvgpu_worker_enqueue(&g->clk_arb_worker.worker, + &work_item->worker_item); +} + +/** + * Initialize the clk arb worker's metadata and start the background thread. + */ +int nvgpu_clk_arb_worker_init(struct gk20a *g) +{ + struct nvgpu_worker *worker = &g->clk_arb_worker.worker; + + nvgpu_worker_init_name(worker, "nvgpu_clk_arb_poll", g->name); + + return nvgpu_worker_init(g, worker, &clk_arb_worker_ops); +} + +int nvgpu_clk_arb_init_arbiter(struct gk20a *g) +{ + int err = 0; + + if (g->ops.clk_arb.check_clk_arb_support != NULL) { + if (!g->ops.clk_arb.check_clk_arb_support(g)) { + return 0; + } + } + else { + return 0; + } + + nvgpu_mutex_acquire(&g->clk_arb_enable_lock); + + err = g->ops.clk_arb.arbiter_clk_init(g); + + nvgpu_mutex_release(&g->clk_arb_enable_lock); + + return err; +} + +bool nvgpu_clk_arb_has_active_req(struct gk20a *g) +{ + return (nvgpu_atomic_read(&g->clk_arb_global_nr) > 0); +} + +static void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + nvgpu_clk_arb_set_global_alarm(g, alarm); + nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); +} + +void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + if (arb != NULL) { + nvgpu_clk_arb_schedule_alarm(g, + BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD)); + } +} + +void nvgpu_clk_arb_worker_deinit(struct gk20a *g) +{ + struct nvgpu_worker *worker = &g->clk_arb_worker.worker; + + nvgpu_worker_deinit(worker); +} + +void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + nvgpu_mutex_acquire(&g->clk_arb_enable_lock); + + if (arb != NULL) { + g->ops.clk_arb.clk_arb_cleanup(g->clk_arb); + } + + nvgpu_mutex_release(&g->clk_arb_enable_lock); +} + +int nvgpu_clk_arb_init_session(struct gk20a *g, + struct nvgpu_clk_session **l_session) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + struct nvgpu_clk_session *session = *(l_session); + + clk_arb_dbg(g, " "); + + if (g->ops.clk_arb.check_clk_arb_support != NULL) { + 
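/*
+		 * Clock arbitration is optional: if the chip provides no
+		 * check_clk_arb_support() HAL, or the HAL reports no support,
+		 * session creation is a silent no-op and *l_session is left
+		 * untouched, mirroring nvgpu_clk_arb_init_arbiter() above.
+		 */
+		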
if (!g->ops.clk_arb.check_clk_arb_support(g)) { + return 0; + } + } + else { + return 0; + } + + session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session)); + if (session == NULL) { + return -ENOMEM; + } + session->g = g; + + nvgpu_ref_init(&session->refcount); + + session->zombie = false; + session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8; + /* make sure that the initialization of the pool is visible + * before the update + */ + nvgpu_smp_wmb(); + session->target = &session->target_pool[0]; + + nvgpu_init_list_node(&session->targets); + nvgpu_spinlock_init(&session->session_lock); + + nvgpu_spinlock_acquire(&arb->sessions_lock); + nvgpu_list_add_tail(&session->link, &arb->sessions); + nvgpu_spinlock_release(&arb->sessions_lock); + + *l_session = session; + + return 0; +} + +static struct nvgpu_clk_dev * +nvgpu_clk_dev_from_refcount(struct nvgpu_ref *refcount) +{ + return (struct nvgpu_clk_dev *) + ((uintptr_t)refcount - offsetof(struct nvgpu_clk_dev, refcount)); +}; + +void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount) +{ + struct nvgpu_clk_dev *dev = nvgpu_clk_dev_from_refcount(refcount); + struct nvgpu_clk_session *session = dev->session; + struct gk20a *g = session->g; + + nvgpu_clk_notification_queue_free(g, &dev->queue); + + nvgpu_atomic_dec(&g->clk_arb_global_nr); + nvgpu_kfree(g, dev); +} + +static struct nvgpu_clk_session * +nvgpu_clk_session_from_refcount(struct nvgpu_ref *refcount) +{ + return (struct nvgpu_clk_session *) + ((uintptr_t)refcount - offsetof(struct nvgpu_clk_session, refcount)); +}; + +void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount) +{ + struct nvgpu_clk_session *session = + nvgpu_clk_session_from_refcount(refcount); + struct nvgpu_clk_arb *arb = session->g->clk_arb; + struct gk20a *g = session->g; + struct nvgpu_clk_dev *dev, *tmp; + + clk_arb_dbg(g, " "); + + if (arb != NULL) { + nvgpu_spinlock_acquire(&arb->sessions_lock); + nvgpu_list_del(&session->link); + nvgpu_spinlock_release(&arb->sessions_lock); + } + + nvgpu_spinlock_acquire(&session->session_lock); + nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets, + nvgpu_clk_dev, node) { + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + nvgpu_list_del(&dev->node); + } + nvgpu_spinlock_release(&session->session_lock); + + nvgpu_kfree(g, session); +} + +void nvgpu_clk_arb_release_session(struct gk20a *g, + struct nvgpu_clk_session *session) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + clk_arb_dbg(g, " "); + + session->zombie = true; + nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); + if (arb != NULL) { + nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); + } +} +#ifdef CONFIG_NVGPU_LS_PMU +void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item); +} + +/* This function is inherently unsafe to call while arbiter is running + * arbiter must be blocked before calling this function + */ +u32 nvgpu_clk_arb_get_current_pstate(struct gk20a *g) +{ + return NV_READ_ONCE(g->clk_arb->actual->pstate); +} + +void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + + if (lock) { + nvgpu_mutex_acquire(&arb->pstate_lock); + } else { + nvgpu_mutex_release(&arb->pstate_lock); + } +} +#endif +bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) +{ + u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); + bool ret_result = false; + + switch (api_domain) { + case 
NVGPU_CLK_DOMAIN_MCLK: + ret_result = ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0U) ? + true : false; + break; + case NVGPU_CLK_DOMAIN_GPCCLK: + ret_result = ((clk_domains & CTRL_CLK_DOMAIN_GPCCLK) != 0U) ? + true : false; + break; + default: + ret_result = false; + break; + } + return ret_result; +} + +int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz) +{ + int ret = -EINVAL; + + switch (api_domain) { + case NVGPU_CLK_DOMAIN_MCLK: + ret = g->ops.clk_arb.get_arbiter_clk_range(g, + CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz); + break; + + case NVGPU_CLK_DOMAIN_GPCCLK: + ret = g->ops.clk_arb.get_arbiter_clk_range(g, + CTRL_CLK_DOMAIN_GPCCLK, min_mhz, max_mhz); + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + +int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, + u32 api_domain, u32 *max_points, u16 *fpoints) +{ + int err = -EINVAL; + + switch (api_domain) { + case NVGPU_CLK_DOMAIN_GPCCLK: + err = g->ops.clk_arb.get_arbiter_f_points(g, + CTRL_CLK_DOMAIN_GPCCLK, max_points, fpoints); + if ((err != 0) || (fpoints == NULL)) { + break; + } + err = 0; + break; + case NVGPU_CLK_DOMAIN_MCLK: + err = g->ops.clk_arb.get_arbiter_f_points(g, + CTRL_CLK_DOMAIN_MCLK, max_points, fpoints); + break; + default: + err = -EINVAL; + break; + } + return err; +} + +int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, + u32 api_domain, u16 *target_mhz) +{ + int err = 0; + struct nvgpu_clk_arb_target *target = session->target; + + if (!nvgpu_clk_arb_is_valid_domain(session->g, api_domain)) { + return -EINVAL; + } + + switch (api_domain) { + case NVGPU_CLK_DOMAIN_MCLK: + *target_mhz = target->mclk; + break; + + case NVGPU_CLK_DOMAIN_GPCCLK: + *target_mhz = target->gpc2clk; + break; + + default: + *target_mhz = 0; + err = -EINVAL; + break; + } + return err; +} + +int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, + u32 api_domain, u16 *actual_mhz) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + int err = 0; + struct nvgpu_clk_arb_target *actual = arb->actual; + + if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) { + return -EINVAL; + } + + switch (api_domain) { + case NVGPU_CLK_DOMAIN_MCLK: + *actual_mhz = actual->mclk; + break; + + case NVGPU_CLK_DOMAIN_GPCCLK: + *actual_mhz = actual->gpc2clk; + break; + + default: + *actual_mhz = 0; + err = -EINVAL; + break; + } + return err; +} + +unsigned long nvgpu_clk_measure_freq(struct gk20a *g, u32 api_domain) +{ + unsigned long freq = 0UL; + + switch (api_domain) { + /* + * Incase of iGPU clocks to each parition (GPC, SYS, LTC, XBAR) are + * generated using 1X GPCCLK and hence should be the same. 
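+	 * The GPCCLK, SYSCLK and XBARCLK cases below therefore share one
+	 * handler that measures GPCCLK, so, purely as an illustration, the
+	 * two calls
+	 *
+	 *	nvgpu_clk_measure_freq(g, CTRL_CLK_DOMAIN_SYSCLK);
+	 *	nvgpu_clk_measure_freq(g, CTRL_CLK_DOMAIN_GPCCLK);
+	 *
+	 * are expected to return the same rate, while any other domain
+	 * yields 0.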
+ */ + case CTRL_CLK_DOMAIN_GPCCLK: + case CTRL_CLK_DOMAIN_SYSCLK: + case CTRL_CLK_DOMAIN_XBARCLK: + freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); + break; + default: + freq = 0UL; + break; + } + return freq; +} + +int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, + u32 api_domain, u16 *effective_mhz) +{ + u64 freq_mhz_u64; + int err = -EINVAL; + + if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) { + return -EINVAL; + } + + switch (api_domain) { + case NVGPU_CLK_DOMAIN_MCLK: + freq_mhz_u64 = g->ops.clk.measure_freq(g, + CTRL_CLK_DOMAIN_MCLK) / 1000000ULL; + err = 0; + break; + + case NVGPU_CLK_DOMAIN_GPCCLK: + freq_mhz_u64 = g->ops.clk.measure_freq(g, + CTRL_CLK_DOMAIN_GPCCLK) / 1000000ULL; + err = 0; + break; + + default: + err = -EINVAL; + break; + } + + if (err == 0) { + nvgpu_assert(freq_mhz_u64 <= (u64)U16_MAX); + *effective_mhz = (u16)freq_mhz_u64; + } + return err; +} diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c new file mode 100644 index 000000000..44b9cf582 --- /dev/null +++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include "clk_arb_gp10b.h" + +bool gp10b_check_clk_arb_support(struct gk20a *g) +{ + if (g->ops.clk_arb.get_arbiter_clk_domains != NULL) { + return true; + } + else { + return false; + } +} + +u32 gp10b_get_arbiter_clk_domains(struct gk20a *g) +{ + (void)g; + clk_arb_dbg(g, " "); + return CTRL_CLK_DOMAIN_GPCCLK; +} + +int gp10b_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz) +{ + int ret = 0; + + clk_arb_dbg(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + ret = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK, + num_points, freqs_in_mhz); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz) +{ + int ret = 0; + + clk_arb_dbg(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + ret = g->ops.clk.get_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, + min_mhz, max_mhz); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz) +{ + int ret = 0; + u16 min_mhz, max_mhz; + + clk_arb_dbg(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + ret = gp10b_get_arbiter_clk_range(g, api_domain, + &min_mhz, &max_mhz); + + if (ret == 0) { + *default_mhz = max_mhz; + } + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +int gp10b_init_clk_arbiter(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb = NULL; + u16 default_mhz; + int err; + int index; + struct nvgpu_clk_vf_table *table; + + clk_arb_dbg(g, " "); + + if(g->clk_arb != NULL) { + return 0; + } + + arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); + if (arb == NULL) { + return -ENOMEM; + } + + nvgpu_mutex_init(&arb->pstate_lock); + + nvgpu_spinlock_init(&arb->sessions_lock); + nvgpu_spinlock_init(&arb->users_lock); + nvgpu_spinlock_init(&arb->requests_lock); + + arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (arb->gpc2clk_f_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + for (index = 0; index < 2; index++) { + table = &arb->vf_table_pool[index]; + table->gpc2clk_num_points = MAX_F_POINTS; + + table->gpc2clk_points = (struct nvgpu_clk_vf_point *) + nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (table->gpc2clk_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + } + + g->clk_arb = arb; + arb->g = g; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_GPCCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->gpc2clk_default_mhz = default_mhz; + + err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK, + &arb->gpc2clk_min, &arb->gpc2clk_max); + + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->actual = &arb->actual_pool[0]; + + nvgpu_atomic_set(&arb->req_nr, 0); + + nvgpu_atomic64_set(&arb->alarm_mask, 0); + err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, + DEFAULT_EVENT_NUMBER); + if (err < 0) { + goto init_fail; + } + + nvgpu_init_list_node(&arb->users); + nvgpu_init_list_node(&arb->sessions); + nvgpu_init_list_node(&arb->requests); + + err = nvgpu_cond_init(&arb->request_wq); + if (err < 0) { + goto init_fail; + } + + nvgpu_init_list_node(&arb->update_arb_work_item.worker_item); + arb->update_arb_work_item.arb = arb; + arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB; + + err = 
nvgpu_clk_arb_worker_init(g); + if (err < 0) { + goto init_fail; + } + + /* This is set for the duration of the default req */ + nvgpu_atomic_inc(&g->clk_arb_global_nr); + + nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); + + do { + /* Check that first run is completed */ + nvgpu_smp_mb(); + NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, + nvgpu_atomic_read(&arb->req_nr) != 0, 0U); + } while (nvgpu_atomic_read(&arb->req_nr) == 0); + + /* Once the default request is completed, reduce the usage count */ + nvgpu_atomic_dec(&g->clk_arb_global_nr); + + return arb->status; + +init_fail: + nvgpu_kfree(g, arb->gpc2clk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); + } + + nvgpu_mutex_destroy(&arb->pstate_lock); + nvgpu_kfree(g, arb); + + return err; +} + +void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) +{ + struct nvgpu_clk_session *session; + struct nvgpu_clk_dev *dev; + struct nvgpu_clk_dev *tmp; + struct nvgpu_clk_arb_target *target, *actual; + struct gk20a *g = arb->g; + + bool gpc2clk_set; + + int status = 0; + unsigned long rounded_rate = 0; + + u16 gpc2clk_target, gpc2clk_session_target; + + clk_arb_dbg(g, " "); + + /* Only one arbiter should be running */ + gpc2clk_target = 0; + + nvgpu_spinlock_acquire(&arb->sessions_lock); + nvgpu_list_for_each_entry(session, &arb->sessions, + nvgpu_clk_session, link) { + if (session->zombie) { + continue; + } + gpc2clk_set = false; + target = (session->target == &session->target_pool[0] ? + &session->target_pool[1] : + &session->target_pool[0]); + nvgpu_spinlock_acquire(&session->session_lock); + if (!nvgpu_list_empty(&session->targets)) { + /* Copy over state */ + target->gpc2clk = session->target->gpc2clk; + /* Query the latest committed request */ + nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets, + nvgpu_clk_dev, node) { + if (!gpc2clk_set && + dev->gpc2clk_target_mhz != (u16)0) { + target->gpc2clk = + dev->gpc2clk_target_mhz; + gpc2clk_set = true; + } + nvgpu_ref_get(&dev->refcount); + nvgpu_list_del(&dev->node); + nvgpu_spinlock_acquire(&arb->requests_lock); + nvgpu_list_add(&dev->node, &arb->requests); + nvgpu_spinlock_release(&arb->requests_lock); + } + session->target = target; + } + nvgpu_spinlock_release(&session->session_lock); + + gpc2clk_target = + gpc2clk_target > session->target->gpc2clk ? + gpc2clk_target : session->target->gpc2clk; + } + nvgpu_spinlock_release(&arb->sessions_lock); + + gpc2clk_target = (gpc2clk_target > (u16)0) ? 
gpc2clk_target : + arb->gpc2clk_default_mhz; + + if (gpc2clk_target < arb->gpc2clk_min) { + gpc2clk_target = arb->gpc2clk_min; + } + + if (gpc2clk_target > arb->gpc2clk_max) { + gpc2clk_target = arb->gpc2clk_max; + } + + gpc2clk_session_target = gpc2clk_target; + + if (arb->actual->gpc2clk == gpc2clk_target) { + nvgpu_atomic_inc(&arb->req_nr); + nvgpu_cond_signal_interruptible(&arb->request_wq); + goto exit_arb; + } + + nvgpu_mutex_acquire(&arb->pstate_lock); + + status = g->ops.clk.clk_get_round_rate(g, + CTRL_CLK_DOMAIN_GPCCLK, gpc2clk_session_target * 1000000UL, &rounded_rate); + + clk_arb_dbg(g, "rounded_rate: %lu\n", + rounded_rate); + + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + nvgpu_atomic_inc(&arb->req_nr); + nvgpu_cond_signal_interruptible(&arb->request_wq); + goto exit_arb; + } + + /* the igpu set_rate accepts freq in Hz */ + status = g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate); + + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + nvgpu_atomic_inc(&arb->req_nr); + nvgpu_cond_signal_interruptible(&arb->request_wq); + goto exit_arb; + } + + actual = ((NV_READ_ONCE(arb->actual)) == &arb->actual_pool[0] ? + &arb->actual_pool[1] : &arb->actual_pool[0]); + + /* do not reorder this pointer */ + nvgpu_smp_rmb(); + actual->gpc2clk = gpc2clk_target; + arb->status = 0; + + /* Make changes visible to other threads */ + nvgpu_smp_wmb(); + arb->actual = actual; + + /* status must be visible before atomic inc */ + nvgpu_smp_wmb(); + nvgpu_atomic_inc(&arb->req_nr); + + /* Unlock pstate change for PG */ + nvgpu_mutex_release(&arb->pstate_lock); + + nvgpu_cond_signal_interruptible(&arb->request_wq); + +exit_arb: + if (status < 0) { + nvgpu_err(g, "Error in arbiter update"); + } + + /* notify completion for all requests */ + nvgpu_spinlock_acquire(&arb->requests_lock); + nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests, + nvgpu_clk_dev, node) { + u32 tmp_poll_mask = NVGPU_POLLIN | NVGPU_POLLRDNORM; + nvgpu_atomic_set(&dev->poll_mask, + (int)tmp_poll_mask); + nvgpu_clk_arb_event_post_event(dev); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + nvgpu_list_del(&dev->node); + } + nvgpu_spinlock_release(&arb->requests_lock); + + clk_arb_dbg(g, "done"); +} + +void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb) +{ + struct gk20a *g = arb->g; + int index; + + nvgpu_clk_arb_worker_deinit(g); + nvgpu_kfree(g, arb->gpc2clk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, + arb->vf_table_pool[index].gpc2clk_points); + } + + nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); + nvgpu_kfree(g, g->clk_arb); + + g->clk_arb = NULL; +} diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.h b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.h new file mode 100644 index 000000000..1a248e42a --- /dev/null +++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef CLK_ARB_GP10B_H
+#define CLK_ARB_GP10B_H
+
+struct nvgpu_clk_session;
+struct nvgpu_clk_arb;
+
+bool gp10b_check_clk_arb_support(struct gk20a *g);
+u32 gp10b_get_arbiter_clk_domains(struct gk20a *g);
+int gp10b_get_arbiter_f_points(struct gk20a *g, u32 api_domain,
+		u32 *num_points, u16 *freqs_in_mhz);
+int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
+		u16 *min_mhz, u16 *max_mhz);
+int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
+		u16 *default_mhz);
+int gp10b_init_clk_arbiter(struct gk20a *g);
+void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb);
+void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb);
+
+#endif /* CLK_ARB_GP10B_H */
diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.c b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.c
new file mode 100644
index 000000000..6ef8e6973
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.c
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#include +#include +#include +#include +#include +#include + +#include "clk_arb_gv100.h" + +bool gv100_check_clk_arb_support(struct gk20a *g) +{ + if ((g->ops.clk_arb.get_arbiter_clk_domains != NULL) && + nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)){ + return true; + } + else { + return false; + } +} + +u32 gv100_get_arbiter_clk_domains(struct gk20a *g) +{ + (void)g; + return (CTRL_CLK_DOMAIN_GPCCLK); +} + +int gv100_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz) +{ + return g->ops.clk.clk_domain_get_f_points(g, + api_domain, num_points, freqs_in_mhz); +} + +int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz) +{ + u32 clkwhich; + struct nvgpu_pmu_perf_pstate_clk_info *p0_info; + u16 max_min_freq_mhz; + u16 limit_min_mhz; + u16 gpcclk_cap_mhz; + bool error_status = false; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_MCLK: + clkwhich = CLKWHICH_MCLK; + break; + + case CTRL_CLK_DOMAIN_GPCCLK: + clkwhich = CLKWHICH_GPCCLK; + break; + + default: + error_status = true; + break; + } + + if (error_status == true) { + return -EINVAL; + } + + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, clkwhich); + if (p0_info == NULL) { + return -EINVAL; + } + + limit_min_mhz = p0_info->min_mhz; + gpcclk_cap_mhz = p0_info->max_mhz; + + max_min_freq_mhz = nvgpu_pmu_clk_fll_get_min_max_freq(g); + /* + * When DVCO min is 0 in vbios update it to DVCO_MIN_DEFAULT_MHZ. + */ + if (max_min_freq_mhz == 0U) { + max_min_freq_mhz = DVCO_MIN_DEFAULT_MHZ; + } + + /* + * Needed for DVCO min. + */ + if (api_domain == CTRL_CLK_DOMAIN_GPCCLK) { + if ((max_min_freq_mhz != 0U) && + (max_min_freq_mhz >= limit_min_mhz)) { + limit_min_mhz = nvgpu_safe_cast_u32_to_u16( + nvgpu_safe_add_u32(max_min_freq_mhz, 1U)); + } + if ((g->clk_arb->gpc_cap_clkmhz != 0U) && + (p0_info->max_mhz > g->clk_arb->gpc_cap_clkmhz )) { + gpcclk_cap_mhz = g->clk_arb->gpc_cap_clkmhz; + } + } + *min_mhz = limit_min_mhz; + *max_mhz = gpcclk_cap_mhz; + + return 0; +} + +int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz) +{ + u32 clkwhich; + struct nvgpu_pmu_perf_pstate_clk_info *p0_info; + bool error_status = false; + u16 gpcclk_cap_mhz; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_MCLK: + clkwhich = CLKWHICH_MCLK; + break; + + case CTRL_CLK_DOMAIN_GPCCLK: + clkwhich = CLKWHICH_GPCCLK; + break; + + default: + error_status = true; + break; + } + + if (error_status == true) { + return -EINVAL; + } + + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, clkwhich); + if (p0_info == NULL) { + return -EINVAL; + } + + gpcclk_cap_mhz = p0_info->max_mhz; + if (api_domain == CTRL_CLK_DOMAIN_GPCCLK) { + if ((g->clk_arb->gpc_cap_clkmhz != 0U) && + (p0_info->max_mhz > g->clk_arb->gpc_cap_clkmhz )) { + gpcclk_cap_mhz = g->clk_arb->gpc_cap_clkmhz; + } + } + *default_mhz = gpcclk_cap_mhz; + + return 0; +} + +int gv100_init_clk_arbiter(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb; + u16 default_mhz; + int err; + int index; + struct nvgpu_clk_vf_table *table; + clk_arb_dbg(g, " "); + + if (g->clk_arb != NULL) { + return 0; + } + arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); + if (arb == NULL) { + return -ENOMEM; + } + + nvgpu_mutex_init(&arb->pstate_lock); + nvgpu_spinlock_init(&arb->sessions_lock); + nvgpu_spinlock_init(&arb->users_lock); + nvgpu_spinlock_init(&arb->requests_lock); + + arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (arb->mclk_f_points == NULL) { + 
err = -ENOMEM; + goto init_fail; + } + + arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (arb->gpc2clk_f_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + for (index = 0; index < 2; index++) { + table = &arb->vf_table_pool[index]; + table->gpc2clk_num_points = MAX_F_POINTS; + table->mclk_num_points = MAX_F_POINTS; + + table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (table->gpc2clk_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + + + table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (table->mclk_points == NULL) { + err = -ENOMEM; + goto init_fail; + } + } + + g->clk_arb = arb; + arb->g = g; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_MCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->mclk_default_mhz = default_mhz; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_GPCCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->gpc2clk_default_mhz = default_mhz; + + arb->actual = &arb->actual_pool[0]; + + nvgpu_atomic_set(&arb->req_nr, 0); + + nvgpu_atomic64_set(&arb->alarm_mask, 0); + err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, + DEFAULT_EVENT_NUMBER); + if (err < 0) { + goto init_fail; + } + nvgpu_init_list_node(&arb->users); + nvgpu_init_list_node(&arb->sessions); + nvgpu_init_list_node(&arb->requests); + + (void)nvgpu_cond_init(&arb->request_wq); + + nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item); + nvgpu_init_list_node(&arb->update_arb_work_item.worker_item); + arb->update_vf_table_work_item.arb = arb; + arb->update_arb_work_item.arb = arb; + arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE; + arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB; + err = nvgpu_clk_arb_worker_init(g); + if (err < 0) { + goto init_fail; + } + + if (g->dgpu_max_clk != 0U) { + g->dgpu_max_clk = (g->dgpu_max_clk / + FREQ_STEP_SIZE_MHZ) * FREQ_STEP_SIZE_MHZ; + arb->gpc_cap_clkmhz = g->dgpu_max_clk; + } +#ifdef CONFIG_DEBUG_FS + arb->debug = &arb->debug_pool[0]; + + if (!arb->debugfs_set) { + if (nvgpu_clk_arb_debugfs_init(g)) + arb->debugfs_set = true; + } +#endif + err = nvgpu_clk_vf_point_cache(g); + if (err < 0) { + goto init_fail; + } + + err = nvgpu_clk_arb_update_vf_table(arb); + if (err < 0) { + goto init_fail; + } + + do { + /* Check that first run is completed */ + nvgpu_smp_mb(); + NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, + nvgpu_atomic_read(&arb->req_nr), 0U); + } while (nvgpu_atomic_read(&arb->req_nr) == 0); + return arb->status; + +init_fail: + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&arb->pstate_lock); + nvgpu_kfree(g, arb); + + return err; +} + +void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) +{ + struct nvgpu_clk_session *session; + struct nvgpu_clk_dev *dev; + struct nvgpu_clk_dev *tmp; + struct nvgpu_clk_arb_target *target, *actual; + struct gk20a *g = arb->g; + + u32 current_pstate = VF_POINT_INVALID_PSTATE; + u32 voltuv = 0; + bool mclk_set, gpc2clk_set; + u32 alarms_notified = 0; + u32 current_alarm; + int status = 0; + /* Temporary variables for checking target frequency */ + u16 gpc2clk_target, mclk_target; + struct 
nvgpu_clk_slave_freq vf_point; + +#ifdef CONFIG_DEBUG_FS + s64 t0, t1; + struct nvgpu_clk_arb_debug *debug; + +#endif + + clk_arb_dbg(g, " "); + + /* bail out if gpu is down */ + if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) { + goto exit_arb; + } + +#ifdef CONFIG_DEBUG_FS + t0 = nvgpu_current_time_ns(); +#endif + + /* Only one arbiter should be running */ + gpc2clk_target = 0; + mclk_target = 0; + nvgpu_spinlock_acquire(&arb->sessions_lock); + nvgpu_list_for_each_entry(session, &arb->sessions, + nvgpu_clk_session, link) { + if (!session->zombie) { + mclk_set = false; + gpc2clk_set = false; + target = (session->target == &session->target_pool[0] ? + &session->target_pool[1] : + &session->target_pool[0]); + nvgpu_spinlock_acquire(&session->session_lock); + if (!nvgpu_list_empty(&session->targets)) { + /* Copy over state */ + target->mclk = session->target->mclk; + target->gpc2clk = session->target->gpc2clk; + /* Query the latest committed request */ + nvgpu_list_for_each_entry_safe(dev, tmp, + &session->targets, nvgpu_clk_dev, node) { + if ((mclk_set == false) && (dev->mclk_target_mhz != 0U)) { + target->mclk = + dev->mclk_target_mhz; + mclk_set = true; + } + if ((gpc2clk_set == false) && + (dev->gpc2clk_target_mhz != 0U)) { + target->gpc2clk = + dev->gpc2clk_target_mhz; + gpc2clk_set = true; + } + nvgpu_ref_get(&dev->refcount); + nvgpu_list_del(&dev->node); + nvgpu_spinlock_acquire( + &arb->requests_lock); + nvgpu_list_add( + &dev->node, &arb->requests); + nvgpu_spinlock_release(&arb->requests_lock); + } + session->target = target; + } + nvgpu_spinlock_release( + &session->session_lock); + + mclk_target = mclk_target > session->target->mclk ? + mclk_target : session->target->mclk; + + gpc2clk_target = + gpc2clk_target > session->target->gpc2clk ? + gpc2clk_target : session->target->gpc2clk; + } + } + nvgpu_spinlock_release(&arb->sessions_lock); + + gpc2clk_target = (gpc2clk_target > 0U) ? gpc2clk_target : + arb->gpc2clk_default_mhz; + + if (gpc2clk_target < arb->gpc2clk_min) { + gpc2clk_target = arb->gpc2clk_min; + } + + if (gpc2clk_target > arb->gpc2clk_max) { + gpc2clk_target = arb->gpc2clk_max; + } + + mclk_target = (mclk_target > 0U) ? mclk_target : + arb->mclk_default_mhz; + + if (mclk_target < arb->mclk_min) { + mclk_target = arb->mclk_min; + } + + if (mclk_target > arb->mclk_max) { + mclk_target = arb->mclk_max; + } + + if ((arb->gpc_cap_clkmhz != 0U) && + (gpc2clk_target > arb->gpc_cap_clkmhz)) { + gpc2clk_target = arb->gpc_cap_clkmhz; + } + + vf_point.gpc_mhz = gpc2clk_target; + (void)nvgpu_clk_arb_find_slave_points(arb, &vf_point); + if (status != 0) { + nvgpu_err(g, "Unable to get slave frequency"); + goto exit_arb; + } + + status = nvgpu_pmu_perf_changeseq_set_clks(g, &vf_point); + if (status != 0) { + nvgpu_err(g, "Unable to program frequency"); + goto exit_arb; + } + + actual = NV_READ_ONCE(arb->actual) == &arb->actual_pool[0] ? 
+ &arb->actual_pool[1] : &arb->actual_pool[0]; + + /* do not reorder this pointer */ + nvgpu_smp_rmb(); + actual->gpc2clk = gpc2clk_target; + actual->mclk = mclk_target; + arb->voltuv_actual = voltuv; + actual->pstate = current_pstate; + arb->status = status; + + /* Make changes visible to other threads */ + nvgpu_smp_wmb(); + arb->actual = actual; + + /* status must be visible before atomic inc */ + nvgpu_smp_wmb(); + nvgpu_atomic_inc(&arb->req_nr); + + /* VF Update complete */ + nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); + + nvgpu_cond_signal_interruptible(&arb->request_wq); +#ifdef CONFIG_DEBUG_FS + t1 = nvgpu_current_time_ns(); + + debug = arb->debug == &arb->debug_pool[0] ? + &arb->debug_pool[1] : &arb->debug_pool[0]; + + memcpy(debug, arb->debug, sizeof(arb->debug_pool[0])); + debug->switch_num++; + + if (debug->switch_num == 1) { + debug->switch_max = debug->switch_min = + debug->switch_avg = (t1-t0)/1000; + debug->switch_std = 0; + } else { + s64 prev_avg; + s64 curr = (t1-t0)/1000; + + debug->switch_max = curr > debug->switch_max ? + curr : debug->switch_max; + debug->switch_min = debug->switch_min ? + (curr < debug->switch_min ? + curr : debug->switch_min) : curr; + prev_avg = debug->switch_avg; + debug->switch_avg = (curr + + (debug->switch_avg * (debug->switch_num-1))) / + debug->switch_num; + debug->switch_std += + (curr - debug->switch_avg) * (curr - prev_avg); + } + /* commit changes before exchanging debug pointer */ + nvgpu_smp_wmb(); + arb->debug = debug; +#endif + +exit_arb: + if (status < 0) { + nvgpu_err(g, "Error in arbiter update"); + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_CLOCK_ARBITER_FAILED)); + } + + current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); + /* notify completion for all requests */ + nvgpu_spinlock_acquire(&arb->requests_lock); + nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests, + nvgpu_clk_dev, node) { + /* avoid casting composite expression below */ + u32 tmp_mask = NVGPU_POLLIN | NVGPU_POLLRDNORM; + + nvgpu_atomic_set(&dev->poll_mask, (int)tmp_mask); + nvgpu_clk_arb_event_post_event(dev); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + nvgpu_list_del(&dev->node); + } + nvgpu_spinlock_release(&arb->requests_lock); + + nvgpu_atomic_set(&arb->notification_queue.head, + nvgpu_atomic_read(&arb->notification_queue.tail)); + /* notify event for all users */ + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) { + alarms_notified |= + nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); + } + nvgpu_spinlock_release(&arb->users_lock); + + /* clear alarms */ + nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & + ~EVENT(ALARM_GPU_LOST)); +} + +void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb) +{ + struct gk20a *g = arb->g; + int index; + + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, + arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); + nvgpu_kfree(g, g->clk_arb); + + g->clk_arb = NULL; +} + +void gv100_stop_clk_arb_threads(struct gk20a *g) +{ + nvgpu_clk_arb_worker_deinit(g); +} diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.h b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.h new file mode 100644 index 000000000..386e3cc9f --- /dev/null +++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016-2020, 
NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef CLK_ARB_GV100_H +#define CLK_ARB_GV100_H + +struct nvgpu_clk_session; +struct nvgpu_clk_arb; + +#define DVCO_MIN_DEFAULT_MHZ 405 + +bool gv100_check_clk_arb_support(struct gk20a *g); +u32 gv100_get_arbiter_clk_domains(struct gk20a *g); +int gv100_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz); +int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz); +int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz); +int gv100_init_clk_arbiter(struct gk20a *g); +void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb); +void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb); +void gv100_stop_clk_arb_threads(struct gk20a *g); +#endif /* CLK_ARB_GV100_H */ diff --git a/drivers/gpu/nvgpu/common/cyclestats/cyclestats.c b/drivers/gpu/nvgpu/common/cyclestats/cyclestats.c new file mode 100644 index 000000000..6665ce362 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cyclestats/cyclestats.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
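Both the gp10b and gv100 arbiter callbacks above publish their result through a two-element actual_pool[] instead of updating the live entry in place. A minimal sketch of that publish pattern, assuming only the NV_READ_ONCE() and nvgpu_smp_wmb() helpers and the gpc2clk/mclk fields already used in those callbacks:

static void sketch_publish_actual(struct nvgpu_clk_arb *arb,
				  u16 gpc2clk_mhz, u16 mclk_mhz)
{
	struct nvgpu_clk_arb_target *next;

	/* Pick the pool element that is currently not published. */
	next = (NV_READ_ONCE(arb->actual) == &arb->actual_pool[0]) ?
			&arb->actual_pool[1] : &arb->actual_pool[0];

	next->gpc2clk = gpc2clk_mhz;
	next->mclk = mclk_mhz;

	/* Make the new values visible before exchanging the pointer, so
	 * readers never observe a half-written target. */
	nvgpu_smp_wmb();
	arb->actual = next;
}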
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cyclestats_priv.h" + +static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g, + u32 offset) +{ + /* support only 24-bit 4-byte aligned offsets */ + bool valid = !(offset & 0xFF000003U); + + if (g->allow_all) { + return true; + } + + /* whitelist check */ + valid = valid && + is_bar0_global_offset_whitelisted_gk20a(g, offset); + /* resource size check in case there was a problem + * with allocating the assumed size of bar0 */ + valid = valid && nvgpu_io_valid_reg(g, offset); + return valid; +} + +void nvgpu_cyclestats_exec(struct gk20a *g, + struct nvgpu_channel *ch, u32 offset) +{ + void *virtual_address; + u32 buffer_size; + bool exit; + + /* GL will never use payload 0 for cycle state */ + if ((ch->cyclestate.cyclestate_buffer == NULL) || (offset == 0U)) { + return; + } + + nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); + + virtual_address = ch->cyclestate.cyclestate_buffer; + buffer_size = ch->cyclestate.cyclestate_buffer_size; + exit = false; + + while (!exit) { + struct share_buffer_head *sh_hdr; + u32 min_element_size; + + /* validate offset */ + if (offset + sizeof(struct share_buffer_head) > buffer_size || + offset + sizeof(struct share_buffer_head) < offset) { + nvgpu_err(g, + "cyclestats buffer overrun at offset 0x%x", + offset); + break; + } + + sh_hdr = (struct share_buffer_head *) + ((char *)virtual_address + offset); + + min_element_size = + U32(sh_hdr->operation == OP_END ? + sizeof(struct share_buffer_head) : + sizeof(struct nvgpu_cyclestate_buffer_elem)); + + /* validate sh_hdr->size */ + if (sh_hdr->size < min_element_size || + offset + sh_hdr->size > buffer_size || + offset + sh_hdr->size < offset) { + nvgpu_err(g, + "bad cyclestate buffer header size at offset 0x%x", + offset); + sh_hdr->failed = U32(true); + break; + } + + switch (sh_hdr->operation) { + case OP_END: + exit = true; + break; + + case BAR0_READ32: + case BAR0_WRITE32: + { + struct nvgpu_cyclestate_buffer_elem *op_elem = + (struct nvgpu_cyclestate_buffer_elem *)sh_hdr; + bool valid = is_valid_cyclestats_bar0_offset_gk20a( + g, op_elem->offset_bar0); + u32 raw_reg; + u64 mask_orig; + u64 v; + + if (!valid) { + nvgpu_err(g, + "invalid cycletstats op offset: 0x%x", + op_elem->offset_bar0); + + exit = true; + sh_hdr->failed = U32(exit); + break; + } + + mask_orig = + ((1ULL << (op_elem->last_bit + 1)) - 1) & + ~((1ULL << op_elem->first_bit) - 1); + + raw_reg = nvgpu_readl(g, op_elem->offset_bar0); + + switch (sh_hdr->operation) { + case BAR0_READ32: + op_elem->data = ((raw_reg & mask_orig) + >> op_elem->first_bit); + break; + + case BAR0_WRITE32: + v = 0; + if ((unsigned int)mask_orig != + ~((unsigned int)0)) { + v = (unsigned int) + (raw_reg & ~mask_orig); + } + + v |= ((op_elem->data << op_elem->first_bit) + & mask_orig); + nvgpu_writel(g,op_elem->offset_bar0, + (unsigned int)v); + break; + default: + /* nop ok?*/ + break; + } + } + break; + + default: + /* no operation content case */ + exit = true; + break; + } + sh_hdr->completed = U32(true); + offset += sh_hdr->size; + } + nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); +} diff --git a/drivers/gpu/nvgpu/common/cyclestats/cyclestats_priv.h b/drivers/gpu/nvgpu/common/cyclestats/cyclestats_priv.h new file mode 100644 index 000000000..1358ec712 --- /dev/null +++ b/drivers/gpu/nvgpu/common/cyclestats/cyclestats_priv.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
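A worked example of the BAR0 bit-field masking in nvgpu_cyclestats_exec() above, for a hypothetical element with first_bit = 8 and last_bit = 15:

/*
 * mask_orig = ((1ULL << (15 + 1)) - 1) & ~((1ULL << 8) - 1) = 0xFF00
 *
 * BAR0_READ32:  op_elem->data = (raw_reg & 0xFF00) >> 8
 * BAR0_WRITE32: v = (raw_reg & ~0xFF00) | ((op_elem->data << 8) & 0xFF00)
 *
 * so a write updates only bits 8..15 and preserves the rest of the
 * register, unless the mask covers the whole word.
 */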
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CYCLESTATS_PRIV_H +#define NVGPU_CYCLESTATS_PRIV_H + +#include + +#define MULTICHAR_TAG(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) + +enum BAR0_DEBUG_OPERATION { + BARO_ZERO_NOP = 0, + OP_END = MULTICHAR_TAG('D', 'O', 'N', 'E'), + BAR0_READ32 = MULTICHAR_TAG('0', 'R', '3', '2'), + BAR0_WRITE32 = MULTICHAR_TAG('0', 'W', '3', '2'), +}; + +struct share_buffer_head { + enum BAR0_DEBUG_OPERATION operation; +/* size of the operation item */ + u32 size; + u32 completed; + u32 failed; + u64 context; + u64 completion_callback; +}; + +struct nvgpu_cyclestate_buffer_elem { + struct share_buffer_head head; +/* in */ + u64 p_data; + u64 p_done; + u32 offset_bar0; + u16 first_bit; + u16 last_bit; +/* out */ +/* keep 64 bits to be consistent */ + u64 data; +}; + +#endif /* NVGPU_CYCLESTATS_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/debugger.c b/drivers/gpu/nvgpu/common/debugger.c new file mode 100644 index 000000000..18fa4bacd --- /dev/null +++ b/drivers/gpu/nvgpu/common/debugger.c @@ -0,0 +1,241 @@ +/* + * Tegra GK20A GPU Debugger/Profiler Driver + * + * Copyright (c) 2013-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
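For reference, the MULTICHAR_TAG() values defined in cyclestats_priv.h above expand to the following ASCII-derived constants:

/*
 * OP_END       = MULTICHAR_TAG('D','O','N','E') = 0x444F4E45
 * BAR0_READ32  = MULTICHAR_TAG('0','R','3','2') = 0x30523332
 * BAR0_WRITE32 = MULTICHAR_TAG('0','W','3','2') = 0x30573332
 *
 * The operation field of a share_buffer_head therefore doubles as a
 * human-readable tag when the buffer is inspected in a dump.
 */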
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * API to get first channel from the list of all channels + * bound to the debug session + */ +struct nvgpu_channel * +nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s) +{ + struct dbg_session_channel_data *ch_data; + struct nvgpu_channel *ch; + struct gk20a *g = dbg_s->g; + + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + if (nvgpu_list_empty(&dbg_s->ch_list)) { + nvgpu_mutex_release(&dbg_s->ch_list_lock); + return NULL; + } + + ch_data = nvgpu_list_first_entry(&dbg_s->ch_list, + dbg_session_channel_data, + ch_entry); + ch = g->fifo.channel + ch_data->chid; + + nvgpu_mutex_release(&dbg_s->ch_list_lock); + + return ch; +} + +void nvgpu_dbg_gpu_post_events(struct nvgpu_channel *ch) +{ + struct dbg_session_data *session_data; + struct dbg_session_gk20a *dbg_s; + struct gk20a *g = ch->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + /* guard against the session list being modified */ + nvgpu_mutex_acquire(&ch->dbg_s_lock); + + nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list, + dbg_session_data, dbg_s_entry) { + dbg_s = session_data->dbg_s; + if (dbg_s->dbg_events.events_enabled) { + nvgpu_log(g, gpu_dbg_gpu_dbg, "posting event on session id %d", + dbg_s->id); + nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", + dbg_s->dbg_events.num_pending_events); + + dbg_s->dbg_events.num_pending_events++; + + nvgpu_dbg_session_post_event(dbg_s); + } + } + + nvgpu_mutex_release(&ch->dbg_s_lock); +} + +bool nvgpu_dbg_gpu_broadcast_stop_trigger(struct nvgpu_channel *ch) +{ + struct dbg_session_data *session_data; + struct dbg_session_gk20a *dbg_s; + bool broadcast = false; + struct gk20a *g = ch->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " "); + + /* guard against the session list being modified */ + nvgpu_mutex_acquire(&ch->dbg_s_lock); + + nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list, + dbg_session_data, dbg_s_entry) { + dbg_s = session_data->dbg_s; + if (dbg_s->broadcast_stop_trigger) { + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr, + "stop trigger broadcast enabled"); + broadcast = true; + break; + } + } + + nvgpu_mutex_release(&ch->dbg_s_lock); + + return broadcast; +} + +void nvgpu_dbg_gpu_clear_broadcast_stop_trigger(struct nvgpu_channel *ch) +{ + struct dbg_session_data *session_data; + struct dbg_session_gk20a *dbg_s; + struct gk20a *g = ch->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " "); + + /* guard against the session list being modified */ + nvgpu_mutex_acquire(&ch->dbg_s_lock); + + nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list, + dbg_session_data, dbg_s_entry) { + dbg_s = session_data->dbg_s; + if (dbg_s->broadcast_stop_trigger) { + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr, + "stop trigger broadcast disabled"); + dbg_s->broadcast_stop_trigger = false; + } + } + + nvgpu_mutex_release(&ch->dbg_s_lock); +} + +u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s, + bool mode) +{ + u32 err = 0U; + struct gk20a *g = dbg_s->g; + + if (dbg_s->is_pg_disabled != mode) { + if (mode == false) { + g->dbg_powergating_disabled_refcount--; + } + + /* + * Allow powergate disable or enable only if + * the global pg disabled refcount is zero + */ + if (g->dbg_powergating_disabled_refcount == 0) { + err = g->ops.debugger.dbg_set_powergate(dbg_s, + mode); + } + + if (mode) { + g->dbg_powergating_disabled_refcount++; + } + + 
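/*
 * Only the outermost disable/enable toggles HW powergating:
 * dbg_powergating_disabled_refcount counts how many debug sessions
 * currently need powergate off, and dbg_set_powergate() runs only when
 * that count is zero. Record the per-session state below.
 */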
dbg_s->is_pg_disabled = mode; + } + + return err; +} + +int nvgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate) +{ + int err = 0; + struct gk20a *g = dbg_s->g; + + /* This function must be called with g->dbg_sessions_lock held */ + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s", + g->name, disable_powergate ? "disable" : "enable"); + + /* + * Powergate mode here refers to railgate+powergate+clockgate + * so in case slcg/blcg/elcg are disabled and railgating is enabled, + * disable railgating and then set is_pg_disabled = true + * Similarly re-enable railgating and not other features if they are not + * enabled when powermode=MODE_ENABLE + */ + if (disable_powergate) { + /* save off current powergate, clk state. + * set gpu module's can_powergate = 0. + * set gpu module's clk to max. + * while *a* debug session is active there will be no power or + * clocking state changes allowed from mainline code (but they + * should be saved). + */ + + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, + "module busy"); + err = gk20a_busy(g); + if (err != 0) { + return err; + } + +#ifdef CONFIG_NVGPU_NON_FUSA + err = nvgpu_cg_pg_disable(g); +#endif + if (err == 0) { + dbg_s->is_pg_disabled = true; + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, + "pg disabled"); + } + } else { + /* restore (can) powergate, clk state */ + /* release pending exceptions to fault/be handled as usual */ + /*TBD: ordering of these? */ + +#ifdef CONFIG_NVGPU_NON_FUSA + err = nvgpu_cg_pg_enable(g); +#endif + if (err == 0) { + dbg_s->is_pg_disabled = false; + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, + "pg enabled"); + } + + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle"); + + gk20a_idle(g); + } + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s done", + g->name, disable_powergate ? "disable" : "enable"); + return err; +} diff --git a/drivers/gpu/nvgpu/common/device.c b/drivers/gpu/nvgpu/common/device.c new file mode 100644 index 000000000..690678df5 --- /dev/null +++ b/drivers/gpu/nvgpu/common/device.c @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#define device_dbg(g, fmt, args...) 
\ + do { \ + nvgpu_log(g, gpu_dbg_device, fmt, ##args); \ + } while (0) + +static inline const char *nvgpu_device_type_to_str(const struct nvgpu_device *dev) +{ + const char *str = "Unknown"; + + switch (dev->type) { + case NVGPU_DEVTYPE_GRAPHICS: + str = "GFX"; + break; + case NVGPU_DEVTYPE_COPY0: + str = "CE0"; + break; + case NVGPU_DEVTYPE_COPY1: + str = "CE1"; + break; + case NVGPU_DEVTYPE_COPY2: + str = "CE2"; + break; + case NVGPU_DEVTYPE_IOCTRL: + str = "IOCTRL"; + break; + case NVGPU_DEVTYPE_LCE: + str = "LCE"; + break; + default: + break; + } + + return str; +} + +void nvgpu_device_dump_dev(struct gk20a *g, const struct nvgpu_device *dev) +{ + device_dbg(g, "Device %s:%d", + nvgpu_device_type_to_str(dev), dev->inst_id); + device_dbg(g, " EngineID: %2u FaultID: %2u", + dev->engine_id, dev->fault_id); + device_dbg(g, " RunlistID: %2u IntrID: %2u ResetID: %u", + dev->runlist_id, dev->intr_id, dev->reset_id); + device_dbg(g, " PRI Base: 0x%x", dev->pri_base); +} + +/* + * Faciliate the parsing of the TOP array describing the devices present in the + * GPU. + */ +static int nvgpu_device_parse_hw_table(struct gk20a *g) +{ + int ret = 0; + u32 token = NVGPU_DEVICE_TOKEN_INIT; + struct nvgpu_device *dev; + struct nvgpu_list_node *devlist; + + while (true) { + dev = g->ops.top.parse_next_device(g, &token); + if (dev == NULL) { + break; + } + + nvgpu_device_dump_dev(g, dev); + + /* + * Otherwise we have a device - let's add it to the right device + * list. + */ + devlist = &g->devs->devlist_heads[dev->type]; + + nvgpu_list_add_tail(&dev->dev_list_node, devlist); + g->devs->dev_counts[dev->type] += 1; + } + + return ret; +} + +/* + * Faciliate reading the HW register table into a software abstraction. This is + * done only on the first boot as the table will never change dynamically. + */ +int nvgpu_device_init(struct gk20a *g) +{ + u32 i; + + device_dbg(g, "Initializating GPU device list"); + + /* + * Ground work - make sure we aren't doing this again and that we have + * all the necessary data structures. + */ + if (g->devs != NULL) { + device_dbg(g, " GPU device list already present. Done."); + return 0; + } + + g->devs = nvgpu_kzalloc(g, sizeof(*g->devs)); + if (g->devs == NULL) { + return -ENOMEM; + } + + for (i = 0; i < NVGPU_MAX_DEVTYPE; i++) { + nvgpu_init_list_node(&g->devs->devlist_heads[i]); + } + + return nvgpu_device_parse_hw_table(g); +} + +static void nvgpu_device_cleanup_devtype(struct gk20a *g, + struct nvgpu_list_node *list) +{ + struct nvgpu_device *dev; + + while (!nvgpu_list_empty(list)) { + dev = nvgpu_list_first_entry(list, + nvgpu_device, + dev_list_node); + nvgpu_list_del(&dev->dev_list_node); + nvgpu_kfree(g, dev); + } +} + +void nvgpu_device_cleanup(struct gk20a *g) +{ + u32 i; + struct nvgpu_list_node *devlist; + + device_dbg(g, "Releasing GPU device list"); + + /* + * Make unit testing a bit easier. + */ + if (g->devs == NULL) { + device_dbg(g, " Already done."); + return; + } + + for (i = 0; i < NVGPU_MAX_DEVTYPE; i++) { + devlist = &g->devs->devlist_heads[i]; + + if (devlist == NULL) { + continue; + } + + nvgpu_device_cleanup_devtype(g, devlist); + } + + nvgpu_kfree(g, g->devs); + g->devs = NULL; +} + +/* + * Find the instance passed. Do this by simply traversing the linked list; it's + * not particularly efficient, but we aren't expecting there to ever be _that_ + * many devices. + * + * Return a pointer to the device or NULL of the inst ID is out of range. 
+ */ +static const struct nvgpu_device *dev_instance_from_devlist( + struct nvgpu_list_node *devlist, u32 inst_id) +{ + struct nvgpu_device *dev; + + nvgpu_list_for_each_entry(dev, devlist, nvgpu_device, dev_list_node) { + if (dev->inst_id == inst_id) { + return dev; + } + } + + return NULL; +} + +const struct nvgpu_device *nvgpu_device_get(struct gk20a *g, + u32 type, u32 inst_id) +{ + const struct nvgpu_device *dev; + struct nvgpu_list_node *device_list; + + if (type >= NVGPU_MAX_DEVTYPE) { + return NULL; + } + + device_list = &g->devs->devlist_heads[type]; + dev = dev_instance_from_devlist(device_list, inst_id); + + if (dev == NULL) { + return NULL; + } + + return dev; +} + +u32 nvgpu_device_count(struct gk20a *g, u32 type) +{ + if (type >= NVGPU_MAX_DEVTYPE) { + return 0U; + } + + return g->devs->dev_counts[type]; +} + +/* + * Internal function to query copy engines; async_only specifies whether + * this function should or should not include the GR copy engines (CEs that + * share a runlist with the GR engine(s)). + * + * This function basically iterates over two distinct copy engine lists: + * first the COPY0-2 (the old way of describing copy engines) and the LCE + * list (the new in Pascal way of describing copy engines). + */ +static u32 nvgpu_device_do_get_copies(struct gk20a *g, + bool async_only, + const struct nvgpu_device **ces, + u32 max) +{ + u32 i; + u32 copies = 0U; + const struct nvgpu_device *dev; + const struct nvgpu_device *gr_dev; + + if (max == 0U) { + return 0U; + } + + gr_dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0U); + nvgpu_assert(gr_dev != NULL); + + /* + * Start with the COPY0-2 engines. Note the awkward instance ID. + */ + for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) { + dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0); + if (dev == NULL) { + continue; + } + + if (async_only && + dev->runlist_id == gr_dev->runlist_id) { + /* It's a GRCE, skip it per async_only. */ + continue; + } + + ces[copies] = dev; + copies = nvgpu_safe_add_u32(copies, 1U); + if (copies == max) { + return copies; + } + } + + for (i = 0; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) { + dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i); + nvgpu_assert(dev != NULL); + + if (async_only && + dev->runlist_id == gr_dev->runlist_id) { + /* It's a GRCE, skip it per async_only. */ + continue; + } + + ces[copies] = dev; + copies = nvgpu_safe_add_u32(copies, 1U); + if (copies == max) { + return copies; + } + } + + return copies; +} + +u32 nvgpu_device_get_async_copies(struct gk20a *g, + const struct nvgpu_device **ces, + u32 max) +{ + return nvgpu_device_do_get_copies(g, true, ces, max); +} + +u32 nvgpu_device_get_copies(struct gk20a *g, + const struct nvgpu_device **ces, + u32 max) +{ + return nvgpu_device_do_get_copies(g, false, ces, max); +} + +/* + * Note: this kind of bleeds HW details into the core code. Eventually this + * should be handled by a translation table. However, for now, HW has kept the + * device type values consistent across chips and nvgpu already has this present + * in core code. + * + * Once a per-chip translation table exists we can translate and then do a + * comparison. 
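A hedged usage sketch of the device-list query helpers above; the sketch_ function name and the fixed-size local array are illustrative, while the calls follow the signatures defined in this file:

static void sketch_dump_engines(struct gk20a *g)
{
	const struct nvgpu_device *ces[8];
	const struct nvgpu_device *gr;
	u32 i, n;

	/* Look up GR instance 0 and print its TOP table entry. */
	gr = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0U);
	if (gr != NULL) {
		nvgpu_device_dump_dev(g, gr);
	}

	/* Copy engines that do not share the GR runlist ("async" CEs). */
	n = nvgpu_device_get_async_copies(g, ces, 8U);
	for (i = 0U; i < n; i++) {
		nvgpu_device_dump_dev(g, ces[i]);
	}
}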
+ */ +bool nvgpu_device_is_ce(struct gk20a *g, const struct nvgpu_device *dev) +{ + if (dev->type == NVGPU_DEVTYPE_COPY0 || + dev->type == NVGPU_DEVTYPE_COPY1 || + dev->type == NVGPU_DEVTYPE_COPY2 || + dev->type == NVGPU_DEVTYPE_LCE) { + return true; + } + + return false; +} + +bool nvgpu_device_is_graphics(struct gk20a *g, const struct nvgpu_device *dev) +{ + return dev->type == NVGPU_DEVTYPE_GRAPHICS; +} diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c new file mode 100644 index 000000000..830e5a05a --- /dev/null +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_init_list_node(&stat->node); + + nvgpu_mutex_acquire(&ecc->stats_lock); + + nvgpu_list_add_tail(&stat->node, &ecc->stats_list); + ecc->stats_count = nvgpu_safe_add_s32(ecc->stats_count, 1); + + nvgpu_mutex_release(&ecc->stats_lock); +} + +void nvgpu_ecc_stat_del(struct gk20a *g, struct nvgpu_ecc_stat *stat) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_mutex_acquire(&ecc->stats_lock); + + nvgpu_list_del(&stat->node); + ecc->stats_count = nvgpu_safe_sub_s32(ecc->stats_count, 1); + + nvgpu_mutex_release(&ecc->stats_lock); +} + +int nvgpu_ecc_counter_init(struct gk20a *g, + struct nvgpu_ecc_stat **statp, const char *name) +{ + struct nvgpu_ecc_stat *stat; + + stat = nvgpu_kzalloc(g, sizeof(*stat)); + if (stat == NULL) { + nvgpu_err(g, "ecc counter alloc failed"); + return -ENOMEM; + } + + (void)strncpy(stat->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1U); + nvgpu_ecc_stat_add(g, stat); + *statp = stat; + return 0; +} + +void nvgpu_ecc_counter_deinit(struct gk20a *g, struct nvgpu_ecc_stat **statp) +{ + struct nvgpu_ecc_stat *stat; + + if (*statp == NULL) { + return; + } + + stat = *statp; + + nvgpu_ecc_stat_del(g, stat); + nvgpu_kfree(g, stat); + *statp = NULL; +} + +/* release all ecc_stat */ +void nvgpu_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_gr_ecc_free(g); + nvgpu_ltc_ecc_free(g); + + if (g->ops.fb.ecc.free != NULL) { + g->ops.fb.ecc.free(g); + } + +#ifdef CONFIG_NVGPU_DGPU + if (g->ops.fb.fbpa_ecc_free != NULL) { + g->ops.fb.fbpa_ecc_free(g); + } +#endif + + if (g->ops.pmu.ecc_free != NULL) { + g->ops.pmu.ecc_free(g); + } + + 
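/*
 * The per-unit free callbacks above are expected to have removed their
 * counters from stats_list via nvgpu_ecc_counter_deinit(); warn if any
 * entry is still linked before the bookkeeping is wiped.
 */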
nvgpu_mutex_acquire(&ecc->stats_lock); + WARN_ON(!nvgpu_list_empty(&ecc->stats_list)); + nvgpu_mutex_release(&ecc->stats_lock); + + (void)memset(ecc, 0, sizeof(*ecc)); +} + +int nvgpu_ecc_init_support(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + if (ecc->initialized) { + return 0; + } + + nvgpu_mutex_init(&ecc->stats_lock); + nvgpu_init_list_node(&ecc->stats_list); + + return 0; +} + +/** + * Note that this function is to be called after all units requiring ecc stats + * have added entries to ecc->stats_list. + */ +int nvgpu_ecc_finalize_support(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_SYSFS + int err; +#endif + + if (g->ecc.initialized) { + return 0; + } + +#ifdef CONFIG_NVGPU_SYSFS + err = nvgpu_ecc_sysfs_init(g); + if (err != 0) { + nvgpu_ecc_free(g); + return err; + } +#endif + + g->ecc.initialized = true; + + return 0; +} + +void nvgpu_ecc_remove_support(struct gk20a *g) +{ + if (!g->ecc.initialized) { + return; + } + +#ifdef CONFIG_NVGPU_SYSFS + nvgpu_ecc_sysfs_remove(g); +#endif + nvgpu_ecc_free(g); + + nvgpu_mutex_destroy(&g->ecc.stats_lock); +} diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.c b/drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.c new file mode 100644 index 000000000..1a4a3286d --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
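A minimal sketch of how a unit registers and later tears down a named ECC counter with the helpers above; the counter name and the sketch_ identifiers are hypothetical, the two calls are the API defined in this file:

static struct nvgpu_ecc_stat *sketch_unit_corrected_errs;

static int sketch_unit_ecc_init(struct gk20a *g)
{
	/* Allocates the stat, copies the name, links it on stats_list. */
	return nvgpu_ecc_counter_init(g, &sketch_unit_corrected_errs,
				      "sketch_unit_corrected_err_count");
}

static void sketch_unit_ecc_deinit(struct gk20a *g)
{
	/* Unlinks and frees the stat; the pointer is reset to NULL. */
	nvgpu_ecc_counter_deinit(g, &sketch_unit_corrected_errs);
}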
+ */ + +#include +#include + +#include "engine_mem_queue_priv.h" +#include "engine_dmem_queue.h" + +/* DMEM-Q specific ops */ +static int engine_dmem_queue_push(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, u32 dst, void *data, u32 size) +{ + struct gk20a *g = queue->g; + int err = 0; + + err = nvgpu_falcon_copy_to_dmem(flcn, dst, data, size, 0); + if (err != 0) { + nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id); + nvgpu_err(g, "dmem queue write failed"); + goto exit; + } + +exit: + return err; +} + +static int engine_dmem_queue_pop(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, u32 src, void *data, u32 size) +{ + struct gk20a *g = queue->g; + int err = 0; + + err = nvgpu_falcon_copy_from_dmem(flcn, src, data, size, 0); + if (err != 0) { + nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id); + nvgpu_err(g, "dmem queue read failed"); + goto exit; + } + +exit: + return err; +} + +/* assign DMEM queue type specific ops */ +void engine_dmem_queue_init(struct nvgpu_engine_mem_queue *queue) +{ + queue->push = engine_dmem_queue_push; + queue->pop = engine_dmem_queue_pop; +} diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.h b/drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.h new file mode 100644 index 000000000..b038bf3a4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_ENGINE_DMEM_QUEUE_H +#define NVGPU_ENGINE_DMEM_QUEUE_H + +void engine_dmem_queue_init(struct nvgpu_engine_mem_queue *queue); + +#endif /* NVGPU_ENGINE_DMEM_QUEUE_H */ diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.c b/drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.c new file mode 100644 index 000000000..9746ad1ed --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "engine_mem_queue_priv.h" +#include "engine_emem_queue.h" + +/* EMEM-Q specific ops */ +static int engine_emem_queue_push(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, u32 dst, void *data, u32 size) +{ + struct gk20a *g = queue->g; + int err = 0; + + err = nvgpu_falcon_copy_to_emem(flcn, dst, data, size, 0); + if (err != 0) { + nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id); + nvgpu_err(g, "emem queue write failed"); + goto exit; + } + +exit: + return err; +} + +static int engine_emem_queue_pop(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, u32 src, void *data, u32 size) +{ + struct gk20a *g = queue->g; + int err = 0; + + err = nvgpu_falcon_copy_from_emem(flcn, src, data, size, 0); + if (err != 0) { + nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id); + nvgpu_err(g, "emem queue read failed"); + goto exit; + } + +exit: + return err; +} + +/* assign EMEM queue type specific ops */ +void engine_emem_queue_init(struct nvgpu_engine_mem_queue *queue) +{ + queue->push = engine_emem_queue_push; + queue->pop = engine_emem_queue_pop; +} diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.h b/drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.h new file mode 100644 index 000000000..d6e0bf992 --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_ENGINE_EMEM_QUEUE_H +#define NVGPU_ENGINE_EMEM_QUEUE_H + +void engine_emem_queue_init(struct nvgpu_engine_mem_queue *queue); + +#endif /* NVGPU_ENGINE_EMEM_QUEUE_H */ diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue.c b/drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue.c new file mode 100644 index 000000000..894d90f61 --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue.c @@ -0,0 +1,603 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
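The DMEM and EMEM queue files above differ only in which falcon copy routine backs queue->push and queue->pop. A hedged sketch of the backend selection this split allows; the use_emem flag and the sketch_ function are assumptions, while the two init helpers and the push/pop members come from the code above:

static void sketch_mem_queue_bind_ops(struct nvgpu_engine_mem_queue *queue,
				      bool use_emem)
{
	if (use_emem) {
		/* Copies go through the falcon EMEM aperture. */
		engine_emem_queue_init(queue);
	} else {
		/* Copies go through falcon DMEM. */
		engine_dmem_queue_init(queue);
	}
	/* Generic mem-queue code then only calls queue->push()/queue->pop(). */
}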
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "engine_fb_queue_priv.h" + +/* FB-Q ops */ +static int engine_fb_queue_head(struct nvgpu_engine_fb_queue *queue, + u32 *head, bool set) +{ + return queue->queue_head(queue->g, queue->id, queue->index, head, set); +} + +static int engine_fb_queue_tail(struct nvgpu_engine_fb_queue *queue, + u32 *tail, bool set) +{ + struct gk20a *g = queue->g; + int err; + + if (set == false && PMU_IS_COMMAND_QUEUE(queue->id)) { + *tail = queue->fbq.tail; + err = 0; + } else { + err = queue->queue_tail(g, queue->id, queue->index, tail, set); + } + + return err; +} + +static inline u32 engine_fb_queue_get_next(struct nvgpu_engine_fb_queue *queue, + u32 head) +{ + return (head + 1U) % queue->size; +} + +static bool engine_fb_queue_has_room(struct nvgpu_engine_fb_queue *queue, + u32 size) +{ + u32 head = 0; + u32 tail = 0; + u32 next_head = 0; + int err = 0; + + err = queue->head(queue, &head, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "queue head GET failed"); + goto exit; + } + + err = queue->tail(queue, &tail, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "queue tail GET failed"); + goto exit; + } + + next_head = engine_fb_queue_get_next(queue, head); + +exit: + return next_head != tail; +} + +static int engine_fb_queue_write(struct nvgpu_engine_fb_queue *queue, + u32 offset, u8 *src, u32 size) +{ + struct gk20a *g = queue->g; + struct nv_falcon_fbq_hdr *fb_q_hdr = (struct nv_falcon_fbq_hdr *) + (void *)queue->fbq.work_buffer; + u32 entry_offset = 0U; + int err = 0; + + if (queue->fbq.work_buffer == NULL) { + nvgpu_err(g, "Invalid/Unallocated work buffer"); + err = -EINVAL; + goto exit; + } + + /* Fill out FBQ hdr, that is in the work buffer */ + fb_q_hdr->element_index = (u8)offset; + + /* check queue entry size */ + if (fb_q_hdr->heap_size >= (u16)queue->fbq.element_size) { + err = -EINVAL; + goto exit; + } + + /* get offset to this element entry */ + entry_offset = offset * queue->fbq.element_size; + + /* copy cmd to super-surface */ + nvgpu_mem_wr_n(g, queue->fbq.super_surface_mem, + queue->fbq.fb_offset + entry_offset, + queue->fbq.work_buffer, queue->fbq.element_size); + +exit: + return err; +} + +static int engine_fb_queue_set_element_use_state( + struct nvgpu_engine_fb_queue *queue, u32 queue_pos, bool set) +{ + int err = 0; + + if (queue_pos >= queue->size) { + err = -EINVAL; + goto exit; + } + + if (nvgpu_test_bit(queue_pos, + (void *)&queue->fbq.element_in_use) && set) { + nvgpu_err(queue->g, + "FBQ last received queue element not processed yet" + " queue_pos %d", queue_pos); + err = -EINVAL; + goto exit; + } + + if (set) { + nvgpu_set_bit(queue_pos, (void *)&queue->fbq.element_in_use); + } else { + nvgpu_clear_bit(queue_pos, (void *)&queue->fbq.element_in_use); + } + +exit: + return err; +} + +static int engine_fb_queue_is_element_in_use( + struct nvgpu_engine_fb_queue *queue, + u32 queue_pos, bool *in_use) +{ + int err = 0; + + if (queue_pos >= queue->size) { + err = -EINVAL; + goto exit; + } + + *in_use = nvgpu_test_bit(queue_pos, (void *)&queue->fbq.element_in_use); + +exit: + return err; +} + +static int engine_fb_queue_sweep(struct nvgpu_engine_fb_queue *queue) +{ + u32 head; + u32 tail; + bool in_use = false; + int err = 0; + + tail = queue->fbq.tail; + err = queue->head(queue, &head, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d queue-%d, position GET failed", + queue->flcn_id, queue->id); + goto exit; + } + + /* + * Step from tail forward in 
the queue, + * to see how many consecutive entries + * can be made available. + */ + while (tail != head) { + if (engine_fb_queue_is_element_in_use(queue, + tail, &in_use) != 0) { + break; + } + + if (in_use) { + break; + } + + tail = engine_fb_queue_get_next(queue, tail); + } + + /* Update tail */ + queue->fbq.tail = tail; + +exit: + return err; +} + +u32 nvgpu_engine_fb_queue_get_position(struct nvgpu_engine_fb_queue *queue) +{ + return queue->position; +} + +/* return the queue element size */ +u32 nvgpu_engine_fb_queue_get_element_size(struct nvgpu_engine_fb_queue *queue) +{ + return queue->fbq.element_size; +} + +/* return the queue offset from super surface FBQ's */ +u32 nvgpu_engine_fb_queue_get_offset(struct nvgpu_engine_fb_queue *queue) +{ + return queue->fbq.fb_offset; +} + +/* lock work buffer of queue */ +void nvgpu_engine_fb_queue_lock_work_buffer(struct nvgpu_engine_fb_queue *queue) +{ + /* acquire work buffer mutex */ + nvgpu_mutex_acquire(&queue->fbq.work_buffer_mutex); +} + +/* unlock work buffer of queue */ +void nvgpu_engine_fb_queue_unlock_work_buffer( + struct nvgpu_engine_fb_queue *queue) +{ + /* release work buffer mutex */ + nvgpu_mutex_release(&queue->fbq.work_buffer_mutex); +} + +/* return a pointer of queue work buffer */ +u8 *nvgpu_engine_fb_queue_get_work_buffer(struct nvgpu_engine_fb_queue *queue) +{ + return queue->fbq.work_buffer; +} + +int nvgpu_engine_fb_queue_free_element(struct nvgpu_engine_fb_queue *queue, + u32 queue_pos) +{ + int err = 0; + + err = engine_fb_queue_set_element_use_state(queue, + queue_pos, false); + if (err != 0) { + nvgpu_err(queue->g, "fb queue elelment %d free failed", + queue_pos); + goto exit; + } + + err = engine_fb_queue_sweep(queue); + +exit: + return err; +} + +/* queue is_empty check with lock */ +bool nvgpu_engine_fb_queue_is_empty(struct nvgpu_engine_fb_queue *queue) +{ + u32 q_head = 0; + u32 q_tail = 0; + int err = 0; + + if (queue == NULL) { + return true; + } + + /* acquire mutex */ + nvgpu_mutex_acquire(&queue->mutex); + + err = queue->head(queue, &q_head, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d queue-%d, head GET failed", + queue->flcn_id, queue->id); + goto exit; + } + + err = queue->tail(queue, &q_tail, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d queue-%d, tail GET failed", + queue->flcn_id, queue->id); + goto exit; + } + +exit: + /* release mutex */ + nvgpu_mutex_release(&queue->mutex); + + return q_head == q_tail; +} + +static int engine_fb_queue_prepare_write(struct nvgpu_engine_fb_queue *queue, + u32 size) +{ + int err = 0; + + /* make sure there's enough free space for the write */ + if (!engine_fb_queue_has_room(queue, size)) { + nvgpu_log_info(queue->g, "queue full: queue-id %d: index %d", + queue->id, queue->index); + err = -EAGAIN; + goto exit; + } + + err = queue->head(queue, &queue->position, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d queue-%d, position GET failed", + queue->flcn_id, queue->id); + goto exit; + } + +exit: + return err; +} + +/* queue push operation with lock */ +int nvgpu_engine_fb_queue_push(struct nvgpu_engine_fb_queue *queue, + void *data, u32 size) +{ + struct gk20a *g; + int err = 0; + + if (queue == NULL) { + return -EINVAL; + } + + g = queue->g; + + nvgpu_log_fn(g, " "); + + if (queue->oflag != OFLAG_WRITE) { + nvgpu_err(queue->g, "flcn-%d, queue-%d not opened for write", + queue->flcn_id, queue->id); + err = -EINVAL; + goto exit; + } + + /* acquire mutex */ + nvgpu_mutex_acquire(&queue->mutex); + + err = 
engine_fb_queue_prepare_write(queue, size); + if (err != 0) { + goto unlock_mutex; + } + + /* Bounds check size */ + if (size > queue->fbq.element_size) { + nvgpu_err(g, "size too large size=0x%x", size); + goto unlock_mutex; + } + + /* Set queue element in use */ + if (engine_fb_queue_set_element_use_state(queue, + queue->position, true) != 0) { + nvgpu_err(g, + "fb-queue element in use map is in invalid state"); + err = -EINVAL; + goto unlock_mutex; + } + + /* write data to FB */ + err = engine_fb_queue_write(queue, queue->position, data, size); + if (err != 0) { + nvgpu_err(g, "write to fb-queue failed"); + goto unlock_mutex; + } + + queue->position = engine_fb_queue_get_next(queue, + queue->position); + + err = queue->head(queue, &queue->position, QUEUE_SET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d queue-%d, position SET failed", + queue->flcn_id, queue->id); + goto unlock_mutex; + } + +unlock_mutex: + /* release mutex */ + nvgpu_mutex_release(&queue->mutex); +exit: + if (err != 0) { + nvgpu_err(queue->g, "falcon id-%d, queue id-%d, failed", + queue->flcn_id, queue->id); + } + + return err; +} + +/* queue pop operation with lock */ +int nvgpu_engine_fb_queue_pop(struct nvgpu_engine_fb_queue *queue, + void *data, u32 size, u32 *bytes_read) +{ + struct gk20a *g; + struct pmu_hdr *hdr; + u32 entry_offset = 0U; + int err = 0; + + if (queue == NULL) { + return -EINVAL; + } + + g = queue->g; + hdr = (struct pmu_hdr *) (void *) (queue->fbq.work_buffer + + sizeof(struct nv_falcon_fbq_msgq_hdr)); + + nvgpu_log_fn(g, " "); + + if (queue->oflag != OFLAG_READ) { + nvgpu_err(g, "flcn-%d, queue-%d, not opened for read", + queue->flcn_id, queue->id); + err = -EINVAL; + goto exit; + } + + /* acquire mutex */ + nvgpu_mutex_acquire(&queue->mutex); + + err = queue->tail(queue, &queue->position, QUEUE_GET); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, position GET failed", + queue->flcn_id, queue->id); + goto unlock_mutex; + } + + *bytes_read = 0U; + + /* Check size */ + if ((size + queue->fbq.read_position) >= queue->fbq.element_size) { + nvgpu_err(g, + "Attempt to read > than queue element size " + "for queue id-%d", queue->id); + err = -EINVAL; + goto unlock_mutex; + } + + entry_offset = queue->position * queue->fbq.element_size; + + /* + * If first read for this queue element then read whole queue + * element into work buffer. + */ + if (queue->fbq.read_position == 0U) { + nvgpu_mem_rd_n(g, queue->fbq.super_surface_mem, + /* source (FBQ data) offset*/ + queue->fbq.fb_offset + entry_offset, + /* destination buffer */ + (void *)queue->fbq.work_buffer, + /* copy size */ + queue->fbq.element_size); + + /* Check size in hdr of MSG just read */ + if (hdr->size >= queue->fbq.element_size) { + nvgpu_err(g, "Super Surface read failed"); + err = -ERANGE; + goto unlock_mutex; + } + } + + nvgpu_memcpy((u8 *)data, (u8 *)queue->fbq.work_buffer + + queue->fbq.read_position + + sizeof(struct nv_falcon_fbq_msgq_hdr), + size); + + /* update current position */ + queue->fbq.read_position += size; + + /* If reached end of this queue element, move on to next. */ + if (queue->fbq.read_position >= hdr->size) { + queue->fbq.read_position = 0U; + /* Increment queue index. 
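The position wraps back to element 0 once the last element is consumed, since engine_fb_queue_get_next() advances modulo queue->size.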
*/ + queue->position = engine_fb_queue_get_next(queue, + queue->position); + } + + *bytes_read = size; + + err = queue->tail(queue, &queue->position, QUEUE_SET); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, position SET failed", + queue->flcn_id, queue->id); + goto unlock_mutex; + } + +unlock_mutex: + /* release mutex */ + nvgpu_mutex_release(&queue->mutex); +exit: + if (err != 0) { + nvgpu_err(g, "falcon id-%d, queue id-%d, failed", + queue->flcn_id, queue->id); + } + + return err; +} + +void nvgpu_engine_fb_queue_free(struct nvgpu_engine_fb_queue **queue_p) +{ + struct nvgpu_engine_fb_queue *queue = NULL; + struct gk20a *g; + + if ((queue_p == NULL) || (*queue_p == NULL)) { + return; + } + + queue = *queue_p; + + g = queue->g; + + nvgpu_log_info(g, "flcn id-%d q-id %d: index %d ", + queue->flcn_id, queue->id, queue->index); + + nvgpu_kfree(g, queue->fbq.work_buffer); + nvgpu_mutex_destroy(&queue->fbq.work_buffer_mutex); + + /* destroy mutex */ + nvgpu_mutex_destroy(&queue->mutex); + + nvgpu_kfree(g, queue); + *queue_p = NULL; +} + +int nvgpu_engine_fb_queue_init(struct nvgpu_engine_fb_queue **queue_p, + struct nvgpu_engine_fb_queue_params params) +{ + struct nvgpu_engine_fb_queue *queue = NULL; + struct gk20a *g = params.g; + int err = 0; + + if (queue_p == NULL) { + return -EINVAL; + } + + queue = (struct nvgpu_engine_fb_queue *) + nvgpu_kmalloc(g, sizeof(struct nvgpu_engine_fb_queue)); + + if (queue == NULL) { + return -ENOMEM; + } + + queue->g = params.g; + queue->flcn_id = params.flcn_id; + queue->id = params.id; + queue->index = params.index; + queue->size = params.size; + queue->oflag = params.oflag; + + queue->fbq.tail = 0U; + queue->fbq.element_in_use = 0U; + queue->fbq.read_position = 0U; + queue->fbq.super_surface_mem = params.super_surface_mem; + queue->fbq.element_size = params.fbq_element_size; + queue->fbq.fb_offset = params.fbq_offset; + + queue->position = 0U; + + queue->queue_head = params.queue_head; + queue->queue_tail = params.queue_tail; + + queue->head = engine_fb_queue_head; + queue->tail = engine_fb_queue_tail; + + /* init mutex */ + nvgpu_mutex_init(&queue->mutex); + + /* init mutex */ + nvgpu_mutex_init(&queue->fbq.work_buffer_mutex); + + queue->fbq.work_buffer = nvgpu_kzalloc(g, queue->fbq.element_size); + if (queue->fbq.work_buffer == NULL) { + err = -ENOMEM; + goto free_work_mutex; + } + + nvgpu_log_info(g, + "flcn id-%d q-id %d: index %d, size 0x%08x", + queue->flcn_id, queue->id, queue->index, + queue->size); + + *queue_p = queue; + + return 0; + +free_work_mutex: + nvgpu_mutex_destroy(&queue->fbq.work_buffer_mutex); + nvgpu_mutex_destroy(&queue->mutex); + nvgpu_kfree(g, queue); + + return err; +} diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue_priv.h b/drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue_priv.h new file mode 100644 index 000000000..f7b676a4f --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue_priv.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_ENGINE_FB_QUEUE_PRIV_H
+#define NVGPU_ENGINE_FB_QUEUE_PRIV_H
+
+#include
+
+struct nvgpu_engine_fb_queue {
+ struct gk20a *g;
+ u32 flcn_id;
+
+ /* used by nvgpu, for command LPQ/HPQ */
+ struct nvgpu_mutex mutex;
+
+ /* current write position */
+ u32 position;
+ /* logical queue identifier */
+ u32 id;
+ /* physical queue index */
+ u32 index;
+ /* queue length (number of elements) */
+ u32 size;
+ /* open-flag */
+ u32 oflag;
+
+ /* members unique to the FB version of the falcon queues */
+ struct {
+ /* Holds super surface base address */
+ struct nvgpu_mem *super_surface_mem;
+
+ /*
+ * Holds the offset of queue data (0th element).
+ * This is used for FB Queues to hold an offset into the
+ * Super Surface for this queue.
+ */
+ u32 fb_offset;
+
+ /*
+ * Size of a single queue element, in bytes. The size
+ * member above holds the number of queue elements.
+ */
+ u32 element_size;
+
+ /* To keep track of elements in use */
+ u64 element_in_use;
+
+ /*
+ * Pointer to a local (SYSMEM) allocated buffer that holds
+ * a single queue element while it is being assembled.
+ */
+ u8 *work_buffer;
+ struct nvgpu_mutex work_buffer_mutex;
+
+ /*
+ * Tracks how much of the current FB Queue MSG queue
+ * entry has been read. This is needed as functions read
+ * the MSG queue as a byte stream, rather
+ * than reading a whole MSG at a time.
+ */
+ u32 read_position;
+
+ /*
+ * Tail as tracked on the nvgpu "side". Because a queue
+ * element and its associated payload (which is also moved
+ * PMU->nvgpu through the FB CMD Queue) can't be freed until
+ * the command is complete, the response is received and any
+ * "out" payload is delivered to the client, nvgpu has to
+ * track its own version of "tail". This one is advanced as
+ * completed entries are found following the tail.
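+ * (The nvgpu-side tail is advanced by engine_fb_queue_sweep(), which
+ * walks from this tail towards the falcon-side head and stops at the
+ * first element that is still marked in use.)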
+ */ + u32 tail; + } fbq; + + /* engine and queue specific ops */ + int (*tail)(struct nvgpu_engine_fb_queue *queue, u32 *tail, bool set); + int (*head)(struct nvgpu_engine_fb_queue *queue, u32 *head, bool set); + + /* engine specific ops */ + int (*queue_head)(struct gk20a *g, u32 queue_id, u32 queue_index, + u32 *head, bool set); + int (*queue_tail)(struct gk20a *g, u32 queue_id, u32 queue_index, + u32 *tail, bool set); +}; + +#endif /* NVGPU_ENGINE_FB_QUEUE_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue.c b/drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue.c new file mode 100644 index 000000000..76e178221 --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue.c @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "engine_mem_queue_priv.h" +#include "engine_dmem_queue.h" +#include "engine_emem_queue.h" + +static int mem_queue_get_head_tail(struct nvgpu_engine_mem_queue *queue, + u32 *q_head, u32 *q_tail) +{ + int err = 0; + + err = queue->head(queue->g, queue->id, queue->index, + q_head, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d, queue-%d, head GET failed", + queue->flcn_id, queue->id); + goto exit; + } + + err = queue->tail(queue->g, queue->id, queue->index, + q_tail, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d, queue-%d, tail GET failed", + queue->flcn_id, queue->id); + goto exit; + } + + return 0; +exit: + return err; +} + +/* common falcon queue ops */ +static bool engine_mem_queue_has_room(struct nvgpu_engine_mem_queue *queue, + u32 size, bool *need_rewind) +{ + u32 q_head = 0; + u32 q_tail = 0; + u32 q_free = 0; + bool q_rewind = false; + int err = 0; + + size = NVGPU_ALIGN(size, QUEUE_ALIGNMENT); + + err = mem_queue_get_head_tail(queue, &q_head, &q_tail); + if (err != 0) { + goto exit; + } + + if (q_head >= q_tail) { + q_free = queue->offset + queue->size - q_head; + q_free -= (u32)PMU_CMD_HDR_SIZE; + + if (size > q_free) { + q_rewind = true; + q_head = queue->offset; + } + } + + if (q_head < q_tail) { + q_free = q_tail - q_head - 1U; + } + + if (need_rewind != NULL) { + *need_rewind = q_rewind; + } + +exit: + return size <= q_free; +} + +static int engine_mem_queue_rewind(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue) +{ + struct gk20a *g = queue->g; + struct pmu_cmd cmd; + int err = 0; + + if (queue->oflag == OFLAG_WRITE) { + cmd.hdr.unit_id = PMU_UNIT_REWIND; + cmd.hdr.size = (u8)PMU_CMD_HDR_SIZE; + err = queue->push(flcn, queue, queue->position, + &cmd, cmd.hdr.size); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, rewind request failed", + queue->flcn_id, queue->id); + goto exit; + } else { + queue->position += nvgpu_safe_cast_u32_to_u8( + NVGPU_ALIGN(U32(cmd.hdr.size), QUEUE_ALIGNMENT)); + nvgpu_log_info(g, "flcn-%d queue-%d, rewinded", + queue->flcn_id, queue->id); + } + } + + /* update queue position */ + queue->position = queue->offset; + + if (queue->oflag == OFLAG_READ) { + err = queue->tail(g, queue->id, queue->index, &queue->position, + QUEUE_SET); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, position SET failed", + queue->flcn_id, queue->id); + goto exit; + } + } + +exit: + return err; +} + +static int engine_mem_queue_prepare_write(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, u32 size) +{ + bool q_rewind = false; + int err = 0; + + /* make sure there's enough free space for the write */ + if (!engine_mem_queue_has_room(queue, size, &q_rewind)) { + nvgpu_log_info(queue->g, "queue full: queue-id %d: index %d", + queue->id, queue->index); + err = -EAGAIN; + goto exit; + } + + err = queue->head(queue->g, queue->id, queue->index, + &queue->position, QUEUE_GET); + if (err != 0) { + nvgpu_err(queue->g, "flcn-%d queue-%d, position GET failed", + queue->flcn_id, queue->id); + goto exit; + } + + if (q_rewind) { + err = engine_mem_queue_rewind(flcn, queue); + } + +exit: + return err; +} + +/* queue public functions */ + +/* queue push operation with lock */ +int nvgpu_engine_mem_queue_push(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, void *data, u32 size) +{ + struct gk20a *g; + int err = 0; + + if ((flcn == NULL) || (queue == NULL)) { + return -EINVAL; + } + + g = 
queue->g; + + if (queue->oflag != OFLAG_WRITE) { + nvgpu_err(g, "flcn-%d, queue-%d not opened for write", + queue->flcn_id, queue->id); + err = -EINVAL; + goto exit; + } + + /* acquire mutex */ + nvgpu_mutex_acquire(&queue->mutex); + + err = engine_mem_queue_prepare_write(flcn, queue, size); + if (err != 0) { + goto unlock_mutex; + } + + err = queue->push(flcn, queue, queue->position, data, size); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, fail to write", + queue->flcn_id, queue->id); + goto unlock_mutex; + } + + queue->position += NVGPU_ALIGN(size, QUEUE_ALIGNMENT); + + err = queue->head(g, queue->id, queue->index, + &queue->position, QUEUE_SET); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, position SET failed", + queue->flcn_id, queue->id); + } + +unlock_mutex: + /* release mutex */ + nvgpu_mutex_release(&queue->mutex); +exit: + return err; +} + +/* queue pop operation with lock */ +int nvgpu_engine_mem_queue_pop(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, void *data, u32 size, + u32 *bytes_read) +{ + struct gk20a *g; + u32 q_tail = 0; + u32 q_head = 0; + u32 used = 0; + int err = 0; + + *bytes_read = 0; + + if ((flcn == NULL) || (queue == NULL)) { + return -EINVAL; + } + + g = queue->g; + + if (queue->oflag != OFLAG_READ) { + nvgpu_err(g, "flcn-%d, queue-%d, not opened for read", + queue->flcn_id, queue->id); + err = -EINVAL; + goto exit; + } + + /* acquire mutex */ + nvgpu_mutex_acquire(&queue->mutex); + + err = mem_queue_get_head_tail(queue, &q_head, &queue->position); + if (err != 0) { + goto unlock_mutex; + } + + q_tail = queue->position; + + if (q_head == q_tail) { + goto unlock_mutex; + } else if (q_head > q_tail) { + used = q_head - q_tail; + } else { + used = queue->offset + queue->size - q_tail; + } + + if (size > used) { + nvgpu_warn(g, "queue size smaller than request read"); + size = used; + } + + err = queue->pop(flcn, queue, q_tail, data, size); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, fail to read", + queue->flcn_id, queue->id); + goto unlock_mutex; + } + + queue->position += NVGPU_ALIGN(size, QUEUE_ALIGNMENT); + + err = queue->tail(g, queue->id, queue->index, + &queue->position, QUEUE_SET); + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, position SET failed", + queue->flcn_id, queue->id); + goto unlock_mutex; + } + + *bytes_read = size; + +unlock_mutex: + /* release mutex */ + nvgpu_mutex_release(&queue->mutex); +exit: + return err; +} + +int nvgpu_engine_mem_queue_rewind(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue) +{ + int err = 0; + + if ((flcn == NULL) || (queue == NULL)) { + return -EINVAL; + } + + /* acquire mutex */ + nvgpu_mutex_acquire(&queue->mutex); + + err = engine_mem_queue_rewind(flcn, queue); + + /* release mutex */ + nvgpu_mutex_release(&queue->mutex); + + return err; +} + +/* queue is_empty check with lock */ +bool nvgpu_engine_mem_queue_is_empty(struct nvgpu_engine_mem_queue *queue) +{ + u32 q_head = 0; + u32 q_tail = 0; + int err = 0; + + if (queue == NULL) { + return true; + } + + /* acquire mutex */ + nvgpu_mutex_acquire(&queue->mutex); + + err = mem_queue_get_head_tail(queue, &q_head, &q_tail); + if (err != 0) { + goto exit; + } + +exit: + /* release mutex */ + nvgpu_mutex_release(&queue->mutex); + + return q_head == q_tail; +} + +void nvgpu_engine_mem_queue_free(struct nvgpu_engine_mem_queue **queue_p) +{ + struct nvgpu_engine_mem_queue *queue = NULL; + struct gk20a *g; + + if ((queue_p == NULL) || (*queue_p == NULL)) { + return; + } + + queue = *queue_p; + + g = queue->g; 
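+ /*
+ * The queue mutex is destroyed below before the queue memory is freed,
+ * so the caller must ensure the queue is no longer in use by any other
+ * thread at this point.
+ */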
+ + nvgpu_log_info(g, "flcn id-%d q-id %d: index %d ", + queue->flcn_id, queue->id, queue->index); + + /* destroy mutex */ + nvgpu_mutex_destroy(&queue->mutex); + + nvgpu_kfree(g, queue); + *queue_p = NULL; +} + +u32 nvgpu_engine_mem_queue_get_size(struct nvgpu_engine_mem_queue *queue) +{ + return queue->size; +} + +int nvgpu_engine_mem_queue_init(struct nvgpu_engine_mem_queue **queue_p, + struct nvgpu_engine_mem_queue_params params) +{ + struct nvgpu_engine_mem_queue *queue = NULL; + struct gk20a *g = params.g; + int err = 0; + + if (queue_p == NULL) { + return -EINVAL; + } + + queue = (struct nvgpu_engine_mem_queue *) + nvgpu_kmalloc(g, sizeof(struct nvgpu_engine_mem_queue)); + + if (queue == NULL) { + return -ENOMEM; + } + + queue->g = params.g; + queue->flcn_id = params.flcn_id; + queue->id = params.id; + queue->index = params.index; + queue->offset = params.offset; + queue->position = params.position; + queue->size = params.size; + queue->oflag = params.oflag; + queue->queue_type = params.queue_type; + + queue->head = params.queue_head; + queue->tail = params.queue_tail; + + nvgpu_log_info(g, + "flcn id-%d q-id %d: index %d, offset 0x%08x, size 0x%08x", + queue->flcn_id, queue->id, queue->index, + queue->offset, queue->size); + + switch (queue->queue_type) { + case QUEUE_TYPE_DMEM: + engine_dmem_queue_init(queue); + break; +#ifdef CONFIG_NVGPU_DGPU + case QUEUE_TYPE_EMEM: + engine_emem_queue_init(queue); + break; +#endif + default: + err = -EINVAL; + break; + } + + if (err != 0) { + nvgpu_err(g, "flcn-%d queue-%d, init failed", + queue->flcn_id, queue->id); + nvgpu_kfree(g, queue); + goto exit; + } + + /* init mutex */ + nvgpu_mutex_init(&queue->mutex); + + *queue_p = queue; +exit: + return err; +} diff --git a/drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue_priv.h b/drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue_priv.h new file mode 100644 index 000000000..735406b21 --- /dev/null +++ b/drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue_priv.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_ENGINE_MEM_QUEUE_PRIV_H +#define NVGPU_ENGINE_MEM_QUEUE_PRIV_H + +#include +#include + +struct gk20a; +struct nvgpu_falcon; + +struct nvgpu_engine_mem_queue { + struct gk20a *g; + u32 flcn_id; + + /* Queue Type (queue_type) */ + u8 queue_type; + + /* used by nvgpu, for command LPQ/HPQ */ + struct nvgpu_mutex mutex; + + /* current write position */ + u32 position; + /* physical dmem offset where this queue begins */ + u32 offset; + /* logical queue identifier */ + u32 id; + /* physical queue index */ + u32 index; + /* in bytes */ + u32 size; + /* open-flag */ + u32 oflag; + + /* queue type(DMEM-Q/EMEM-Q) specific ops */ + int (*push)(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, + u32 dst, void *data, u32 size); + int (*pop)(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue *queue, + u32 src, void *data, u32 size); + + /* engine specific ops */ + int (*head)(struct gk20a *g, u32 queue_id, u32 queue_index, + u32 *head, bool set); + int (*tail)(struct gk20a *g, u32 queue_id, u32 queue_index, + u32 *tail, bool set); +}; + +#endif /* NVGPU_ENGINE_MEM_QUEUE_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/falcon/falcon.c b/drivers/gpu/nvgpu/common/falcon/falcon.c new file mode 100644 index 000000000..7da45f087 --- /dev/null +++ b/drivers/gpu/nvgpu/common/falcon/falcon.c @@ -0,0 +1,825 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +#include "falcon_sw_gk20a.h" +#ifdef CONFIG_NVGPU_DGPU +#include "falcon_sw_tu104.h" +#endif + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) +#include "nvgpu_next_gpuid.h" +#endif + +static bool is_falcon_valid(struct nvgpu_falcon *flcn) +{ + if (flcn == NULL) { + return false; + } + + if (!flcn->is_falcon_supported) { + nvgpu_err(flcn->g, "Falcon %d not supported", flcn->flcn_id); + return false; + } + + return true; +} + +u32 nvgpu_falcon_readl(struct nvgpu_falcon *flcn, u32 offset) +{ + return nvgpu_readl(flcn->g, + nvgpu_safe_add_u32(flcn->flcn_base, offset)); +} + +void nvgpu_falcon_writel(struct nvgpu_falcon *flcn, + u32 offset, u32 val) +{ + nvgpu_writel(flcn->g, nvgpu_safe_add_u32(flcn->flcn_base, offset), val); +} + +int nvgpu_falcon_reset(struct nvgpu_falcon *flcn) +{ + struct gk20a *g; + int status = 0; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + if (flcn->flcn_engine_dep_ops.reset_eng != NULL) { + /* falcon & engine reset */ + status = flcn->flcn_engine_dep_ops.reset_eng(g); + } else { + g->ops.falcon.reset(flcn); + } + + if (status == 0) { + status = nvgpu_falcon_mem_scrub_wait(flcn); + } + + return status; +} + +int nvgpu_falcon_wait_for_halt(struct nvgpu_falcon *flcn, unsigned int timeout) +{ + struct nvgpu_timeout to; + struct gk20a *g; + int status; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + status = nvgpu_timeout_init(g, &to, timeout, NVGPU_TIMER_CPU_TIMER); + if (status != 0) { + return status; + } + + do { + if (g->ops.falcon.is_falcon_cpu_halted(flcn)) { + break; + } + + nvgpu_udelay(10); + } while (nvgpu_timeout_expired(&to) == 0); + + if (nvgpu_timeout_peek_expired(&to)) { + status = -ETIMEDOUT; + } + + return status; +} + +int nvgpu_falcon_wait_idle(struct nvgpu_falcon *flcn) +{ + struct nvgpu_timeout timeout; + struct gk20a *g; + int status; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + status = nvgpu_timeout_init(g, &timeout, 2000, NVGPU_TIMER_RETRY_TIMER); + if (status != 0) { + return status; + } + + /* wait for falcon idle */ + do { + if (g->ops.falcon.is_falcon_idle(flcn)) { + break; + } + + if (nvgpu_timeout_expired_msg(&timeout, + "waiting for falcon idle") != 0) { + return -ETIMEDOUT; + } + + nvgpu_usleep_range(100, 200); + } while (true); + + return 0; +} + +int nvgpu_falcon_mem_scrub_wait(struct nvgpu_falcon *flcn) +{ + struct nvgpu_timeout timeout; + /** + * Delay depends on memory size and pwr_clk + * delay = (MAX {IMEM_SIZE, DMEM_SIZE} * 64 + 1) / pwr_clk + * Timeout set is 1msec & status check at interval 10usec + */ + const u32 mem_scrubbing_max_timeout = 1000U; + const u32 mem_scrubbing_default_timeout = 10U; + struct gk20a *g; + int status; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + /* check IMEM/DMEM scrubbing complete status */ + status = nvgpu_timeout_init(g, &timeout, + mem_scrubbing_max_timeout / + mem_scrubbing_default_timeout, + NVGPU_TIMER_RETRY_TIMER); + if (status != 0) { + return status; + } + + do { + if (g->ops.falcon.is_falcon_scrubbing_done(flcn)) { + break; + } + nvgpu_udelay(mem_scrubbing_default_timeout); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout)) { + status = -ETIMEDOUT; + } + + return status; +} + +static int falcon_memcpy_params_check(struct nvgpu_falcon *flcn, + u32 offset, u32 size, enum falcon_mem_type mem_type, u8 port) +{ + struct gk20a *g = flcn->g; + u32 mem_size 
= 0; + int ret = -EINVAL; + + if (size == 0U) { + nvgpu_err(g, "size is zero"); + goto exit; + } + + if ((offset & 0x3U) != 0U) { + nvgpu_err(g, "offset (0x%08x) not 4-byte aligned", offset); + goto exit; + } + + if (port >= g->ops.falcon.get_ports_count(flcn, mem_type)) { + nvgpu_err(g, "invalid port %u", (u32) port); + goto exit; + } + + mem_size = g->ops.falcon.get_mem_size(flcn, mem_type); + + if (!((offset < mem_size) && ((offset + size) <= mem_size))) { + nvgpu_err(g, "flcn-id 0x%x, copy overflow ", + flcn->flcn_id); + nvgpu_err(g, "total size 0x%x, offset 0x%x, copy size 0x%x", + mem_size, offset, size); + ret = -EINVAL; + goto exit; + } + + ret = 0; + +exit: + return ret; +} + +int nvgpu_falcon_copy_to_dmem(struct nvgpu_falcon *flcn, + u32 dst, u8 *src, u32 size, u8 port) +{ + int status = -EINVAL; + struct gk20a *g; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + if (falcon_memcpy_params_check(flcn, dst, size, MEM_DMEM, port) != 0) { + nvgpu_err(g, "incorrect parameters"); + goto exit; + } + + nvgpu_mutex_acquire(&flcn->dmem_lock); + status = g->ops.falcon.copy_to_dmem(flcn, dst, src, size, port); + nvgpu_mutex_release(&flcn->dmem_lock); + +exit: + return status; +} + +int nvgpu_falcon_copy_to_imem(struct nvgpu_falcon *flcn, + u32 dst, u8 *src, u32 size, u8 port, bool sec, u32 tag) +{ + int status = -EINVAL; + struct gk20a *g; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + if (falcon_memcpy_params_check(flcn, dst, size, MEM_IMEM, port) != 0) { + nvgpu_err(g, "incorrect parameters"); + goto exit; + } + + nvgpu_mutex_acquire(&flcn->imem_lock); + status = g->ops.falcon.copy_to_imem(flcn, dst, src, + size, port, sec, tag); + nvgpu_mutex_release(&flcn->imem_lock); + +exit: + return status; +} + +u32 nvgpu_falcon_mailbox_read(struct nvgpu_falcon *flcn, u32 mailbox_index) +{ + struct gk20a *g; + u32 data = 0; + + if (!is_falcon_valid(flcn)) { + return 0; + } + + g = flcn->g; + + if (mailbox_index >= FALCON_MAILBOX_COUNT) { + nvgpu_err(g, "incorrect mailbox id %d", mailbox_index); + goto exit; + } + + data = g->ops.falcon.mailbox_read(flcn, mailbox_index); + +exit: + return data; +} + +void nvgpu_falcon_mailbox_write(struct nvgpu_falcon *flcn, u32 mailbox_index, + u32 data) +{ + struct gk20a *g; + + if (!is_falcon_valid(flcn)) { + return; + } + + g = flcn->g; + + if (mailbox_index >= FALCON_MAILBOX_COUNT) { + nvgpu_err(g, "incorrect mailbox id %d", mailbox_index); + goto exit; + } + + g->ops.falcon.mailbox_write(flcn, mailbox_index, data); + +exit: + return; +} + +int nvgpu_falcon_hs_ucode_load_bootstrap(struct nvgpu_falcon *flcn, u32 *ucode, + u32 *ucode_header) +{ + struct gk20a *g; + u32 sec_imem_dest = 0U; + int err = 0; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + /* falcon reset */ + err = nvgpu_falcon_reset(flcn); + if (err != 0) { + nvgpu_err(g, "nvgpu_falcon_reset() failed err=%d", err); + return err; + } + + /* setup falcon apertures, boot-config */ + if (flcn->flcn_engine_dep_ops.setup_bootstrap_config != NULL) { + flcn->flcn_engine_dep_ops.setup_bootstrap_config(flcn->g); + } + + /* Copy Non Secure IMEM code */ + err = nvgpu_falcon_copy_to_imem(flcn, 0U, + (u8 *)&ucode[ucode_header[OS_CODE_OFFSET] >> 2U], + ucode_header[OS_CODE_SIZE], 0U, false, + GET_IMEM_TAG(ucode_header[OS_CODE_OFFSET])); + if (err != 0) { + nvgpu_err(g, "HS ucode non-secure code to IMEM failed"); + goto exit; + } + + /* Put secure code after non-secure block */ + sec_imem_dest = 
GET_NEXT_BLOCK(ucode_header[OS_CODE_SIZE]); + + err = nvgpu_falcon_copy_to_imem(flcn, sec_imem_dest, + (u8 *)&ucode[ucode_header[APP_0_CODE_OFFSET] >> 2U], + ucode_header[APP_0_CODE_SIZE], 0U, true, + GET_IMEM_TAG(ucode_header[APP_0_CODE_OFFSET])); + if (err != 0) { + nvgpu_err(g, "HS ucode secure code to IMEM failed"); + goto exit; + } + + /* load DMEM: ensure that signatures are patched */ + err = nvgpu_falcon_copy_to_dmem(flcn, 0U, (u8 *)&ucode[ + ucode_header[OS_DATA_OFFSET] >> 2U], + ucode_header[OS_DATA_SIZE], 0U); + if (err != 0) { + nvgpu_err(g, "HS ucode data copy to DMEM failed"); + goto exit; + } + + /* + * Write non-zero value to mailbox register which is updated by + * HS bin to denote its return status. + */ + nvgpu_falcon_mailbox_write(flcn, FALCON_MAILBOX_0, 0xdeadbeefU); + + /* set BOOTVEC to start of non-secure code */ + g->ops.falcon.bootstrap(flcn, 0U); + +exit: + return err; +} + +u32 nvgpu_falcon_get_id(struct nvgpu_falcon *flcn) +{ + return flcn->flcn_id; +} + +#if defined(CONFIG_NVGPU_NEXT) +bool nvgpu_falcon_is_falcon2_enabled(struct nvgpu_falcon *flcn) +{ + return flcn->is_falcon2_enabled ? true : false; +} + +bool nvgpu_falcon_is_feature_supported(struct nvgpu_falcon *flcn, + u32 feature) +{ + return nvgpu_test_bit(feature, (void *)&flcn->fuse_settings); +} +#endif + +struct nvgpu_falcon *nvgpu_falcon_get_instance(struct gk20a *g, u32 flcn_id) +{ + struct nvgpu_falcon *flcn = NULL; + + switch (flcn_id) { + case FALCON_ID_PMU: + flcn = &g->pmu_flcn; + break; + case FALCON_ID_FECS: + flcn = &g->fecs_flcn; + break; + case FALCON_ID_GPCCS: + flcn = &g->gpccs_flcn; + break; + case FALCON_ID_GSPLITE: + flcn = &g->gsp_flcn; + break; + case FALCON_ID_NVDEC: + flcn = &g->nvdec_flcn; + break; + case FALCON_ID_SEC2: + flcn = &g->sec2.flcn; + break; + case FALCON_ID_MINION: + flcn = &g->minion_flcn; + break; + default: + nvgpu_err(g, "Invalid/Unsupported falcon ID %x", flcn_id); + break; + }; + + return flcn; +} + +static int falcon_sw_chip_init(struct gk20a *g, struct nvgpu_falcon *flcn) +{ + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl); + int err = 0; + + switch (ver) { +#ifdef CONFIG_NVGPU_NON_FUSA + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + gk20a_falcon_sw_init(flcn); + break; + case NVGPU_GPUID_GP10B: + gk20a_falcon_sw_init(flcn); + break; +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_GPUID: + nvgpu_next_falcon_sw_init(flcn); + break; +#endif +#endif + case NVGPU_GPUID_GV11B: + gk20a_falcon_sw_init(flcn); + break; +#ifdef CONFIG_NVGPU_DGPU + case NVGPU_GPUID_TU104: +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_DGPU_GPUID: +#endif + tu104_falcon_sw_init(flcn); + break; +#endif + default: + err = -EINVAL; + nvgpu_err(g, "no support for GPUID %x", ver); + break; + } + + return err; +} + +int nvgpu_falcon_sw_init(struct gk20a *g, u32 flcn_id) +{ + struct nvgpu_falcon *flcn = NULL; + int err = 0; + + flcn = nvgpu_falcon_get_instance(g, flcn_id); + if (flcn == NULL) { + return -ENODEV; + } + + flcn->flcn_id = flcn_id; + flcn->g = g; + + /* call SW init methods to assign flcn base & support of a falcon */ + err = falcon_sw_chip_init(g, flcn); + if (err != 0) { + nvgpu_err(g, "Chip specific falcon sw init failed %d", err); + return err; + } + + nvgpu_mutex_init(&flcn->imem_lock); + nvgpu_mutex_init(&flcn->dmem_lock); + +#ifdef CONFIG_NVGPU_DGPU + if (flcn->emem_supported) { + nvgpu_mutex_init(&flcn->emem_lock); + } +#endif + + return 0; +} + +void nvgpu_falcon_sw_free(struct gk20a *g, u32 flcn_id) +{ + struct nvgpu_falcon *flcn = 
NULL; + + flcn = nvgpu_falcon_get_instance(g, flcn_id); + if (flcn == NULL) { + return; + } + + if (flcn->is_falcon_supported) { + flcn->is_falcon_supported = false; + } else { + nvgpu_log_info(g, "falcon 0x%x not supported on %s", + flcn->flcn_id, g->name); + return; + } + +#ifdef CONFIG_NVGPU_DGPU + if (flcn->emem_supported) { + nvgpu_mutex_destroy(&flcn->emem_lock); + } +#endif + nvgpu_mutex_destroy(&flcn->dmem_lock); + nvgpu_mutex_destroy(&flcn->imem_lock); +} + +void nvgpu_falcon_set_irq(struct nvgpu_falcon *flcn, bool enable, + u32 intr_mask, u32 intr_dest) +{ + struct gk20a *g; + + if (!is_falcon_valid(flcn)) { + return; + } + + g = flcn->g; + + if (!flcn->is_interrupt_enabled) { + nvgpu_warn(g, "Interrupt not supported on flcn 0x%x ", + flcn->flcn_id); + return; + } + + g->ops.falcon.set_irq(flcn, enable, intr_mask, intr_dest); +} + +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_falcon_copy_from_emem(struct nvgpu_falcon *flcn, + u32 src, u8 *dst, u32 size, u8 port) +{ + struct nvgpu_falcon_engine_dependency_ops *flcn_dops; + int status = -EINVAL; + struct gk20a *g; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + flcn_dops = &flcn->flcn_engine_dep_ops; + + if (flcn_dops->copy_from_emem != NULL) { + nvgpu_mutex_acquire(&flcn->emem_lock); + status = flcn_dops->copy_from_emem(g, src, dst, size, port); + nvgpu_mutex_release(&flcn->emem_lock); + } else { + nvgpu_warn(g, "Invalid op on falcon 0x%x ", + flcn->flcn_id); + goto exit; + } + +exit: + return status; +} + +int nvgpu_falcon_copy_to_emem(struct nvgpu_falcon *flcn, + u32 dst, u8 *src, u32 size, u8 port) +{ + struct nvgpu_falcon_engine_dependency_ops *flcn_dops; + int status = -EINVAL; + struct gk20a *g; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + flcn_dops = &flcn->flcn_engine_dep_ops; + + if (flcn_dops->copy_to_emem != NULL) { + nvgpu_mutex_acquire(&flcn->emem_lock); + status = flcn_dops->copy_to_emem(g, dst, src, size, port); + nvgpu_mutex_release(&flcn->emem_lock); + } else { + nvgpu_warn(g, "Invalid op on falcon 0x%x ", + flcn->flcn_id); + goto exit; + } + +exit: + return status; +} +#endif + +#ifdef CONFIG_NVGPU_FALCON_DEBUG +void nvgpu_falcon_dump_stats(struct nvgpu_falcon *flcn) +{ + if (!is_falcon_valid(flcn)) { + return; + } + + flcn->g->ops.falcon.dump_falcon_stats(flcn); +} +#endif + +#ifdef CONFIG_NVGPU_FALCON_NON_FUSA +int nvgpu_falcon_bootstrap(struct nvgpu_falcon *flcn, u32 boot_vector) +{ + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + flcn->g->ops.falcon.bootstrap(flcn, boot_vector); + + return 0; +} + +int nvgpu_falcon_get_mem_size(struct nvgpu_falcon *flcn, + enum falcon_mem_type type, u32 *size) +{ + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + *size = flcn->g->ops.falcon.get_mem_size(flcn, type); + + return 0; +} + +int nvgpu_falcon_clear_halt_intr_status(struct nvgpu_falcon *flcn, + unsigned int timeout) +{ + struct nvgpu_timeout to; + struct gk20a *g; + int status; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + status = nvgpu_timeout_init(g, &to, timeout, NVGPU_TIMER_CPU_TIMER); + if (status != 0) { + return status; + } + + do { + if (g->ops.falcon.clear_halt_interrupt_status(flcn)) { + break; + } + + nvgpu_udelay(1); + } while (nvgpu_timeout_expired(&to) == 0); + + if (nvgpu_timeout_peek_expired(&to)) { + status = -ETIMEDOUT; + } + + return status; +} + +int nvgpu_falcon_copy_from_dmem(struct nvgpu_falcon *flcn, + u32 src, u8 *dst, u32 size, u8 port) +{ + int status = -EINVAL; + struct gk20a *g; + + if 
(!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + if (falcon_memcpy_params_check(flcn, src, size, MEM_DMEM, port) != 0) { + nvgpu_err(g, "incorrect parameters"); + goto exit; + } + + nvgpu_mutex_acquire(&flcn->dmem_lock); + status = g->ops.falcon.copy_from_dmem(flcn, src, dst, size, port); + nvgpu_mutex_release(&flcn->dmem_lock); + +exit: + return status; +} + +int nvgpu_falcon_copy_from_imem(struct nvgpu_falcon *flcn, + u32 src, u8 *dst, u32 size, u8 port) +{ + int status = -EINVAL; + struct gk20a *g; + + if (!is_falcon_valid(flcn)) { + return -EINVAL; + } + + g = flcn->g; + + if (falcon_memcpy_params_check(flcn, src, size, MEM_IMEM, port) != 0) { + nvgpu_err(g, "incorrect parameters"); + goto exit; + } + + nvgpu_mutex_acquire(&flcn->imem_lock); + status = g->ops.falcon.copy_from_imem(flcn, src, dst, size, port); + nvgpu_mutex_release(&flcn->imem_lock); + +exit: + return status; +} + +static void falcon_print_mem(struct nvgpu_falcon *flcn, u32 src, + u32 size, enum falcon_mem_type mem_type) +{ + u32 buff[64] = {0}; + u32 total_block_read = 0; + u32 byte_read_count = 0; + struct gk20a *g; + u32 i = 0; + int status = 0; + + g = flcn->g; + + if (falcon_memcpy_params_check(flcn, src, size, mem_type, 0) != 0) { + nvgpu_err(g, "incorrect parameters"); + return; + } + + nvgpu_info(g, " offset 0x%x size %d bytes", src, size); + + total_block_read = size >> 8; + do { + byte_read_count = + (total_block_read != 0U) ? (u32)sizeof(buff) : size; + + if (byte_read_count == 0U) { + break; + } + + if (mem_type == MEM_DMEM) { + status = nvgpu_falcon_copy_from_dmem(flcn, src, + (u8 *)buff, byte_read_count, 0); + } else { + status = nvgpu_falcon_copy_from_imem(flcn, src, + (u8 *)buff, byte_read_count, 0); + } + + if (status != 0) { + nvgpu_err(g, "MEM print failed"); + break; + } + + for (i = 0U; i < (byte_read_count >> 2U); i += 4U) { + nvgpu_info(g, "0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x", + src + (i << 2U), buff[i], buff[i+1U], + buff[i+2U], buff[i+3U]); + } + + src += byte_read_count; + size -= byte_read_count; + } while (total_block_read-- != 0U); +} + +void nvgpu_falcon_print_dmem(struct nvgpu_falcon *flcn, u32 src, u32 size) +{ + if (!is_falcon_valid(flcn)) { + return; + } + + nvgpu_info(flcn->g, " PRINT DMEM "); + falcon_print_mem(flcn, src, size, MEM_DMEM); +} + +void nvgpu_falcon_print_imem(struct nvgpu_falcon *flcn, u32 src, u32 size) +{ + if (!is_falcon_valid(flcn)) { + return; + } + + nvgpu_info(flcn->g, " PRINT IMEM "); + falcon_print_mem(flcn, src, size, MEM_IMEM); +} + +void nvgpu_falcon_get_ctls(struct nvgpu_falcon *flcn, u32 *sctl, u32 *cpuctl) +{ + if (!is_falcon_valid(flcn)) { + return; + } + + flcn->g->ops.falcon.get_falcon_ctls(flcn, sctl, cpuctl); +} +#endif diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.c b/drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.c new file mode 100644 index 000000000..2f9ed80ed --- /dev/null +++ b/drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include + +#include "falcon_sw_gk20a.h" + +void gk20a_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn) +{ + struct gk20a *g = flcn->g; + struct nvgpu_falcon_engine_dependency_ops *flcn_eng_dep_ops = + &flcn->flcn_engine_dep_ops; + + switch (flcn->flcn_id) { + case FALCON_ID_PMU: + flcn_eng_dep_ops->reset_eng = g->ops.pmu.pmu_reset; + flcn_eng_dep_ops->setup_bootstrap_config = + g->ops.pmu.flcn_setup_boot_config; + break; + default: + /* NULL assignment make sure + * CPU hard reset in gk20a_falcon_reset() gets execute + * if falcon doesn't need specific reset implementation + */ + flcn_eng_dep_ops->reset_eng = NULL; + break; + } +} + +void gk20a_falcon_sw_init(struct nvgpu_falcon *flcn) +{ + struct gk20a *g = flcn->g; + + switch (flcn->flcn_id) { + case FALCON_ID_PMU: + flcn->flcn_base = g->ops.pmu.falcon_base_addr(); + flcn->is_falcon_supported = true; + flcn->is_interrupt_enabled = true; + break; + case FALCON_ID_FECS: + flcn->flcn_base = g->ops.gr.falcon.fecs_base_addr(); + flcn->is_falcon_supported = true; + flcn->is_interrupt_enabled = false; + break; + case FALCON_ID_GPCCS: + flcn->flcn_base = g->ops.gr.falcon.gpccs_base_addr(); + flcn->is_falcon_supported = true; + flcn->is_interrupt_enabled = false; + break; + default: + flcn->is_falcon_supported = false; + break; + } + + if (flcn->is_falcon_supported) { + gk20a_falcon_engine_dependency_ops(flcn); + } else { + nvgpu_log_info(g, "falcon 0x%x not supported on %s", + flcn->flcn_id, g->name); + } +} diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.h b/drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.h new file mode 100644 index 000000000..856de51af --- /dev/null +++ b/drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_FALCON_SW_GK20A_H +#define NVGPU_FALCON_SW_GK20A_H + +void gk20a_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn); +void gk20a_falcon_sw_init(struct nvgpu_falcon *flcn); + +#endif /* NVGPU_FALCON_SW_GK20A_H */ diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.c b/drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.c new file mode 100644 index 000000000..fc92380d6 --- /dev/null +++ b/drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include + +#include "falcon_sw_gk20a.h" +#include "falcon_sw_tu104.h" + +void tu104_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn) +{ + struct nvgpu_falcon_engine_dependency_ops *flcn_eng_dep_ops = + &flcn->flcn_engine_dep_ops; + struct gk20a *g = flcn->g; + + gk20a_falcon_engine_dependency_ops(flcn); + + switch (flcn->flcn_id) { + case FALCON_ID_GSPLITE: + flcn_eng_dep_ops->reset_eng = g->ops.gsp.gsp_reset; + flcn_eng_dep_ops->setup_bootstrap_config = + g->ops.gsp.falcon_setup_boot_config; + break; + case FALCON_ID_SEC2: + flcn_eng_dep_ops->reset_eng = g->ops.sec2.sec2_reset; + flcn_eng_dep_ops->setup_bootstrap_config = + g->ops.sec2.flcn_setup_boot_config; + flcn_eng_dep_ops->copy_to_emem = g->ops.sec2.sec2_copy_to_emem; + flcn_eng_dep_ops->copy_from_emem = + g->ops.sec2.sec2_copy_from_emem; + break; + default: + flcn_eng_dep_ops->reset_eng = NULL; + break; + } +} + +void tu104_falcon_sw_init(struct nvgpu_falcon *flcn) +{ + struct gk20a *g = flcn->g; + + switch (flcn->flcn_id) { + case FALCON_ID_GSPLITE: + flcn->flcn_base = g->ops.gsp.falcon_base_addr(); + flcn->is_falcon_supported = true; + flcn->is_interrupt_enabled = false; + break; + case FALCON_ID_SEC2: + flcn->flcn_base = g->ops.sec2.falcon_base_addr(); + flcn->is_falcon_supported = true; + flcn->is_interrupt_enabled = true; + flcn->emem_supported = true; + break; + case FALCON_ID_MINION: + flcn->flcn_base = g->ops.nvlink.minion.base_addr(g); + flcn->is_falcon_supported = true; + flcn->is_interrupt_enabled = true; + break; + case FALCON_ID_NVDEC: + flcn->flcn_base = g->ops.nvdec.falcon_base_addr(); + flcn->is_falcon_supported = true; + flcn->is_interrupt_enabled = true; + break; + default: + /* + * set false to inherit falcon support + * from previous chips HAL + */ + flcn->is_falcon_supported = false; + break; + } + + if (flcn->is_falcon_supported) { + tu104_falcon_engine_dependency_ops(flcn); + } else { + /* + * Forward call to previous chip's SW init + * to fetch info for requested + * falcon as no changes between + * current & previous chips. + */ + gk20a_falcon_sw_init(flcn); + } +} diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.h b/drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.h new file mode 100644 index 000000000..72556c0f3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_FALCON_SW_TU104_H +#define NVGPU_FALCON_SW_TU104_H + +void tu104_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn); +void tu104_falcon_sw_init(struct nvgpu_falcon *flcn); + +#endif /* NVGPU_FALCON_SW_TU104_H */ diff --git a/drivers/gpu/nvgpu/common/fb/fb.c b/drivers/gpu/nvgpu/common/fb/fb.c new file mode 100644 index 000000000..d82568589 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fb/fb.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +int nvgpu_init_fb_support(struct gk20a *g) +{ + if (g->ops.mc.fb_reset != NULL) { + g->ops.mc.fb_reset(g); + } + + nvgpu_cg_slcg_fb_ltc_load_enable(g); + + nvgpu_cg_blcg_fb_ltc_load_enable(g); + + if (g->ops.fb.init_fs_state != NULL) { + g->ops.fb.init_fs_state(g); + } + return 0; +} diff --git a/drivers/gpu/nvgpu/common/fbp/fbp.c b/drivers/gpu/nvgpu/common/fbp/fbp.c new file mode 100644 index 000000000..2057f108e --- /dev/null +++ b/drivers/gpu/nvgpu/common/fbp/fbp.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "fbp_priv.h" + +int nvgpu_fbp_init_support(struct gk20a *g) +{ + struct nvgpu_fbp *fbp; + u32 fbp_en_mask; +#ifdef CONFIG_NVGPU_NON_FUSA + u32 max_ltc_per_fbp; + u32 rop_l2_all_en; + unsigned long i; + unsigned long fbp_en_mask_tmp; + u32 tmp; +#endif + + if (g->fbp != NULL) { + return 0; + } + + fbp = nvgpu_kzalloc(g, sizeof(*fbp)); + if (fbp == NULL) { + return -ENOMEM; + } + +#ifdef CONFIG_NVGPU_NON_FUSA + fbp->num_fbps = g->ops.priv_ring.get_fbp_count(g); + nvgpu_log_info(g, "fbps: %d", fbp->num_fbps); +#endif + + fbp->max_fbps_count = g->ops.top.get_max_fbps_count(g); + nvgpu_log_info(g, "max_fbps_count: %d", fbp->max_fbps_count); + + /* + * Read active fbp mask from fuse + * Note that 0:enable and 1:disable in value read from fuse so we've to + * flip the bits. + * Also set unused bits to zero + */ + fbp_en_mask = g->ops.fuse.fuse_status_opt_fbp(g); + fbp_en_mask = ~fbp_en_mask; + fbp_en_mask = fbp_en_mask & + nvgpu_safe_sub_u32(BIT32(fbp->max_fbps_count), 1U); + fbp->fbp_en_mask = fbp_en_mask; + +#ifdef CONFIG_NVGPU_NON_FUSA + fbp->fbp_rop_l2_en_mask = + nvgpu_kzalloc(g, + nvgpu_safe_mult_u64(fbp->max_fbps_count, sizeof(u32))); + if (fbp->fbp_rop_l2_en_mask == NULL) { + nvgpu_kfree(g, fbp); + return -ENOMEM; + } + + fbp_en_mask_tmp = fbp_en_mask; + max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g); + rop_l2_all_en = nvgpu_safe_sub_u32(BIT32(max_ltc_per_fbp), 1U); + + /* mask of Rop_L2 for each FBP */ + for_each_set_bit(i, &fbp_en_mask_tmp, fbp->max_fbps_count) { + tmp = g->ops.fuse.fuse_status_opt_rop_l2_fbp(g, i); + fbp->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp; + } +#endif + + g->fbp = fbp; + + return 0; +} + +void nvgpu_fbp_remove_support(struct gk20a *g) +{ + struct nvgpu_fbp *fbp = g->fbp; + + if (fbp != NULL) { + nvgpu_kfree(g, fbp->fbp_rop_l2_en_mask); + nvgpu_kfree(g, fbp); + } + + g->fbp = NULL; +} + +u32 nvgpu_fbp_get_max_fbps_count(struct nvgpu_fbp *fbp) +{ + return fbp->max_fbps_count; +} + +u32 nvgpu_fbp_get_fbp_en_mask(struct nvgpu_fbp *fbp) +{ + return fbp->fbp_en_mask; +} + +#ifdef CONFIG_NVGPU_NON_FUSA +u32 nvgpu_fbp_get_num_fbps(struct nvgpu_fbp *fbp) +{ + return fbp->num_fbps; +} + +u32 *nvgpu_fbp_get_rop_l2_en_mask(struct nvgpu_fbp *fbp) +{ + return fbp->fbp_rop_l2_en_mask; +} +#endif + diff --git a/drivers/gpu/nvgpu/common/fbp/fbp_priv.h b/drivers/gpu/nvgpu/common/fbp/fbp_priv.h new file mode 100644 index 000000000..1442e4776 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fbp/fbp_priv.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_FBP_PRIV_H +#define NVGPU_FBP_PRIV_H + +struct nvgpu_fbp { + u32 num_fbps; + u32 max_fbps_count; + u32 fbp_en_mask; + u32 *fbp_rop_l2_en_mask; +}; + +#endif /* NVGPU_FBP_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/fence/fence.c b/drivers/gpu/nvgpu/common/fence/fence.c new file mode 100644 index 000000000..1b56c4040 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fence/fence.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "fence_priv.h" + +static struct nvgpu_fence_type *nvgpu_fence_from_ref(struct nvgpu_ref *ref) +{ + return (struct nvgpu_fence_type *)((uintptr_t)ref - + offsetof(struct nvgpu_fence_type, priv.ref)); +} + +static void nvgpu_fence_release(struct nvgpu_ref *ref) +{ + struct nvgpu_fence_type *f = nvgpu_fence_from_ref(ref); + struct nvgpu_fence_type_priv *pf = &f->priv; + + if (nvgpu_os_fence_is_initialized(&pf->os_fence)) { + pf->os_fence.ops->drop_ref(&pf->os_fence); + } + + pf->ops->release(f); +} + +void nvgpu_fence_put(struct nvgpu_fence_type *f) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + nvgpu_ref_put(&pf->ref, nvgpu_fence_release); +} + +struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + nvgpu_ref_get(&pf->ref); + return f; +} + +/* + * Extract an object to be passed to the userspace as a result of a submitted + * job. This must be balanced with a call to nvgpu_user_fence_release(). + */ +struct nvgpu_user_fence nvgpu_fence_extract_user(struct nvgpu_fence_type *f) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + struct nvgpu_user_fence uf = (struct nvgpu_user_fence) { +#ifdef CONFIG_TEGRA_GK20A_NVHOST + .syncpt_id = pf->syncpt_id, + .syncpt_value = pf->syncpt_value, +#endif + .os_fence = pf->os_fence, + }; + + /* + * The os fence member has to live so it can be signaled when the job + * completes. The returned user fence may live longer than that before + * being safely attached to an fd if the job completes before a + * submission ioctl finishes, or if it's stored for cde job state + * tracking. 
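+ *
+ * Illustrative caller-side sketch only (the exact release call signature
+ * is assumed here, not taken from this change):
+ *
+ *   struct nvgpu_user_fence uf = nvgpu_fence_extract_user(f);
+ *   // ...attach uf.os_fence to an fd, or copy out the syncpt id/value...
+ *   nvgpu_user_fence_release(&uf);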
+ */ + if (nvgpu_os_fence_is_initialized(&pf->os_fence)) { + pf->os_fence.ops->dup(&pf->os_fence); + } + + return uf; +} + +int nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f, + u32 timeout) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + if (!nvgpu_platform_is_silicon(g)) { + timeout = U32_MAX; + } + return pf->ops->wait(f, timeout); +} + +bool nvgpu_fence_is_expired(struct nvgpu_fence_type *f) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + return pf->ops->is_expired(f); +} + +void nvgpu_fence_init(struct nvgpu_fence_type *f, + const struct nvgpu_fence_ops *ops, + struct nvgpu_os_fence os_fence) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + nvgpu_ref_init(&pf->ref); + pf->ops = ops; + pf->os_fence = os_fence; +} diff --git a/drivers/gpu/nvgpu/common/fence/fence_priv.h b/drivers/gpu/nvgpu/common/fence/fence_priv.h new file mode 100644 index 000000000..1a95abc2c --- /dev/null +++ b/drivers/gpu/nvgpu/common/fence/fence_priv.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_FENCE_PRIV_H +#define NVGPU_FENCE_PRIV_H + +#include + +struct nvgpu_fence_type; + +struct nvgpu_fence_ops { + int (*wait)(struct nvgpu_fence_type *f, u32 timeout); + bool (*is_expired)(struct nvgpu_fence_type *f); + void (*release)(struct nvgpu_fence_type *f); +}; + +void nvgpu_fence_init(struct nvgpu_fence_type *f, + const struct nvgpu_fence_ops *ops, + struct nvgpu_os_fence os_fence); + +#endif diff --git a/drivers/gpu/nvgpu/common/fence/fence_sema.c b/drivers/gpu/nvgpu/common/fence/fence_sema.c new file mode 100644 index 000000000..c68188c8a --- /dev/null +++ b/drivers/gpu/nvgpu/common/fence/fence_sema.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include "fence_priv.h" + +static int nvgpu_fence_semaphore_wait(struct nvgpu_fence_type *f, u32 timeout) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + if (!nvgpu_semaphore_is_acquired(pf->semaphore)) { + return 0; + } + + return NVGPU_COND_WAIT_INTERRUPTIBLE( + pf->semaphore_wq, + !nvgpu_semaphore_is_acquired(pf->semaphore), + timeout); +} + +static bool nvgpu_fence_semaphore_is_expired(struct nvgpu_fence_type *f) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + return !nvgpu_semaphore_is_acquired(pf->semaphore); +} + +static void nvgpu_fence_semaphore_release(struct nvgpu_fence_type *f) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + if (pf->semaphore != NULL) { + nvgpu_semaphore_put(pf->semaphore); + } +} + +static const struct nvgpu_fence_ops nvgpu_fence_semaphore_ops = { + .wait = nvgpu_fence_semaphore_wait, + .is_expired = nvgpu_fence_semaphore_is_expired, + .release = nvgpu_fence_semaphore_release, +}; + +/* This function takes ownership of the semaphore as well as the os_fence */ +void nvgpu_fence_from_semaphore( + struct nvgpu_fence_type *f, + struct nvgpu_semaphore *semaphore, + struct nvgpu_cond *semaphore_wq, + struct nvgpu_os_fence os_fence) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + nvgpu_fence_init(f, &nvgpu_fence_semaphore_ops, os_fence); + + pf->semaphore = semaphore; + pf->semaphore_wq = semaphore_wq; +} diff --git a/drivers/gpu/nvgpu/common/fence/fence_syncpt.c b/drivers/gpu/nvgpu/common/fence/fence_syncpt.c new file mode 100644 index 000000000..f57d1e54d --- /dev/null +++ b/drivers/gpu/nvgpu/common/fence/fence_syncpt.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include "fence_priv.h" + +static int nvgpu_fence_syncpt_wait(struct nvgpu_fence_type *f, u32 timeout) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + return nvgpu_nvhost_syncpt_wait_timeout_ext( + pf->nvhost_device, pf->syncpt_id, pf->syncpt_value, + timeout, NVGPU_NVHOST_DEFAULT_WAITER); +} + +static bool nvgpu_fence_syncpt_is_expired(struct nvgpu_fence_type *f) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + /* + * In cases we don't register a notifier, we can't expect the + * syncpt value to be updated. For this case, we force a read + * of the value from HW, and then check for expiration. + */ + if (!nvgpu_nvhost_syncpt_is_expired_ext(pf->nvhost_device, + pf->syncpt_id, pf->syncpt_value)) { + int err; + u32 val; + + err = nvgpu_nvhost_syncpt_read_ext_check(pf->nvhost_device, + pf->syncpt_id, &val); + WARN(err != 0, "syncpt read failed??"); + if (err == 0) { + return nvgpu_nvhost_syncpt_is_expired_ext( + pf->nvhost_device, + pf->syncpt_id, pf->syncpt_value); + } else { + return false; + } + } + + return true; +} + +static void nvgpu_fence_syncpt_release(struct nvgpu_fence_type *f) +{ +} + +static const struct nvgpu_fence_ops nvgpu_fence_syncpt_ops = { + .wait = nvgpu_fence_syncpt_wait, + .is_expired = nvgpu_fence_syncpt_is_expired, + .release = nvgpu_fence_syncpt_release, +}; + +/* This function takes the ownership of the os_fence */ +void nvgpu_fence_from_syncpt( + struct nvgpu_fence_type *f, + struct nvgpu_nvhost_dev *nvhost_device, + u32 id, u32 value, struct nvgpu_os_fence os_fence) +{ + struct nvgpu_fence_type_priv *pf = &f->priv; + + nvgpu_fence_init(f, &nvgpu_fence_syncpt_ops, os_fence); + + pf->nvhost_device = nvhost_device; + pf->syncpt_id = id; + pf->syncpt_value = value; +} diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c new file mode 100644 index 000000000..a4a808e64 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -0,0 +1,2304 @@ +/* + * GK20A Graphics channel + * + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_DEBUGGER +#include +#endif +#include +#include + +#include "channel_wdt.h" +#include "channel_worker.h" + +static void free_channel(struct nvgpu_fifo *f, struct nvgpu_channel *ch); +static void channel_dump_ref_actions(struct nvgpu_channel *ch); + +static int channel_setup_ramfc(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args, + u64 gpfifo_gpu_va, u32 gpfifo_size); + +/* allocate GPU channel */ +static struct nvgpu_channel *allocate_channel(struct nvgpu_fifo *f) +{ + struct nvgpu_channel *ch = NULL; + struct gk20a *g = f->g; + + nvgpu_mutex_acquire(&f->free_chs_mutex); + if (!nvgpu_list_empty(&f->free_chs)) { + ch = nvgpu_list_first_entry(&f->free_chs, nvgpu_channel, + free_chs); + nvgpu_list_del(&ch->free_chs); +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(nvgpu_atomic_read(&ch->ref_count) != 0); + WARN_ON(ch->referenceable); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + f->used_channels = nvgpu_safe_add_u32(f->used_channels, 1U); + } + nvgpu_mutex_release(&f->free_chs_mutex); + + if ((g->aggressive_sync_destroy_thresh != 0U) && + (f->used_channels > + g->aggressive_sync_destroy_thresh)) { + g->aggressive_sync_destroy = true; + } + + return ch; +} + +static void free_channel(struct nvgpu_fifo *f, + struct nvgpu_channel *ch) +{ + struct gk20a *g = f->g; + +#ifdef CONFIG_NVGPU_TRACE + trace_gk20a_release_used_channel(ch->chid); +#endif + /* refcount is zero here and channel is in a freed/dead state */ + nvgpu_mutex_acquire(&f->free_chs_mutex); + /* add to head to increase visibility of timing-related bugs */ + nvgpu_list_add(&ch->free_chs, &f->free_chs); + f->used_channels = nvgpu_safe_sub_u32(f->used_channels, 1U); + nvgpu_mutex_release(&f->free_chs_mutex); + + /* + * On teardown it is not possible to dereference platform, but ignoring + * this is fine then because no new channels would be created. 
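+ *
+ * The check below mirrors allocate_channel(): with a nonzero
+ * aggressive_sync_destroy_thresh (64, say, purely as an example value),
+ * aggressive sync destruction is switched on once the used-channel count
+ * climbs above the threshold and switched back off here when it drops
+ * below it again; a threshold of 0 leaves the mechanism disabled.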
+ */ + if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + if ((g->aggressive_sync_destroy_thresh != 0U) && + (f->used_channels < + g->aggressive_sync_destroy_thresh)) { + g->aggressive_sync_destroy = false; + } + } +} + +void nvgpu_channel_commit_va(struct nvgpu_channel *c) +{ + struct gk20a *g = c->g; + + nvgpu_log_fn(g, " "); + + if (g->ops.mm.init_inst_block_for_subctxs != NULL) { + u32 subctx_count = nvgpu_channel_get_max_subctx_count(c); + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_mig, + "chid: %d max_subctx_count[%u] ", + c->chid, subctx_count); + g->ops.mm.init_inst_block_for_subctxs(&c->inst_block, c->vm, + c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG], + subctx_count); + } else { + g->ops.mm.init_inst_block(&c->inst_block, c->vm, + c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]); + } +} + +int nvgpu_channel_update_runlist(struct nvgpu_channel *c, bool add) +{ + return c->g->ops.runlist.update(c->g, c->runlist, c, add, true); +} + +int nvgpu_channel_enable_tsg(struct gk20a *g, struct nvgpu_channel *ch) +{ + struct nvgpu_tsg *tsg; + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + g->ops.tsg.enable(tsg); + return 0; + } else { + nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid); + return -EINVAL; + } +} + +int nvgpu_channel_disable_tsg(struct gk20a *g, struct nvgpu_channel *ch) +{ + struct nvgpu_tsg *tsg; + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + g->ops.tsg.disable(tsg); + return 0; + } else { + nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid); + return -EINVAL; + } +} + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT +void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) +{ + /* ensure no fences are pending */ + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->sync != NULL) { + nvgpu_channel_sync_set_min_eq_max(ch->sync); + } + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + if (ch->user_sync != NULL) { + nvgpu_channel_user_syncpt_set_safe_state(ch->user_sync); + } +#endif + nvgpu_mutex_release(&ch->sync_lock); + + /* The update to flush the job queue is only needed to process + * nondeterministic resources and ch wdt timeouts. Any others are + * either nonexistent or preallocated from pools that can be killed in + * one go on deterministic channels; take a look at what would happen + * in nvgpu_channel_clean_up_deterministic_job() and what + * nvgpu_submit_deterministic() requires. + */ + if (!nvgpu_channel_is_deterministic(ch)) { + /* + * When closing the channel, this scheduled update holds one + * channel ref which is waited for before advancing with + * freeing. 
+ */ + nvgpu_channel_update(ch); + } +} + +static void channel_kernelmode_deinit(struct nvgpu_channel *ch) +{ + struct vm_gk20a *ch_vm = ch->vm; + + nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem); +#ifdef CONFIG_NVGPU_DGPU + nvgpu_big_free(ch->g, ch->gpfifo.pipe); +#endif + (void) memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); + + if (ch->priv_cmd_q != NULL) { + nvgpu_priv_cmdbuf_queue_free(ch->priv_cmd_q); + ch->priv_cmd_q = NULL; + } + + nvgpu_channel_joblist_deinit(ch); + + /* sync must be destroyed before releasing channel vm */ + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->sync != NULL) { + nvgpu_channel_sync_destroy(ch->sync); + ch->sync = NULL; + } + nvgpu_mutex_release(&ch->sync_lock); +} + +#ifdef CONFIG_TEGRA_GK20A_NVHOST +int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + struct nvgpu_channel_sync_syncpt *sync_syncpt; + u32 new_syncpt = 0U; + u32 old_syncpt = g->ops.ramfc.get_syncpt(ch); + int err = 0; + + if (ch->sync != NULL) { + sync_syncpt = nvgpu_channel_sync_to_syncpt(ch->sync); + if (sync_syncpt != NULL) { + new_syncpt = + nvgpu_channel_sync_get_syncpt_id(sync_syncpt); + } else { + new_syncpt = NVGPU_INVALID_SYNCPT_ID; + /* ??? */ + return -EINVAL; + } + } else { + return -EINVAL; + } + + if ((new_syncpt != 0U) && (new_syncpt != old_syncpt)) { + /* disable channel */ + err = nvgpu_channel_disable_tsg(g, ch); + if (err != 0) { + nvgpu_err(g, "failed to disable channel/TSG"); + return err; + } + + /* preempt the channel */ + err = nvgpu_preempt_channel(g, ch); + nvgpu_assert(err == 0); + if (err != 0 ) { + goto out; + } + /* no error at this point */ + g->ops.ramfc.set_syncpt(ch, new_syncpt); + + err = nvgpu_channel_enable_tsg(g, ch); + if (err != 0) { + nvgpu_err(g, "failed to enable channel/TSG"); + } + } + + nvgpu_log_fn(g, "done"); + return err; +out: + if (nvgpu_channel_enable_tsg(g, ch) != 0) { + nvgpu_err(g, "failed to enable channel/TSG"); + } + return err; +} +#endif + +static int channel_setup_kernelmode(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + u32 gpfifo_size, gpfifo_entry_size; + u64 gpfifo_gpu_va; + u32 job_count; + + int err = 0; + struct gk20a *g = c->g; + + gpfifo_size = args->num_gpfifo_entries; + gpfifo_entry_size = nvgpu_get_gpfifo_entry_size(); + + err = nvgpu_dma_alloc_map_sys(c->vm, + (size_t)gpfifo_size * (size_t)gpfifo_entry_size, + &c->gpfifo.mem); + if (err != 0) { + nvgpu_err(g, "memory allocation failed"); + goto clean_up; + } + +#ifdef CONFIG_NVGPU_DGPU + if (c->gpfifo.mem.aperture == APERTURE_VIDMEM) { + c->gpfifo.pipe = nvgpu_big_malloc(g, + (size_t)gpfifo_size * + (size_t)gpfifo_entry_size); + if (c->gpfifo.pipe == NULL) { + err = -ENOMEM; + goto clean_up_unmap; + } + } +#endif + gpfifo_gpu_va = c->gpfifo.mem.gpu_va; + + c->gpfifo.entry_num = gpfifo_size; + c->gpfifo.get = 0; + c->gpfifo.put = 0; + + nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", + c->chid, gpfifo_gpu_va, c->gpfifo.entry_num); + + g->ops.userd.init_mem(g, c); + + if (g->aggressive_sync_destroy_thresh == 0U) { + nvgpu_mutex_acquire(&c->sync_lock); + c->sync = nvgpu_channel_sync_create(c); + if (c->sync == NULL) { + err = -ENOMEM; + nvgpu_mutex_release(&c->sync_lock); + goto clean_up_unmap; + } + nvgpu_mutex_release(&c->sync_lock); + + if (g->ops.channel.set_syncpt != NULL) { + err = g->ops.channel.set_syncpt(c); + if (err != 0) { + goto clean_up_sync; + } + } + } + + err = channel_setup_ramfc(c, args, gpfifo_gpu_va, + c->gpfifo.entry_num); + + if (err != 0) { + goto clean_up_sync; + } + + /* 
+ * Allocate priv cmdbuf space for pre and post fences. If the inflight + * job count isn't specified, we base it on the gpfifo count. We + * multiply by a factor of 1/3 because at most a third of the GPFIFO + * entries can be used for user-submitted jobs; another third goes to + * wait entries, and the final third to incr entries. There will be one + * pair of acq and incr commands for each job. + */ + job_count = args->num_inflight_jobs; + if (job_count == 0U) { + /* + * Round up so the allocation behaves nicely with a very small + * gpfifo, and to be able to use all slots when the entry count + * would be one too small for both wait and incr commands. An + * increment would then still just fit. + * + * gpfifo_size is required to be at most 2^31 earlier. + */ + job_count = nvgpu_safe_add_u32(gpfifo_size, 2U) / 3U; + } + + err = nvgpu_channel_joblist_init(c, job_count); + if (err != 0) { + goto clean_up_sync; + } + + err = nvgpu_priv_cmdbuf_queue_alloc(c->vm, job_count, &c->priv_cmd_q); + if (err != 0) { + goto clean_up_prealloc; + } + + err = nvgpu_channel_update_runlist(c, true); + if (err != 0) { + goto clean_up_priv_cmd; + } + + return 0; + +clean_up_priv_cmd: + nvgpu_priv_cmdbuf_queue_free(c->priv_cmd_q); + c->priv_cmd_q = NULL; +clean_up_prealloc: + nvgpu_channel_joblist_deinit(c); +clean_up_sync: + if (c->sync != NULL) { + nvgpu_channel_sync_destroy(c->sync); + c->sync = NULL; + } +clean_up_unmap: +#ifdef CONFIG_NVGPU_DGPU + nvgpu_big_free(g, c->gpfifo.pipe); +#endif + nvgpu_dma_unmap_free(c->vm, &c->gpfifo.mem); +clean_up: + (void) memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); + + return err; + +} + +/* Update with this periodically to determine how the gpfifo is draining. */ +static inline u32 channel_update_gpfifo_get(struct gk20a *g, + struct nvgpu_channel *c) +{ + u32 new_get = g->ops.userd.gp_get(g, c); + + c->gpfifo.get = new_get; + return new_get; +} + +u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch) +{ + return (ch->gpfifo.entry_num - (ch->gpfifo.put - ch->gpfifo.get) - 1U) % + ch->gpfifo.entry_num; +} + +u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(struct nvgpu_channel *ch) +{ + (void)channel_update_gpfifo_get(ch->g, ch); + return nvgpu_channel_get_gpfifo_free_count(ch); +} + +int nvgpu_channel_add_job(struct nvgpu_channel *c, + struct nvgpu_channel_job *job, + bool skip_buffer_refcounting) +{ + struct vm_gk20a *vm = c->vm; + struct nvgpu_mapped_buf **mapped_buffers = NULL; + int err = 0; + u32 num_mapped_buffers = 0; + + if (!skip_buffer_refcounting) { + err = nvgpu_vm_get_buffers(vm, &mapped_buffers, + &num_mapped_buffers); + if (err != 0) { + return err; + } + } + + if (c != NULL) { + job->num_mapped_buffers = num_mapped_buffers; + job->mapped_buffers = mapped_buffers; + + nvgpu_channel_launch_wdt(c); + + nvgpu_channel_joblist_lock(c); + nvgpu_channel_joblist_add(c, job); + nvgpu_channel_joblist_unlock(c); + } else { + err = -ETIMEDOUT; + goto err_put_buffers; + } + + return 0; + +err_put_buffers: + nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); + + return err; +} + +/** + * Release preallocated job resources from a job that's known to be completed. + */ +static void nvgpu_channel_finalize_job(struct nvgpu_channel *c, + struct nvgpu_channel_job *job) +{ + /* + * On deterministic channels, this fence is just backed by a raw + * syncpoint. On nondeterministic channels the fence may be backed by a + * semaphore or even a syncfd. 
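+ *
+ * Dropping the reference below typically releases the last ref taken in
+ * nvgpu_fence_init(); nvgpu_fence_release() then drops any attached
+ * os_fence reference and calls the backend release op (semaphore put, or
+ * a no-op for syncpoints; see fence_sema.c and fence_syncpt.c above).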
+ */ + nvgpu_fence_put(&job->post_fence); + + /* + * Free the private command buffers (in order of allocation) + */ + if (job->wait_cmd != NULL) { + nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd); + } + nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd); + + nvgpu_channel_free_job(c, job); + + nvgpu_channel_joblist_lock(c); + nvgpu_channel_joblist_delete(c, job); + nvgpu_channel_joblist_unlock(c); +} + +/** + * Clean up job resources for further jobs to use. + * + * Loop all jobs from the joblist until a pending job is found. Pending jobs + * are detected from the job's post fence, so this is only done for jobs that + * have job tracking resources. Free all per-job memory for completed jobs; in + * case of preallocated resources, this opens up slots for new jobs to be + * submitted. + */ +void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c) +{ + struct vm_gk20a *vm; + struct nvgpu_channel_job *job; + struct gk20a *g; + bool job_finished = false; + bool watchdog_on = false; + + if (nvgpu_is_powered_off(c->g)) { /* shutdown case */ + return; + } + + vm = c->vm; + g = c->g; + + nvgpu_assert(!nvgpu_channel_is_deterministic(c)); + + watchdog_on = nvgpu_channel_wdt_stop(c->wdt); + + while (true) { + bool completed; + + nvgpu_channel_joblist_lock(c); + job = nvgpu_channel_joblist_peek(c); + nvgpu_channel_joblist_unlock(c); + + if (job == NULL) { + /* + * No jobs in flight, timeout will remain stopped until + * new jobs are submitted. + */ + break; + } + + completed = nvgpu_fence_is_expired(&job->post_fence); + if (!completed) { + /* + * The watchdog eventually sees an updated gp_get if + * something happened in this loop. A new job can have + * been submitted between the above call to stop and + * this - in that case, this is a no-op and the new + * later timeout is still used. + */ + if (watchdog_on) { + nvgpu_channel_wdt_continue(c->wdt); + } + break; + } + + WARN_ON(c->sync == NULL); + + if (c->sync != NULL) { + if (c->has_os_fence_framework_support && + g->os_channel.os_fence_framework_inst_exists(c)) { + g->os_channel.signal_os_fence_framework(c, + &job->post_fence); + } + + if (g->aggressive_sync_destroy_thresh != 0U) { + nvgpu_mutex_acquire(&c->sync_lock); + if (nvgpu_channel_sync_put_ref_and_check(c->sync) + && g->aggressive_sync_destroy) { + nvgpu_channel_sync_destroy(c->sync); + c->sync = NULL; + } + nvgpu_mutex_release(&c->sync_lock); + } + } + + if (job->num_mapped_buffers != 0U) { + nvgpu_vm_put_buffers(vm, job->mapped_buffers, + job->num_mapped_buffers); + } + + nvgpu_channel_finalize_job(c, job); + + job_finished = true; + + /* taken in nvgpu_submit_nondeterministic() */ + gk20a_idle(g); + } + + if ((job_finished) && + (g->os_channel.work_completion_signal != NULL)) { + g->os_channel.work_completion_signal(c); + } +} + +/** + * Clean up one job if any to provide space for a new submit. + * + * Deterministic channels do very little in the submit path, so the cleanup + * code does not do much either. This assumes the preconditions that + * deterministic channels are missing features such as timeouts and mapped + * buffers. 
+ */ +void nvgpu_channel_clean_up_deterministic_job(struct nvgpu_channel *c) +{ + struct nvgpu_channel_job *job; + + nvgpu_assert(nvgpu_channel_is_deterministic(c)); + + nvgpu_channel_joblist_lock(c); + job = nvgpu_channel_joblist_peek(c); + nvgpu_channel_joblist_unlock(c); + + if (job == NULL) { + /* Nothing queued */ + return; + } + + nvgpu_assert(job->num_mapped_buffers == 0U); + + if (nvgpu_fence_is_expired(&job->post_fence)) { + nvgpu_channel_finalize_job(c, job); + } +} + +/** + * Schedule a job cleanup work on this channel to free resources and to signal + * about completion. + * + * Call this when there has been an interrupt about finished jobs, or when job + * cleanup needs to be performed, e.g., when closing a channel. This is always + * safe to call even if there is nothing to clean up. Any visible actions on + * jobs just before calling this are guaranteed to be processed. + */ +void nvgpu_channel_update(struct nvgpu_channel *c) +{ + if (nvgpu_is_powered_off(c->g)) { /* shutdown case */ + return; + } +#ifdef CONFIG_NVGPU_TRACE + trace_nvgpu_channel_update(c->chid); +#endif + /* A queued channel is always checked for job cleanup. */ + nvgpu_channel_worker_enqueue(c); +} + +bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, + u32 timeout_delta_ms, bool *progress) +{ + u32 gpfifo_get; + + if (ch->usermode_submit_enabled) { + ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms; + *progress = false; + goto done; + } + + gpfifo_get = channel_update_gpfifo_get(ch->g, ch); + + if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) { + /* didn't advance since previous ctxsw timeout check */ + ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms; + *progress = false; + } else { + /* first ctxsw timeout isr encountered */ + ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms; + *progress = true; + } + + ch->ctxsw_timeout_gpfifo_get = gpfifo_get; + +done: + return nvgpu_is_timeouts_enabled(ch->g) && + ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms; +} + +#else + +void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) +{ + /* ensure no fences are pending */ + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->user_sync != NULL) { + nvgpu_channel_user_syncpt_set_safe_state(ch->user_sync); + } + nvgpu_mutex_release(&ch->sync_lock); +} + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + +void nvgpu_channel_set_unserviceable(struct nvgpu_channel *ch) +{ + nvgpu_spinlock_acquire(&ch->unserviceable_lock); + ch->unserviceable = true; + nvgpu_spinlock_release(&ch->unserviceable_lock); +} + +bool nvgpu_channel_check_unserviceable(struct nvgpu_channel *ch) +{ + bool unserviceable_status; + + nvgpu_spinlock_acquire(&ch->unserviceable_lock); + unserviceable_status = ch->unserviceable; + nvgpu_spinlock_release(&ch->unserviceable_lock); + + return unserviceable_status; +} + +void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + + nvgpu_log_fn(ch->g, " "); + + if (tsg != NULL) { + return nvgpu_tsg_abort(ch->g, tsg, channel_preempt); + } else { + nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid); + } +} + +void nvgpu_channel_wait_until_counter_is_N( + struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value, + struct nvgpu_cond *c, const char *caller, const char *counter_name) +{ + while (true) { + if (NVGPU_COND_WAIT( + c, + nvgpu_atomic_read(counter) == wait_value, + 5000U) == 0) { + break; + } + + nvgpu_warn(ch->g, + "%s: channel %d, still waiting, %s left: %d, waiting for: %d", + 
caller, ch->chid, counter_name, + nvgpu_atomic_read(counter), wait_value); + + channel_dump_ref_actions(ch); + } +} + +static void nvgpu_channel_usermode_deinit(struct nvgpu_channel *ch) +{ + nvgpu_channel_free_usermode_buffers(ch); +#ifdef CONFIG_NVGPU_USERD + (void) nvgpu_userd_init_channel(ch->g, ch); +#endif + ch->usermode_submit_enabled = false; +} + +static void channel_free_invoke_unbind(struct nvgpu_channel *ch) +{ + int err = 0; + struct nvgpu_tsg *tsg; + struct gk20a *g = ch->g; + + if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + /* abort channel and remove from runlist */ + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + /* Between tsg is not null and unbind_channel call, + * ioctl cannot be called anymore because user doesn't + * have an open channel fd anymore to use for the unbind + * ioctl. + */ + err = nvgpu_tsg_force_unbind_channel(tsg, ch); + if (err != 0) { + nvgpu_err(g, + "failed to unbind channel %d from TSG", + ch->chid); + } + } else { + /* + * Channel is already unbound from TSG by User with + * explicit call + * Nothing to do here in that case + */ + } + } +} + +static void channel_free_invoke_deferred_engine_reset(struct nvgpu_channel *ch) +{ +#ifdef CONFIG_NVGPU_DEBUGGER + struct gk20a *g = ch->g; + struct nvgpu_fifo *f = &g->fifo; + bool deferred_reset_pending; + + /* if engine reset was deferred, perform it now */ + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + deferred_reset_pending = g->fifo.deferred_reset_pending; + nvgpu_mutex_release(&f->deferred_reset_mutex); + + if (deferred_reset_pending) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" + " deferred, running now"); + nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); + + nvgpu_assert(nvgpu_channel_deferred_reset_engines(g, ch) == 0); + + nvgpu_mutex_release(&g->fifo.engines_reset_mutex); + } +#endif +} + +static void channel_free_invoke_sync_destroy(struct nvgpu_channel *ch) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->user_sync != NULL) { + /* + * Set user managed syncpoint to safe state + * But it's already done if channel is recovered + */ + if (!nvgpu_channel_check_unserviceable(ch)) { + nvgpu_channel_user_syncpt_set_safe_state(ch->user_sync); + } + nvgpu_channel_user_syncpt_destroy(ch->user_sync); + ch->user_sync = NULL; + } + nvgpu_mutex_release(&ch->sync_lock); +#endif +} + +static void channel_free_unlink_debug_session(struct nvgpu_channel *ch) +{ +#ifdef CONFIG_NVGPU_DEBUGGER + struct gk20a *g = ch->g; + struct dbg_session_gk20a *dbg_s; + struct dbg_session_data *session_data, *tmp_s; + struct dbg_session_channel_data *ch_data, *tmp; + + /* unlink all debug sessions */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + nvgpu_list_for_each_entry_safe(session_data, tmp_s, + &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { + dbg_s = session_data->dbg_s; + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, + dbg_session_channel_data, ch_entry) { + if (ch_data->chid == ch->chid) { + if (ch_data->unbind_single_channel(dbg_s, + ch_data) != 0) { + nvgpu_err(g, + "unbind failed for chid: %d", + ch_data->chid); + } + } + } + nvgpu_mutex_release(&dbg_s->ch_list_lock); + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); +#endif +} + +static void channel_free_wait_for_refs(struct nvgpu_channel *ch, + int wait_value, bool force) +{ + /* wait until no more refs to the channel */ + if (!force) { + nvgpu_channel_wait_until_counter_is_N( + ch, &ch->ref_count, wait_value, 
&ch->ref_count_dec_wq, + __func__, "references"); + } + +} + +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS +static void channel_free_put_deterministic_ref_from_init( + struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + + /* put back the channel-wide submit ref from init */ + if (ch->deterministic) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + ch->deterministic = false; + if (!ch->deterministic_railgate_allowed) { + gk20a_idle(g); + } + ch->deterministic_railgate_allowed = false; + + nvgpu_rwsem_up_read(&g->deterministic_busy); + } +} +#endif + +/* call ONLY when no references to the channel exist: after the last put */ +static void channel_free(struct nvgpu_channel *ch, bool force) +{ + struct gk20a *g = ch->g; + struct nvgpu_fifo *f = &g->fifo; + struct vm_gk20a *ch_vm = ch->vm; + unsigned long timeout; + + if (g == NULL) { + nvgpu_do_assert_print(g, "ch already freed"); + return; + } + + nvgpu_log_fn(g, " "); + + timeout = nvgpu_get_poll_timeout(g); + +#ifdef CONFIG_NVGPU_TRACE + trace_gk20a_free_channel(ch->chid); +#endif + + /* + * Disable channel/TSG and unbind here. This should not be executed if + * HW access is not available during shutdown/removal path as it will + * trigger a timeout + */ + channel_free_invoke_unbind(ch); + + /* + * OS channel close may require that syncpoint should be set to some + * safe value before it is called. nvgpu_tsg_force_unbind_channel(above) + * is internally doing that by calling nvgpu_nvhost_syncpt_set_safe_- + * state deep down in the stack. Otherwise os_channel close may block if + * the app is killed abruptly (which was going to do the syncpoint + * signal). + */ + if (g->os_channel.close != NULL) { + g->os_channel.close(ch, force); + } + + /* wait until there's only our ref to the channel */ + channel_free_wait_for_refs(ch, 1, force); + + /* wait until all pending interrupts for recently completed + * jobs are handled */ + nvgpu_cic_wait_for_deferred_interrupts(g); + + /* prevent new refs */ + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + if (!ch->referenceable) { + nvgpu_spinlock_release(&ch->ref_obtain_lock); + nvgpu_err(ch->g, + "Extra %s() called to channel %u", + __func__, ch->chid); + return; + } + ch->referenceable = false; + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + /* matches with the initial reference in nvgpu_channel_open_new() */ + nvgpu_atomic_dec(&ch->ref_count); + + channel_free_wait_for_refs(ch, 0, force); + + channel_free_invoke_deferred_engine_reset(ch); + + if (!nvgpu_channel_as_bound(ch)) { + goto unbind; + } + + nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", + timeout); + +#ifdef CONFIG_NVGPU_FECS_TRACE + if (g->ops.gr.fecs_trace.unbind_channel && !ch->vpr) + g->ops.gr.fecs_trace.unbind_channel(g, &ch->inst_block); +#endif + + if (g->ops.gr.setup.free_subctx != NULL) { + g->ops.gr.setup.free_subctx(ch); + ch->subctx = NULL; + } + + g->ops.gr.intr.flush_channel_tlb(g); + + if (ch->usermode_submit_enabled) { + nvgpu_channel_usermode_deinit(ch); + } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + channel_kernelmode_deinit(ch); +#endif + } + + channel_free_invoke_sync_destroy(ch); + + /* + * When releasing the channel we unbind the VM - so release the ref. 
+ */ + nvgpu_vm_put(ch_vm); + + /* make sure we don't have deferred interrupts pending that + * could still touch the channel */ + nvgpu_cic_wait_for_deferred_interrupts(g); + +unbind: + g->ops.channel.unbind(ch); + g->ops.channel.free_inst(g, ch); + + nvgpu_channel_wdt_destroy(ch->wdt); + ch->wdt = NULL; + +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS + channel_free_put_deterministic_ref_from_init(ch); +#endif + + ch->vpr = false; + ch->vm = NULL; + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + WARN_ON(ch->sync != NULL); +#endif + + channel_free_unlink_debug_session(ch); + +#if GK20A_CHANNEL_REFCOUNT_TRACKING + (void) memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); + ch->ref_actions_put = 0; +#endif + + nvgpu_cond_destroy(&ch->notifier_wq); + nvgpu_cond_destroy(&ch->semaphore_wq); + + /* make sure we catch accesses of unopened channels in case + * there's non-refcounted channel pointers hanging around */ + ch->g = NULL; + nvgpu_smp_wmb(); + + /* ALWAYS last */ + free_channel(f, ch); +} + +static void channel_dump_ref_actions(struct nvgpu_channel *ch) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + size_t i, get; + s64 now = nvgpu_current_time_ms(); + s64 prev = 0; + struct gk20a *g = ch->g; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", + ch->chid, nvgpu_atomic_read(&ch->ref_count)); + + /* start at the oldest possible entry. put is next insertion point */ + get = ch->ref_actions_put; + + /* + * If the buffer is not full, this will first loop to the oldest entry, + * skipping not-yet-initialized entries. There is no ref_actions_get. + */ + for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { + struct nvgpu_channel_ref_action *act = &ch->ref_actions[get]; + + if (act->trace.nr_entries) { + nvgpu_info(g, + "%s ref %zu steps ago (age %lld ms, diff %lld ms)", + act->type == channel_gk20a_ref_action_get + ? "GET" : "PUT", + GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, + now - act->timestamp_ms, + act->timestamp_ms - prev); + + print_stack_trace(&act->trace, 0); + prev = act->timestamp_ms; + } + + get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; + } + + nvgpu_spinlock_release(&ch->ref_actions_lock); +#endif +} + +#if GK20A_CHANNEL_REFCOUNT_TRACKING +static void channel_save_ref_source(struct nvgpu_channel *ch, + enum nvgpu_channel_ref_action_type type) +{ + struct nvgpu_channel_ref_action *act; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + act = &ch->ref_actions[ch->ref_actions_put]; + act->type = type; + act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; + act->trace.nr_entries = 0; + act->trace.skip = 3; /* onwards from the caller of this */ + act->trace.entries = act->trace_entries; + save_stack_trace(&act->trace); + act->timestamp_ms = nvgpu_current_time_ms(); + ch->ref_actions_put = (ch->ref_actions_put + 1) % + GK20A_CHANNEL_REFCOUNT_TRACKING; + + nvgpu_spinlock_release(&ch->ref_actions_lock); +} +#endif + +/* Try to get a reference to the channel. Return nonzero on success. If fails, + * the channel is dead or being freed elsewhere and you must not touch it. + * + * Always when a nvgpu_channel pointer is seen and about to be used, a + * reference must be held to it - either by you or the caller, which should be + * documented well or otherwise clearly seen. This usually boils down to the + * file from ioctls directly, or an explicit get in exception handlers when the + * channel is found by a chid. + * + * Most global functions in this file require a reference to be held by the + * caller. 
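+ *
+ * A minimal illustrative lookup pattern, matching the usage later in this
+ * file (e.g. nvgpu_channel_deterministic_idle()):
+ *
+ *   struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
+ *
+ *   if (ch == NULL) {
+ *           return; // dead or being freed; do not touch it
+ *   }
+ *   // ...use ch...
+ *   nvgpu_channel_put(ch);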
+ */ +struct nvgpu_channel *nvgpu_channel_get__func(struct nvgpu_channel *ch, + const char *caller) +{ + struct nvgpu_channel *ret; + + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + + if (likely(ch->referenceable)) { +#if GK20A_CHANNEL_REFCOUNT_TRACKING + channel_save_ref_source(ch, channel_gk20a_ref_action_get); +#endif + nvgpu_atomic_inc(&ch->ref_count); + ret = ch; + } else { + ret = NULL; + } + + nvgpu_spinlock_release(&ch->ref_obtain_lock); + +#ifdef CONFIG_NVGPU_TRACE + if (ret != NULL) { + trace_nvgpu_channel_get(ch->chid, caller); + } +#endif + + return ret; +} + +void nvgpu_channel_put__func(struct nvgpu_channel *ch, const char *caller) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + channel_save_ref_source(ch, channel_gk20a_ref_action_put); +#endif +#ifdef CONFIG_NVGPU_TRACE + trace_nvgpu_channel_put(ch->chid, caller); +#endif + nvgpu_atomic_dec(&ch->ref_count); + if (nvgpu_cond_broadcast(&ch->ref_count_dec_wq) != 0) { + nvgpu_warn(ch->g, "failed to broadcast"); + } + + /* More puts than gets. Channel is probably going to get + * stuck. */ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); + + /* Also, more puts than gets. ref_count can go to 0 only if + * the channel is closing. Channel is probably going to get + * stuck. */ + WARN_ON((nvgpu_atomic_read(&ch->ref_count) == 0) && ch->referenceable); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) +} + +struct nvgpu_channel *nvgpu_channel_from_id__func(struct gk20a *g, + u32 chid, const char *caller) +{ + if (chid == NVGPU_INVALID_CHANNEL_ID) { + return NULL; + } + + return nvgpu_channel_get__func(&g->fifo.channel[chid], caller); +} + +void nvgpu_channel_close(struct nvgpu_channel *ch) +{ + channel_free(ch, false); +} + +/* + * Be careful with this - it is meant for terminating channels when we know the + * driver is otherwise dying. Ref counts and the like are ignored by this + * version of the cleanup. 
+ */ +void nvgpu_channel_kill(struct nvgpu_channel *ch) +{ + channel_free(ch, true); +} + +struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g, + u32 runlist_id, + bool is_privileged_channel, + pid_t pid, pid_t tid) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_channel *ch; + + /* compatibility with existing code */ + if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) { + runlist_id = nvgpu_engine_get_gr_runlist_id(g); + } + + nvgpu_log_fn(g, " "); + + ch = allocate_channel(f); + if (ch == NULL) { + /* TBD: we want to make this virtualizable */ + nvgpu_err(g, "out of hw chids"); + return NULL; + } + +#ifdef CONFIG_NVGPU_TRACE + trace_nvgpu_channel_open_new(ch->chid); +#endif + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + BUG_ON(ch->g != NULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + ch->g = g; + + /* Runlist for the channel */ + ch->runlist = f->runlists[runlist_id]; + + /* Channel privilege level */ + ch->is_privileged_channel = is_privileged_channel; + + ch->pid = tid; + ch->tgid = pid; /* process granularity for FECS traces */ + +#ifdef CONFIG_NVGPU_USERD + if (nvgpu_userd_init_channel(g, ch) != 0) { + nvgpu_err(g, "userd init failed"); + goto clean_up; + } +#endif + + if (g->ops.channel.alloc_inst(g, ch) != 0) { + nvgpu_err(g, "inst allocation failed"); + goto clean_up; + } + + /* now the channel is in a limbo out of the free list but not marked as + * alive and used (i.e. get-able) yet */ + + /* By default, channel is regular (non-TSG) channel */ + ch->tsgid = NVGPU_INVALID_TSG_ID; + + /* clear ctxsw timeout counter and update timestamp */ + ch->ctxsw_timeout_accumulated_ms = 0; + ch->ctxsw_timeout_gpfifo_get = 0; + /* set gr host default timeout */ + ch->ctxsw_timeout_max_ms = nvgpu_get_poll_timeout(g); + ch->ctxsw_timeout_debug_dump = true; + /* ch is unserviceable until it is bound to tsg */ + ch->unserviceable = true; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT + ch->wdt = nvgpu_channel_wdt_alloc(g); + if (ch->wdt == NULL) { + nvgpu_err(g, "wdt alloc failed"); + goto clean_up; + } + ch->wdt_debug_dump = true; +#endif + + ch->obj_class = 0; + ch->subctx_id = 0; + ch->runqueue_sel = 0; + + ch->mmu_nack_handled = false; + + /* The channel is *not* runnable at this point. It still needs to have + * an address space bound and allocate a gpfifo and grctx. */ + + if (nvgpu_cond_init(&ch->notifier_wq) != 0) { + nvgpu_err(g, "cond init failed"); + goto clean_up; + } + if (nvgpu_cond_init(&ch->semaphore_wq) != 0) { + nvgpu_err(g, "cond init failed"); + goto clean_up; + } + + /* Mark the channel alive, get-able, with 1 initial use + * references. The initial reference will be decreased in + * channel_free(). + * + * Use the lock, since an asynchronous thread could + * try to access this channel while it's not fully + * initialized. 
+ */ + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + ch->referenceable = true; + nvgpu_atomic_set(&ch->ref_count, 1); + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + return ch; + +clean_up: + ch->g = NULL; + free_channel(f, ch); + return NULL; +} + +static int channel_setup_ramfc(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args, + u64 gpfifo_gpu_va, u32 gpfifo_size) +{ + int err = 0; + u64 pbdma_acquire_timeout = 0ULL; + struct gk20a *g = c->g; + + if (nvgpu_channel_wdt_enabled(c->wdt) && + nvgpu_is_timeouts_enabled(c->g)) { + pbdma_acquire_timeout = nvgpu_channel_wdt_limit(c->wdt); + } + + err = g->ops.ramfc.setup(c, gpfifo_gpu_va, gpfifo_size, + pbdma_acquire_timeout, args->flags); + + return err; +} + +static int nvgpu_channel_setup_usermode(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + u32 gpfifo_size = args->num_gpfifo_entries; + int err = 0; + struct gk20a *g = c->g; + u64 gpfifo_gpu_va; + + if (g->os_channel.alloc_usermode_buffers != NULL) { + err = g->os_channel.alloc_usermode_buffers(c, args); + if (err != 0) { + nvgpu_err(g, "Usermode buffer alloc failed"); + goto clean_up; + } + c->userd_mem = &c->usermode_userd; + c->userd_offset = 0U; + c->userd_iova = nvgpu_mem_get_addr(g, c->userd_mem); + c->usermode_submit_enabled = true; + } else { + nvgpu_err(g, "Usermode submit not supported"); + err = -EINVAL; + goto clean_up; + } + gpfifo_gpu_va = c->usermode_gpfifo.gpu_va; + + nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", + c->chid, gpfifo_gpu_va, gpfifo_size); + + err = channel_setup_ramfc(c, args, gpfifo_gpu_va, gpfifo_size); + + if (err != 0) { + goto clean_up_unmap; + } + + err = nvgpu_channel_update_runlist(c, true); + if (err != 0) { + goto clean_up_unmap; + } + + return 0; + +clean_up_unmap: + nvgpu_channel_free_usermode_buffers(c); +#ifdef CONFIG_NVGPU_USERD + (void) nvgpu_userd_init_channel(g, c); +#endif + c->usermode_submit_enabled = false; +clean_up: + return err; +} + +static int channel_setup_bind_prechecks(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + struct gk20a *g = c->g; + struct nvgpu_tsg *tsg; + int err = 0; + + if (args->num_gpfifo_entries > 0x80000000U) { + nvgpu_err(g, + "num_gpfifo_entries exceeds max limit of 2^31"); + err = -EINVAL; + goto fail; + } + + /* + * The gpfifo ring buffer is empty when get == put and it's full when + * get == put + 1. Just one entry wouldn't make sense. + */ + if (args->num_gpfifo_entries < 2U) { + nvgpu_err(g, "gpfifo has no space for any jobs"); + err = -EINVAL; + goto fail; + } + + /* an address space needs to have been bound at this point. */ + if (!nvgpu_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to an address space at time of setup_bind"); + err = -EINVAL; + goto fail; + } + + /* The channel needs to be bound to a tsg at this point */ + tsg = nvgpu_tsg_from_ch(c); + if (tsg == NULL) { + nvgpu_err(g, + "not bound to tsg at time of setup_bind"); + err = -EINVAL; + goto fail; + } + + if (c->usermode_submit_enabled) { + nvgpu_err(g, "channel %d : " + "usermode buffers allocated", c->chid); + err = -EEXIST; + goto fail; + } + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + if (nvgpu_mem_is_valid(&c->gpfifo.mem)) { + nvgpu_err(g, "channel %d :" + "gpfifo already allocated", c->chid); + err = -EEXIST; + goto fail; + } +#endif + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U + && nvgpu_channel_wdt_enabled(c->wdt)) { + /* + * The watchdog would need async job tracking, but that's not + * compatible with deterministic mode. 
We won't disable it + * implicitly; the user has to ask. + */ + nvgpu_err(g, + "deterministic is not compatible with watchdog"); + err = -EINVAL; + goto fail; + } + + /* FUSA build for now assumes that the deterministic flag is not useful */ +#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U && + (args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) == 0U) { + /* + * Usermode submit shares various preconditions with + * deterministic mode. Require that it's explicitly set to + * avoid surprises. + */ + nvgpu_err(g, "need deterministic for usermode submit"); + err = -EINVAL; + goto fail; + } +#endif + +fail: + return err; +} + +int nvgpu_channel_setup_bind(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + struct gk20a *g = c->g; + int err = 0; + + err = channel_setup_bind_prechecks(c, args); + if (err != 0) { + goto fail; + } + +#ifdef CONFIG_NVGPU_VPR + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR) != 0U) { + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_VPR)) { + err = -EINVAL; + goto fail; + } + + c->vpr = true; + } +#else + c->vpr = false; +#endif + +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + /* + * Railgating isn't deterministic; instead of disallowing + * railgating globally, take a power refcount for this + * channel's lifetime. The gk20a_idle() pair for this happens + * when the channel gets freed. + * + * Deterministic flag and this busy must be atomic within the + * busy lock. + */ + err = gk20a_busy(g); + if (err != 0) { + nvgpu_rwsem_up_read(&g->deterministic_busy); + return err; + } + + c->deterministic = true; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } +#endif + + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { + err = nvgpu_channel_setup_usermode(c, args); + } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + if (g->os_channel.open != NULL) { + g->os_channel.open(c); + } + err = channel_setup_kernelmode(c, args); +#else + err = -EINVAL; +#endif + } + + if (err != 0) { + goto clean_up_idle; + } + + g->ops.channel.bind(c); + + nvgpu_log_fn(g, "done"); + return 0; + +clean_up_idle: +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS + if (nvgpu_channel_is_deterministic(c)) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + gk20a_idle(g); + c->deterministic = false; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } +#endif +fail: + nvgpu_err(g, "fail"); + return err; +} + +void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c) +{ + if (nvgpu_mem_is_valid(&c->usermode_userd)) { + nvgpu_dma_free(c->g, &c->usermode_userd); + } + if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) { + nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo); + } + if (c->g->os_channel.free_usermode_buffers != NULL) { + c->g->os_channel.free_usermode_buffers(c); + } +} + +static bool nvgpu_channel_ctxsw_timeout_debug_dump_state( + struct nvgpu_channel *ch) +{ + bool verbose = false; + if (nvgpu_is_err_notifier_set(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) { + verbose = ch->ctxsw_timeout_debug_dump; + } + + return verbose; +} + +void nvgpu_channel_wakeup_wqs(struct gk20a *g, + struct nvgpu_channel *ch) +{ + /* unblock pending waits */ + if (nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq) != 0) { + nvgpu_warn(g, "failed to broadcast"); + } + if (nvgpu_cond_broadcast_interruptible(&ch->notifier_wq) != 0) { + nvgpu_warn(g, "failed to broadcast"); + } +} + +bool 
nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch) +{ + bool verbose; + + verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(ch); + + /* mark channel as faulted */ + nvgpu_channel_set_unserviceable(ch); + + nvgpu_channel_wakeup_wqs(g, ch); + + return verbose; +} + +void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch, + u32 error_notifier) +{ + g->ops.channel.set_error_notifier(ch, error_notifier); +} + +void nvgpu_channel_sw_quiesce(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_channel *ch; + u32 chid; + + for (chid = 0; chid < f->num_channels; chid++) { + ch = nvgpu_channel_get(&f->channel[chid]); + if (ch != NULL) { + nvgpu_channel_set_error_notifier(g, ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); + nvgpu_channel_set_unserviceable(ch); + nvgpu_channel_wakeup_wqs(g, ch); + nvgpu_channel_put(ch); + } + } +} + +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS +/* + * Stop deterministic channel activity for do_idle() when power needs to go off + * momentarily but deterministic channels keep power refs for potentially a + * long time. + * + * Takes write access on g->deterministic_busy. + * + * Must be paired with nvgpu_channel_deterministic_unidle(). + */ +void nvgpu_channel_deterministic_idle(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + + /* Grab exclusive access to the hw to block new submits */ + nvgpu_rwsem_down_write(&g->deterministic_busy); + + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch == NULL) { + continue; + } + + if (ch->deterministic && !ch->deterministic_railgate_allowed) { + /* + * Drop the power ref taken when setting deterministic + * flag. deterministic_unidle will put this and the + * channel ref back. If railgate is allowed separately + * for this channel, the power ref has already been put + * away. + * + * Hold the channel ref: it must not get freed in + * between. A race could otherwise result in lost + * gk20a_busy() via unidle, and in unbalanced + * gk20a_idle() via closing the channel. + */ + gk20a_idle(g); + } else { + /* Not interesting, carry on. */ + nvgpu_channel_put(ch); + } + } +} + +/* + * Allow deterministic channel activity again for do_unidle(). + * + * This releases write access on g->deterministic_busy. + */ +void nvgpu_channel_deterministic_unidle(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + int err; + + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch == NULL) { + continue; + } + + /* + * Deterministic state changes inside deterministic_busy lock, + * which we took in deterministic_idle. 
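+ *
+ * The gk20a_busy() below restores the power ref that
+ * nvgpu_channel_deterministic_idle() dropped, and the extra
+ * nvgpu_channel_put() balances the channel ref that idle() intentionally
+ * kept across the railgate window.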
+ */ + if (ch->deterministic && !ch->deterministic_railgate_allowed) { + err = gk20a_busy(g); + if (err != 0) { + nvgpu_err(g, "cannot busy() again!"); + } + /* Took this in idle() */ + nvgpu_channel_put(ch); + } + + nvgpu_channel_put(ch); + } + + /* Release submits, new deterministic channels and frees */ + nvgpu_rwsem_up_write(&g->deterministic_busy); +} +#endif + +static void nvgpu_channel_destroy(struct nvgpu_channel *c) +{ + nvgpu_mutex_destroy(&c->ioctl_lock); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); +#endif + nvgpu_mutex_destroy(&c->sync_lock); +#if defined(CONFIG_NVGPU_CYCLESTATS) + nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_destroy(&c->cs_client_mutex); +#endif +#if defined(CONFIG_NVGPU_DEBUGGER) + nvgpu_mutex_destroy(&c->dbg_s_lock); +#endif +} + +void nvgpu_channel_cleanup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + + /* + * Make sure all channels are closed before deleting them. + */ + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = &f->channel[chid]; + + /* + * Could race but worst that happens is we get an error message + * from channel_free() complaining about multiple closes. + */ + if (ch->referenceable) { + nvgpu_channel_kill(ch); + } + + nvgpu_channel_destroy(ch); + } + + nvgpu_vfree(g, f->channel); + f->channel = NULL; + nvgpu_mutex_destroy(&f->free_chs_mutex); +} + +int nvgpu_channel_init_support(struct gk20a *g, u32 chid) +{ + struct nvgpu_channel *c = &g->fifo.channel[chid]; + int err; + + c->g = NULL; + c->chid = chid; + nvgpu_atomic_set(&c->bound, 0); + nvgpu_spinlock_init(&c->ref_obtain_lock); + nvgpu_atomic_set(&c->ref_count, 0); + c->referenceable = false; + err = nvgpu_cond_init(&c->ref_count_dec_wq); + if (err != 0) { + nvgpu_err(g, "cond_init failed"); + return err; + } + + nvgpu_spinlock_init(&c->unserviceable_lock); + +#if GK20A_CHANNEL_REFCOUNT_TRACKING + nvgpu_spinlock_init(&c->ref_actions_lock); +#endif +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + nvgpu_init_list_node(&c->worker_item); + + nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + nvgpu_mutex_init(&c->ioctl_lock); + nvgpu_mutex_init(&c->sync_lock); +#if defined(CONFIG_NVGPU_CYCLESTATS) + nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_init(&c->cs_client_mutex); +#endif +#if defined(CONFIG_NVGPU_DEBUGGER) + nvgpu_init_list_node(&c->dbg_s_list); + nvgpu_mutex_init(&c->dbg_s_lock); +#endif + nvgpu_init_list_node(&c->ch_entry); + nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); + + return 0; +} + +int nvgpu_channel_setup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid, i; + int err; + + f->num_channels = g->ops.channel.count(g); + + nvgpu_mutex_init(&f->free_chs_mutex); + + f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel)); + if (f->channel == NULL) { + nvgpu_err(g, "no mem for channels"); + err = -ENOMEM; + goto clean_up_mutex; + } + + nvgpu_init_list_node(&f->free_chs); + + for (chid = 0; chid < f->num_channels; chid++) { + err = nvgpu_channel_init_support(g, chid); + if (err != 0) { + nvgpu_err(g, "channel init failed, chid=%u", chid); + goto clean_up; + } + } + + return 0; + +clean_up: + for (i = 0; i < chid; i++) { + struct nvgpu_channel *ch = &f->channel[i]; + + nvgpu_channel_destroy(ch); + } + nvgpu_vfree(g, f->channel); + f->channel = NULL; + +clean_up_mutex: + nvgpu_mutex_destroy(&f->free_chs_mutex); + + return err; +} + +int 
nvgpu_channel_suspend_all_serviceable_ch(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + bool channels_in_use = false; + u32 active_runlist_ids = 0; + int err; + + nvgpu_log_fn(g, " "); + + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch == NULL) { + continue; + } + if (nvgpu_channel_check_unserviceable(ch)) { + nvgpu_log_info(g, "do not suspend recovered " + "channel %d", chid); + } else { + nvgpu_log_info(g, "suspend channel %d", chid); + /* disable channel */ + if (nvgpu_channel_disable_tsg(g, ch) != 0) { + nvgpu_err(g, "failed to disable channel/TSG"); + } + /* preempt the channel */ + err = nvgpu_preempt_channel(g, ch); + if (err != 0) { + nvgpu_err(g, "failed to preempt channel/TSG"); + } + /* wait for channel update notifiers */ + if (g->os_channel.work_completion_cancel_sync != NULL) { + g->os_channel.work_completion_cancel_sync(ch); + } + + g->ops.channel.unbind(ch); + + channels_in_use = true; + + active_runlist_ids |= BIT32(ch->runlist->id); + } + + nvgpu_channel_put(ch); + } + + if (channels_in_use) { + nvgpu_assert(nvgpu_runlist_reload_ids(g, + active_runlist_ids, false) == 0); + } + + nvgpu_log_fn(g, "done"); + return 0; +} + +int nvgpu_channel_resume_all_serviceable_ch(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + bool channels_in_use = false; + u32 active_runlist_ids = 0; + + nvgpu_log_fn(g, " "); + + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch == NULL) { + continue; + } + if (nvgpu_channel_check_unserviceable(ch)) { + nvgpu_log_info(g, "do not resume recovered " + "channel %d", chid); + } else { + nvgpu_log_info(g, "resume channel %d", chid); + g->ops.channel.bind(ch); + channels_in_use = true; + active_runlist_ids |= BIT32(ch->runlist->id); + } + nvgpu_channel_put(ch); + } + + if (channels_in_use) { + nvgpu_assert(nvgpu_runlist_reload_ids(g, + active_runlist_ids, true) == 0); + } + + nvgpu_log_fn(g, "done"); + + return 0; +} + +static void nvgpu_channel_semaphore_signal(struct nvgpu_channel *c, + bool post_events) +{ + struct gk20a *g = c->g; + + if (nvgpu_cond_broadcast_interruptible( &c->semaphore_wq) != 0) { + nvgpu_warn(g, "failed to broadcast"); + } + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL + if (post_events) { + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(c); + if (tsg != NULL) { + g->ops.tsg.post_event_id(tsg, + NVGPU_EVENT_ID_BLOCKING_SYNC); + } + } +#endif + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + /* + * Only non-deterministic channels get the channel_update callback. We + * don't allow semaphore-backed syncs for these channels anyways, since + * they have a dependency on the sync framework. If deterministic + * channels are receiving a semaphore wakeup, it must be for a + * user-space managed semaphore. + */ + if (!nvgpu_channel_is_deterministic(c)) { + nvgpu_channel_update(c); + } +#endif +} + +void nvgpu_channel_semaphore_wakeup(struct gk20a *g, bool post_events) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + + nvgpu_log_fn(g, " "); + + /* + * Ensure that all pending writes are actually done before trying to + * read semaphore values from DRAM. 
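+	 * The fb_flush() below performs that flush; its result is asserted
+	 * so a failed flush is not silently ignored.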
+ */ + nvgpu_assert(g->ops.mm.cache.fb_flush(g) == 0); + + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *c = &g->fifo.channel[chid]; + if (nvgpu_channel_get(c) != NULL) { + if (nvgpu_atomic_read(&c->bound) != 0) { + nvgpu_channel_semaphore_signal(c, post_events); + } + nvgpu_channel_put(c); + } + } +} + +/* return with a reference to the channel, caller must put it back */ +struct nvgpu_channel *nvgpu_channel_refch_from_inst_ptr(struct gk20a *g, + u64 inst_ptr) +{ + struct nvgpu_fifo *f = &g->fifo; + unsigned int ci; + + if (unlikely(f->channel == NULL)) { + return NULL; + } + for (ci = 0; ci < f->num_channels; ci++) { + struct nvgpu_channel *ch; + u64 ch_inst_ptr; + + ch = nvgpu_channel_from_id(g, ci); + /* only alive channels are searched */ + if (ch == NULL) { + continue; + } + + ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block); + if (inst_ptr == ch_inst_ptr) { + return ch; + } + + nvgpu_channel_put(ch); + } + return NULL; +} + +int nvgpu_channel_alloc_inst(struct gk20a *g, struct nvgpu_channel *ch) +{ + int err; + + nvgpu_log_fn(g, " "); + + err = nvgpu_alloc_inst_block(g, &ch->inst_block); + if (err != 0) { + return err; + } + + nvgpu_log_info(g, "channel %d inst block physical addr: 0x%16llx", + ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block)); + + nvgpu_log_fn(g, "done"); + return 0; +} + +void nvgpu_channel_free_inst(struct gk20a *g, struct nvgpu_channel *ch) +{ + nvgpu_free_inst_block(g, &ch->inst_block); +} + +static void nvgpu_channel_sync_debug_dump(struct gk20a *g, + struct nvgpu_debug_context *o, struct nvgpu_channel_dump_info *info) +{ +#ifdef CONFIG_NVGPU_NON_FUSA + gk20a_debug_output(o, + "RAMFC: TOP: %012llx PUT: %012llx GET: %012llx " + "FETCH: %012llx " + "HEADER: %08x COUNT: %08x " + "SYNCPOINT: %08x %08x " + "SEMAPHORE: %08x %08x %08x %08x", + info->inst.pb_top_level_get, + info->inst.pb_put, + info->inst.pb_get, + info->inst.pb_fetch, + info->inst.pb_header, + info->inst.pb_count, + info->inst.syncpointa, + info->inst.syncpointb, + info->inst.semaphorea, + info->inst.semaphoreb, + info->inst.semaphorec, + info->inst.semaphored); + + g->ops.pbdma.syncpt_debug_dump(g, o, info); +#endif +} + +static void nvgpu_channel_info_debug_dump(struct gk20a *g, + struct nvgpu_debug_context *o, + struct nvgpu_channel_dump_info *info) +{ + /** + * Use gpu hw version to control the channel instance fields + * dump in nvgpu_channel_dump_info struct. + * For hw version before gv11b, dump syncpoint a/b, semaphore a/b/c/d. + * For hw version after gv11b, dump sem addr/payload/execute. + */ + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl); + + gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d%s: ", + info->chid, + g->name, + info->tsgid, + info->pid, + info->refs, + info->deterministic ? ", deterministic" : ""); + gk20a_debug_output(o, "channel status: %s in use %s %s", + info->hw_state.enabled ? "" : "not", + info->hw_state.status_string, + info->hw_state.busy ? 
"busy" : "not busy"); + + if (ver < NVGPU_GPUID_GV11B) { + nvgpu_channel_sync_debug_dump(g, o, info); + } else { + gk20a_debug_output(o, + "RAMFC: TOP: %012llx PUT: %012llx GET: %012llx " + "FETCH: %012llx " + "HEADER: %08x COUNT: %08x " + "SEMAPHORE: addr %012llx " + "payload %016llx execute %08x", + info->inst.pb_top_level_get, + info->inst.pb_put, + info->inst.pb_get, + info->inst.pb_fetch, + info->inst.pb_header, + info->inst.pb_count, + info->inst.sem_addr, + info->inst.sem_payload, + info->inst.sem_execute); + } + + if (info->sema.addr != 0ULL) { + gk20a_debug_output(o, "SEMA STATE: value: 0x%08x " + "next_val: 0x%08x addr: 0x%010llx", + info->sema.value, + info->sema.next, + info->sema.addr); + } + + gk20a_debug_output(o, " "); +} + +void nvgpu_channel_debug_dump_all(struct gk20a *g, + struct nvgpu_debug_context *o) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + struct nvgpu_channel_dump_info **infos; + + infos = nvgpu_kzalloc(g, sizeof(*infos) * f->num_channels); + if (infos == NULL) { + gk20a_debug_output(o, "cannot alloc memory for channels"); + return; + } + + for (chid = 0U; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch != NULL) { + struct nvgpu_channel_dump_info *info; + + info = nvgpu_kzalloc(g, sizeof(*info)); + + /* + * ref taken stays to below loop with + * successful allocs + */ + if (info == NULL) { + nvgpu_channel_put(ch); + } else { + infos[chid] = info; + } + } + } + + for (chid = 0U; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = &f->channel[chid]; + struct nvgpu_channel_dump_info *info = infos[chid]; +#ifdef CONFIG_NVGPU_SW_SEMAPHORE + struct nvgpu_channel_sync_semaphore *sync_sema; + struct nvgpu_hw_semaphore *hw_sema = NULL; + + if (ch->sync != NULL) { + sync_sema = nvgpu_channel_sync_to_semaphore(ch->sync); + if (sync_sema != NULL) { + hw_sema = nvgpu_channel_sync_semaphore_hw_sema( + sync_sema); + } + } +#endif + + /* if this info exists, the above loop took a channel ref */ + if (info == NULL) { + continue; + } + + info->chid = ch->chid; + info->tsgid = ch->tsgid; + info->pid = ch->pid; + info->refs = nvgpu_atomic_read(&ch->ref_count); + info->deterministic = nvgpu_channel_is_deterministic(ch); + +#ifdef CONFIG_NVGPU_SW_SEMAPHORE + if (hw_sema != NULL) { + info->sema.value = nvgpu_hw_semaphore_read(hw_sema); + info->sema.next = + (u32)nvgpu_hw_semaphore_read_next(hw_sema); + info->sema.addr = nvgpu_hw_semaphore_addr(hw_sema); + } +#endif + + g->ops.channel.read_state(g, ch, &info->hw_state); + g->ops.ramfc.capture_ram_dump(g, ch, info); + + nvgpu_channel_put(ch); + } + + gk20a_debug_output(o, "Channel Status - chip %-5s", g->name); + gk20a_debug_output(o, "---------------------------"); + for (chid = 0U; chid < f->num_channels; chid++) { + struct nvgpu_channel_dump_info *info = infos[chid]; + + if (info != NULL) { + nvgpu_channel_info_debug_dump(g, o, info); + nvgpu_kfree(g, info); + } + } + + nvgpu_kfree(g, infos); +} + +#ifdef CONFIG_NVGPU_DEBUGGER +int nvgpu_channel_deferred_reset_engines(struct gk20a *g, + struct nvgpu_channel *ch) +{ + unsigned long engine_id, engines = 0U; + struct nvgpu_tsg *tsg; + bool deferred_reset_pending; + struct nvgpu_fifo *f = &g->fifo; + int err = 0; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + deferred_reset_pending = g->fifo.deferred_reset_pending; + nvgpu_mutex_release(&f->deferred_reset_mutex); + + if (!deferred_reset_pending) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return 0; + } 
+ + err = nvgpu_gr_disable_ctxsw(g); + if (err != 0) { + nvgpu_err(g, "failed to disable ctxsw"); + goto fail; + } + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + engines = nvgpu_engine_get_mask_on_id(g, tsg->tsgid, true); + } else { + nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + engines = g->fifo.deferred_fault_engines; + } + + if (engines == 0U) { + goto clean_up; + } + + /* + * If deferred reset is set for an engine, and channel is running + * on that engine, reset it + */ + + for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32UL) { + if ((BIT64(engine_id) & engines) != 0ULL) { + nvgpu_engine_reset(g, (u32)engine_id); + } + } + + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + g->fifo.deferred_fault_engines = 0; + g->fifo.deferred_reset_pending = false; + nvgpu_mutex_release(&f->deferred_reset_mutex); + +clean_up: + err = nvgpu_gr_enable_ctxsw(g); + if (err != 0) { + nvgpu_err(g, "failed to enable ctxsw"); + } +fail: + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return err; +} +#endif diff --git a/drivers/gpu/nvgpu/common/fifo/channel_wdt.c b/drivers/gpu/nvgpu/common/fifo/channel_wdt.c new file mode 100644 index 000000000..93b10638d --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/channel_wdt.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "channel_wdt.h" +#include "channel_worker.h" + +#include +#include +#include +#include + +void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump) +{ + ch->wdt_debug_dump = dump; +} + +static struct nvgpu_channel_wdt_state nvgpu_channel_collect_wdt_state( + struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + struct nvgpu_channel_wdt_state state = { 0, 0 }; + + /* + * Note: just checking for nvgpu_channel_wdt_enabled() is not enough at + * the moment because system suspend puts g->regs away but doesn't stop + * the worker thread that runs the watchdog. This might need to be + * cleared up in the future. + */ + if (nvgpu_channel_wdt_running(ch->wdt)) { + /* + * Read the state only if the wdt is on to avoid unnecessary + * accesses. The kernel mem for userd may not even exist; this + * channel could be in usermode submit mode. 
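+		 * The gp_get/pb_get snapshot taken here is what
+		 * nvgpu_channel_wdt_start()/_check() later compare against to
+		 * decide whether the channel has made progress.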
+ */ + state.gp_get = g->ops.userd.gp_get(g, ch); + state.pb_get = g->ops.userd.pb_get(g, ch); + } + + return state; +} + +void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) +{ + struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch); + + /* + * FIXME: channel recovery can race the submit path and can start even + * after this, but this check is the best we can do for now. + */ + if (!nvgpu_channel_check_unserviceable(ch)) { + nvgpu_channel_wdt_start(ch->wdt, &state); + } +} + +void nvgpu_channel_restart_all_wdts(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 chid; + + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch != NULL) { + if ((ch->wdt != NULL) && + !nvgpu_channel_check_unserviceable(ch)) { + struct nvgpu_channel_wdt_state state = + nvgpu_channel_collect_wdt_state(ch); + + nvgpu_channel_wdt_rewind(ch->wdt, &state); + } + nvgpu_channel_put(ch); + } + } +} + +static void nvgpu_channel_recover_from_wdt(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + if (nvgpu_channel_check_unserviceable(ch)) { + /* channel is already recovered */ + nvgpu_info(g, "chid: %d unserviceable but wdt was ON", ch->chid); + return; + } + + nvgpu_err(g, "Job on channel %d timed out", ch->chid); + + /* force reset calls gk20a_debug_dump but not this */ + if (ch->wdt_debug_dump) { + gk20a_gr_debug_dump(g); + } + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL + if (g->ops.tsg.force_reset(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, + ch->wdt_debug_dump) != 0) { + nvgpu_err(g, "failed tsg force reset for chid: %d", ch->chid); + } +#endif +} + +/* + * Test the watchdog progress. If the channel is stuck, reset it. + * + * The gpu is implicitly on at this point because the watchdog can only run on + * channels that have submitted jobs pending for cleanup. + */ +static void nvgpu_channel_check_wdt(struct nvgpu_channel *ch) +{ + struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch); + + if (nvgpu_channel_wdt_check(ch->wdt, &state)) { + nvgpu_channel_recover_from_wdt(ch); + } +} + +void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker) +{ + struct nvgpu_channel_worker *ch_worker = + nvgpu_channel_worker_from_worker(worker); + int ret; + + ch_worker->watchdog_interval = 100U; + + ret = nvgpu_timeout_init(worker->g, &ch_worker->timeout, + ch_worker->watchdog_interval, NVGPU_TIMER_CPU_TIMER); + if (ret != 0) { + nvgpu_err(worker->g, "timeout_init failed: %d", ret); + } +} + +/** + * Loop every living channel, check timeouts and handle stuck channels. 
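+ * Channels already marked unserviceable are skipped; the reference taken
+ * via nvgpu_channel_from_id() is dropped after each check.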
+ */ +static void nvgpu_channel_poll_wdt(struct gk20a *g) +{ + unsigned int chid; + + for (chid = 0; chid < g->fifo.num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch != NULL) { + if (!nvgpu_channel_check_unserviceable(ch)) { + nvgpu_channel_check_wdt(ch); + } + nvgpu_channel_put(ch); + } + } +} + +void nvgpu_channel_worker_poll_wakeup_post_process_item( + struct nvgpu_worker *worker) +{ + struct gk20a *g = worker->g; + + struct nvgpu_channel_worker *ch_worker = + nvgpu_channel_worker_from_worker(worker); + int ret; + + if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) { + nvgpu_channel_poll_wdt(g); + ret = nvgpu_timeout_init(g, &ch_worker->timeout, + ch_worker->watchdog_interval, + NVGPU_TIMER_CPU_TIMER); + if (ret != 0) { + nvgpu_err(g, "timeout_init failed: %d", ret); + } + } +} + +u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout( + struct nvgpu_worker *worker) +{ + struct nvgpu_channel_worker *ch_worker = + nvgpu_channel_worker_from_worker(worker); + + return ch_worker->watchdog_interval; +} diff --git a/drivers/gpu/nvgpu/common/fifo/channel_wdt.h b/drivers/gpu/nvgpu/common/fifo/channel_wdt.h new file mode 100644 index 000000000..d262ae073 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/channel_wdt.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_COMMON_FIFO_CHANNEL_WDT_H +#define NVGPU_COMMON_FIFO_CHANNEL_WDT_H + +#include + +struct nvgpu_channel; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT +struct nvgpu_worker; + +void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch); +void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker); +void nvgpu_channel_worker_poll_wakeup_post_process_item( + struct nvgpu_worker *worker); +u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout( + struct nvgpu_worker *worker); +#else +static inline void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) {} +#endif /* CONFIG_NVGPU_CHANNEL_WDT */ + +#endif /* NVGPU_COMMON_FIFO_CHANNEL_WDT_H */ diff --git a/drivers/gpu/nvgpu/common/fifo/channel_worker.c b/drivers/gpu/nvgpu/common/fifo/channel_worker.c new file mode 100644 index 000000000..6b61fb83f --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/channel_worker.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "channel_worker.h" +#include "channel_wdt.h" + +#include +#include + +static inline struct nvgpu_channel * +nvgpu_channel_from_worker_item(struct nvgpu_list_node *node) +{ + return (struct nvgpu_channel *) + ((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item)); +}; + +static void nvgpu_channel_worker_poll_wakeup_process_item( + struct nvgpu_list_node *work_item) +{ + struct nvgpu_channel *ch = nvgpu_channel_from_worker_item(work_item); + + nvgpu_assert(ch != NULL); + + nvgpu_log_fn(ch->g, " "); + + nvgpu_channel_clean_up_jobs(ch); + + /* ref taken when enqueued */ + nvgpu_channel_put(ch); +} + +static const struct nvgpu_worker_ops channel_worker_ops = { +#ifdef CONFIG_NVGPU_CHANNEL_WDT + .pre_process = nvgpu_channel_worker_poll_init, + .wakeup_post_process = + nvgpu_channel_worker_poll_wakeup_post_process_item, + .wakeup_timeout = + nvgpu_channel_worker_poll_wakeup_condition_get_timeout, +#endif + .wakeup_early_exit = NULL, + .wakeup_process_item = + nvgpu_channel_worker_poll_wakeup_process_item, + .wakeup_condition = NULL, +}; + +/** + * Initialize the channel worker's metadata and start the background thread. + */ +int nvgpu_channel_worker_init(struct gk20a *g) +{ + struct nvgpu_worker *worker = &g->channel_worker.worker; + + nvgpu_worker_init_name(worker, "nvgpu_channel_poll", g->name); + + return nvgpu_worker_init(g, worker, &channel_worker_ops); +} + +void nvgpu_channel_worker_deinit(struct gk20a *g) +{ + struct nvgpu_worker *worker = &g->channel_worker.worker; + + nvgpu_worker_deinit(worker); +} + +/** + * Append a channel to the worker's list, if not there already. + * + * The worker thread processes work items (channels in its work list) and polls + * for other things. This adds @ch to the end of the list and wakes the worker + * up immediately. If the channel already existed in the list, it's not added, + * because in that case it has been scheduled already but has not yet been + * processed. + */ +void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + int ret; + + nvgpu_log_fn(g, " "); + + /* + * Ref released when this item gets processed. The caller should hold + * one ref already, so normally shouldn't fail, but the channel could + * end up being freed between the time the caller got its reference and + * the time we end up here (e.g., if the client got killed); if so, just + * return. 
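+	 * The matching nvgpu_channel_put() happens in the worker's
+	 * wakeup_process_item callback once the job list has been cleaned up,
+	 * or immediately below if nvgpu_worker_enqueue() fails.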
+ */ + if (nvgpu_channel_get(ch) == NULL) { + nvgpu_info(g, "cannot get ch ref for worker!"); + return; + } + + ret = nvgpu_worker_enqueue(&g->channel_worker.worker, + &ch->worker_item); + if (ret != 0) { + nvgpu_channel_put(ch); + return; + } +} diff --git a/drivers/gpu/nvgpu/common/fifo/channel_worker.h b/drivers/gpu/nvgpu/common/fifo/channel_worker.h new file mode 100644 index 000000000..6a8f39daf --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/channel_worker.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_COMMON_FIFO_CHANNEL_WORKER_H +#define NVGPU_COMMON_FIFO_CHANNEL_WORKER_H + +#include + +void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch); + +static inline struct nvgpu_channel_worker * +nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker) +{ + return (struct nvgpu_channel_worker *) + ((uintptr_t)worker - offsetof(struct nvgpu_channel_worker, worker)); +}; + +#endif /* NVGPU_COMMON_FIFO_CHANNEL_WORKER_H */ diff --git a/drivers/gpu/nvgpu/common/fifo/engine_status.c b/drivers/gpu/nvgpu/common/fifo/engine_status.c new file mode 100644 index 000000000..366159e55 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/engine_status.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +bool nvgpu_engine_status_is_ctxsw_switch(struct nvgpu_engine_status_info + *engine_status) +{ + return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH; +} + +bool nvgpu_engine_status_is_ctxsw_load(struct nvgpu_engine_status_info + *engine_status) +{ + return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_LOAD; +} + +bool nvgpu_engine_status_is_ctxsw_save(struct nvgpu_engine_status_info + *engine_status) +{ + return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SAVE; +} + +bool nvgpu_engine_status_is_ctxsw(struct nvgpu_engine_status_info + *engine_status) +{ + return (nvgpu_engine_status_is_ctxsw_switch(engine_status) || + nvgpu_engine_status_is_ctxsw_load(engine_status) || + nvgpu_engine_status_is_ctxsw_save(engine_status)); +} + +bool nvgpu_engine_status_is_ctxsw_invalid(struct nvgpu_engine_status_info + *engine_status) +{ + return engine_status->ctxsw_status == NVGPU_CTX_STATUS_INVALID; +} + +bool nvgpu_engine_status_is_ctxsw_valid(struct nvgpu_engine_status_info + *engine_status) +{ + return engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID; +} +bool nvgpu_engine_status_is_ctx_type_tsg(struct nvgpu_engine_status_info + *engine_status) +{ + return engine_status->ctx_id_type == ENGINE_STATUS_CTX_ID_TYPE_TSGID; +} +bool nvgpu_engine_status_is_next_ctx_type_tsg(struct nvgpu_engine_status_info + *engine_status) +{ + return engine_status->ctx_next_id_type == + ENGINE_STATUS_CTX_NEXT_ID_TYPE_TSGID; +} + +void nvgpu_engine_status_get_ctx_id_type(struct nvgpu_engine_status_info + *engine_status, u32 *ctx_id, u32 *ctx_type) +{ + *ctx_id = engine_status->ctx_id; + *ctx_type = engine_status->ctx_id_type; +} + +void nvgpu_engine_status_get_next_ctx_id_type(struct nvgpu_engine_status_info + *engine_status, u32 *ctx_next_id, + u32 *ctx_next_type) +{ + *ctx_next_id = engine_status->ctx_next_id; + *ctx_next_type = engine_status->ctx_next_id_type; +} diff --git a/drivers/gpu/nvgpu/common/fifo/engines.c b/drivers/gpu/nvgpu/common/fifo/engines.c new file mode 100644 index 000000000..3aa2fa157 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/engines.c @@ -0,0 +1,960 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + + +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FECS_METHOD_WFI_RESTORE 0x80000U + +enum nvgpu_fifo_engine nvgpu_engine_enum_from_dev(struct gk20a *g, + const struct nvgpu_device *dev) +{ + enum nvgpu_fifo_engine ret = NVGPU_ENGINE_INVAL; + + if (nvgpu_device_is_graphics(g, dev)) { + ret = NVGPU_ENGINE_GR; + } else if (nvgpu_device_is_ce(g, dev)) { + /* For now, all CE engines have separate runlists. We can + * identify the NVGPU_ENGINE_GRCE type CE using runlist_id + * comparsion logic with GR runlist_id in init_info() + */ + ret = NVGPU_ENGINE_ASYNC_CE; + } else { + ret = NVGPU_ENGINE_INVAL; + } + + return ret; +} + +const struct nvgpu_device *nvgpu_engine_get_active_eng_info( + struct gk20a *g, u32 engine_id) +{ + struct nvgpu_fifo *f = &g->fifo; + + if (engine_id >= f->max_engines) { + return NULL; + } + + return f->host_engines[engine_id]; +} + +bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id) +{ + struct nvgpu_fifo *f = &g->fifo; + + if (engine_id >= f->max_engines) { + return false; + } + + return f->host_engines[engine_id] != NULL; +} + +u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id) +{ + const struct nvgpu_device *dev; + + dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, inst_id); + if (dev == NULL) { + nvgpu_warn(g, "No GR devices on this GPU for inst[%u]?!", + inst_id); + return NVGPU_INVALID_ENG_ID; + } + + return dev->engine_id; +} + +u32 nvgpu_engine_get_gr_id(struct gk20a *g) +{ + /* Consider 1st available GR engine */ + return nvgpu_engine_get_gr_id_for_inst(g, 0U); +} + +u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 engine_id) +{ + const struct nvgpu_device *dev = NULL; + + dev = nvgpu_engine_get_active_eng_info(g, engine_id); + if (dev == NULL) { + return 0; + } + + return BIT32(dev->intr_id); +} + +u32 nvgpu_gr_engine_interrupt_mask(struct gk20a *g) +{ + const struct nvgpu_device *dev; + u32 intr_mask = 0U; + u32 i; + + for (i = 0U; i < g->num_gr_instances; i++) { + dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, + nvgpu_gr_get_syspipe_id(g, i)); + if (dev == NULL) { + continue; + } + + intr_mask |= BIT32(dev->intr_id); + } + + return intr_mask; +} + +u32 nvgpu_ce_engine_interrupt_mask(struct gk20a *g) +{ + const struct nvgpu_device *dev; + u32 i; + u32 mask = 0U; + + /* + * For old chips - pre-Pascal - we have COPY[0-2], for new chips we + * have some number of LCE instances. For the purpose of this code we + * imagine a system that could have both; in reality that'll never be + * the case. + * + * This can be cleaned up in the future by defining a SW type for CE and + * hiding this ugliness in the device management code. + */ + for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) { + dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0); + if (dev == NULL) { + continue; + } + + mask |= BIT32(dev->intr_id); + } + + /* + * Now take care of LCEs. 
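+	 * Each LCE instance contributes BIT32(dev->intr_id) to the mask,
+	 * just like the legacy COPY0..COPY2 devices handled above.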
+ */ + for (i = 0U; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) { + dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i); + nvgpu_assert(dev != NULL); + + mask |= BIT32(dev->intr_id); + } + + return mask; +} + +#ifdef CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY + +static void nvgpu_engine_enable_activity(struct gk20a *g, + const struct nvgpu_device *dev) +{ + nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), RUNLIST_ENABLED); +} + +void nvgpu_engine_enable_activity_all(struct gk20a *g) +{ + u32 i; + + for (i = 0; i < g->fifo.num_engines; i++) { + nvgpu_engine_enable_activity(g, g->fifo.active_engines[i]); + } +} + +int nvgpu_engine_disable_activity(struct gk20a *g, + const struct nvgpu_device *dev, + bool wait_for_idle) +{ + u32 pbdma_chid = NVGPU_INVALID_CHANNEL_ID; + u32 engine_chid = NVGPU_INVALID_CHANNEL_ID; +#ifdef CONFIG_NVGPU_LS_PMU + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + int mutex_ret = -EINVAL; +#endif + int err = 0; + struct nvgpu_channel *ch = NULL; + struct nvgpu_engine_status_info engine_status; + struct nvgpu_pbdma_status_info pbdma_status; + unsigned long runlist_served_pbdmas; + unsigned long bit; + u32 pbdma_id; + struct nvgpu_fifo *f = &g->fifo; + + nvgpu_log_fn(g, " "); + + g->ops.engine_status.read_engine_status_info(g, dev->engine_id, + &engine_status); + if (engine_status.is_busy && !wait_for_idle) { + return -EBUSY; + } + +#ifdef CONFIG_NVGPU_LS_PMU + if (g->ops.pmu.is_pmu_supported(g)) { + mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token); + } +#endif + + nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), + RUNLIST_DISABLED); + + runlist_served_pbdmas = f->runlists[dev->runlist_id]->pbdma_bitmask; + + for_each_set_bit(bit, &runlist_served_pbdmas, + nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) { + pbdma_id = U32(bit); + /* chid from pbdma status */ + g->ops.pbdma_status.read_pbdma_status_info(g, + pbdma_id, + &pbdma_status); + if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) || + nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) { + pbdma_chid = pbdma_status.id; + } else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) || + nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) { + pbdma_chid = pbdma_status.next_id; + } else { + /* Nothing to do here */ + } + + if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) { + ch = nvgpu_channel_from_id(g, pbdma_chid); + if (ch != NULL) { + err = g->ops.fifo.preempt_channel(g, ch); + nvgpu_channel_put(ch); + } + if (err != 0) { + goto clean_up; + } + } + } + + /* chid from engine status */ + g->ops.engine_status.read_engine_status_info(g, dev->engine_id, + &engine_status); + if (nvgpu_engine_status_is_ctxsw_valid(&engine_status) || + nvgpu_engine_status_is_ctxsw_save(&engine_status)) { + engine_chid = engine_status.ctx_id; + } else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status) || + nvgpu_engine_status_is_ctxsw_load(&engine_status)) { + engine_chid = engine_status.ctx_next_id; + } else { + /* Nothing to do here */ + } + + if (engine_chid != NVGPU_INVALID_ENG_ID && engine_chid != pbdma_chid) { + ch = nvgpu_channel_from_id(g, engine_chid); + if (ch != NULL) { + err = g->ops.fifo.preempt_channel(g, ch); + nvgpu_channel_put(ch); + } + if (err != 0) { + goto clean_up; + } + } + +clean_up: +#ifdef CONFIG_NVGPU_LS_PMU + if (mutex_ret == 0) { + if (nvgpu_pmu_lock_release(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token) != 0){ + nvgpu_err(g, "failed to release PMU lock"); + } + } +#endif + if (err != 0) { + nvgpu_log_fn(g, "failed"); + nvgpu_engine_enable_activity(g, dev); + } else { + nvgpu_log_fn(g, "done"); + } + 
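+	/*
+	 * Note that on failure the runlist activity was re-enabled above, so
+	 * the engine is not left silently disabled.
+	 */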
return err; +} + +int nvgpu_engine_disable_activity_all(struct gk20a *g, + bool wait_for_idle) +{ + unsigned int i; + int err = 0, ret = 0; + + for (i = 0; i < g->fifo.num_engines; i++) { + err = nvgpu_engine_disable_activity(g, + g->fifo.active_engines[i], + wait_for_idle); + if (err != 0) { + nvgpu_err(g, "failed to disable engine %d activity", + g->fifo.active_engines[i]->engine_id); + ret = err; + break; + } + } + + if (err != 0) { + while (i-- != 0U) { + nvgpu_engine_enable_activity(g, + g->fifo.active_engines[i]); + } + } + + return ret; +} + +int nvgpu_engine_wait_for_idle(struct gk20a *g) +{ + struct nvgpu_timeout timeout; + u32 delay = POLL_DELAY_MIN_US; + int ret = 0, err = 0; + u32 i, host_num_engines; + struct nvgpu_engine_status_info engine_status; + + nvgpu_log_fn(g, " "); + + host_num_engines = + nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); + + err = nvgpu_timeout_init(g, &timeout, nvgpu_get_poll_timeout(g), + NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + return -EINVAL; + } + + for (i = 0; i < host_num_engines; i++) { + if (!nvgpu_engine_check_valid_id(g, i)) { + continue; + } + + ret = -ETIMEDOUT; + do { + g->ops.engine_status.read_engine_status_info(g, i, + &engine_status); + if (!engine_status.is_busy) { + ret = 0; + break; + } + + nvgpu_usleep_range(delay, delay * 2U); + delay = min_t(u32, + delay << 1U, POLL_DELAY_MAX_US); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (ret != 0) { + /* possible causes: + * check register settings programmed in hal set by + * elcg_init_idle_filters and init_therm_setup_hw + */ + nvgpu_err(g, "cannot idle engine: %u " + "engine_status: 0x%08x", i, + engine_status.reg_data); + break; + } + } + + nvgpu_log_fn(g, "done"); + + return ret; +} + +#endif /* CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY */ + +int nvgpu_engine_setup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + int err = 0; + size_t size; + + f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); + size = nvgpu_safe_mult_u64(f->max_engines, + sizeof(struct nvgpu_device *)); + + /* + * Allocate the two device lists for host devices. + */ + f->host_engines = nvgpu_kzalloc(g, size); + if (f->host_engines == NULL) { + nvgpu_err(g, "OOM allocating host engine list"); + return -ENOMEM; + } + f->active_engines = nvgpu_kzalloc(g, size); + if (f->active_engines == NULL) { + nvgpu_err(g, "no mem for active engine list"); + err = -ENOMEM; + goto clean_up_engine_info; + } + + err = nvgpu_engine_init_info(f); + if (err != 0) { + nvgpu_err(g, "init engine info failed"); + goto clean_up; + } + + return 0; + +clean_up: + nvgpu_kfree(g, f->active_engines); + f->active_engines = NULL; + +clean_up_engine_info: + nvgpu_kfree(g, f->host_engines); + f->host_engines = NULL; + + return err; +} + +void nvgpu_engine_cleanup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + + f->num_engines = 0; + nvgpu_kfree(g, f->host_engines); + f->host_engines = NULL; + nvgpu_kfree(g, f->active_engines); + f->active_engines = NULL; +} + +#ifdef CONFIG_NVGPU_ENGINE_RESET +static void nvgpu_engine_gr_reset(struct gk20a *g) +{ + struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler; + int err = 0; + + nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_PREAMBLE); + +#ifdef CONFIG_NVGPU_POWER_PG + if (nvgpu_pg_elpg_disable(g) != 0 ) { + nvgpu_err(g, "failed to set disable elpg"); + } +#endif + nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_DISABLE); + +#ifdef CONFIG_NVGPU_FECS_TRACE + /* + * Resetting engine will alter read/write index. 
Need to flush + * circular buffer before re-enabling FECS. + */ + if (g->ops.gr.fecs_trace.reset != NULL) { + if (g->ops.gr.fecs_trace.reset(g) != 0) { + nvgpu_warn(g, "failed to reset fecs traces"); + } + } +#endif + + nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_FECS_TRACE_RESET); + + /* + * HALT_PIPELINE method and gr reset during recovery is supported + * starting nvgpu-next simulation. + */ + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL); + if (err != 0) { + nvgpu_err(g, "failed to halt gr pipe"); + } + + nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_HALT_PIPELINE); + + /* + * resetting only engine is not + * enough, we do full init sequence + */ + nvgpu_log(g, gpu_dbg_rec, "resetting gr engine"); + + err = nvgpu_gr_reset(g); + if (err != 0) { + nvgpu_err(g, "failed to reset gr engine"); + } + +#ifdef CONFIG_NVGPU_POWER_PG + if (nvgpu_pg_elpg_enable(g) != 0) { + nvgpu_err(g, "failed to set enable elpg"); + } + nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_REENABLE); +#endif +} + +void nvgpu_engine_reset(struct gk20a *g, u32 engine_id) +{ + struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler; + const struct nvgpu_device *dev; + int err = 0; + u32 gr_instance_id; + + nvgpu_log_fn(g, " "); + + if (g == NULL) { + return; + } + + nvgpu_swprofile_begin_sample(prof); + + dev = nvgpu_engine_get_active_eng_info(g, engine_id); + if (dev == NULL) { + nvgpu_err(g, "unsupported engine_id %d", engine_id); + return; + } + + if (!nvgpu_device_is_ce(g, dev) && + !nvgpu_device_is_graphics(g, dev)) { + nvgpu_warn(g, "Ignoring reset for non-host engine."); + return; + } + + /* + * Simple case first: reset a copy engine. + */ + if (nvgpu_device_is_ce(g, dev)) { + err = nvgpu_mc_reset_dev(g, dev); + if (err != 0) { + nvgpu_log_info(g, "CE engine [id:%u] reset failed", + dev->engine_id); + } + return; + } + + /* + * Now reset a GR engine. + */ + gr_instance_id = + nvgpu_grmgr_get_gr_instance_id_for_syspipe( + g, dev->inst_id); + + nvgpu_gr_exec_for_instance(g, + gr_instance_id, nvgpu_engine_gr_reset(g)); +} +#endif + +u32 nvgpu_engine_get_fast_ce_runlist_id(struct gk20a *g) +{ + const struct nvgpu_device *dev; + u32 nr_lces; + u32 i; + + /* + * Obtain a runlist ID for the fastest available CE. The priority order + * is: + * + * 1. Last available LCE + * 2. Last available COPY[0-2] + * 3. GRAPHICS runlist as a last resort. + */ + nr_lces = nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); + if (nr_lces > 0U) { + dev = nvgpu_device_get(g, + NVGPU_DEVTYPE_LCE, + nr_lces - 1U); + nvgpu_assert(dev != NULL); + + return dev->runlist_id; + } + + /* + * Note: this only works since NVGPU_DEVTYPE_GRAPHICS is 0 and the COPYx + * are all > 0. + */ + for (i = NVGPU_DEVTYPE_COPY2; i >= NVGPU_DEVTYPE_COPY0; i--) { + dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0); + if (dev != NULL) { + return dev->runlist_id; + } + } + + /* + * Fall back to GR. 
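+	 * The first GRAPHICS device is expected to exist (asserted below);
+	 * its runlist id is the last resort in the priority order described
+	 * above.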
+ */ + dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0); + nvgpu_assert(dev != NULL); + + return dev->runlist_id; +} + +u32 nvgpu_engine_get_gr_runlist_id(struct gk20a *g) +{ + const struct nvgpu_device *dev; + + dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0); + if (dev == NULL) { + nvgpu_warn(g, "No GR device on this GPU?!"); + return NVGPU_INVALID_RUNLIST_ID; + } + + return dev->runlist_id; +} + +bool nvgpu_engine_is_valid_runlist_id(struct gk20a *g, u32 runlist_id) +{ + u32 i; + struct nvgpu_fifo *f = &g->fifo; + + for (i = 0U; i < f->num_engines; i++) { + const struct nvgpu_device *dev = f->active_engines[i]; + + if (dev->runlist_id == runlist_id) { + return true; + } + } + + return false; +} + +/* + * Link engine IDs to MMU IDs and vice versa. + */ +u32 nvgpu_engine_id_to_mmu_fault_id(struct gk20a *g, u32 engine_id) +{ + const struct nvgpu_device *dev; + + dev = nvgpu_engine_get_active_eng_info(g, engine_id); + + if (dev == NULL) { + nvgpu_err(g, + "engine_id: %u is not in active list", + engine_id); + return NVGPU_INVALID_ENG_ID; + } + + return dev->fault_id; +} + +u32 nvgpu_engine_mmu_fault_id_to_engine_id(struct gk20a *g, u32 fault_id) +{ + u32 i; + const struct nvgpu_device *dev; + struct nvgpu_fifo *f = &g->fifo; + + for (i = 0U; i < f->num_engines; i++) { + dev = f->active_engines[i]; + + if (dev->fault_id == fault_id) { + return dev->engine_id; + } + } + + return NVGPU_INVALID_ENG_ID; +} + +u32 nvgpu_engine_get_mask_on_id(struct gk20a *g, u32 id, bool is_tsg) +{ + unsigned int i; + u32 engines = 0; + struct nvgpu_engine_status_info engine_status; + u32 ctx_id; + u32 type; + bool busy; + + for (i = 0; i < g->fifo.num_engines; i++) { + const struct nvgpu_device *dev = g->fifo.active_engines[i]; + + g->ops.engine_status.read_engine_status_info(g, + dev->engine_id, &engine_status); + + if (nvgpu_engine_status_is_ctxsw_load( + &engine_status)) { + nvgpu_engine_status_get_next_ctx_id_type( + &engine_status, &ctx_id, &type); + } else { + nvgpu_engine_status_get_ctx_id_type( + &engine_status, &ctx_id, &type); + } + + busy = engine_status.is_busy; + + if (!busy || !(ctx_id == id)) { + continue; + } + + if ((is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID)) || + (!is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_CHID))) { + engines |= BIT32(dev->engine_id); + } + } + + return engines; +} + +static int nvgpu_engine_init_one_dev(struct nvgpu_fifo *f, + const struct nvgpu_device *dev) +{ + bool found; + struct nvgpu_device *dev_rw; + struct gk20a *g = f->g; + + dev_rw = (struct nvgpu_device *)dev; + + /* + * Populate the PBDMA info for this device; ideally it'd be done + * during device init, but the FIFO unit is not out of reset that + * early in the nvgpu_finalize_poweron() sequence. + * + * We only need to do this for native; vGPU already has pbdma_id + * populated during device initialization. 
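+	 * If the HAL is present but the lookup fails, the PBDMA map is
+	 * considered inconsistent and the device is rejected with -EINVAL
+	 * below.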
+ */ + if (g->ops.fifo.find_pbdma_for_runlist != NULL) { + found = g->ops.fifo.find_pbdma_for_runlist(g, + dev->runlist_id, + &dev_rw->pbdma_id); + if (!found) { + nvgpu_err(g, "busted pbdma map"); + return -EINVAL; + } + } + +#if defined(CONFIG_NVGPU_NEXT) + { + int err = nvgpu_next_engine_init_one_dev(g, dev); + if (err != 0) { + return err; + } + } +#endif + + f->host_engines[dev->engine_id] = dev; + f->active_engines[f->num_engines] = dev; + ++f->num_engines; + + return 0; +} + +int nvgpu_engine_init_info(struct nvgpu_fifo *f) +{ + int err; + struct gk20a *g = f->g; + const struct nvgpu_device *dev; + + f->num_engines = 0; + + nvgpu_log(g, gpu_dbg_device, "Loading host engines from device list"); + nvgpu_log(g, gpu_dbg_device, " GFX devices: %u", + nvgpu_device_count(g, NVGPU_DEVTYPE_GRAPHICS)); + + nvgpu_device_for_each(g, dev, NVGPU_DEVTYPE_GRAPHICS) { + err = nvgpu_engine_init_one_dev(f, dev); + if (err != 0) { + return err; + } + } + + return g->ops.engine.init_ce_info(f); +} + +void nvgpu_engine_get_id_and_type(struct gk20a *g, u32 engine_id, + u32 *id, u32 *type) +{ + struct nvgpu_engine_status_info engine_status; + + g->ops.engine_status.read_engine_status_info(g, engine_id, + &engine_status); + + /* use next_id if context load is failing */ + if (nvgpu_engine_status_is_ctxsw_load( + &engine_status)) { + nvgpu_engine_status_get_next_ctx_id_type( + &engine_status, id, type); + } else { + nvgpu_engine_status_get_ctx_id_type( + &engine_status, id, type); + } +} + +u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g, + u32 *id_ptr, bool *is_tsg_ptr) +{ + u32 i; + u32 id = U32_MAX; + bool is_tsg = false; + u32 mailbox2; + struct nvgpu_engine_status_info engine_status; + const struct nvgpu_device *dev = NULL; + + for (i = 0U; i < g->fifo.num_engines; i++) { + dev = g->fifo.active_engines[i]; + + g->ops.engine_status.read_engine_status_info(g, dev->engine_id, + &engine_status); + + /* + * we are interested in busy engines that + * are doing context switch + */ + if (!engine_status.is_busy || + !nvgpu_engine_status_is_ctxsw(&engine_status)) { + continue; + } + + if (nvgpu_engine_status_is_ctxsw_load(&engine_status)) { + id = engine_status.ctx_next_id; + is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg( + &engine_status); + } else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) { + mailbox2 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g, + NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2); + if ((mailbox2 & FECS_METHOD_WFI_RESTORE) != 0U) { + id = engine_status.ctx_next_id; + is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg( + &engine_status); + } else { + id = engine_status.ctx_id; + is_tsg = nvgpu_engine_status_is_ctx_type_tsg( + &engine_status); + } + } else { + id = engine_status.ctx_id; + is_tsg = nvgpu_engine_status_is_ctx_type_tsg( + &engine_status); + } + break; + } + + *id_ptr = id; + *is_tsg_ptr = is_tsg; + + return dev->engine_id; +} + +u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 i, eng_bitmask = 0U; + struct nvgpu_engine_status_info engine_status; + + for (i = 0U; i < f->num_engines; i++) { + const struct nvgpu_device *dev = f->active_engines[i]; + + g->ops.engine_status.read_engine_status_info(g, dev->engine_id, + &engine_status); + + if (engine_status.is_busy && (dev->runlist_id == runlist_id)) { + eng_bitmask |= BIT32(dev->engine_id); + } + } + + return eng_bitmask; +} + +#ifdef CONFIG_NVGPU_DEBUGGER +bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id, + u32 engine_subid, bool 
fake_fault) +{ + const struct nvgpu_device *dev; + + dev = nvgpu_engine_get_active_eng_info(g, engine_id); + if (dev == NULL) { + return false; + } + + /* + * channel recovery is only deferred if an sm debugger + * is attached and has MMU debug mode is enabled + */ + if (!g->ops.gr.sm_debugger_attached(g) || + !g->ops.fb.is_debug_mode_enabled(g)) { + return false; + } + + /* if this fault is fake (due to RC recovery), don't defer recovery */ + if (fake_fault) { + return false; + } + + if (dev->type != NVGPU_DEVTYPE_GRAPHICS) { + return false; + } + + return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid); +} +#endif + +u32 nvgpu_engine_mmu_fault_id_to_veid(struct gk20a *g, u32 mmu_fault_id, + u32 gr_eng_fault_id) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 num_subctx; + u32 veid = INVAL_ID; + + num_subctx = f->max_subctx_count; + + if ((mmu_fault_id >= gr_eng_fault_id) && + (mmu_fault_id < nvgpu_safe_add_u32(gr_eng_fault_id, + num_subctx))) { + veid = mmu_fault_id - gr_eng_fault_id; + } + + return veid; +} + +static u32 nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(struct gk20a *g, + u32 mmu_fault_id, u32 *veid) +{ + u32 i; + u32 engine_id = INVAL_ID; + const struct nvgpu_device *dev; + struct nvgpu_fifo *f = &g->fifo; + + for (i = 0U; i < f->num_engines; i++) { + dev = f->active_engines[i]; + + if (dev->type == NVGPU_DEVTYPE_GRAPHICS) { + *veid = nvgpu_engine_mmu_fault_id_to_veid(g, + mmu_fault_id, dev->fault_id); + if (*veid != INVAL_ID) { + engine_id = dev->engine_id; + break; + } + } else { + if (dev->fault_id == mmu_fault_id) { + engine_id = dev->engine_id; + *veid = INVAL_ID; + break; + } + } + } + return engine_id; +} + +void nvgpu_engine_mmu_fault_id_to_eng_ve_pbdma_id(struct gk20a *g, + u32 mmu_fault_id, u32 *engine_id, u32 *veid, u32 *pbdma_id) +{ + *engine_id = nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(g, + mmu_fault_id, veid); + + if (*engine_id == INVAL_ID) { + *pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id(g, + mmu_fault_id); + } else { + *pbdma_id = INVAL_ID; + } +} diff --git a/drivers/gpu/nvgpu/common/fifo/fifo.c b/drivers/gpu/nvgpu/common/fifo/fifo.c new file mode 100644 index 000000000..5c25ac939 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/fifo.c @@ -0,0 +1,315 @@ +/* + * FIFO + * + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const char *nvgpu_fifo_kickoff_profile_events[] = { + NVGPU_FIFO_KICKOFF_PROFILE_EVENTS, +}; + +static const char *nvgpu_fifo_recovery_profile_events[] = { + NVGPU_FIFO_RECOVERY_PROFILE_EVENTS, +}; + +static const char *nvgpu_fifo_engine_reset_events[] = { + NVGPU_FIFO_ENGINE_RESET_EVENTS, +}; + +void nvgpu_fifo_cleanup_sw_common(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + + nvgpu_log_fn(g, " "); + +#ifdef CONFIG_NVGPU_USERD + g->ops.userd.cleanup_sw(g); +#endif + nvgpu_channel_cleanup_sw(g); + nvgpu_tsg_cleanup_sw(g); + nvgpu_runlist_cleanup_sw(g); + nvgpu_engine_cleanup_sw(g); + if (g->ops.pbdma.cleanup_sw != NULL) { + g->ops.pbdma.cleanup_sw(g); + } + +#ifdef CONFIG_NVGPU_DEBUGGER + f->deferred_reset_pending = false; + nvgpu_mutex_destroy(&f->deferred_reset_mutex); +#endif + nvgpu_mutex_destroy(&f->engines_reset_mutex); + nvgpu_mutex_destroy(&f->intr.isr.mutex); + + f->sw_ready = false; +} + +void nvgpu_fifo_cleanup_sw(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + nvgpu_channel_worker_deinit(g); +#endif + nvgpu_fifo_cleanup_sw_common(g); +} + +static void nvgpu_fifo_remove_support(struct nvgpu_fifo *f) +{ + struct gk20a *g = f->g; + + g->ops.fifo.cleanup_sw(g); +} + +int nvgpu_fifo_setup_sw_common(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + int err = 0; + + nvgpu_log_fn(g, " "); + + f->g = g; + + nvgpu_mutex_init(&f->intr.isr.mutex); + nvgpu_mutex_init(&f->engines_reset_mutex); +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_mutex_init(&f->deferred_reset_mutex); +#endif + + nvgpu_swprofile_initialize(g, &f->kickoff_profiler, + nvgpu_fifo_kickoff_profile_events); + nvgpu_swprofile_initialize(g, &f->recovery_profiler, + nvgpu_fifo_recovery_profile_events); + nvgpu_swprofile_initialize(g, &f->eng_reset_profiler, + nvgpu_fifo_engine_reset_events); + + + err = nvgpu_channel_setup_sw(g); + if (err != 0) { + nvgpu_err(g, "failed to init channel support"); + goto clean_up; + } + + err = nvgpu_tsg_setup_sw(g); + if (err != 0) { + nvgpu_err(g, "failed to init tsg support"); + goto clean_up_channel; + } + + if (g->ops.pbdma.setup_sw != NULL) { + err = g->ops.pbdma.setup_sw(g); + if (err != 0) { + nvgpu_err(g, "failed to init pbdma support"); + goto clean_up_tsg; + } + } + + err = nvgpu_engine_setup_sw(g); + if (err != 0) { + nvgpu_err(g, "failed to init engine support"); + goto clean_up_pbdma; + } + + err = nvgpu_runlist_setup_sw(g); + if (err != 0) { + nvgpu_err(g, "failed to init runlist support"); + goto clean_up_engine; + } + +#ifdef CONFIG_NVGPU_USERD + err = g->ops.userd.setup_sw(g); + if (err != 0) { + nvgpu_err(g, "failed to init userd support"); + goto clean_up_runlist; + } +#endif + + f->remove_support = nvgpu_fifo_remove_support; + + nvgpu_log_fn(g, "done"); + return 0; + +#ifdef CONFIG_NVGPU_USERD +clean_up_runlist: + nvgpu_runlist_cleanup_sw(g); +#endif + +clean_up_engine: + nvgpu_engine_cleanup_sw(g); + +clean_up_pbdma: + if (g->ops.pbdma.cleanup_sw != NULL) { + g->ops.pbdma.cleanup_sw(g); + } + +clean_up_tsg: + nvgpu_tsg_cleanup_sw(g); + +clean_up_channel: + nvgpu_channel_cleanup_sw(g); + +clean_up: + nvgpu_err(g, "init fifo support failed"); + return err; +} + +int nvgpu_fifo_setup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (f->sw_ready) { + nvgpu_log_fn(g, "skip init"); + return 0; + } + + err = 
nvgpu_fifo_setup_sw_common(g); + if (err != 0) { + nvgpu_err(g, "fifo common sw setup failed, err=%d", err); + return err; + } + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + err = nvgpu_channel_worker_init(g); + if (err != 0) { + nvgpu_err(g, "worker init fail, err=%d", err); + goto clean_up; + } +#endif + + f->sw_ready = true; + + nvgpu_log_fn(g, "done"); + return 0; + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT +clean_up: + nvgpu_fifo_cleanup_sw_common(g); + + return err; +#endif +} + +int nvgpu_fifo_init_support(struct gk20a *g) +{ + int err; + + err = g->ops.fifo.setup_sw(g); + if (err != 0) { + nvgpu_err(g, "fifo sw setup failed, err=%d", err); + return err; + } + + if (g->ops.fifo.init_fifo_setup_hw != NULL) { + err = g->ops.fifo.init_fifo_setup_hw(g); + if (err != 0) { + nvgpu_err(g, "fifo hw setup failed, err=%d", err); + goto clean_up; + } + } + + return 0; + +clean_up: + nvgpu_fifo_cleanup_sw_common(g); + + return err; +} + +static const char * const pbdma_ch_eng_status_str[] = { + "invalid", + "valid", + "NA", + "NA", + "NA", + "load", + "save", + "switch", +}; + +static const char * const not_found_str[] = { + "NOT FOUND" +}; + +const char *nvgpu_fifo_decode_pbdma_ch_eng_status(u32 index) +{ + if (index >= ARRAY_SIZE(pbdma_ch_eng_status_str)) { + return not_found_str[0]; + } else { + return pbdma_ch_eng_status_str[index]; + } +} + +static void disable_fifo_interrupts(struct gk20a *g) +{ + /** Disable fifo intr */ + g->ops.fifo.intr_0_enable(g, false); + g->ops.fifo.intr_1_enable(g, false); + + if (g->ops.fifo.intr_top_enable == NULL) { + nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO, + NVGPU_CIC_INTR_DISABLE); + nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO, + NVGPU_CIC_INTR_DISABLE); + } else { + g->ops.fifo.intr_top_enable(g, NVGPU_CIC_INTR_DISABLE); + } +} + +int nvgpu_fifo_suspend(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + if (g->ops.mm.is_bar1_supported(g)) { + g->ops.fifo.bar1_snooping_disable(g); + } + + disable_fifo_interrupts(g); + + nvgpu_log_fn(g, "done"); + return 0; +} + +void nvgpu_fifo_sw_quiesce(struct gk20a *g) +{ + u32 runlist_mask = U32_MAX; + + g->ops.runlist.write_state(g, runlist_mask, RUNLIST_DISABLED); + + /* Preempt all runlists */ + nvgpu_fifo_preempt_runlists_for_rc(g, runlist_mask); +} diff --git a/drivers/gpu/nvgpu/common/fifo/job.c b/drivers/gpu/nvgpu/common/fifo/job.c new file mode 100644 index 000000000..635957acb --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/job.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline struct nvgpu_channel_job * +channel_gk20a_job_from_list(struct nvgpu_list_node *node) +{ + return (struct nvgpu_channel_job *) + ((uintptr_t)node - offsetof(struct nvgpu_channel_job, list)); +}; + +int nvgpu_channel_alloc_job(struct nvgpu_channel *c, + struct nvgpu_channel_job **job_out) +{ + unsigned int put = c->joblist.pre_alloc.put; + unsigned int get = c->joblist.pre_alloc.get; + unsigned int next = (put + 1) % c->joblist.pre_alloc.length; + bool full = next == get; + + if (full) { + return -EAGAIN; + } + + *job_out = &c->joblist.pre_alloc.jobs[put]; + (void) memset(*job_out, 0, sizeof(**job_out)); + + return 0; +} + +void nvgpu_channel_free_job(struct nvgpu_channel *c, + struct nvgpu_channel_job *job) +{ + /* + * Nothing needed for now. The job contents are preallocated. The + * completion fence may briefly outlive the job, but the job memory is + * reclaimed only when a new submit comes in and the ringbuffer has ran + * out of space. + */ +} + +void nvgpu_channel_joblist_lock(struct nvgpu_channel *c) +{ + nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock); +} + +void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c) +{ + nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock); +} + +struct nvgpu_channel_job *nvgpu_channel_joblist_peek(struct nvgpu_channel *c) +{ + unsigned int get = c->joblist.pre_alloc.get; + unsigned int put = c->joblist.pre_alloc.put; + bool empty = get == put; + + return empty ? NULL : &c->joblist.pre_alloc.jobs[get]; +} + +void nvgpu_channel_joblist_add(struct nvgpu_channel *c, + struct nvgpu_channel_job *job) +{ + c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1U) % + (c->joblist.pre_alloc.length); +} + +void nvgpu_channel_joblist_delete(struct nvgpu_channel *c, + struct nvgpu_channel_job *job) +{ + c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1U) % + (c->joblist.pre_alloc.length); +} + +int nvgpu_channel_joblist_init(struct nvgpu_channel *c, u32 num_jobs) +{ + int err; + u32 size; + + size = (u32)sizeof(struct nvgpu_channel_job); + if (num_jobs > nvgpu_safe_sub_u32(U32_MAX / size, 1U)) { + err = -ERANGE; + goto clean_up; + } + + /* + * The max capacity of this ring buffer is the alloc size minus one (in + * units of item slot), so allocate a size of (num_jobs + 1) * size + * bytes. + */ + c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g, + nvgpu_safe_mult_u32( + nvgpu_safe_add_u32(num_jobs, 1U), + size)); + if (c->joblist.pre_alloc.jobs == NULL) { + err = -ENOMEM; + goto clean_up; + } + + /* + * length is the allocation size of the ringbuffer; the number of jobs + * that fit is one less. 
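+ *
+ * Illustrative example (arbitrary numbers): with num_jobs = 128 the
+ * ring is allocated with length = 129 slots, and
+ * nvgpu_channel_alloc_job() returns -EAGAIN only when
+ * (put + 1U) % 129 == get, i.e. when all 128 requested jobs are
+ * already in flight.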
+ */ + c->joblist.pre_alloc.length = nvgpu_safe_add_u32(num_jobs, 1U); + c->joblist.pre_alloc.put = 0; + c->joblist.pre_alloc.get = 0; + + return 0; + +clean_up: + nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); + (void) memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc)); + return err; +} + +void nvgpu_channel_joblist_deinit(struct nvgpu_channel *c) +{ + if (c->joblist.pre_alloc.jobs != NULL) { + nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs); + c->joblist.pre_alloc.jobs = NULL; + } +} diff --git a/drivers/gpu/nvgpu/common/fifo/pbdma.c b/drivers/gpu/nvgpu/common/fifo/pbdma.c new file mode 100644 index 000000000..ffbe25d5c --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/pbdma.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +static void nvgpu_pbdma_init_intr_descs(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + + if (g->ops.pbdma.device_fatal_0_intr_descs != NULL) { + f->intr.pbdma.device_fatal_0 = + g->ops.pbdma.device_fatal_0_intr_descs(); + } + + if (g->ops.pbdma.channel_fatal_0_intr_descs != NULL) { + f->intr.pbdma.channel_fatal_0 = + g->ops.pbdma.channel_fatal_0_intr_descs(); + } + if (g->ops.pbdma.restartable_0_intr_descs != NULL) { + f->intr.pbdma.restartable_0 = + g->ops.pbdma.restartable_0_intr_descs(); + } +} + +int nvgpu_pbdma_setup_sw(struct gk20a *g) +{ + nvgpu_pbdma_init_intr_descs(g); + + return 0; +} + +void nvgpu_pbdma_cleanup_sw(struct gk20a *g) +{ + return; +} diff --git a/drivers/gpu/nvgpu/common/fifo/pbdma_status.c b/drivers/gpu/nvgpu/common/fifo/pbdma_status.c new file mode 100644 index 000000000..0c214d273 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/pbdma_status.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +bool nvgpu_pbdma_status_is_chsw_switch(struct nvgpu_pbdma_status_info + *pbdma_status) +{ + return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SWITCH; +} +bool nvgpu_pbdma_status_is_chsw_load(struct nvgpu_pbdma_status_info + *pbdma_status) +{ + return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_LOAD; +} +bool nvgpu_pbdma_status_is_chsw_save(struct nvgpu_pbdma_status_info + *pbdma_status) +{ + return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SAVE; +} +bool nvgpu_pbdma_status_is_chsw_valid(struct nvgpu_pbdma_status_info + *pbdma_status) +{ + return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_VALID; +} +bool nvgpu_pbdma_status_is_id_type_tsg(struct nvgpu_pbdma_status_info + *pbdma_status) +{ + return pbdma_status->id_type == PBDMA_STATUS_ID_TYPE_TSGID; +} +bool nvgpu_pbdma_status_is_next_id_type_tsg(struct nvgpu_pbdma_status_info + *pbdma_status) +{ + return pbdma_status->next_id_type == PBDMA_STATUS_NEXT_ID_TYPE_TSGID; +} diff --git a/drivers/gpu/nvgpu/common/fifo/preempt.c b/drivers/gpu/nvgpu/common/fifo/preempt.c new file mode 100644 index 000000000..b2429b054 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/preempt.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#endif + +u32 nvgpu_preempt_get_timeout(struct gk20a *g) +{ + return g->ctxsw_timeout_period_ms; +} + +int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + int ret = 0; + u32 preempt_retry_count = 10U; + u32 preempt_retry_timeout = + nvgpu_preempt_get_timeout(g) / preempt_retry_count; +#ifdef CONFIG_NVGPU_LS_PMU + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + int mutex_ret = 0; +#endif + + nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid); + + if (tsg->runlist == NULL) { + return 0; + } + + do { + nvgpu_mutex_acquire(&tsg->runlist->runlist_lock); + + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) { + nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id), + RUNLIST_DISABLED); + } + +#ifdef CONFIG_NVGPU_LS_PMU + mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token); +#endif + g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG); + + /* + * Poll for preempt done. if stalling interrupts are pending + * while preempt is in progress we poll for stalling interrupts + * to finish based on return value from this function and + * retry preempt again. + * If HW is hung, on the last retry instance we try to identify + * the engines hung and set the runlist reset_eng_bitmask + * and mark preemption completion. + */ + ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid, + ID_TYPE_TSG, preempt_retry_count > 1U); + +#ifdef CONFIG_NVGPU_LS_PMU + if (mutex_ret == 0) { + int err = nvgpu_pmu_lock_release(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token); + if (err != 0) { + nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", err); + } + } +#endif + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) { + nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id), + RUNLIST_ENABLED); + } + + nvgpu_mutex_release(&tsg->runlist->runlist_lock); + + if (ret != -EAGAIN) { + break; + } + + ret = nvgpu_cic_wait_for_stall_interrupts(g, preempt_retry_timeout); + if (ret != 0) { + nvgpu_log_info(g, "wait for stall interrupts failed %d", ret); + } + } while (--preempt_retry_count != 0U); + + if (ret != 0) { + if (nvgpu_platform_is_silicon(g)) { + nvgpu_err(g, "preempt timed out for tsgid: %u, " + "ctxsw timeout will trigger recovery if needed", + tsg->tsgid); + } else { + nvgpu_rc_preempt_timeout(g, tsg); + } + } + return ret; +} + +int nvgpu_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch) +{ + int err; + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + + if (tsg != NULL) { + err = g->ops.fifo.preempt_tsg(ch->g, tsg); + } else { + err = g->ops.fifo.preempt_channel(ch->g, ch); + } + + return err; +} + +/* called from rc */ +int nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, + struct nvgpu_tsg *tsg) +{ + unsigned long runlist_served_pbdmas; + unsigned long pbdma_id_bit; + u32 tsgid, pbdma_id; + + if (g->ops.fifo.preempt_poll_pbdma == NULL) { + return 0; + } + + tsgid = tsg->tsgid; + runlist_served_pbdmas = tsg->runlist->pbdma_bitmask; + + for_each_set_bit(pbdma_id_bit, &runlist_served_pbdmas, + nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) { + pbdma_id = U32(pbdma_id_bit); + /* + * If pbdma preempt fails the only option is to reset + * GPU. Any sort of hang indicates the entire GPU’s + * memory system would be blocked. 
+ */ + if (g->ops.fifo.preempt_poll_pbdma(g, tsgid, pbdma_id) != 0) { + nvgpu_err(g, "PBDMA preempt failed"); + return -EBUSY; + } + } + return 0; +} + +/* + * This should be called with runlist_lock held for all the + * runlists set in runlists_mask + */ +void nvgpu_fifo_preempt_runlists_for_rc(struct gk20a *g, u32 runlists_bitmask) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 i; +#ifdef CONFIG_NVGPU_LS_PMU + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + int mutex_ret = 0; +#endif + + /* runlist_lock are locked by teardown and sched are disabled too */ + nvgpu_log_fn(g, "preempt runlists_bitmask:0x%08x", runlists_bitmask); +#ifdef CONFIG_NVGPU_LS_PMU + mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token); +#endif + + for (i = 0U; i < f->num_runlists; i++) { + struct nvgpu_runlist *runlist; + + runlist = &f->active_runlists[i]; + + if ((BIT32(runlist->id) & runlists_bitmask) == 0U) { + continue; + } + /* issue runlist preempt */ + g->ops.fifo.preempt_trigger(g, runlist->id, + ID_TYPE_RUNLIST); +#ifdef CONFIG_NVGPU_RECOVERY + /* + * Preemption will never complete in RC due to some + * fatal condition. Do not poll for preemption to + * complete. Reset engines served by runlists. + */ + runlist->reset_eng_bitmask = runlist->eng_bitmask; +#endif + } + +#ifdef CONFIG_NVGPU_LS_PMU + if (mutex_ret == 0) { + int err = nvgpu_pmu_lock_release(g, g->pmu, PMU_MUTEX_ID_FIFO, + &token); + if (err != 0) { + nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", + err); + } + } +#endif +} diff --git a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c new file mode 100644 index 000000000..638807461 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct priv_cmd_entry { + struct nvgpu_mem *mem; + u32 off; /* offset in mem, in u32 entries */ + u32 fill_off; /* write offset from off, in u32 entries */ + u32 size; /* in words */ + u32 alloc_size; +}; + +struct priv_cmd_queue { + struct vm_gk20a *vm; + struct nvgpu_mem mem; /* pushbuf */ + u32 size; /* allocated length in words */ + u32 put; /* next entry will begin here */ + u32 get; /* next entry to free begins here */ + + /* an entry is a fragment of the pushbuf memory */ + struct priv_cmd_entry *entries; + u32 entries_len; /* allocated length */ + u32 entry_put; + u32 entry_get; +}; + +/* allocate private cmd buffer queue. + used for inserting commands before/after user submitted buffers. */ +int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm, + u32 job_count, struct priv_cmd_queue **queue) +{ + struct gk20a *g = vm->mm->g; + struct priv_cmd_queue *q; + u64 size, tmp_size; + int err = 0; + u32 wait_size, incr_size; + u32 mem_per_job; + + /* + * sema size is at least as much as syncpt size, but semas may not be + * enabled in the build. If neither semas nor syncpts are enabled, priv + * cmdbufs and as such kernel mode submits with job tracking won't be + * supported. + */ +#ifdef CONFIG_NVGPU_SW_SEMAPHORE + wait_size = g->ops.sync.sema.get_wait_cmd_size(); + incr_size = g->ops.sync.sema.get_incr_cmd_size(); +#else + wait_size = g->ops.sync.syncpt.get_wait_cmd_size(); + incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true); +#endif + + /* + * Compute the amount of priv_cmdbuf space we need. In general the + * worst case is the kernel inserts both a semaphore pre-fence and + * post-fence. Any sync-pt fences will take less memory so we can + * ignore them unless they're the only supported type. Jobs can also + * have more than one pre-fence but that's abnormal and we'll -EAGAIN + * if such jobs would fill the queue. + * + * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b, + * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be + * 10 words: all the same as an ACQ plus a non-stalling intr which is + * another 2 words. In reality these numbers vary by chip but we'll use + * 8 and 10 as examples. + * + * Given the job count, cmdbuf space is allocated such that each job + * can get one wait command and one increment command: + * + * job_count * (8 + 10) * 4 bytes + * + * These cmdbufs are inserted as gpfifo entries right before and after + * the user submitted gpfifo entries per submit. + * + * One extra slot is added to the queue length so that the requested + * job count can actually be allocated. 
This ring buffer implementation + * is full when the number of consumed entries is one less than the + * allocation size: + * + * alloc bytes = job_count * (wait + incr + 1) * slot in bytes + */ + mem_per_job = nvgpu_safe_mult_u32( + nvgpu_safe_add_u32( + nvgpu_safe_add_u32(wait_size, incr_size), + 1U), + (u32)sizeof(u32)); + /* both 32 bit and mem_per_job is small */ + size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job); + + tmp_size = PAGE_ALIGN(roundup_pow_of_two(size)); + if (tmp_size > U32_MAX) { + return -ERANGE; + } + size = (u32)tmp_size; + + q = nvgpu_kzalloc(g, sizeof(*q)); + if (q == NULL) { + return -ENOMEM; + } + + q->vm = vm; + + if (job_count > U32_MAX / 2U - 1U) { + err = -ERANGE; + goto err_free_queue; + } + + /* One extra to account for the full condition: 2 * job_count + 1 */ + q->entries_len = nvgpu_safe_mult_u32(2U, + nvgpu_safe_add_u32(job_count, 1U)); + q->entries = nvgpu_vzalloc(g, + nvgpu_safe_mult_u64((u64)q->entries_len, + sizeof(*q->entries))); + if (q->entries == NULL) { + err = -ENOMEM; + goto err_free_queue; + } + + err = nvgpu_dma_alloc_map_sys(vm, size, &q->mem); + if (err != 0) { + nvgpu_err(g, "%s: memory allocation failed", __func__); + goto err_free_entries; + } + + tmp_size = q->mem.size / sizeof(u32); + nvgpu_assert(tmp_size <= U32_MAX); + q->size = (u32)tmp_size; + + *queue = q; + return 0; +err_free_entries: + nvgpu_vfree(g, q->entries); +err_free_queue: + nvgpu_kfree(g, q); + return err; +} + +void nvgpu_priv_cmdbuf_queue_free(struct priv_cmd_queue *q) +{ + struct vm_gk20a *vm = q->vm; + struct gk20a *g = vm->mm->g; + + nvgpu_dma_unmap_free(vm, &q->mem); + nvgpu_vfree(g, q->entries); + nvgpu_kfree(g, q); +} + +/* allocate a cmd buffer with given size. size is number of u32 entries */ +static int nvgpu_priv_cmdbuf_alloc_buf(struct priv_cmd_queue *q, u32 orig_size, + struct priv_cmd_entry *e) +{ + struct gk20a *g = q->vm->mm->g; + u32 size = orig_size; + u32 free_count; + + nvgpu_log_fn(g, "size %d", orig_size); + + /* + * If free space in the end is less than requested, increase the size + * to make the real allocated space start from beginning. The hardware + * expects each cmdbuf to be contiguous in the dma space. + * + * This too small extra space in the end may happen because the + * requested wait and incr command buffers do not necessarily align + * with the whole buffer capacity. They don't always align because the + * buffer size is rounded to the next power of two and because not all + * jobs necessarily use exactly one wait command. + */ + if (nvgpu_safe_add_u32(q->put, size) > q->size) { + size = orig_size + (q->size - q->put); + } + + nvgpu_log_info(g, "priv cmd queue get:put %d:%d", + q->get, q->put); + + nvgpu_assert(q->put < q->size); + nvgpu_assert(q->get < q->size); + nvgpu_assert(q->size > 0U); + free_count = (q->size - q->put + q->get - 1U) & (q->size - 1U); + + if (size > free_count) { + return -EAGAIN; + } + + e->fill_off = 0; + e->size = orig_size; + e->alloc_size = size; + e->mem = &q->mem; + + /* + * if we have increased size to skip free space in the end, set put + * to beginning of cmd buffer + size, as if the prev put was at + * position 0. 
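+ *
+ * Illustrative example (arbitrary numbers): with q->size = 1024 words,
+ * q->put = 1020 and a request for orig_size = 8 words, the allocation
+ * grows to size = 8 + (1024 - 1020) = 12 words so the 4-word tail is
+ * skipped; the entry then starts at e->off = 0 and q->put becomes 8,
+ * exactly as if the previous put had been 0.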
+ */ + if (size != orig_size) { + e->off = 0; + q->put = orig_size; + } else { + e->off = q->put; + q->put = (q->put + orig_size) & (q->size - 1U); + } + + /* we already handled q->put + size > q->size so BUG_ON this */ + BUG_ON(q->put > q->size); + + nvgpu_log_fn(g, "done"); + + return 0; +} + +int nvgpu_priv_cmdbuf_alloc(struct priv_cmd_queue *q, u32 size, + struct priv_cmd_entry **e) +{ + u32 next_put = nvgpu_safe_add_u32(q->entry_put, 1U) % q->entries_len; + struct priv_cmd_entry *entry; + int err; + + if (next_put == q->entry_get) { + return -EAGAIN; + } + entry = &q->entries[q->entry_put]; + + err = nvgpu_priv_cmdbuf_alloc_buf(q, size, entry); + if (err != 0) { + return err; + } + + q->entry_put = next_put; + *e = entry; + + return 0; +} + +void nvgpu_priv_cmdbuf_rollback(struct priv_cmd_queue *q, + struct priv_cmd_entry *e) +{ + nvgpu_assert(q->put < q->size); + nvgpu_assert(q->size > 0U); + nvgpu_assert(e->alloc_size <= q->size); + q->put = (q->put + q->size - e->alloc_size) & (q->size - 1U); + + (void)memset(e, 0, sizeof(*e)); + + nvgpu_assert(q->entry_put < q->entries_len); + nvgpu_assert(q->entries_len > 0U); + q->entry_put = (q->entry_put + q->entries_len - 1U) + % q->entries_len; +} + +void nvgpu_priv_cmdbuf_free(struct priv_cmd_queue *q, struct priv_cmd_entry *e) +{ + struct gk20a *g = q->vm->mm->g; + + if ((q->get != e->off) && e->off != 0U) { + nvgpu_err(g, "priv cmdbuf requests out-of-order"); + } + nvgpu_assert(q->size > 0U); + q->get = nvgpu_safe_add_u32(e->off, e->size) & (q->size - 1U); + q->entry_get = nvgpu_safe_add_u32(q->entry_get, 1U) % q->entries_len; + + (void)memset(e, 0, sizeof(*e)); +} + +void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e, + u32 *data, u32 entries) +{ + nvgpu_assert(e->fill_off + entries <= e->size); + nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32), + data, entries * sizeof(u32)); + e->fill_off += entries; +} + +void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e, + u32 entries) +{ + nvgpu_assert(e->fill_off + entries <= e->size); + nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32), + 0, entries * sizeof(u32)); + e->fill_off += entries; +} + +void nvgpu_priv_cmdbuf_finish(struct gk20a *g, struct priv_cmd_entry *e, + u64 *gva, u32 *size) +{ + /* + * The size is written to the pushbuf entry, so make sure this buffer + * is complete at this point. The responsibility of the channel sync is + * to be consistent in allocation and usage, and the matching size and + * add gops (e.g., get_wait_cmd_size, add_wait_cmd) help there. + */ + nvgpu_assert(e->fill_off == e->size); + +#ifdef CONFIG_NVGPU_TRACE + if (e->mem->aperture == APERTURE_SYSMEM) { + trace_gk20a_push_cmdbuf(g->name, 0, e->size, 0, + (u32 *)e->mem->cpu_va + e->off); + } +#endif + *gva = nvgpu_safe_add_u64(e->mem->gpu_va, + nvgpu_safe_mult_u64((u64)e->off, sizeof(u32))); + *size = e->size; +} diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c new file mode 100644 index 000000000..809695402 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/runlist.c @@ -0,0 +1,914 @@ +/* + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#endif + +void nvgpu_runlist_lock_active_runlists(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + u32 i; + + nvgpu_log_info(g, "acquire runlist_lock for active runlists"); + for (i = 0; i < g->fifo.num_runlists; i++) { + runlist = &f->active_runlists[i]; + nvgpu_mutex_acquire(&runlist->runlist_lock); + } +} + +void nvgpu_runlist_unlock_active_runlists(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + u32 i; + + nvgpu_log_info(g, "release runlist_lock for active runlists"); + for (i = 0; i < g->fifo.num_runlists; i++) { + runlist = &f->active_runlists[i]; + nvgpu_mutex_release(&runlist->runlist_lock); + } +} + +static u32 nvgpu_runlist_append_tsg(struct gk20a *g, + struct nvgpu_runlist *runlist, + u32 **runlist_entry, + u32 *entries_left, + struct nvgpu_tsg *tsg) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 runlist_entry_words = f->runlist_entry_size / (u32)sizeof(u32); + struct nvgpu_channel *ch; + u32 count = 0; + u32 timeslice; + + nvgpu_log_fn(f->g, " "); + + if (*entries_left == 0U) { + return RUNLIST_APPEND_FAILURE; + } + + /* add TSG entry */ + nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid); + + /* + * timeslice is measured with PTIMER. + * On some platforms, PTIMER is lower than 1GHz. 
+ */ + timeslice = scale_ptimer(tsg->timeslice_us, + ptimer_scalingfactor10x(g->ptimer_src_freq)); + + g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice); + + nvgpu_log_info(g, "tsg rl entries left %d runlist [0] %x [1] %x", + *entries_left, + (*runlist_entry)[0], (*runlist_entry)[1]); + *runlist_entry += runlist_entry_words; + count++; + (*entries_left)--; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + /* add runnable channels bound to this TSG */ + nvgpu_list_for_each_entry(ch, &tsg->ch_list, + nvgpu_channel, ch_entry) { + if (!nvgpu_test_bit(ch->chid, + runlist->active_channels)) { + continue; + } + + if (*entries_left == 0U) { + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + return RUNLIST_APPEND_FAILURE; + } + + nvgpu_log_info(g, "add channel %d to runlist", + ch->chid); + g->ops.runlist.get_ch_entry(ch, *runlist_entry); + nvgpu_log_info(g, "rl entries left %d runlist [0] %x [1] %x", + *entries_left, + (*runlist_entry)[0], (*runlist_entry)[1]); + count = nvgpu_safe_add_u32(count, 1U); + *runlist_entry += runlist_entry_words; + (*entries_left)--; + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + return count; +} + + +static u32 nvgpu_runlist_append_prio(struct nvgpu_fifo *f, + struct nvgpu_runlist *runlist, + u32 **runlist_entry, + u32 *entries_left, + u32 interleave_level) +{ + u32 count = 0; + unsigned long tsgid; + + nvgpu_log_fn(f->g, " "); + + for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) { + struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid); + u32 entries; + + if (tsg->interleave_level == interleave_level) { + entries = nvgpu_runlist_append_tsg(f->g, runlist, + runlist_entry, entries_left, tsg); + if (entries == RUNLIST_APPEND_FAILURE) { + return RUNLIST_APPEND_FAILURE; + } + count += entries; + } + } + + return count; +} + +static u32 nvgpu_runlist_append_hi(struct nvgpu_fifo *f, + struct nvgpu_runlist *runlist, + u32 **runlist_entry, + u32 *entries_left) +{ + nvgpu_log_fn(f->g, " "); + + /* + * No higher levels - this is where the "recursion" ends; just add all + * active TSGs at this level. 
+ */ + return nvgpu_runlist_append_prio(f, runlist, runlist_entry, + entries_left, + NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH); +} + +static u32 nvgpu_runlist_append_med(struct nvgpu_fifo *f, + struct nvgpu_runlist *runlist, + u32 **runlist_entry, + u32 *entries_left) +{ + u32 count = 0; + unsigned long tsgid; + + nvgpu_log_fn(f->g, " "); + + for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) { + struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid); + u32 entries; + + if (tsg->interleave_level != + NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM) { + continue; + } + + /* LEVEL_MEDIUM list starts with a LEVEL_HIGH, if any */ + + entries = nvgpu_runlist_append_hi(f, runlist, + runlist_entry, entries_left); + if (entries == RUNLIST_APPEND_FAILURE) { + return RUNLIST_APPEND_FAILURE; + } + count += entries; + + entries = nvgpu_runlist_append_tsg(f->g, runlist, + runlist_entry, entries_left, tsg); + if (entries == RUNLIST_APPEND_FAILURE) { + return RUNLIST_APPEND_FAILURE; + } + count += entries; + } + + return count; +} + +static u32 nvgpu_runlist_append_low(struct nvgpu_fifo *f, + struct nvgpu_runlist *runlist, + u32 **runlist_entry, + u32 *entries_left) +{ + u32 count = 0; + unsigned long tsgid; + + nvgpu_log_fn(f->g, " "); + + for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) { + struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid); + u32 entries; + + if (tsg->interleave_level != + NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW) { + continue; + } + + /* The medium level starts with the highs, if any. */ + + entries = nvgpu_runlist_append_med(f, runlist, + runlist_entry, entries_left); + if (entries == RUNLIST_APPEND_FAILURE) { + return RUNLIST_APPEND_FAILURE; + } + count += entries; + + entries = nvgpu_runlist_append_hi(f, runlist, + runlist_entry, entries_left); + if (entries == RUNLIST_APPEND_FAILURE) { + return RUNLIST_APPEND_FAILURE; + } + count += entries; + + entries = nvgpu_runlist_append_tsg(f->g, runlist, + runlist_entry, entries_left, tsg); + if (entries == RUNLIST_APPEND_FAILURE) { + return RUNLIST_APPEND_FAILURE; + } + count += entries; + } + + if (count == 0U) { + /* + * No transitions to fill with higher levels, so add + * the next level once. If that's empty too, we have only + * LEVEL_HIGH jobs. + */ + count = nvgpu_runlist_append_med(f, runlist, + runlist_entry, entries_left); + if (count == 0U) { + count = nvgpu_runlist_append_hi(f, runlist, + runlist_entry, entries_left); + } + } + + return count; +} + +static u32 nvgpu_runlist_append_flat(struct nvgpu_fifo *f, + struct nvgpu_runlist *runlist, + u32 **runlist_entry, + u32 *entries_left) +{ + u32 count = 0, entries, i; + + nvgpu_log_fn(f->g, " "); + + /* Group by priority but don't interleave. High comes first. */ + + for (i = 0; i < NVGPU_FIFO_RUNLIST_INTERLEAVE_NUM_LEVELS; i++) { + u32 level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH - i; + + entries = nvgpu_runlist_append_prio(f, runlist, runlist_entry, + entries_left, level); + if (entries == RUNLIST_APPEND_FAILURE) { + return RUNLIST_APPEND_FAILURE; + } + count += entries; + } + + return count; +} + +u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f, + struct nvgpu_runlist *runlist, + u32 buf_id, + u32 max_entries) +{ + u32 *runlist_entry_base = runlist->mem[buf_id].cpu_va; + + /* + * The entry pointer and capacity counter that live on the stack here + * keep track of the current position and the remaining space when tsg + * and channel entries are ultimately appended. 
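+ *
+ * Illustrative ordering, assuming one active TSG per interleave level:
+ * nvgpu_runlist_append_low() above emits HIGH, MEDIUM, HIGH, LOW,
+ * since each lower level re-appends all higher levels before its own
+ * TSG entry; nvgpu_runlist_append_flat() simply emits HIGH, MEDIUM,
+ * LOW grouped by priority.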
+ */ + if (f->g->runlist_interleave) { + return nvgpu_runlist_append_low(f, runlist, + &runlist_entry_base, &max_entries); + } else { + return nvgpu_runlist_append_flat(f, runlist, + &runlist_entry_base, &max_entries); + } +} + +static bool nvgpu_runlist_modify_active_locked(struct gk20a *g, + struct nvgpu_runlist *runlist, + struct nvgpu_channel *ch, bool add) +{ + struct nvgpu_tsg *tsg = NULL; + + tsg = nvgpu_tsg_from_ch(ch); + + if (tsg == NULL) { + /* + * Unsupported condition, but shouldn't break anything. Warn + * and tell the caller that nothing has changed. + */ + nvgpu_warn(g, "Bare channel in runlist update"); + return false; + } + + if (add) { + if (nvgpu_test_and_set_bit(ch->chid, + runlist->active_channels)) { + /* was already there */ + return false; + } else { + /* new, and belongs to a tsg */ + nvgpu_set_bit(tsg->tsgid, runlist->active_tsgs); + tsg->num_active_channels = nvgpu_safe_add_u32( + tsg->num_active_channels, 1U); + } + } else { + if (!nvgpu_test_and_clear_bit(ch->chid, + runlist->active_channels)) { + /* wasn't there */ + return false; + } else { + tsg->num_active_channels = nvgpu_safe_sub_u32( + tsg->num_active_channels, 1U); + if (tsg->num_active_channels == 0U) { + /* was the only member of this tsg */ + nvgpu_clear_bit(tsg->tsgid, + runlist->active_tsgs); + } + } + } + + return true; +} + +static int nvgpu_runlist_reconstruct_locked(struct gk20a *g, + struct nvgpu_runlist *runlist, + u32 buf_id, bool add_entries) +{ + u32 num_entries; + struct nvgpu_fifo *f = &g->fifo; + + rl_dbg(g, "[%u] switch to new buffer 0x%16llx", + runlist->id, (u64)nvgpu_mem_get_addr(g, &runlist->mem[buf_id])); + + if (!add_entries) { + runlist->count = 0; + return 0; + } + + num_entries = nvgpu_runlist_construct_locked(f, runlist, buf_id, + f->num_runlist_entries); + if (num_entries == RUNLIST_APPEND_FAILURE) { + return -E2BIG; + } + runlist->count = num_entries; +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(runlist->count > f->num_runlist_entries); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + return 0; +} + +int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_channel *ch, bool add, + bool wait_for_finish) +{ + int ret = 0; + u32 buf_id; + bool add_entries; + + if (ch != NULL) { + bool update = nvgpu_runlist_modify_active_locked(g, rl, ch, add); + if (!update) { + /* no change in runlist contents */ + return 0; + } + /* had a channel to update, so reconstruct */ + add_entries = true; + } else { + /* no channel; add means update all, !add means clear all */ + add_entries = add; + } + + /* double buffering, swap to next */ + buf_id = (rl->cur_buffer == 0U) ? 
1U : 0U; + + ret = nvgpu_runlist_reconstruct_locked(g, rl, buf_id, add_entries); + if (ret != 0) { + return ret; + } + + g->ops.runlist.hw_submit(g, rl->id, rl->count, buf_id); + + if (wait_for_finish) { + ret = g->ops.runlist.wait_pending(g, rl->id); + + if (ret == -ETIMEDOUT) { + nvgpu_err(g, "runlist %d update timeout", rl->id); + /* trigger runlist update timeout recovery */ + return ret; + + } else { + if (ret == -EINTR) { + nvgpu_err(g, "runlist update interrupted"); + } + } + } + + rl->cur_buffer = buf_id; + + return ret; +} + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING +/* trigger host to expire current timeslice and reschedule runlist from front */ +int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next, + bool wait_preempt) +{ + struct gk20a *g = ch->g; + struct nvgpu_runlist *runlist; +#ifdef CONFIG_NVGPU_LS_PMU + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + int mutex_ret = 0; +#endif + int ret = 0; + + runlist = ch->runlist; + if (nvgpu_mutex_tryacquire(&runlist->runlist_lock) == 0) { + return -EBUSY; + } +#ifdef CONFIG_NVGPU_LS_PMU + mutex_ret = nvgpu_pmu_lock_acquire( + g, g->pmu, PMU_MUTEX_ID_FIFO, &token); +#endif + + g->ops.runlist.hw_submit( + g, runlist->id, runlist->count, runlist->cur_buffer); + + if (preempt_next) { + if (g->ops.runlist.reschedule_preempt_next_locked(ch, + wait_preempt) != 0) { + nvgpu_err(g, "reschedule preempt next failed"); + } + } + + if (g->ops.runlist.wait_pending(g, runlist->id) != 0) { + nvgpu_err(g, "wait pending failed for runlist %u", + runlist->id); + } +#ifdef CONFIG_NVGPU_LS_PMU + if (mutex_ret == 0) { + if (nvgpu_pmu_lock_release(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token) != 0) { + nvgpu_err(g, "failed to release PMU lock"); + } + } +#endif + nvgpu_mutex_release(&runlist->runlist_lock); + + return ret; +} +#endif + +/* add/remove a channel from runlist + special cases below: runlist->active_channels will NOT be changed. + (ch == NULL && !add) means remove all active channels from runlist. + (ch == NULL && add) means restore all active channels on runlist. 
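+   Callers below pick the mode: nvgpu_runlist_update() requires
+   ch != NULL for the single-channel case, while nvgpu_runlist_reload()
+   passes ch == NULL to restore (add) or clear (!add) every active
+   channel on the runlist at once.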
*/ +static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_channel *ch, + bool add, bool wait_for_finish) +{ +#ifdef CONFIG_NVGPU_LS_PMU + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + int mutex_ret = 0; +#endif + int ret = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&rl->runlist_lock); +#ifdef CONFIG_NVGPU_LS_PMU + mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token); +#endif + ret = nvgpu_runlist_update_locked(g, rl, ch, add, wait_for_finish); +#ifdef CONFIG_NVGPU_LS_PMU + if (mutex_ret == 0) { + if (nvgpu_pmu_lock_release(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token) != 0) { + nvgpu_err(g, "failed to release PMU lock"); + } + } +#endif + nvgpu_mutex_release(&rl->runlist_lock); + + if (ret == -ETIMEDOUT) { + nvgpu_rc_runlist_update(g, rl->id); + } + + return ret; +} + +int nvgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_channel *ch, + bool add, bool wait_for_finish) +{ + nvgpu_assert(ch != NULL); + + return nvgpu_runlist_do_update(g, rl, ch, add, wait_for_finish); +} + +int nvgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl, + bool add, bool wait_for_finish) +{ + return nvgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish); +} + +int nvgpu_runlist_reload_ids(struct gk20a *g, u32 runlist_ids, bool add) +{ + struct nvgpu_fifo *f = &g->fifo; + int ret = -EINVAL; + unsigned long runlist_id = 0; + int errcode; + unsigned long ulong_runlist_ids = (unsigned long)runlist_ids; + + if (g == NULL) { + goto end; + } + + ret = 0; + for_each_set_bit(runlist_id, &ulong_runlist_ids, 32U) { + /* Capture the last failure error code */ + errcode = g->ops.runlist.reload(g, + f->runlists[runlist_id], add, true); + if (errcode != 0) { + nvgpu_err(g, + "failed to update_runlist %lu %d", + runlist_id, errcode); + ret = errcode; + } + } +end: + return ret; +} + +const char *nvgpu_runlist_interleave_level_name(u32 interleave_level) +{ + const char *ret_string = NULL; + + switch (interleave_level) { + case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW: + ret_string = "LOW"; + break; + + case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: + ret_string = "MEDIUM"; + break; + + case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH: + ret_string = "HIGH"; + break; + + default: + ret_string = "?"; + break; + } + + return ret_string; +} + +void nvgpu_runlist_set_state(struct gk20a *g, u32 runlists_mask, + u32 runlist_state) +{ +#ifdef CONFIG_NVGPU_LS_PMU + u32 token = PMU_INVALID_MUTEX_OWNER_ID; + int mutex_ret = 0; +#endif + nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x", + runlists_mask, runlist_state); + +#ifdef CONFIG_NVGPU_LS_PMU + mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token); +#endif + g->ops.runlist.write_state(g, runlists_mask, runlist_state); +#ifdef CONFIG_NVGPU_LS_PMU + if (mutex_ret == 0) { + if (nvgpu_pmu_lock_release(g, g->pmu, + PMU_MUTEX_ID_FIFO, &token) != 0) { + nvgpu_err(g, "failed to release PMU lock"); + } + } +#endif +} + +void nvgpu_runlist_cleanup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 i, j; + struct nvgpu_runlist *runlist; + + if ((f->runlists == NULL) || (f->active_runlists == NULL)) { + return; + } + + g = f->g; + + for (i = 0; i < f->num_runlists; i++) { + runlist = &f->active_runlists[i]; + for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) { + nvgpu_dma_free(g, &runlist->mem[j]); + } + + nvgpu_kfree(g, runlist->active_channels); + runlist->active_channels = NULL; + + nvgpu_kfree(g, runlist->active_tsgs); + runlist->active_tsgs = NULL; + + 
nvgpu_mutex_destroy(&runlist->runlist_lock); + f->runlists[runlist->id] = NULL; + } + + nvgpu_kfree(g, f->active_runlists); + f->active_runlists = NULL; + f->num_runlists = 0; + nvgpu_kfree(g, f->runlists); + f->runlists = NULL; + f->max_runlists = 0; +} + +void nvgpu_runlist_init_enginfo(struct gk20a *g, struct nvgpu_fifo *f) +{ + struct nvgpu_runlist *runlist; + const struct nvgpu_device *dev; + u32 i, j; + + nvgpu_log_fn(g, " "); + + if (g->is_virtual) { + return; + } + + for (i = 0; i < f->num_runlists; i++) { + runlist = &f->active_runlists[i]; + + (void) g->ops.fifo.find_pbdma_for_runlist(g, + runlist->id, + &runlist->pbdma_bitmask); + nvgpu_log(g, gpu_dbg_info, "runlist %d: pbdma bitmask 0x%x", + runlist->id, runlist->pbdma_bitmask); + + for (j = 0; j < f->num_engines; j++) { + dev = f->active_engines[j]; + + if (dev->runlist_id == runlist->id) { + runlist->eng_bitmask |= BIT32(dev->engine_id); + } + } + nvgpu_log(g, gpu_dbg_info, "runlist %d: act eng bitmask 0x%x", + runlist->id, runlist->eng_bitmask); + } + + nvgpu_log_fn(g, "done"); +} + +static int nvgpu_init_active_runlist_mapping(struct gk20a *g) +{ + struct nvgpu_runlist *runlist; + struct nvgpu_fifo *f = &g->fifo; + unsigned int runlist_id; + size_t runlist_size; + u32 i, j; + int err = 0; + + rl_dbg(g, "Building active runlist map."); + + /* + * In most case we want to loop through active runlists only. Here + * we need to loop through all possible runlists, to build the mapping + * between runlists[runlist_id] and active_runlists[i]. + */ + i = 0U; + for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) { + if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) { + /* skip inactive runlist */ + rl_dbg(g, " Skipping invalid runlist: %d", runlist_id); + continue; + } + + rl_dbg(g, " Configuring HW runlist: %u", runlist_id); + rl_dbg(g, " SW runlist index to HW: %u -> %u", i, runlist_id); + + runlist = &f->active_runlists[i]; + runlist->id = runlist_id; + f->runlists[runlist_id] = runlist; + i = nvgpu_safe_add_u32(i, 1U); + + runlist->active_channels = + nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels, + BITS_PER_BYTE)); + if (runlist->active_channels == NULL) { + err = -ENOMEM; + goto clean_up_runlist; + } + + runlist->active_tsgs = + nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels, + BITS_PER_BYTE)); + if (runlist->active_tsgs == NULL) { + err = -ENOMEM; + goto clean_up_runlist; + } + + runlist_size = (size_t)f->runlist_entry_size * + (size_t)f->num_runlist_entries; + rl_dbg(g, " RL entries: %d", f->num_runlist_entries); + rl_dbg(g, " RL size %zu", runlist_size); + + for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) { + err = nvgpu_dma_alloc_flags_sys(g, + g->is_virtual ? + 0ULL : NVGPU_DMA_PHYSICALLY_ADDRESSED, + runlist_size, + &runlist->mem[j]); + if (err != 0) { + nvgpu_err(g, "memory allocation failed"); + err = -ENOMEM; + goto clean_up_runlist; + } + } + + nvgpu_mutex_init(&runlist->runlist_lock); + + /* + * None of buffers is pinned if this value doesn't change. + * Otherwise, one of them (cur_buffer) must have been pinned. 
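+ *
+ * MAX_RUNLIST_BUFFERS is an out-of-range index into runlist->mem[],
+ * so it works as a "nothing submitted yet" sentinel; the double
+ * buffering in nvgpu_runlist_update_locked() only ever assigns
+ * buffer index 0 or 1 afterwards.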
+ */ + runlist->cur_buffer = MAX_RUNLIST_BUFFERS; + } + + return 0; + +clean_up_runlist: + return err; +} + +int nvgpu_runlist_setup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 num_runlists = 0U; + unsigned int runlist_id; + int err = 0; + + rl_dbg(g, "Initializing Runlists"); + + nvgpu_spinlock_init(&f->runlist_submit_lock); + + f->runlist_entry_size = g->ops.runlist.entry_size(g); + f->num_runlist_entries = g->ops.runlist.length_max(g); + f->max_runlists = g->ops.runlist.count_max(g); + + f->runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64( + sizeof(*f->runlists), f->max_runlists)); + if (f->runlists == NULL) { + err = -ENOMEM; + goto clean_up_runlist; + } + + for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) { + if (nvgpu_engine_is_valid_runlist_id(g, runlist_id)) { + num_runlists = nvgpu_safe_add_u32(num_runlists, 1U); + } + } + f->num_runlists = num_runlists; + + f->active_runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64( + sizeof(*f->active_runlists), num_runlists)); + if (f->active_runlists == NULL) { + err = -ENOMEM; + goto clean_up_runlist; + } + + + rl_dbg(g, " Max runlists: %u", f->max_runlists); + rl_dbg(g, " Active runlists: %u", f->num_runlists); + rl_dbg(g, " RL entry size: %u bytes", f->runlist_entry_size); + rl_dbg(g, " Max RL entries: %u", f->num_runlist_entries); + + err = nvgpu_init_active_runlist_mapping(g); + if (err != 0) { + goto clean_up_runlist; + } + + g->ops.runlist.init_enginfo(g, f); + return 0; + +clean_up_runlist: + nvgpu_runlist_cleanup_sw(g); + rl_dbg(g, "fail"); + return err; +} + +u32 nvgpu_runlist_get_runlists_mask(struct gk20a *g, u32 id, + unsigned int id_type, u32 act_eng_bitmask, u32 pbdma_bitmask) +{ + u32 i, runlists_mask = 0; + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + + bool bitmask_disabled = ((act_eng_bitmask == 0U) && + (pbdma_bitmask == 0U)); + + /* engine and/or pbdma ids are known */ + if (!bitmask_disabled) { + for (i = 0U; i < f->num_runlists; i++) { + runlist = &f->active_runlists[i]; + + if ((runlist->eng_bitmask & act_eng_bitmask) != 0U) { + runlists_mask |= BIT32(runlist->id); + } + + if ((runlist->pbdma_bitmask & pbdma_bitmask) != 0U) { + runlists_mask |= BIT32(runlist->id); + } + } + } + + if (id_type != ID_TYPE_UNKNOWN) { + if (id_type == ID_TYPE_TSG) { + runlist = f->tsg[id].runlist; + } else { + runlist = f->channel[id].runlist; + } + + if (runlist == NULL) { + /* Warning on Linux, real assert on QNX. 
*/ + nvgpu_assert(runlist != NULL); + } else { + runlists_mask |= BIT32(runlist->id); + } + } else { + if (bitmask_disabled) { + nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine " + "and pbdma ids are unknown"); + + for (i = 0U; i < f->num_runlists; i++) { + runlist = &f->active_runlists[i]; + + runlists_mask |= BIT32(runlist->id); + } + } else { + nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine " + "and/or pbdma ids are known"); + } + } + + nvgpu_log(g, gpu_dbg_info, "runlists_mask = 0x%08x", runlists_mask); + return runlists_mask; +} + +void nvgpu_runlist_unlock_runlists(struct gk20a *g, u32 runlists_mask) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + u32 i; + + nvgpu_log_info(g, "release runlist_lock for runlists set in " + "runlists_mask: 0x%08x", runlists_mask); + + for (i = 0U; i < f->num_runlists; i++) { + runlist = &f->active_runlists[i]; + + if ((BIT32(i) & runlists_mask) != 0U) { + nvgpu_mutex_release(&runlist->runlist_lock); + } + } +} diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c new file mode 100644 index 000000000..5b057c26f --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -0,0 +1,837 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * We might need two extra gpfifo entries per submit - one for pre fence and + * one for post fence. + */ +#define EXTRA_GPFIFO_ENTRIES 2U + +static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c, + struct nvgpu_channel_fence *fence, + struct priv_cmd_entry **wait_cmd, bool flag_sync_fence) +{ + /* + * A single input sync fd may contain multiple fences. The preallocated + * priv cmdbuf space allows exactly one per submit in the worst case. + * Require at most one wait for consistent deterministic submits; if + * there are more and no space, we'll -EAGAIN in nondeterministic mode. + */ + u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ? 
+ 1U : 0U; + int err; + + if (flag_sync_fence) { + nvgpu_assert(fence->id <= (u32)INT_MAX); + err = nvgpu_channel_sync_wait_fence_fd(c->sync, + (int)fence->id, wait_cmd, max_wait_cmds); + } else { + struct nvgpu_channel_sync_syncpt *sync_syncpt; + + sync_syncpt = nvgpu_channel_sync_to_syncpt(c->sync); + if (sync_syncpt != NULL) { + err = nvgpu_channel_sync_wait_syncpt(sync_syncpt, + fence->id, fence->value, wait_cmd); + } else { + err = -EINVAL; + } + } + + return err; +} + +static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c, + struct priv_cmd_entry **incr_cmd, + struct nvgpu_fence_type *post_fence, bool flag_fence_get, + bool need_wfi, bool need_sync_fence) +{ + int err; + + if (flag_fence_get) { + err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd, + post_fence, need_wfi, need_sync_fence); + } else { + err = nvgpu_channel_sync_incr(c->sync, incr_cmd, + post_fence, need_sync_fence); + } + + return err; +} + +/* + * Handle the submit synchronization - pre-fences and post-fences. + */ +static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, + struct nvgpu_channel_fence *fence, + struct nvgpu_channel_job *job, + u32 flags) +{ + struct gk20a *g = c->g; + bool need_sync_fence; + bool new_sync_created = false; + int err = 0; + bool need_wfi = (flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI) == 0U; + bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U; + bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U; + bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U; + + if (g->aggressive_sync_destroy_thresh != 0U) { + nvgpu_mutex_acquire(&c->sync_lock); + if (c->sync == NULL) { + c->sync = nvgpu_channel_sync_create(c); + if (c->sync == NULL) { + err = -ENOMEM; + goto clean_up_unlock; + } + new_sync_created = true; + } + nvgpu_channel_sync_get_ref(c->sync); + } + + if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) { + err = g->ops.channel.set_syncpt(c); + if (err != 0) { + goto clean_up_put_sync; + } + } + + /* + * Optionally insert syncpt/semaphore wait in the beginning of gpfifo + * submission when user requested. + */ + if (flag_fence_wait) { + err = nvgpu_submit_create_wait_cmd(c, fence, &job->wait_cmd, + flag_sync_fence); + if (err != 0) { + goto clean_up_put_sync; + } + } + + need_sync_fence = flag_fence_get && flag_sync_fence; + + /* + * Always generate an increment at the end of a GPFIFO submission. When + * we do job tracking, post fences are needed for various reasons even + * if not requested by user. 
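+ *
+ * One such reason is visible further down in this file:
+ * nvgpu_submit_prepare_gpfifo_track() keeps the post fence inside the
+ * job and only hands out an extra reference through fence_out, so the
+ * job always has a completion fence to track even when the user did
+ * not ask for one.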
+ */ + err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence, + flag_fence_get, need_wfi, need_sync_fence); + if (err != 0) { + goto clean_up_wait_cmd; + } + + if (g->aggressive_sync_destroy_thresh != 0U) { + nvgpu_mutex_release(&c->sync_lock); + } + return 0; + +clean_up_wait_cmd: + if (job->wait_cmd != NULL) { + nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd); + } + job->wait_cmd = NULL; +clean_up_put_sync: + if (g->aggressive_sync_destroy_thresh != 0U) { + if (nvgpu_channel_sync_put_ref_and_check(c->sync) + && g->aggressive_sync_destroy) { + nvgpu_channel_sync_destroy(c->sync); + } + } +clean_up_unlock: + if (g->aggressive_sync_destroy_thresh != 0U) { + nvgpu_mutex_release(&c->sync_lock); + } + return err; +} + +static void nvgpu_submit_append_priv_cmdbuf(struct nvgpu_channel *c, + struct priv_cmd_entry *cmd) +{ + struct gk20a *g = c->g; + struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; + struct nvgpu_gpfifo_entry gpfifo_entry; + u64 gva; + u32 size; + + nvgpu_priv_cmdbuf_finish(g, cmd, &gva, &size); + g->ops.pbdma.format_gpfifo_entry(g, &gpfifo_entry, gva, size); + + nvgpu_mem_wr_n(g, gpfifo_mem, + c->gpfifo.put * (u32)sizeof(gpfifo_entry), + &gpfifo_entry, (u32)sizeof(gpfifo_entry)); + + c->gpfifo.put = (c->gpfifo.put + 1U) & (c->gpfifo.entry_num - 1U); +} + +static int nvgpu_submit_append_gpfifo_user_direct(struct nvgpu_channel *c, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries) +{ + struct gk20a *g = c->g; + struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va; + u32 gpfifo_size = c->gpfifo.entry_num; + u32 len = num_entries; + u32 start = c->gpfifo.put; + u32 end = start + len; /* exclusive */ + int err; + + nvgpu_speculation_barrier(); + if (end > gpfifo_size) { + /* wrap-around */ + u32 length0 = gpfifo_size - start; + u32 length1 = len - length0; + + err = g->os_channel.copy_user_gpfifo( + &gpfifo_cpu[start], userdata, + 0, length0); + if (err != 0) { + return err; + } + + err = g->os_channel.copy_user_gpfifo( + gpfifo_cpu, userdata, + length0, length1); + if (err != 0) { + return err; + } + } else { + err = g->os_channel.copy_user_gpfifo( + &gpfifo_cpu[start], userdata, + 0, len); + if (err != 0) { + return err; + } + } + + return 0; +} + +static void nvgpu_submit_append_gpfifo_common(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *src, u32 num_entries) +{ + struct gk20a *g = c->g; + struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; + /* in bytes */ + u32 gpfifo_size = + c->gpfifo.entry_num * (u32)sizeof(struct nvgpu_gpfifo_entry); + u32 len = num_entries * (u32)sizeof(struct nvgpu_gpfifo_entry); + u32 start = c->gpfifo.put * (u32)sizeof(struct nvgpu_gpfifo_entry); + u32 end = start + len; /* exclusive */ + + if (end > gpfifo_size) { + /* wrap-around */ + u32 length0 = gpfifo_size - start; + u32 length1 = len - length0; + struct nvgpu_gpfifo_entry *src2 = &src[length0]; + + nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0); + nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1); + } else { + nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len); + } +} + +/* + * Copy source gpfifo entries into the gpfifo ring buffer, potentially + * splitting into two memcpys to handle wrap-around. 
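+ *
+ * Illustrative example (arbitrary numbers): with entry_num = 512 and
+ * put = 510, appending num_entries = 5 writes two entries at the tail
+ * (510..511) and the remaining three at the start (0..2); put then
+ * becomes (510 + 5) & 511 = 3.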
+ */ +static int nvgpu_submit_append_gpfifo(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *kern_gpfifo, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries) +{ + int err; + + if ((kern_gpfifo == NULL) +#ifdef CONFIG_NVGPU_DGPU + && (c->gpfifo.pipe == NULL) +#endif + ) { + /* + * This path (from userspace to sysmem) is special in order to + * avoid two copies unnecessarily (from user to pipe, then from + * pipe to gpu sysmem buffer). + */ + err = nvgpu_submit_append_gpfifo_user_direct(c, userdata, + num_entries); + if (err != 0) { + return err; + } + } +#ifdef CONFIG_NVGPU_DGPU + else if (kern_gpfifo == NULL) { + /* from userspace to vidmem, use the common path */ + err = c->g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, + userdata, 0, num_entries); + if (err != 0) { + return err; + } + + nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe, + num_entries); + } +#endif + else { + /* from kernel to either sysmem or vidmem, don't need + * copy_user_gpfifo so use the common path */ + nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries); + } + + trace_write_pushbuffers(c, num_entries); + + c->gpfifo.put = (c->gpfifo.put + num_entries) & + (c->gpfifo.entry_num - 1U); + + return 0; +} + +static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out, + struct nvgpu_swprofiler *profiler, + bool need_deferred_cleanup) +{ + bool skip_buffer_refcounting = (flags & + NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U; + struct nvgpu_channel_job *job = NULL; + int err; + + nvgpu_channel_joblist_lock(c); + err = nvgpu_channel_alloc_job(c, &job); + nvgpu_channel_joblist_unlock(c); + if (err != 0) { + return err; + } + + err = nvgpu_submit_prepare_syncs(c, fence, job, flags); + if (err != 0) { + goto clean_up_job; + } + + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING); + + /* + * wait_cmd can be unset even if flag_fence_wait exists; the + * android sync framework for example can provide entirely + * empty fences that act like trivially expired waits. + */ + if (job->wait_cmd != NULL) { + nvgpu_submit_append_priv_cmdbuf(c, job->wait_cmd); + } + + err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries); + if (err != 0) { + goto clean_up_gpfifo_wait; + } + + nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd); + + err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting); + if (err != 0) { + goto clean_up_gpfifo_incr; + } + + nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup); + + if (fence_out != NULL) { + /* This fence ref is going somewhere else but it's owned by the + * job; the caller is expected to release it promptly, so that + * a subsequent job cannot reclaim its memory. + */ + *fence_out = nvgpu_fence_get(&job->post_fence); + } + + return 0; + +clean_up_gpfifo_incr: + /* + * undo the incr priv cmdbuf and the user entries: + * new gp.put = + * (gp.put - (1 + num_entries)) & (gp.entry_num - 1) = + * (gp.put + (gp.entry_num - (1 + num_entries))) & (gp.entry_num - 1) + * the + entry_num does not affect the result but avoids wrapping below + * zero for MISRA, although it would be well defined. 
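+	 * For example, with entry_num = 8, put = 2 and num_entries = 3,
+	 * both (2 - 4) & 7 and (2 + (8 - 4)) & 7 yield 6.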
+ */ + c->gpfifo.put = + (nvgpu_safe_add_u32(c->gpfifo.put, + nvgpu_safe_sub_u32(c->gpfifo.entry_num, + nvgpu_safe_add_u32(1U, num_entries)))) & + nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U); +clean_up_gpfifo_wait: + if (job->wait_cmd != NULL) { + /* + * undo the wait priv cmdbuf entry: + * gp.put = + * (gp.put - 1) & (gp.entry_num - 1) = + * (gp.put + (gp.entry_num - 1)) & (gp.entry_num - 1) + * same as above with the gp.entry_num on the left side. + */ + c->gpfifo.put = + nvgpu_safe_add_u32(c->gpfifo.put, + nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) & + nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U); + } + nvgpu_fence_put(&job->post_fence); + nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd); + if (job->wait_cmd != NULL) { + nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd); + } +clean_up_job: + nvgpu_channel_free_job(c, job); + return err; +} + +static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + struct nvgpu_fence_type **fence_out, + struct nvgpu_swprofiler *profiler) +{ + int err; + + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING); + + err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, + num_entries); + if (err != 0) { + return err; + } + + if (fence_out != NULL) { + *fence_out = NULL; + } + + return 0; +} + +static int check_gpfifo_capacity(struct nvgpu_channel *c, u32 required) +{ + /* + * Make sure we have enough space for gpfifo entries. Check cached + * values first and then read from HW. If no space, return -EAGAIN + * and let userpace decide to re-try request or not. + */ + if (nvgpu_channel_get_gpfifo_free_count(c) < required) { + if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) < + required) { + return -EAGAIN; + } + } + + return 0; +} + +static int nvgpu_do_submit(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out, + struct nvgpu_swprofiler *profiler, + bool need_job_tracking, + bool need_deferred_cleanup) +{ + struct gk20a *g = c->g; + int err; + +#ifdef CONFIG_NVGPU_TRACE + trace_gk20a_channel_submit_gpfifo(g->name, + c->chid, + num_entries, + flags, + fence ? fence->id : 0, + fence ? 
fence->value : 0); +#endif + + nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", + c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); + + err = check_gpfifo_capacity(c, num_entries + EXTRA_GPFIFO_ENTRIES); + if (err != 0) { + return err; + } + + if (need_job_tracking) { + err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo, + userdata, num_entries, flags, fence, + fence_out, profiler, need_deferred_cleanup); + } else { + err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo, + userdata, num_entries, fence_out, profiler); + } + + if (err != 0) { + return err; + } + + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND); + + g->ops.userd.gp_put(g, c); + + return 0; +} + +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS +static int nvgpu_submit_deterministic(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out, + struct nvgpu_swprofiler *profiler) +{ + bool skip_buffer_refcounting = (flags & + NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U; + bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U; + bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U; + bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U; + struct gk20a *g = c->g; + bool need_job_tracking; + int err = 0; + + nvgpu_assert(nvgpu_channel_is_deterministic(c)); + + /* sync framework on post fences would not be deterministic */ + if (flag_fence_get && flag_sync_fence) { + return -EINVAL; + } + + /* this would be O(n) */ + if (!skip_buffer_refcounting) { + return -EINVAL; + } + + /* the watchdog needs periodic job cleanup */ + if (nvgpu_channel_wdt_enabled(c->wdt)) { + return -EINVAL; + } + + /* + * Job tracking is necessary on deterministic channels if and only if + * pre- or post-fence functionality is needed. If not, a fast submit + * can be done (ie. only need to write out userspace GPFIFO entries and + * update GP_PUT). + */ + need_job_tracking = flag_fence_wait || flag_fence_get; + + if (need_job_tracking) { + /* nvgpu_semaphore is dynamically allocated, not pooled */ + if (!nvgpu_has_syncpoints(g)) { + return -EINVAL; + } + + /* dynamic sync allocation wouldn't be deterministic */ + if (g->aggressive_sync_destroy_thresh != 0U) { + return -EINVAL; + } + + /* + * (Try to) clean up a single job, if available. Each job + * requires the same amount of metadata, so this is enough for + * the job list, fence pool, and private command buffers that + * this submit will need. + * + * This submit might still need more gpfifo space than what the + * previous has used. The job metadata doesn't look at it + * though - the hw GP_GET pointer can be much further away than + * our metadata pointers; gpfifo space is "freed" by the HW. + */ + nvgpu_channel_clean_up_deterministic_job(c); + } + + /* Grab access to HW to deal with do_idle */ + nvgpu_rwsem_down_read(&g->deterministic_busy); + + if (c->deterministic_railgate_allowed) { + /* + * Nope - this channel has dropped its own power ref. As + * deterministic submits don't hold power on per each submitted + * job like normal ones do, the GPU might railgate any time now + * and thus submit is disallowed. 
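+		 * (A deterministic channel is expected to hold its own power
+		 * reference for its lifetime instead of taking one per job, as
+		 * the non-deterministic path does with gk20a_busy().)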
+ */ + err = -EINVAL; + goto clean_up; + } + + err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence, + fence_out, profiler, need_job_tracking, false); + if (err != 0) { + goto clean_up; + } + + /* No hw access beyond this point */ + nvgpu_rwsem_up_read(&g->deterministic_busy); + + return 0; + +clean_up: + nvgpu_log_fn(g, "fail %d", err); + nvgpu_rwsem_up_read(&g->deterministic_busy); + + return err; +} +#endif + +static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out, + struct nvgpu_swprofiler *profiler) +{ + bool skip_buffer_refcounting = (flags & + NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U; + bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U; + bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U; + struct gk20a *g = c->g; + bool need_job_tracking; + int err = 0; + + nvgpu_assert(!nvgpu_channel_is_deterministic(c)); + + /* + * Job tracking is necessary for any of the following conditions on + * non-deterministic channels: + * - pre- or post-fence functionality + * - GPU rail-gating + * - VPR resize enabled + * - buffer refcounting + * - channel watchdog + * + * If none of the conditions are met, then job tracking is not + * required and a fast submit can be done (ie. only need to write + * out userspace GPFIFO entries and update GP_PUT). + */ + need_job_tracking = flag_fence_wait || + flag_fence_get || + nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) || + nvgpu_is_vpr_resize_enabled() || + !skip_buffer_refcounting || + nvgpu_channel_wdt_enabled(c->wdt); + + if (need_job_tracking) { + /* + * Get a power ref because this isn't a deterministic + * channel that holds them during the channel lifetime. + * This one is released by nvgpu_channel_clean_up_jobs, + * via syncpt or sema interrupt, whichever is used. + */ + err = gk20a_busy(g); + if (err != 0) { + nvgpu_err(g, + "failed to host gk20a to submit gpfifo"); + nvgpu_print_current(g, NULL, NVGPU_ERROR); + return err; + } + } + + err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence, + fence_out, profiler, need_job_tracking, true); + if (err != 0) { + goto clean_up; + } + + return 0; + +clean_up: + nvgpu_log_fn(g, "fail %d", err); + gk20a_idle(g); + + return err; +} + +static int check_submit_allowed(struct nvgpu_channel *c) +{ + struct gk20a *g = c->g; + + if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + return -ENODEV; + } + + if (nvgpu_channel_check_unserviceable(c)) { + return -ETIMEDOUT; + } + + if (c->usermode_submit_enabled) { + return -EINVAL; + } + + if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) { + return -ENOMEM; + } + + /* an address space needs to have been bound at this point. */ + if (!nvgpu_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to an address space at time of gpfifo" + " submission."); + return -EINVAL; + } + + return 0; +} + +static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out, + struct nvgpu_swprofiler *profiler) +{ + struct gk20a *g = c->g; + int err; + + err = check_submit_allowed(c); + if (err != 0) { + return err; + } + + /* + * Fifo not large enough for request. Return error immediately. + * Kernel can insert gpfifo entries before and after user gpfifos. 
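+	 * These are the wait and incr priv cmdbufs that
+	 * EXTRA_GPFIFO_ENTRIES accounts for.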
+ * So, add extra entries in user request. Also, HW with fifo size N + * can accept only N-1 entries. + */ + if (c->gpfifo.entry_num - 1U < num_entries + EXTRA_GPFIFO_ENTRIES) { + nvgpu_err(g, "not enough gpfifo space allocated"); + return -ENOMEM; + } + + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY); + + /* update debug settings */ + nvgpu_ltc_sync_enabled(g); + + nvgpu_log_info(g, "channel %d", c->chid); + +#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS + if (c->deterministic) { + err = nvgpu_submit_deterministic(c, gpfifo, userdata, + num_entries, flags, fence, fence_out, profiler); + } else +#endif + { + err = nvgpu_submit_nondeterministic(c, gpfifo, userdata, + num_entries, flags, fence, fence_out, profiler); + } + + if (err != 0) { + return err; + } + +#ifdef CONFIG_NVGPU_TRACE + if (fence_out != NULL && *fence_out != NULL) { + /* + * This is not a good example on how to use the fence type. + * Don't touch the priv data. The debug trace is special. + */ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + trace_gk20a_channel_submitted_gpfifo(g->name, + c->chid, num_entries, flags, + (*fence_out)->priv.syncpt_id, + (*fence_out)->priv.syncpt_value); +#else + trace_gk20a_channel_submitted_gpfifo(g->name, + c->chid, num_entries, flags, + 0, 0); +#endif + } else { + trace_gk20a_channel_submitted_gpfifo(g->name, + c->chid, num_entries, flags, + 0, 0); + } +#endif + + nvgpu_log_info(g, "post-submit put %d, get %d, size %d", + c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); + + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END); + + nvgpu_log_fn(g, "done"); + return err; +} + +int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_user_fence *fence_out, + struct nvgpu_swprofiler *profiler) +{ + struct nvgpu_fence_type *fence_internal = NULL; + int err; + + err = nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries, + flags, fence, &fence_internal, profiler); + if (err == 0 && fence_internal != NULL) { + *fence_out = nvgpu_fence_extract_user(fence_internal); + nvgpu_fence_put(fence_internal); + } + return err; +} + +int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out) +{ + struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; + + return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries, + flags, fence, fence_out, NULL); +} diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c new file mode 100644 index 000000000..392503dc3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -0,0 +1,1120 @@ +/* + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_PROFILER +#include +#endif + +void nvgpu_tsg_disable(struct nvgpu_tsg *tsg) +{ + struct gk20a *g = tsg->g; + struct nvgpu_channel *ch; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + g->ops.channel.disable(ch); + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +struct nvgpu_tsg *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid) +{ + if (tsgid == NVGPU_INVALID_TSG_ID) { + return NULL; + } + + return nvgpu_tsg_get_from_id(g, tsgid); +} + + +struct nvgpu_tsg *nvgpu_tsg_get_from_id(struct gk20a *g, u32 tsgid) +{ + struct nvgpu_fifo *f = &g->fifo; + + return &f->tsg[tsgid]; +} + + +static bool nvgpu_tsg_is_channel_active(struct gk20a *g, + struct nvgpu_channel *ch) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + unsigned int i; + + for (i = 0; i < f->num_runlists; ++i) { + runlist = &f->active_runlists[i]; + if (nvgpu_test_bit(ch->chid, runlist->active_channels)) { + return true; + } + } + + return false; +} + +/* + * API to mark channel as part of TSG + * + * Note that channel is not runnable when we bind it to TSG + */ +int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + int err = 0; + + nvgpu_log_fn(g, "bind tsg:%u ch:%u\n", tsg->tsgid, ch->chid); + + /* check if channel is already bound to some TSG */ + if (nvgpu_tsg_from_ch(ch) != NULL) { + return -EINVAL; + } + + /* channel cannot be bound to TSG if it is already active */ + if (nvgpu_tsg_is_channel_active(tsg->g, ch)) { + return -EINVAL; + } + + /* Use runqueue selector 1 for all ASYNC ids */ + if (ch->subctx_id > CHANNEL_INFO_VEID0) { + ch->runqueue_sel = 1; + } + + /* + * All the channels in a TSG must share the same runlist. 
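+	 * The runlist of the first bound channel is adopted by the TSG;
+	 * any later bind with a different runlist is rejected below.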
+ */ + if (tsg->runlist == NULL) { + tsg->runlist = ch->runlist; + } else { + if (tsg->runlist != ch->runlist) { + nvgpu_err(tsg->g, + "runlist_id mismatch ch[%d] tsg[%d]", + ch->runlist->id, + tsg->runlist->id); + return -EINVAL; + } + } + + if (g->ops.tsg.bind_channel != NULL) { + err = g->ops.tsg.bind_channel(tsg, ch); + if (err != 0) { + nvgpu_err(tsg->g, "fail to bind ch %u to tsg %u", + ch->chid, tsg->tsgid); + return err; + } + } + + nvgpu_rwsem_down_write(&tsg->ch_list_lock); + nvgpu_list_add_tail(&ch->ch_entry, &tsg->ch_list); + ch->tsgid = tsg->tsgid; + /* channel is serviceable after it is bound to tsg */ + ch->unserviceable = false; + nvgpu_rwsem_up_write(&tsg->ch_list_lock); + + if (g->ops.tsg.bind_channel_eng_method_buffers != NULL) { + g->ops.tsg.bind_channel_eng_method_buffers(tsg, ch); + } + + nvgpu_ref_get(&tsg->refcount); + + return err; +} + +static bool nvgpu_tsg_is_multi_channel(struct nvgpu_tsg *tsg) +{ + bool ret = false; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + if (nvgpu_list_first_entry(&tsg->ch_list, nvgpu_channel, + ch_entry) != + nvgpu_list_last_entry(&tsg->ch_list, nvgpu_channel, + ch_entry)) { + ret = true; + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + return ret; +} + +static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + int err; + bool tsg_timedout; + + /* If one channel in TSG times out, we disable all channels */ + nvgpu_rwsem_down_write(&tsg->ch_list_lock); + tsg_timedout = nvgpu_channel_check_unserviceable(ch); + nvgpu_rwsem_up_write(&tsg->ch_list_lock); + + /* Disable TSG and examine status before unbinding channel */ + g->ops.tsg.disable(tsg); + + err = g->ops.fifo.preempt_tsg(g, tsg); + if (err != 0) { + goto fail_enable_tsg; + } + + /* + * State validation is only necessary if there are multiple channels in + * the TSG. 
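+	 * The hw state check (NEXT, ctx reload, engine faulted) is also
+	 * skipped if the TSG has already timed out.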
+ */ + if (nvgpu_tsg_is_multi_channel(tsg) && !tsg_timedout && + (g->ops.tsg.unbind_channel_check_hw_state != NULL)) { + err = g->ops.tsg.unbind_channel_check_hw_state(tsg, ch); + if (err == -EAGAIN) { + goto fail_enable_tsg; + } + + if (err != 0) { + nvgpu_err(g, "invalid hw_state for ch %u", ch->chid); + goto fail_enable_tsg; + } + } + + if (g->ops.channel.clear != NULL) { + g->ops.channel.clear(ch); + } + + /* Channel should be seen as TSG channel while updating runlist */ + err = nvgpu_channel_update_runlist(ch, false); + if (err != 0) { + nvgpu_err(g, "update runlist failed ch:%u tsg:%u", + ch->chid, tsg->tsgid); + goto fail_enable_tsg; + } + +#ifdef CONFIG_NVGPU_DEBUGGER + while (ch->mmu_debug_mode_refcnt > 0U) { + err = nvgpu_tsg_set_mmu_debug_mode(ch, false); + if (err != 0) { + nvgpu_err(g, "disable mmu debug mode failed ch:%u", + ch->chid); + break; + } + } +#endif + + /* Remove channel from TSG and re-enable rest of the channels */ + nvgpu_rwsem_down_write(&tsg->ch_list_lock); + nvgpu_list_del(&ch->ch_entry); + ch->tsgid = NVGPU_INVALID_TSG_ID; + + /* another thread could have re-enabled the channel because it was + * still on the list at that time, so make sure it's truly disabled + */ + g->ops.channel.disable(ch); + nvgpu_rwsem_up_write(&tsg->ch_list_lock); + + /* + * Don't re-enable all channels if TSG has timed out already + * + * Note that we can skip disabling and preempting TSG too in case of + * time out, but we keep that to ensure TSG is kicked out + */ + if (!tsg_timedout) { + g->ops.tsg.enable(tsg); + } + + if (g->ops.channel.abort_clean_up != NULL) { + g->ops.channel.abort_clean_up(ch); + } + + return 0; + +fail_enable_tsg: + if (!tsg_timedout) { + g->ops.tsg.enable(tsg); + } + return err; +} + +/* The caller must ensure that channel belongs to a tsg */ +int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, + bool force) +{ + struct gk20a *g = ch->g; + int err; + + nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid); + + err = nvgpu_tsg_unbind_channel_common(tsg, ch); + if (!force && err == -EAGAIN) { + return err; + } + + if (err != 0) { + nvgpu_err(g, "unbind common failed, err=%d", err); + goto fail_common; + } + + if (g->ops.tsg.unbind_channel != NULL) { + err = g->ops.tsg.unbind_channel(tsg, ch); + if (err != 0) { + /* + * ch already removed from TSG's list. + * mark error explicitly. 
+ */ + (void) nvgpu_channel_mark_error(g, ch); + nvgpu_err(g, "unbind hal failed, err=%d", err); + goto fail; + } + } + + nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); + + return 0; + +fail_common: + if (g->ops.tsg.unbind_channel != NULL) { + int unbind_err = g->ops.tsg.unbind_channel(tsg, ch); + if (unbind_err != 0) { + nvgpu_err(g, "unbind hal failed, err=%d", unbind_err); + } + } +fail: + nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d", + ch->chid, tsg->tsgid); + + nvgpu_tsg_abort(g, tsg, true); + + if (g->ops.channel.clear != NULL) { + g->ops.channel.clear(ch); + } + + /* If channel unbind fails, channel is still part of runlist */ + if (nvgpu_channel_update_runlist(ch, false) != 0) { + nvgpu_err(g, "remove ch %u from runlist failed", ch->chid); + } + +#ifdef CONFIG_NVGPU_DEBUGGER + while (ch->mmu_debug_mode_refcnt > 0U) { + err = nvgpu_tsg_set_mmu_debug_mode(ch, false); + if (err != 0) { + nvgpu_err(g, "disable mmu debug mode failed ch:%u", + ch->chid); + break; + } + } +#endif + + nvgpu_rwsem_down_write(&tsg->ch_list_lock); + nvgpu_list_del(&ch->ch_entry); + ch->tsgid = NVGPU_INVALID_TSG_ID; + nvgpu_rwsem_up_write(&tsg->ch_list_lock); + + nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); + + return err; + +} + +int nvgpu_tsg_force_unbind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + return nvgpu_tsg_unbind_channel(tsg, ch, true); +} + +int nvgpu_tsg_unbind_channel_check_hw_state(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + struct nvgpu_channel_hw_state hw_state; + int err = 0; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + g->ops.channel.read_state(g, ch, &hw_state); + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + if (g->ops.tsg.unbind_channel_check_hw_next != NULL) { + err = g->ops.tsg.unbind_channel_check_hw_next(ch, &hw_state); + } + + if (g->ops.tsg.unbind_channel_check_ctx_reload != NULL) { + g->ops.tsg.unbind_channel_check_ctx_reload(tsg, ch, &hw_state); + } + + if (g->ops.tsg.unbind_channel_check_eng_faulted != NULL) { + g->ops.tsg.unbind_channel_check_eng_faulted(tsg, ch, + &hw_state); + } + + return err; +} + +void nvgpu_tsg_unbind_channel_check_ctx_reload(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch, + struct nvgpu_channel_hw_state *hw_state) +{ + struct gk20a *g = ch->g; + struct nvgpu_channel *temp_ch; + + /* If CTX_RELOAD is set on a channel, move it to some other channel */ + if (hw_state->ctx_reload) { + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(temp_ch, &tsg->ch_list, + nvgpu_channel, ch_entry) { + if (temp_ch->chid != ch->chid) { + g->ops.channel.force_ctx_reload(temp_ch); + break; + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + } +} + +static void nvgpu_tsg_destroy(struct nvgpu_tsg *tsg) +{ +#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL + nvgpu_mutex_destroy(&tsg->event_id_list_lock); +#endif + nvgpu_mutex_destroy(&tsg->ctx_init_lock); +} + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL +/* force reset tsg that the channel is bound to */ +int nvgpu_tsg_force_reset_ch(struct nvgpu_channel *ch, + u32 err_code, bool verbose) +{ + struct gk20a *g = ch->g; + + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + + if (tsg != NULL) { + nvgpu_tsg_set_error_notifier(g, tsg, err_code); + nvgpu_rc_tsg_and_related_engines(g, tsg, verbose, + RC_TYPE_FORCE_RESET); + } else { + nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + } + + return 0; +} +#endif + +void nvgpu_tsg_cleanup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 tsgid; + + for (tsgid = 0; tsgid < 
f->num_channels; tsgid++) { + struct nvgpu_tsg *tsg = &f->tsg[tsgid]; + + nvgpu_tsg_destroy(tsg); + } + + nvgpu_vfree(g, f->tsg); + f->tsg = NULL; + nvgpu_mutex_destroy(&f->tsg_inuse_mutex); +} + +static void nvgpu_tsg_init_support(struct gk20a *g, u32 tsgid) +{ + struct nvgpu_tsg *tsg = NULL; + + tsg = &g->fifo.tsg[tsgid]; + + tsg->in_use = false; + tsg->tsgid = tsgid; + tsg->abortable = true; + + nvgpu_init_list_node(&tsg->ch_list); + nvgpu_rwsem_init(&tsg->ch_list_lock); + nvgpu_mutex_init(&tsg->ctx_init_lock); + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL + nvgpu_init_list_node(&tsg->event_id_list); + nvgpu_mutex_init(&tsg->event_id_list_lock); +#endif +} + +int nvgpu_tsg_setup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 tsgid; + int err; + + nvgpu_mutex_init(&f->tsg_inuse_mutex); + + f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg)); + if (f->tsg == NULL) { + nvgpu_err(g, "no mem for tsgs"); + err = -ENOMEM; + goto clean_up_mutex; + } + + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + nvgpu_tsg_init_support(g, tsgid); + } + + return 0; + +clean_up_mutex: + nvgpu_mutex_destroy(&f->tsg_inuse_mutex); + return err; +} + +void nvgpu_tsg_set_unserviceable(struct gk20a *g, + struct nvgpu_tsg *tsg) +{ + struct nvgpu_channel *ch = NULL; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + nvgpu_channel_set_unserviceable(ch); + nvgpu_channel_put(ch); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +void nvgpu_tsg_wakeup_wqs(struct gk20a *g, + struct nvgpu_tsg *tsg) +{ + struct nvgpu_channel *ch = NULL; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + nvgpu_channel_wakeup_wqs(g, ch); + nvgpu_channel_put(ch); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +bool nvgpu_tsg_mark_error(struct gk20a *g, + struct nvgpu_tsg *tsg) +{ + struct nvgpu_channel *ch = NULL; + bool verbose = false; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + if (nvgpu_channel_mark_error(g, ch)) { + verbose = true; + } + nvgpu_channel_put(ch); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + return verbose; + +} + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT +void nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(struct nvgpu_tsg *tsg, u32 ms) +{ + struct nvgpu_channel *ch = NULL; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + ch->ctxsw_timeout_accumulated_ms = ms; + nvgpu_channel_put(ch); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +bool nvgpu_tsg_ctxsw_timeout_debug_dump_state(struct nvgpu_tsg *tsg) +{ + struct nvgpu_channel *ch = NULL; + bool verbose = false; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + if (ch->ctxsw_timeout_debug_dump) { + verbose = true; + } + nvgpu_channel_put(ch); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + return verbose; +} +#endif + +void nvgpu_tsg_set_error_notifier(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 error_notifier) +{ + struct nvgpu_channel *ch = NULL; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, 
ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + nvgpu_channel_set_error_notifier(g, ch, error_notifier); + nvgpu_channel_put(ch); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + nvgpu_err(g, "TSG %d generated a mmu fault", tsg->tsgid); + + nvgpu_tsg_set_error_notifier(g, tsg, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); +} + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT +bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg, + bool *debug_dump, u32 *ms) +{ + struct nvgpu_channel *ch; + bool recover = false; + bool progress = false; + struct gk20a *g = tsg->g; + + *debug_dump = false; + *ms = g->ctxsw_timeout_period_ms; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + + /* check if there was some progress on any of the TSG channels. + * fifo recovery is needed if at least one channel reached the + * maximum timeout without progress (update in gpfifo pointers). + */ + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + recover = nvgpu_channel_update_and_check_ctxsw_timeout(ch, + *ms, &progress); + if (progress || recover) { + break; + } + nvgpu_channel_put(ch); + } + } + + if (recover) { + /* + * if one channel is presumed dead (no progress for too long), + * then fifo recovery is needed. we can't really figure out + * which channel caused the problem, so set ctxsw timeout error + * notifier for all channels. + */ + *ms = ch->ctxsw_timeout_accumulated_ms; + nvgpu_channel_put(ch); + *debug_dump = nvgpu_tsg_ctxsw_timeout_debug_dump_state(tsg); + + } else { + /* + * if at least one channel in the TSG made some progress, reset + * ctxsw_timeout_accumulated_ms for all channels in the TSG. In + * particular, this resets ctxsw_timeout_accumulated_ms timeout + * for channels that already completed their work. + */ + if (progress) { + nvgpu_log_info(g, "progress on tsg=%d ch=%d", + tsg->tsgid, ch->chid); + nvgpu_channel_put(ch); + *ms = g->ctxsw_timeout_period_ms; + nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(tsg, *ms); + } + } + + /* if we could not detect progress on any of the channel, but none + * of them has reached the timeout, there is nothing more to do: + * ctxsw_timeout_accumulated_ms has been updated for all of them. 
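+	 * In that case recover stays false and the function returns without
+	 * requesting recovery.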
+ */ + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + return recover; +} +#endif + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING +int nvgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 level) +{ + struct gk20a *g = tsg->g; + int ret; + + nvgpu_log(g, gpu_dbg_sched, + "tsgid=%u interleave=%u", tsg->tsgid, level); + + nvgpu_speculation_barrier(); + + if ((level != NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW) && + (level != NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM) && + (level != NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH)) { + return -EINVAL; + } + + if (g->ops.tsg.set_interleave != NULL) { + ret = g->ops.tsg.set_interleave(tsg, level); + if (ret != 0) { + nvgpu_err(g, + "set interleave failed tsgid=%u", tsg->tsgid); + return ret; + } + } + + tsg->interleave_level = level; + + /* TSG may not be bound yet */ + if (tsg->runlist == NULL) { + return 0; + } + + return g->ops.runlist.reload(g, tsg->runlist, true, true); +} + +int nvgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us) +{ + struct gk20a *g = tsg->g; + + nvgpu_log(g, gpu_dbg_sched, "tsgid=%u timeslice=%u us", + tsg->tsgid, timeslice_us); + + if (timeslice_us < g->tsg_timeslice_min_us || + timeslice_us > g->tsg_timeslice_max_us) { + return -EINVAL; + } + + tsg->timeslice_us = timeslice_us; + + /* TSG may not be bound yet */ + if (tsg->runlist == NULL) { + return 0; + } + + return g->ops.runlist.reload(g, tsg->runlist, true, true); +} + +u32 nvgpu_tsg_get_timeslice(struct nvgpu_tsg *tsg) +{ + return tsg->timeslice_us; +} + +int nvgpu_tsg_set_long_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us) +{ + struct gk20a *g = tsg->g; + + nvgpu_log(g, gpu_dbg_sched, "tsgid=%u timeslice=%u us", + tsg->tsgid, timeslice_us); + + if (timeslice_us < g->tsg_timeslice_min_us || + timeslice_us > g->tsg_dbg_timeslice_max_us) { + return -EINVAL; + } + + tsg->timeslice_us = timeslice_us; + + /* TSG may not be bound yet */ + if (tsg->runlist == NULL) { + return 0; + } + + return g->ops.runlist.reload(g, tsg->runlist, true, true); +} +#endif + +u32 nvgpu_tsg_default_timeslice_us(struct gk20a *g) +{ + return NVGPU_TSG_TIMESLICE_DEFAULT_US; +} + +static void nvgpu_tsg_release_used_tsg(struct nvgpu_fifo *f, + struct nvgpu_tsg *tsg) +{ + nvgpu_mutex_acquire(&f->tsg_inuse_mutex); + f->tsg[tsg->tsgid].in_use = false; + nvgpu_mutex_release(&f->tsg_inuse_mutex); +} + +static struct nvgpu_tsg *nvgpu_tsg_acquire_unused_tsg(struct nvgpu_fifo *f) +{ + struct nvgpu_tsg *tsg = NULL; + unsigned int tsgid; + + nvgpu_mutex_acquire(&f->tsg_inuse_mutex); + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + if (!f->tsg[tsgid].in_use) { + f->tsg[tsgid].in_use = true; + tsg = &f->tsg[tsgid]; + break; + } + } + nvgpu_mutex_release(&f->tsg_inuse_mutex); + + return tsg; +} + +int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid) +{ + u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g); + int err; + + /* we need to allocate this after g->ops.gr.init_fs_state() since + * we initialize gr.config->no_of_sm in this function + */ + if (no_of_sm == 0U) { + nvgpu_err(g, "no_of_sm %d not set, failed allocation", no_of_sm); + return -EINVAL; + } + + err = nvgpu_tsg_alloc_sm_error_states_mem(g, tsg, no_of_sm); + if (err != 0) { + return err; + } + + tsg->tgid = pid; + tsg->g = g; + tsg->num_active_channels = 0U; + nvgpu_ref_init(&tsg->refcount); + + tsg->vm = NULL; + tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; + tsg->timeslice_us = g->ops.tsg.default_timeslice_us(g); + tsg->runlist = NULL; +#ifdef CONFIG_NVGPU_DEBUGGER + tsg->sm_exception_mask_type = 
NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; +#endif + tsg->gr_ctx = nvgpu_alloc_gr_ctx_struct(g); + if (tsg->gr_ctx == NULL) { + err = -ENOMEM; + goto clean_up; + } + +#ifdef CONFIG_NVGPU_SM_DIVERSITY + nvgpu_gr_ctx_set_sm_diversity_config(tsg->gr_ctx, + NVGPU_INVALID_SM_CONFIG_ID); +#endif + + if (g->ops.tsg.init_eng_method_buffers != NULL) { + err = g->ops.tsg.init_eng_method_buffers(g, tsg); + if (err != 0) { + nvgpu_err(g, "tsg %d init eng method bufs failed %d", + tsg->tsgid, err); + goto clean_up; + } + } + + if (g->ops.tsg.open != NULL) { + err = g->ops.tsg.open(tsg); + if (err != 0) { + nvgpu_err(g, "tsg %d fifo open failed %d", + tsg->tsgid, err); + goto clean_up; + } + } + + return 0; + +clean_up: + nvgpu_tsg_release_common(g, tsg); + nvgpu_ref_put(&tsg->refcount, NULL); + + return err; +} + +struct nvgpu_tsg *nvgpu_tsg_open(struct gk20a *g, pid_t pid) +{ + struct nvgpu_tsg *tsg; + int err; + + tsg = nvgpu_tsg_acquire_unused_tsg(&g->fifo); + if (tsg == NULL) { + return NULL; + } + + err = nvgpu_tsg_open_common(g, tsg, pid); + if (err != 0) { + nvgpu_tsg_release_used_tsg(&g->fifo, tsg); + nvgpu_err(g, "tsg %d open failed %d", tsg->tsgid, err); + return NULL; + } + + nvgpu_log(g, gpu_dbg_fn, "tsg opened %d\n", tsg->tsgid); + + return tsg; +} + +void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + if (g->ops.tsg.release != NULL) { + g->ops.tsg.release(tsg); + } + + nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx); + tsg->gr_ctx = NULL; + + if (g->ops.tsg.deinit_eng_method_buffers != NULL) { + g->ops.tsg.deinit_eng_method_buffers(g, tsg); + } + +#ifdef CONFIG_NVGPU_PROFILER + if (tsg->prof != NULL) { + nvgpu_profiler_unbind_context(tsg->prof); + } +#endif + + if (tsg->vm != NULL) { + nvgpu_vm_put(tsg->vm); + tsg->vm = NULL; + } + + if(tsg->sm_error_states != NULL) { + nvgpu_kfree(g, tsg->sm_error_states); + tsg->sm_error_states = NULL; +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_mutex_destroy(&tsg->sm_exception_mask_lock); +#endif + } +} + +static struct nvgpu_tsg *tsg_gk20a_from_ref(struct nvgpu_ref *ref) +{ + return (struct nvgpu_tsg *) + ((uintptr_t)ref - offsetof(struct nvgpu_tsg, refcount)); +} + +void nvgpu_tsg_release(struct nvgpu_ref *ref) +{ + struct nvgpu_tsg *tsg = tsg_gk20a_from_ref(ref); + struct gk20a *g = tsg->g; + int err; + + err = gk20a_busy(g); + if (err != 0) { + nvgpu_err(g, "cannot busy() err=%d!", err); + return; + } + + if ((tsg->gr_ctx != NULL) && + nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx)) && + (tsg->vm != NULL)) { + g->ops.gr.setup.free_gr_ctx(g, tsg->vm, tsg->gr_ctx); + } + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL + /* unhook all events created on this TSG */ + nvgpu_mutex_acquire(&tsg->event_id_list_lock); + while (nvgpu_list_empty(&tsg->event_id_list) == false) { + nvgpu_list_del(tsg->event_id_list.next); + } + nvgpu_mutex_release(&tsg->event_id_list_lock); +#endif + + nvgpu_tsg_release_common(g, tsg); + nvgpu_tsg_release_used_tsg(&g->fifo, tsg); + + nvgpu_log(g, gpu_dbg_fn, "tsg released %d", tsg->tsgid); + + gk20a_idle(g); +} + +struct nvgpu_tsg *nvgpu_tsg_from_ch(struct nvgpu_channel *ch) +{ + struct nvgpu_tsg *tsg = NULL; + u32 tsgid = ch->tsgid; + + if (tsgid != NVGPU_INVALID_TSG_ID) { + struct gk20a *g = ch->g; + struct nvgpu_fifo *f = &g->fifo; + + tsg = &f->tsg[tsgid]; + } else { + nvgpu_log(ch->g, gpu_dbg_fn, "tsgid is invalid for chid: %d", + ch->chid); + } + return tsg; +} + +int nvgpu_tsg_alloc_sm_error_states_mem(struct gk20a *g, + struct nvgpu_tsg *tsg, + u32 num_sm) +{ + if (tsg->sm_error_states != NULL) { + return -EINVAL; 
+ } + + tsg->sm_error_states = nvgpu_kzalloc(g, nvgpu_safe_mult_u64( + sizeof(struct nvgpu_tsg_sm_error_state), num_sm)); + if (tsg->sm_error_states == NULL) { + nvgpu_err(g, "sm_error_states mem allocation failed"); + return -ENOMEM; + } + +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_mutex_init(&tsg->sm_exception_mask_lock); +#endif + + return 0; +} + +#ifdef CONFIG_NVGPU_DEBUGGER +int nvgpu_tsg_set_sm_exception_type_mask(struct nvgpu_channel *ch, + u32 exception_mask) +{ + struct nvgpu_tsg *tsg; + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg == NULL) { + return -EINVAL; + } + + nvgpu_mutex_acquire(&tsg->sm_exception_mask_lock); + tsg->sm_exception_mask_type = exception_mask; + nvgpu_mutex_release(&tsg->sm_exception_mask_lock); + + return 0; +} +#endif + +void nvgpu_tsg_abort(struct gk20a *g, struct nvgpu_tsg *tsg, bool preempt) +{ + struct nvgpu_channel *ch = NULL; + + nvgpu_log_fn(g, " "); + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(tsg->abortable == false); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + g->ops.tsg.disable(tsg); + + if (preempt) { + /* + * Ignore the return value below. If preempt fails, preempt_tsg + * operation will print the error and ctxsw timeout may trigger + * a recovery if needed. + */ + (void)g->ops.fifo.preempt_tsg(g, tsg); + } + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch) != NULL) { + nvgpu_channel_set_unserviceable(ch); + if (g->ops.channel.abort_clean_up != NULL) { + g->ops.channel.abort_clean_up(ch); + } + nvgpu_channel_put(ch); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +void nvgpu_tsg_reset_faulted_eng_pbdma(struct gk20a *g, struct nvgpu_tsg *tsg, + bool eng, bool pbdma) +{ + struct nvgpu_channel *ch; + + if (g->ops.channel.reset_faulted == NULL) { + return; + } + + if (tsg == NULL) { + return; + } + + nvgpu_log(g, gpu_dbg_info, "reset faulted eng and pbdma bits in ccsr"); + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + g->ops.channel.reset_faulted(g, ch, eng, pbdma); + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +#ifdef CONFIG_NVGPU_DEBUGGER +int nvgpu_tsg_set_mmu_debug_mode(struct nvgpu_channel *ch, bool enable) +{ + struct gk20a *g; + int err = 0; + u32 ch_refcnt; + u32 tsg_refcnt; + u32 fb_refcnt; + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + + if ((ch == NULL) || (tsg == NULL)) { + return -EINVAL; + } + g = ch->g; + + if ((g->ops.fb.set_mmu_debug_mode == NULL) && + (g->ops.gr.set_mmu_debug_mode == NULL)) { + return -ENOSYS; + } + + if (enable) { + ch_refcnt = ch->mmu_debug_mode_refcnt + 1U; + tsg_refcnt = tsg->mmu_debug_mode_refcnt + 1U; + fb_refcnt = g->mmu_debug_mode_refcnt + 1U; + } else { + ch_refcnt = ch->mmu_debug_mode_refcnt - 1U; + tsg_refcnt = tsg->mmu_debug_mode_refcnt - 1U; + fb_refcnt = g->mmu_debug_mode_refcnt - 1U; + } + + if (g->ops.gr.set_mmu_debug_mode != NULL) { + /* + * enable GPC MMU debug mode if it was requested for at + * least one channel in the TSG + */ + err = g->ops.gr.set_mmu_debug_mode(g, ch, tsg_refcnt > 0U); + if (err != 0) { + nvgpu_err(g, "set mmu debug mode failed, 
err=%d", err); + return err; + } + } + + if (g->ops.fb.set_mmu_debug_mode != NULL) { + /* + * enable FB/HS MMU debug mode if it was requested for + * at least one TSG + */ + g->ops.fb.set_mmu_debug_mode(g, fb_refcnt > 0U); + } + + ch->mmu_debug_mode_refcnt = ch_refcnt; + tsg->mmu_debug_mode_refcnt = tsg_refcnt; + g->mmu_debug_mode_refcnt = fb_refcnt; + + return err; +} +#endif diff --git a/drivers/gpu/nvgpu/common/fifo/userd.c b/drivers/gpu/nvgpu/common/fifo/userd.c new file mode 100644 index 000000000..90db7a88a --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/userd.c @@ -0,0 +1,157 @@ +/* + * USERD + * + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +int nvgpu_userd_init_slabs(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + int err; + + nvgpu_mutex_init(&f->userd_mutex); + + f->num_channels_per_slab = NVGPU_CPU_PAGE_SIZE / g->ops.userd.entry_size(g); + f->num_userd_slabs = + DIV_ROUND_UP(f->num_channels, f->num_channels_per_slab); + + f->userd_slabs = nvgpu_big_zalloc(g, f->num_userd_slabs * + sizeof(struct nvgpu_mem)); + if (f->userd_slabs == NULL) { + nvgpu_err(g, "could not allocate userd slabs"); + err = -ENOMEM; + goto clean_up; + } + + return 0; + +clean_up: + nvgpu_mutex_destroy(&f->userd_mutex); + + return err; +} + +void nvgpu_userd_free_slabs(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + u32 slab; + + for (slab = 0; slab < f->num_userd_slabs; slab++) { + nvgpu_dma_free(g, &f->userd_slabs[slab]); + } + nvgpu_big_free(g, f->userd_slabs); + f->userd_slabs = NULL; + + nvgpu_mutex_destroy(&f->userd_mutex); +} + +int nvgpu_userd_init_channel(struct gk20a *g, struct nvgpu_channel *c) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_mem *mem; + u32 slab = c->chid / f->num_channels_per_slab; + int err = 0; + + if (slab > f->num_userd_slabs) { + nvgpu_err(g, "chid %u, slab %u out of range (max=%u)", + c->chid, slab, f->num_userd_slabs); + return -EINVAL; + } + + mem = &g->fifo.userd_slabs[slab]; + + nvgpu_mutex_acquire(&f->userd_mutex); + if (!nvgpu_mem_is_valid(mem)) { + err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem); + if (err != 0) { + nvgpu_err(g, "userd allocation failed, err=%d", err); + goto done; + } + + if (g->ops.mm.is_bar1_supported(g)) { + mem->gpu_va = g->ops.mm.bar1_map_userd(g, mem, + slab * NVGPU_CPU_PAGE_SIZE); + } + } + c->userd_mem = mem; + c->userd_offset = 
(c->chid % f->num_channels_per_slab) * + g->ops.userd.entry_size(g); + c->userd_iova = nvgpu_channel_userd_addr(c); + + nvgpu_log(g, gpu_dbg_info, + "chid=%u slab=%u mem=%p offset=%u addr=%llx gpu_va=%llx", + c->chid, slab, mem, c->userd_offset, + nvgpu_channel_userd_addr(c), + nvgpu_channel_userd_gpu_va(c)); + +done: + nvgpu_mutex_release(&f->userd_mutex); + return err; +} + +int nvgpu_userd_setup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + int err; + u32 size, num_pages; + + err = nvgpu_userd_init_slabs(g); + if (err != 0) { + nvgpu_err(g, "failed to init userd support"); + return err; + } + + size = f->num_channels * g->ops.userd.entry_size(g); + num_pages = DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE); + err = nvgpu_vm_area_alloc(g->mm.bar1.vm, + num_pages, NVGPU_CPU_PAGE_SIZE, &f->userd_gpu_va, 0); + if (err != 0) { + nvgpu_err(g, "userd gpu va allocation failed, err=%d", err); + goto clean_up; + } + + return 0; + +clean_up: + nvgpu_userd_free_slabs(g); + + return err; +} + +void nvgpu_userd_cleanup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + + if (f->userd_gpu_va != 0ULL) { + (void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va); + f->userd_gpu_va = 0ULL; + } + + nvgpu_userd_free_slabs(g); +} diff --git a/drivers/gpu/nvgpu/common/fifo/watchdog.c b/drivers/gpu/nvgpu/common/fifo/watchdog.c new file mode 100644 index 000000000..aad6f8ab1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/watchdog.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +struct nvgpu_channel_wdt { + struct gk20a *g; + + /* lock protects the running timer state */ + struct nvgpu_spinlock lock; + struct nvgpu_timeout timer; + bool running; + struct nvgpu_channel_wdt_state ch_state; + + /* lock not needed */ + u32 limit_ms; + bool enabled; +}; + +struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct gk20a *g) +{ + struct nvgpu_channel_wdt *wdt = nvgpu_kzalloc(g, sizeof(*wdt)); + + if (wdt == NULL) { + return NULL; + } + + wdt->g = g; + nvgpu_spinlock_init(&wdt->lock); + wdt->enabled = true; + wdt->limit_ms = g->ch_wdt_init_limit_ms; + + return wdt; +} + +void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt) +{ + nvgpu_kfree(wdt->g, wdt); +} + +void nvgpu_channel_wdt_enable(struct nvgpu_channel_wdt *wdt) +{ + wdt->enabled = true; +} + +void nvgpu_channel_wdt_disable(struct nvgpu_channel_wdt *wdt) +{ + wdt->enabled = false; +} + +bool nvgpu_channel_wdt_enabled(struct nvgpu_channel_wdt *wdt) +{ + return wdt->enabled; +} + +void nvgpu_channel_wdt_set_limit(struct nvgpu_channel_wdt *wdt, u32 limit_ms) +{ + wdt->limit_ms = limit_ms; +} + +u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt) +{ + return wdt->limit_ms; +} + +static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt, + struct nvgpu_channel_wdt_state *state) +{ + struct gk20a *g = wdt->g; + int ret; + + ret = nvgpu_timeout_init(g, &wdt->timer, + wdt->limit_ms, + NVGPU_TIMER_CPU_TIMER); + if (ret != 0) { + nvgpu_err(g, "timeout_init failed: %d", ret); + return; + } + + wdt->ch_state = *state; + wdt->running = true; +} + +/** + * Start a timeout counter (watchdog) on this channel. + * + * Trigger a watchdog to recover the channel after the per-platform timeout + * duration (but strictly no earlier) if the channel hasn't advanced within + * that time. + * + * If the timeout is already running, do nothing. This should be called when + * new jobs are submitted. The timeout will stop when the last tracked job + * finishes, making the channel idle. + */ +void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt, + struct nvgpu_channel_wdt_state *state) +{ + if (!nvgpu_is_timeouts_enabled(wdt->g)) { + return; + } + + if (!wdt->enabled) { + return; + } + + nvgpu_spinlock_acquire(&wdt->lock); + + if (wdt->running) { + nvgpu_spinlock_release(&wdt->lock); + return; + } + nvgpu_channel_wdt_init(wdt, state); + nvgpu_spinlock_release(&wdt->lock); +} + +/** + * Stop a running timeout counter (watchdog) on this channel. + * + * Make the watchdog consider the channel not running, so that it won't get + * recovered even if no progress is detected. Progress is not tracked if the + * watchdog is turned off. + * + * No guarantees are made about concurrent execution of the timeout handler. + * (This should be called from an update handler running in the same thread + * with the watchdog.) + */ +bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt) +{ + bool was_running; + + nvgpu_spinlock_acquire(&wdt->lock); + was_running = wdt->running; + wdt->running = false; + nvgpu_spinlock_release(&wdt->lock); + return was_running; +} + +/** + * Continue a previously stopped timeout + * + * Enable the timeout again but don't reinitialize its timer. + * + * No guarantees are made about concurrent execution of the timeout handler. + * (This should be called from an update handler running in the same thread + * with the watchdog.) 
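+ * Unlike nvgpu_channel_wdt_start(), this neither snapshots a new channel
+ * state nor restarts the timer; the previously armed deadline stays in
+ * effect.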
+ */ +void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt) +{ + nvgpu_spinlock_acquire(&wdt->lock); + wdt->running = true; + nvgpu_spinlock_release(&wdt->lock); +} + +/** + * Reset the counter of a timeout that is in effect. + * + * If this channel has an active timeout, act as if something happened on the + * channel right now. + * + * Rewinding a stopped counter is irrelevant; this is a no-op for non-running + * timeouts. Stopped timeouts can only be started (which is technically a + * rewind too) or continued (where the stop is actually pause). + */ +void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt, + struct nvgpu_channel_wdt_state *state) +{ + nvgpu_spinlock_acquire(&wdt->lock); + if (wdt->running) { + nvgpu_channel_wdt_init(wdt, state); + } + nvgpu_spinlock_release(&wdt->lock); +} + +/** + * Check if the watchdog is running. + * + * A running watchdog means one that is requested to run and expire in the + * future. The state of a running watchdog has to be checked periodically to + * see if it's expired. + */ +bool nvgpu_channel_wdt_running(struct nvgpu_channel_wdt *wdt) +{ + bool running; + + nvgpu_spinlock_acquire(&wdt->lock); + running = wdt->running; + nvgpu_spinlock_release(&wdt->lock); + + return running; +} + +/** + * Check if a channel has been stuck for the watchdog limit. + * + * Test if this channel has really got stuck at this point by checking if its + * {gp,pb}_get have advanced or not. If progress was detected, start the timer + * from zero again. If no {gp,pb}_get action happened in the watchdog time + * limit, return true. Else return false. + */ +static bool nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt, + struct nvgpu_channel_wdt_state *state) +{ + struct gk20a *g = wdt->g; + struct nvgpu_channel_wdt_state previous_state; + + nvgpu_log_fn(g, " "); + + /* Get status but keep timer running */ + nvgpu_spinlock_acquire(&wdt->lock); + previous_state = wdt->ch_state; + nvgpu_spinlock_release(&wdt->lock); + + if (nvgpu_memcmp((const u8 *)state, + (const u8 *)&previous_state, + sizeof(*state)) != 0) { + /* Channel has advanced, timer keeps going but resets */ + nvgpu_channel_wdt_rewind(wdt, state); + return false; + } + + if (!nvgpu_timeout_peek_expired(&wdt->timer)) { + /* Seems stuck but waiting to time out */ + return false; + } + + return true; +} + +/** + * Test if the per-channel watchdog is on; check the timeout in that case. + * + * Each channel has an expiration time based watchdog. The timer is + * (re)initialized in two situations: when a new job is submitted on an idle + * channel and when the timeout is checked but progress is detected. The + * watchdog timeout limit is a coarse sliding window. + * + * The timeout is stopped (disabled) after the last job in a row finishes + * and marks the channel idle. + */ +bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt, + struct nvgpu_channel_wdt_state *state) +{ + bool running; + + nvgpu_spinlock_acquire(&wdt->lock); + running = wdt->running; + nvgpu_spinlock_release(&wdt->lock); + + if (running) { + return nvgpu_channel_wdt_handler(wdt, state); + } else { + return false; + } +} diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c new file mode 100644 index 000000000..3d49f7e9c --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -0,0 +1,1195 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/gr/ctx_priv.h" + +static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm); + +struct nvgpu_gr_ctx_desc * +nvgpu_gr_ctx_desc_alloc(struct gk20a *g) +{ + struct nvgpu_gr_ctx_desc *desc = nvgpu_kzalloc(g, sizeof(*desc)); + return desc; +} + +void nvgpu_gr_ctx_desc_free(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc) +{ + nvgpu_kfree(g, desc); +} + +void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, + u32 index, u32 size) +{ + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + gr_ctx_desc->size[index] = size; +} + +struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g) +{ + return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx)); +} + +void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) +{ + nvgpu_kfree(g, gr_ctx); +} + +int nvgpu_gr_ctx_alloc(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct vm_gk20a *vm) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + if (gr_ctx_desc->size[NVGPU_GR_CTX_CTX] == 0U) { + return -EINVAL; + } + + err = nvgpu_dma_alloc(g, gr_ctx_desc->size[NVGPU_GR_CTX_CTX], + &gr_ctx->mem); + if (err != 0) { + return err; + } + + gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, + &gr_ctx->mem, + gr_ctx->mem.size, + 0, /* not GPU-cacheable */ + gk20a_mem_flag_none, true, + gr_ctx->mem.aperture); + if (gr_ctx->mem.gpu_va == 0ULL) { + err = -ENOMEM; + goto err_free_mem; + } + + gr_ctx->ctx_id_valid = false; + + return 0; + +err_free_mem: + nvgpu_dma_free(g, &gr_ctx->mem); + + return err; +} + +void nvgpu_gr_ctx_free(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm) +{ + nvgpu_log_fn(g, " "); + + if (gr_ctx != NULL) { + nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx, + global_ctx_buffer, vm); + +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_gr_ctx_free_pm_ctx(g, vm, gr_ctx); +#endif + nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); +#ifdef CONFIG_NVGPU_GFXP + if (nvgpu_mem_is_valid(&gr_ctx->gfxp_rtvcb_ctxsw_buffer)) { + nvgpu_dma_unmap_free(vm, + &gr_ctx->gfxp_rtvcb_ctxsw_buffer); + } + nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, 
&gr_ctx->spill_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); +#endif + + nvgpu_dma_unmap_free(vm, &gr_ctx->mem); + (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); + } +} + +int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct vm_gk20a *vm) +{ + struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; + int err = 0; + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "patch_ctx size = %u", + gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX]); + + err = nvgpu_dma_alloc_map_sys(vm, gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX], + &patch_ctx->mem); + if (err != 0) { + return err; + } + + return 0; +} + +void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx) +{ + struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; + + if (nvgpu_mem_is_valid(&patch_ctx->mem)) { + nvgpu_gmmu_unmap(vm, &patch_ctx->mem, + patch_ctx->mem.gpu_va); + + nvgpu_dma_free(g, &patch_ctx->mem); + patch_ctx->data_count = 0; + } +} + +static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm) +{ + u64 *g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; + u32 *g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; + u32 i; + + nvgpu_log_fn(g, " "); + + for (i = 0U; i < NVGPU_GR_CTX_VA_COUNT; i++) { + if (g_bfr_va[i] != 0ULL) { + nvgpu_gr_global_ctx_buffer_unmap(global_ctx_buffer, + g_bfr_index[i], vm, g_bfr_va[i]); + } + } + + (void) memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va)); + (void) memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index)); + + gr_ctx->global_ctx_buffer_mapped = false; +} + +static int nvgpu_gr_ctx_map_ctx_circular_buffer(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm, bool vpr) +{ + u64 *g_bfr_va; + u32 *g_bfr_index; + u64 gpu_va = 0ULL; + + g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; + g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; + +#ifdef CONFIG_NVGPU_VPR + if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR)) { + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR, + vm, NVGPU_VM_MAP_CACHEABLE, true); + g_bfr_index[NVGPU_GR_CTX_CIRCULAR_VA] = + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR; + } else { +#endif + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR, + vm, NVGPU_VM_MAP_CACHEABLE, true); + g_bfr_index[NVGPU_GR_CTX_CIRCULAR_VA] = + NVGPU_GR_GLOBAL_CTX_CIRCULAR; +#ifdef CONFIG_NVGPU_VPR + } +#endif + if (gpu_va == 0ULL) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_CIRCULAR_VA] = gpu_va; + + return 0; + +clean_up: + return -ENOMEM; +} + +static int nvgpu_gr_ctx_map_ctx_attribute_buffer(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm, bool vpr) +{ + u64 *g_bfr_va; + u32 *g_bfr_index; + u64 gpu_va = 0ULL; + + g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; + g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; + +#ifdef CONFIG_NVGPU_VPR + if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR)) { + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR, + vm, NVGPU_VM_MAP_CACHEABLE, false); + g_bfr_index[NVGPU_GR_CTX_ATTRIBUTE_VA] = + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR; + } else { 
+#endif + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, + vm, NVGPU_VM_MAP_CACHEABLE, false); + g_bfr_index[NVGPU_GR_CTX_ATTRIBUTE_VA] = + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE; +#ifdef CONFIG_NVGPU_VPR + } +#endif + if (gpu_va == 0ULL) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_ATTRIBUTE_VA] = gpu_va; + + return 0; + +clean_up: + return -ENOMEM; +} + + +static int nvgpu_gr_ctx_map_ctx_pagepool_buffer(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm, bool vpr) +{ + u64 *g_bfr_va; + u32 *g_bfr_index; + u64 gpu_va = 0ULL; + + g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; + g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; + +#ifdef CONFIG_NVGPU_VPR + if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR)) { + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, + vm, NVGPU_VM_MAP_CACHEABLE, true); + g_bfr_index[NVGPU_GR_CTX_PAGEPOOL_VA] = + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR; + } else { +#endif + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL, + vm, NVGPU_VM_MAP_CACHEABLE, true); + g_bfr_index[NVGPU_GR_CTX_PAGEPOOL_VA] = + NVGPU_GR_GLOBAL_CTX_PAGEPOOL; +#ifdef CONFIG_NVGPU_VPR + } +#endif + if (gpu_va == 0ULL) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_PAGEPOOL_VA] = gpu_va; + + return 0; + +clean_up: + return -ENOMEM; +} + +static int nvgpu_gr_ctx_map_ctx_buffer(struct gk20a *g, + u32 buffer_type, u32 va_type, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm) +{ + u64 *g_bfr_va; + u32 *g_bfr_index; + u64 gpu_va = 0ULL; + + g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; + g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; + + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + buffer_type, vm, 0, true); + if (gpu_va == 0ULL) { + goto clean_up; + } + + g_bfr_index[va_type] = buffer_type; + g_bfr_va[va_type] = gpu_va; + + return 0; + +clean_up: + return -ENOMEM; +} + +int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *vm, bool vpr) +{ + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + /* + * MIG supports only compute class. + * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are supported. 
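+	 * Under MIG, the mappings for those buffers are skipped below and
+	 * only the priv register access map (and, if configured, the FECS
+	 * trace buffer) is mapped.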
+	 */
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+		/* Circular Buffer */
+		err = nvgpu_gr_ctx_map_ctx_circular_buffer(g, gr_ctx,
+			global_ctx_buffer, vm, vpr);
+		if (err != 0) {
+			nvgpu_err(g, "cannot map ctx circular buffer");
+			goto fail;
+		}
+
+		/* Attribute Buffer */
+		err = nvgpu_gr_ctx_map_ctx_attribute_buffer(g, gr_ctx,
+			global_ctx_buffer, vm, vpr);
+		if (err != 0) {
+			nvgpu_err(g, "cannot map ctx attribute buffer");
+			goto fail;
+		}
+
+		/* Page Pool */
+		err = nvgpu_gr_ctx_map_ctx_pagepool_buffer(g, gr_ctx,
+			global_ctx_buffer, vm, vpr);
+		if (err != 0) {
+			nvgpu_err(g, "cannot map ctx pagepool buffer");
+			goto fail;
+		}
+#ifdef CONFIG_NVGPU_GRAPHICS
+		/* RTV circular buffer */
+		if (nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer,
+				NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER)) {
+			err = nvgpu_gr_ctx_map_ctx_buffer(g,
+				NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER,
+				NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA,
+				gr_ctx, global_ctx_buffer, vm);
+			if (err != 0) {
+				nvgpu_err(g,
+					"cannot map ctx rtv circular buffer");
+				goto fail;
+			}
+		}
+#endif
+	}
+
+	/* Priv register Access Map */
+	err = nvgpu_gr_ctx_map_ctx_buffer(g,
+		NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP,
+		NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA,
+		gr_ctx, global_ctx_buffer, vm);
+	if (err != 0) {
+		nvgpu_err(g, "cannot map ctx priv access buffer");
+		goto fail;
+	}
+
+#ifdef CONFIG_NVGPU_FECS_TRACE
+	/* FECS trace buffer */
+	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
+		err = nvgpu_gr_ctx_map_ctx_buffer(g,
+			NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER,
+			NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA,
+			gr_ctx, global_ctx_buffer, vm);
+		if (err != 0) {
+			nvgpu_err(g, "cannot map ctx fecs trace buffer");
+			goto fail;
+		}
+	}
+#endif
+
+	gr_ctx->global_ctx_buffer_mapped = true;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
+	return 0;
+
+fail:
+	nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx, global_ctx_buffer, vm);
+	return err;
+}
+
+u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx,
+	u32 index)
+{
+	return gr_ctx->global_ctx_buffer_va[index];
+}
+
+struct nvgpu_mem *nvgpu_gr_ctx_get_patch_ctx_mem(struct nvgpu_gr_ctx *gr_ctx)
+{
+	return &gr_ctx->patch_ctx.mem;
+}
+
+void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx,
+	u32 data_count)
+{
+	gr_ctx->patch_ctx.data_count = data_count;
+}
+
+struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx)
+{
+	return &gr_ctx->mem;
+}
+
+#ifdef CONFIG_NVGPU_SM_DIVERSITY
+void nvgpu_gr_ctx_set_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx,
+	u32 sm_diversity_config)
+{
+	gr_ctx->sm_diversity_config = sm_diversity_config;
+}
+
+u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx)
+{
+	return gr_ctx->sm_diversity_config;
+}
+#endif
+
+/* load saved fresh copy of golden image into channel gr_ctx */
+void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
+	bool cde)
+{
+	struct nvgpu_mem *mem;
+#ifdef CONFIG_NVGPU_DEBUGGER
+	u64 virt_addr = 0;
+#endif
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
+
+	mem = &gr_ctx->mem;
+
+	nvgpu_gr_global_ctx_load_local_golden_image(g,
+		local_golden_image, mem);
+
+	g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data(g, mem);
+
+#ifdef CONFIG_NVGPU_DEBUGGER
+	if ((g->ops.gr.ctxsw_prog.set_cde_enabled != NULL) && cde) {
+		g->ops.gr.ctxsw_prog.set_cde_enabled(g, mem);
+	}
+#endif
+
+	/* set priv access map */
+	g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem,
+		g->allow_all);
+	
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem, + nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA)); + + /* disable verif features */ + g->ops.gr.ctxsw_prog.disable_verif_features(g, mem); + +#ifdef CONFIG_NVGPU_DEBUGGER + if (g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies != + NULL) { + g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies(g, + mem, nvgpu_safe_cast_bool_to_u32(gr_ctx->boosted_ctx)); + } +#endif + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "write patch count = %d", + gr_ctx->patch_ctx.data_count); + g->ops.gr.ctxsw_prog.set_patch_count(g, mem, + gr_ctx->patch_ctx.data_count); + g->ops.gr.ctxsw_prog.set_patch_addr(g, mem, + gr_ctx->patch_ctx.mem.gpu_va); + +#ifdef CONFIG_NVGPU_DEBUGGER + /* PM ctxt switch is off by default */ + gr_ctx->pm_ctx.pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); + virt_addr = 0; + + g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode); + g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, virt_addr); +#endif + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +/* + * Context state can be written directly, or "patched" at times. So that code + * can be used in either situation it is written using a series of + * _ctx_patch_write(..., patch) statements. However any necessary map overhead + * should be minimized; thus, bundle the sequence of these writes together, and + * set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end. + */ +void nvgpu_gr_ctx_patch_write_begin(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count) +{ + if (update_patch_count) { + /* reset patch count if ucode has already processed it */ + gr_ctx->patch_ctx.data_count = + g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", + gr_ctx->patch_ctx.data_count); + } +} + +void nvgpu_gr_ctx_patch_write_end(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + bool update_patch_count) +{ + /* Write context count to context image if it is mapped */ + if (update_patch_count) { + g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + gr_ctx->patch_ctx.data_count); + nvgpu_log(g, gpu_dbg_info, "write patch count %d", + gr_ctx->patch_ctx.data_count); + } +} + +void nvgpu_gr_ctx_patch_write(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 addr, u32 data, bool patch) +{ + if (patch) { + u32 patch_slot; + u64 patch_slot_max; + + if (gr_ctx == NULL) { + nvgpu_err(g, + "failed to access gr_ctx[NULL] but patch true"); + return; + } + + patch_slot = + nvgpu_safe_mult_u32(gr_ctx->patch_ctx.data_count, + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); + patch_slot_max = + nvgpu_safe_sub_u64( + PATCH_CTX_ENTRIES_FROM_SIZE( + gr_ctx->patch_ctx.mem.size), + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); + + if (patch_slot > patch_slot_max) { + nvgpu_err(g, "failed to access patch_slot %d", + patch_slot); + return; + } + + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, + (u64)patch_slot, addr); + nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, + (u64)patch_slot + 1ULL, data); + gr_ctx->patch_ctx.data_count = nvgpu_safe_add_u32( + gr_ctx->patch_ctx.data_count, 1U); + nvgpu_log(g, gpu_dbg_info, + "patch addr = 0x%x data = 0x%x data_count %d", + addr, data, gr_ctx->patch_ctx.data_count); + } else { + nvgpu_writel(g, addr, data); + } +} + +void nvgpu_gr_ctx_init_compute_preemption_mode(struct nvgpu_gr_ctx *gr_ctx, + u32 compute_preempt_mode) +{ + gr_ctx->compute_preempt_mode = compute_preempt_mode; +} + +u32 nvgpu_gr_ctx_get_compute_preemption_mode(struct nvgpu_gr_ctx *gr_ctx) +{ + return 
gr_ctx->compute_preempt_mode; +} + +bool nvgpu_gr_ctx_check_valid_preemption_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 graphics_preempt_mode, u32 compute_preempt_mode) +{ + u32 supported_graphics_preempt_mode = 0U; + u32 supported_compute_preempt_mode = 0U; + + if ((graphics_preempt_mode == 0U) && (compute_preempt_mode == 0U)) { + return false; + } + + g->ops.gr.init.get_supported__preemption_modes( + &supported_graphics_preempt_mode, + &supported_compute_preempt_mode); + + if (graphics_preempt_mode != 0U) { + if ((graphics_preempt_mode & supported_graphics_preempt_mode) == 0U) { + return false; + } + + /* Do not allow lower preemption modes than current ones */ + if (graphics_preempt_mode < gr_ctx->graphics_preempt_mode) { + return false; + } + } + + if (compute_preempt_mode != 0U) { + if ((compute_preempt_mode & supported_compute_preempt_mode) == 0U) { + return false; + } + + /* Do not allow lower preemption modes than current ones */ + if (compute_preempt_mode < gr_ctx->compute_preempt_mode) { + return false; + } + } + +#if defined(CONFIG_NVGPU_CILP) && defined(CONFIG_NVGPU_GFXP) + /* Invalid combination */ + if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) && + (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP)) { + return false; + } +#endif + + return true; +} + +void nvgpu_gr_ctx_set_preemption_modes(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx) +{ +#ifdef CONFIG_NVGPU_GFXP + if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { + g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, + &gr_ctx->mem); + } +#endif + +#ifdef CONFIG_NVGPU_CILP + if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) { + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, + &gr_ctx->mem); + } +#endif + + if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, + &gr_ctx->mem); + } + +} + +void nvgpu_gr_ctx_set_tsgid(struct nvgpu_gr_ctx *gr_ctx, u32 tsgid) +{ + gr_ctx->tsgid = tsgid; +} + +u32 nvgpu_gr_ctx_get_tsgid(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->tsgid; +} + +#ifdef CONFIG_NVGPU_GRAPHICS +void nvgpu_gr_ctx_init_graphics_preemption_mode(struct nvgpu_gr_ctx *gr_ctx, + u32 graphics_preempt_mode) +{ + gr_ctx->graphics_preempt_mode = graphics_preempt_mode; +} + +u32 nvgpu_gr_ctx_get_graphics_preemption_mode(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->graphics_preempt_mode; +} + +void nvgpu_gr_ctx_set_zcull_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + u32 mode, u64 gpu_va) +{ + struct zcull_ctx_desc *zcull_ctx = &gr_ctx->zcull_ctx; + + zcull_ctx->ctx_sw_mode = mode; + zcull_ctx->gpu_va = gpu_va; +} + +u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->zcull_ctx.gpu_va; +} + +int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) +{ + int err; + + nvgpu_log(g, gpu_dbg_gr, " "); + + err = g->ops.mm.cache.l2_flush(g, true); + if (err != 0) { + nvgpu_err(g, "l2_flush failed"); + return err; + } + + g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, &gr_ctx->mem); + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, 0); + + return err; +} + +int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + bool set_zcull_ptr) +{ + nvgpu_log_fn(g, " "); + + if (gr_ctx->zcull_ctx.gpu_va == 0ULL && + g->ops.gr.ctxsw_prog.is_zcull_mode_separate_buffer( + gr_ctx->zcull_ctx.ctx_sw_mode)) { + return -EINVAL; + } + + g->ops.gr.ctxsw_prog.set_zcull(g, &gr_ctx->mem, + 
gr_ctx->zcull_ctx.ctx_sw_mode); + + if (set_zcull_ptr) { + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, + gr_ctx->zcull_ctx.gpu_va); + } + + return 0; +} +#endif /* CONFIG_NVGPU_GRAPHICS */ + +#ifdef CONFIG_NVGPU_GFXP +void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx) +{ + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &gr_ctx->mem, + gr_ctx->preempt_ctxsw_buffer.gpu_va); + + if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { + g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, + &gr_ctx->mem, gr_ctx->preempt_ctxsw_buffer.gpu_va); + } +} + +bool nvgpu_gr_ctx_desc_force_preemption_gfxp(struct nvgpu_gr_ctx_desc *gr_ctx_desc) +{ + return gr_ctx_desc->force_preemption_gfxp; +} + +static int nvgpu_gr_ctx_alloc_ctxsw_buffer(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + int err; + + err = nvgpu_dma_alloc_sys(vm->mm->g, size, mem); + if (err != 0) { + return err; + } + + mem->gpu_va = nvgpu_gmmu_map(vm, + mem, + mem->aligned_size, + NVGPU_VM_MAP_CACHEABLE, + gk20a_mem_flag_none, + false, + mem->aperture); + if (mem->gpu_va == 0ULL) { + nvgpu_dma_free(vm->mm->g, mem); + return -ENOMEM; + } + + return 0; +} + +static int nvgpu_gr_ctx_alloc_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct vm_gk20a *vm) +{ + int err = 0; + + err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, + gr_ctx_desc->size[NVGPU_GR_CTX_PREEMPT_CTXSW], + &gr_ctx->preempt_ctxsw_buffer); + if (err != 0) { + nvgpu_err(g, "cannot allocate preempt buffer"); + goto fail; + } + + err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, + gr_ctx_desc->size[NVGPU_GR_CTX_SPILL_CTXSW], + &gr_ctx->spill_ctxsw_buffer); + if (err != 0) { + nvgpu_err(g, "cannot allocate spill buffer"); + goto fail_free_preempt; + } + + err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, + gr_ctx_desc->size[NVGPU_GR_CTX_BETACB_CTXSW], + &gr_ctx->betacb_ctxsw_buffer); + if (err != 0) { + nvgpu_err(g, "cannot allocate beta buffer"); + goto fail_free_spill; + } + + if (gr_ctx_desc->size[NVGPU_GR_CTX_GFXP_RTVCB_CTXSW] != 0U) { + err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, + gr_ctx_desc->size[NVGPU_GR_CTX_GFXP_RTVCB_CTXSW], + &gr_ctx->gfxp_rtvcb_ctxsw_buffer); + if (err != 0) { + nvgpu_err(g, "cannot allocate gfxp rtvcb"); + goto fail_free_betacb; + } + } + return 0; + +fail_free_betacb: + nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); +fail_free_spill: + nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); +fail_free_preempt: + nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); +fail: + return err; +} + +int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct vm_gk20a *vm) +{ + int err = 0; + + /* nothing to do if already initialized */ + if (nvgpu_mem_is_valid(&gr_ctx->preempt_ctxsw_buffer)) { + return 0; + } + + if (gr_ctx_desc->size[NVGPU_GR_CTX_PREEMPT_CTXSW] == 0U || + gr_ctx_desc->size[NVGPU_GR_CTX_SPILL_CTXSW] == 0U || + gr_ctx_desc->size[NVGPU_GR_CTX_BETACB_CTXSW] == 0U || + gr_ctx_desc->size[NVGPU_GR_CTX_PAGEPOOL_CTXSW] == 0U) { + return -EINVAL; + } + + err = nvgpu_gr_ctx_alloc_preemption_buffers(g, gr_ctx, + gr_ctx_desc, vm); + + if (err != 0) { + nvgpu_err(g, "cannot allocate preemption buffers"); + goto fail; + } + + err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, + gr_ctx_desc->size[NVGPU_GR_CTX_PAGEPOOL_CTXSW], + &gr_ctx->pagepool_ctxsw_buffer); + if (err != 0) { + nvgpu_err(g, "cannot allocate page pool"); + goto fail; + } + + return 0; + +fail: + 
return err; +} + +struct nvgpu_mem *nvgpu_gr_ctx_get_preempt_ctxsw_buffer( + struct nvgpu_gr_ctx *gr_ctx) +{ + return &gr_ctx->preempt_ctxsw_buffer; +} + +struct nvgpu_mem *nvgpu_gr_ctx_get_spill_ctxsw_buffer( + struct nvgpu_gr_ctx *gr_ctx) +{ + return &gr_ctx->spill_ctxsw_buffer; +} + +struct nvgpu_mem *nvgpu_gr_ctx_get_betacb_ctxsw_buffer( + struct nvgpu_gr_ctx *gr_ctx) +{ + return &gr_ctx->betacb_ctxsw_buffer; +} + +struct nvgpu_mem *nvgpu_gr_ctx_get_pagepool_ctxsw_buffer( + struct nvgpu_gr_ctx *gr_ctx) +{ + return &gr_ctx->pagepool_ctxsw_buffer; +} + +struct nvgpu_mem *nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer( + struct nvgpu_gr_ctx *gr_ctx) +{ + return &gr_ctx->gfxp_rtvcb_ctxsw_buffer; +} +#endif /* CONFIG_NVGPU_GFXP */ + +#ifdef CONFIG_NVGPU_CILP +bool nvgpu_gr_ctx_desc_force_preemption_cilp(struct nvgpu_gr_ctx_desc *gr_ctx_desc) +{ + return gr_ctx_desc->force_preemption_cilp; +} + +bool nvgpu_gr_ctx_get_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->cilp_preempt_pending; +} + +void nvgpu_gr_ctx_set_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx, + bool cilp_preempt_pending) +{ + gr_ctx->cilp_preempt_pending = cilp_preempt_pending; +} +#endif + +#ifdef CONFIG_NVGPU_DEBUGGER +void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx) +{ + u32 tmp; + + tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + if (tmp == 0U) { + gr_ctx->patch_ctx.data_count = 0; + } +} + +void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + bool set_patch_addr) +{ + g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + gr_ctx->patch_ctx.data_count); + if (set_patch_addr) { + g->ops.gr.ctxsw_prog.set_patch_addr(g, &gr_ctx->mem, + gr_ctx->patch_ctx.mem.gpu_va); + } +} + +int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct vm_gk20a *vm, + u64 gpu_va) +{ + struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; + int err; + + if (pm_ctx->mem.gpu_va != 0ULL) { + return 0; + } + + err = nvgpu_dma_alloc_sys(g, gr_ctx_desc->size[NVGPU_GR_CTX_PM_CTX], + &pm_ctx->mem); + if (err != 0) { + nvgpu_err(g, + "failed to allocate pm ctx buffer"); + return err; + } + + pm_ctx->mem.gpu_va = nvgpu_gmmu_map_fixed(vm, + &pm_ctx->mem, + gpu_va, + pm_ctx->mem.size, + NVGPU_VM_MAP_CACHEABLE, + gk20a_mem_flag_none, true, + pm_ctx->mem.aperture); + if (pm_ctx->mem.gpu_va == 0ULL) { + nvgpu_err(g, + "failed to map pm ctxt buffer"); + nvgpu_dma_free(g, &pm_ctx->mem); + return -ENOMEM; + } + + return 0; +} + +void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx) +{ + struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; + + if (pm_ctx->mem.gpu_va != 0ULL) { + nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); + + nvgpu_dma_free(g, &pm_ctx->mem); + } +} + +struct nvgpu_mem *nvgpu_gr_ctx_get_pm_ctx_mem(struct nvgpu_gr_ctx *gr_ctx) +{ + return &gr_ctx->pm_ctx.mem; +} + +void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode) +{ + gr_ctx->pm_ctx.pm_mode = pm_mode; +} + +u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->pm_ctx.pm_mode; +} + +u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) +{ + if (!gr_ctx->ctx_id_valid) { + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update. 
*/ + if (g->ops.mm.cache.l2_flush(g, true) != 0) { + nvgpu_err(g, "l2_flush failed"); + } + + gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g, + &gr_ctx->mem); + gr_ctx->ctx_id_valid = true; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", gr_ctx->ctx_id); + + return gr_ctx->ctx_id; +} + +u32 nvgpu_gr_ctx_read_ctx_id(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->ctx_id; +} + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING +void nvgpu_gr_ctx_set_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx, bool boost) +{ + gr_ctx->boosted_ctx = boost; +} + +bool nvgpu_gr_ctx_get_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->boosted_ctx; +} +#endif + +#ifdef CONFIG_DEBUG_FS +bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( + struct nvgpu_gr_ctx_desc *gr_ctx_desc) +{ + return gr_ctx_desc->dump_ctxsw_stats_on_channel_close; +} +#endif + +int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + bool enable) +{ + int err; + + if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + nvgpu_err(g, "no graphics context allocated"); + return -EFAULT; + } + + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update. */ + err = g->ops.mm.cache.l2_flush(g, true); + if (err != 0) { + nvgpu_err(g, "l2_flush failed"); + return err; + } + + g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, &gr_ctx->mem, enable); + + return err; +} + +int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + u32 mode, bool *skip_update) +{ + struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; + int ret = 0; + + *skip_update = false; + + if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + nvgpu_err(g, "no graphics context allocated"); + return -EFAULT; + } + + if ((mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && + (g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw == NULL)) { + nvgpu_err(g, + "Mode-E hwpm context switch mode is not supported"); + return -EINVAL; + } + + switch (mode) { + case NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW: + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) { + *skip_update = true; + return 0; + } + pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw(); + pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + break; + case NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW: + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { + *skip_update = true; + return 0; + } + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); + pm_ctx->gpu_va = 0; + break; + case NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { + *skip_update = true; + return 0; + } + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw(); + pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + break; + default: + nvgpu_err(g, "invalid hwpm context switch mode"); + ret = -EINVAL; + break; + } + + return ret; +} + +int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + bool set_pm_ptr) +{ + int err; + + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update. 
*/ + err = g->ops.mm.cache.l2_flush(g, true); + if (err != 0) { + nvgpu_err(g, "l2_flush failed"); + return err; + } + + g->ops.gr.ctxsw_prog.set_pm_mode(g, &gr_ctx->mem, + gr_ctx->pm_ctx.pm_mode); + if (set_pm_ptr) { + g->ops.gr.ctxsw_prog.set_pm_ptr(g, &gr_ctx->mem, + gr_ctx->pm_ctx.gpu_va); + } + + return err; +} +#endif /* CONFIG_NVGPU_DEBUGGER */ diff --git a/drivers/gpu/nvgpu/common/gr/ctx_priv.h b/drivers/gpu/nvgpu/common/gr/ctx_priv.h new file mode 100644 index 000000000..99f150487 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctx_priv.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CTX_PRIV_H +#define NVGPU_GR_CTX_PRIV_H + +struct nvgpu_mem; + +/** + * Patch context buffer descriptor structure. + * + * Pointer to this structure is maintained in #nvgpu_gr_ctx structure. + */ +struct patch_desc { + /** + * Memory to hold patch context buffer. + */ + struct nvgpu_mem mem; + + /** + * Count of entries written into patch context buffer. + */ + u32 data_count; +}; + +#ifdef CONFIG_NVGPU_GRAPHICS +struct zcull_ctx_desc { + u64 gpu_va; + u32 ctx_sw_mode; +}; +#endif + +#ifdef CONFIG_NVGPU_DEBUGGER +struct pm_ctx_desc { + struct nvgpu_mem mem; + u64 gpu_va; + u32 pm_mode; +}; +#endif + +/** + * GR context descriptor structure. + * + * This structure stores various properties of all GR context buffers. + */ +struct nvgpu_gr_ctx_desc { + /** + * Array to store all GR context buffer sizes. + */ + u32 size[NVGPU_GR_CTX_COUNT]; + +#ifdef CONFIG_NVGPU_GRAPHICS + bool force_preemption_gfxp; +#endif + +#ifdef CONFIG_NVGPU_CILP + bool force_preemption_cilp; +#endif + +#ifdef CONFIG_DEBUG_FS + bool dump_ctxsw_stats_on_channel_close; +#endif +}; + +/** + * Graphics context buffer structure. + * + * This structure stores all the properties of a graphics context + * buffer. One graphics context is allocated per GPU Time Slice + * Group (TSG). + */ +struct nvgpu_gr_ctx { + /** + * Context ID read from graphics context buffer. + */ + u32 ctx_id; + + /** + * Flag to indicate if above context ID is valid or not. + */ + bool ctx_id_valid; + + /** + * Memory to hold graphics context buffer. 
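+	 * Allocated and GPU-mapped by nvgpu_gr_ctx_alloc(); the golden
+	 * context image is copied into it by
+	 * nvgpu_gr_ctx_load_golden_ctx_image().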
+ */ + struct nvgpu_mem mem; + +#ifdef CONFIG_NVGPU_GFXP + struct nvgpu_mem preempt_ctxsw_buffer; + struct nvgpu_mem spill_ctxsw_buffer; + struct nvgpu_mem betacb_ctxsw_buffer; + struct nvgpu_mem pagepool_ctxsw_buffer; + struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer; +#endif + + /** + * Patch context buffer descriptor struct. + */ + struct patch_desc patch_ctx; + +#ifdef CONFIG_NVGPU_GRAPHICS + struct zcull_ctx_desc zcull_ctx; +#endif +#ifdef CONFIG_NVGPU_DEBUGGER + struct pm_ctx_desc pm_ctx; +#endif + + /** + * Graphics preemption mode of the graphics context. + */ + u32 graphics_preempt_mode; + + /** + * Compute preemption mode of the graphics context. + */ + u32 compute_preempt_mode; + +#ifdef CONFIG_NVGPU_NON_FUSA + bool golden_img_loaded; +#endif + +#ifdef CONFIG_NVGPU_CILP + bool cilp_preempt_pending; +#endif + +#ifdef CONFIG_NVGPU_DEBUGGER + bool boosted_ctx; +#endif + + /** + * Array to store GPU virtual addresses of all global context + * buffers. + */ + u64 global_ctx_buffer_va[NVGPU_GR_CTX_VA_COUNT]; + + /** + * Array to store indexes of global context buffers + * corresponding to GPU virtual addresses above. + */ + u32 global_ctx_buffer_index[NVGPU_GR_CTX_VA_COUNT]; + + /** + * Flag to indicate if global context buffers are mapped and + * #global_ctx_buffer_va array is populated. + */ + bool global_ctx_buffer_mapped; + + /** + * TSG identifier corresponding to the graphics context. + */ + u32 tsgid; + +#ifdef CONFIG_NVGPU_SM_DIVERSITY + /** SM diversity configuration offset. + * It is valid only if NVGPU_SUPPORT_SM_DIVERSITY support is true. + * else input param is just ignored. + * A valid offset starts from 0 to + * (#gk20a.max_sm_diversity_config_count - 1). + */ + u32 sm_diversity_config; +#endif +}; + +#endif /* NVGPU_GR_CTX_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c new file mode 100644 index 000000000..0deae927e --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c @@ -0,0 +1,700 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int nvgpu_gr_fecs_trace_periodic_polling(void *arg); + +int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr, + pid_t pid, u32 vmid, struct nvgpu_list_node *list) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + struct nvgpu_fecs_trace_context_entry *entry; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, + "adding hash entry context_ptr=%x -> pid=%d, vmid=%d", + context_ptr, pid, vmid); + + entry = nvgpu_kzalloc(g, sizeof(*entry)); + if (entry == NULL) { + nvgpu_err(g, + "can't alloc new entry for context_ptr=%x pid=%d vmid=%d", + context_ptr, pid, vmid); + return -ENOMEM; + } + + nvgpu_init_list_node(&entry->entry); + entry->context_ptr = context_ptr; + entry->pid = pid; + entry->vmid = vmid; + + nvgpu_mutex_acquire(&trace->list_lock); + nvgpu_list_add_tail(&entry->entry, list); + nvgpu_mutex_release(&trace->list_lock); + + return 0; +} + +void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr, + struct nvgpu_list_node *list) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + struct nvgpu_fecs_trace_context_entry *entry, *tmp; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, + "freeing entry context_ptr=%x", context_ptr); + + nvgpu_mutex_acquire(&trace->list_lock); + nvgpu_list_for_each_entry_safe(entry, tmp, list, + nvgpu_fecs_trace_context_entry, entry) { + if (entry->context_ptr == context_ptr) { + nvgpu_list_del(&entry->entry); + nvgpu_log(g, gpu_dbg_ctxsw, + "freed entry=%p context_ptr=%x", entry, + entry->context_ptr); + nvgpu_kfree(g, entry); + break; + } + } + nvgpu_mutex_release(&trace->list_lock); +} + +void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g, + struct nvgpu_list_node *list) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + struct nvgpu_fecs_trace_context_entry *entry, *tmp; + + nvgpu_mutex_acquire(&trace->list_lock); + nvgpu_list_for_each_entry_safe(entry, tmp, list, + nvgpu_fecs_trace_context_entry, entry) { + nvgpu_list_del(&entry->entry); + nvgpu_kfree(g, entry); + } + nvgpu_mutex_release(&trace->list_lock); +} + +void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr, + struct nvgpu_list_node *list, pid_t *pid, u32 *vmid) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + struct nvgpu_fecs_trace_context_entry *entry; + + nvgpu_mutex_acquire(&trace->list_lock); + nvgpu_list_for_each_entry(entry, list, nvgpu_fecs_trace_context_entry, + entry) { + if (entry->context_ptr == context_ptr) { + nvgpu_log(g, gpu_dbg_ctxsw, + "found context_ptr=%x -> pid=%d, vmid=%d", + entry->context_ptr, entry->pid, entry->vmid); + *pid = entry->pid; + *vmid = entry->vmid; + nvgpu_mutex_release(&trace->list_lock); + return; + } + } + nvgpu_mutex_release(&trace->list_lock); + + *pid = 0; + *vmid = 0xffffffffU; +} + +int nvgpu_gr_fecs_trace_init(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace; + + if (!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)) { + nvgpu_err(g, "invalid NUM_RECORDS chosen"); + nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false); + return -EINVAL; + } + + trace = nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_fecs_trace)); + if (trace == NULL) { + nvgpu_err(g, "failed to allocate fecs_trace"); + nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false); + return -ENOMEM; + } + g->fecs_trace = trace; + + nvgpu_mutex_init(&trace->poll_lock); + nvgpu_mutex_init(&trace->list_lock); + nvgpu_mutex_init(&trace->enable_lock); + + 
nvgpu_init_list_node(&trace->context_list); + + trace->enable_count = 0; + + return 0; +} + +int nvgpu_gr_fecs_trace_deinit(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + + if (trace == NULL) { + return 0; + } + + /* + * Check if tracer was enabled before attempting to stop the + * tracer thread. + */ + if (trace->enable_count > 0) { + nvgpu_thread_stop(&trace->poll_task); + } + + nvgpu_gr_fecs_trace_remove_contexts(g, &trace->context_list); + + nvgpu_mutex_destroy(&g->fecs_trace->list_lock); + nvgpu_mutex_destroy(&g->fecs_trace->poll_lock); + nvgpu_mutex_destroy(&g->fecs_trace->enable_lock); + + nvgpu_kfree(g, g->fecs_trace); + g->fecs_trace = NULL; + return 0; +} + +int nvgpu_gr_fecs_trace_num_ts(struct gk20a *g) +{ + return (g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes() + - sizeof(struct nvgpu_fecs_trace_record)) / sizeof(u64); +} + +struct nvgpu_fecs_trace_record *nvgpu_gr_fecs_trace_get_record( + struct gk20a *g, int idx) +{ + struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer = + nvgpu_gr_get_global_ctx_buffer_ptr(g); + struct nvgpu_mem *mem = nvgpu_gr_global_ctx_buffer_get_mem( + gr_global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER); + if (mem == NULL) { + return NULL; + } + + return (struct nvgpu_fecs_trace_record *) + ((u8 *) mem->cpu_va + + (idx * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes())); +} + +bool nvgpu_gr_fecs_trace_is_valid_record(struct gk20a *g, + struct nvgpu_fecs_trace_record *r) +{ + /* + * testing magic_hi should suffice. magic_lo is sometimes used + * as a sequence number in experimental ucode. + */ + return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi); +} + +size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g) +{ + return GK20A_FECS_TRACE_NUM_RECORDS + * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes(); +} + +int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g, + struct nvgpu_gpu_ctxsw_trace_filter *filter) +{ + int n; + int tag; + + /* Compute number of entries per record, with given filter */ + for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++) + n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0); + + /* Return max number of entries generated for the whole ring */ + return n * GK20A_FECS_TRACE_NUM_RECORDS; +} + +int nvgpu_gr_fecs_trace_enable(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + int write; + int err = 0; + + nvgpu_mutex_acquire(&trace->enable_lock); + trace->enable_count++; + + if (trace->enable_count == 1U) { + /* drop data in hw buffer */ + if (g->ops.gr.fecs_trace.flush) + g->ops.gr.fecs_trace.flush(g); + + write = g->ops.gr.fecs_trace.get_write_index(g); + + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) { + /* + * For enabling FECS trace support, MAILBOX1's MSB + * (Bit 31:31) should be set to 1. Bits 30:0 represents + * actual pointer value. + */ + write = write | + (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)); + } + + g->ops.gr.fecs_trace.set_read_index(g, write); + + /* + * FECS ucode does a priv holdoff around the assertion of + * context reset. So, pri transactions (e.g. mailbox1 register + * write) might fail due to this. Hence, do write with ack + * i.e. write and read it back to make sure write happened for + * mailbox1. 
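+		 * The same write-with-ack workaround is used below in
+		 * nvgpu_gr_fecs_trace_disable() and nvgpu_gr_fecs_trace_poll().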
+		 */
+		while (g->ops.gr.fecs_trace.get_read_index(g) != write) {
+			nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
+			g->ops.gr.fecs_trace.set_read_index(g, write);
+		}
+
+		err = nvgpu_thread_create(&trace->poll_task, g,
+			nvgpu_gr_fecs_trace_periodic_polling, __func__);
+		if (err != 0) {
+			nvgpu_warn(g, "failed to create FECS polling task");
+			goto done;
+		}
+	}
+
+done:
+	nvgpu_mutex_release(&trace->enable_lock);
+	return err;
+}
+
+int nvgpu_gr_fecs_trace_disable(struct gk20a *g)
+{
+	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
+	int read = 0;
+
+	if (trace == NULL) {
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&trace->enable_lock);
+	if (trace->enable_count <= 0U) {
+		nvgpu_mutex_release(&trace->enable_lock);
+		return 0;
+	}
+
+	trace->enable_count--;
+	if (trace->enable_count == 0U) {
+		if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
+			/*
+			 * For disabling FECS trace support, MAILBOX1's MSB
+			 * (Bit 31:31) should be set to 0.
+			 */
+			read = g->ops.gr.fecs_trace.get_read_index(g) &
+				(~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
+
+			g->ops.gr.fecs_trace.set_read_index(g, read);
+
+			/*
+			 * FECS ucode does a priv holdoff around the assertion
+			 * of context reset. So, pri transactions (e.g.
+			 * mailbox1 register write) might fail due to this.
+			 * Hence, do write with ack i.e. write and read it back
+			 * to make sure write happened for mailbox1.
+			 */
+			while (g->ops.gr.fecs_trace.get_read_index(g) != read) {
+				nvgpu_log(g, gpu_dbg_ctxsw,
+					"mailbox1 update failed");
+				g->ops.gr.fecs_trace.set_read_index(g, read);
+			}
+		}
+		nvgpu_thread_stop(&trace->poll_task);
+	}
+	nvgpu_mutex_release(&trace->enable_lock);
+
+	return 0;
+}
+
+bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g)
+{
+	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
+
+	return (trace && (trace->enable_count > 0));
+}
+
+void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g)
+{
+	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
+
+	g->ops.gr.fecs_trace.set_read_index(g,
+		g->ops.gr.fecs_trace.get_write_index(g));
+}
+
+/*
+ * Converts HW entry format to userspace-facing format and pushes it to the
+ * queue.
+ */
+int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index,
+	u32 *vm_update_mask)
+{
+	int i;
+	struct nvgpu_gpu_ctxsw_trace_entry entry = { };
+	struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
+	pid_t cur_pid = 0, new_pid = 0;
+	u32 cur_vmid = 0U, new_vmid = 0U;
+	u32 vmid = 0U;
+	int count = 0;
+
+	struct nvgpu_fecs_trace_record *r =
+		nvgpu_gr_fecs_trace_get_record(g, index);
+	if (r == NULL) {
+		return -EINVAL;
+	}
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
+		"consuming record trace=%p read=%d record=%p", trace, index, r);
+
+	if (!nvgpu_gr_fecs_trace_is_valid_record(g, r)) {
+		nvgpu_warn(g,
+			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
+			trace, index, r, r->magic_lo, r->magic_hi);
+		return -EINVAL;
+	}
+
+	/* Clear magic_hi to detect cases where CPU could read write index
+	 * before FECS record is actually written to DRAM. This should not
+	 * happen as we force FECS writes to SYSMEM by reading through PRAMIN.
+ */ + r->magic_hi = 0; + + if ((r->context_ptr != 0U) && (r->context_id != 0U)) { + nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr, + &trace->context_list, &cur_pid, &cur_vmid); + } else { + cur_vmid = 0xffffffffU; + cur_pid = 0; + } + + if (r->new_context_ptr != 0U) { + nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr, + &trace->context_list, &new_pid, &new_vmid); + } else { + new_vmid = 0xffffffffU; + new_pid = 0; + } + + nvgpu_log(g, gpu_dbg_ctxsw, + "context_ptr=%x (vmid=%u pid=%d)", + r->context_ptr, cur_vmid, cur_pid); + nvgpu_log(g, gpu_dbg_ctxsw, + "new_context_ptr=%x (vmid=%u pid=%d)", + r->new_context_ptr, new_vmid, new_pid); + + entry.context_id = r->context_id; + + /* break out FECS record into trace events */ + for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) { + + entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]); + entry.timestamp = + g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]); + entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; + + nvgpu_log(g, gpu_dbg_ctxsw, + "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x", + entry.tag, entry.timestamp, r->context_id, + r->new_context_id); + + switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) { + case NVGPU_GPU_CTXSW_TAG_RESTORE_START: + case NVGPU_GPU_CTXSW_TAG_CONTEXT_START: + entry.context_id = r->new_context_id; + entry.pid = new_pid; + entry.vmid = new_vmid; + break; + + case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST: + case NVGPU_GPU_CTXSW_TAG_FE_ACK: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP: + case NVGPU_GPU_CTXSW_TAG_SAVE_END: + entry.context_id = r->context_id; + entry.pid = cur_pid; + entry.vmid = cur_vmid; + break; + + default: + /* tags are not guaranteed to start at the beginning */ + if ((entry.tag != 0) && (entry.tag != + NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP)) { + nvgpu_warn(g, "TAG not found"); + } + continue; + } + + nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld", + entry.tag, entry.context_id, entry.pid); + + if (!entry.context_id) + continue; + + if (g->ops.gr.fecs_trace.vm_dev_write != NULL) { + g->ops.gr.fecs_trace.vm_dev_write(g, entry.vmid, + vm_update_mask, &entry); + } else { + nvgpu_gr_fecs_trace_write_entry(g, &entry); + } + count++; + } + + nvgpu_gr_fecs_trace_wake_up(g, vmid); + return count; +} + +int nvgpu_gr_fecs_trace_poll(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + u32 vm_update_mask = 0U; + int read = 0; + int write = 0; + int cnt; + int err = 0; + + nvgpu_mutex_acquire(&trace->poll_lock); + if (trace->enable_count == 0) { + goto done_unlock; + } + + err = gk20a_busy(g); + if (err) { + goto done_unlock; + } + + write = g->ops.gr.fecs_trace.get_write_index(g); + if ((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS)) { + nvgpu_err(g, + "failed to acquire write index, write=%d", write); + err = write; + goto done; + } + + read = g->ops.gr.fecs_trace.get_read_index(g); + + cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS); + if (!cnt) + goto done; + + nvgpu_log(g, gpu_dbg_ctxsw, + "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", + read, g->ops.gr.fecs_trace.get_read_index(g), write, cnt); + + /* Ensure all FECS writes have made it to SYSMEM */ + err = g->ops.mm.cache.fb_flush(g); + if (err != 0) { + nvgpu_err(g, "mm.cache.fb_flush() failed err=%d", err); + goto done; + } + + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) { + /* Bits 30:0 of MAILBOX1 represents actual read 
pointer value */ + read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT))); + } + + while (read != write) { + cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask); + if (cnt <= 0) { + break; + } + + /* Get to next record. */ + read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); + } + + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) { + /* + * In the next step, read pointer is going to be updated. + * So, MSB of read pointer should be set back to 1. This will + * keep FECS trace enabled. + */ + read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)); + } + + /* ensure FECS records has been updated before incrementing read index */ + nvgpu_wmb(); + g->ops.gr.fecs_trace.set_read_index(g, read); + + /* + * FECS ucode does a priv holdoff around the assertion of context + * reset. So, pri transactions (e.g. mailbox1 register write) might + * fail due to this. Hence, do write with ack i.e. write and read + * it back to make sure write happened for mailbox1. + */ + while (g->ops.gr.fecs_trace.get_read_index(g) != read) { + nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed"); + g->ops.gr.fecs_trace.set_read_index(g, read); + } + + if (g->ops.gr.fecs_trace.vm_dev_update) { + g->ops.gr.fecs_trace.vm_dev_update(g, vm_update_mask); + } + +done: + gk20a_idle(g); +done_unlock: + nvgpu_mutex_release(&trace->poll_lock); + return err; +} + +static int nvgpu_gr_fecs_trace_periodic_polling(void *arg) +{ + struct gk20a *g = (struct gk20a *)arg; + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + + nvgpu_log(g, gpu_dbg_ctxsw, "thread running"); + + while (!nvgpu_thread_should_stop(&trace->poll_task) && + trace->enable_count > 0U) { + + nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US, + GK20A_FECS_TRACE_FRAME_PERIOD_US * 2U); + + nvgpu_gr_fecs_trace_poll(g); + } + + return 0; +} + +int nvgpu_gr_fecs_trace_reset(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); + + if (!g->ops.gr.fecs_trace.is_enabled(g)) + return 0; + + nvgpu_gr_fecs_trace_poll(g); + return g->ops.gr.fecs_trace.set_read_index(g, 0); +} + +/* + * map global circ_buf to the context space and store the GPU VA + * in the context header. 
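+ * When NVGPU_FECS_TRACE_VA is enabled, the per-context GPU VA of the
+ * trace buffer is programmed; otherwise its physical address and
+ * aperture mask are used.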
+ */ +int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, + struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid) +{ + u64 addr = 0ULL; + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + struct nvgpu_mem *mem; + struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer = + nvgpu_gr_get_global_ctx_buffer_ptr(g); + u32 context_ptr; + u32 aperture_mask; + int ret; + + if (trace == NULL) { + return -EINVAL; + } + + context_ptr = nvgpu_inst_block_ptr(g, inst_block); + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, + "pid=%d context_ptr=%x inst_block=%llx", + pid, context_ptr, + nvgpu_inst_block_addr(g, inst_block)); + + mem = nvgpu_gr_global_ctx_buffer_get_mem(gr_global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER); + if (mem == NULL) { + return -EINVAL; + } + + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { + addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA); + nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr); + aperture_mask = 0; + } else { + addr = nvgpu_inst_block_addr(g, mem); + nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr); + aperture_mask = + g->ops.gr.ctxsw_prog.get_ts_buffer_aperture_mask(g, mem); + } + if (addr == 0ULL) { + return -ENOMEM; + } + + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + + nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr, + GK20A_FECS_TRACE_NUM_RECORDS); + + g->ops.gr.ctxsw_prog.set_ts_num_records(g, mem, + GK20A_FECS_TRACE_NUM_RECORDS); + + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA) && subctx != NULL) { + mem = nvgpu_gr_subctx_get_ctx_header(subctx); + } + + g->ops.gr.ctxsw_prog.set_ts_buffer_ptr(g, mem, addr, aperture_mask); + + ret = nvgpu_gr_fecs_trace_add_context(g, context_ptr, pid, vmid, + &trace->context_list); + + return ret; +} + +int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g, + struct nvgpu_mem *inst_block) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + u32 context_ptr; + + if (trace == NULL) { + return -EINVAL; + } + + context_ptr = nvgpu_inst_block_ptr(g, inst_block); + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, + "context_ptr=%x", context_ptr); + + if (g->ops.gr.fecs_trace.is_enabled(g)) { + if (g->ops.gr.fecs_trace.flush) { + g->ops.gr.fecs_trace.flush(g); + } + nvgpu_gr_fecs_trace_poll(g); + } + + nvgpu_gr_fecs_trace_remove_context(g, context_ptr, + &trace->context_list); + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/gr/fs_state.c b/drivers/gpu/nvgpu/common/gr/fs_state.c new file mode 100644 index 000000000..d6b2e0f91 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/fs_state.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include + +static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config) +{ + int err; + u32 *tpc_sm_id; + u32 sm_id_size = g->ops.gr.init.get_sm_id_size(); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + tpc_sm_id = nvgpu_kcalloc(g, sm_id_size, sizeof(u32)); + if (tpc_sm_id == NULL) { + return -ENOMEM; + } + + err = g->ops.gr.init.sm_id_config(g, tpc_sm_id, config, NULL, false); + + nvgpu_kfree(g, tpc_sm_id); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return err; +} + +static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config) +{ + u32 pes_tpc_mask = 0; + u32 gpc, pes; + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_TPC_PER_GPC); +#ifdef CONFIG_NVGPU_NON_FUSA + u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config); + u32 fuse_tpc_mask; + u32 val; + u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g); + u32 gpc_phys_id; +#endif + + /* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */ + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) { + for (pes = 0; + pes < nvgpu_gr_config_get_pe_count_per_gpc(config); + pes++) { + pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask( + config, gpc, pes) << + nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc); + } + } + + nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask); + +#ifdef CONFIG_NVGPU_NON_FUSA + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* + * Fuse registers must be queried with physical gpc-id and not + * the logical ones. For tu104 and before chips logical gpc-id + * is same as physical gpc-id for non-floorswept config but for + * chips after tu104 it may not be true. 
+ */ + gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, + cur_gr_instance, 0U); + fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id); + if ((g->tpc_fs_mask_user != 0U) && + (g->tpc_fs_mask_user != fuse_tpc_mask)) { + if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count), + U32(1))) { + val = g->tpc_fs_mask_user; + val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1)); + /* + * skip tpc to disable the other tpc cause channel + * timeout + */ + val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1)); + pes_tpc_mask = val; + } + } + } +#endif + + g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask); +} + +int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config) +{ + u32 tpc_index, gpc_index; + u32 sm_id = 0; +#ifdef CONFIG_NVGPU_NON_FUSA + u32 fuse_tpc_mask; + u32 max_tpc_cnt; + u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g); + u32 gpc_phys_id; +#endif + u32 gpc_cnt, tpc_cnt; + u32 num_sm; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + g->ops.gr.init.fs_state(g); + + err = g->ops.gr.config.init_sm_id_table(g, config); + if (err != 0) { + return err; + } + + num_sm = nvgpu_gr_config_get_no_of_sm(config); + nvgpu_assert(num_sm > 0U); + + for (sm_id = 0; sm_id < num_sm; sm_id++) { + struct nvgpu_sm_info *sm_info = + nvgpu_gr_config_get_sm_info(config, sm_id); + tpc_index = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); + gpc_index = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); + + g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id, + config, NULL, false); + } + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + g->ops.gr.init.pd_tpc_per_gpc(g, config); + } + +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* gr__setup_pd_mapping */ + g->ops.gr.init.rop_mapping(g, config); + g->ops.gr.init.pd_skip_table_gpc(g, config); + } +#endif + + gpc_cnt = nvgpu_gr_config_get_gpc_count(config); + tpc_cnt = nvgpu_gr_config_get_tpc_count(config); + +#ifdef CONFIG_NVGPU_NON_FUSA + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* + * Fuse registers must be queried with physical gpc-id and not + * the logical ones. For tu104 and before chips logical gpc-id + * is same as physical gpc-id for non-floorswept config but for + * chips after tu104 it may not be true. + */ + gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, + cur_gr_instance, 0U); + fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id); + max_tpc_cnt = nvgpu_gr_config_get_max_tpc_count(config); + + if ((g->tpc_fs_mask_user != 0U) && + (fuse_tpc_mask == + nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1)))) { + u32 val = g->tpc_fs_mask_user; + val &= nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1)); + tpc_cnt = (u32)hweight32(val); + } + } +#endif + + g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt); + + gr_load_tpc_mask(g, config); + + err = gr_load_sm_id_config(g, config); + if (err != 0) { + nvgpu_err(g, "load_smid_config failed err=%d", err); + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return err; +} + diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c new file mode 100644 index 000000000..9294bbc28 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION +#include +#include +#endif + +#include + +#include "global_ctx_priv.h" + +#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT +#include + +struct nvgpu_posix_fault_inj *nvgpu_golden_ctx_verif_get_fault_injection(void) +{ + struct nvgpu_posix_fault_inj_container *c = + nvgpu_posix_fault_injection_get_container(); + + return &c->golden_ctx_verif_fi; +} + +struct nvgpu_posix_fault_inj *nvgpu_local_golden_image_get_fault_injection(void) +{ + struct nvgpu_posix_fault_inj_container *c = + nvgpu_posix_fault_injection_get_container(); + + return &c->local_golden_image_fi; +} +#endif + +struct nvgpu_gr_global_ctx_buffer_desc * +nvgpu_gr_global_ctx_desc_alloc(struct gk20a *g) +{ + struct nvgpu_gr_global_ctx_buffer_desc *desc = + nvgpu_kzalloc(g, sizeof(*desc) * + U64(NVGPU_GR_GLOBAL_CTX_COUNT)); + return desc; +} + +void nvgpu_gr_global_ctx_desc_free(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + nvgpu_kfree(g, desc); +} + + +void nvgpu_gr_global_ctx_set_size(struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index, size_t size) +{ + nvgpu_assert(index < NVGPU_GR_GLOBAL_CTX_COUNT); + desc[index].size = size; +} + +size_t nvgpu_gr_global_ctx_get_size(struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index) +{ + return desc[index].size; +} + +static void nvgpu_gr_global_ctx_buffer_destroy(struct gk20a *g, + struct nvgpu_mem *mem) +{ + nvgpu_dma_free(g, mem); +} + +void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + u32 i; + + if (desc == NULL) { + return; + } + + for (i = 0; i < NVGPU_GR_GLOBAL_CTX_COUNT; i++) { + if (desc[i].destroy != NULL) { + desc[i].destroy(g, &desc[i].mem); + desc[i].destroy = NULL; + } + } + + nvgpu_log_fn(g, "done"); +} + +static int nvgpu_gr_global_ctx_buffer_alloc_sys(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + if (nvgpu_mem_is_valid(&desc[index].mem)) { + return 0; + } + + err = nvgpu_dma_alloc_sys(g, desc[index].size, + &desc[index].mem); + if (err != 0) { + return err; + } + + desc[index].destroy = nvgpu_gr_global_ctx_buffer_destroy; + + return err; +} + +#ifdef CONFIG_NVGPU_VPR +static int nvgpu_gr_global_ctx_buffer_alloc_vpr(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index) +{ + int err = 0; + + 
nvgpu_log_fn(g, " "); + + if (nvgpu_mem_is_valid(&desc[index].mem)) { + return 0; + } + + if (g->ops.secure_alloc != NULL) { + err = g->ops.secure_alloc(g, + &desc[index].mem, desc[index].size, + &desc[index].destroy); + if (err != 0) { + return err; + } + } + + return err; +} +#endif + +static bool nvgpu_gr_global_ctx_buffer_sizes_are_valid(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + + if (desc[NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP].size == 0U) { + return false; + } + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR].size == 0U) || + (desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL].size == 0U) || + (desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE].size == 0U)) { + return false; + } +#ifdef CONFIG_NVGPU_VPR + if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR].size == 0U) || + (desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR].size == 0U) || + (desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR].size == 0U)) { + return false; + } +#endif + } + + return true; +} + +#ifdef CONFIG_NVGPU_VPR +static int nvgpu_gr_global_ctx_buffer_vpr_alloc(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + int err = 0; + + /* + * MIG supports only compute class. + * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are supported. + */ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + nvgpu_log(g, gpu_dbg_gr | gpu_dbg_mig, + "2D class is not supported " + "skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB " + "and RTV_CB"); + return 0; + } + + err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc, + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR); + if (err != 0) { + goto fail; + } + + err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR); + if (err != 0) { + goto fail; + } + + err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR); + if (err != 0) { + goto fail; + } +fail: + return err; +} +#endif + +static int nvgpu_gr_global_ctx_buffer_sys_alloc(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + int err = 0; + + /* + * MIG supports only compute class. + * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are supported. 
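+	 * Only the PRIV_ACCESS_MAP buffer is allocated unconditionally
+	 * further below.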
+ */ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc, + NVGPU_GR_GLOBAL_CTX_CIRCULAR); + if (err != 0) { + goto fail; + } + + err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL); + if (err != 0) { + goto fail; + } + + err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE); + if (err != 0) { + goto fail; + } + } + + err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP); + if (err != 0) { + goto fail; + } +fail: + return err; +} + + +int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + int err = 0; + + if (nvgpu_gr_global_ctx_buffer_sizes_are_valid(g, desc) != true) { + return -EINVAL; + } + + err = nvgpu_gr_global_ctx_buffer_sys_alloc(g, desc); + if (err != 0) { + goto clean_up; + } + +#ifdef CONFIG_NVGPU_FECS_TRACE + if (desc[NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER].size != 0U) { + err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER); + if (err != 0) { + goto clean_up; + } + } +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if (desc[NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER].size != 0U) { + err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER); + if (err != 0) { + goto clean_up; + } + } + } +#endif + +#ifdef CONFIG_NVGPU_VPR + if (nvgpu_gr_global_ctx_buffer_vpr_alloc(g, desc) != 0) { + goto clean_up; + } +#endif + + return err; + +clean_up: + nvgpu_gr_global_ctx_buffer_free(g, desc); + return err; +} + +u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index, + struct vm_gk20a *vm, u32 flags, bool priv) +{ + u64 gpu_va; + + if (!nvgpu_mem_is_valid(&desc[index].mem)) { + return 0; + } + + gpu_va = nvgpu_gmmu_map(vm, &desc[index].mem, desc[index].mem.size, + flags, gk20a_mem_flag_none, priv, + desc[index].mem.aperture); + return gpu_va; +} + +void nvgpu_gr_global_ctx_buffer_unmap( + struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index, + struct vm_gk20a *vm, u64 gpu_va) +{ + if (nvgpu_mem_is_valid(&desc[index].mem)) { + nvgpu_gmmu_unmap(vm, &desc[index].mem, gpu_va); + } +} + +struct nvgpu_mem *nvgpu_gr_global_ctx_buffer_get_mem( + struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index) +{ + if (nvgpu_mem_is_valid(&desc[index].mem)) { + return &desc[index].mem; + } + return NULL; +} + +bool nvgpu_gr_global_ctx_buffer_ready( + struct nvgpu_gr_global_ctx_buffer_desc *desc, + u32 index) +{ + if (nvgpu_mem_is_valid(&desc[index].mem)) { + return true; + } + return false; +} + +struct nvgpu_gr_global_ctx_local_golden_image * +nvgpu_gr_global_ctx_init_local_golden_image(struct gk20a *g, + struct nvgpu_mem *source_mem, size_t size) +{ + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image; + +#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT + if (nvgpu_posix_fault_injection_handle_call( + nvgpu_local_golden_image_get_fault_injection())) { + return NULL; + } +#endif + + local_golden_image = nvgpu_kzalloc(g, sizeof(*local_golden_image)); + if (local_golden_image == NULL) { + return NULL; + } + + local_golden_image->context = nvgpu_vzalloc(g, size); + if (local_golden_image->context == NULL) { + nvgpu_kfree(g, local_golden_image); + return NULL; + } + + local_golden_image->size = size; + + nvgpu_mem_rd_n(g, source_mem, 0, local_golden_image->context, + nvgpu_safe_cast_u64_to_u32(size)); + + return local_golden_image; +} + 
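+/*
+ * Illustrative lifecycle sketch (comment only, not driver code): a local
+ * golden image is captured once from the first context's memory, replayed
+ * into each new gr_ctx buffer, and released at teardown. The src_mem,
+ * new_ctx_mem and size names below are hypothetical placeholders.
+ *
+ *	struct nvgpu_gr_global_ctx_local_golden_image *img;
+ *
+ *	img = nvgpu_gr_global_ctx_init_local_golden_image(g, src_mem, size);
+ *	if (img == NULL)
+ *		return -ENOMEM;
+ *	nvgpu_gr_global_ctx_load_local_golden_image(g, img, new_ctx_mem);
+ *	nvgpu_gr_global_ctx_deinit_local_golden_image(g, img);
+ */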
+#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION +bool nvgpu_gr_global_ctx_compare_golden_images(struct gk20a *g, + bool is_sysmem, + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image1, + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image2, + size_t size) +{ + bool is_identical = true; + u32 *data1 = local_golden_image1->context; + u32 *data2 = local_golden_image2->context; +#ifdef CONFIG_NVGPU_DGPU + u32 i; +#endif + +#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT + if (nvgpu_posix_fault_injection_handle_call( + nvgpu_golden_ctx_verif_get_fault_injection())) { + return false; + } +#endif + + /* + * In case of sysmem, direct mem compare can be used. + * For vidmem, word by word comparison only works and + * it is too early to use ce engine for read operations. + */ + if (is_sysmem) { + if (nvgpu_memcmp((u8 *)data1, (u8 *)data2, size) != 0) { + is_identical = false; + } + } + else { +#ifdef CONFIG_NVGPU_DGPU + for( i = 0U; i < nvgpu_safe_cast_u64_to_u32(size/sizeof(u32)); + i = nvgpu_safe_add_u32(i, 1U)) { + if (*(data1 + i) != *(data2 + i)) { + is_identical = false; + nvgpu_log_info(g, + "mismatch i = %u golden1: %u golden2 %u", + i, *(data1 + i), *(data2 + i)); + break; + } + } +#else + is_identical = false; +#endif + } + + nvgpu_log_info(g, "%s result %u", __func__, is_identical); + return is_identical; +} +#endif + +void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g, + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, + struct nvgpu_mem *target_mem) +{ + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update. */ + if (g->ops.mm.cache.l2_flush(g, true) != 0) { + nvgpu_err(g, "l2_flush failed"); + } + + nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context, + nvgpu_safe_cast_u64_to_u32(local_golden_image->size)); + + nvgpu_log(g, gpu_dbg_gr, "loaded saved golden image into gr_ctx"); +} + +void nvgpu_gr_global_ctx_deinit_local_golden_image(struct gk20a *g, + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image) +{ + nvgpu_vfree(g, local_golden_image->context); + nvgpu_kfree(g, local_golden_image); +} + +#ifdef CONFIG_NVGPU_DEBUGGER +u32 *nvgpu_gr_global_ctx_get_local_golden_image_ptr( + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image) +{ + return local_golden_image->context; +} +#endif diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h b/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h new file mode 100644 index 000000000..06647c6f4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_GLOBAL_CTX_PRIV_H +#define NVGPU_GR_GLOBAL_CTX_PRIV_H + +/** + * Global context buffer descriptor structure. + * + * This structure stores properties applicable to each global + * context buffer. + */ +struct nvgpu_gr_global_ctx_buffer_desc { + /** + * Memory to hold global context buffer. + */ + struct nvgpu_mem mem; + + /** + * Size of global context buffer. + */ + size_t size; + + /** + * Function pointer to free global context buffer. + */ + global_ctx_mem_destroy_fn destroy; +}; + +/** + * Local Golden context image descriptor structure. + * + * This structure stores details of a local Golden context image. + * Pointer to this struct is maintained in + * #nvgpu_gr_obj_ctx_golden_image structure. + */ +struct nvgpu_gr_global_ctx_local_golden_image { + /** + * Pointer to local Golden context image memory. + */ + u32 *context; + + /** + * Size of local Golden context image. + */ + size_t size; +}; + +#endif /* NVGPU_GR_GLOBAL_CTX_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c new file mode 100644 index 000000000..ffb41ba24 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -0,0 +1,1204 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_GRAPHICS +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +#include +#endif +#include + +#include "gr_priv.h" + +static int gr_alloc_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr *gr) +{ + int err; + u32 size; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + /* + * MIG supports only compute class. + * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are supported. 
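+	 * Their sizes are therefore programmed only on the non-MIG path below;
+	 * the PRIV_ACCESS_MAP and (when enabled) FECS trace sizes are always set.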
+ */ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + size = g->ops.gr.init.get_global_ctx_cb_buffer_size(g); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, + "cb_buffer_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR, size); +#ifdef CONFIG_NVGPU_VPR + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR, size); +#endif + + size = g->ops.gr.init.get_global_ctx_pagepool_buffer_size(g); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, + "pagepool_buffer_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL, size); +#ifdef CONFIG_NVGPU_VPR + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, size); +#endif + size = g->ops.gr.init.get_global_attr_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config), + nvgpu_gr_config_get_max_tpc_count(gr->config)); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, + "attr_buffer_size : %u", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, size); +#ifdef CONFIG_NVGPU_VPR + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR, size); +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS + if (g->ops.gr.init.get_rtv_cb_size != NULL) { + size = g->ops.gr.init.get_rtv_cb_size(g); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, + "rtv_circular_buffer_size : %u", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, size); + } +#endif + } + + size = NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_SIZE; + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "priv_access_map_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, size); + +#ifdef CONFIG_NVGPU_FECS_TRACE + size = nvgpu_gr_fecs_trace_buffer_size(g); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "fecs_trace_buffer_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, size); +#endif + + err = nvgpu_gr_global_ctx_buffer_alloc(g, gr->global_ctx_buffer); + if (err != 0) { + return err; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return 0; +} + +u32 nvgpu_gr_get_no_of_sm(struct gk20a *g) +{ + return nvgpu_gr_config_get_no_of_sm(g->gr->config); +} + +u32 nvgpu_gr_gpc_offset(struct gk20a *g, u32 gpc) +{ + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 gpc_offset = nvgpu_safe_mult_u32(gpc_stride , gpc); + + nvgpu_assert(gpc < nvgpu_gr_config_get_gpc_count(nvgpu_gr_get_config_ptr(g))); + + return gpc_offset; +} + +u32 nvgpu_gr_tpc_offset(struct gk20a *g, u32 tpc) +{ + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_TPC_IN_GPC_STRIDE); + u32 tpc_offset = nvgpu_safe_mult_u32(tpc_in_gpc_stride, tpc); + + nvgpu_assert(tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(nvgpu_gr_get_config_ptr(g))); + + return tpc_offset; +} + +u32 nvgpu_gr_sm_offset(struct gk20a *g, u32 sm) +{ + u32 sm_pri_stride = nvgpu_get_litter_value(g, GPU_LIT_SM_PRI_STRIDE); + u32 sm_offset = nvgpu_safe_mult_u32(sm_pri_stride, sm); + + nvgpu_assert(sm < nvgpu_gr_config_get_sm_count_per_tpc(nvgpu_gr_get_config_ptr(g))); + + return sm_offset; +} + +u32 nvgpu_gr_rop_offset(struct gk20a *g, u32 rop) +{ + u32 rop_pri_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE); + u32 rop_offset = nvgpu_safe_mult_u32(rop_pri_stride, rop); + + return rop_offset; +} + +static void disable_gr_interrupts(struct gk20a *g) +{ + /** Disable gr intr */ + 
g->ops.gr.intr.enable_interrupts(g, false); + + /** Disable all exceptions */ + g->ops.gr.intr.enable_exceptions(g, g->gr->config, false); + + /** Disable interrupts at MC level */ + nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_GR, + NVGPU_CIC_INTR_DISABLE); + nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_GR, + NVGPU_CIC_INTR_DISABLE); +} + +int nvgpu_gr_suspend(struct gk20a *g) +{ + int ret = 0; + + nvgpu_log_fn(g, " "); + + ret = g->ops.gr.init.wait_empty(g); + if (ret != 0) { + return ret; + } + + /* Disable fifo access */ + g->ops.gr.init.fifo_access(g, false); + + disable_gr_interrupts(g); + + g->ops.gr.intr.flush_channel_tlb(g); + + g->gr->initialized = false; + + nvgpu_log_fn(g, "done"); + return ret; +} + +static int gr_init_setup_hw(struct gk20a *g, struct nvgpu_gr *gr) +{ + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.gr.init.eng_config != NULL) { + g->ops.gr.init.eng_config(g); + } +#endif + + g->ops.gr.init.gpc_mmu(g); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + g->ops.gr.init.pes_vsc_stream(g); + } + + if (g->ops.priv_ring.set_ppriv_timeout_settings != NULL) { + g->ops.priv_ring.set_ppriv_timeout_settings(g); + } + + /** Enable fecs error interrupts */ + g->ops.gr.falcon.fecs_host_int_enable(g); + g->ops.gr.intr.enable_hww_exceptions(g); + /** Enable TPC exceptions per GPC */ + g->ops.gr.intr.enable_gpc_exceptions(g, gr->config); + /** Reset and enable exceptions */ + g->ops.gr.intr.enable_exceptions(g, gr->config, true); + + /* + * SM HWWs are enabled during golden context creation, which happens + * at the time of first context creation i.e. first GPU job submission. + * Hence, injection of SM HWWs should only be attempted afterwards. + */ + + /* enable ECC for L1/SM */ + if (g->ops.gr.init.ecc_scrub_reg != NULL) { + err = g->ops.gr.init.ecc_scrub_reg(g, gr->config); + if (err != 0) { + goto out; + } + } + +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + err = nvgpu_gr_zcull_init_hw(g, gr->zcull, gr->config); + if (err != 0) { + goto out; + } + + nvgpu_gr_zbc_load_table(g, gr->zbc); + +#ifdef CONFIG_NVGPU_GFXP + if (g->ops.gr.init.preemption_state != NULL) { + err = g->ops.gr.init.preemption_state(g); + if (err != 0) { + goto out; + } + } +#endif /* CONFIG_NVGPU_GFXP */ + } +#endif /* CONFIG_NVGPU_GRAPHICS */ + + /* + * Disable both surface and LG coalesce. 
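+	 * Both HALs are optional; chips without these controls simply skip
+	 * the writes (hence the NULL checks below).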
+ */ + if (g->ops.gr.init.su_coalesce != NULL) { + g->ops.gr.init.su_coalesce(g, 0); + } + if (g->ops.gr.init.lg_coalesce != NULL) { + g->ops.gr.init.lg_coalesce(g, 0); + } + + /* floorsweep anything left */ + err = nvgpu_gr_fs_state_init(g, gr->config); + if (err != 0) { + goto out; + } + + err = g->ops.gr.init.wait_idle(g); +out: + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return err; +} + +static void gr_remove_support(struct gk20a *g) +{ + struct nvgpu_gr *gr = g->gr; + + nvgpu_log_fn(g, " "); + + nvgpu_gr_global_ctx_buffer_free(g, gr->global_ctx_buffer); + nvgpu_gr_global_ctx_desc_free(g, gr->global_ctx_buffer); + + nvgpu_gr_ctx_desc_free(g, gr->gr_ctx_desc); + + nvgpu_gr_config_deinit(g, gr->config); + + nvgpu_netlist_deinit_ctx_vars(g); + +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map); +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_gr_zbc_deinit(g, gr->zbc); + nvgpu_gr_zcull_deinit(g, gr->zcull); +#endif /* CONFIG_NVGPU_GRAPHICS */ + + nvgpu_gr_obj_ctx_deinit(g, gr->golden_image); + + nvgpu_gr_free(g); +} + +static int gr_init_access_map(struct gk20a *g, struct nvgpu_gr *gr) +{ + struct nvgpu_mem *mem; + u32 nr_pages = + DIV_ROUND_UP(NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_SIZE, + NVGPU_CPU_PAGE_SIZE); + u32 nr_pages_size = nvgpu_safe_mult_u32(NVGPU_CPU_PAGE_SIZE, nr_pages); +#ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP + u32 *whitelist = NULL; + u32 w, num_entries = 0U; +#endif + + nvgpu_log(g, gpu_dbg_gr, " "); + + mem = nvgpu_gr_global_ctx_buffer_get_mem(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP); + if (mem == NULL) { + return -EINVAL; + } + + nvgpu_memset(g, mem, 0, 0, nr_pages_size); + +#ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP + g->ops.gr.init.get_access_map(g, &whitelist, &num_entries); + + for (w = 0U; w < num_entries; w++) { + u32 map_bit, map_byte, map_shift, x; + map_bit = whitelist[w] >> 2; + map_byte = map_bit >> 3; + map_shift = map_bit & 0x7U; /* i.e. 
0-7 */ + nvgpu_log_info(g, "access map addr:0x%x byte:0x%x bit:%d", + whitelist[w], map_byte, map_shift); + x = nvgpu_mem_rd32(g, mem, (u64)map_byte / (u64)sizeof(u32)); + x |= BIT32( + (map_byte % (u32)sizeof(u32) * BITS_PER_BYTE_U32) + + map_shift); + nvgpu_mem_wr32(g, mem, (u64)map_byte / (u64)sizeof(u32), x); + } +#endif + + return 0; +} + +static int gr_init_config(struct gk20a *g, struct nvgpu_gr *gr) +{ + gr->config = nvgpu_gr_config_init(g); + if (gr->config == NULL) { + return -ENOMEM; + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "bundle_cb_default_size: %d", + g->ops.gr.init.get_bundle_cb_default_size(g)); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "min_gpm_fifo_depth: %d", + g->ops.gr.init.get_min_gpm_fifo_depth(g)); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "bundle_cb_token_limit: %d", + g->ops.gr.init.get_bundle_cb_token_limit(g)); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "attrib_cb_default_size: %d", + g->ops.gr.init.get_attrib_cb_default_size(g)); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "attrib_cb_size: %d", + g->ops.gr.init.get_attrib_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config))); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "alpha_cb_default_size: %d", + g->ops.gr.init.get_alpha_cb_default_size(g)); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "alpha_cb_size: %d", + g->ops.gr.init.get_alpha_cb_size(g, + nvgpu_gr_config_get_tpc_count(gr->config))); + + return 0; +} + +static int nvgpu_gr_init_ctx_state(struct gk20a *g, struct nvgpu_gr *gr) +{ + int err = 0; + + /* Initialize ctx state during boot and recovery */ + err = nvgpu_gr_falcon_init_ctx_state(g, gr->falcon); + if (err != 0) { + nvgpu_err(g, "gr ctx_state init failed"); + } + + return err; +} + +static int gr_init_ctx_bufs(struct gk20a *g, struct nvgpu_gr *gr) +{ + int err = 0; + + gr->gr_ctx_desc = nvgpu_gr_ctx_desc_alloc(g); + if (gr->gr_ctx_desc == NULL) { + err = -ENOMEM; + goto clean_up; + } + +#ifdef CONFIG_NVGPU_GFXP + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, + NVGPU_GR_CTX_PREEMPT_CTXSW, + nvgpu_gr_falcon_get_preempt_image_size(gr->falcon)); + } +#endif + + gr->global_ctx_buffer = nvgpu_gr_global_ctx_desc_alloc(g); + if (gr->global_ctx_buffer == NULL) { + err = -ENOMEM; + goto clean_up; + } + + err = gr_alloc_global_ctx_buffers(g, gr); + if (err != 0) { + goto clean_up; + } + + err = gr_init_access_map(g, gr); + if (err != 0) { + goto clean_up; + } + + return 0; + +clean_up: + return err; +} + +static int gr_init_ecc_init(struct gk20a *g) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if ((g->ops.gr.ecc.gpc_tpc_ecc_init != NULL) && !g->ecc.initialized) { + err = g->ops.gr.ecc.gpc_tpc_ecc_init(g); + if (err != 0) { + nvgpu_err(g, "failed to init gr gpc/tpc ecc"); + return err; + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + return err; +} + +static int gr_init_setup_sw(struct gk20a *g, struct nvgpu_gr *gr) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + if (gr->sw_ready) { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "skip init"); + return 0; + } + + err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image, + nvgpu_gr_falcon_get_golden_image_size(gr->falcon)); + if (err != 0) { + goto clean_up; + } + +#ifdef CONFIG_NVGPU_DEBUGGER + err = nvgpu_gr_hwpm_map_init(g, &gr->hwpm_map, + nvgpu_gr_falcon_get_pm_ctxsw_image_size(gr->falcon)); + if (err != 0) { + nvgpu_err(g, "hwpm_map init failed"); + goto clean_up; + } +#endif + + err = gr_init_ctx_bufs(g, gr); + if (err != 0) { + goto clean_up; + } + +#ifdef CONFIG_NVGPU_GRAPHICS + if 
(!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + err = nvgpu_gr_config_init_map_tiles(g, gr->config); + if (err != 0) { + goto clean_up; + } + + err = nvgpu_gr_zcull_init(g, &gr->zcull, + nvgpu_gr_falcon_get_zcull_image_size(gr->falcon), + gr->config); + if (err != 0) { + goto clean_up; + } + + err = nvgpu_gr_zbc_init(g, &gr->zbc); + if (err != 0) { + goto clean_up; + } + } +#endif /* CONFIG_NVGPU_GRAPHICS */ + + gr->remove_support = gr_remove_support; + gr->sw_ready = true; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return 0; + +clean_up: + nvgpu_err(g, "fail"); + gr_remove_support(g); + return err; +} + +static int gr_init_prepare_hw_impl(struct gk20a *g) +{ + struct netlist_av_list *sw_non_ctx_load = + nvgpu_netlist_get_sw_non_ctx_load_av_list(g); + u32 i; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "Prepare GR%u HW", + nvgpu_gr_get_cur_instance_id(g)); + + /** Enable interrupts */ + g->ops.gr.intr.enable_interrupts(g, true); + + /* enable fifo access */ + g->ops.gr.init.fifo_access(g, true); + + /* load non_ctx init */ + nvgpu_log_info(g, "begin: netlist: sw_non_ctx_load: register writes"); + for (i = 0; i < sw_non_ctx_load->count; i++) { + nvgpu_writel(g, sw_non_ctx_load->l[i].addr, + sw_non_ctx_load->l[i].value); + } + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_gr_init_reset_enable_hw_non_ctx_local(g); + nvgpu_next_gr_init_reset_enable_hw_non_ctx_global(g); +#endif + nvgpu_log_info(g, "end: netlist: sw_non_ctx_load: register writes"); + + err = g->ops.gr.falcon.wait_mem_scrubbing(g); + if (err != 0) { + goto out; + } + + err = g->ops.gr.init.wait_idle(g); + if (err != 0) { + goto out; + } + +out: + if (err != 0) { + nvgpu_err(g, "fail"); + } else { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + } + + return err; +} + +static int gr_init_prepare_hw(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + /** Enable interrupts at MC level */ + nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_GR, NVGPU_CIC_INTR_ENABLE); + nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_GR, NVGPU_CIC_INTR_ENABLE); + + return nvgpu_gr_exec_with_ret_for_each_instance(g, + gr_init_prepare_hw_impl(g)); +} + +static int gr_reset_engine(struct gk20a *g) +{ + u32 cur_gr_instance_id = nvgpu_gr_get_cur_instance_id(g); + int err; + const struct nvgpu_device *dev = + nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, + nvgpu_gr_get_syspipe_id(g, g->mig.cur_gr_instance)); + + nvgpu_log(g, gpu_dbg_gr, "Reset GR%u", cur_gr_instance_id); + + /* Reset GR engine: Disable then enable GR engine */ + err = g->ops.mc.enable_dev(g, dev, false); + if (err != 0) { + nvgpu_log(g, gpu_dbg_info, "Device reset_id:%u disable failed", + dev->reset_id); + return err; + } + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.gr.init.reset_gpcs != NULL) { + err = g->ops.gr.init.reset_gpcs(g); + if (err != 0) { + nvgpu_err(g, "Reset gpcs failed"); + return err; + } + } +#endif + + err = g->ops.mc.enable_dev(g, dev, true); + if (err != 0) { + nvgpu_log(g, gpu_dbg_info, "Device reset_id:%u enable failed", + dev->reset_id); + return err; + } + + /* + * Do not reset PERFMON and BLG when MIG is enabled as PERFMON is a + * global engine which is shared by all syspipes. Individual PERF + * counters can be reset during gr syspipe reset. 
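+	 * The PERFMON/BLG unit reset below is therefore limited to the
+	 * non-MIG case.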
+ */ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + err = nvgpu_mc_reset_units(g, + NVGPU_UNIT_PERFMON | NVGPU_UNIT_BLG); + if (err != 0) { + nvgpu_log_info(g, "PERMON | BLG unit reset failed"); + return err; + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + return 0; +} + +static int gr_reset_hw_and_load_prod(struct gk20a *g) +{ + int err; + + err = nvgpu_gr_exec_with_ret_for_each_instance(g, gr_reset_engine(g)); + if (err != 0) { + return err; + } + + nvgpu_gr_exec_for_all_instances(g, nvgpu_cg_init_gr_load_gating_prod(g)); + + /* Disable elcg until it gets enabled later in the init*/ + nvgpu_cg_elcg_disable_no_wait(g); + + return 0; +} + +int nvgpu_gr_enable_hw(struct gk20a *g) +{ + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + err = gr_reset_hw_and_load_prod(g); + if (err != 0) { + return err; + } + + err = gr_init_prepare_hw(g); + if (err != 0) { + return err; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + + return 0; +} + +#ifdef CONFIG_NVGPU_ENGINE_RESET +static int nvgpu_gr_enable_hw_for_instance(struct gk20a *g) +{ + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "Enable GR%u HW", + nvgpu_gr_get_cur_instance_id(g)); + + err = gr_reset_engine(g); + if (err != 0) { + nvgpu_err(g, "Gr Reset failed"); + return err; + } + + nvgpu_cg_init_gr_load_gating_prod(g); + + /* Disable elcg until it gets enabled later in the init*/ + nvgpu_cg_elcg_disable_no_wait(g); + + /** Enable interrupts at MC level */ + nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_GR, NVGPU_CIC_INTR_ENABLE); + nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_GR, NVGPU_CIC_INTR_ENABLE); + + err = gr_init_prepare_hw_impl(g); + if (err != 0) { + nvgpu_err(g, "gr_init_prepare_hw_impl failed"); + return err; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + + return 0; +} + +int nvgpu_gr_reset(struct gk20a *g) +{ + int err; + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + struct nvgpu_mutex *fecs_mutex = + nvgpu_gr_falcon_get_fecs_mutex(gr->falcon); + + g->gr->initialized = false; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr | gpu_dbg_rec, "Resetting GR%u HW", + nvgpu_gr_get_cur_instance_id(g)); + + nvgpu_mutex_acquire(fecs_mutex); + + err = nvgpu_gr_enable_hw_for_instance(g); + if (err != 0) { + nvgpu_err(g, "nvgpu_gr_enable_hw_for_instance failed"); + nvgpu_mutex_release(fecs_mutex); + return err; + } + + err = gr_init_setup_hw(g, gr); + if (err != 0) { + nvgpu_err(g, "gr_init_setup_hw failed"); + nvgpu_mutex_release(fecs_mutex); + return err; + } + + err = nvgpu_gr_falcon_init_ctxsw(g, gr->falcon); + if (err != 0) { + nvgpu_err(g, "nvgpu_gr_falcon_init_ctxsw failed"); + nvgpu_mutex_release(fecs_mutex); + return err; + } + + nvgpu_mutex_release(fecs_mutex); + + /* + * This appears query for sw states but fecs actually inits + * ramchain, etc so this is hw init. Hence should be executed + * for every GR engine HW initialization. 
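+	 * (In other words, nvgpu_gr_init_ctx_state() must be re-run after
+	 * every ctxsw firmware reload, as done below.)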
+ */
+	err = nvgpu_gr_init_ctx_state(g, gr);
+	if (err != 0) {
+		nvgpu_err(g, "nvgpu_gr_init_ctx_state failed");
+		return err;
+	}
+
+#ifdef CONFIG_NVGPU_POWER_PG
+	if (g->can_elpg) {
+		err = nvgpu_gr_falcon_bind_fecs_elpg(g);
+		if (err != 0) {
+			nvgpu_err(g, "nvgpu_gr_falcon_bind_fecs_elpg failed");
+			return err;
+		}
+	}
+#endif
+
+	nvgpu_cg_init_gr_load_gating_prod(g);
+
+	nvgpu_cg_elcg_enable_no_wait(g);
+
+	/* GR is initialized, signal possible waiters */
+	g->gr->initialized = true;
+	nvgpu_cond_signal(&gr->init_wq);
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
+	return err;
+}
+#endif
+
+#if defined(CONFIG_NVGPU_NEXT)
+static int gr_init_sm_id_config_early(struct gk20a *g, struct nvgpu_gr *gr)
+{
+	int err;
+
+	if (g->ops.gr.init.sm_id_config_early != NULL) {
+		err = g->ops.gr.init.sm_id_config_early(g, gr->config);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static int gr_init_ctxsw_falcon_support(struct gk20a *g, struct nvgpu_gr *gr)
+{
+	int err;
+
+	err = nvgpu_gr_falcon_init_ctxsw(g, gr->falcon);
+	if (err != 0) {
+		gr_intr_report_ctxsw_error(g, GPU_FECS_CTXSW_INIT_ERROR, 0, 0);
+		return err;
+	}
+
+	/*
+	 * This appears to query SW state, but FECS actually initializes the
+	 * ramchain etc., so this is HW init. Hence it should be executed
+	 * for every GR engine HW initialization.
+	 */
+	err = nvgpu_gr_init_ctx_state(g, gr);
+	if (err != 0) {
+		return err;
+	}
+
+	return 0;
+}
+
+static int gr_init_support_impl(struct gk20a *g)
+{
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
+	int err = 0;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "Init support for GR%u", gr->instance_id);
+
+	gr->initialized = false;
+
+	/* This is a prerequisite for calling the sm_id_config_early HAL. */
+	if (!gr->sw_ready) {
+		err = gr_init_config(g, gr);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+#if defined(CONFIG_NVGPU_NEXT)
+	/*
+	 * Move sm id programming before loading ctxsw and gpccs firmwares. This
+	 * is the actual sequence expected by ctxsw ucode.
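+	 * gr_init_sm_id_config_early() is therefore called here, before
+	 * gr_init_ctxsw_falcon_support() loads the FECS/GPCCS firmware.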
+ */ + err = gr_init_sm_id_config_early(g, gr); + if (err != 0) { + return err; + } +#endif + + err = gr_init_ctxsw_falcon_support(g, gr); + if (err != 0) { + return err; + } + +#ifdef CONFIG_NVGPU_POWER_PG + if (g->can_elpg) { + err = nvgpu_gr_falcon_bind_fecs_elpg(g); + if (err != 0) { + return err; + } + } +#endif + + err = gr_init_setup_sw(g, gr); + if (err != 0) { + return err; + } + + err = gr_init_setup_hw(g, gr); + if (err != 0) { + return err; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + + return 0; +} + +static void gr_init_support_finalize(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "Finalize support for GR%u", + gr->instance_id); + + gr->initialized = true; + nvgpu_cond_signal(&gr->init_wq); +} + +int nvgpu_gr_init_support(struct gk20a *g) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + err = nvgpu_gr_exec_with_ret_for_each_instance(g, gr_init_support_impl(g)); + if (err != 0) { + return err; + } + + err = gr_init_ecc_init(g); + if (err != 0) { + return err; + } + + nvgpu_cg_elcg_enable_no_wait(g); + + /* GR is inialized, signal possible waiters */ + nvgpu_gr_exec_for_each_instance(g, gr_init_support_finalize(g)); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + + return 0; +} + +int nvgpu_gr_alloc(struct gk20a *g) +{ + struct nvgpu_gr *gr = NULL; + int err; + u32 i; + + nvgpu_log(g, gpu_dbg_gr, " "); + + /* if gr exists return */ + if (g->gr != NULL) { + return 0; + } + + g->num_gr_instances = nvgpu_grmgr_get_num_gr_instances(g); + if (g->num_gr_instances == 0U) { + nvgpu_err(g, "No GR engine enumerated"); + return -EINVAL; + } + + /* Allocate memory for gr struct */ + g->gr = nvgpu_kzalloc(g, sizeof(*gr) * g->num_gr_instances); + if (g->gr == NULL) { + return -ENOMEM; + } + + g->mig.cur_gr_instance = 0U; /* default */ + + for (i = 0U; i < g->num_gr_instances; i++) { + gr = &g->gr[i]; + gr->instance_id = i; + + gr->syspipe_id = nvgpu_grmgr_get_gr_syspipe_id(g, i); + if (gr->syspipe_id == U32_MAX) { + nvgpu_err(g, "failed to get syspipe id"); + err = -EINVAL; + goto fail; + } + + nvgpu_log(g, gpu_dbg_gr, "GR instance %u attached to GR syspipe %u", + i, gr->syspipe_id); + + gr->falcon = nvgpu_gr_falcon_init_support(g); + if (gr->falcon == NULL) { + nvgpu_err(g, "failed to init gr falcon"); + err = -ENOMEM; + goto fail; + } + + gr->intr = nvgpu_gr_intr_init_support(g); + if (gr->intr == NULL) { + nvgpu_err(g, "failed to init gr intr support"); + err = -ENOMEM; + goto fail; + } + + gr->g = g; + nvgpu_cond_init(&gr->init_wq); +#ifdef CONFIG_NVGPU_NON_FUSA + nvgpu_gr_override_ecc_val(gr, g->fecs_feature_override_ecc_val); +#endif +#if defined(CONFIG_NVGPU_RECOVERY) || defined(CONFIG_NVGPU_DEBUGGER) + nvgpu_mutex_init(&gr->ctxsw_disable_mutex); + gr->ctxsw_disable_count = 0; +#endif + } + + /* + * Initialize FECS ECC counters here before acr_construct_execute as the + * FECS ECC errors during FECS load need to be handled and reported + * using the ECC counters. 
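+	 * The GPC/TPC ECC counters, by contrast, are initialized later from
+	 * gr_init_ecc_init() during nvgpu_gr_init_support().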
+ */ + if ((g->ops.gr.ecc.fecs_ecc_init != NULL) && !g->ecc.initialized) { + err = g->ops.gr.ecc.fecs_ecc_init(g); + if (err != 0) { + nvgpu_err(g, "failed to init gr fecs ecc"); + goto fail; + } + } + + nvgpu_log(g, gpu_dbg_gr, "Initialized %u GR engine instances", + g->num_gr_instances); + + return 0; + +fail: + nvgpu_gr_free(g); + return err; +} + +void nvgpu_gr_free(struct gk20a *g) +{ + struct nvgpu_gr *gr = NULL; + u32 i; + + if (g->gr == NULL) { + return; + } + + for (i = 0U; i < g->num_gr_instances; i++) { + gr = &g->gr[i]; + + nvgpu_gr_falcon_remove_support(g, gr->falcon); + gr->falcon = NULL; + + nvgpu_gr_intr_remove_support(g, gr->intr); + gr->intr = NULL; + } + + nvgpu_kfree(g, g->gr); + g->gr = NULL; +} + +u32 nvgpu_gr_get_syspipe_id(struct gk20a *g, u32 gr_instance_id) +{ + return g->gr[gr_instance_id].syspipe_id; +} + +#if defined(CONFIG_NVGPU_RECOVERY) || defined(CONFIG_NVGPU_DEBUGGER) +/** + * Stop processing (stall) context switches at FECS:- + * If fecs is sent stop_ctxsw method, elpg entry/exit cannot happen + * and may timeout. It could manifest as different error signatures + * depending on when stop_ctxsw fecs method gets sent with respect + * to pmu elpg sequence. It could come as pmu halt or abort or + * maybe ext error too. + */ +int nvgpu_gr_disable_ctxsw(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + nvgpu_mutex_acquire(&gr->ctxsw_disable_mutex); + + /* check for gr->ctxsw_disable_count overflow */ + if (INT_MAX == gr->ctxsw_disable_count) { + nvgpu_err(g, "ctxsw_disable_count overflow"); + err = -ERANGE; + goto out; + } + + gr->ctxsw_disable_count++; + if (gr->ctxsw_disable_count == 1) { +#ifdef CONFIG_NVGPU_POWER_PG + err = nvgpu_pg_elpg_disable(g); + if (err != 0) { + nvgpu_err(g, + "failed to disable elpg for stop_ctxsw"); + /* stop ctxsw command is not sent */ + gr->ctxsw_disable_count--; + } else +#endif + { + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_CTXSW_STOP, 0U, NULL); + if (err != 0) { + nvgpu_err(g, "failed to stop fecs ctxsw"); + /* stop ctxsw failed */ + gr->ctxsw_disable_count--; + } + } + } else { + nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d", + gr->ctxsw_disable_count); + } +out: + nvgpu_mutex_release(&gr->ctxsw_disable_mutex); + + return err; +} + +/* Start processing (continue) context switches at FECS */ +int nvgpu_gr_enable_ctxsw(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + nvgpu_mutex_acquire(&gr->ctxsw_disable_mutex); + if (gr->ctxsw_disable_count == 0) { + goto ctxsw_already_enabled; + } + gr->ctxsw_disable_count--; + nvgpu_assert(gr->ctxsw_disable_count >= 0); + if (gr->ctxsw_disable_count == 0) { + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_CTXSW_START, 0U, NULL); + if (err != 0) { + nvgpu_err(g, "failed to start fecs ctxsw"); + } +#ifdef CONFIG_NVGPU_POWER_PG + else { + if (nvgpu_pg_elpg_enable(g) != 0) { + nvgpu_err(g, + "failed to enable elpg for start_ctxsw"); + } + } +#endif + } else { + nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet", + gr->ctxsw_disable_count); + } +ctxsw_already_enabled: + nvgpu_mutex_release(&gr->ctxsw_disable_mutex); + + return err; +} +#endif + +void nvgpu_gr_remove_support(struct gk20a *g) +{ + if (g->gr != NULL && g->gr->remove_support != NULL) { + g->gr->remove_support(g); + } +} + +void nvgpu_gr_sw_ready(struct gk20a *g, bool enable) +{ 
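+	/*
+	 * gr_init_setup_sw() checks this flag and skips repeated SW setup
+	 * once it has been set.
+	 */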
+ if (g->gr != NULL) { + g->gr->sw_ready = enable; + } +} + +#ifdef CONFIG_NVGPU_HAL_NON_FUSA +/* Wait until GR is initialized */ +void nvgpu_gr_wait_initialized(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + NVGPU_COND_WAIT(&gr->init_wq, gr->initialized, 0U); +} +#endif + +bool nvgpu_gr_is_tpc_addr(struct gk20a *g, u32 addr) +{ + u32 tpc_in_gpc_base = + nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); + u32 tpc_in_gpc_stride = + nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + u32 num_tpc_per_gpc = + nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); + u32 tpc_in_gpc_shared_base = + nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE); + bool is_tpc_addr_shared = ((addr >= tpc_in_gpc_shared_base) && + (addr < (tpc_in_gpc_shared_base + tpc_in_gpc_stride))); + + return (((addr >= tpc_in_gpc_base) && + (addr < (tpc_in_gpc_base + + (num_tpc_per_gpc * tpc_in_gpc_stride)))) || + is_tpc_addr_shared); +} + +u32 nvgpu_gr_get_tpc_num(struct gk20a *g, u32 addr) +{ + u32 i, start; + u32 num_tpcs = + nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); + u32 tpc_in_gpc_base = + nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); + u32 tpc_in_gpc_stride = + nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + + for (i = 0; i < num_tpcs; i++) { + start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); + if ((addr >= start) && + (addr < (start + tpc_in_gpc_stride))) { + return i; + } + } + return 0; +} diff --git a/drivers/gpu/nvgpu/common/gr/gr_config.c b/drivers/gpu/nvgpu/common/gr/gr_config.c new file mode 100644 index 000000000..04c985111 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_config.c @@ -0,0 +1,864 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "gr_config_priv.h" + +static void gr_config_init_pes_tpc(struct gk20a *g, + struct nvgpu_gr_config *config, + u32 gpc_index) +{ + u32 pes_index; + u32 pes_tpc_mask; + u32 pes_tpc_count; + + for (pes_index = 0; pes_index < config->pe_count_per_gpc; + pes_index++) { + pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g, + config, gpc_index, pes_index); + pes_tpc_count = hweight32(pes_tpc_mask); + + /* detect PES presence by seeing if there are + * TPCs connected to it. 
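+		 * gpc_ppc_count[] is only incremented for PES units that own
+		 * at least one TPC.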
+ */ + if (pes_tpc_count != 0U) { + config->gpc_ppc_count[gpc_index] = nvgpu_safe_add_u32( + config->gpc_ppc_count[gpc_index], 1U); + } + + config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; + config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; + } +} + +static void gr_config_init_gpc_skip_mask(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + u32 pes_heavy_index; + u32 gpc_new_skip_mask = 0U; + u32 pes_tpc_cnt = 0U, pes_tpc_mask = 0U; + + if (config->pe_count_per_gpc <= 1U) { + goto skip_mask_end; + } + + pes_tpc_cnt = nvgpu_safe_add_u32( + config->pes_tpc_count[0][gpc_index], + config->pes_tpc_count[1][gpc_index]); + + pes_heavy_index = + (config->pes_tpc_count[0][gpc_index] > + config->pes_tpc_count[1][gpc_index]) ? 0U : 1U; + + if ((pes_tpc_cnt == 5U) || ((pes_tpc_cnt == 4U) && + (config->pes_tpc_count[0][gpc_index] != + config->pes_tpc_count[1][gpc_index]))) { + pes_tpc_mask = nvgpu_safe_sub_u32( + config->pes_tpc_mask[pes_heavy_index][gpc_index], 1U); + gpc_new_skip_mask = + config->pes_tpc_mask[pes_heavy_index][gpc_index] ^ + (config->pes_tpc_mask[pes_heavy_index][gpc_index] & + pes_tpc_mask); + } + +skip_mask_end: + config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask; +} + +static void gr_config_log_info(struct gk20a *g, + struct nvgpu_gr_config *config) +{ + u32 gpc_index, pes_index; + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_gpc_count: %d", config->max_gpc_count); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_count: %d", config->gpc_count); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_mask: 0x%x", config->gpc_mask); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_count: %d", config->max_tpc_count); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "tpc_count: %d", config->tpc_count); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "sm_count_per_tpc: %d", config->sm_count_per_tpc); +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "zcb_count: %d", config->zcb_count); +#endif + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pe_count_per_gpc: %d", config->pe_count_per_gpc); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "ppc_count: %d", config->ppc_count); + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_count[%d] : %d", + gpc_index, config->gpc_tpc_count[gpc_index]); + } + for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) { + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_mask[%d] : 0x%x", + gpc_index, config->gpc_tpc_mask[gpc_index]); + } +#ifdef CONFIG_NVGPU_GRAPHICS + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_zcb_count[%d] : %d", + gpc_index, config->gpc_zcb_count != NULL ? 
+ config->gpc_zcb_count[gpc_index] : 0U); + } +#endif + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_ppc_count[%d] : %d", + gpc_index, config->gpc_ppc_count[gpc_index]); + } + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_skip_mask[%d] : 0x%x", + gpc_index, config->gpc_skip_mask[gpc_index]); + } + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + for (pes_index = 0; + pes_index < config->pe_count_per_gpc; + pes_index++) { + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_count[%d][%d] : %d", + pes_index, gpc_index, + config->pes_tpc_count[pes_index][gpc_index]); + } + } + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + for (pes_index = 0; + pes_index < config->pe_count_per_gpc; + pes_index++) { + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_mask[%d][%d] : 0x%x", + pes_index, gpc_index, + config->pes_tpc_mask[pes_index][gpc_index]); + } + } +} + +static void gr_config_set_gpc_mask(struct gk20a *g, + struct nvgpu_gr_config *config) +{ +#ifdef CONFIG_NVGPU_DGPU + if (g->ops.gr.config.get_gpc_mask != NULL) { + config->gpc_mask = g->ops.gr.config.get_gpc_mask(g); + } else +#endif + { + config->gpc_mask = nvgpu_safe_sub_u32(BIT32(config->gpc_count), + 1U); + } +} + +static bool gr_config_alloc_valid(struct nvgpu_gr_config *config) +{ + if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) || + (config->gpc_ppc_count == NULL) || + (config->gpc_skip_mask == NULL)) { + return false; + } + +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(config->g, NVGPU_SUPPORT_MIG) && + (config->gpc_zcb_count == NULL)) { + return false; + } +#endif + + return true; +} + +static void gr_config_free_mem(struct gk20a *g, + struct nvgpu_gr_config *config) +{ + u32 pes_index; + + for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) { + nvgpu_kfree(g, config->pes_tpc_count[pes_index]); + nvgpu_kfree(g, config->pes_tpc_mask[pes_index]); + } + + nvgpu_kfree(g, config->gpc_skip_mask); + nvgpu_kfree(g, config->gpc_ppc_count); +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_kfree(g, config->gpc_zcb_count); +#endif + nvgpu_kfree(g, config->gpc_tpc_mask); + nvgpu_kfree(g, config->gpc_tpc_count); +} + +static bool gr_config_alloc_struct_mem(struct gk20a *g, + struct nvgpu_gr_config *config) +{ + u32 pes_index; + u32 total_tpc_cnt; + size_t sm_info_size; + size_t gpc_size, sm_size, max_gpc_cnt; + size_t pd_tbl_size; + + total_tpc_cnt = nvgpu_safe_mult_u32(config->gpc_count, + config->max_tpc_per_gpc_count); + sm_size = nvgpu_safe_mult_u64((size_t)config->sm_count_per_tpc, + sizeof(struct nvgpu_sm_info)); + /* allocate for max tpc per gpc */ + sm_info_size = nvgpu_safe_mult_u64((size_t)total_tpc_cnt, sm_size); + + config->sm_to_cluster = nvgpu_kzalloc(g, sm_info_size); + if (config->sm_to_cluster == NULL) { + nvgpu_err(g, "sm_to_cluster == NULL"); + goto alloc_err; + } + +#ifdef CONFIG_NVGPU_SM_DIVERSITY + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) { + config->sm_to_cluster_redex_config = + nvgpu_kzalloc(g, sm_info_size); + if (config->sm_to_cluster_redex_config == NULL) { + nvgpu_err(g, "sm_to_cluster_redex_config == NULL"); + goto clean_alloc_mem; + } + } +#endif + config->no_of_sm = 0; + + gpc_size = nvgpu_safe_mult_u64((size_t)config->gpc_count, sizeof(u32)); + max_gpc_cnt = nvgpu_safe_mult_u64((size_t)config->max_gpc_count, sizeof(u32)); + config->gpc_tpc_count = nvgpu_kzalloc(g, gpc_size); + 
config->gpc_tpc_mask = nvgpu_kzalloc(g, max_gpc_cnt); +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, + GPU_LIT_NUM_ZCULL_BANKS); + + config->gpc_zcb_count = nvgpu_kzalloc(g, gpc_size); + } +#endif + config->gpc_ppc_count = nvgpu_kzalloc(g, gpc_size); + + pd_tbl_size = nvgpu_safe_mult_u64( + (size_t)g->ops.gr.config.get_pd_dist_skip_table_size(), + sizeof(u32)); + pd_tbl_size = nvgpu_safe_mult_u64(pd_tbl_size, 4UL); + config->gpc_skip_mask = nvgpu_kzalloc(g, pd_tbl_size); + + if (gr_config_alloc_valid(config) == false) { + goto clean_alloc_mem; + } + + for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) { + config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, gpc_size); + config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, gpc_size); + if ((config->pes_tpc_count[pes_index] == NULL) || + (config->pes_tpc_mask[pes_index] == NULL)) { + goto clean_alloc_mem; + } + } + + return true; + +clean_alloc_mem: + nvgpu_kfree(g, config->sm_to_cluster); + config->sm_to_cluster = NULL; +#ifdef CONFIG_NVGPU_SM_DIVERSITY + if (config->sm_to_cluster_redex_config != NULL) { + nvgpu_kfree(g, config->sm_to_cluster_redex_config); + config->sm_to_cluster_redex_config = NULL; + } +#endif + gr_config_free_mem(g, config); + +alloc_err: + return false; +} + +static int gr_config_init_mig_gpcs(struct nvgpu_gr_config *config) +{ + struct gk20a *g = config->g; + u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g); + + config->max_gpc_count = nvgpu_grmgr_get_max_gpc_count(g); + config->gpc_count = nvgpu_grmgr_get_gr_num_gpcs(g, cur_gr_instance); + if (config->gpc_count == 0U) { + nvgpu_err(g, "gpc_count==0!"); + return -EINVAL; + } + + config->gpc_mask = nvgpu_grmgr_get_gr_logical_gpc_mask( + g, cur_gr_instance); + + return 0; +} + +static int gr_config_init_gpcs(struct nvgpu_gr_config *config) +{ + struct gk20a *g = config->g; + + config->max_gpc_count = g->ops.top.get_max_gpc_count(g); + config->gpc_count = g->ops.priv_ring.get_gpc_count(g); + if (config->gpc_count == 0U) { + nvgpu_err(g, "gpc_count==0!"); + return -EINVAL; + } + + gr_config_set_gpc_mask(g, config); + + return 0; +} + +struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g) +{ + struct nvgpu_gr_config *config; + u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g); + u32 gpc_index; + u32 gpc_phys_id; + int err; + + config = nvgpu_kzalloc(g, sizeof(*config)); + if (config == NULL) { + return NULL; + } + + config->g = g; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + err = gr_config_init_mig_gpcs(config); + if (err < 0) { + nvgpu_err(g, "MIG GPC config init failed"); + nvgpu_kfree(g, config); + return NULL; + } + } else { + err = gr_config_init_gpcs(config); + if (err < 0) { + nvgpu_err(g, "GPC config init failed"); + nvgpu_kfree(g, config); + return NULL; + } + } + + /* Required to read gpc_tpc_mask below */ + config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g); + + config->max_tpc_count = nvgpu_safe_mult_u32(config->max_gpc_count, + config->max_tpc_per_gpc_count); + + config->pe_count_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_PES_PER_GPC); + if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) { + nvgpu_err(g, "too many pes per gpc"); + goto clean_up_init; + } + + config->sm_count_per_tpc = + nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + if (config->sm_count_per_tpc == 0U) { + nvgpu_err(g, "sm_count_per_tpc==0!"); + goto clean_up_init; + } + + if (gr_config_alloc_struct_mem(g, config) == 
false) { + goto clean_up_init; + } + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + /* + * Fuse registers must be queried with physical gpc-id and not + * the logical ones. For tu104 and before chips logical gpc-id + * is same as physical gpc-id for non-floorswept config but for + * chips after tu104 it may not be true. + */ + gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g, + cur_gr_instance, gpc_index); + config->gpc_tpc_mask[gpc_index] = + g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id); + } + + config->ppc_count = 0; + config->tpc_count = 0; +#ifdef CONFIG_NVGPU_GRAPHICS + config->zcb_count = 0; +#endif + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + config->gpc_tpc_count[gpc_index] = + g->ops.gr.config.get_tpc_count_in_gpc(g, config, + gpc_index); + config->tpc_count = nvgpu_safe_add_u32(config->tpc_count, + config->gpc_tpc_count[gpc_index]); + +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + config->gpc_zcb_count[gpc_index] = + g->ops.gr.config.get_zcull_count_in_gpc(g, config, + gpc_index); + config->zcb_count = nvgpu_safe_add_u32(config->zcb_count, + config->gpc_zcb_count[gpc_index]); + } +#endif + + gr_config_init_pes_tpc(g, config, gpc_index); + + config->ppc_count = nvgpu_safe_add_u32(config->ppc_count, + config->gpc_ppc_count[gpc_index]); + + gr_config_init_gpc_skip_mask(config, gpc_index); + } + + gr_config_log_info(g, config); + return config; + +clean_up_init: + nvgpu_kfree(g, config); + return NULL; +} + +#ifdef CONFIG_NVGPU_GRAPHICS +static u32 prime_set[18] = { + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 }; + +/* + * Return map tiles count for given index + * Return 0 if index is out-of-bounds + */ +u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index) +{ + if (index >= config->map_tile_count) { + return 0; + } + + return config->map_tiles[index]; +} + +u8 *nvgpu_gr_config_get_map_tiles(struct nvgpu_gr_config *config) +{ + return config->map_tiles; +} + +u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config) +{ + return config->map_row_offset; +} + +int nvgpu_gr_config_init_map_tiles(struct gk20a *g, + struct nvgpu_gr_config *config) +{ + s32 comm_denom; + s32 mul_factor; + s32 *init_frac = NULL; + s32 *init_err = NULL; + s32 *run_err = NULL; + u32 *sorted_num_tpcs = NULL; + u32 *sorted_to_unsorted_gpc_map = NULL; + u32 gpc_index; + u32 gpc_mark = 0; + u32 num_tpc; + u32 max_tpc_count = 0; + u32 swap; + u32 tile_count; + u32 index; + bool delete_map = false; + bool gpc_sorted; + int ret = 0; + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); + u32 map_tile_count = num_gpcs * num_tpc_per_gpc; + + nvgpu_log(g, gpu_dbg_gr, " "); + + init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); + init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); + run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32)); + sorted_num_tpcs = + nvgpu_kzalloc(g, (size_t)num_gpcs * + (size_t)num_tpc_per_gpc * + sizeof(s32)); + sorted_to_unsorted_gpc_map = + nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32)); + + if (!((init_frac != NULL) && + (init_err != NULL) && + (run_err != NULL) && + (sorted_num_tpcs != NULL) && + (sorted_to_unsorted_gpc_map != NULL))) { + ret = -ENOMEM; + goto clean_up; + } + + config->map_row_offset = 0xFFFFFFFFU; + + if (config->tpc_count == 3U) { + config->map_row_offset = 2; + } else if (config->tpc_count < 3U) { + config->map_row_offset = 1; + 
} else { + config->map_row_offset = 3; + + for (index = 1U; index < 18U; index++) { + u32 prime = prime_set[index]; + if ((config->tpc_count % prime) != 0U) { + config->map_row_offset = prime; + break; + } + } + } + + switch (config->tpc_count) { + case 15: + config->map_row_offset = 6; + break; + case 14: + config->map_row_offset = 5; + break; + case 13: + config->map_row_offset = 2; + break; + case 11: + config->map_row_offset = 7; + break; + case 10: + config->map_row_offset = 6; + break; + case 7: + case 5: + config->map_row_offset = 1; + break; + default: + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "unsupported tpc count = %u", + config->tpc_count); + break; + } + + if (config->map_tiles != NULL) { + if (config->map_tile_count != config->tpc_count) { + delete_map = true; + } + + for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) { + if (nvgpu_gr_config_get_map_tile_count(config, tile_count) + >= config->tpc_count) { + delete_map = true; + } + } + + if (delete_map) { + nvgpu_kfree(g, config->map_tiles); + config->map_tiles = NULL; + config->map_tile_count = 0; + } + } + + if (config->map_tiles == NULL) { + config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8)); + if (config->map_tiles == NULL) { + ret = -ENOMEM; + goto clean_up; + } + config->map_tile_count = map_tile_count; + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index]; + sorted_to_unsorted_gpc_map[gpc_index] = gpc_index; + } + + gpc_sorted = false; + while (!gpc_sorted) { + gpc_sorted = true; + for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) { + if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) { + gpc_sorted = false; + swap = sorted_num_tpcs[gpc_index]; + sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U]; + sorted_num_tpcs[gpc_index + 1U] = swap; + swap = sorted_to_unsorted_gpc_map[gpc_index]; + sorted_to_unsorted_gpc_map[gpc_index] = + sorted_to_unsorted_gpc_map[gpc_index + 1U]; + sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap; + } + } + } + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + if (config->gpc_tpc_count[gpc_index] > max_tpc_count) { + max_tpc_count = config->gpc_tpc_count[gpc_index]; + } + } + + mul_factor = S32(config->gpc_count) * S32(max_tpc_count); + if ((U32(mul_factor) & 0x1U) != 0U) { + mul_factor = 2; + } else { + mul_factor = 1; + } + + comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor; + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + num_tpc = sorted_num_tpcs[gpc_index]; + + init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor; + + if (num_tpc != 0U) { + init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2; + } else { + init_err[gpc_index] = 0; + } + + run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index]; + } + + while (gpc_mark < config->tpc_count) { + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + if ((run_err[gpc_index] * 2) >= comm_denom) { + config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index]; + run_err[gpc_index] += init_frac[gpc_index] - comm_denom; + } else { + run_err[gpc_index] += init_frac[gpc_index]; + } + } + } + } + +clean_up: + nvgpu_kfree(g, init_frac); + nvgpu_kfree(g, init_err); + nvgpu_kfree(g, run_err); + nvgpu_kfree(g, sorted_num_tpcs); + nvgpu_kfree(g, sorted_to_unsorted_gpc_map); + + if (ret != 0) { + nvgpu_err(g, "fail"); + } else { + nvgpu_log(g, 
gpu_dbg_fn | gpu_dbg_gr, "done"); + } + + return ret; +} + +u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config) +{ + return config->max_zcull_per_gpc_count; +} + +u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config) +{ + return config->zcb_count; +} + +u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + return config->gpc_zcb_count[gpc_index]; +} +#endif + +void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config) +{ + if (config == NULL) { + return; + } + + gr_config_free_mem(g, config); +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_kfree(g, config->map_tiles); +#endif + nvgpu_kfree(g, config->sm_to_cluster); +#ifdef CONFIG_NVGPU_SM_DIVERSITY + if (config->sm_to_cluster_redex_config != NULL) { + nvgpu_kfree(g, config->sm_to_cluster_redex_config); + config->sm_to_cluster_redex_config = NULL; + } +#endif +} + +u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config) +{ + return config->max_gpc_count; +} + +u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config) +{ + return config->max_tpc_per_gpc_count; +} + +u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config) +{ + return config->max_tpc_count; +} + +u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config) +{ + return config->gpc_count; +} + +u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config) +{ + return config->tpc_count; +} + +u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config) +{ + return config->ppc_count; +} + +u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config) +{ + return config->pe_count_per_gpc; +} + +u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config) +{ + return config->sm_count_per_tpc; +} + +u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config)); + return config->gpc_ppc_count[gpc_index]; +} + +u32 *nvgpu_gr_config_get_gpc_tpc_count_base(struct nvgpu_gr_config *config) +{ + return config->gpc_tpc_count; +} + +u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + if (gpc_index >= config->gpc_count) { + return 0; + } + return config->gpc_tpc_count[gpc_index]; +} + +u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config, + u32 gpc_index, u32 pes_index) +{ + nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config)); + nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config)); + return config->pes_tpc_count[pes_index][gpc_index]; +} + +u32 *nvgpu_gr_config_get_gpc_tpc_mask_base(struct nvgpu_gr_config *config) +{ + return config->gpc_tpc_mask; +} + +u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config)); + return config->gpc_tpc_mask[gpc_index]; +} + +void nvgpu_gr_config_set_gpc_tpc_mask(struct nvgpu_gr_config *config, + u32 gpc_index, u32 val) +{ + nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config)); + config->gpc_tpc_mask[gpc_index] = val; +} + +u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config, + u32 gpc_index) +{ + if (gpc_index >= config->gpc_count) { + return 0; + } + return config->gpc_skip_mask[gpc_index]; +} + +u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config, + u32 gpc_index, u32 pes_index) +{ + nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config)); + nvgpu_assert(pes_index < 
nvgpu_gr_config_get_pe_count_per_gpc(config)); + return config->pes_tpc_mask[pes_index][gpc_index]; +} + +u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config) +{ + return config->gpc_mask; +} + +u32 nvgpu_gr_config_get_no_of_sm(struct nvgpu_gr_config *config) +{ + return config->no_of_sm; +} + +void nvgpu_gr_config_set_no_of_sm(struct nvgpu_gr_config *config, u32 no_of_sm) +{ + config->no_of_sm = no_of_sm; +} + +struct nvgpu_sm_info *nvgpu_gr_config_get_sm_info(struct nvgpu_gr_config *config, + u32 sm_id) +{ + return &config->sm_to_cluster[sm_id]; +} + +#ifdef CONFIG_NVGPU_SM_DIVERSITY +struct nvgpu_sm_info *nvgpu_gr_config_get_redex_sm_info( + struct nvgpu_gr_config *config, u32 sm_id) +{ + return &config->sm_to_cluster_redex_config[sm_id]; +} +#endif + +u32 nvgpu_gr_config_get_sm_info_gpc_index(struct nvgpu_sm_info *sm_info) +{ + return sm_info->gpc_index; +} + +void nvgpu_gr_config_set_sm_info_gpc_index(struct nvgpu_sm_info *sm_info, + u32 gpc_index) +{ + sm_info->gpc_index = gpc_index; +} + +u32 nvgpu_gr_config_get_sm_info_tpc_index(struct nvgpu_sm_info *sm_info) +{ + return sm_info->tpc_index; +} + +void nvgpu_gr_config_set_sm_info_tpc_index(struct nvgpu_sm_info *sm_info, + u32 tpc_index) +{ + sm_info->tpc_index = tpc_index; +} + +u32 nvgpu_gr_config_get_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info) +{ + return sm_info->global_tpc_index; +} + +void nvgpu_gr_config_set_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info, + u32 global_tpc_index) +{ + sm_info->global_tpc_index = global_tpc_index; +} + +u32 nvgpu_gr_config_get_sm_info_sm_index(struct nvgpu_sm_info *sm_info) +{ + return sm_info->sm_index; +} + +void nvgpu_gr_config_set_sm_info_sm_index(struct nvgpu_sm_info *sm_info, + u32 sm_index) +{ + sm_info->sm_index = sm_index; +} diff --git a/drivers/gpu/nvgpu/common/gr/gr_config_priv.h b/drivers/gpu/nvgpu/common/gr/gr_config_priv.h new file mode 100644 index 000000000..df7415041 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_config_priv.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CONFIG_PRIV_H +#define NVGPU_GR_CONFIG_PRIV_H + +#include + +/** + * Max possible PES count per GPC. + */ +#define GK20A_GR_MAX_PES_PER_GPC 3U + +struct gk20a; + +/** + * Detailed information of SM indexes in GR engine. + */ +struct nvgpu_sm_info { + /** + * Index of GPC for SM. + */ + u32 gpc_index; + + /** + * Index of TPC for SM. 
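+ * The index is local to the GPC; the flat index across all GPCs is
+ * kept in global_tpc_index below.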
+ */ + u32 tpc_index; + + /** + * Index of SM within TPC. + */ + u32 sm_index; + + /** + * Global TPC index for SM. + */ + u32 global_tpc_index; +}; + +/** + * GR engine configuration data. + * + * This data is populated during GR initialization and referred across + * GPU driver through public APIs. + */ +struct nvgpu_gr_config { + /** + * Pointer to GPU driver struct. + */ + struct gk20a *g; + + /** + * Max possible number of GPCs in GR engine. + */ + u32 max_gpc_count; + /** + * Max possible number of TPCs per GPC in GR engine. + */ + u32 max_tpc_per_gpc_count; + /** + * Max possible number of TPCs in GR engine. + */ + u32 max_tpc_count; + + /** + * Number of GPCs in GR engine. + */ + u32 gpc_count; + /** + * Number of TPCs in GR engine. + */ + u32 tpc_count; + /** + * Number of PPCs in GR engine. + */ + u32 ppc_count; + + /** + * Number of PES per GPC in GR engine. + */ + u32 pe_count_per_gpc; + /** + * Number of SMs per TPC in GR engine. + */ + u32 sm_count_per_tpc; + + /** + * Array to hold number of PPC units per GPC. + * Array is indexed by GPC index. + */ + u32 *gpc_ppc_count; + /** + * Array to hold number of TPCs per GPC. + * Array is indexed by GPC index. + */ + u32 *gpc_tpc_count; + /** + * 2-D array to hold number of TPCs attached to a PES unit + * in a GPC. + */ + u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC]; + + /** + * Mask of GPCs. A set bit indicates GPC is available, otherwise + * it is not available. + */ + u32 gpc_mask; + + /** + * Array to hold mask of TPCs per GPC. + * Array is indexed by GPC index. + */ + u32 *gpc_tpc_mask; + /** + * 2-D array to hold mask of TPCs attached to a PES unit + * in a GPC. + */ + u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC]; + /** + * Array to hold skip mask of TPCs per GPC. + * Array is indexed by GPC index. + */ + u32 *gpc_skip_mask; + + /** + * Number of SMs in GR engine. + */ + u32 no_of_sm; + /** + * Pointer to SM information struct. + */ + struct nvgpu_sm_info *sm_to_cluster; +#ifdef CONFIG_NVGPU_SM_DIVERSITY + /** + * Pointer to redundant execution config SM information struct. + * It is valid only if NVGPU_SUPPORT_SM_DIVERSITY support is true. + */ + struct nvgpu_sm_info *sm_to_cluster_redex_config; +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS + u32 max_zcull_per_gpc_count; + u32 zcb_count; + u32 *gpc_zcb_count; + + u8 *map_tiles; + u32 map_tile_count; + u32 map_row_offset; +#endif +}; + +#endif /* NVGPU_GR_CONFIG_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/gr_ecc.c b/drivers/gpu/nvgpu/common/gr/gr_ecc.c new file mode 100644 index 000000000..827131e3f --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_ecc.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +int nvgpu_ecc_counter_init_per_gr(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name) +{ + struct nvgpu_ecc_stat *stats; + u32 i; + char gr_str[10] = {0}; + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + g->num_gr_instances)); + if (stats == NULL) { + return -ENOMEM; + } + + for (i = 0; i < g->num_gr_instances; i++) { + /** + * Store stats name as below: + * gr_ + */ + (void)strcpy(stats[i].name, "gr"); + (void)nvgpu_strnadd_u32(gr_str, i, sizeof(gr_str), 10U); + (void)strncat(stats[i].name, gr_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[i].name)); + (void)strncat(stats[i].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[i].name)); + (void)strncat(stats[i].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[i].name)); + + nvgpu_ecc_stat_add(g, &stats[i]); + } + + *stat = stats; + return 0; +} + +int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name) +{ + struct nvgpu_ecc_stat **stats; + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + u32 gpc, tpc; + char gpc_str[10] = {0}, tpc_str[10] = {0}; + int err = 0; + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + gpc_count)); + if (stats == NULL) { + return -ENOMEM; + } + + for (gpc = 0; gpc < gpc_count; gpc++) { + stats[gpc] = nvgpu_kzalloc(g, + nvgpu_safe_mult_u64(sizeof(*stats[gpc]), + nvgpu_gr_config_get_gpc_tpc_count(gr_config, + gpc))); + if (stats[gpc] == NULL) { + err = -ENOMEM; + goto fail; + } + } + + for (gpc = 0; gpc < gpc_count; gpc++) { + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc); + tpc++) { + /** + * Store stats name as below: + * gpc_tpc_ + */ + (void)strcpy(stats[gpc][tpc].name, "gpc"); + (void)nvgpu_strnadd_u32(gpc_str, gpc, + sizeof(gpc_str), 10U); + (void)strncat(stats[gpc][tpc].name, gpc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + (void)strncat(stats[gpc][tpc].name, "_tpc", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + (void)nvgpu_strnadd_u32(tpc_str, tpc, + sizeof(tpc_str), 10U); + (void)strncat(stats[gpc][tpc].name, tpc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + (void)strncat(stats[gpc][tpc].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + (void)strncat(stats[gpc][tpc].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc][tpc].name)); + + nvgpu_ecc_stat_add(g, &stats[gpc][tpc]); + } + } + + *stat = stats; + +fail: + if (err != 0) { + while (gpc-- != 0u) { + nvgpu_kfree(g, stats[gpc]); + } + + nvgpu_kfree(g, stats); + } + + return err; +} + +int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name) +{ + struct nvgpu_ecc_stat *stats; + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + u32 gpc; + char gpc_str[10] = {0}; + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + gpc_count)); + if (stats == NULL) { + return -ENOMEM; + } + + for (gpc = 0; gpc < gpc_count; gpc++) { + /** + * Store stats name 
as below: + * gpc_ + */ + (void)strcpy(stats[gpc].name, "gpc"); + (void)nvgpu_strnadd_u32(gpc_str, gpc, sizeof(gpc_str), 10U); + (void)strncat(stats[gpc].name, gpc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc].name)); + (void)strncat(stats[gpc].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc].name)); + (void)strncat(stats[gpc].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[gpc].name)); + + nvgpu_ecc_stat_add(g, &stats[gpc]); + } + + *stat = stats; + return 0; +} + +void nvgpu_ecc_counter_deinit_per_gr(struct gk20a *g, + struct nvgpu_ecc_stat **stats_p) +{ + struct nvgpu_ecc_stat *stats = NULL; + u32 i; + + if (*stats_p != NULL) { + stats = *stats_p; + + for (i = 0; i < g->num_gr_instances; i++) { + nvgpu_ecc_stat_del(g, &stats[i]); + } + + nvgpu_kfree(g, stats); + *stats_p = NULL; + } +} + +void nvgpu_ecc_counter_deinit_per_tpc(struct gk20a *g, + struct nvgpu_ecc_stat ***stats_p) +{ + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + struct nvgpu_ecc_stat **stats = NULL; + u32 gpc_count; + u32 gpc, tpc; + + if (*stats_p != NULL) { + gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + stats = *stats_p; + + for (gpc = 0; gpc < gpc_count; gpc++) { + if (stats[gpc] == NULL) { + continue; + } + + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc); + tpc++) { + nvgpu_ecc_stat_del(g, &stats[gpc][tpc]); + } + + nvgpu_kfree(g, stats[gpc]); + stats[gpc] = NULL; + } + + nvgpu_kfree(g, stats); + *stats_p = NULL; + } +} + +void nvgpu_ecc_counter_deinit_per_gpc(struct gk20a *g, + struct nvgpu_ecc_stat **stats_p) +{ + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + struct nvgpu_ecc_stat *stats = NULL; + u32 gpc_count; + u32 gpc; + + if (*stats_p != NULL) { + gpc_count = nvgpu_gr_config_get_gpc_count(gr_config); + stats = *stats_p; + + for (gpc = 0; gpc < gpc_count; gpc++) { + nvgpu_ecc_stat_del(g, &stats[gpc]); + } + + nvgpu_kfree(g, stats); + *stats_p = NULL; + } +} + +void nvgpu_gr_ecc_free(struct gk20a *g) +{ + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (gr_config == NULL) { + return; + } + + if (g->ops.gr.ecc.fecs_ecc_deinit != NULL) { + g->ops.gr.ecc.fecs_ecc_deinit(g); + } + + if (g->ops.gr.ecc.gpc_tpc_ecc_deinit != NULL) { + g->ops.gr.ecc.gpc_tpc_ecc_deinit(g); + } +} diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c new file mode 100644 index 000000000..c40a90d00 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c @@ -0,0 +1,755 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#include +#endif +#ifdef CONFIG_NVGPU_DGPU +#include +#endif +#include +#include + +#include "gr_falcon_priv.h" + +#define NVGPU_FECS_UCODE_IMAGE "fecs.bin" +#define NVGPU_GPCCS_UCODE_IMAGE "gpccs.bin" + +struct nvgpu_gr_falcon *nvgpu_gr_falcon_init_support(struct gk20a *g) +{ + struct nvgpu_gr_falcon *falcon; + + nvgpu_log_fn(g, " "); + + falcon = nvgpu_kzalloc(g, sizeof(*falcon)); + if (falcon == NULL) { + return falcon; + } + + nvgpu_mutex_init(&falcon->fecs_mutex); + falcon->coldboot_bootstrap_done = false; + + return falcon; +} + +void nvgpu_gr_falcon_remove_support(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + nvgpu_log_fn(g, " "); + + if (falcon == NULL) { + return; + } + nvgpu_kfree(g, falcon); +} + +#ifdef CONFIG_NVGPU_POWER_PG +int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_LS_PMU + struct nvgpu_pmu *pmu = g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + int err = 0; + u32 size; + u32 data; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + size = 0; + + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_REGLIST_DISCOVER_IMAGE_SIZE, 0U, &size); + if (err != 0) { + nvgpu_err(g, + "fail to query fecs pg buffer size"); + return err; + } + + nvgpu_log(g, gpu_dbg_gr, "FECS PG buffer size = %u", size); + + if (nvgpu_pmu_pg_buf_get_cpu_va(g, pmu) == NULL) { + err = nvgpu_dma_alloc_map_sys(vm, size, nvgpu_pmu_pg_buf(g, pmu)); + if (err != 0) { + nvgpu_err(g, "failed to allocate memory"); + return -ENOMEM; + } + } + + data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, + &mm->pmu.inst_block); + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_REGLIST_BIND_INSTANCE, data, NULL); + if (err != 0) { + nvgpu_err(g, + "fail to bind pmu inst to gr"); + return err; + } + + data = u64_lo32(nvgpu_pmu_pg_buf_get_gpu_va(g, pmu) >> 8); + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_REGLIST_SET_VIRTUAL_ADDRESS, data, NULL); + if (err != 0) { + nvgpu_err(g, + "fail to set pg buffer pmu va"); + return err; + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + return err; +#else + return 0; +#endif +} +#endif + +int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g, struct nvgpu_gr_falcon *falcon) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + err = g->ops.gr.falcon.load_ctxsw_ucode(g, falcon); + if (err != 0) { + goto out; + } + + err = g->ops.gr.falcon.wait_ctxsw_ready(g); + +out: + if (err != 0) { + nvgpu_err(g, "fail"); + } else { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + } + + return err; +} + +int nvgpu_gr_falcon_init_ctx_state(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + struct nvgpu_gr_falcon_query_sizes *sizes = &falcon->sizes; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + /* fecs init ramchain */ + err = g->ops.gr.falcon.init_ctx_state(g, sizes); + if (err != 0) { + goto out; + } + +out: + if (err != 0) { + nvgpu_err(g, "fail"); + } else { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + } + + return err; +} + +u32 nvgpu_gr_falcon_get_golden_image_size(struct nvgpu_gr_falcon *falcon) +{ + return 
falcon->sizes.golden_image_size; +} + +#ifdef CONFIG_NVGPU_DEBUGGER +u32 nvgpu_gr_falcon_get_pm_ctxsw_image_size(struct nvgpu_gr_falcon *falcon) +{ + return falcon->sizes.pm_ctxsw_image_size; +} +#endif + +#ifdef CONFIG_NVGPU_GFXP +u32 nvgpu_gr_falcon_get_preempt_image_size(struct nvgpu_gr_falcon *falcon) +{ + return falcon->sizes.preempt_image_size; +} +#endif /* CONFIG_NVGPU_GFXP */ + +#ifdef CONFIG_NVGPU_GRAPHICS +u32 nvgpu_gr_falcon_get_zcull_image_size(struct nvgpu_gr_falcon *falcon) +{ + return falcon->sizes.zcull_image_size; +} +#endif /* CONFIG_NVGPU_GRAPHICS */ + +static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info; + int err; + + err = nvgpu_alloc_inst_block(g, &ucode_info->inst_blk_desc); + if (err != 0) { + return err; + } + + g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0); + + /* Map ucode surface to GMMU */ + ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm, + &ucode_info->surface_desc, + ucode_info->surface_desc.size, + 0, /* flags */ + gk20a_mem_flag_read_only, + false, + ucode_info->surface_desc.aperture); + if (ucode_info->surface_desc.gpu_va == 0ULL) { + nvgpu_err(g, "failed to update gmmu ptes"); + return -ENOMEM; + } + + return 0; +} + +static void nvgpu_gr_falcon_init_ctxsw_ucode_segment( + struct nvgpu_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size) +{ + u32 ucode_offset; + + p_seg->offset = *offset; + p_seg->size = size; + ucode_offset = nvgpu_safe_add_u32(*offset, size); + *offset = NVGPU_ALIGN(ucode_offset, 256U); +} + +static void nvgpu_gr_falcon_init_ctxsw_ucode_segments( + struct nvgpu_ctxsw_ucode_segments *segments, u32 *offset, + struct nvgpu_ctxsw_bootloader_desc *bootdesc, + u32 code_size, u32 data_size) +{ + u32 boot_size = NVGPU_ALIGN(bootdesc->size, sizeof(u32)); + + segments->boot_entry = bootdesc->entry_point; + segments->boot_imem_offset = bootdesc->imem_offset; + nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->boot, + offset, boot_size); + nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->code, + offset, code_size); + nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->data, + offset, data_size); +} + +static void nvgpu_gr_falcon_copy_ctxsw_ucode_segments( + struct gk20a *g, + struct nvgpu_mem *dst, + struct nvgpu_ctxsw_ucode_segments *segments, + u32 *bootimage, + u32 *code, u32 *data) +{ + unsigned int i; + + nvgpu_mem_wr_n(g, dst, segments->boot.offset, bootimage, + segments->boot.size); + nvgpu_mem_wr_n(g, dst, segments->code.offset, code, + segments->code.size); + nvgpu_mem_wr_n(g, dst, segments->data.offset, data, + segments->data.size); + + /* compute a "checksum" for the boot binary to detect its version */ + segments->boot_signature = 0; + for (i = 0; i < (segments->boot.size / sizeof(u32)); i++) { + segments->boot_signature = nvgpu_gr_checksum_u32( + segments->boot_signature, bootimage[i]); + } +} + +int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + struct nvgpu_ctxsw_bootloader_desc *fecs_boot_desc; + struct nvgpu_ctxsw_bootloader_desc *gpccs_boot_desc; + struct nvgpu_firmware *fecs_fw; + struct nvgpu_firmware *gpccs_fw; + u32 *fecs_boot_image; + u32 *gpccs_boot_image; + struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info; + u32 ucode_size; + int err = 0; + + nvgpu_log(g, gpu_dbg_gr, "Requst and copy FECS/GPCCS firmwares"); + + fecs_fw = nvgpu_request_firmware(g, 
NVGPU_FECS_UCODE_IMAGE, 0); + if (fecs_fw == NULL) { + nvgpu_err(g, "failed to load fecs ucode!!"); + return -ENOENT; + } + + fecs_boot_desc = (void *)fecs_fw->data; + fecs_boot_image = (void *)(fecs_fw->data + + sizeof(struct nvgpu_ctxsw_bootloader_desc)); + + gpccs_fw = nvgpu_request_firmware(g, NVGPU_GPCCS_UCODE_IMAGE, 0); + if (gpccs_fw == NULL) { + nvgpu_release_firmware(g, fecs_fw); + nvgpu_err(g, "failed to load gpccs ucode!!"); + return -ENOENT; + } + + gpccs_boot_desc = (void *)gpccs_fw->data; + gpccs_boot_image = (void *)(gpccs_fw->data + + sizeof(struct nvgpu_ctxsw_bootloader_desc)); + + ucode_size = 0; + nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->fecs, + &ucode_size, fecs_boot_desc, + nvgpu_safe_mult_u32( + nvgpu_netlist_get_fecs_inst_count(g), (u32)sizeof(u32)), + nvgpu_safe_mult_u32( + nvgpu_netlist_get_fecs_data_count(g), (u32)sizeof(u32))); + nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->gpccs, + &ucode_size, gpccs_boot_desc, + nvgpu_safe_mult_u32( + nvgpu_netlist_get_gpccs_inst_count(g), (u32)sizeof(u32)), + nvgpu_safe_mult_u32( + nvgpu_netlist_get_gpccs_data_count(g), (u32)sizeof(u32))); + + err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc); + if (err != 0) { + goto clean_up; + } + + nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g, + &ucode_info->surface_desc, + &ucode_info->fecs, + fecs_boot_image, + nvgpu_netlist_get_fecs_inst_list(g), + nvgpu_netlist_get_fecs_data_list(g)); + + nvgpu_release_firmware(g, fecs_fw); + fecs_fw = NULL; + + nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g, + &ucode_info->surface_desc, + &ucode_info->gpccs, + gpccs_boot_image, + nvgpu_netlist_get_gpccs_inst_list(g), + nvgpu_netlist_get_gpccs_data_list(g)); + + nvgpu_release_firmware(g, gpccs_fw); + gpccs_fw = NULL; + + err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g, falcon); + if (err != 0) { + goto clean_up; + } + + return 0; + +clean_up: + nvgpu_dma_free(g, &ucode_info->surface_desc); + + if (gpccs_fw != NULL) { + nvgpu_release_firmware(g, gpccs_fw); + gpccs_fw = NULL; + } + if (fecs_fw != NULL) { + nvgpu_release_firmware(g, fecs_fw); + fecs_fw = NULL; + } + + return err; +} + +static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + struct nvgpu_ctxsw_ucode_info *ucode_info = + &falcon->ctxsw_ucode_info; + u64 inst_ptr; + + if (g->ops.gr.falcon.bind_instblk == NULL) { + return; + } + + inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); + + g->ops.gr.falcon.bind_instblk(g, &ucode_info->inst_blk_desc, + inst_ptr); + +} + +#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT +static void nvgpu_gr_falcon_load_dmem(struct gk20a *g) +{ + u32 ucode_u32_size; + const u32 *ucode_u32_data; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + ucode_u32_size = nvgpu_netlist_get_gpccs_data_count(g); + ucode_u32_data = (const u32 *)nvgpu_netlist_get_gpccs_data_list(g); + g->ops.gr.falcon.load_gpccs_dmem(g, ucode_u32_data, ucode_u32_size); + + ucode_u32_size = nvgpu_netlist_get_fecs_data_count(g); + ucode_u32_data = (const u32 *)nvgpu_netlist_get_fecs_data_list(g); + g->ops.gr.falcon.load_fecs_dmem(g, ucode_u32_data, ucode_u32_size); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); +} + +static void nvgpu_gr_falcon_load_imem(struct gk20a *g) +{ + u32 ucode_u32_size; + const u32 *ucode_u32_data; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + ucode_u32_size = nvgpu_netlist_get_gpccs_inst_count(g); + ucode_u32_data = (const u32 *)nvgpu_netlist_get_gpccs_inst_list(g); + g->ops.gr.falcon.load_gpccs_imem(g, ucode_u32_data, 
ucode_u32_size); + + + ucode_u32_size = nvgpu_netlist_get_fecs_inst_count(g); + ucode_u32_data = (const u32 *)nvgpu_netlist_get_fecs_inst_list(g); + g->ops.gr.falcon.load_fecs_imem(g, ucode_u32_data, ucode_u32_size); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); +} + +static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g, + u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments, + u32 reg_offset) +{ + u32 addr_code32 = u64_lo32(nvgpu_safe_add_u64(addr_base, + segments->code.offset) >> 8); + u32 addr_data32 = u64_lo32(nvgpu_safe_add_u64(addr_base, + segments->data.offset) >> 8); + + g->ops.gr.falcon.load_ctxsw_ucode_header(g, reg_offset, + segments->boot_signature, addr_code32, addr_data32, + segments->code.size, segments->data.size); +} + +static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g, + u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments, + u32 reg_offset) +{ + u32 addr_load32 = u64_lo32(nvgpu_safe_add_u64(addr_base, + segments->boot.offset) >> 8); + u32 blocks = (nvgpu_safe_add_u32(segments->boot.size, 0xFFU) + & ~0xFFU) >> 8; + u32 dst = segments->boot_imem_offset; + + g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset, + segments->boot_entry, addr_load32, blocks, dst); + +} + +static void nvgpu_gr_falcon_load_ctxsw_ucode_segments( + struct gk20a *g, u64 addr_base, + struct nvgpu_ctxsw_ucode_segments *segments, u32 reg_offset) +{ + + /* Copy falcon bootloader into dmem */ + nvgpu_gr_falcon_load_ctxsw_ucode_header(g, addr_base, + segments, reg_offset); + nvgpu_gr_falcon_load_ctxsw_ucode_boot(g, + addr_base, segments, reg_offset); +} + +static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + struct nvgpu_ctxsw_ucode_info *ucode_info = + &falcon->ctxsw_ucode_info; + u64 addr_base = ucode_info->surface_desc.gpu_va; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_gr_falcon_bind_instblk(g, falcon); + + nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base, + &falcon->ctxsw_ucode_info.fecs, 0); + + nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base, + &falcon->ctxsw_ucode_info.gpccs, + g->ops.gr.falcon.get_gpccs_start_reg_offset()); +} + +int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + g->ops.gr.falcon.configure_fmodel(g); + } +#endif + + /* + * In case bootloader is not supported, revert to the old way of + * loading gr ucode, without the faster bootstrap routine. 
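+ * The fallback path writes the FECS/GPCCS DMEM and IMEM contents
+ * directly and then starts the ucode.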
+ */ + if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) { + nvgpu_gr_falcon_load_dmem(g); + nvgpu_gr_falcon_load_imem(g); + g->ops.gr.falcon.start_ucode(g); + } else { + if (!falcon->skip_ucode_init) { + err = nvgpu_gr_falcon_init_ctxsw_ucode(g, falcon); + if (err != 0) { + return err; + } + } + nvgpu_gr_falcon_load_with_bootloader(g, falcon); + falcon->skip_ucode_init = true; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return 0; +} + +static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + struct nvgpu_ctxsw_ucode_info *ucode_info = + &falcon->ctxsw_ucode_info; + u64 addr_base = ucode_info->surface_desc.gpu_va; + + nvgpu_gr_falcon_bind_instblk(g, falcon); + + nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base, + &falcon->ctxsw_ucode_info.gpccs, + g->ops.gr.falcon.get_gpccs_start_reg_offset()); +} +#endif + +#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU) +static int gr_falcon_sec2_or_ls_pmu_bootstrap(struct gk20a *g, + bool *bootstrap, u32 falcon_id_mask) +{ + int err = 0; + bool bootstrap_set = false; + +#ifdef CONFIG_NVGPU_DGPU + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { + bootstrap_set = true; + nvgpu_log(g, gpu_dbg_gr, "bootstrap by SEC2"); + + err = nvgpu_sec2_bootstrap_ls_falcons(g, + &g->sec2, FALCON_ID_FECS); + if (err == 0) { + err = nvgpu_sec2_bootstrap_ls_falcons(g, + &g->sec2, FALCON_ID_GPCCS); + } + } else +#endif +#ifdef CONFIG_NVGPU_LS_PMU + if (g->support_ls_pmu) { + bootstrap_set = true; + nvgpu_log(g, gpu_dbg_gr, "bootstrap by LS PMU"); + + err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g, + g->pmu, g->pmu->lsfm, + falcon_id_mask); + } +#endif + + *bootstrap = bootstrap_set; + return err; +} + +static int gr_falcon_sec2_or_ls_pmu_recovery_bootstrap(struct gk20a *g) +{ + int err = 0; + bool bootstrap = false; + u32 falcon_idmask = BIT32(FALCON_ID_FECS) | BIT32(FALCON_ID_GPCCS); + + err = gr_falcon_sec2_or_ls_pmu_bootstrap(g, + &bootstrap, + falcon_idmask); + if ((err == 0) && (!bootstrap)) { + err = nvgpu_acr_bootstrap_hs_acr(g, g->acr); + if (err != 0) { + nvgpu_err(g, + "ACR GR LSF bootstrap failed"); + } + } + + return err; +} + +static int gr_falcon_sec2_or_ls_pmu_coldboot_bootstrap(struct gk20a *g) +{ + int err = 0; + u8 falcon_id_mask = 0; + bool bootstrap = false; + + if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + return err; + } + + if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, + FALCON_ID_FECS)) { + falcon_id_mask |= BIT8(FALCON_ID_FECS); + } + if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, + FALCON_ID_GPCCS)) { + falcon_id_mask |= BIT8(FALCON_ID_GPCCS); + } + + err = gr_falcon_sec2_or_ls_pmu_bootstrap(g, + &bootstrap, + (u32)falcon_id_mask); + if ((err == 0) && (!bootstrap)) { + /* GR falcons bootstrapped by ACR */ + nvgpu_log(g, gpu_dbg_gr, "bootstrap by ACR"); + err = 0; + } + + return err; +} +#endif + +static int gr_falcon_recovery_bootstrap(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + int err = 0; + +#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT + if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon); +#ifdef CONFIG_NVGPU_LS_PMU + err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g, g->pmu, + g->pmu->lsfm, BIT32(FALCON_ID_FECS)); +#endif + } else +#endif + { + /* bind WPR VA inst block */ + nvgpu_gr_falcon_bind_instblk(g, falcon); +#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU) + err = gr_falcon_sec2_or_ls_pmu_recovery_bootstrap(g); +#else + err = nvgpu_acr_bootstrap_hs_acr(g, 
g->acr); + if (err != 0) { + nvgpu_err(g, + "ACR GR LSF bootstrap failed"); + } +#endif + } + + return err; +} + +static void gr_falcon_coldboot_bootstrap(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ +#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT + if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon); + } else +#endif + { + /* bind WPR VA inst block */ + nvgpu_gr_falcon_bind_instblk(g, falcon); + } +} + +int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g, + struct nvgpu_gr_falcon *falcon) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + g->ops.gr.falcon.configure_fmodel(g); + } +#endif + + if (falcon->coldboot_bootstrap_done) { + nvgpu_log(g, gpu_dbg_gr, "recovery bootstrap"); + + /* this must be recovery so bootstrap fecs and gpccs */ + err = gr_falcon_recovery_bootstrap(g, falcon); + if (err != 0) { + nvgpu_err(g, "Unable to recover GR falcon"); + return err; + } + + } else { + nvgpu_log(g, gpu_dbg_gr, "coldboot bootstrap"); + + /* cold boot or rg exit */ + falcon->coldboot_bootstrap_done = true; + gr_falcon_coldboot_bootstrap(g, falcon); +#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU) + err = gr_falcon_sec2_or_ls_pmu_coldboot_bootstrap(g); + if (err != 0) { + nvgpu_err(g, "Unable to boot GPCCS"); + return err; + } +#endif + } + + g->ops.gr.falcon.start_gpccs(g); + g->ops.gr.falcon.start_fecs(g); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + + return 0; +} + +struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_fecs_ucode_segments( + struct nvgpu_gr_falcon *falcon) +{ + return &falcon->ctxsw_ucode_info.fecs; +} +struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_gpccs_ucode_segments( + struct nvgpu_gr_falcon *falcon) +{ + return &falcon->ctxsw_ucode_info.gpccs; +} +void *nvgpu_gr_falcon_get_surface_desc_cpu_va(struct nvgpu_gr_falcon *falcon) +{ + return falcon->ctxsw_ucode_info.surface_desc.cpu_va; +} +#ifdef CONFIG_NVGPU_ENGINE_RESET +struct nvgpu_mutex *nvgpu_gr_falcon_get_fecs_mutex( + struct nvgpu_gr_falcon *falcon) +{ + return &falcon->fecs_mutex; +} +#endif diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h b/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h new file mode 100644 index 000000000..a4193516d --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_FALCON_PRIV_H +#define NVGPU_GR_FALCON_PRIV_H + +#include +#include + +struct nvgpu_ctxsw_ucode_segments; + +/** GPCCS boot signature for T18X chip, type: with reserved. */ +#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED 0x68edab34U + +/** FECS boot signature for T21X chip, type: with DMEM size. */ +#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE 0x9121ab5cU +/** FECS boot signature for T21X chip, type: with reserved. */ +#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED 0x9125ab5cU +/** FECS boot signature for T21X chip, type: without reserved. */ +#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7dU +/** FECS boot signature for T21X chip, type: without reserved2. */ +#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10U +/** GPCCS boot signature for T21X chip, type: with reserved. */ +#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED 0x3d3d65e2U +/** GPCCS boot signature for T21X chip, type: without reserved. */ +#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161daU + +/** FECS boot signature for T12X chip, type: with reserved. */ +#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78U +/** FECS boot signature for T12X chip, type: without reserved. */ +#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344bU +/** FECS boot signature for T12X chip, type: older. */ +#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09fU + +/** GPCCS boot signature for T12X chip, type: with reserved. */ +#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5U +/** GPCCS boot signature for T12X chip, type: without reserved. */ +#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3U +/** GPCCS boot signature for T12X chip, type: older. */ +#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877U + +enum wait_ucode_status { + /** Status of ucode wait operation : LOOP. */ + WAIT_UCODE_LOOP, + /** Status of ucode wait operation : timedout. */ + WAIT_UCODE_TIMEOUT, + /** Status of ucode wait operation : error. */ + WAIT_UCODE_ERROR, + /** Status of ucode wait operation : success. */ + WAIT_UCODE_OK +}; + +/** Falcon operation condition : EQUAL. */ +#define GR_IS_UCODE_OP_EQUAL 0U +/** Falcon operation condition : NOT_EQUAL. */ +#define GR_IS_UCODE_OP_NOT_EQUAL 1U +/** Falcon operation condition : AND. */ +#define GR_IS_UCODE_OP_AND 2U +/** Falcon operation condition : LESSER. */ +#define GR_IS_UCODE_OP_LESSER 3U +/** Falcon operation condition : LESSER_EQUAL. */ +#define GR_IS_UCODE_OP_LESSER_EQUAL 4U +/** Falcon operation condition : SKIP. */ +#define GR_IS_UCODE_OP_SKIP 5U + +/** Mailbox value in case of successful operation. */ +#define FALCON_UCODE_HANDSHAKE_INIT_COMPLETE 1U + +struct fecs_mthd_op_method { + /** Method address to send to FECS microcontroller. */ + u32 addr; + /** Method data to send to FECS microcontroller. */ + u32 data; +}; + +struct fecs_mthd_op_mailbox { + /** Mailbox ID to perform operation. */ + u32 id; + /** Mailbox data to be written. */ + u32 data; + /** Mailbox clear value. */ + u32 clr; + /** Last read mailbox value. */ + u32 *ret; + /** Mailbox value in case of operation success. */ + u32 ok; + /** Mailbox value in case of operation failure. */ + u32 fail; +}; + +struct fecs_mthd_op_cond { + /** Operation success condition. 
*/ + u32 ok; + /** Operation fail condition. */ + u32 fail; +}; + +/** + * FECS method operation structure. + * + * This structure defines the protocol for communication with FECS + * microcontroller. + */ +struct nvgpu_fecs_method_op { + /** Method struct */ + struct fecs_mthd_op_method method; + /** Mailbox struct */ + struct fecs_mthd_op_mailbox mailbox; + /** Condition struct */ + struct fecs_mthd_op_cond cond; +}; + +/** + * CTXSW falcon bootloader descriptor structure. + */ +struct nvgpu_ctxsw_bootloader_desc { + /** Start offset, unused. */ + u32 start_offset; + /** Size, unused. */ + u32 size; + /** IMEM offset. */ + u32 imem_offset; + /** Falcon boot vector. */ + u32 entry_point; +}; + +/** + * CTXSW ucode information structure. + */ +struct nvgpu_ctxsw_ucode_info { + /** Memory to store ucode instance block. */ + struct nvgpu_mem inst_blk_desc; + /** Memory to store ucode contents locally. */ + struct nvgpu_mem surface_desc; + /** Ucode segments for FECS. */ + struct nvgpu_ctxsw_ucode_segments fecs; + /** Ucode segments for GPCCS. */ + struct nvgpu_ctxsw_ucode_segments gpccs; +}; + +/** + * Structure to store various sizes queried from FECS + */ +struct nvgpu_gr_falcon_query_sizes { + /** Size of golden context image. */ + u32 golden_image_size; + +#ifdef CONFIG_NVGPU_DEBUGGER + u32 pm_ctxsw_image_size; +#endif +#ifdef CONFIG_NVGPU_GFXP + u32 preempt_image_size; +#endif +#ifdef CONFIG_NVGPU_GRAPHICS + u32 zcull_image_size; +#endif +}; + +/** + * GR falcon data structure. + * + * This structure stores all data required to load and boot CTXSW ucode, + * and also to communicate with FECS microcontroller. + */ +struct nvgpu_gr_falcon { + /** + * CTXSW ucode information structure. + */ + struct nvgpu_ctxsw_ucode_info ctxsw_ucode_info; + + /** + * Mutex to protect all FECS methods. + */ + struct nvgpu_mutex fecs_mutex; + + /** + * Flag to skip ucode initialization if it is already done. + */ + bool skip_ucode_init; + + /** + * Flag to trigger recovery bootstrap in case coldboot bootstrap + * was already done. + */ + bool coldboot_bootstrap_done; + + /** + * Structure to hold various sizes that are queried from FECS + * microcontroller. + */ + struct nvgpu_gr_falcon_query_sizes sizes; +}; + +#endif /* NVGPU_GR_FALCON_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c new file mode 100644 index 000000000..987e10334 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c @@ -0,0 +1,1138 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_NVGPU_CYCLESTATS) +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "gr_intr_priv.h" + +void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid, + u32 mailbox_value) +{ + struct ctxsw_err_info err_info; + + err_info.curr_ctx = g->ops.gr.falcon.get_current_ctx(g); + err_info.ctxsw_status0 = g->ops.gr.falcon.read_fecs_ctxsw_status0(g); + err_info.ctxsw_status1 = g->ops.gr.falcon.read_fecs_ctxsw_status1(g); + err_info.mailbox_value = mailbox_value; + err_info.chid = chid; + + nvgpu_report_ctxsw_err(g, NVGPU_ERR_MODULE_FECS, + err_type, (void *)&err_info); +} + +static int gr_intr_handle_pending_tpc_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct nvgpu_channel *fault_ch, + u32 *hww_global_esr) +{ + int tmp_ret, ret = 0; + u32 esr_sm_sel, sm; + u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: SM exception pending", gpc, tpc); + + if (g->ops.gr.intr.handle_tpc_sm_ecc_exception != NULL) { + g->ops.gr.intr.handle_tpc_sm_ecc_exception(g, gpc, tpc); + } + + g->ops.gr.intr.get_esr_sm_sel(g, gpc, tpc, &esr_sm_sel); + + for (sm = 0; sm < sm_per_tpc; sm++) { + + if ((esr_sm_sel & BIT32(sm)) == 0U) { + continue; + } + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: SM%d exception pending", + gpc, tpc, sm); + + tmp_ret = g->ops.gr.intr.handle_sm_exception(g, + gpc, tpc, sm, post_event, fault_ch, + hww_global_esr); + ret = (ret != 0) ? ret : tmp_ret; + + /* clear the hwws, also causes tpc and gpc + * exceptions to be cleared. Should be cleared + * only if SM is locked down or empty. 
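+ * The global ESR recorded by the handler above is passed to
+ * clear_sm_hww().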
+ */ + g->ops.gr.intr.clear_sm_hww(g, + gpc, tpc, sm, *hww_global_esr); + } + + return ret; +} + +static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc, + bool *post_event, struct nvgpu_channel *fault_ch, + u32 *hww_global_esr) +{ + int ret = 0; + struct nvgpu_gr_tpc_exception pending_tpc; + u32 offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc), + nvgpu_gr_tpc_offset(g, tpc)); + u32 tpc_exception = g->ops.gr.intr.get_tpc_exception(g, offset, + &pending_tpc); + + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: pending exception 0x%x", + gpc, tpc, tpc_exception); + + /* check if an sm exception is pending */ + if (pending_tpc.sm_exception) { + ret = gr_intr_handle_pending_tpc_sm_exception(g, gpc, tpc, + post_event, fault_ch, hww_global_esr); + } + + /* check if a tex exception is pending */ + if (pending_tpc.tex_exception) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: TEX exception pending", gpc, tpc); +#ifdef CONFIG_NVGPU_HAL_NON_FUSA + if (g->ops.gr.intr.handle_tex_exception != NULL) { + g->ops.gr.intr.handle_tex_exception(g, gpc, tpc); + } +#endif + } + + /* check if a mpc exception is pending */ + if (pending_tpc.mpc_exception) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: MPC exception pending", gpc, tpc); + if (g->ops.gr.intr.handle_tpc_mpc_exception != NULL) { + g->ops.gr.intr.handle_tpc_mpc_exception(g, gpc, tpc); + } + } + + /* check if a pe exception is pending */ + if (pending_tpc.pe_exception) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d TPC%d: PE exception pending", gpc, tpc); + if (g->ops.gr.intr.handle_tpc_pe_exception != NULL) { + g->ops.gr.intr.handle_tpc_pe_exception(g, gpc, tpc); + } + } + + return ret; +} + +#if defined(CONFIG_NVGPU_CHANNEL_TSG_CONTROL) && defined(CONFIG_NVGPU_DEBUGGER) +static void gr_intr_post_bpt_events(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 global_esr) +{ + if (g->ops.gr.esr_bpt_pending_events(global_esr, + NVGPU_EVENT_ID_BPT_INT)) { + g->ops.tsg.post_event_id(tsg, NVGPU_EVENT_ID_BPT_INT); + } + + if (g->ops.gr.esr_bpt_pending_events(global_esr, + NVGPU_EVENT_ID_BPT_PAUSE)) { + g->ops.tsg.post_event_id(tsg, NVGPU_EVENT_ID_BPT_PAUSE); + } +} +#endif + +static int gr_intr_handle_illegal_method(struct gk20a *g, + struct nvgpu_gr_isr_data *isr_data) +{ + int ret = g->ops.gr.intr.handle_sw_method(g, isr_data->addr, + isr_data->class_num, isr_data->offset, + isr_data->data_lo); + if (ret != 0) { + nvgpu_gr_intr_set_error_notifier(g, isr_data, + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); + nvgpu_err(g, "invalid method class 0x%08x" + ", offset 0x%08x address 0x%08x", + isr_data->class_num, isr_data->offset, isr_data->addr); + } + return ret; +} + +static void gr_intr_handle_class_error(struct gk20a *g, + struct nvgpu_gr_isr_data *isr_data) +{ + u32 chid = (isr_data->ch != NULL) ? 
+ isr_data->ch->chid : NVGPU_INVALID_CHANNEL_ID; + + nvgpu_log_fn(g, " "); + + g->ops.gr.intr.handle_class_error(g, chid, isr_data); + + nvgpu_gr_intr_set_error_notifier(g, isr_data, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); +} + +static void gr_intr_report_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, + u32 sm, u32 hww_warp_esr_status, u64 hww_warp_esr_pc) +{ + struct gr_sm_mcerr_info err_info; + struct nvgpu_channel *ch; + struct gr_err_info info; + u32 tsgid, chid, curr_ctx, inst = 0; + + tsgid = NVGPU_INVALID_TSG_ID; + curr_ctx = g->ops.gr.falcon.get_current_ctx(g); + if (curr_ctx == 0U) { + return; + } + + ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid); + chid = (ch != NULL) ? ch->chid : NVGPU_INVALID_CHANNEL_ID; + if (ch != NULL) { + nvgpu_channel_put(ch); + } + + (void) memset(&err_info, 0, sizeof(err_info)); + (void) memset(&info, 0, sizeof(info)); + err_info.curr_ctx = curr_ctx; + err_info.chid = chid; + err_info.tsgid = tsgid; + err_info.hww_warp_esr_pc = hww_warp_esr_pc; + err_info.hww_warp_esr_status = hww_warp_esr_status; + err_info.gpc = gpc; + err_info.tpc = tpc; + err_info.sm = sm; + info.sm_mcerr_info = &err_info; + nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_SM, inst, + GPU_SM_MACHINE_CHECK_ERROR, &info, 0U); +} + +/* Used by sw interrupt thread to translate current ctx to chid. + * Also used by regops to translate current ctx to chid and tsgid. + * For performance, we don't want to go through 128 channels every time. + * curr_ctx should be the value read from gr falcon get_current_ctx op + * A small tlb is used here to cache translation. + * + * Returned channel must be freed with nvgpu_channel_put() */ +struct nvgpu_channel *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g, + u32 curr_ctx, u32 *curr_tsgid) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_gr_intr *intr = nvgpu_gr_get_intr_ptr(g); + u32 chid; + u32 tsgid = NVGPU_INVALID_TSG_ID; + u32 i; + struct nvgpu_channel *ret_ch = NULL; + + /* when contexts are unloaded from GR, the valid bit is reset + * but the instance pointer information remains intact. + * This might be called from gr_isr where contexts might be + * unloaded. 
No need to check ctx_valid bit + */ + + nvgpu_spinlock_acquire(&intr->ch_tlb_lock); + + /* check cache first */ + for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { + if (intr->chid_tlb[i].curr_ctx == curr_ctx) { + chid = intr->chid_tlb[i].chid; + tsgid = intr->chid_tlb[i].tsgid; + ret_ch = nvgpu_channel_from_id(g, chid); + goto unlock; + } + } + + /* slow path */ + for (chid = 0; chid < f->num_channels; chid++) { + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch == NULL) { + continue; + } + + if (nvgpu_inst_block_ptr(g, &ch->inst_block) == + g->ops.gr.falcon.get_ctx_ptr(curr_ctx)) { + tsgid = ch->tsgid; + /* found it */ + ret_ch = ch; + break; + } + nvgpu_channel_put(ch); + } + + if (ret_ch == NULL) { + goto unlock; + } + + /* add to free tlb entry */ + for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { + if (intr->chid_tlb[i].curr_ctx == 0U) { + intr->chid_tlb[i].curr_ctx = curr_ctx; + intr->chid_tlb[i].chid = chid; + intr->chid_tlb[i].tsgid = tsgid; + goto unlock; + } + } + + /* no free entry, flush one */ + intr->chid_tlb[intr->channel_tlb_flush_index].curr_ctx = curr_ctx; + intr->chid_tlb[intr->channel_tlb_flush_index].chid = chid; + intr->chid_tlb[intr->channel_tlb_flush_index].tsgid = tsgid; + + intr->channel_tlb_flush_index = + (nvgpu_safe_add_u32(intr->channel_tlb_flush_index, 1U)) & + (nvgpu_safe_sub_u32(GR_CHANNEL_MAP_TLB_SIZE, 1U)); + +unlock: + nvgpu_spinlock_release(&intr->ch_tlb_lock); + *curr_tsgid = tsgid; + return ret_ch; +} + +void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst, + u32 err_type, u32 status, u32 sub_err_type) +{ + struct nvgpu_channel *ch = NULL; + struct gr_exception_info err_info; + struct gr_err_info info; + u32 tsgid, chid, curr_ctx; + + tsgid = NVGPU_INVALID_TSG_ID; + curr_ctx = g->ops.gr.falcon.get_current_ctx(g); + if (curr_ctx != 0U) { + ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid); + } + chid = (ch != NULL) ? ch->chid : NVGPU_INVALID_CHANNEL_ID; + if (ch != NULL) { + nvgpu_channel_put(ch); + } + + (void) memset(&err_info, 0, sizeof(err_info)); + (void) memset(&info, 0, sizeof(info)); + err_info.curr_ctx = curr_ctx; + err_info.chid = chid; + err_info.tsgid = tsgid; + err_info.status = status; + info.exception_info = &err_info; + nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_PGRAPH, + inst, err_type, &info, sub_err_type); +} + +void nvgpu_gr_intr_set_error_notifier(struct gk20a *g, + struct nvgpu_gr_isr_data *isr_data, u32 error_notifier) +{ + struct nvgpu_channel *ch; + struct nvgpu_tsg *tsg; + + ch = isr_data->ch; + + if (ch == NULL) { + return; + } + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + nvgpu_tsg_set_error_notifier(g, tsg, error_notifier); + } else { + nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + } +} + +static bool is_global_esr_error(u32 global_esr, u32 global_mask) +{ + return ((global_esr & ~global_mask) != 0U) ? 
true: false; +} + +static void gr_intr_report_warp_error(struct gk20a *g, u32 gpc, u32 tpc, + u32 sm, u32 global_esr, u32 warp_esr, + u32 global_mask, u32 offset) +{ + u64 hww_warp_esr_pc = 0; + + if (is_global_esr_error(global_esr, global_mask)) { + if (g->ops.gr.intr.get_sm_hww_warp_esr_pc != NULL) { + hww_warp_esr_pc = g->ops.gr.intr.get_sm_hww_warp_esr_pc(g, + offset); + } + gr_intr_report_sm_exception(g, gpc, tpc, sm, warp_esr, + hww_warp_esr_pc); + } +} + +#ifdef CONFIG_NVGPU_DEBUGGER +static int gr_intr_sm_exception_warp_sync(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, + u32 global_esr, u32 warp_esr, u32 global_mask, + bool ignore_debugger, bool *post_event) +{ + int ret = 0; + bool do_warp_sync = false; + + if (!ignore_debugger && ((warp_esr != 0U) || + (is_global_esr_error(global_esr, global_mask)))) { + nvgpu_log(g, gpu_dbg_intr, "warp sync needed"); + do_warp_sync = true; + } + + if (do_warp_sync) { + ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm, + global_mask, true); + if (ret != 0) { + nvgpu_err(g, "sm did not lock down!"); + return ret; + } + } + + if (ignore_debugger) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "ignore_debugger set, skipping event posting"); + } else { + *post_event = true; + } + + return ret; +} +#endif + +int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, + bool *post_event, struct nvgpu_channel *fault_ch, + u32 *hww_global_esr) +{ + int ret = 0; + u32 offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc), + nvgpu_gr_tpc_offset(g, tpc)); + u32 global_esr, warp_esr, global_mask; +#ifdef CONFIG_NVGPU_DEBUGGER + bool sm_debugger_attached; + bool early_exit = false, ignore_debugger = false; + bool disable_sm_exceptions = true; +#endif + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + global_esr = g->ops.gr.intr.get_sm_hww_global_esr(g, gpc, tpc, sm); + *hww_global_esr = global_esr; + + warp_esr = g->ops.gr.intr.get_sm_hww_warp_esr(g, gpc, tpc, sm); + global_mask = g->ops.gr.intr.get_sm_no_lock_down_hww_global_esr_mask(g); + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr); + + /* + * Check and report any fatal warp errors. + */ + gr_intr_report_warp_error(g, gpc, tpc, sm, global_esr, warp_esr, + global_mask, offset); + + (void)nvgpu_pg_elpg_protected_call(g, + nvgpu_safe_cast_u32_to_s32( + g->ops.gr.intr.record_sm_error_state(g, gpc, tpc, + sm, fault_ch))); + +#ifdef CONFIG_NVGPU_DEBUGGER + sm_debugger_attached = g->ops.gr.sm_debugger_attached(g); + if (!sm_debugger_attached) { + nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x", + global_esr, warp_esr); + return -EFAULT; + } + + if (g->ops.gr.pre_process_sm_exception != NULL) { + ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm, + global_esr, warp_esr, + sm_debugger_attached, + fault_ch, + &early_exit, + &ignore_debugger); + if (ret != 0) { + nvgpu_err(g, "could not pre-process sm error!"); + return ret; + } + } + + if (early_exit) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "returning early"); + return ret; + } + + /* + * Disable forwarding of tpc exceptions, + * the debugger will reenable exceptions after servicing them. 
+ * + * Do not disable exceptions if the only SM exception is BPT_INT + */ + if ((g->ops.gr.esr_bpt_pending_events(global_esr, + NVGPU_EVENT_ID_BPT_INT)) && (warp_esr == 0U)) { + disable_sm_exceptions = false; + } + + if (!ignore_debugger && disable_sm_exceptions) { + g->ops.gr.intr.tpc_exception_sm_disable(g, offset); + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "SM Exceptions disabled"); + } + + /* if debugger is present and an error has occurred, do a warp sync */ + ret = gr_intr_sm_exception_warp_sync(g, gpc, tpc, sm, + global_esr, warp_esr, global_mask, + ignore_debugger, post_event); +#else + /* Return error so that recovery is triggered */ + ret = -EFAULT; +#endif + + return ret; +} + +int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch, + struct nvgpu_gr_isr_data *isr_data) +{ + u32 gr_fecs_intr, mailbox_value; + int ret = 0; + u32 chid = (isr_data->ch != NULL) ? + isr_data->ch->chid : NVGPU_INVALID_CHANNEL_ID; + u32 mailbox_id = NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6; + struct nvgpu_fecs_host_intr_status *fecs_host_intr; + + gr_fecs_intr = isr_data->fecs_intr; + if (gr_fecs_intr == 0U) { + return 0; + } + fecs_host_intr = &isr_data->fecs_host_intr_status; + + if (fecs_host_intr->unimp_fw_method_active) { + mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g, + mailbox_id); + nvgpu_gr_intr_set_error_notifier(g, isr_data, + NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD); + nvgpu_err(g, "firmware method error: " + "mailxbox6 0x%08x, trapped_addr_reg 0x%08x " + "set_falcon_method 0x%08x, class 0x%08x " + "non-whitelist reg: 0x%08x", + mailbox_value, isr_data->addr, + isr_data->offset << 2U, isr_data->class_num, + isr_data->data_lo); + ret = -1; + } + + if (fecs_host_intr->ctxsw_intr0 != 0U) { + mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g, + mailbox_id); +#ifdef CONFIG_NVGPU_FECS_TRACE + if (mailbox_value == + g->ops.gr.fecs_trace.get_buffer_full_mailbox_val()) { + nvgpu_info(g, "ctxsw intr0 set by ucode, " + "timestamp buffer full"); + nvgpu_gr_fecs_trace_reset_buffer(g); + } else +#endif + /* + * The mailbox values may vary across chips hence keeping it + * as a HAL. + */ + if ((g->ops.gr.intr.get_ctxsw_checksum_mismatch_mailbox_val != NULL) + && (mailbox_value == + g->ops.gr.intr.get_ctxsw_checksum_mismatch_mailbox_val())) { + + gr_intr_report_ctxsw_error(g, + GPU_FECS_CTXSW_CRC_MISMATCH, + chid, mailbox_value); + nvgpu_err(g, "ctxsw intr0 set by ucode, " + "ctxsw checksum mismatch"); + ret = -1; + } else { + /* + * Other errors are also treated as fatal and channel + * recovery is initiated and error is reported to + * 3LSS. + */ + gr_intr_report_ctxsw_error(g, + GPU_FECS_FAULT_DURING_CTXSW, + chid, mailbox_value); + nvgpu_err(g, + "ctxsw intr0 set by ucode, error_code: 0x%08x", + mailbox_value); + ret = -1; + } + } + + if (fecs_host_intr->fault_during_ctxsw_active) { + gr_intr_report_ctxsw_error(g, + GPU_FECS_FAULT_DURING_CTXSW, + chid, 0); + nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid); + ret = -1; + } + + if (fecs_host_intr->watchdog_active) { + gr_intr_report_ctxsw_error(g, + GPU_FECS_CTXSW_WATCHDOG_TIMEOUT, + chid, 0); + /* currently, recovery is not initiated */ + nvgpu_err(g, "fecs watchdog triggered for channel %u, " + "cannot ctxsw anymore !!", chid); + g->ops.gr.falcon.dump_stats(g); + } + + /* + * un-supported interrupts will be flagged in + * g->ops.gr.falcon.fecs_host_intr_status. 
+ */ + g->ops.gr.falcon.fecs_host_clear_intr(g, gr_fecs_intr); + + return ret; +} + +static int gr_intr_check_handle_tpc_exception(struct gk20a *g, u32 gpc, + u32 tpc_exception, bool *post_event, struct nvgpu_gr_config *gr_config, + struct nvgpu_channel *fault_ch, u32 *hww_global_esr) +{ + int tmp_ret, ret = 0; + u32 tpc; + + for (tpc = 0; + tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc); + tpc++) { + if ((tpc_exception & BIT32(tpc)) == 0U) { + continue; + } + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d: TPC%d exception pending", gpc, tpc); + + tmp_ret = gr_intr_handle_tpc_exception(g, gpc, tpc, + post_event, fault_ch, hww_global_esr); + ret = (ret != 0) ? ret : tmp_ret; + } + return ret; +} + +int nvgpu_gr_intr_handle_gpc_exception(struct gk20a *g, bool *post_event, + struct nvgpu_gr_config *gr_config, struct nvgpu_channel *fault_ch, + u32 *hww_global_esr) +{ + int ret = 0; + u32 gpc; + u32 exception1 = g->ops.gr.intr.read_exception1(g); + u32 gpc_exception, tpc_exception; + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, " "); + + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr_config); gpc++) { + if ((exception1 & BIT32(gpc)) == 0U) { + continue; + } + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC%d exception pending", gpc); + gpc_exception = g->ops.gr.intr.read_gpc_exception(g, gpc); + tpc_exception = g->ops.gr.intr.read_gpc_tpc_exception( + gpc_exception); + + /* check and handle if any tpc has an exception */ + ret = gr_intr_check_handle_tpc_exception(g, gpc, tpc_exception, + post_event, gr_config, fault_ch, hww_global_esr); + + /* Handle GCC exception */ + if (g->ops.gr.intr.handle_gcc_exception != NULL) { + g->ops.gr.intr.handle_gcc_exception(g, gpc, + gpc_exception, + &g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter, + &g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter); + } + + /* Handle GPCCS exceptions */ + if (g->ops.gr.intr.handle_gpc_gpccs_exception != NULL) { + g->ops.gr.intr.handle_gpc_gpccs_exception(g, gpc, + gpc_exception, + &g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter, + &g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter); + } + + /* Handle GPCMMU exceptions */ + if (g->ops.gr.intr.handle_gpc_gpcmmu_exception != NULL) { + g->ops.gr.intr.handle_gpc_gpcmmu_exception(g, gpc, + gpc_exception, + &g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter, + &g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter); + } + + /* Handle PROP exception */ + if (g->ops.gr.intr.handle_gpc_prop_exception != NULL) { + g->ops.gr.intr.handle_gpc_prop_exception(g, gpc, + gpc_exception); + } + + /* Handle ZCULL exception */ + if (g->ops.gr.intr.handle_gpc_zcull_exception != NULL) { + g->ops.gr.intr.handle_gpc_zcull_exception(g, gpc, + gpc_exception); + } + + /* Handle SETUP exception */ + if (g->ops.gr.intr.handle_gpc_setup_exception != NULL) { + g->ops.gr.intr.handle_gpc_setup_exception(g, gpc, + gpc_exception); + } + + /* Handle PES exception */ + if (g->ops.gr.intr.handle_gpc_pes_exception != NULL) { + g->ops.gr.intr.handle_gpc_pes_exception(g, gpc, + gpc_exception); + } + + /* Handle ZROP exception */ + if (g->ops.gr.intr.handle_gpc_zrop_hww != NULL) { + g->ops.gr.intr.handle_gpc_zrop_hww(g, gpc, + gpc_exception); + } + + /* Handle CROP exception */ + if (g->ops.gr.intr.handle_gpc_crop_hww != NULL) { + g->ops.gr.intr.handle_gpc_crop_hww(g, gpc, + gpc_exception); + } + + /* Handle RRH exception */ + if (g->ops.gr.intr.handle_gpc_rrh_hww != NULL) { + g->ops.gr.intr.handle_gpc_rrh_hww(g, gpc, + gpc_exception); + } + + } + + 
return ret; +} + +void nvgpu_gr_intr_handle_notify_pending(struct gk20a *g, + struct nvgpu_gr_isr_data *isr_data) +{ + struct nvgpu_channel *ch = isr_data->ch; + int err; + + if (ch == NULL) { + return; + } + + if (nvgpu_tsg_from_ch(ch) == NULL) { + return; + } + + nvgpu_log_fn(g, " "); + +#if defined(CONFIG_NVGPU_CYCLESTATS) + nvgpu_cyclestats_exec(g, ch, isr_data->data_lo); +#endif + + err = nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); + if (err != 0) { + nvgpu_log(g, gpu_dbg_intr, "failed to broadcast"); + } +} + +void nvgpu_gr_intr_handle_semaphore_pending(struct gk20a *g, + struct nvgpu_gr_isr_data *isr_data) +{ + struct nvgpu_channel *ch = isr_data->ch; + struct nvgpu_tsg *tsg; + + if (ch == NULL) { + return; + } + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + int err; + +#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL + g->ops.tsg.post_event_id(tsg, + NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN); +#endif + + err = nvgpu_cond_broadcast(&ch->semaphore_wq); + if (err != 0) { + nvgpu_log(g, gpu_dbg_intr, "failed to broadcast"); + } + } else { + nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + } +} + +#ifdef CONFIG_NVGPU_DEBUGGER +static void gr_intr_signal_exception_event(struct gk20a *g, + bool post_event, + struct nvgpu_channel *fault_ch) +{ + if (g->ops.gr.sm_debugger_attached(g) && + post_event && (fault_ch != NULL)) { + g->ops.debugger.post_events(fault_ch); + } +} +#endif + +static u32 gr_intr_handle_exception_interrupts(struct gk20a *g, + u32 *clear_intr, + struct nvgpu_tsg *tsg, u32 *global_esr, + struct nvgpu_gr_intr_info *intr_info, + struct nvgpu_gr_isr_data *isr_data) +{ + struct nvgpu_channel *fault_ch = NULL; + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + bool need_reset = false; + + if (intr_info->exception != 0U) { + bool is_gpc_exception = false; + + need_reset = g->ops.gr.intr.handle_exceptions(g, + &is_gpc_exception); + + /* check if a gpc exception has occurred */ + if (is_gpc_exception && !need_reset) { + bool post_event = false; + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "GPC exception pending"); + + if (tsg != NULL) { + fault_ch = isr_data->ch; + } + + /* fault_ch can be NULL */ + /* check if any gpc has an exception */ + if (nvgpu_gr_intr_handle_gpc_exception(g, &post_event, + gr_config, fault_ch, global_esr) != 0) { + need_reset = true; + } + +#ifdef CONFIG_NVGPU_DEBUGGER + /* signal clients waiting on an event */ + gr_intr_signal_exception_event(g, + post_event, fault_ch); +#endif + } + *clear_intr &= ~intr_info->exception; + + if (need_reset) { + nvgpu_err(g, "set gr exception notifier"); + nvgpu_gr_intr_set_error_notifier(g, isr_data, + NVGPU_ERR_NOTIFIER_GR_EXCEPTION); + } + } + + return (need_reset)? 
1U : 0U; +} + +static u32 gr_intr_handle_illegal_interrupts(struct gk20a *g, + u32 gr_intr, u32 *clear_intr, + struct nvgpu_gr_intr_info *intr_info, + struct nvgpu_gr_isr_data *isr_data) +{ + u32 do_reset = 0U; + + if (intr_info->illegal_notify != 0U) { + nvgpu_err(g, "illegal notify pending"); + + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_ILLEGAL_NOTIFY); + nvgpu_gr_intr_set_error_notifier(g, isr_data, + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); + do_reset = 1U; + *clear_intr &= ~intr_info->illegal_notify; + } + + if (intr_info->illegal_method != 0U) { + if (gr_intr_handle_illegal_method(g, isr_data) != 0) { + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_ILLEGAL_METHOD); + + do_reset = 1U; + } + *clear_intr &= ~intr_info->illegal_method; + } + + if (intr_info->illegal_class != 0U) { + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_ILLEGAL_CLASS); + nvgpu_err(g, "invalid class 0x%08x, offset 0x%08x", + isr_data->class_num, isr_data->offset); + + nvgpu_gr_intr_set_error_notifier(g, isr_data, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); + do_reset = 1U; + *clear_intr &= ~intr_info->illegal_class; + } + return do_reset; +} + +static u32 gr_intr_handle_error_interrupts(struct gk20a *g, + u32 gr_intr, u32 *clear_intr, + struct nvgpu_gr_intr_info *intr_info, + struct nvgpu_gr_isr_data *isr_data) +{ + u32 do_reset = 0U; + + if (intr_info->fecs_error != 0U) { + isr_data->fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g, + &(isr_data->fecs_host_intr_status)); + if (g->ops.gr.intr.handle_fecs_error(g, + isr_data->ch, isr_data) != 0) { + do_reset = 1U; + } + *clear_intr &= ~intr_info->fecs_error; + } + + if (intr_info->class_error != 0U) { + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_CLASS_ERROR); + gr_intr_handle_class_error(g, isr_data); + do_reset = 1U; + *clear_intr &= ~intr_info->class_error; + } + + /* this one happens if someone tries to hit a non-whitelisted + * register using set_falcon[4] */ + if (intr_info->fw_method != 0U) { + u32 ch_id = (isr_data->ch != NULL) ? + isr_data->ch->chid : NVGPU_INVALID_CHANNEL_ID; + nvgpu_err(g, + "firmware method 0x%08x, offset 0x%08x for channel %u", + isr_data->class_num, isr_data->offset, + ch_id); + + nvgpu_gr_intr_set_error_notifier(g, isr_data, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); + do_reset = 1U; + *clear_intr &= ~intr_info->fw_method; + } + return do_reset; +} + +static void gr_intr_handle_pending_interrupts(struct gk20a *g, + u32 *clear_intr, + struct nvgpu_gr_intr_info *intr_info, + struct nvgpu_gr_isr_data *isr_data) +{ + if (intr_info->notify != 0U) { + g->ops.gr.intr.handle_notify_pending(g, isr_data); + *clear_intr &= ~intr_info->notify; + } + + if (intr_info->semaphore != 0U) { + g->ops.gr.intr.handle_semaphore_pending(g, isr_data); + *clear_intr &= ~intr_info->semaphore; + } + + if (intr_info->buffer_notify != 0U) { + /* + * This notifier event is ignored at present as there is no + * real usecase. 
+ */ + nvgpu_log(g, gpu_dbg_intr, "buffer notify interrupt"); + *clear_intr &= ~intr_info->buffer_notify; + } + + if (intr_info->debug_method != 0U) { + nvgpu_warn(g, "dropping method(0x%x) on subchannel(%d)", + isr_data->offset, isr_data->sub_chan); + + *clear_intr &= ~intr_info->debug_method; + } +} + +static struct nvgpu_tsg *gr_intr_get_channel_from_ctx(struct gk20a *g, + u32 gr_intr, u32 *chid, + struct nvgpu_gr_isr_data *isr_data) +{ + struct nvgpu_channel *ch = NULL; + u32 tsgid = NVGPU_INVALID_TSG_ID; + struct nvgpu_tsg *tsg_info = NULL; + u32 channel_id; + + ch = nvgpu_gr_intr_get_channel_from_ctx(g, isr_data->curr_ctx, &tsgid); + isr_data->ch = ch; + channel_id = (ch != NULL) ? ch->chid : NVGPU_INVALID_CHANNEL_ID; + + if (ch == NULL) { + nvgpu_err(g, + "pgraph intr: 0x%08x, channel_id: INVALID", gr_intr); + } else { + tsg_info = nvgpu_tsg_from_ch(ch); + if (tsg_info == NULL) { + nvgpu_err(g, "pgraph intr: 0x%08x, channel_id: %d " + "not bound to tsg", gr_intr, channel_id); + } + } + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "channel %d: addr 0x%08x, " + "data 0x%08x 0x%08x," + "ctx 0x%08x, offset 0x%08x, " + "subchannel 0x%08x, class 0x%08x", + channel_id, isr_data->addr, + isr_data->data_hi, isr_data->data_lo, + isr_data->curr_ctx, isr_data->offset, + isr_data->sub_chan, isr_data->class_num); + + *chid = channel_id; + + return tsg_info; +} + +static void gr_clear_intr_status(struct gk20a *g, + struct nvgpu_gr_isr_data *isr_data, + u32 clear_intr, u32 gr_intr, u32 chid) +{ + if (clear_intr != 0U) { + if (isr_data->ch == NULL) { + /* + * This is probably an interrupt during + * gk20a_free_channel() + */ + nvgpu_err(g, "unhandled gr intr 0x%08x for " + "unreferenceable channel, clearing", + gr_intr); + } else { + nvgpu_err(g, "unhandled gr intr 0x%08x for chid %u", + gr_intr, chid); + } + } +} + +int nvgpu_gr_intr_stall_isr(struct gk20a *g) +{ + struct nvgpu_gr_isr_data isr_data; + struct nvgpu_gr_intr_info intr_info; + u32 need_reset = 0U; + struct nvgpu_tsg *tsg = NULL; + u32 global_esr = 0; + u32 chid = NVGPU_INVALID_CHANNEL_ID; + u32 gr_intr = g->ops.gr.intr.read_pending_interrupts(g, &intr_info); + u32 clear_intr = gr_intr; + + nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_intr, "pgraph intr 0x%08x", gr_intr); + + if (gr_intr == 0U) { + return 0; + } + + (void) memset(&isr_data, 0, sizeof(struct nvgpu_gr_isr_data)); + + /* Disable fifo access */ + g->ops.gr.init.fifo_access(g, false); + + g->ops.gr.intr.trapped_method_info(g, &isr_data); + + if (isr_data.curr_ctx != 0U) { + tsg = gr_intr_get_channel_from_ctx(g, gr_intr, &chid, + &isr_data); + } + + gr_intr_handle_pending_interrupts(g, &clear_intr, + &intr_info, &isr_data); + + need_reset |= gr_intr_handle_illegal_interrupts(g, gr_intr, + &clear_intr, &intr_info, &isr_data); + + need_reset |= gr_intr_handle_error_interrupts(g, gr_intr, + &clear_intr, &intr_info, &isr_data); + + need_reset |= gr_intr_handle_exception_interrupts(g, &clear_intr, + tsg, &global_esr, &intr_info, &isr_data); + + if (need_reset != 0U) { + nvgpu_rc_gr_fault(g, tsg, isr_data.ch); + } + + gr_clear_intr_status(g, &isr_data, clear_intr, gr_intr, chid); + + /* clear handled and unhandled interrupts */ + g->ops.gr.intr.clear_pending_interrupts(g, gr_intr); + + /* Enable fifo access */ + g->ops.gr.init.fifo_access(g, true); + +#if defined(CONFIG_NVGPU_CHANNEL_TSG_CONTROL) && defined(CONFIG_NVGPU_DEBUGGER) + /* Posting of BPT events should be the last thing in this function */ + if ((global_esr != 0U) && (tsg != NULL) && (need_reset == 0U)) { + 
gr_intr_post_bpt_events(g, tsg, global_esr); + } +#endif + + if (isr_data.ch != NULL) { + nvgpu_channel_put(isr_data.ch); + } + + return 0; +} + +/* invalidate channel lookup tlb */ +void nvgpu_gr_intr_flush_channel_tlb(struct gk20a *g) +{ + struct nvgpu_gr_intr *intr = nvgpu_gr_get_intr_ptr(g); + + nvgpu_spinlock_acquire(&intr->ch_tlb_lock); + (void) memset(intr->chid_tlb, 0, + sizeof(struct gr_channel_map_tlb_entry) * + GR_CHANNEL_MAP_TLB_SIZE); + nvgpu_spinlock_release(&intr->ch_tlb_lock); +} + +struct nvgpu_gr_intr *nvgpu_gr_intr_init_support(struct gk20a *g) +{ + struct nvgpu_gr_intr *intr; + + nvgpu_log_fn(g, " "); + + intr = nvgpu_kzalloc(g, sizeof(*intr)); + if (intr == NULL) { + return intr; + } + + nvgpu_spinlock_init(&intr->ch_tlb_lock); + + return intr; +} + +void nvgpu_gr_intr_remove_support(struct gk20a *g, struct nvgpu_gr_intr *intr) +{ + nvgpu_log_fn(g, " "); + + if (intr == NULL) { + return; + } + nvgpu_kfree(g, intr); +} diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h b/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h new file mode 100644 index 000000000..a3efbd9a2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_intr_priv.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_INTR_PRIV_H +#define NVGPU_GR_INTR_PRIV_H + +#include +#include +#include + +struct nvgpu_channel; + +/** + * Size of lookup buffer used for context translation to GPU channel + * and TSG identifiers. + * This value must be a power of 2. + */ +#define GR_CHANNEL_MAP_TLB_SIZE 2U + +/** + * GR interrupt information struct. + * + * This structure maintains information on pending GR engine interrupts. + */ +struct nvgpu_gr_intr_info { + /** + * This value is set in case notification interrupt is pending. + * Same value is used to clear the interrupt. + */ + u32 notify; + /** + * This value is set in case semaphore interrupt is pending. + * Same value is used to clear the interrupt. + */ + u32 semaphore; + /** + * This value is set in case illegal notify interrupt is pending. + * Same value is used to clear the interrupt. + */ + u32 illegal_notify; + /** + * This value is set in case illegal method interrupt is pending. + * Same value is used to clear the interrupt. + */ + u32 illegal_method; + /** + * This value is set in case illegal class interrupt is pending. + * Same value is used to clear the interrupt. 
+ */ + u32 illegal_class; + /** + * This value is set in case FECS error interrupt is pending. + * Same value is used to clear the interrupt. + */ + u32 fecs_error; + /** + * This value is set in case illegal class interrupt is pending. + * Same value is used to clear the interrupt. + */ + u32 class_error; + /** + * This value is set in case firmware method interrupt is pending. + * Same value is used to clear the interrupt. + */ + u32 fw_method; + /** + * This value is set in case exception is pending in graphics pipe. + * Same value is used to clear the interrupt. + */ + u32 exception; + /* + * This value is set when the FE receives a valid method and it + * matches with the value configured in PRI_FE_DEBUG_METHOD_* pri + * registers; In case of a match, FE proceeds to drop that method. + * This provides a way to the SW to turn off HW decoding of this + * method and convert it to a SW method. + */ + u32 debug_method; + /* + * This value is set on the completion of a LaunchDma method with + * InterruptType field configured to INTERRUPT. + */ + u32 buffer_notify; +}; + +/** + * TPC exception data structure. + * + * TPC exceptions can be decomposed into exceptions triggered by its + * subunits. This structure keeps track of which subunits have + * triggered exception. + */ +struct nvgpu_gr_tpc_exception { + /** + * This flag is set in case TEX exception is pending. + */ + bool tex_exception; + /** + * This flag is set in case SM exception is pending. + */ + bool sm_exception; + /** + * This flag is set in case MPC exception is pending. + */ + bool mpc_exception; + /** + * This flag is set in case PE exception is pending. + */ + bool pe_exception; +}; + +/** + * GR ISR data structure. + * + * This structure holds all necessary information to handle all GR engine + * error/exception interrupts. + */ +struct nvgpu_gr_isr_data { + /** + * Contents of TRAPPED_ADDR register used to decode below + * fields. + */ + u32 addr; + /** + * Low word of the trapped method data. + */ + u32 data_lo; + /** + * High word of the trapped method data. + */ + u32 data_hi; + /** + * Information of current context. + */ + u32 curr_ctx; + /** + * Pointer to faulted GPU channel. + */ + struct nvgpu_channel *ch; + /** + * Address of the trapped method. + */ + u32 offset; + /** + * Subchannel ID of the trapped method. + */ + u32 sub_chan; + /** + * Class ID corresponding to above subchannel. + */ + u32 class_num; + /** + * Value read from fecs_host_int_status h/w reg. + */ + u32 fecs_intr; + /** + * S/W defined status for fecs_host_int_status. + */ + struct nvgpu_fecs_host_intr_status fecs_host_intr_status; +}; + +/** + * Details of lookup buffer used to translate context to GPU + * channel/TSG identifiers. + */ +struct gr_channel_map_tlb_entry { + /** + * Information of context. + */ + u32 curr_ctx; + /** + * GPU channel ID. + */ + u32 chid; + /** + * GPU Time Slice Group ID. + */ + u32 tsgid; +}; + +/** + * GR interrupt management data structure. + * + * This structure holds various fields to manage GR engine interrupt + * handling. + */ +struct nvgpu_gr_intr { + /** + * Lookup buffer structure used to translate context to GPU + * channel and TSG identifiers. + */ + struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE]; + /** + * Entry in lookup buffer that should be overwritten if there is + * no remaining free entry. + */ + u32 channel_tlb_flush_index; + /** + * Spinlock for all lookup buffer accesses. 
+ */ + struct nvgpu_spinlock ch_tlb_lock; +}; + +#endif /* NVGPU_GR_INTR_PRIV_H */ + diff --git a/drivers/gpu/nvgpu/common/gr/gr_priv.h b/drivers/gpu/nvgpu/common/gr/gr_priv.h new file mode 100644 index 000000000..032eed990 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_priv.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_PRIV_H +#define NVGPU_GR_PRIV_H + +#include +#include + +struct nvgpu_gr_ctx_desc; +struct nvgpu_gr_global_ctx_buffer_desc; +struct nvgpu_gr_obj_ctx_golden_image; +struct nvgpu_gr_config; +#ifdef CONFIG_NVGPU_GRAPHICS +struct nvgpu_gr_zbc; +struct nvgpu_gr_zcull; +#endif +#ifdef CONFIG_NVGPU_DEBUGGER +struct nvgpu_gr_hwpm_map; +#endif + +/** + * GR engine data structure. + * + * This is the parent structure to all other GR engine data structures, + * and holds a pointer to all of them. This structure also stores + * various fields to track GR engine initialization state. + * + * Pointer to this structure is maintained in GPU driver structure. + */ +struct nvgpu_gr { + /** + * Pointer to GPU driver struct. + */ + struct gk20a *g; + + /** + * Instance ID of GR engine. + */ + u32 instance_id; + + /** + * Condition variable for GR initialization. + * Waiters shall wait on this condition to ensure GR engine + * is initialized. + */ + struct nvgpu_cond init_wq; + + /** + * Flag to indicate if GR engine is initialized. + */ + bool initialized; + + /** + * Syspipe ID of the GR instance. + */ + u32 syspipe_id; + + /** + * Pointer to global context buffer descriptor structure. + */ + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer; + + /** + * Pointer to Golden context image structure. + */ + struct nvgpu_gr_obj_ctx_golden_image *golden_image; + + /** + * Pointer to GR context descriptor structure. + */ + struct nvgpu_gr_ctx_desc *gr_ctx_desc; + + /** + * Pointer to GR configuration structure. + */ + struct nvgpu_gr_config *config; + + /** + * Pointer to GR falcon data structure. + */ + struct nvgpu_gr_falcon *falcon; + + /** + * Pointer to GR interrupt data structure. + */ + struct nvgpu_gr_intr *intr; + + /** + * Function pointer to remove GR s/w support. + */ + void (*remove_support)(struct gk20a *g); + + /** + * Flag to indicate GR s/w has been initialized. 
+ */ + bool sw_ready; + +#ifdef CONFIG_NVGPU_DEBUGGER + struct nvgpu_gr_hwpm_map *hwpm_map; +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS + struct nvgpu_gr_zcull *zcull; + + struct nvgpu_gr_zbc *zbc; +#endif + +#ifdef CONFIG_NVGPU_NON_FUSA + u32 fecs_feature_override_ecc_val; +#endif + +#ifdef CONFIG_NVGPU_CILP + u32 cilp_preempt_pending_chid; +#endif + +#if defined(CONFIG_NVGPU_RECOVERY) || defined(CONFIG_NVGPU_DEBUGGER) + struct nvgpu_mutex ctxsw_disable_mutex; + int ctxsw_disable_count; +#endif +}; + +#endif /* NVGPU_GR_PRIV_H */ + diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c new file mode 100644 index 000000000..77b034578 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_GRAPHICS +#include +#endif +#include +#include +#include +#include + +#include "gr_priv.h" + +#ifdef CONFIG_NVGPU_GRAPHICS +static int nvgpu_gr_setup_zcull(struct gk20a *g, struct nvgpu_channel *c, + struct nvgpu_gr_ctx *gr_ctx) +{ + int ret = 0; + + nvgpu_log_fn(g, " "); + + ret = nvgpu_channel_disable_tsg(g, c); + if (ret != 0) { + nvgpu_err(g, "failed to disable channel/TSG"); + return ret; + } + + ret = nvgpu_preempt_channel(g, c); + if (ret != 0) { + nvgpu_err(g, "failed to preempt channel/TSG"); + goto out; + } + + ret = nvgpu_gr_zcull_ctx_setup(g, c->subctx, gr_ctx); + if (ret != 0) { + nvgpu_err(g, "failed to setup zcull"); + goto out; + } + /* no error at this point */ + ret = nvgpu_channel_enable_tsg(g, c); + if (ret != 0) { + nvgpu_err(g, "failed to re-enable channel/TSG"); + } + + return ret; + +out: + /* + * control reaches here if preempt failed or nvgpu_gr_zcull_ctx_setup + * failed. 
Propagate preempt failure err or err for + * nvgpu_gr_zcull_ctx_setup + */ + if (nvgpu_channel_enable_tsg(g, c) != 0) { + /* ch might not be bound to tsg */ + nvgpu_err(g, "failed to enable channel/TSG"); + } + + return ret; +} + +int nvgpu_gr_setup_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c, + u64 zcull_va, u32 mode) +{ + struct nvgpu_tsg *tsg; + struct nvgpu_gr_ctx *gr_ctx; + + tsg = nvgpu_tsg_from_ch(c); + if (tsg == NULL) { + return -EINVAL; + } + + gr_ctx = tsg->gr_ctx; + nvgpu_gr_ctx_set_zcull_ctx(g, gr_ctx, mode, zcull_va); + + return nvgpu_gr_setup_zcull(g, c, gr_ctx); +} +#endif + +static int nvgpu_gr_setup_validate_channel_and_class(struct gk20a *g, + struct nvgpu_channel *c, u32 class_num) +{ + int err = 0; + + /* an address space needs to have been bound at this point.*/ + if (!nvgpu_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to address space at time" + " of grctx allocation"); + return -EINVAL; + } + + if (!g->ops.gpu_class.is_valid(class_num)) { + nvgpu_err(g, + "invalid obj class 0x%x", class_num); + err = -EINVAL; + } + + return err; +} + +static int nvgpu_gr_setup_alloc_subctx(struct gk20a *g, struct nvgpu_channel *c) +{ + int err = 0; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + if (c->subctx == NULL) { + c->subctx = nvgpu_gr_subctx_alloc(g, c->vm); + if (c->subctx == NULL) { + err = -ENOMEM; + } + } + } + + return err; +} + +int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, + u32 flags) +{ + struct gk20a *g = c->g; + struct nvgpu_gr_ctx *gr_ctx; + struct nvgpu_tsg *tsg = NULL; + int err = 0; + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, + "GR%u: allocate object context for channel %u", + gr->instance_id, c->chid); + + err = nvgpu_gr_setup_validate_channel_and_class(g, c, class_num); + if (err != 0) { + goto out; + } + + c->obj_class = class_num; + +#ifndef CONFIG_NVGPU_HAL_NON_FUSA + /* + * Only compute and graphics classes need object context. + * Return success for valid non-compute and non-graphics classes. + * Invalid classes are already captured in + * nvgpu_gr_setup_validate_channel_and_class() function. 
+ */ + if (!g->ops.gpu_class.is_valid_compute(class_num) && + !g->ops.gpu_class.is_valid_gfx(class_num)) { + return 0; + } +#endif + + tsg = nvgpu_tsg_from_ch(c); + if (tsg == NULL) { + return -EINVAL; + } + + err = nvgpu_gr_setup_alloc_subctx(g, c); + if (err != 0) { + nvgpu_err(g, "failed to allocate gr subctx buffer"); + goto out; + } + + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + + gr_ctx = tsg->gr_ctx; + + if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { + tsg->vm = c->vm; + nvgpu_vm_get(tsg->vm); + + err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, + gr->global_ctx_buffer, gr->gr_ctx_desc, + gr->config, gr_ctx, c->subctx, + tsg->vm, &c->inst_block, class_num, flags, + c->cde, c->vpr); + if (err != 0) { + nvgpu_err(g, + "failed to allocate gr ctx buffer"); + nvgpu_mutex_release(&tsg->ctx_init_lock); + nvgpu_vm_put(tsg->vm); + tsg->vm = NULL; + goto out; + } + + nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid); + } else { + /* commit gr ctx buffer */ + nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx, + c->subctx, nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + } + +#ifdef CONFIG_NVGPU_FECS_TRACE + if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) { + err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block, + c->subctx, gr_ctx, tsg->tgid, 0); + if (err != 0) { + nvgpu_warn(g, + "fail to bind channel for ctxsw trace"); + } + } +#endif + +#ifdef CONFIG_NVGPU_DEBUGGER + if ((g->num_sys_perfmon == 0U) && + (g->ops.perf.get_num_hwpm_perfmon != NULL) && + (err == 0)) { + g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon, + &g->num_fbp_perfmon, &g->num_gpc_perfmon); + nvgpu_log(g, gpu_dbg_gr | gpu_dbg_gpu_dbg, + "num_sys_perfmon[%u] num_fbp_perfmon[%u] " + "num_gpc_perfmon[%u] ", + g->num_sys_perfmon, g->num_fbp_perfmon, + g->num_gpc_perfmon); + nvgpu_assert((g->num_sys_perfmon != 0U) && + (g->num_fbp_perfmon != 0U) && + (g->num_gpc_perfmon != 0U)); + } +#endif + + nvgpu_mutex_release(&tsg->ctx_init_lock); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return 0; +out: + if (c->subctx != NULL) { + nvgpu_gr_subctx_free(g, c->subctx, c->vm); + c->subctx = NULL; + } + + /* 1. gr_ctx, patch_ctx and global ctx buffer mapping + can be reused so no need to release them. + 2. golden image init and load is a one time thing so if + they pass, no need to undo. 
*/ + nvgpu_err(g, "fail"); + return err; +} + +void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, + struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) +{ + nvgpu_log_fn(g, " "); + + if (gr_ctx != NULL) { +#ifdef CONFIG_DEBUG_FS + if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) && + nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( + g->gr->gr_ctx_desc)) { + g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, + nvgpu_gr_ctx_get_ctx_mem(gr_ctx)); + } +#endif + + nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer, vm); + } +} + +void nvgpu_gr_setup_free_subctx(struct nvgpu_channel *c) +{ + nvgpu_log_fn(c->g, " "); + + if (!nvgpu_is_enabled(c->g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return; + } + + if (c->subctx != NULL) { + nvgpu_gr_subctx_free(c->g, c->subctx, c->vm); + c->subctx = NULL; + } +} + +static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode, + u32 *compute_preempt_mode, + struct nvgpu_gr_ctx *gr_ctx) +{ +#ifdef CONFIG_NVGPU_GRAPHICS + /* skip setting anything if both modes are already set */ + if ((*graphics_preempt_mode != 0U) && + (*graphics_preempt_mode == + nvgpu_gr_ctx_get_graphics_preemption_mode(gr_ctx))) { + *graphics_preempt_mode = 0; + } +#endif /* CONFIG_NVGPU_GRAPHICS */ + + if ((*compute_preempt_mode != 0U) && + (*compute_preempt_mode == + nvgpu_gr_ctx_get_compute_preemption_mode(gr_ctx))) { + *compute_preempt_mode = 0; + } + + if ((*graphics_preempt_mode == 0U) && (*compute_preempt_mode == 0U)) { + return false; + } + + return true; +} + + + +int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, + u32 graphics_preempt_mode, u32 compute_preempt_mode, + u32 gr_instance_id) +{ + struct nvgpu_gr_ctx *gr_ctx; + struct gk20a *g = ch->g; + struct nvgpu_tsg *tsg; + struct vm_gk20a *vm; + struct nvgpu_gr *gr; + u32 class_num; + int err = 0; + + gr = &g->gr[gr_instance_id]; + + class_num = ch->obj_class; + if (class_num == 0U) { + return -EINVAL; + } + + if (!g->ops.gpu_class.is_valid(class_num)) { + nvgpu_err(g, "invalid obj class 0x%x", class_num); + return -EINVAL; + } + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg == NULL) { + return -EINVAL; + } + + vm = tsg->vm; + gr_ctx = tsg->gr_ctx; + + if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode, + &compute_preempt_mode, gr_ctx) == false) { + return 0; + } + + nvgpu_log(g, gpu_dbg_gr | gpu_dbg_sched, "chid=%d tsgid=%d pid=%d " + "graphics_preempt_mode=%u compute_preempt_mode=%u", + ch->chid, ch->tsgid, ch->tgid, + graphics_preempt_mode, compute_preempt_mode); + + err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config, + gr->gr_ctx_desc, gr_ctx, vm, class_num, + graphics_preempt_mode, compute_preempt_mode); + if (err != 0) { + nvgpu_err(g, "set_ctxsw_preemption_mode failed"); + return err; + } + + g->ops.tsg.disable(tsg); + + err = nvgpu_preempt_channel(g, ch); + if (err != 0) { + nvgpu_err(g, "failed to preempt channel/TSG"); + goto enable_ch; + } + + nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx, + ch->subctx); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); + g->ops.gr.init.commit_global_cb_manager(g, gr->config, gr_ctx, + true); + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); + } + + g->ops.tsg.enable(tsg); + + return err; + +enable_ch: + g->ops.tsg.enable(tsg); + return err; +} diff --git a/drivers/gpu/nvgpu/common/gr/gr_utils.c b/drivers/gpu/nvgpu/common/gr/gr_utils.c new file mode 100644 index 000000000..79c3eba1e --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_utils.c @@ -0,0 +1,143 @@ +/* + 
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include + +#include "gr_priv.h" + +u32 nvgpu_gr_checksum_u32(u32 a, u32 b) +{ + return nvgpu_safe_cast_u64_to_u32(((u64)a + (u64)b) & (U32_MAX)); +} + +struct nvgpu_gr_falcon *nvgpu_gr_get_falcon_ptr(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->falcon; +} + +struct nvgpu_gr_config *nvgpu_gr_get_config_ptr(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->config; +} + +struct nvgpu_gr_config *nvgpu_gr_get_gr_instance_config_ptr(struct gk20a *g, + u32 gr_instance_id) +{ + return g->gr[gr_instance_id].config; +} + +struct nvgpu_gr_intr *nvgpu_gr_get_intr_ptr(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->intr; +} + +#ifdef CONFIG_NVGPU_NON_FUSA +u32 nvgpu_gr_get_override_ecc_val(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->fecs_feature_override_ecc_val; +} + +void nvgpu_gr_override_ecc_val(struct nvgpu_gr *gr, u32 ecc_val) +{ + gr->fecs_feature_override_ecc_val = ecc_val; +} +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS +struct nvgpu_gr_zcull *nvgpu_gr_get_zcull_ptr(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->zcull; +} + +struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->zbc; +} +#endif + +#ifdef CONFIG_NVGPU_FECS_TRACE +struct nvgpu_gr_global_ctx_buffer_desc *nvgpu_gr_get_global_ctx_buffer_ptr( + struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->global_ctx_buffer; +} +#endif + +#ifdef CONFIG_NVGPU_CILP +u32 nvgpu_gr_get_cilp_preempt_pending_chid(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + return gr->cilp_preempt_pending_chid; +} + +void nvgpu_gr_clear_cilp_preempt_pending_chid(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + gr->cilp_preempt_pending_chid = + NVGPU_INVALID_CHANNEL_ID; +} +#endif + +#ifdef CONFIG_NVGPU_DEBUGGER +struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr( + struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + return gr->golden_image; +} + +struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g) +{ + struct nvgpu_gr 
*gr = nvgpu_gr_get_cur_instance_ptr(g); + + return gr->hwpm_map; +} + +void nvgpu_gr_reset_falcon_ptr(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + gr->falcon = NULL; +} + +void nvgpu_gr_reset_golden_image_ptr(struct gk20a *g) +{ + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + + gr->golden_image = NULL; +} +#endif diff --git a/drivers/gpu/nvgpu/common/gr/hwpm_map.c b/drivers/gpu/nvgpu/common/gr/hwpm_map.c new file mode 100644 index 000000000..8344bb101 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/hwpm_map.c @@ -0,0 +1,615 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* needed for pri_is_ppc_addr_shared */ +#include "hal/gr/gr/gr_pri_gk20a.h" + +#define NV_PCFG_BASE 0x00088000U +#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200U +#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200U +#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020U + +/* Dummy address for ctxsw'ed pri reg checksum. 
*/ +#define CTXSW_PRI_CHECKSUM_DUMMY_REG 0x00ffffffU + +int nvgpu_gr_hwpm_map_init(struct gk20a *g, struct nvgpu_gr_hwpm_map **hwpm_map, + u32 size) +{ + struct nvgpu_gr_hwpm_map *tmp_map; + + nvgpu_log(g, gpu_dbg_gr, "size = %u", size); + + if (size == 0U) { + return -EINVAL; + } + + tmp_map = nvgpu_kzalloc(g, sizeof(*tmp_map)); + if (tmp_map == NULL) { + return -ENOMEM; + } + + tmp_map->pm_ctxsw_image_size = size; + tmp_map->init = false; + + *hwpm_map = tmp_map; + + return 0; +} + +void nvgpu_gr_hwpm_map_deinit(struct gk20a *g, + struct nvgpu_gr_hwpm_map *hwpm_map) +{ + if (hwpm_map->init) { + nvgpu_big_free(g, hwpm_map->map); + } + + nvgpu_kfree(g, hwpm_map); +} + +u32 nvgpu_gr_hwpm_map_get_size(struct nvgpu_gr_hwpm_map *hwpm_map) +{ + return hwpm_map->pm_ctxsw_image_size; +} + +static int map_cmp(const void *a, const void *b) +{ + const struct ctxsw_buf_offset_map_entry *e1; + const struct ctxsw_buf_offset_map_entry *e2; + + e1 = (const struct ctxsw_buf_offset_map_entry *)a; + e2 = (const struct ctxsw_buf_offset_map_entry *)b; + + if (e1->addr < e2->addr) { + return -1; + } + + if (e1->addr > e2->addr) { + return 1; + } + return 0; +} + +static int add_ctxsw_buffer_map_entries_pmsys( + struct ctxsw_buf_offset_map_entry *map, + struct netlist_aiv_list *regs, u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask) +{ + u32 idx; + u32 cnt = *count; + u32 off = *offset; + + if ((cnt + regs->count) > max_cnt) { + return -EINVAL; + } + + for (idx = 0; idx < regs->count; idx++) { + if ((base + (regs->l[idx].addr & mask)) < 0xFFFU) { + map[cnt].addr = base + (regs->l[idx].addr & mask) + + NV_PCFG_BASE; + } else { + map[cnt].addr = base + (regs->l[idx].addr & mask); + } + map[cnt++].offset = off; + off += 4U; + } + *count = cnt; + *offset = off; + return 0; +} + +static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g, + struct ctxsw_buf_offset_map_entry *map, + struct netlist_aiv_list *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask) +{ + u32 idx; + u32 cnt = *count; + u32 off = *offset; + + if ((cnt + regs->count) > max_cnt) { + return -EINVAL; + } + + /* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1 + * To handle the case of PPC registers getting added into GPC, the below + * code specifically checks for any PPC offsets and adds them using + * proper mask + */ + for (idx = 0; idx < regs->count; idx++) { + /* Check if the address is PPC address */ + if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) { + u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_BASE); + u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_STRIDE); + /* Use PPC mask instead of the GPC mask provided */ + u32 ppcmask = ppc_in_gpc_stride - 1U; + + map[cnt].addr = base + ppc_in_gpc_base + + (regs->l[idx].addr & ppcmask); + } else { + map[cnt].addr = base + (regs->l[idx].addr & mask); + } + map[cnt++].offset = off; + off += 4U; + } + *count = cnt; + *offset = off; + return 0; +} + +static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map, + struct netlist_aiv_list *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask) +{ + u32 idx; + u32 cnt = *count; + u32 off = *offset; + + if ((cnt + regs->count) > max_cnt) { + return -EINVAL; + } + + for (idx = 0; idx < regs->count; idx++) { + map[cnt].addr = base + (regs->l[idx].addr & mask); + map[cnt++].offset = off; + off += 4U; + } + *count = cnt; + *offset = off; + return 0; +} + +/* Helper function to add register entries to the register map for all + * 
subunits + */ +static int add_ctxsw_buffer_map_entries_subunits( + struct ctxsw_buf_offset_map_entry *map, + struct netlist_aiv_list *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 num_units, + u32 active_unit_mask, u32 stride, u32 mask) +{ + u32 unit; + u32 idx; + u32 cnt = *count; + u32 off = *offset; + + if ((cnt + (regs->count * num_units)) > max_cnt) { + return -EINVAL; + } + + /* Data is interleaved for units in ctxsw buffer */ + for (idx = 0; idx < regs->count; idx++) { + for (unit = 0; unit < num_units; unit++) { + if ((active_unit_mask & BIT32(unit)) != 0U) { + map[cnt].addr = base + + (regs->l[idx].addr & mask) + + (unit * stride); + map[cnt++].offset = off; + off += 4U; + + /* + * The ucode computes and saves the checksum of + * all ctxsw'ed register values within a list. + * Entries with addr=0x00ffffff are placeholder + * for these checksums. + * + * There is only one checksum for a list + * even if it contains multiple subunits. Hence, + * skip iterating over all subunits for this + * entry. + */ + if (regs->l[idx].addr == + CTXSW_PRI_CHECKSUM_DUMMY_REG) { + break; + } + } + } + } + *count = cnt; + *offset = off; + return 0; +} + +static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, + struct ctxsw_buf_offset_map_entry *map, + u32 *count, u32 *offset, u32 max_cnt, + struct nvgpu_gr_config *config) +{ + u32 num_gpcs = nvgpu_gr_config_get_gpc_count(config); + u32 num_ppcs, num_tpcs, gpc_num, base; + u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); + u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE); + u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); + u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); + u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + + for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { + num_tpcs = nvgpu_gr_config_get_gpc_tpc_count(config, gpc_num); + base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base; + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_pm_tpc_ctxsw_regs(g), + count, offset, max_cnt, base, + num_tpcs, ~U32(0U), tpc_in_gpc_stride, + (tpc_in_gpc_stride - 1U)) != 0) { + return -EINVAL; + } + + num_ppcs = nvgpu_gr_config_get_gpc_ppc_count(config, gpc_num); + base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_pm_ppc_ctxsw_regs(g), + count, offset, max_cnt, base, num_ppcs, + ~U32(0U), ppc_in_gpc_stride, + (ppc_in_gpc_stride - 1U)) != 0) { + return -EINVAL; + } + + base = gpc_base + (gpc_stride * gpc_num); + if (add_ctxsw_buffer_map_entries_pmgpc(g, map, + nvgpu_netlist_get_pm_gpc_ctxsw_regs(g), + count, offset, max_cnt, base, + (gpc_stride - 1U)) != 0) { + return -EINVAL; + } + + base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num; + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(g), + count, offset, max_cnt, base, ~U32(0U)) != 0) { + return -EINVAL; + } + + base = (g->ops.perf.get_pmmgpc_per_chiplet_offset() * gpc_num); + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_gpc_ctxsw_regs(g), + count, offset, max_cnt, base, ~U32(0U)) != 0) { + return -EINVAL; + } + + base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num); + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_gpc_router_ctxsw_regs(g), + count, offset, max_cnt, base, ~U32(0U)) != 0) { + return -EINVAL; + } + + /* Counter Aggregation Unit, if 
available */ + if (nvgpu_netlist_get_pm_cau_ctxsw_regs(g)->count != 0U) { + base = gpc_base + (gpc_stride * gpc_num) + + tpc_in_gpc_base; + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_pm_cau_ctxsw_regs(g), + count, offset, max_cnt, base, num_tpcs, + ~U32(0U), tpc_in_gpc_stride, + (tpc_in_gpc_stride - 1U)) != 0) { + return -EINVAL; + } + } + + *offset = NVGPU_ALIGN(*offset, 256U); + + base = (g->ops.perf.get_pmmgpc_per_chiplet_offset() * gpc_num); + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(g), + count, offset, max_cnt, base, ~U32(0U)) != 0) { + return -EINVAL; + } + + *offset = NVGPU_ALIGN(*offset, 256U); + } + return 0; +} + +/* + * PM CTXSW BUFFER LAYOUT: + *|=============================================|0x00 <----PM CTXSW BUFFER BASE + *| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words + *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words + *| LIST_compressed_nv_perf_ctx_reg_sysrouter |Space allocated: numRegs words + *| PADDING for 256 byte alignment on Maxwell+ | + *|=============================================|<----256 byte aligned on Maxwell and later + *| LIST_compressed_nv_perf_sys_control_ctx_regs|Space allocated: numRegs words (+ padding) + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) + *|=============================================|<----256 byte aligned + *| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words (+ padding) + *| PADDING for 256 byte alignment | + *|=============================================|<----256 byte aligned (if prev segment exists) + *| LIST_compressed_nv_perf_pma_control_ctx_regs|Space allocated: numRegs words (+ padding) + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) + *|=============================================|<----256 byte aligned + *| LIST_compressed_nv_perf_fbp_ctx_regs |Space allocated: numRegs * n words (for n FB units) + *| LIST_compressed_nv_perf_fbprouter_ctx_regs |Space allocated: numRegs * n words (for n FB units) + *| LIST_compressed_pm_fbpa_ctx_regs |Space allocated: numRegs * n words (for n FB units) + *| LIST_compressed_pm_rop_ctx_regs |Space allocated: numRegs * n words (for n FB units) + *| LIST_compressed_pm_ltc_ctx_regs | + *| LTC0 LTS0 | + *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units) + *| LTCn LTS0 | + *| LTC0 LTS1 | + *| LTC1 LTS1 | + *| LTCn LTS1 | + *| LTC0 LTSn | + *| LTC1 LTSn | + *| LTCn LTSn | + *| PADDING for 256 byte alignment | + *|=============================================|<----256 byte aligned on Maxwell and later + *| LIST_compressed_nv_perf_fbp_control_ctx_regs|Space allocated: numRegs words + padding + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) 
+ *|=============================================|<----256 byte aligned on Maxwell and later + * + * Each "GPCn PRI register" segment above has this layout: + *|=============================================|<----256 byte aligned + *| GPC0 REG0 TPC0 |Each GPC has space allocated to accomodate + *| REG0 TPC1 | all the GPC/TPC register lists + *| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned + *| LIST_pm_ctx_reg_TPC REG1 TPC0 | + *| * numTpcs REG1 TPC1 | + *| LIST_pm_ctx_reg_PPC REG1 TPCn | + *| * numPpcs REGn TPC0 | + *| LIST_pm_ctx_reg_GPC REGn TPC1 | + *| List_pm_ctx_reg_uc_GPC REGn TPCn | + *| LIST_nv_perf_ctx_reg_GPC | + *| LIST_nv_perf_gpcrouter_ctx_reg | + *| LIST_nv_perf_ctx_reg_CAU (Tur) | + *|=============================================| + *| LIST_compressed_nv_perf_gpc_control_ctx_regs|Space allocated: numRegs words + padding + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) + *|=============================================|<----256 byte aligned on Maxwell and later + */ + +static int nvgpu_gr_hwpm_map_create(struct gk20a *g, + struct nvgpu_gr_hwpm_map *hwpm_map, struct nvgpu_gr_config *config) +{ + u32 hwpm_ctxsw_buffer_size = hwpm_map->pm_ctxsw_image_size; + struct ctxsw_buf_offset_map_entry *map; + u32 hwpm_ctxsw_reg_count_max; + u32 map_size; + u32 i, count = 0; + u32 offset = 0; + int ret; + u32 active_fbpa_mask; + u32 num_fbps = nvgpu_fbp_get_num_fbps(g->fbp); + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); + u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE); + u32 num_ltc = g->ops.top.get_max_ltc_per_fbp(g) * + g->ops.priv_ring.get_fbp_count(g); + + if (hwpm_ctxsw_buffer_size == 0U) { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "no PM Ctxsw buffer memory in context buffer"); + return -EINVAL; + } + + hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; + map_size = hwpm_ctxsw_reg_count_max * (u32)sizeof(*map); + + map = nvgpu_big_zalloc(g, map_size); + if (map == NULL) { + return -ENOMEM; + } + + /* Add entries from _LIST_pm_ctx_reg_SYS */ + if (add_ctxsw_buffer_map_entries_pmsys(map, + nvgpu_netlist_get_pm_sys_ctxsw_regs(g), + &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) { + goto cleanup; + } + + /* Add entries from _LIST_nv_perf_ctx_reg_SYS */ + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_sys_ctxsw_regs(g), + &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) { + goto cleanup; + } + + /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/ + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_sys_router_ctxsw_regs(g), + &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) { + goto cleanup; + } + + /* Add entries from _LIST_nv_perf_sys_control_ctx_reg*/ + if (nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g)->count > 0U) { + offset = NVGPU_ALIGN(offset, 256U); + + ret = add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g), + &count, &offset, + hwpm_ctxsw_reg_count_max, 0, ~U32(0U)); + if (ret != 0) { + goto cleanup; + } + } + + if (g->ops.gr.hwpm_map.align_regs_perf_pma) { + g->ops.gr.hwpm_map.align_regs_perf_pma(&offset); + } + + /* Add entries from _LIST_nv_perf_pma_ctx_reg*/ + ret = add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_pma_ctxsw_regs(g), &count, &offset, + hwpm_ctxsw_reg_count_max, 0, ~U32(0U)); + if (ret != 0) { + goto cleanup; + } + + offset = NVGPU_ALIGN(offset, 256U); + + /* Add 
entries from _LIST_nv_perf_pma_control_ctx_reg*/ + ret = add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_pma_control_ctxsw_regs(g), &count, &offset, + hwpm_ctxsw_reg_count_max, 0, ~U32(0U)); + if (ret != 0) { + goto cleanup; + } + + offset = NVGPU_ALIGN(offset, 256U); + + /* Add entries from _LIST_nv_perf_fbp_ctx_regs */ + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset, + hwpm_ctxsw_reg_count_max, 0, num_fbps, ~U32(0U), + g->ops.perf.get_pmmfbp_per_chiplet_offset(), + ~U32(0U)) != 0) { + goto cleanup; + } + + /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */ + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_fbp_router_ctxsw_regs(g), + &count, &offset, hwpm_ctxsw_reg_count_max, 0, + num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE, + ~U32(0U)) != 0) { + goto cleanup; + } + + if (g->ops.gr.hwpm_map.get_active_fbpa_mask) { + active_fbpa_mask = g->ops.gr.hwpm_map.get_active_fbpa_mask(g); + } else { + active_fbpa_mask = ~U32(0U); + } + + /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */ + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_pm_fbpa_ctxsw_regs(g), + &count, &offset, hwpm_ctxsw_reg_count_max, 0, + num_fbpas, active_fbpa_mask, fbpa_stride, ~U32(0U)) + != 0) { + goto cleanup; + } + + /* Add entries from _LIST_nv_pm_rop_ctx_regs */ + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_pm_rop_ctxsw_regs(g), &count, &offset, + hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) { + goto cleanup; + } + + /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_pm_ltc_ctxsw_regs(g), &count, &offset, + hwpm_ctxsw_reg_count_max, 0, num_ltc, ~U32(0U), + ltc_stride, ~U32(0U)) != 0) { + goto cleanup; + } + + offset = NVGPU_ALIGN(offset, 256U); + + /* Add entries from _LIST_nv_perf_fbp_control_ctx_regs */ + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(g), + &count, &offset, hwpm_ctxsw_reg_count_max, 0, + num_fbps, ~U32(0U), + g->ops.perf.get_pmmfbp_per_chiplet_offset(), + ~U32(0U)) != 0) { + goto cleanup; + } + + offset = NVGPU_ALIGN(offset, 256U); + + /* Add GPC entries */ + if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset, + hwpm_ctxsw_reg_count_max, config) != 0) { + goto cleanup; + } + + if (offset > hwpm_ctxsw_buffer_size) { + nvgpu_err(g, "offset > buffer size"); + goto cleanup; + } + + sort(map, count, sizeof(*map), map_cmp, NULL); + + hwpm_map->map = map; + hwpm_map->count = count; + hwpm_map->init = true; + + nvgpu_log_info(g, "Reg Addr => HWPM Ctxt switch buffer offset"); + + for (i = 0; i < count; i++) { + nvgpu_log_info(g, "%08x => %08x", map[i].addr, map[i].offset); + } + + return 0; + +cleanup: + nvgpu_err(g, "Failed to create HWPM buffer offset map"); + nvgpu_big_free(g, map); + return -EINVAL; +} + +/* + * This function will return the 32 bit offset for a priv register if it is + * present in the PM context buffer. 
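+ *
+ * The address map is built lazily: the first lookup calls
+ * nvgpu_gr_hwpm_map_create(), which walks the netlist register lists,
+ * records a (register address -> buffer offset) pair for each
+ * context-switched PM register, and sorts the result by address. Each
+ * subsequent lookup is then a binary search (nvgpu_bsearch() with
+ * map_cmp) over that sorted table; -EINVAL is returned when the address
+ * is not part of the PM context image.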
+ */ +int nvgpu_gr_hwmp_map_find_priv_offset(struct gk20a *g, + struct nvgpu_gr_hwpm_map *hwpm_map, + u32 addr, u32 *priv_offset, struct nvgpu_gr_config *config) +{ + struct ctxsw_buf_offset_map_entry *map, *result, map_key; + int err = 0; + u32 count; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); + + /* Create map of pri address and pm offset if necessary */ + if (!hwpm_map->init) { + err = nvgpu_gr_hwpm_map_create(g, hwpm_map, config); + if (err != 0) { + return err; + } + } + + *priv_offset = 0; + + map = hwpm_map->map; + count = hwpm_map->count; + + map_key.addr = addr; + result = nvgpu_bsearch(&map_key, map, count, sizeof(*map), map_cmp); + + if (result != NULL) { + *priv_offset = result->offset; + } else { + nvgpu_err(g, "Lookup failed for address 0x%x", addr); + err = -EINVAL; + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c new file mode 100644 index 000000000..8f6bbea56 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -0,0 +1,982 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_POWER_PG +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "obj_ctx_priv.h" + +void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, + struct nvgpu_mem *inst_block, u64 gpu_va) +{ + g->ops.ramin.set_gr_ptr(g, inst_block, gpu_va); +} + +void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, u64 gpu_va) +{ + struct nvgpu_mem *ctxheader; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va); + + ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx); + nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, + ctxheader->gpu_va); + } else { + nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va); + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); +} + +#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) +static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, + u32 class_num, u32 flags) +{ + int err; + u32 graphics_preempt_mode = 0U; + u32 compute_preempt_mode = 0U; + u32 default_graphics_preempt_mode = 0U; + u32 default_compute_preempt_mode = 0U; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + /* Skip for engines other than GR */ + if (!g->ops.gpu_class.is_valid_compute(class_num) && + !g->ops.gpu_class.is_valid_gfx(class_num)) { + return 0; + } + + g->ops.gr.init.get_default_preemption_modes( + &default_graphics_preempt_mode, + &default_compute_preempt_mode); + +#ifdef CONFIG_NVGPU_GFXP + if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) { + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } + + if (g->ops.gpu_class.is_valid_gfx(class_num) && + nvgpu_gr_ctx_desc_force_preemption_gfxp(gr_ctx_desc)) { + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } +#endif + +#ifdef CONFIG_NVGPU_CILP + if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) { + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } + + if (g->ops.gpu_class.is_valid_compute(class_num) && + nvgpu_gr_ctx_desc_force_preemption_cilp(gr_ctx_desc)) { + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } +#endif + + if (compute_preempt_mode == 0U) { + compute_preempt_mode = default_compute_preempt_mode; + } + + if (graphics_preempt_mode == 0U) { + graphics_preempt_mode = default_graphics_preempt_mode; + } + + err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, + gr_ctx_desc, gr_ctx, vm, class_num, graphics_preempt_mode, + compute_preempt_mode); + if (err != 0) { + nvgpu_err(g, "set_ctxsw_preemption_mode failed"); + return err; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + + return 0; +} +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS +static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, + struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, + u32 graphics_preempt_mode) +{ + int err = 0; + + /* set preemption modes */ + switch (graphics_preempt_mode) { +#ifdef CONFIG_NVGPU_GFXP + case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: + { + u32 rtv_cb_size; + u32 spill_size = g->ops.gr.init.get_ctx_spill_size(g); + u32 pagepool_size = g->ops.gr.init.get_ctx_pagepool_size(g); + u32 
betacb_size = g->ops.gr.init.get_ctx_betacb_size(g); + u32 attrib_cb_size = + g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size, + nvgpu_gr_config_get_tpc_count(config), + nvgpu_gr_config_get_max_tpc_count(config)); + + nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size); + nvgpu_log_info(g, "gfxp context pagepool_size=%d", pagepool_size); + nvgpu_log_info(g, "gfxp context attrib_cb_size=%d", + attrib_cb_size); + + nvgpu_gr_ctx_set_size(gr_ctx_desc, + NVGPU_GR_CTX_SPILL_CTXSW, spill_size); + nvgpu_gr_ctx_set_size(gr_ctx_desc, + NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size); + nvgpu_gr_ctx_set_size(gr_ctx_desc, + NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size); + + if (g->ops.gr.init.get_gfxp_rtv_cb_size != NULL) { + rtv_cb_size = g->ops.gr.init.get_gfxp_rtv_cb_size(g); + nvgpu_gr_ctx_set_size(gr_ctx_desc, + NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size); + } + + err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx, + gr_ctx_desc, vm); + if (err != 0) { + nvgpu_err(g, "cannot allocate ctxsw buffers"); + return err; + } + + nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx, + graphics_preempt_mode); + break; + } +#endif + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx, + graphics_preempt_mode); + break; + + default: + nvgpu_log_info(g, "graphics_preempt_mode=%u", + graphics_preempt_mode); + break; + } + + return err; +} +#endif + +static int nvgpu_gr_obj_ctx_set_compute_preemption_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 compute_preempt_mode) +{ + + if (g->ops.gpu_class.is_valid_compute(class_num) +#ifdef CONFIG_NVGPU_GRAPHICS + || g->ops.gpu_class.is_valid_gfx(class_num) +#endif + ) { + nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx, + compute_preempt_mode); + return 0; + } else { + return -EINVAL; + } + +} + +int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num, + u32 graphics_preempt_mode, u32 compute_preempt_mode) +{ + int err = 0; + + /* check for invalid combinations */ + if (nvgpu_gr_ctx_check_valid_preemption_mode(g, gr_ctx, + graphics_preempt_mode, compute_preempt_mode) == false) { + err = -EINVAL; + goto fail; + } + + nvgpu_log(g, gpu_dbg_gr, "graphics_preempt_mode=%u compute_preempt_mode=%u", + graphics_preempt_mode, compute_preempt_mode); + +#ifdef CONFIG_NVGPU_GRAPHICS + err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config, + gr_ctx_desc, gr_ctx, vm, graphics_preempt_mode); + + if (err != 0) { + goto fail; + } +#endif + + err = nvgpu_gr_obj_ctx_set_compute_preemption_mode(g, gr_ctx, + class_num, compute_preempt_mode); + +fail: + return err; +} + +void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx) +{ +#ifdef CONFIG_NVGPU_GFXP + u64 addr; + u32 size; + struct nvgpu_mem *mem; +#endif + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx); + +#ifdef CONFIG_NVGPU_GFXP + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) { + goto done; + } + + if (!nvgpu_mem_is_valid( + nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) { + goto done; + } + + if (subctx != NULL) { + nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, + gr_ctx); + } else { + nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx); + } + + nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); + + addr = 
nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->gpu_va; + g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, + nvgpu_gr_config_get_tpc_count(config), + nvgpu_gr_config_get_max_tpc_count(config), addr, + true); + + mem = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx); + addr = mem->gpu_va; + nvgpu_assert(mem->size <= U32_MAX); + size = (u32)mem->size; + + g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size, + true, false); + + mem = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx); + addr = mem->gpu_va; + nvgpu_assert(mem->size <= U32_MAX); + size = (u32)mem->size; + + g->ops.gr.init.commit_ctxsw_spill(g, gr_ctx, addr, size, true); + + g->ops.gr.init.commit_cbes_reserve(g, gr_ctx, true); + + if (g->ops.gr.init.gfxp_wfi_timeout != NULL) { + g->ops.gr.init.gfxp_wfi_timeout(g, gr_ctx, true); + } + + if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) { + g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true); + } + + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); + +done: +#endif + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); +} + +void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch) +{ + u64 addr; + u32 size; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + if (patch) { + nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); + } + + /* + * MIG supports only compute class. + * Skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are not supported. + */ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* global pagepool buffer */ + addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + NVGPU_GR_CTX_PAGEPOOL_VA); + size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL)); + + g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size, + patch, true); + + /* global bundle cb */ + addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + NVGPU_GR_CTX_CIRCULAR_VA); + size = nvgpu_safe_cast_u64_to_u32( + g->ops.gr.init.get_bundle_cb_default_size(g)); + + g->ops.gr.init.commit_global_bundle_cb(g, gr_ctx, addr, size, + patch); + + /* global attrib cb */ + addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + NVGPU_GR_CTX_ATTRIBUTE_VA); + + g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, + nvgpu_gr_config_get_tpc_count(config), + nvgpu_gr_config_get_max_tpc_count(config), addr, patch); + + g->ops.gr.init.commit_global_cb_manager(g, config, gr_ctx, + patch); + +#ifdef CONFIG_NVGPU_GRAPHICS + if (g->ops.gr.init.commit_rtv_cb != NULL) { + /* RTV circular buffer */ + addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA); + + g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch); + } +#endif + } + +#ifdef CONFIG_NVGPU_SM_DIVERSITY + if ((nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) && + (nvgpu_gr_ctx_get_sm_diversity_config(gr_ctx) != + NVGPU_DEFAULT_SM_DIVERSITY_CONFIG) && + (g->ops.gr.init.commit_sm_id_programming != NULL)) { + int err; + + err = g->ops.gr.init.commit_sm_id_programming( + g, config, gr_ctx, patch); + if (err != 0) { + nvgpu_err(g, + "commit_sm_id_programming failed err=%d", err); + } + } +#endif + + if (patch) { + nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false); + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); +} + +static int nvgpu_gr_obj_ctx_alloc_sw_bundle(struct gk20a *g) +{ + int err = 0; + struct netlist_av_list *sw_bundle_init = + nvgpu_netlist_get_sw_bundle_init_av_list(g); + struct netlist_av_list *sw_veid_bundle_init = + 
nvgpu_netlist_get_sw_veid_bundle_init_av_list(g); +#ifdef CONFIG_NVGPU_DGPU + struct netlist_av64_list *sw_bundle64_init = + nvgpu_netlist_get_sw_bundle64_init_av64_list(g); +#endif + + /* enable pipe mode override */ + g->ops.gr.init.pipe_mode_override(g, true); + + /* load bundle init */ + err = g->ops.gr.init.load_sw_bundle_init(g, sw_bundle_init); + if (err != 0) { + goto error; + } + + if (g->ops.gr.init.load_sw_veid_bundle != NULL) { + err = g->ops.gr.init.load_sw_veid_bundle(g, + sw_veid_bundle_init); + if (err != 0) { + goto error; + } + } + +#ifdef CONFIG_NVGPU_DGPU + if (g->ops.gr.init.load_sw_bundle64 != NULL) { + err = g->ops.gr.init.load_sw_bundle64(g, sw_bundle64_init); + if (err != 0) { + goto error; + } + } +#endif + + /* disable pipe mode override */ + g->ops.gr.init.pipe_mode_override(g, false); + + err = g->ops.gr.init.wait_idle(g); + + return err; + +error: + /* in case of error skip waiting for GR idle - just restore state */ + g->ops.gr.init.pipe_mode_override(g, false); + + return err; +} + +static int nvgpu_gr_obj_ctx_init_hw_state(struct gk20a *g, + struct nvgpu_mem *inst_block) +{ + int err = 0; + u32 data; + u32 i; + struct netlist_aiv_list *sw_ctx_load = + nvgpu_netlist_get_sw_ctx_load_aiv_list(g); + + nvgpu_log(g, gpu_dbg_gr, " "); + + err = g->ops.gr.init.fe_pwr_mode_force_on(g, true); + if (err != 0) { + goto clean_up; + } + + g->ops.gr.init.override_context_reset(g); + + err = g->ops.gr.init.fe_pwr_mode_force_on(g, false); + if (err != 0) { + goto clean_up; + } + + data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block); + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_ADDRESS_BIND_PTR, data, NULL); + if (err != 0) { + goto clean_up; + } + + err = g->ops.gr.init.wait_idle(g); + + /* load ctx init */ + nvgpu_log_info(g, "begin: netlist: sw_ctx_load: register writes"); + for (i = 0U; i < sw_ctx_load->count; i++) { + nvgpu_writel(g, sw_ctx_load->l[i].addr, + sw_ctx_load->l[i].value); + } + nvgpu_log_info(g, "end: netlist: sw_ctx_load: register writes"); + + nvgpu_log_info(g, "configure sm_hww_esr_report mask after sw_ctx_load"); + g->ops.gr.intr.set_hww_esr_report_mask(g); + +#ifdef CONFIG_NVGPU_GFXP + if (g->ops.gr.init.preemption_state != NULL) { + err = g->ops.gr.init.preemption_state(g); + if (err != 0) { + goto clean_up; + } + } +#endif + + nvgpu_cg_blcg_gr_load_enable(g); + + err = g->ops.gr.init.wait_idle(g); + +clean_up: + if (err == 0) { + nvgpu_log(g, gpu_dbg_gr, "done"); + } + return err; +} + +static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx) +{ + int err = 0; + struct netlist_av_list *sw_method_init = + nvgpu_netlist_get_sw_method_init_av_list(g); +#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION + struct netlist_av_list *sw_bundle_init = + nvgpu_netlist_get_sw_bundle_init_av_list(g); +#endif + + nvgpu_log(g, gpu_dbg_gr, " "); + + /* disable fe_go_idle */ + g->ops.gr.init.fe_go_idle_timeout(g, false); + + nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, + config, gr_ctx, false); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* override a few ctx state registers */ + g->ops.gr.init.commit_global_timeslice(g); + } + + /* floorsweep anything left */ + err = nvgpu_gr_fs_state_init(g, config); + if (err != 0) { + goto restore_fe_go_idle; + } + + err = g->ops.gr.init.wait_idle(g); + if (err != 0) { + goto restore_fe_go_idle; + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && 
defined(CONFIG_NVGPU_NEXT) + if (g->ops.gr.init.auto_go_idle != NULL) { + g->ops.gr.init.auto_go_idle(g, false); + } +#endif + err = nvgpu_gr_obj_ctx_alloc_sw_bundle(g); + if (err != 0) { + goto restore_fe_go_idle; + } + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.gr.init.auto_go_idle != NULL) { + g->ops.gr.init.auto_go_idle(g, true); + } +#endif + + /* restore fe_go_idle */ + g->ops.gr.init.fe_go_idle_timeout(g, true); + + /* load method init */ + g->ops.gr.init.load_method_init(g, sw_method_init); + +#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION + /* restore stats bundle data through mme shadow methods */ + if (g->ops.gr.init.restore_stats_counter_bundle_data != NULL) { + g->ops.gr.init.restore_stats_counter_bundle_data(g, + sw_bundle_init); + } +#endif + + err = g->ops.gr.init.wait_idle(g); + if (err != 0) { + goto clean_up; + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + return 0; + +restore_fe_go_idle: + /* restore fe_go_idle */ + g->ops.gr.init.fe_go_idle_timeout(g, true); +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.gr.init.auto_go_idle != NULL) { + g->ops.gr.init.auto_go_idle(g, true); + } +#endif + +clean_up: + return err; +} + +static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *inst_block) +{ + int err = 0; + struct nvgpu_mem *gr_mem; + u64 size; + u32 data; +#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image_temp = + NULL; +#endif + + nvgpu_log(g, gpu_dbg_gr, " "); + + gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + +#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION + /* + * Save ctx data before first golden context save. Restore same data + * before second golden context save. This temporary copy is + * saved in local_golden_image_temp. 
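+ * The context image is then saved twice from the same initial state and
+ * the two copies are compared with
+ * nvgpu_gr_global_ctx_compare_golden_images(); a mismatch means golden
+ * context creation is not reproducible, so the function fails rather
+ * than publishing the first image as the golden context.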
+ */ + + size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); + + local_golden_image_temp = + nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size); + if (local_golden_image_temp == NULL) { + err = -ENOMEM; + goto clean_up; + } +#endif + + data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block); + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL); + if (err != 0) { + goto clean_up; + } + + size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); + + golden_image->local_golden_image = + nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size); + if (golden_image->local_golden_image == NULL) { + err = -ENOMEM; + goto clean_up; + } + +#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION + /* Before second golden context save restore to before known state */ + nvgpu_gr_global_ctx_load_local_golden_image(g, + local_golden_image_temp, gr_mem); + /* free local copy now */ + nvgpu_gr_global_ctx_deinit_local_golden_image(g, + local_golden_image_temp); + local_golden_image_temp = NULL; + + /* Initiate second golden context save */ + data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block); + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL); + if (err != 0) { + goto clean_up; + } + + /* Copy the data to local buffer */ + local_golden_image_temp = + nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size); + if (local_golden_image_temp == NULL) { + err = -ENOMEM; + goto clean_up; + } + + /* Compare two golden context images */ + if (!nvgpu_gr_global_ctx_compare_golden_images(g, + nvgpu_mem_is_sysmem(gr_mem), + golden_image->local_golden_image, + local_golden_image_temp, + size)) { + nvgpu_err(g, "golden context mismatch"); + err = -ENOMEM; + } +#endif + +clean_up: +#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION + if (local_golden_image_temp != NULL) { + nvgpu_gr_global_ctx_deinit_local_golden_image(g, + local_golden_image_temp); + } +#endif + + if (err == 0) { + nvgpu_log(g, gpu_dbg_gr, "golden image saved with size = %llu", size); + } + return err; +} + +/* + * init global golden image from a fresh gr_ctx in channel ctx. + * save a copy in local_golden_image. + */ +int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_mem *inst_block) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + /* + * golden ctx is global to all channels. 
Although only the first + * channel initializes golden image, driver needs to prevent multiple + * channels from initializing golden ctx at the same time + */ + nvgpu_mutex_acquire(&golden_image->ctx_mutex); + + if (golden_image->ready) { + nvgpu_log(g, gpu_dbg_gr, "golden image already saved"); + goto clean_up; + } + + err = nvgpu_gr_obj_ctx_init_hw_state(g, inst_block); + if (err != 0) { + goto clean_up; + } + + err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer, + config, gr_ctx); + if (err != 0) { + goto clean_up; + } + +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + err = nvgpu_gr_ctx_init_zcull(g, gr_ctx); + if (err != 0) { + goto clean_up; + } + } +#endif + + err = nvgpu_gr_obj_ctx_save_golden_ctx(g, golden_image, + gr_ctx, inst_block); + if (err != 0) { + goto clean_up; + } + + golden_image->ready = true; +#ifdef CONFIG_NVGPU_POWER_PG + nvgpu_pmu_set_golden_image_initialized(g, true); +#endif + g->ops.gr.falcon.set_current_ctx_invalid(g); + +clean_up: + if (err != 0) { + nvgpu_err(g, "fail"); + } else { + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + } + + nvgpu_mutex_release(&golden_image->ctx_mutex); + return err; +} + +static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm) +{ + u64 size; + int err = 0; + + nvgpu_log_fn(g, " "); + + size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); + nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX, + nvgpu_safe_cast_u64_to_u32(size)); + + nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size); + err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm); + if (err != 0) { + return err; + } + + return 0; +} + +int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_subctx *subctx, + struct vm_gk20a *vm, + struct nvgpu_mem *inst_block, + u32 class_num, u32 flags, + bool cde, bool vpr) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc, + gr_ctx, vm); + if (err != 0) { + nvgpu_err(g, "fail to allocate TSG gr ctx buffer"); + goto out; + } + + /* allocate patch buffer */ + if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx))) { + nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + + nvgpu_gr_ctx_set_size(gr_ctx_desc, + NVGPU_GR_CTX_PATCH_CTX, + nvgpu_safe_mult_u32( + g->ops.gr.init.get_patch_slots(g, config), + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)); + + err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, gr_ctx_desc, vm); + if (err != 0) { + nvgpu_err(g, "fail to allocate patch buffer"); + goto out; + } + } + +#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) + err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config, + gr_ctx_desc, gr_ctx, vm, class_num, flags); + if (err != 0) { + nvgpu_err(g, "fail to init preemption mode"); + goto out; + } +#endif + + /* map global buffer to channel gpu_va and commit */ + err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, + global_ctx_buffer, vm, vpr); + if (err != 0) { + nvgpu_err(g, "fail to map global ctx buffer"); + goto out; + } + + nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, + config, gr_ctx, true); + + /* commit gr ctx buffer */ + nvgpu_gr_obj_ctx_commit_inst(g, 
inst_block, gr_ctx, subctx, + nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + + /* init golden image */ + err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, + global_ctx_buffer, config, gr_ctx, inst_block); + if (err != 0) { + nvgpu_err(g, "fail to init golden ctx image"); + goto out; + } + +#ifdef CONFIG_NVGPU_POWER_PG + /* Re-enable ELPG now that golden image has been initialized. + * The PMU PG init code may already have tried to enable elpg, but + * would not have been able to complete this action since the golden + * image hadn't been initialized yet, so do this now. + */ + err = nvgpu_pmu_reenable_elpg(g); + if (err != 0) { + nvgpu_err(g, "fail to re-enable elpg"); + goto out; + } +#endif + + /* load golden image */ + nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, + golden_image->local_golden_image, cde); + + nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx, + subctx); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); + return 0; +out: + /* + * 1. gr_ctx, patch_ctx and global ctx buffer mapping + * can be reused so no need to release them. + * 2. golden image init and load is a one time thing so if + * they pass, no need to undo. + */ + nvgpu_err(g, "fail"); + return err; +} + +void nvgpu_gr_obj_ctx_set_golden_image_size( + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + size_t size) +{ + golden_image->size = size; +} + +size_t nvgpu_gr_obj_ctx_get_golden_image_size( + struct nvgpu_gr_obj_ctx_golden_image *golden_image) +{ + return golden_image->size; +} + +#ifdef CONFIG_NVGPU_DEBUGGER +u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr( + struct nvgpu_gr_obj_ctx_golden_image *golden_image) +{ + return nvgpu_gr_global_ctx_get_local_golden_image_ptr( + golden_image->local_golden_image); +} +#endif + +bool nvgpu_gr_obj_ctx_is_golden_image_ready( + struct nvgpu_gr_obj_ctx_golden_image *golden_image) +{ + bool ready; + + nvgpu_mutex_acquire(&golden_image->ctx_mutex); + ready = golden_image->ready; + nvgpu_mutex_release(&golden_image->ctx_mutex); + + return ready; +} + +int nvgpu_gr_obj_ctx_init(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size) +{ + struct nvgpu_gr_obj_ctx_golden_image *golden_image; + + nvgpu_log(g, gpu_dbg_gr, "size = %u", size); + + golden_image = nvgpu_kzalloc(g, sizeof(*golden_image)); + if (golden_image == NULL) { + return -ENOMEM; + } + + nvgpu_gr_obj_ctx_set_golden_image_size(golden_image, size); + + nvgpu_mutex_init(&golden_image->ctx_mutex); + + *gr_golden_image = golden_image; + + return 0; +} + +void nvgpu_gr_obj_ctx_deinit(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image) +{ + if (golden_image == NULL) { + return; + } + + if (golden_image->local_golden_image != NULL) { + nvgpu_gr_global_ctx_deinit_local_golden_image(g, + golden_image->local_golden_image); + golden_image->local_golden_image = NULL; + } +#ifdef CONFIG_NVGPU_POWER_PG + nvgpu_pmu_set_golden_image_initialized(g, false); +#endif + golden_image->ready = false; + nvgpu_kfree(g, golden_image); +} + diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx_priv.h b/drivers/gpu/nvgpu/common/gr/obj_ctx_priv.h new file mode 100644 index 000000000..52ae6d4e1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx_priv.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_OBJ_CTX_PRIV_H +#define NVGPU_GR_OBJ_CTX_PRIV_H + +#include +#include + +struct nvgpu_gr_global_ctx_local_golden_image; + +/** + * Golden context image descriptor structure. + * + * This structure stores details of the Golden context image. + */ +struct nvgpu_gr_obj_ctx_golden_image { + /** + * Flag to indicate if Golden context image is ready or not. + */ + bool ready; + + /** + * Mutex to hold for accesses to Golden context image. + */ + struct nvgpu_mutex ctx_mutex; + + /** + * Size of Golden context image. + */ + size_t size; + + /** + * Pointer to local Golden context image struct. + */ + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image; +}; + +#endif /* NVGPU_GR_OBJ_CTX_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c new file mode 100644 index 000000000..f4ba647ed --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/subctx.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "common/gr/subctx_priv.h" + +struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g, + struct vm_gk20a *vm) +{ + struct nvgpu_gr_subctx *subctx; + int err = 0; + + nvgpu_log_fn(g, " "); + + subctx = nvgpu_kzalloc(g, sizeof(*subctx)); + if (subctx == NULL) { + return NULL; + } + + err = nvgpu_dma_alloc_sys(g, + g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(), + &subctx->ctx_header); + if (err != 0) { + nvgpu_err(g, "failed to allocate sub ctx header"); + goto err_free_subctx; + } + + subctx->ctx_header.gpu_va = nvgpu_gmmu_map(vm, + &subctx->ctx_header, + subctx->ctx_header.size, + 0, /* not GPU-cacheable */ + gk20a_mem_flag_none, true, + subctx->ctx_header.aperture); + if (subctx->ctx_header.gpu_va == 0ULL) { + nvgpu_err(g, "failed to map ctx header"); + goto err_free_ctx_header; + } + + return subctx; + +err_free_ctx_header: + nvgpu_dma_free(g, &subctx->ctx_header); +err_free_subctx: + nvgpu_kfree(g, subctx); + return NULL; +} + +void nvgpu_gr_subctx_free(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, + struct vm_gk20a *vm) +{ + nvgpu_log_fn(g, " "); + + nvgpu_gmmu_unmap(vm, &subctx->ctx_header, + subctx->ctx_header.gpu_va); + nvgpu_dma_free(g, &subctx->ctx_header); + nvgpu_kfree(g, subctx); +} + +void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va) +{ + struct nvgpu_mem *ctxheader = &subctx->ctx_header; + int err = 0; + + err = g->ops.mm.cache.l2_flush(g, true); + if (err != 0) { + nvgpu_err(g, "l2_flush failed"); + } + + /* set priv access map */ + g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader, + nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA)); + + g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader, + nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va); + +#ifdef CONFIG_NVGPU_DEBUGGER + g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader, + nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va); +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader, + nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx)); +#endif + + g->ops.gr.ctxsw_prog.set_context_buffer_ptr(g, ctxheader, gpu_va); + + g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader); +} + +struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct nvgpu_gr_subctx *subctx) +{ + return &subctx->ctx_header; +} + +#ifdef CONFIG_NVGPU_HAL_NON_FUSA +void nvgpu_gr_subctx_set_patch_ctx(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) +{ + g->ops.gr.ctxsw_prog.set_patch_addr(g, &subctx->ctx_header, + nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va); +} +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS +void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx *gr_ctx) +{ + + nvgpu_log_fn(g, " "); + + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &subctx->ctx_header, + nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx)); +} +#endif /* CONFIG_NVGPU_GRAPHICS */ + +#ifdef CONFIG_NVGPU_GFXP +void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) +{ + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &subctx->ctx_header, + nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va); + + if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { + g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, + &subctx->ctx_header, + nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va); + } +} +#endif /* CONFIG_NVGPU_GFXP */ + +#ifdef 
CONFIG_NVGPU_DEBUGGER +void nvgpu_gr_subctx_set_hwpm_mode(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) +{ + g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header, + nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va); +} +#endif diff --git a/drivers/gpu/nvgpu/common/gr/subctx_priv.h b/drivers/gpu/nvgpu/common/gr/subctx_priv.h new file mode 100644 index 000000000..5737aedd8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/subctx_priv.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_SUBCTX_PRIV_H +#define NVGPU_GR_SUBCTX_PRIV_H + +struct nvgpu_mem; + +/** + * GR subcontext data structure. + * + * One subcontext is allocated per GPU channel. + */ +struct nvgpu_gr_subctx { + /** + * Memory to hold subcontext header image. + */ + struct nvgpu_mem ctx_header; +}; + +#endif /* NVGPU_GR_SUBCTX_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/zbc.c b/drivers/gpu/nvgpu/common/gr/zbc.c new file mode 100644 index 000000000..8299dfdc0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/zbc.c @@ -0,0 +1,690 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#endif + +#include "zbc_priv.h" + +#define ZBC_ENTRY_UPDATED 1 +#define ZBC_ENTRY_ADDED 2 + +static void nvgpu_gr_zbc_update_stencil_reg(struct gk20a *g, + struct nvgpu_gr_zbc_entry *stencil_val, u32 index) +{ + /* update l2 table */ + if (g->ops.ltc.set_zbc_s_entry != NULL) { + g->ops.ltc.set_zbc_s_entry(g, stencil_val->stencil, index); + } + + /* update zbc stencil registers */ + g->ops.gr.zbc.add_stencil(g, stencil_val, index); +} + +static int nvgpu_gr_zbc_add_stencil(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_entry *stencil_val) +{ + struct zbc_stencil_table *s_tbl; + u32 i; + int entry_added = -ENOSPC; + bool entry_exist = false; + + /* search existing tables */ + for (i = zbc->min_stencil_index; i <= zbc->max_used_stencil_index; + i++) { + + s_tbl = &zbc->zbc_s_tbl[i]; + + if ((s_tbl->ref_cnt != 0U) && + (s_tbl->stencil == stencil_val->stencil) && + (s_tbl->format == stencil_val->format)) { + s_tbl->ref_cnt = nvgpu_safe_add_u32(s_tbl->ref_cnt, 1U); + entry_exist = true; + entry_added = ZBC_ENTRY_UPDATED; + break; + } + } + /* add new table */ + if (!entry_exist && + (zbc->max_used_stencil_index < zbc->max_stencil_index)) { + + /* Increment used index and add new entry at that index */ + zbc->max_used_stencil_index = + nvgpu_safe_add_u32(zbc->max_used_stencil_index, 1U); + + s_tbl = &zbc->zbc_s_tbl[zbc->max_used_stencil_index]; + WARN_ON(s_tbl->ref_cnt != 0U); + + /* update sw copy */ + s_tbl->stencil = stencil_val->stencil; + s_tbl->format = stencil_val->format; + s_tbl->ref_cnt = nvgpu_safe_add_u32(s_tbl->ref_cnt, 1U); + + nvgpu_gr_zbc_update_stencil_reg(g, stencil_val, + zbc->max_used_stencil_index); + + entry_added = ZBC_ENTRY_ADDED; + } + return entry_added; +} + +static void nvgpu_gr_zbc_update_depth_reg(struct gk20a *g, + struct nvgpu_gr_zbc_entry *depth_val, u32 index) +{ + /* update l2 table */ + g->ops.ltc.set_zbc_depth_entry(g, depth_val->depth, index); + + /* update zbc registers */ + g->ops.gr.zbc.add_depth(g, depth_val, index); +} + +static int nvgpu_gr_zbc_add_depth(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_entry *depth_val) +{ + struct zbc_depth_table *d_tbl; + u32 i; + int entry_added = -ENOSPC; + bool entry_exist = false; + + /* search existing tables */ + for (i = zbc->min_depth_index; i <= zbc->max_used_depth_index; i++) { + + d_tbl = &zbc->zbc_dep_tbl[i]; + + if ((d_tbl->ref_cnt != 0U) && + (d_tbl->depth == depth_val->depth) && + (d_tbl->format == depth_val->format)) { + d_tbl->ref_cnt = nvgpu_safe_add_u32(d_tbl->ref_cnt, 1U); + entry_exist = true; + entry_added = ZBC_ENTRY_UPDATED; + break; + } + } + /* add new table */ + if (!entry_exist && + (zbc->max_used_depth_index < zbc->max_depth_index)) { + + /* Increment used index and add new entry at that index */ + zbc->max_used_depth_index = + nvgpu_safe_add_u32(zbc->max_used_depth_index, 1U); + + d_tbl = &zbc->zbc_dep_tbl[zbc->max_used_depth_index]; + WARN_ON(d_tbl->ref_cnt != 0U); + + /* update sw copy */ + d_tbl->depth = depth_val->depth; + d_tbl->format = depth_val->format; + d_tbl->ref_cnt = nvgpu_safe_add_u32(d_tbl->ref_cnt, 1U); + + nvgpu_gr_zbc_update_depth_reg(g, depth_val, + zbc->max_used_depth_index); + + entry_added = ZBC_ENTRY_ADDED; + } + + return entry_added; +} + +static void nvgpu_gr_zbc_update_color_reg(struct gk20a *g, + struct nvgpu_gr_zbc_entry *color_val, u32 index) +{ + /* update l2 table */ + g->ops.ltc.set_zbc_color_entry(g, color_val->color_l2, index); 
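+	/*
+	 * The LTC (L2) table and the GR ZBC registers are always programmed
+	 * with the same index so that both units resolve the same clear
+	 * value; index 0 is reserved to mean "no matching entry, do not use
+	 * ZBC". For example, when nvgpu_gr_zbc_load_table() replays the SW
+	 * tables, the default opaque-black entry (color_ds[3] = 0x3f800000,
+	 * color_l2[] = 0xff000000) is written at the first valid color index
+	 * in both places.
+	 */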
+ + /* update zbc registers */ + g->ops.gr.zbc.add_color(g, color_val, index); +} + +static int nvgpu_gr_zbc_add_color(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_entry *color_val) +{ + struct zbc_color_table *c_tbl; + u32 i; + int entry_added = -ENOSPC; + bool entry_exist = false; + + /* search existing table */ + for (i = zbc->min_color_index; i <= zbc->max_used_color_index; i++) { + + c_tbl = &zbc->zbc_col_tbl[i]; + + if ((c_tbl->ref_cnt != 0U) && + (c_tbl->format == color_val->format) && + (nvgpu_memcmp((u8 *)c_tbl->color_ds, + (u8 *)color_val->color_ds, + sizeof(color_val->color_ds)) == 0) && + (nvgpu_memcmp((u8 *)c_tbl->color_l2, + (u8 *)color_val->color_l2, + sizeof(color_val->color_l2)) == 0)) { + + c_tbl->ref_cnt = nvgpu_safe_add_u32(c_tbl->ref_cnt, 1U); + entry_exist = true; + entry_added = ZBC_ENTRY_UPDATED; + break; + } + } + + /* add new entry */ + if (!entry_exist && + (zbc->max_used_color_index < zbc->max_color_index)) { + + /* Increment used index and add new entry at that index */ + zbc->max_used_color_index = + nvgpu_safe_add_u32(zbc->max_used_color_index, 1U); + + c_tbl = &zbc->zbc_col_tbl[zbc->max_used_color_index]; + WARN_ON(c_tbl->ref_cnt != 0U); + + /* update local copy */ + for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) { + c_tbl->color_l2[i] = color_val->color_l2[i]; + c_tbl->color_ds[i] = color_val->color_ds[i]; + } + c_tbl->format = color_val->format; + c_tbl->ref_cnt = nvgpu_safe_add_u32(c_tbl->ref_cnt, 1U); + + nvgpu_gr_zbc_update_color_reg(g, color_val, + zbc->max_used_color_index); + + entry_added = ZBC_ENTRY_ADDED; + } + + return entry_added; +} + +static int nvgpu_gr_zbc_add(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_entry *zbc_val) +{ + int added = false; +#if defined(CONFIG_NVGPU_LS_PMU) && defined(CONFIG_NVGPU_POWER_PG) + u32 entries; +#endif + + /* no endian swap ? 
*/ + nvgpu_mutex_acquire(&zbc->zbc_lock); + nvgpu_speculation_barrier(); + switch (zbc_val->type) { + case NVGPU_GR_ZBC_TYPE_COLOR: + added = nvgpu_gr_zbc_add_color(g, zbc, zbc_val); + break; + case NVGPU_GR_ZBC_TYPE_DEPTH: + added = nvgpu_gr_zbc_add_depth(g, zbc, zbc_val); + break; + case NVGPU_GR_ZBC_TYPE_STENCIL: + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) { + added = nvgpu_gr_zbc_add_stencil(g, zbc, zbc_val); + } else { + nvgpu_err(g, + "invalid zbc table type %d", zbc_val->type); + added = -EINVAL; + goto err_mutex; + } + break; + default: + nvgpu_err(g, + "invalid zbc table type %d", zbc_val->type); + added = -EINVAL; + goto err_mutex; + } + +#if defined(CONFIG_NVGPU_LS_PMU) && defined(CONFIG_NVGPU_POWER_PG) + if (added == ZBC_ENTRY_ADDED) { + /* update zbc for elpg only when new entry is added */ + entries = max( + nvgpu_safe_sub_u32(zbc->max_used_color_index, + zbc->min_color_index), + nvgpu_safe_sub_u32(zbc->max_used_depth_index, + zbc->min_depth_index)); + if (g->elpg_enabled) { + nvgpu_pmu_save_zbc(g, entries); + } + } +#endif + +err_mutex: + nvgpu_mutex_release(&zbc->zbc_lock); + if (added < 0) { + return added; + } + return 0; +} + +int nvgpu_gr_zbc_set_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_entry *zbc_val) +{ + nvgpu_log(g, gpu_dbg_zbc, " zbc_val->type %u", zbc_val->type); + + return nvgpu_pg_elpg_protected_call(g, + nvgpu_gr_zbc_add(g, zbc, zbc_val)); +} + +/* get a zbc table entry specified by index + * return table size when type is invalid */ +int nvgpu_gr_zbc_query_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_query_params *query_params) +{ + u32 index = query_params->index_size; + u32 i; + + nvgpu_speculation_barrier(); + switch (query_params->type) { + case NVGPU_GR_ZBC_TYPE_INVALID: + nvgpu_log(g, gpu_dbg_zbc, "Query zbc size"); + query_params->index_size = nvgpu_safe_add_u32( + nvgpu_safe_sub_u32(zbc->max_color_index, + zbc->min_color_index), 1U); + break; + case NVGPU_GR_ZBC_TYPE_COLOR: + if ((index < zbc->min_color_index) || + (index > zbc->max_color_index)) { + nvgpu_err(g, "invalid zbc color table index %u", index); + return -EINVAL; + } + nvgpu_log(g, gpu_dbg_zbc, "Query zbc color at index %u", index); + + nvgpu_speculation_barrier(); + for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) { + query_params->color_l2[i] = + zbc->zbc_col_tbl[index].color_l2[i]; + query_params->color_ds[i] = + zbc->zbc_col_tbl[index].color_ds[i]; + } + query_params->format = zbc->zbc_col_tbl[index].format; + query_params->ref_cnt = zbc->zbc_col_tbl[index].ref_cnt; + + break; + case NVGPU_GR_ZBC_TYPE_DEPTH: + if ((index < zbc->min_depth_index) || + (index > zbc->max_depth_index)) { + nvgpu_err(g, "invalid zbc depth table index %u", index); + return -EINVAL; + } + nvgpu_log(g, gpu_dbg_zbc, "Query zbc depth at index %u", index); + + nvgpu_speculation_barrier(); + query_params->depth = zbc->zbc_dep_tbl[index].depth; + query_params->format = zbc->zbc_dep_tbl[index].format; + query_params->ref_cnt = zbc->zbc_dep_tbl[index].ref_cnt; + break; + case NVGPU_GR_ZBC_TYPE_STENCIL: + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) { + if ((index < zbc->min_stencil_index) || + (index > zbc->max_stencil_index)) { + nvgpu_err(g, + "invalid zbc stencil table index %u", + index); + return -EINVAL; + } + nvgpu_log(g, gpu_dbg_zbc, + "Query zbc stencil at index %u", index); + + nvgpu_speculation_barrier(); + query_params->stencil = zbc->zbc_s_tbl[index].stencil; + query_params->format = zbc->zbc_s_tbl[index].format; + 
query_params->ref_cnt = zbc->zbc_s_tbl[index].ref_cnt; + } else { + nvgpu_err(g, "invalid zbc table type"); + return -EINVAL; + } + break; + default: + nvgpu_err(g, "invalid zbc table type"); + return -EINVAL; + } + + return 0; +} + +/* + * Update zbc table registers as per sw copy of zbc tables + */ +void nvgpu_gr_zbc_load_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc) +{ + unsigned int i; + + for (i = zbc->min_color_index; i <= zbc->max_used_color_index; i++) { + struct zbc_color_table *c_tbl = &zbc->zbc_col_tbl[i]; + struct nvgpu_gr_zbc_entry zbc_val; + + zbc_val.type = NVGPU_GR_ZBC_TYPE_COLOR; + nvgpu_memcpy((u8 *)zbc_val.color_ds, + (u8 *)c_tbl->color_ds, sizeof(zbc_val.color_ds)); + nvgpu_memcpy((u8 *)zbc_val.color_l2, + (u8 *)c_tbl->color_l2, sizeof(zbc_val.color_l2)); + zbc_val.format = c_tbl->format; + + nvgpu_gr_zbc_update_color_reg(g, &zbc_val, i); + } + + for (i = zbc->min_depth_index; i <= zbc->max_used_depth_index; i++) { + struct zbc_depth_table *d_tbl = &zbc->zbc_dep_tbl[i]; + struct nvgpu_gr_zbc_entry zbc_val; + + zbc_val.type = NVGPU_GR_ZBC_TYPE_DEPTH; + zbc_val.depth = d_tbl->depth; + zbc_val.format = d_tbl->format; + + nvgpu_gr_zbc_update_depth_reg(g, &zbc_val, i); + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) { + for (i = zbc->min_stencil_index; + i <= zbc->max_used_stencil_index; i++) { + struct zbc_stencil_table *s_tbl = &zbc->zbc_s_tbl[i]; + struct nvgpu_gr_zbc_entry zbc_val; + + zbc_val.type = NVGPU_GR_ZBC_TYPE_STENCIL; + zbc_val.stencil = s_tbl->stencil; + zbc_val.format = s_tbl->format; + + nvgpu_gr_zbc_update_stencil_reg(g, &zbc_val, i); + } + } +} + +static void nvgpu_gr_zbc_load_default_sw_stencil_table(struct gk20a *g, + struct nvgpu_gr_zbc *zbc) +{ + u32 index = zbc->min_stencil_index; + + zbc->zbc_s_tbl[index].stencil = 0x0; + zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8; + zbc->zbc_s_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U); + index = nvgpu_safe_add_u32(index, 1U); + + zbc->zbc_s_tbl[index].stencil = 0x1; + zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8; + zbc->zbc_s_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U); + index = nvgpu_safe_add_u32(index, 1U); + + zbc->zbc_s_tbl[index].stencil = 0xff; + zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8; + zbc->zbc_s_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U); + + zbc->max_used_stencil_index = index; +} + +static void nvgpu_gr_zbc_load_default_sw_depth_table(struct gk20a *g, + struct nvgpu_gr_zbc *zbc) +{ + u32 index = zbc->min_depth_index; + + zbc->zbc_dep_tbl[index].format = GR_ZBC_Z_FMT_VAL_FP32; + zbc->zbc_dep_tbl[index].depth = 0x3f800000; + zbc->zbc_dep_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_dep_tbl[index].ref_cnt, 1U); + index = nvgpu_safe_add_u32(index, 1U); + + zbc->zbc_dep_tbl[index].format = GR_ZBC_Z_FMT_VAL_FP32; + zbc->zbc_dep_tbl[index].depth = 0; + zbc->zbc_dep_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_dep_tbl[index].ref_cnt, 1U); + + zbc->max_used_depth_index = index; +} + +static void nvgpu_gr_zbc_load_default_sw_color_table(struct gk20a *g, + struct nvgpu_gr_zbc *zbc) +{ + u32 i; + u32 index = zbc->min_color_index; + + /* Opaque black (i.e. 
solid black, fmt 0x28 = A8B8G8R8) */ + zbc->zbc_col_tbl[index].format = GR_ZBC_SOLID_BLACK_COLOR_FMT; + for (i = 0U; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) { + zbc->zbc_col_tbl[index].color_ds[i] = 0U; + zbc->zbc_col_tbl[index].color_l2[i] = 0xff000000U; + } + zbc->zbc_col_tbl[index].color_ds[3] = 0x3f800000U; + zbc->zbc_col_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U); + index = nvgpu_safe_add_u32(index, 1U); + + /* Transparent black = (fmt 1 = zero) */ + zbc->zbc_col_tbl[index].format = GR_ZBC_TRANSPARENT_BLACK_COLOR_FMT; + for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) { + zbc->zbc_col_tbl[index].color_ds[i] = 0U; + zbc->zbc_col_tbl[index].color_l2[i] = 0U; + } + zbc->zbc_col_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U); + index = nvgpu_safe_add_u32(index, 1U); + + /* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */ + zbc->zbc_col_tbl[index].format = GR_ZBC_SOLID_WHITE_COLOR_FMT; + for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) { + zbc->zbc_col_tbl[index].color_ds[i] = 0x3f800000U; + zbc->zbc_col_tbl[index].color_l2[i] = 0xffffffffU; + } + zbc->zbc_col_tbl[index].ref_cnt = + nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U); + + zbc->max_used_color_index = index; +} + +static void nvgpu_gr_zbc_init_indices(struct gk20a *g, struct nvgpu_gr_zbc *zbc) +{ + struct nvgpu_gr_zbc_table_indices zbc_indices; + + g->ops.gr.zbc.init_table_indices(g, &zbc_indices); + + zbc->min_color_index = zbc_indices.min_color_index; + zbc->max_color_index = zbc_indices.max_color_index; + zbc->min_depth_index = zbc_indices.min_depth_index; + zbc->max_depth_index = zbc_indices.max_depth_index; + zbc->min_stencil_index = zbc_indices.min_stencil_index; + zbc->max_stencil_index = zbc_indices.max_stencil_index; + + nvgpu_log(g, gpu_dbg_zbc, "zbc->min_color_index %u", + zbc->min_color_index); + nvgpu_log(g, gpu_dbg_zbc, "zbc->max_color_index %u", + zbc->max_color_index); + nvgpu_log(g, gpu_dbg_zbc, "zbc->min_depth_index %u", + zbc->min_depth_index); + nvgpu_log(g, gpu_dbg_zbc, "zbc->max_depth_index %u", + zbc->max_depth_index); + nvgpu_log(g, gpu_dbg_zbc, "zbc->min_stencil_index %u", + zbc->min_stencil_index); + nvgpu_log(g, gpu_dbg_zbc, "zbc->max_stencil_index %u", + zbc->max_stencil_index); +} + +static void nvgpu_gr_zbc_load_default_sw_table(struct gk20a *g, + struct nvgpu_gr_zbc *zbc) +{ + nvgpu_mutex_init(&zbc->zbc_lock); + + nvgpu_gr_zbc_load_default_sw_color_table(g, zbc); + + nvgpu_gr_zbc_load_default_sw_depth_table(g, zbc); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) { + nvgpu_gr_zbc_load_default_sw_stencil_table(g, zbc); + } +} + +static int gr_zbc_allocate_local_tbls(struct gk20a *g, struct nvgpu_gr_zbc *zbc) +{ + u32 zbc_col_size = nvgpu_safe_add_u32(zbc->max_color_index, + zbc->min_color_index); + u32 zbc_dep_size = nvgpu_safe_add_u32(zbc->max_depth_index, + zbc->min_depth_index); + u32 zbc_s_size = nvgpu_safe_add_u32(zbc->max_stencil_index, + zbc->min_stencil_index); + + zbc->zbc_col_tbl = nvgpu_kzalloc(g, + sizeof(struct zbc_color_table) * zbc_col_size); + if (zbc->zbc_col_tbl == NULL) { + goto alloc_col_tbl_err; + } + + zbc->zbc_dep_tbl = nvgpu_kzalloc(g, + sizeof(struct zbc_depth_table) * zbc_dep_size); + + if (zbc->zbc_dep_tbl == NULL) { + goto alloc_dep_tbl_err; + } + + zbc->zbc_s_tbl = nvgpu_kzalloc(g, + sizeof(struct zbc_stencil_table) * zbc_s_size); + if (zbc->zbc_s_tbl == NULL) { + goto alloc_s_tbl_err; + } + + return 0; + +alloc_s_tbl_err: + nvgpu_kfree(g, zbc->zbc_dep_tbl); 
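+	/*
+	 * Error labels unwind in reverse allocation order: a stencil table
+	 * allocation failure frees the depth table above and then falls
+	 * through to free the color table below.
+	 */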
+alloc_dep_tbl_err: + nvgpu_kfree(g, zbc->zbc_col_tbl); +alloc_col_tbl_err: + return -ENOMEM; +} + +/* allocate the struct and load the table */ +int nvgpu_gr_zbc_init(struct gk20a *g, struct nvgpu_gr_zbc **zbc) +{ + int ret = -ENOMEM; + struct nvgpu_gr_zbc *gr_zbc = NULL; + + *zbc = NULL; + + gr_zbc = nvgpu_kzalloc(g, sizeof(*gr_zbc)); + if (gr_zbc == NULL) { + return ret; + } + + nvgpu_gr_zbc_init_indices(g, gr_zbc); + + ret = gr_zbc_allocate_local_tbls(g, gr_zbc); + if (ret != 0) { + goto alloc_err; + } + + nvgpu_gr_zbc_load_default_sw_table(g, gr_zbc); + + *zbc = gr_zbc; + return ret; + +alloc_err: + nvgpu_kfree(g, gr_zbc); + return ret; +} + +/* deallocate the memory for the struct */ +void nvgpu_gr_zbc_deinit(struct gk20a *g, struct nvgpu_gr_zbc *zbc) +{ + if (zbc == NULL) { + return; + } + + nvgpu_kfree(g, zbc->zbc_col_tbl); + nvgpu_kfree(g, zbc->zbc_dep_tbl); + nvgpu_kfree(g, zbc->zbc_s_tbl); + nvgpu_kfree(g, zbc); +} + +struct nvgpu_gr_zbc_entry *nvgpu_gr_zbc_entry_alloc(struct gk20a *g) +{ + return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_zbc_entry)); +} +void nvgpu_gr_zbc_entry_free(struct gk20a *g, struct nvgpu_gr_zbc_entry *entry) +{ + nvgpu_kfree(g, entry); +} + +u32 nvgpu_gr_zbc_get_entry_color_ds(struct nvgpu_gr_zbc_entry *entry, + int idx) +{ + return entry->color_ds[idx]; +} + +void nvgpu_gr_zbc_set_entry_color_ds(struct nvgpu_gr_zbc_entry *entry, + int idx, u32 ds) +{ + entry->color_ds[idx] = ds; +} + +u32 nvgpu_gr_zbc_get_entry_color_l2(struct nvgpu_gr_zbc_entry *entry, + int idx) +{ + return entry->color_l2[idx]; +} + +void nvgpu_gr_zbc_set_entry_color_l2(struct nvgpu_gr_zbc_entry *entry, + int idx, u32 l2) +{ + entry->color_l2[idx] = l2; +} + +u32 nvgpu_gr_zbc_get_entry_depth(struct nvgpu_gr_zbc_entry *entry) +{ + return entry->depth; +} + +void nvgpu_gr_zbc_set_entry_depth(struct nvgpu_gr_zbc_entry *entry, + u32 depth) +{ + entry->depth = depth; +} + +u32 nvgpu_gr_zbc_get_entry_stencil(struct nvgpu_gr_zbc_entry *entry) +{ + return entry->stencil; +} + +void nvgpu_gr_zbc_set_entry_stencil(struct nvgpu_gr_zbc_entry *entry, + u32 stencil) +{ + entry->stencil = stencil; +} + +u32 nvgpu_gr_zbc_get_entry_type(struct nvgpu_gr_zbc_entry *entry) +{ + return entry->type; +} + +void nvgpu_gr_zbc_set_entry_type(struct nvgpu_gr_zbc_entry *entry, + u32 type) +{ + entry->type = type; +} + +u32 nvgpu_gr_zbc_get_entry_format(struct nvgpu_gr_zbc_entry *entry) +{ + return entry->format; +} + +void nvgpu_gr_zbc_set_entry_format(struct nvgpu_gr_zbc_entry *entry, + u32 format) +{ + entry->format = format; +} diff --git a/drivers/gpu/nvgpu/common/gr/zbc_priv.h b/drivers/gpu/nvgpu/common/gr/zbc_priv.h new file mode 100644 index 000000000..3fb615bf2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/zbc_priv.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_ZBC_PRIV_H +#define NVGPU_GR_ZBC_PRIV_H + +#include + +/* Opaque black (i.e. solid black, fmt 0x28 = A8B8G8R8) */ +#define GR_ZBC_SOLID_BLACK_COLOR_FMT 0x28 +/* Transparent black = (fmt 1 = zero) */ +#define GR_ZBC_TRANSPARENT_BLACK_COLOR_FMT 0x1 +/* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */ +#define GR_ZBC_SOLID_WHITE_COLOR_FMT 0x2 +/* z format with fp32 */ +#define GR_ZBC_Z_FMT_VAL_FP32 0x1 + +#define GR_ZBC_STENCIL_CLEAR_FMT_INVAILD 0U +#define GR_ZBC_STENCIL_CLEAR_FMT_U8 1U + +struct zbc_color_table { + u32 color_ds[NVGPU_GR_ZBC_COLOR_VALUE_SIZE]; + u32 color_l2[NVGPU_GR_ZBC_COLOR_VALUE_SIZE]; + u32 format; + u32 ref_cnt; +}; + +struct zbc_depth_table { + u32 depth; + u32 format; + u32 ref_cnt; +}; + +struct zbc_stencil_table { + u32 stencil; + u32 format; + u32 ref_cnt; +}; + +struct nvgpu_gr_zbc_entry { + u32 color_ds[NVGPU_GR_ZBC_COLOR_VALUE_SIZE]; + u32 color_l2[NVGPU_GR_ZBC_COLOR_VALUE_SIZE]; + u32 depth; + u32 stencil; + u32 type; + u32 format; +}; + +/* + * HW ZBC table valid entries start at index 1. + * Entry 0 is reserved to mean "no matching entry found, do not use ZBC" + */ +struct nvgpu_gr_zbc { + struct nvgpu_mutex zbc_lock; /* Lock to access zbc table */ + struct zbc_color_table *zbc_col_tbl; /* SW zbc color table pointer */ + struct zbc_depth_table *zbc_dep_tbl; /* SW zbc depth table pointer */ + struct zbc_stencil_table *zbc_s_tbl; /* SW zbc stencil table pointer */ + u32 min_color_index; /* Minimum valid color table index */ + u32 min_depth_index; /* Minimum valid depth table index */ + u32 min_stencil_index; /* Minimum valid stencil table index */ + u32 max_color_index; /* Maximum valid color table index */ + u32 max_depth_index; /* Maximum valid depth table index */ + u32 max_stencil_index; /* Maximum valid stencil table index */ + u32 max_used_color_index; /* Max used color table index */ + u32 max_used_depth_index; /* Max used depth table index */ + u32 max_used_stencil_index; /* Max used stencil table index */ +}; + +#endif /* NVGPU_GR_ZBC_PRIV_H */ + diff --git a/drivers/gpu/nvgpu/common/gr/zcull.c b/drivers/gpu/nvgpu/common/gr/zcull.c new file mode 100644 index 000000000..e86cf9562 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/zcull.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "zcull_priv.h" + +int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull, + u32 size, struct nvgpu_gr_config *config) +{ + struct nvgpu_gr_zcull *zcull; + int err = 0; + + nvgpu_log(g, gpu_dbg_gr, "size = %u", size); + + zcull = nvgpu_kzalloc(g, sizeof(*zcull)); + if (zcull == NULL) { + err = -ENOMEM; + goto exit; + } + + zcull->g = g; + + zcull->zcull_ctxsw_image_size = size; + + zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(config) * 16U; + zcull->aliquot_height = 16; + + zcull->width_align_pixels = + nvgpu_gr_config_get_tpc_count(config) * 16U; + zcull->height_align_pixels = 32; + + zcull->aliquot_size = + zcull->aliquot_width * zcull->aliquot_height; + + /* assume no floor sweeping since we only have 1 tpc in 1 gpc */ + zcull->pixel_squares_by_aliquots = + nvgpu_gr_config_get_zcb_count(config) * 16U * 16U * + nvgpu_gr_config_get_tpc_count(config) / + (nvgpu_gr_config_get_gpc_count(config) * + nvgpu_gr_config_get_gpc_tpc_count(config, 0U)); + +exit: + *gr_zcull = zcull; + return err; +} + +void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull) +{ + if (gr_zcull == NULL) { + return; + } + + nvgpu_kfree(g, gr_zcull); +} + +u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull) +{ + /* assuming zcull has already been initialized */ + return gr_zcull->zcull_ctxsw_image_size; +} + +int nvgpu_gr_zcull_init_hw(struct gk20a *g, + struct nvgpu_gr_zcull *gr_zcull, + struct nvgpu_gr_config *gr_config) +{ + u32 *zcull_map_tiles, *zcull_bank_counters; + u32 map_counter; + u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); + u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_TPC_PER_GPC); + u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; + u32 map_tile_count; + int ret = 0; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (nvgpu_gr_config_get_map_tiles(gr_config) == NULL) { + return -1; + } + + if (zcull_alloc_num % 8U != 0U) { + /* Total 8 fields per map reg i.e. 
tile_0 to tile_7*/
+		zcull_alloc_num += (zcull_alloc_num % 8U);
+	}
+	zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
+
+	if (zcull_map_tiles == NULL) {
+		nvgpu_err(g,
+			"failed to allocate zcull map tiles");
+		return -ENOMEM;
+	}
+
+	zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
+
+	if (zcull_bank_counters == NULL) {
+		nvgpu_err(g,
+			"failed to allocate zcull bank counters");
+		nvgpu_kfree(g, zcull_map_tiles);
+		return -ENOMEM;
+	}
+
+	for (map_counter = 0;
+	     map_counter < nvgpu_gr_config_get_tpc_count(gr_config);
+	     map_counter++) {
+		map_tile_count =
+			nvgpu_gr_config_get_map_tile_count(gr_config,
+				map_counter);
+		zcull_map_tiles[map_counter] =
+			zcull_bank_counters[map_tile_count];
+		zcull_bank_counters[map_tile_count]++;
+	}
+
+	if (g->ops.gr.zcull.program_zcull_mapping != NULL) {
+		g->ops.gr.zcull.program_zcull_mapping(g, zcull_alloc_num,
+			zcull_map_tiles);
+	}
+
+	nvgpu_kfree(g, zcull_map_tiles);
+	nvgpu_kfree(g, zcull_bank_counters);
+
+	if (g->ops.gr.zcull.init_zcull_hw != NULL) {
+		ret = g->ops.gr.zcull.init_zcull_hw(g, gr_zcull, gr_config);
+		if (ret != 0) {
+			nvgpu_err(g, "failed to init zcull hw. err:%d", ret);
+			return ret;
+		}
+	}
+
+	nvgpu_log(g, gpu_dbg_gr, "done");
+	return 0;
+}
+
+int nvgpu_gr_zcull_ctx_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
+			struct nvgpu_gr_ctx *gr_ctx)
+{
+	int ret = 0;
+
+	if (subctx != NULL) {
+		ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false);
+		if (ret == 0) {
+			nvgpu_gr_subctx_zcull_setup(g, subctx, gr_ctx);
+		}
+	} else {
+		ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true);
+	}
+
+	return ret;
+}
+
diff --git a/drivers/gpu/nvgpu/common/gr/zcull_priv.h b/drivers/gpu/nvgpu/common/gr/zcull_priv.h
new file mode 100644
index 000000000..bdb0c5ec9
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/gr/zcull_priv.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#ifndef NVGPU_GR_ZCULL_PRIV_H +#define NVGPU_GR_ZCULL_PRIV_H + +#include + +struct gk20a; + +struct nvgpu_gr_zcull { + struct gk20a *g; + + u32 aliquot_width; + u32 aliquot_height; + u32 aliquot_size; + u32 total_aliquots; + + u32 width_align_pixels; + u32 height_align_pixels; + u32 pixel_squares_by_aliquots; + + u32 zcull_ctxsw_image_size; +}; + +#endif /* NVGPU_GR_ZCULL_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/grmgr/grmgr.c b/drivers/gpu/nvgpu/common/grmgr/grmgr.c new file mode 100644 index 000000000..f629ee7a8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/grmgr/grmgr.c @@ -0,0 +1,720 @@ +/* + * GR MANAGER + * + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+int nvgpu_init_gr_manager(struct gk20a *g)
+{
+	u32 gpc_id;
+	struct nvgpu_gpu_instance *gpu_instance = &g->mig.gpu_instance[0];
+	struct nvgpu_gr_syspipe *gr_syspipe = &gpu_instance->gr_syspipe;
+	u32 local_gpc_mask;
+	u32 ffs_bit = 0U;
+	u32 index;
+	const struct nvgpu_device *gr_dev = NULL;
+#ifdef CONFIG_NVGPU_NEXT
+	if (g->ops.grmgr.load_timestamp_prod != NULL) {
+		g->ops.grmgr.load_timestamp_prod(g);
+	}
+#endif
+
+	/* Number of gpu instances is 1 for legacy mode */
+	g->mig.max_gpc_count = g->ops.top.get_max_gpc_count(g);
+	nvgpu_assert(g->mig.max_gpc_count > 0U);
+	g->mig.gpc_count = g->ops.priv_ring.get_gpc_count(g);
+	nvgpu_assert(g->mig.gpc_count > 0U);
+	g->mig.num_gpu_instances = 1U;
+	g->mig.is_nongr_engine_sharable = false;
+	g->mig.max_fbps_count = nvgpu_fbp_get_max_fbps_count(g->fbp);
+
+	gpu_instance->gpu_instance_id = 0U;
+	gpu_instance->is_memory_partition_supported = false;
+	gpu_instance->gpu_instance_type = NVGPU_MIG_TYPE_PHYSICAL;
+
+	gr_syspipe->gr_instance_id = 0U;
+	gr_syspipe->gr_syspipe_id = 0U;
+	gr_syspipe->num_gpc = g->mig.gpc_count;
+	gr_syspipe->gr_dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0U);
+	nvgpu_assert(gr_syspipe->gr_dev != NULL);
+
+	if (g->ops.gr.config.get_gpc_mask != NULL) {
+		gr_syspipe->gpc_mask = g->ops.gr.config.get_gpc_mask(g);
+		nvgpu_assert(gr_syspipe->gpc_mask != 0U);
+	} else {
+		gr_syspipe->gpc_mask = nvgpu_safe_sub_u32(
+			BIT32(gr_syspipe->num_gpc),
+			1U);
+	}
+
+	if (g->ops.grmgr.discover_gpc_ids != NULL) {
+		if (g->ops.grmgr.discover_gpc_ids(g,
+				gr_syspipe->num_gpc,
+				gr_syspipe->gpcs) != 0) {
+			nvgpu_err(g, "discover_gpc_ids failed");
+			return -EINVAL;
+		}
+	} else {
+		/*
+		 * For tu104 and before chips,
+		 * Local GPC Id = physical GPC Id = Logical GPC Id for
+		 * non-floorswept config else physical gpcs are assigned
+		 * serially and floorswept gpcs are skipped.
+		 */
+		local_gpc_mask = gr_syspipe->gpc_mask;
+		for (gpc_id = 0U; gpc_id < gr_syspipe->num_gpc; gpc_id++) {
+			gr_syspipe->gpcs[gpc_id].logical_id = gpc_id;
+			nvgpu_assert(local_gpc_mask != 0U);
+			ffs_bit = nvgpu_ffs(local_gpc_mask) - 1U;
+			local_gpc_mask &= ~(1U << ffs_bit);
+			gr_syspipe->gpcs[gpc_id].physical_id = ffs_bit;
+			gr_syspipe->gpcs[gpc_id].gpcgrp_id = 0U;
+		}
+		nvgpu_assert(local_gpc_mask == 0U);
+	}
+
+	g->mig.usable_gr_syspipe_count =
+		nvgpu_device_count(g, NVGPU_DEVTYPE_GRAPHICS);
+	if ((g->mig.usable_gr_syspipe_count == 0U) ||
+			(g->mig.usable_gr_syspipe_count >=
+			NVGPU_MIG_MAX_ENGINES)) {
+		nvgpu_err(g, "Usable GR engine syspipe "
+			"count[%u] is more than [%u] or "
+			"no GR engine is available on the device!",
+			g->mig.usable_gr_syspipe_count,
+			NVGPU_MIG_MAX_ENGINES);
+		nvgpu_assert(g->mig.usable_gr_syspipe_count <
+			NVGPU_MIG_MAX_ENGINES);
+		return -EINVAL;
+	}
+
+	index = 0U;
+	nvgpu_device_for_each(g, gr_dev, NVGPU_DEVTYPE_GRAPHICS) {
+		g->mig.usable_gr_syspipe_instance_id[index] =
+			gr_dev->inst_id;
+		g->mig.usable_gr_syspipe_mask |=
+			BIT32(gr_dev->inst_id);
+		index = nvgpu_safe_add_u32(index, 1U);
+	}
+
+	if (g->ops.grmgr.get_gpcgrp_count != NULL) {
+		g->ops.grmgr.get_gpcgrp_count(g);
+	} else {
+		g->mig.gpcgrp_gpc_count[0] = gr_syspipe->num_gpc;
+	}
+
+	if (g->ops.gr.init.get_max_subctx_count != NULL) {
+		gr_syspipe->max_veid_count_per_tsg =
+			g->ops.gr.init.get_max_subctx_count();
+		nvgpu_assert(gr_syspipe->max_veid_count_per_tsg > 0U);
+	} else {
+		/*
+		 * For vgpu, NvGpu has to rely on chip constant
+		 * queried from nvgpu server.
+ * For legacy chips, g->fifo.max_subctx_count should be 0U. + */ + gr_syspipe->max_veid_count_per_tsg = g->fifo.max_subctx_count; + } + gr_syspipe->veid_start_offset = 0U; + + gpu_instance->num_lce = nvgpu_device_get_copies(g, gpu_instance->lce_devs, + NVGPU_MIG_MAX_ENGINES); + nvgpu_assert(gpu_instance->num_lce > 0U); + + g->mig.max_gr_sys_pipes_supported = 1U; + g->mig.gr_syspipe_en_mask = 1U; + g->mig.num_gr_sys_pipes_enabled = 1U; + g->mig.recursive_ref_count = 0U; + g->mig.cur_tid = -1; + + gpu_instance->fbp_en_mask = nvgpu_fbp_get_fbp_en_mask(g->fbp); +#ifdef CONFIG_NVGPU_NON_FUSA + gpu_instance->num_fbp = nvgpu_fbp_get_num_fbps(g->fbp); + gpu_instance->fbp_rop_l2_en_mask = nvgpu_fbp_get_rop_l2_en_mask(g->fbp); +#endif + + g->mig.current_gr_syspipe_id = NVGPU_MIG_INVALID_GR_SYSPIPE_ID; + + nvgpu_log(g, gpu_dbg_mig, + "[Physical device] gpu_instance_id[%u] gr_instance_id[%u] " + "gr_syspipe_id[%u] max_gpc_count[%u] num_gpc[%u] " + "gr_engine_id[%u] max_veid_count_per_tsg[%u] " + "veid_start_offset[%u] is_memory_partition_support[%d] " + "num_lce[%u] max_fbps_count[%u] num_fbp[%u] " + "fbp_en_mask [0x%x] ", + gpu_instance->gpu_instance_id, + gr_syspipe->gr_instance_id, + gr_syspipe->gr_syspipe_id, + g->mig.max_gpc_count, + gr_syspipe->num_gpc, + gr_syspipe->gr_dev->engine_id, + gr_syspipe->max_veid_count_per_tsg, + gr_syspipe->veid_start_offset, + gpu_instance->is_memory_partition_supported, + gpu_instance->num_lce, + g->mig.max_fbps_count, + gpu_instance->num_fbp, + gpu_instance->fbp_en_mask); + + return 0; +} + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_MIG) +static void nvgpu_grmgr_acquire_gr_syspipe(struct gk20a *g, u32 gr_syspipe_id) +{ + g->mig.recursive_ref_count = nvgpu_safe_add_u32( + g->mig.recursive_ref_count, 1U); + + if (g->mig.cur_tid == -1) { + g->mig.current_gr_syspipe_id = gr_syspipe_id; + g->mig.cur_tid = nvgpu_current_tid(g); + } else { + nvgpu_log(g, gpu_dbg_mig, + "Repeated gr remap window acquire call from same " + "thread tid[%d] requsted gr_syspipe_id[%u] " + "current_gr_syspipe_id[%u] " + "recursive_ref_count[%u]", + g->mig.cur_tid, gr_syspipe_id, + g->mig.current_gr_syspipe_id, + g->mig.recursive_ref_count); + nvgpu_assert((g->mig.cur_tid == nvgpu_current_tid(g)) && + (g->mig.current_gr_syspipe_id == gr_syspipe_id)); + } +} + +static void nvgpu_grmgr_release_gr_syspipe(struct gk20a *g) +{ + g->mig.recursive_ref_count = nvgpu_safe_sub_u32( + g->mig.recursive_ref_count, 1U); + + if (g->mig.recursive_ref_count == 0U) { + g->mig.current_gr_syspipe_id = NVGPU_MIG_INVALID_GR_SYSPIPE_ID; + g->mig.cur_tid = -1; + nvgpu_mutex_release(&g->mig.gr_syspipe_lock); + } else { + nvgpu_log(g, gpu_dbg_mig, + "Repeated gr remap window release call from same " + "thread tid[%d] current_gr_syspipe_id[%u] " + "recursive_ref_count[%u]", + g->mig.cur_tid, g->mig.current_gr_syspipe_id, + g->mig.recursive_ref_count); + nvgpu_assert(g->mig.cur_tid == nvgpu_current_tid(g)); + } +} +#endif + +int nvgpu_grmgr_config_gr_remap_window(struct gk20a *g, + u32 gr_syspipe_id, bool enable) +{ + int err = 0; +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_MIG) + if (nvgpu_grmgr_is_multi_gr_enabled(g)) { + /* + * GR remap window enable/disable sequence for a GR + * SYSPIPE PGRAPH programming: + * 1) Config_gr_remap_window (syspipe_index, enable). + * 2) Acquire gr_syspipe_lock. + * 3) HW write to enable the gr syspipe programming. + * 4) Return success. + * 5) Do GR programming belong to particular gr syspipe. + * 6) Config_gr_remap_window (syspipe_index, disable). 
+ * 7) HW write to disable the gr syspipe programming. + * 8) Release the gr_syspipe_lock. + * + * GR remap window disable/enable request for legacy + * GR PGRAPH programming: + * 1) Config_gr_remap_window (invalid_syspipe_index, disable). + * 2) Acquire gr_syspipe_lock. + * 3) HW write to enable the legacy gr syspipe programming. + * 4) Return success. + * 5) Do legacy GR PGRAPH programming. + * 6) Config_gr_remap_window (invalid_syspipe_index, enable). + * 7) HW write to disable the legacy gr syspipe programming. + * 8) Release the gr_syspipe_lock. + */ + + if ((gr_syspipe_id != + NVGPU_MIG_INVALID_GR_SYSPIPE_ID) && + ((g->mig.usable_gr_syspipe_mask & BIT32( + gr_syspipe_id)) == 0U)) { + nvgpu_err(g, "Invalid param syspipe_id[%x] en_mask[%x]", + gr_syspipe_id, + g->mig.usable_gr_syspipe_mask); + return -EINVAL; + } + + if (enable && (g->mig.current_gr_syspipe_id == + NVGPU_MIG_INVALID_GR_SYSPIPE_ID) && + (gr_syspipe_id == + NVGPU_MIG_INVALID_GR_SYSPIPE_ID)) { + nvgpu_warn(g, + "Legacy GR PGRAPH window enable called before " + "disable sequence call "); + return -EPERM; + } + + if (enable) { + if ((gr_syspipe_id != + NVGPU_MIG_INVALID_GR_SYSPIPE_ID) && + (g->mig.cur_tid != nvgpu_current_tid(g))) { + nvgpu_mutex_acquire(&g->mig.gr_syspipe_lock); + } + } else { + if ((gr_syspipe_id == + NVGPU_MIG_INVALID_GR_SYSPIPE_ID) && + (g->mig.cur_tid != nvgpu_current_tid(g))) { + nvgpu_mutex_acquire(&g->mig.gr_syspipe_lock); + } else { + gr_syspipe_id = 0U; + } + } + + nvgpu_log(g, gpu_dbg_mig, + "[start]tid[%d] current_gr_syspipe_id[%u] " + "requested_gr_syspipe_id[%u] enable[%d] " + "recursive_ref_count[%u] ", + g->mig.cur_tid, g->mig.current_gr_syspipe_id, + gr_syspipe_id, enable, g->mig.recursive_ref_count); + + if (gr_syspipe_id != NVGPU_MIG_INVALID_GR_SYSPIPE_ID) { + if (((g->mig.current_gr_syspipe_id == + NVGPU_MIG_INVALID_GR_SYSPIPE_ID) && + (g->mig.recursive_ref_count == 0U)) || + (!enable && + (g->mig.recursive_ref_count == 1U))) { + err = g->ops.priv_ring.config_gr_remap_window(g, + gr_syspipe_id, enable); + } + } else { + nvgpu_log(g, gpu_dbg_mig, + "Legacy GR PGRAPH window enable[%d] ", + enable); + } + + if (err != 0) { + nvgpu_mutex_release(&g->mig.gr_syspipe_lock); + nvgpu_err(g, "Failed [%d]", err); + return err; + } + + if (enable) { + if ((gr_syspipe_id == + NVGPU_MIG_INVALID_GR_SYSPIPE_ID) && + (g->mig.current_gr_syspipe_id == 0U)) { + nvgpu_grmgr_release_gr_syspipe(g); + } else { + nvgpu_grmgr_acquire_gr_syspipe(g, + gr_syspipe_id); + } + } else { + if (g->mig.current_gr_syspipe_id != + NVGPU_MIG_INVALID_GR_SYSPIPE_ID) { + nvgpu_grmgr_release_gr_syspipe(g); + } else { + nvgpu_grmgr_acquire_gr_syspipe(g, 0U); + } + } + nvgpu_log(g, gpu_dbg_mig, + "[end]tid[%d] current_gr_syspipe_id[%u] " + "requested_gr_syspipe_id[%u] enable[%d] " + "recursive_ref_count[%u] ", + g->mig.cur_tid, g->mig.current_gr_syspipe_id, + gr_syspipe_id, enable, g->mig.recursive_ref_count); + } +#endif + return err; +} + +u32 nvgpu_grmgr_get_num_gr_instances(struct gk20a *g) +{ + /* + * There is only one gr engine instance per syspipe. + * Hence just return number of syspipes here. 
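+	 * In the legacy (non-MIG) configuration set up by nvgpu_init_gr_manager(),
+	 * num_gr_sys_pipes_enabled is 1, so a single GR instance is reported.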
+ */ + return g->mig.num_gr_sys_pipes_enabled; +} + +static inline u32 nvgpu_grmgr_get_gpu_instance_id(struct gk20a *g, + u32 gr_instance_id) +{ + u32 gpu_instance_id = 0U; + + if (nvgpu_grmgr_is_multi_gr_enabled(g)) { + /* 0th entry is physical device gpu instance */ + gpu_instance_id = nvgpu_safe_add_u32(gr_instance_id, 1U); + + if (gpu_instance_id >= g->mig.num_gpu_instances) { + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + nvgpu_assert( + gpu_instance_id < g->mig.num_gpu_instances); + gpu_instance_id = 0U; + } + } + + nvgpu_log(g, gpu_dbg_mig, "gr_instance_id[%u] gpu_instance_id[%u]", + gr_instance_id, gpu_instance_id); + + return gpu_instance_id; +} + +u32 nvgpu_grmgr_get_gr_syspipe_id(struct gk20a *g, u32 gr_instance_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + struct nvgpu_gr_syspipe *gr_syspipe; + u32 gpu_instance_id = nvgpu_grmgr_get_gpu_instance_id( + g, gr_instance_id); + + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + gr_syspipe = &gpu_instance->gr_syspipe; + + return gr_syspipe->gr_syspipe_id; +} + +u32 nvgpu_grmgr_get_gr_num_gpcs(struct gk20a *g, u32 gr_instance_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + struct nvgpu_gr_syspipe *gr_syspipe; + u32 gpu_instance_id = nvgpu_grmgr_get_gpu_instance_id( + g, gr_instance_id); + + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + gr_syspipe = &gpu_instance->gr_syspipe; + + return gr_syspipe->num_gpc; +} + +u32 nvgpu_grmgr_get_gr_gpc_phys_id(struct gk20a *g, u32 gr_instance_id, + u32 gpc_local_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + struct nvgpu_gr_syspipe *gr_syspipe; + u32 gpu_instance_id = nvgpu_grmgr_get_gpu_instance_id( + g, gr_instance_id); + + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + gr_syspipe = &gpu_instance->gr_syspipe; + + nvgpu_assert(gpc_local_id < gr_syspipe->num_gpc); + + nvgpu_log(g, gpu_dbg_mig, + "gpu_instance_id[%u] gpc_local_id[%u] physical_id[%u]", + gpu_instance_id, gpc_local_id, + gr_syspipe->gpcs[gpc_local_id].physical_id); + + return gr_syspipe->gpcs[gpc_local_id].physical_id; +} + +u32 nvgpu_grmgr_get_gr_gpc_logical_id(struct gk20a *g, u32 gr_instance_id, + u32 gpc_local_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + struct nvgpu_gr_syspipe *gr_syspipe; + u32 gpu_instance_id = nvgpu_grmgr_get_gpu_instance_id( + g, gr_instance_id); + + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + gr_syspipe = &gpu_instance->gr_syspipe; + + nvgpu_assert(gpc_local_id < gr_syspipe->num_gpc); + + nvgpu_log(g, gpu_dbg_mig, + "gpu_instance_id[%u] gpc_local_id[%u] logical_id[%u]", + gpu_instance_id, gpc_local_id, + gr_syspipe->gpcs[gpc_local_id].logical_id); + + return gr_syspipe->gpcs[gpc_local_id].logical_id; +} + +u32 nvgpu_grmgr_get_gr_instance_id(struct gk20a *g, u32 gpu_instance_id) +{ + u32 gr_instance_id = 0U; + + /* TODO : Add gr_instance_id for physical device when MIG is enabled. 
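+	 * Until then, gpu_instance_id 0 (the physical device) simply maps to
+	 * gr_instance_id 0 below.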
*/ + if ((nvgpu_grmgr_is_multi_gr_enabled(g)) && + (gpu_instance_id != 0U)) { + if (gpu_instance_id < g->mig.num_gpu_instances) { + /* 0th entry is physical device gpu instance */ + gr_instance_id = nvgpu_safe_sub_u32( + gpu_instance_id, 1U); + } else { + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + nvgpu_assert( + gpu_instance_id < g->mig.num_gpu_instances); + } + } + + nvgpu_log(g, gpu_dbg_mig, "gpu_instance_id[%u] gr_instance_id[%u]", + gpu_instance_id, gr_instance_id); + + return gr_instance_id; +} + +bool nvgpu_grmgr_is_valid_runlist_id(struct gk20a *g, + u32 gpu_instance_id, u32 runlist_id) +{ + if (gpu_instance_id < g->mig.num_gpu_instances) { + struct nvgpu_gpu_instance *gpu_instance = + &g->mig.gpu_instance[gpu_instance_id]; + struct nvgpu_gr_syspipe *gr_syspipe = + &gpu_instance->gr_syspipe; + const struct nvgpu_device *gr_dev = gr_syspipe->gr_dev; + u32 id; + + if (gr_dev->runlist_id == runlist_id) { + nvgpu_log(g, gpu_dbg_mig, "gr runlist found[%u]", + runlist_id); + return true; + } + + for (id = 0U; id < gpu_instance->num_lce; id++) { + const struct nvgpu_device *lce_dev = + gpu_instance->lce_devs[id]; + if (lce_dev->runlist_id == runlist_id) { + nvgpu_log(g, gpu_dbg_mig, + "lce/ce runlist found[%u]", + runlist_id); + return true; + } + } + } + + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + + return false; +} + +u32 nvgpu_grmgr_get_gpu_instance_runlist_id(struct gk20a *g, + u32 gpu_instance_id) +{ + if (gpu_instance_id < g->mig.num_gpu_instances) { + struct nvgpu_gpu_instance *gpu_instance = + &g->mig.gpu_instance[gpu_instance_id]; + struct nvgpu_gr_syspipe *gr_syspipe = + &gpu_instance->gr_syspipe; + const struct nvgpu_device *gr_dev = gr_syspipe->gr_dev; + + return gr_dev->runlist_id; + } + + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + + return U32_MAX; +} + +u32 nvgpu_grmgr_get_gr_instance_id_for_syspipe(struct gk20a *g, + u32 gr_syspipe_id) +{ + if (nvgpu_grmgr_is_multi_gr_enabled(g)) { + u32 gr_instance_id = 0U; + u32 index; + /* 0th entry is physical device gpu instance. */ + for (index = 1U; index < g->mig.num_gpu_instances; ++index) { + struct nvgpu_gpu_instance *gpu_instance = + &g->mig.gpu_instance[index]; + struct nvgpu_gr_syspipe *gr_syspipe = + &gpu_instance->gr_syspipe; + + if (gr_syspipe->gr_syspipe_id == gr_syspipe_id) { + nvgpu_log(g, gpu_dbg_mig, + "gr_instance_id[%u] gr_syspipe_id[%u]", + gr_instance_id, gr_syspipe_id); + return gr_instance_id; + } + ++gr_instance_id; + } + } + + /* Default gr_instance_id is 0U for legacy mode. 
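+	 * The same default is returned when no enabled syspipe matches the
+	 * requested gr_syspipe_id.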
*/ + return 0U; +} + +static u32 nvgpu_grmgr_get_max_veid_count(struct gk20a *g, u32 gpu_instance_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + struct nvgpu_gr_syspipe *gr_syspipe; + + if (gpu_instance_id < g->mig.num_gpu_instances) { + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + gr_syspipe = &gpu_instance->gr_syspipe; + + nvgpu_log(g, gpu_dbg_mig, + "gpu_instance_id[%u] max_veid_count_per_tsg[%u]", + gpu_instance_id, gr_syspipe->max_veid_count_per_tsg); + + return gr_syspipe->max_veid_count_per_tsg; + } + + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + + return U32_MAX; +} + +u32 nvgpu_grmgr_get_gpu_instance_max_veid_count(struct gk20a *g, + u32 gpu_instance_id) +{ + return nvgpu_grmgr_get_max_veid_count(g, gpu_instance_id); +} + +u32 nvgpu_grmgr_get_gr_max_veid_count(struct gk20a *g, u32 gr_instance_id) +{ + u32 gpu_instance_id = nvgpu_grmgr_get_gpu_instance_id( + g, gr_instance_id); + + return nvgpu_grmgr_get_max_veid_count(g, gpu_instance_id); +} + +u32 nvgpu_grmgr_get_gr_logical_gpc_mask(struct gk20a *g, u32 gr_instance_id) +{ + u32 logical_gpc_mask = 0U; + u32 gpc_indx; + struct nvgpu_gpu_instance *gpu_instance; + struct nvgpu_gr_syspipe *gr_syspipe; + u32 gpu_instance_id = nvgpu_grmgr_get_gpu_instance_id( + g, gr_instance_id); + + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + gr_syspipe = &gpu_instance->gr_syspipe; + + for (gpc_indx = 0U; gpc_indx < gr_syspipe->num_gpc; gpc_indx++) { + logical_gpc_mask |= BIT32( + gr_syspipe->gpcs[gpc_indx].logical_id); + + nvgpu_log(g, gpu_dbg_mig, + "gpu_instance_id[%u] gr_instance_id[%u] gpc_indx[%u] " + "logical_gpc_id[%u] logical_gpc_mask[%x]", + gpu_instance_id, gr_instance_id, gpc_indx, + gr_syspipe->gpcs[gpc_indx].logical_id, + logical_gpc_mask); + } + + return logical_gpc_mask; +} + +u32 nvgpu_grmgr_get_gr_physical_gpc_mask(struct gk20a *g, u32 gr_instance_id) +{ + u32 physical_gpc_mask = 0U; + u32 gpc_indx; + struct nvgpu_gpu_instance *gpu_instance; + struct nvgpu_gr_syspipe *gr_syspipe; + u32 gpu_instance_id = nvgpu_grmgr_get_gpu_instance_id( + g, gr_instance_id); + + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + gr_syspipe = &gpu_instance->gr_syspipe; + + for (gpc_indx = 0U; gpc_indx < gr_syspipe->num_gpc; gpc_indx++) { + physical_gpc_mask |= BIT32( + gr_syspipe->gpcs[gpc_indx].physical_id); + + nvgpu_log(g, gpu_dbg_mig, + "gpu_instance_id[%u] gr_instance_id[%u] gpc_indx[%u] " + "physical_id[%u] physical_gpc_mask[%x]", + gpu_instance_id, gr_instance_id, gpc_indx, + gr_syspipe->gpcs[gpc_indx].physical_id, + physical_gpc_mask); + } + + return physical_gpc_mask; +} + +u32 nvgpu_grmgr_get_num_fbps(struct gk20a *g, u32 gpu_instance_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + + if (gpu_instance_id < g->mig.num_gpu_instances) { + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + + nvgpu_log(g, gpu_dbg_mig, + "gpu_instance_id[%u] num_fbp[%u]", + gpu_instance_id, gpu_instance->num_fbp); + + return gpu_instance->num_fbp; + } + + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + + nvgpu_assert(gpu_instance_id < g->mig.num_gpu_instances); + + return U32_MAX; +} + +u32 nvgpu_grmgr_get_fbp_en_mask(struct gk20a *g, u32 gpu_instance_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + + if (gpu_instance_id < g->mig.num_gpu_instances) { + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + + nvgpu_log(g, gpu_dbg_mig, + "gpu_instance_id[%u] fbp_en_mask[0x%x]", + 
gpu_instance_id, gpu_instance->fbp_en_mask); + + return gpu_instance->fbp_en_mask; + } + + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + + nvgpu_assert(gpu_instance_id < g->mig.num_gpu_instances); + + return U32_MAX; +} + +u32 *nvgpu_grmgr_get_fbp_rop_l2_en_mask(struct gk20a *g, u32 gpu_instance_id) +{ + struct nvgpu_gpu_instance *gpu_instance; + + if (gpu_instance_id < g->mig.num_gpu_instances) { + gpu_instance = &g->mig.gpu_instance[gpu_instance_id]; + + return gpu_instance->fbp_rop_l2_en_mask; + } + + nvgpu_err(g, + "gpu_instance_id[%u] >= num_gpu_instances[%u]", + gpu_instance_id, g->mig.num_gpu_instances); + + nvgpu_assert(gpu_instance_id < g->mig.num_gpu_instances); + + return NULL; +} diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c new file mode 100644 index 000000000..a46cf5979 --- /dev/null +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -0,0 +1,1082 @@ +/* + * GK20A Graphics + * + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_NON_FUSA +#include +#endif +#include + +#ifdef CONFIG_NVGPU_LS_PMU +#include +#endif + +bool is_nvgpu_gpu_state_valid(struct gk20a *g) +{ + u32 boot_0 = g->ops.mc.get_chip_details(g, NULL, NULL, NULL); + + if (boot_0 == 0xffffffffU) { + nvgpu_err(g, "GPU has disappeared from bus!!"); + return false; + } + return true; +} + +void nvgpu_check_gpu_state(struct gk20a *g) +{ + if (!is_nvgpu_gpu_state_valid(g)) { + nvgpu_err(g, "Entering SW Quiesce!!"); + nvgpu_sw_quiesce(g); + } +} + +static void gk20a_mask_interrupts(struct gk20a *g) +{ + nvgpu_cic_intr_mask(g); +#ifdef CONFIG_NVGPU_NON_FUSA + nvgpu_cic_log_pending_intrs(g); +#endif +} + +#define NVGPU_SW_QUIESCE_TIMEOUT_MS 50 + +static int nvgpu_sw_quiesce_thread(void *data) +{ + struct gk20a *g = data; + + /* wait until SW quiesce is requested */ + NVGPU_COND_WAIT_INTERRUPTIBLE(&g->sw_quiesce_cond, + g->sw_quiesce_pending || + nvgpu_thread_should_stop(&g->sw_quiesce_thread), 0U); + + if (nvgpu_thread_should_stop(&g->sw_quiesce_thread)) { + goto done; + } + + nvgpu_err(g, "SW quiesce thread running"); + + nvgpu_msleep(NVGPU_SW_QUIESCE_TIMEOUT_MS); + + nvgpu_disable_irqs(g); + nvgpu_channel_sw_quiesce(g); + nvgpu_bug_exit(1); + +done: + nvgpu_log_info(g, "done"); + return 0; +} + +static void nvgpu_sw_quiesce_bug_cb(void *arg) +{ + struct gk20a *g = arg; + + nvgpu_sw_quiesce(g); +} + +static void nvgpu_sw_quiesce_thread_stop_fn(void *data) +{ + struct gk20a *g = data; + + /* + * If the thread is still waiting on the cond, + * nvgpu_thread_should_stop() will return true, and the thread will + * exit. 
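+	 * The signal below only wakes the thread so that it can observe the
+	 * stop request; sw_quiesce_pending is not set on this path, so no
+	 * quiesce work is started.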
+ */ + nvgpu_cond_signal_interruptible(&g->sw_quiesce_cond); +} + +void nvgpu_sw_quiesce_remove_support(struct gk20a *g) +{ + if (g->sw_quiesce_init_done) { + nvgpu_bug_unregister_cb(&g->sw_quiesce_bug_cb); + nvgpu_thread_stop_graceful(&g->sw_quiesce_thread, + nvgpu_sw_quiesce_thread_stop_fn, + g); + nvgpu_cond_destroy(&g->sw_quiesce_cond); + g->sw_quiesce_init_done = false; + } +} + +static int nvgpu_sw_quiesce_init_support(struct gk20a *g) +{ + int err; + + if (g->sw_quiesce_init_done) { + return 0; + } + + + err = nvgpu_cond_init(&g->sw_quiesce_cond); + if (err != 0) { + nvgpu_err(g, "nvgpu_cond_init() failed err=%d", err); + return err; + } + + g->sw_quiesce_pending = false; + + err = nvgpu_thread_create(&g->sw_quiesce_thread, g, + nvgpu_sw_quiesce_thread, "sw-quiesce"); + if (err != 0) { + nvgpu_cond_destroy(&g->sw_quiesce_cond); + return err; + } + + g->sw_quiesce_init_done = true; + + /* register callback to SW quiesce GPU in case of BUG() */ + g->sw_quiesce_bug_cb.cb = nvgpu_sw_quiesce_bug_cb; + g->sw_quiesce_bug_cb.arg = g; + g->sw_quiesce_bug_cb.sw_quiesce_data = true; + nvgpu_bug_register_cb(&g->sw_quiesce_bug_cb); + +#ifdef CONFIG_NVGPU_RECOVERY + nvgpu_set_enabled(g, NVGPU_SUPPORT_FAULT_RECOVERY, true); +#else + nvgpu_set_enabled(g, NVGPU_SUPPORT_FAULT_RECOVERY, false); +#endif + + return 0; +} + +void nvgpu_sw_quiesce(struct gk20a *g) +{ + if (g->is_virtual || (g->enabled_flags == NULL) || + nvgpu_is_enabled(g, NVGPU_DISABLE_SW_QUIESCE)) { + nvgpu_err(g, "SW quiesce not supported"); + return; + } + + if (!g->sw_quiesce_init_done) { + nvgpu_err(g, "SW quiesce not initialized"); + return; + } + + if (g->sw_quiesce_pending) { + nvgpu_err(g, "SW quiesce already pending"); + return; + } + + nvgpu_err(g, "SW quiesce requested"); + + /* + * When this flag is set, interrupt handlers should + * exit after masking interrupts. This should mitigate + * interrupt storm cases. + */ + g->sw_quiesce_pending = true; + + nvgpu_cond_signal_interruptible(&g->sw_quiesce_cond); + nvgpu_start_gpu_idle(g); + /* + * Avoid register accesses when GPU had disappeared + * from the bus. 
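+	 * Both gk20a_mask_interrupts() and nvgpu_fifo_sw_quiesce() below
+	 * access GPU registers, hence the is_nvgpu_gpu_state_valid() check
+	 * first.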
+ */ + if (is_nvgpu_gpu_state_valid(g)) { + gk20a_mask_interrupts(g); + nvgpu_fifo_sw_quiesce(g); + } +} + +/* init interface layer support for all falcons */ +static int nvgpu_falcons_sw_init(struct gk20a *g) +{ + int err; + + err = g->ops.falcon.falcon_sw_init(g, FALCON_ID_PMU); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_PMU"); + return err; + } + + err = g->ops.falcon.falcon_sw_init(g, FALCON_ID_FECS); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_FECS"); + goto done_pmu; + } + +#ifdef CONFIG_NVGPU_DGPU + err = g->ops.falcon.falcon_sw_init(g, FALCON_ID_SEC2); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_SEC2"); + goto done_fecs; + } + + err = g->ops.falcon.falcon_sw_init(g, FALCON_ID_NVDEC); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_NVDEC"); + goto done_sec2; + } + + err = g->ops.falcon.falcon_sw_init(g, FALCON_ID_GSPLITE); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_GSPLITE"); + goto done_nvdec; + } +#endif + + return 0; + +#ifdef CONFIG_NVGPU_DGPU +done_nvdec: + g->ops.falcon.falcon_sw_free(g, FALCON_ID_NVDEC); +done_sec2: + g->ops.falcon.falcon_sw_free(g, FALCON_ID_SEC2); +done_fecs: + g->ops.falcon.falcon_sw_free(g, FALCON_ID_FECS); +#endif +done_pmu: + g->ops.falcon.falcon_sw_free(g, FALCON_ID_PMU); + + return err; +} + +/* handle poweroff and error case for all falcons interface layer support */ +static void nvgpu_falcons_sw_free(struct gk20a *g) +{ + g->ops.falcon.falcon_sw_free(g, FALCON_ID_PMU); + g->ops.falcon.falcon_sw_free(g, FALCON_ID_FECS); + +#ifdef CONFIG_NVGPU_DGPU + g->ops.falcon.falcon_sw_free(g, FALCON_ID_GSPLITE); + g->ops.falcon.falcon_sw_free(g, FALCON_ID_NVDEC); + g->ops.falcon.falcon_sw_free(g, FALCON_ID_SEC2); +#endif +} + +int nvgpu_prepare_poweroff(struct gk20a *g) +{ + int tmp_ret, ret = 0; + + nvgpu_log_fn(g, " "); + + if (g->ops.channel.suspend_all_serviceable_ch != NULL) { + ret = g->ops.channel.suspend_all_serviceable_ch(g); + if (ret != 0) { + return ret; + } + } + +#ifdef CONFIG_NVGPU_LS_PMU + /* disable elpg before gr or fifo suspend */ + if (g->support_ls_pmu) { + ret = g->ops.pmu.pmu_destroy(g, g->pmu); + } +#endif + + nvgpu_pmu_enable_irq(g, false); + +#ifdef CONFIG_NVGPU_DGPU + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { + tmp_ret = g->ops.sec2.sec2_destroy(g); + if ((tmp_ret != 0) && (ret == 0)) { + ret = tmp_ret; + } + } +#endif + tmp_ret = g->ops.gr.gr_suspend(g); + if (tmp_ret != 0) { + ret = tmp_ret; + } + if (g->ops.grmgr.remove_gr_manager != NULL) { + tmp_ret = g->ops.grmgr.remove_gr_manager(g); + if (tmp_ret != 0) { + nvgpu_err(g, "g->ops.grmgr.remove_gr_manager-failed"); + ret = tmp_ret; + } + } + tmp_ret = g->ops.mm.mm_suspend(g); + if (tmp_ret != 0) { + ret = tmp_ret; + } + tmp_ret = g->ops.fifo.fifo_suspend(g); + if (tmp_ret != 0) { + ret = tmp_ret; + } + + nvgpu_falcons_sw_free(g); + +#ifdef CONFIG_NVGPU_DGPU + g->ops.ce.ce_app_suspend(g); +#endif + +#ifdef CONFIG_NVGPU_DGPU + if (g->ops.bios.bios_sw_deinit != NULL) { + /* deinit the bios */ + g->ops.bios.bios_sw_deinit(g, g->bios); + } +#endif + + /* Disable GPCPLL */ + if (g->ops.clk.suspend_clk_support != NULL) { + g->ops.clk.suspend_clk_support(g); + } +#ifdef CONFIG_NVGPU_CLK_ARB + if (g->ops.clk_arb.stop_clk_arb_threads != NULL) { + g->ops.clk_arb.stop_clk_arb_threads(g); + } +#endif + gk20a_mask_interrupts(g); + + /* Disable CIC after the interrupts are masked; + * This will ensure that CIC will not get probed + * after it's deinit. 
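+	 * gk20a_mask_interrupts() has already run above, so no new interrupts
+	 * can be delivered while CIC is being torn down.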
+ */ + nvgpu_cic_deinit_common(g); + + return ret; +} + +#ifdef CONFIG_NVGPU_TPC_POWERGATE +static bool have_tpc_pg_lock = false; + +static int nvgpu_init_acquire_tpc_pg_lock(struct gk20a *g) +{ + nvgpu_mutex_acquire(&g->tpc_pg_lock); + have_tpc_pg_lock = true; + return 0; +} + +static int nvgpu_init_release_tpc_pg_lock(struct gk20a *g) +{ + nvgpu_mutex_release(&g->tpc_pg_lock); + have_tpc_pg_lock = false; + return 0; +} +#endif + +#ifdef CONFIG_NVGPU_DGPU +static int nvgpu_init_fb_mem_unlock(struct gk20a *g) +{ + int err; + + if ((g->ops.fb.mem_unlock != NULL) && (!g->is_fusa_sku)) { + err = g->ops.fb.mem_unlock(g); + if (err != 0) { + return err; + } + } else { + nvgpu_log_info(g, "skipping fb mem_unlock"); + } + + return 0; +} + +static int nvgpu_init_fbpa_ecc(struct gk20a *g) +{ + int err; + + if (g->ops.fb.fbpa_ecc_init != NULL && !g->ecc.initialized) { + err = g->ops.fb.fbpa_ecc_init(g); + if (err != 0) { + return err; + } + } + + return 0; +} +#endif + +#ifdef CONFIG_NVGPU_TPC_POWERGATE +static int nvgpu_init_power_gate(struct gk20a *g) +{ + int err; + u32 fuse_status; + + /* + * Power gate the chip as per the TPC PG mask + * and the fuse_status register. + * If TPC PG mask is invalid halt the GPU poweron. + */ + g->can_tpc_powergate = false; + fuse_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0); + + if (g->ops.tpc.init_tpc_powergate != NULL) { + err = g->ops.tpc.init_tpc_powergate(g, fuse_status); + if (err != 0) { + return err; + } + } + + return 0; +} + +static int nvgpu_init_power_gate_gr(struct gk20a *g) +{ + if (g->can_tpc_powergate && (g->ops.tpc.tpc_gr_pg != NULL)) { + g->ops.tpc.tpc_gr_pg(g); + } + return 0; +} +#endif + +static int nvgpu_init_boot_clk_or_clk_arb(struct gk20a *g) +{ + int err = 0; + +#ifdef CONFIG_NVGPU_LS_PMU + if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE) && + (g->pmu->fw->ops.clk.clk_set_boot_clk != NULL)) { + err = g->pmu->fw->ops.clk.clk_set_boot_clk(g); + if (err != 0) { + nvgpu_err(g, "failed to set boot clk"); + return err; + } + } else +#endif + { +#ifdef CONFIG_NVGPU_CLK_ARB + err = g->ops.clk_arb.clk_arb_init_arbiter(g); + if (err != 0) { + nvgpu_err(g, "failed to init clk arb"); + return err; + } +#endif + } + + return err; +} + +static int nvgpu_init_per_device_identifier(struct gk20a *g) +{ + int err = 0; + + if (g->ops.fuse.read_per_device_identifier != NULL) { + err = g->ops.fuse.read_per_device_identifier( + g, &g->per_device_identifier); + } + + return err; +} + +static int nvgpu_init_set_debugger_mode(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_DEBUGGER + /* Restore the debug setting */ + g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl); +#endif + return 0; +} + +static int nvgpu_init_xve_set_speed(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_DGPU + int err; + + if (g->ops.xve.available_speeds != NULL) { + u32 speed; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_ASPM) && + (g->ops.xve.disable_aspm != NULL)) { + g->ops.xve.disable_aspm(g); + } + + g->ops.xve.available_speeds(g, &speed); + + /* Set to max speed */ + speed = (u32)nvgpu_fls(speed); + + if (speed > 0U) { + speed = BIT32((speed - 1U)); + } else { + speed = BIT32(speed); + } + + err = g->ops.xve.set_speed(g, speed); + if (err != 0) { + nvgpu_err(g, "Failed to set PCIe bus speed!"); + return err; + } + } +#endif + return 0; +} + +static int nvgpu_init_syncpt_mem(struct gk20a *g) +{ +#if defined(CONFIG_TEGRA_GK20A_NVHOST) + int err; + u64 nr_pages; + + if (nvgpu_has_syncpoints(g) && (g->syncpt_unit_size != 0UL)) { + if (!nvgpu_mem_is_valid(&g->syncpt_mem)) { + nr_pages = 
U64(DIV_ROUND_UP(g->syncpt_unit_size, + NVGPU_CPU_PAGE_SIZE)); + err = nvgpu_mem_create_from_phys(g, &g->syncpt_mem, + g->syncpt_unit_base, nr_pages); + if (err != 0) { + nvgpu_err(g, "Failed to create syncpt mem"); + return err; + } + } + } +#endif + return 0; +} + +static int nvgpu_init_slcg_acb_load_gating_prod(struct gk20a *g) +{ + if (g->ops.cg.slcg_acb_load_gating_prod != NULL) { + g->ops.cg.slcg_acb_load_gating_prod(g, true); + } + + return 0; +} + +static int nvgpu_init_interrupt_setup(struct gk20a *g) +{ + /** + * Disable all interrupts at the start. + */ + nvgpu_cic_intr_mask(g); + +#ifdef CONFIG_NVGPU_NON_FUSA + /** + * For certain chips like gm20b, there is global interrupt control in + * registers mc_intr_en_*_r. Program them here upfront. + */ + nvgpu_cic_intr_enable(g); +#endif + + return 0; +} + +typedef int (*nvgpu_init_func_t)(struct gk20a *g); +struct nvgpu_init_table_t { + nvgpu_init_func_t func; + const char *name; + u32 enable_flag; +}; +#define NVGPU_INIT_TABLE_ENTRY(ops_ptr, enable_flag) \ + { (ops_ptr), #ops_ptr, (enable_flag) } +#define NO_FLAG 0U + +static bool needs_init(struct gk20a *g, nvgpu_init_func_t func, u32 enable_flag) +{ + return ((enable_flag == NO_FLAG) || + nvgpu_is_enabled(g, enable_flag)) && (func != NULL); +} + +static int nvgpu_early_init(struct gk20a *g) +{ + int err = 0; + size_t i; + + /* + * This cannot be static because we use the func ptrs as initializers + * and static variables require constant literals for initializers. + */ + const struct nvgpu_init_table_t nvgpu_early_init_table[] = { + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_slcg_acb_load_gating_prod, + NO_FLAG), + /* + * ECC support initialization is split into generic init + * followed by per unit initialization and ends with sysfs + * support init. This is done to setup ECC data structures + * prior to enabling interrupts for corresponding units. + */ + NVGPU_INIT_TABLE_ENTRY(g->ops.ecc.ecc_init_support, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_device_init, NO_FLAG), +#ifdef CONFIG_NVGPU_DGPU + NVGPU_INIT_TABLE_ENTRY(g->ops.bios.bios_sw_init, NO_FLAG), +#endif + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_interrupt_setup, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.bus.init_hw, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.priv_ring.enable_priv_ring, + NO_FLAG), +#ifdef CONFIG_NVGPU_NON_FUSA + NVGPU_INIT_TABLE_ENTRY(&nvgpu_ptimer_init, NO_FLAG), +#endif + /* TBD: move this after graphics init in which blcg/slcg is + * enabled. This function removes SlowdownOnBoot which applies + * 32x divider on gpcpll bypass path. The purpose of slowdown is + * to save power during boot but it also significantly slows + * down gk20a init on simulation and emulation. We should remove + * SOB after graphics power saving features (blcg/slcg) are + * enabled. For now, do it here. 
+	 */
+		NVGPU_INIT_TABLE_ENTRY(g->ops.clk.init_clk_support, NO_FLAG),
+#ifdef CONFIG_NVGPU_DGPU
+		NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_fbpa_ecc, NO_FLAG),
+		NVGPU_INIT_TABLE_ENTRY(g->ops.fb.init_fbpa, NO_FLAG),
+#endif
+		NVGPU_INIT_TABLE_ENTRY(g->ops.fifo.reset_enable_hw, NO_FLAG),
+		NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_fb_support, NO_FLAG),
+		NVGPU_INIT_TABLE_ENTRY(g->ops.ltc.init_ltc_support, NO_FLAG),
+		NVGPU_INIT_TABLE_ENTRY(g->ops.fbp.fbp_init_support, NO_FLAG),
+		NVGPU_INIT_TABLE_ENTRY(g->ops.grmgr.init_gr_manager, NO_FLAG),
+	};
+
+	for (i = 0; i < ARRAY_SIZE(nvgpu_early_init_table); i++) {
+		if (!needs_init(g, nvgpu_early_init_table[i].func,
+				nvgpu_early_init_table[i].enable_flag)) {
+			nvgpu_log_info(g,
+				"Skipping initializing %s (enable_flag=%u func=%p)",
+				nvgpu_early_init_table[i].name,
+				nvgpu_early_init_table[i].enable_flag,
+				nvgpu_early_init_table[i].func);
+		} else {
+			nvgpu_log_info(g, "Initializing %s",
+				nvgpu_early_init_table[i].name);
+			err = nvgpu_early_init_table[i].func(g);
+			if (err != 0) {
+				nvgpu_err(g, "Failed initialization for: %s",
+					nvgpu_early_init_table[i].name);
+				goto done;
+			}
+		}
+	}
+
+done:
+	return err;
+}
+
+int nvgpu_early_poweron(struct gk20a *g)
+{
+	int err = 0;
+
+	err = nvgpu_detect_chip(g);
+	if (err != 0) {
+		nvgpu_err(g, "nvgpu_detect_chip failed[%d]", err);
+		goto done;
+	}
+
+#ifdef CONFIG_NVGPU_DGPU
+	/*
+	 * Before probing the GPU make sure the GPU's state is cleared. This is
+	 * relevant for rebind operations.
+	 */
+	if ((g->ops.xve.reset_gpu != NULL) && !g->gpu_reset_done) {
+		g->ops.xve.reset_gpu(g);
+		g->gpu_reset_done = true;
+	}
+#endif
+
+	/*
+	 * nvgpu poweron sequence split into two stages:
+	 * - nvgpu_early_init() - Initializes the sub units
+	 *   which are required to be initialized before the grmgr init.
+	 *   For creating the dev node, grmgr init and its dependent units
+	 *   need to move to the early stage of GPU power on.
+	 *   After a successful nvgpu_early_init() sequence,
+	 *   NvGpu can identify the number of MIG instances required
+	 *   for each physical GPU.
+	 * - nvgpu_finalize_poweron() - Initializes the sub units which
+	 *   can be initialized at a later stage of the GPU power on sequence.
+	 *
+	 * grmgr init depends on the following HAL sub units:
+	 * device - To get the device caps.
+	 * priv_ring - To get the gpc count and other MIG config programming.
+	 * fifo_reset_hw - In simulation/emulation/GPU standalone platform,
+	 *                 XBAR, L2 and HUB are enabled during
+	 *                 g->ops.fifo.reset_enable_hw(). This introduces a
+	 *                 dependency to get the MIG map conf information.
+	 *                 (nvgpu_is_bpmp_running() == false is treated as
+	 *                 simulation/emulation/GPU standalone platform).
+	 * fb - MIG config programming.
+	 * ltc - MIG config programming.
+	 * bios, bus, ecc and clk - dependent modules of priv_ring/fb/ltc.
+	 *
+	 */
+	err = nvgpu_early_init(g);
+	if (err != 0) {
+		nvgpu_err(g, "nvgpu_early_init failed[%d]", err);
+		goto done;
+	}
+
+	/* Initialize CIC early on before the interrupts are
+	 * enabled.
+	 */
+	err = nvgpu_cic_init_common(g);
+	if (err != 0) {
+		nvgpu_err(g, "CIC Initialization failed[%d]", err);
+		goto done;
+	}
+done:
+	return err;
+}
+
+int nvgpu_finalize_poweron(struct gk20a *g)
+{
+	int err = 0;
+	/*
+	 * This cannot be static because we use the func ptrs as initializers
+	 * and static variables require constant literals for initializers.
+	 */
+	const struct nvgpu_init_table_t nvgpu_init_table[] = {
+		/*
+		 * Do this early so any early VMs that get made are capable of
+		 * mapping buffers.
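+		 * (This is why g->ops.mm.pd_cache_init is the first entry in
+		 * this table.)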
+ */ + NVGPU_INIT_TABLE_ENTRY(g->ops.mm.pd_cache_init, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_falcons_sw_init, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.pmu.pmu_early_init, NO_FLAG), + +#ifdef CONFIG_NVGPU_DGPU + NVGPU_INIT_TABLE_ENTRY(g->ops.sec2.init_sec2_setup_sw, + NVGPU_SUPPORT_SEC2_RTOS), +#endif + NVGPU_INIT_TABLE_ENTRY(g->ops.acr.acr_init, + NVGPU_SEC_PRIVSECURITY), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_sw_quiesce_init_support, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.nvlink.init, + NVGPU_SUPPORT_NVLINK), + +#ifdef CONFIG_NVGPU_DEBUGGER + NVGPU_INIT_TABLE_ENTRY(g->ops.ptimer.config_gr_tick_freq, + NO_FLAG), +#endif + +#ifdef CONFIG_NVGPU_DGPU + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_fb_mem_unlock, NO_FLAG), +#endif + + NVGPU_INIT_TABLE_ENTRY(g->ops.mm.init_mm_support, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.fifo.fifo_init_support, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.therm.elcg_init_idle_filters, + NO_FLAG), +#ifdef CONFIG_NVGPU_TPC_POWERGATE + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_power_gate, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_acquire_tpc_pg_lock, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_power_gate_gr, NO_FLAG), +#endif + NVGPU_INIT_TABLE_ENTRY(&nvgpu_netlist_init_ctx_vars, NO_FLAG), + /* prepare portion of sw required for enable hw */ + NVGPU_INIT_TABLE_ENTRY(&nvgpu_gr_alloc, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_gr_enable_hw, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.acr.acr_construct_execute, + NVGPU_SEC_PRIVSECURITY), + /** + * Set atomic mode after acr boot(See Bug 3268664 for + * details). For acr to boot, nvgpu_init_fb_support + * and init_mm_support is required. + * So, set_atomic_mode is decoupled from nvgpu_init_fb_support + * in the init sequence and called after acr boot. + */ + NVGPU_INIT_TABLE_ENTRY(g->ops.fb.set_atomic_mode, NO_FLAG), +#ifdef CONFIG_NVGPU_DGPU + NVGPU_INIT_TABLE_ENTRY(g->ops.sec2.init_sec2_support, + NVGPU_SUPPORT_SEC2_RTOS), +#endif +#ifdef CONFIG_NVGPU_LS_PMU + NVGPU_INIT_TABLE_ENTRY(g->ops.pmu.pmu_rtos_init, NO_FLAG), +#endif + NVGPU_INIT_TABLE_ENTRY(g->ops.gr.gr_init_support, NO_FLAG), + /** + * All units requiring ECC stats must initialize ECC counters + * before this call to finalize ECC support. 
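+		 * For example, the GR ECC counters are expected to be set up
+		 * by gr_init_support above before ecc_finalize_support runs.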
+ */ + NVGPU_INIT_TABLE_ENTRY(g->ops.ecc.ecc_finalize_support, + NO_FLAG), +#ifdef CONFIG_NVGPU_TPC_POWERGATE + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_release_tpc_pg_lock, + NO_FLAG), +#endif + +#ifdef CONFIG_NVGPU_LS_PMU + NVGPU_INIT_TABLE_ENTRY(g->ops.pmu.pmu_pstate_sw_setup, + NVGPU_PMU_PSTATE), + NVGPU_INIT_TABLE_ENTRY(g->ops.pmu.pmu_pstate_pmu_setup, + NVGPU_PMU_PSTATE), +#endif + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_boot_clk_or_clk_arb, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.therm.init_therm_support, NO_FLAG), +#ifdef CONFIG_NVGPU_COMPRESSION + NVGPU_INIT_TABLE_ENTRY(g->ops.cbc.cbc_init_support, NO_FLAG), +#endif + NVGPU_INIT_TABLE_ENTRY(g->ops.chip_init_gpu_characteristics, + NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_per_device_identifier, + NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_set_debugger_mode, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(g->ops.ce.ce_init_support, NO_FLAG), +#ifdef CONFIG_NVGPU_DGPU + NVGPU_INIT_TABLE_ENTRY(g->ops.ce.ce_app_init_support, NO_FLAG), +#endif + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_xve_set_speed, NO_FLAG), + NVGPU_INIT_TABLE_ENTRY(&nvgpu_init_syncpt_mem, NO_FLAG), +#ifdef CONFIG_NVGPU_PROFILER + NVGPU_INIT_TABLE_ENTRY(&nvgpu_pm_reservation_init, NO_FLAG), +#endif + NVGPU_INIT_TABLE_ENTRY(g->ops.channel.resume_all_serviceable_ch, + NO_FLAG), + }; + size_t i; + + nvgpu_log_fn(g, " "); + + for (i = 0; i < ARRAY_SIZE(nvgpu_init_table); i++) { + if (!needs_init(g, nvgpu_init_table[i].func, + nvgpu_init_table[i].enable_flag)) { + nvgpu_log_info(g, "Skipping initializing %s (enable_flag=%u func=%p)", + nvgpu_init_table[i].name, + nvgpu_init_table[i].enable_flag, + nvgpu_init_table[i].func); + } else { + nvgpu_log_info(g, "Initializing %s", + nvgpu_init_table[i].name); + err = nvgpu_init_table[i].func(g); + if (err != 0) { + nvgpu_err(g, "Failed initialization for: %s", + nvgpu_init_table[i].name); + goto done; + } + } + } + + nvgpu_print_enabled_flags(g); + return err; + +done: +#ifdef CONFIG_NVGPU_TPC_POWERGATE + if (have_tpc_pg_lock) { + int release_err = nvgpu_init_release_tpc_pg_lock(g); + + if (release_err != 0) { + nvgpu_err(g, "failed to release tpc_gp_lock"); + } + } +#endif + nvgpu_falcons_sw_free(g); + + return err; +} + +/* + * Check if the device can go busy. Basically if the driver is currently + * in the process of dying then do not let new places make the driver busy. + */ +int nvgpu_can_busy(struct gk20a *g) +{ + /* Can't do anything if the system is rebooting/shutting down + * or the driver is restarting + */ + + if (g->sw_quiesce_pending) { + return 0; + } + + if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING) || + nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + return 0; + } else { + return 1; + } +} + +int nvgpu_init_gpu_characteristics(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_BUILD_CONFIGURATION_IS_SAFETY + nvgpu_set_enabled(g, NVGPU_DRIVER_REDUCED_PROFILE, true); +#endif + nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, true); + + /* + * Fast submits are supported as long as the user doesn't request + * anything that depends on job tracking. (Here, fast means strictly no + * metadata, just the gpfifo contents are copied and gp_put updated). + */ + nvgpu_set_enabled(g, + NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, + true); + + /* + * Full deterministic submit means that synchronization (pre and post + * fences; implies job tracking) can be used. 
If such submits can be + * guaranteed as long as the channel is set up correctly by userspace + * (e.g., watchdog disabled), this bit is set. + * + * Sync framework is needed when we don't have syncpoint support + * because we don't have a means to expose raw gpu semas in a way + * similar to raw syncpts. Use of the framework requires unpredictable + * actions including deferred job cleanup and wrapping syncs in FDs. + * + * Aggressive sync destroy causes the channel syncpoint to be abruptly + * allocated and deleted during submit path and deferred cleanup. + * + * Note that userspace expects this to be set for usermode submits + * (even if kernel-mode submits aren't enabled where full deterministic + * features matter). + */ + if (nvgpu_has_syncpoints(g) && + g->aggressive_sync_destroy_thresh == 0U) { + nvgpu_set_enabled(g, + NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL, + true); + } + + nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true); + +#ifdef CONFIG_NVGPU_CLK_ARB + if ((g->ops.clk_arb.check_clk_arb_support != NULL) && + (nvgpu_is_enabled(g, NVGPU_CLK_ARB_ENABLED))){ + if (g->ops.clk_arb.check_clk_arb_support(g)) { + nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, + true); + } + } +#endif + + g->ops.gr.init.detect_sm_arch(g); + +#ifdef CONFIG_NVGPU_CYCLESTATS + if (g->ops.gr.init_cyclestats != NULL) { + g->ops.gr.init_cyclestats(g); + } +#endif + + nvgpu_set_enabled(g, NVGPU_SUPPORT_COMPUTE, true); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* + * In MIG mode, 2D, 3D, I2M and ZBC classes are not supported + * by GR engine. Default values for legacy mode (non MIG). + */ + nvgpu_set_enabled(g, NVGPU_SUPPORT_2D, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_3D, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_I2M, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC, true); + } else { + nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, false); + } + + nvgpu_set_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT, true); + + return 0; +} + +static struct gk20a *gk20a_from_refcount(struct nvgpu_ref *refcount) +{ + return (struct gk20a *)((uintptr_t)refcount - + offsetof(struct gk20a, refcount)); +} + +/* + * Free the gk20a struct. + */ +static void gk20a_free_cb(struct nvgpu_ref *refcount) +{ + struct gk20a *g = gk20a_from_refcount(refcount); + + nvgpu_log(g, gpu_dbg_shutdown, "Freeing GK20A struct!"); + +#ifdef CONFIG_NVGPU_DGPU + if (g->ops.ce.ce_app_destroy != NULL) { + g->ops.ce.ce_app_destroy(g); + } +#endif + +#ifdef CONFIG_NVGPU_COMPRESSION + if (g->ops.cbc.cbc_remove_support != NULL) { + g->ops.cbc.cbc_remove_support(g); + } +#endif + + if (g->ops.ecc.ecc_remove_support != NULL) { + g->ops.ecc.ecc_remove_support(g); + } + + if (g->remove_support != NULL) { + g->remove_support(g); + } + + if (g->ops.ltc.ltc_remove_support != NULL) { + g->ops.ltc.ltc_remove_support(g); + } + + /* + * Free the device list once the gk20a struct is removed. We don't want + * to do this during the railgate poweroff sequence since that means + * that the device list disappears every time we rail-gate. That will + * cause the fifo engine code to explode. + */ + nvgpu_device_cleanup(g); + +#ifdef CONFIG_NVGPU_PROFILER + nvgpu_pm_reservation_deinit(g); +#endif + + nvgpu_sw_quiesce_remove_support(g); + + if (g->gfree != NULL) { + g->gfree(g); + } +} + +struct gk20a *nvgpu_get(struct gk20a *g) +{ + int success; + + /* + * Handle the possibility we are still freeing the gk20a struct while + * nvgpu_get() is called. Unlikely but plausible race condition. 
+ * Ideally the code will never be in such a situation that this race is
+ * possible.
+ */
+	success = nvgpu_ref_get_unless_zero(&g->refcount);
+
+	nvgpu_log(g, gpu_dbg_shutdown, "GET: refs currently %d %s",
+		nvgpu_atomic_read(&g->refcount.refcount),
+		(success != 0) ? "" : "(FAILED)");
+
+	return (success != 0) ? g : NULL;
+}
+
+void nvgpu_put(struct gk20a *g)
+{
+	/*
+	 * Note: this is racy; two instances of this could run before the
+	 * actual kref_put() runs, so you could see something like:
+	 *
+	 * ... PUT: refs currently 2
+	 * ... PUT: refs currently 2
+	 * ... Freeing GK20A struct!
+	 */
+	nvgpu_log(g, gpu_dbg_shutdown, "PUT: refs currently %d",
+		nvgpu_atomic_read(&g->refcount.refcount));
+
+	nvgpu_ref_put(&g->refcount, gk20a_free_cb);
+}
diff --git a/drivers/gpu/nvgpu/common/io/io.c b/drivers/gpu/nvgpu/common/io/io.c
new file mode 100644
index 000000000..91f9460ae
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/io/io.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#include +#include +#include +#include +#include + +static void nvgpu_warn_on_no_regs(struct gk20a *g) +{ + nvgpu_warn(g, "Attempted access to GPU regs after unmapping!"); +} + +void nvgpu_writel(struct gk20a *g, u32 r, u32 v) +{ + if (unlikely(!g->regs)) { + nvgpu_warn_on_no_regs(g); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + nvgpu_os_writel(v, g->regs + r); + nvgpu_wmb(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } +} + +#ifdef CONFIG_NVGPU_DGPU +void nvgpu_writel_relaxed(struct gk20a *g, u32 r, u32 v) +{ + if (unlikely(!g->regs)) { + nvgpu_warn_on_no_regs(g); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + nvgpu_os_writel_relaxed(v, g->regs + r); + } +} +#endif + +u32 nvgpu_readl(struct gk20a *g, u32 r) +{ + u32 v = nvgpu_readl_impl(g, r); + + if (v == 0xffffffff) + nvgpu_check_gpu_state(g); + + return v; +} + +u32 nvgpu_readl_impl(struct gk20a *g, u32 r) +{ + u32 v = 0xffffffff; + + if (unlikely(!g->regs)) { + nvgpu_warn_on_no_regs(g); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + v = nvgpu_os_readl(g->regs + r); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } + + return v; +} + +void nvgpu_writel_loop(struct gk20a *g, u32 r, u32 v) +{ + if (unlikely(!g->regs)) { + nvgpu_warn_on_no_regs(g); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + nvgpu_wmb(); + do { + nvgpu_os_writel(v, g->regs + r); + } while (nvgpu_os_readl(g->regs + r) != v); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } +} + +void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v) +{ + if (unlikely(!g->bar1)) { + nvgpu_warn_on_no_regs(g); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); + } else { + nvgpu_wmb(); + nvgpu_os_writel(v, g->bar1 + b); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); + } +} + +u32 nvgpu_bar1_readl(struct gk20a *g, u32 b) +{ + u32 v = 0xffffffff; + + if (unlikely(!g->bar1)) { + nvgpu_warn_on_no_regs(g); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); + } else { + v = nvgpu_os_readl(g->bar1 + b); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); + } + + return v; +} + +bool nvgpu_io_exists(struct gk20a *g) +{ + return g->regs != 0U; +} + +bool nvgpu_io_valid_reg(struct gk20a *g, u32 r) +{ + return r < g->regs_size; +} + +void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v) +{ + u32 read_val = 0U; + + nvgpu_writel(g, r, v); + read_val = nvgpu_readl(g, r); + if (v != read_val) { + nvgpu_err(g, "r=0x%x rd=0x%x wr=0x%x (mismatch)", + r, read_val, v); + BUG_ON(1); + } +} + +void nvgpu_func_writel(struct gk20a *g, u32 r, u32 v) +{ + if (g->ops.func.get_full_phys_offset == NULL) { + BUG_ON(1); + } + nvgpu_writel(g, + nvgpu_safe_add_u32(r, g->ops.func.get_full_phys_offset(g)), v); +} + +u32 nvgpu_func_readl(struct gk20a *g, u32 r) +{ + if (g->ops.func.get_full_phys_offset == NULL) { + BUG_ON(1); + } + return nvgpu_readl(g, + nvgpu_safe_add_u32(r, g->ops.func.get_full_phys_offset(g))); +} diff --git a/drivers/gpu/nvgpu/common/log_common.c b/drivers/gpu/nvgpu/common/log_common.c new file mode 100644 index 000000000..9ae3c9cce --- /dev/null +++ b/drivers/gpu/nvgpu/common/log_common.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +bool nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) +{ + return (g->log_mask & log_mask) != 0ULL; +} \ No newline at end of file diff --git a/drivers/gpu/nvgpu/common/ltc/ltc.c b/drivers/gpu/nvgpu/common/ltc/ltc.c new file mode 100644 index 000000000..327f00472 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ltc/ltc.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +void nvgpu_ltc_remove_support(struct gk20a *g) +{ + struct nvgpu_ltc *ltc = g->ltc; + + nvgpu_log_fn(g, " "); + + if (ltc == NULL) { + return; + } + + nvgpu_kfree(g, ltc); + g->ltc = NULL; +} + +int nvgpu_init_ltc_support(struct gk20a *g) +{ + struct nvgpu_ltc *ltc = g->ltc; + int err; + + nvgpu_log_fn(g, " "); + + g->mm.ltc_enabled_current = true; + g->mm.ltc_enabled_target = true; + + if (ltc == NULL) { + ltc = nvgpu_kzalloc(g, sizeof(*ltc)); + if (ltc == NULL) { + return -ENOMEM; + } + g->ltc = ltc; + nvgpu_spinlock_init(&g->ltc->ltc_enabled_lock); + } + + if (g->ops.ltc.init_fs_state != NULL) { + g->ops.ltc.init_fs_state(g); + } + + if ((g->ops.ltc.ecc_init != NULL) && !g->ecc.initialized) { + err = g->ops.ltc.ecc_init(g); + if (err != 0) { + nvgpu_kfree(g, ltc); + g->ltc = NULL; + return err; + } + } + + if (g->ops.ltc.intr.configure != NULL) { + nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_LTC, + NVGPU_CIC_INTR_ENABLE); + g->ops.ltc.intr.configure(g); + } + + return 0; +} + +void nvgpu_ltc_sync_enabled(struct gk20a *g) +{ + if (g->ops.ltc.set_enabled == NULL) { + return; + } + + nvgpu_spinlock_acquire(&g->ltc->ltc_enabled_lock); + if (g->mm.ltc_enabled_current != g->mm.ltc_enabled_target) { + g->ops.ltc.set_enabled(g, g->mm.ltc_enabled_target); + g->mm.ltc_enabled_current = g->mm.ltc_enabled_target; + } + nvgpu_spinlock_release(&g->ltc->ltc_enabled_lock); +} + +u32 nvgpu_ltc_get_ltc_count(struct gk20a *g) +{ + return g->ltc->ltc_count; +} + +u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g) +{ + return g->ltc->slices_per_ltc; +} + +u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g) +{ + return g->ltc->cacheline_size; +} + +int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name) +{ + struct nvgpu_ecc_stat **stats; + u32 ltc, lts; + char ltc_str[10] = {0}, lts_str[10] = {0}; + int err = 0; + u32 ltc_count = nvgpu_ltc_get_ltc_count(g); + u32 slices_per_ltc = nvgpu_ltc_get_slices_per_ltc(g); + + stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats), + ltc_count)); + if (stats == NULL) { + return -ENOMEM; + } + + for (ltc = 0; ltc < ltc_count; ltc++) { + stats[ltc] = nvgpu_kzalloc(g, + nvgpu_safe_mult_u64(sizeof(*stats[ltc]), + slices_per_ltc)); + if (stats[ltc] == NULL) { + err = -ENOMEM; + goto fail; + } + } + + for (ltc = 0; ltc < ltc_count; ltc++) { + for (lts = 0; lts < slices_per_ltc; lts++) { + /** + * Store stats name as below: + * ltc_lts_ + */ + (void)strcpy(stats[ltc][lts].name, "ltc"); + (void)nvgpu_strnadd_u32(ltc_str, ltc, + sizeof(ltc_str), 10U); + (void)strncat(stats[ltc][lts].name, ltc_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)strncat(stats[ltc][lts].name, "_lts", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)nvgpu_strnadd_u32(lts_str, lts, + sizeof(lts_str), 10U); + (void)strncat(stats[ltc][lts].name, lts_str, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)strncat(stats[ltc][lts].name, "_", + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + (void)strncat(stats[ltc][lts].name, name, + NVGPU_ECC_STAT_NAME_MAX_SIZE - + strlen(stats[ltc][lts].name)); + + nvgpu_ecc_stat_add(g, &stats[ltc][lts]); + } + } + + *stat = stats; + +fail: + if (err != 0) { + while (ltc-- > 0u) { + nvgpu_kfree(g, stats[ltc]); + } + + nvgpu_kfree(g, stats); + } + + return err; +} + +void nvgpu_ltc_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + 
struct nvgpu_ecc_stat *stat; + u32 slices_per_ltc; + u32 ltc_count; + u32 ltc, lts; + + if (g->ltc == NULL) { + return; + } + + ltc_count = nvgpu_ltc_get_ltc_count(g); + slices_per_ltc = nvgpu_ltc_get_slices_per_ltc(g); + + for (ltc = 0; ltc < ltc_count; ltc++) { + if (ecc->ltc.ecc_sec_count != NULL && + ecc->ltc.ecc_sec_count[ltc] != NULL) { + for (lts = 0; lts < slices_per_ltc; lts++) { + stat = &ecc->ltc.ecc_sec_count[ltc][lts]; + nvgpu_ecc_stat_del(g, stat); + } + + nvgpu_kfree(g, ecc->ltc.ecc_sec_count[ltc]); + ecc->ltc.ecc_sec_count[ltc] = NULL; + } + + if (ecc->ltc.ecc_ded_count != NULL && + ecc->ltc.ecc_ded_count[ltc] != NULL) { + for (lts = 0; lts < slices_per_ltc; lts++) { + stat = &ecc->ltc.ecc_ded_count[ltc][lts]; + nvgpu_ecc_stat_del(g, stat); + } + + nvgpu_kfree(g, ecc->ltc.ecc_ded_count[ltc]); + ecc->ltc.ecc_ded_count[ltc] = NULL; + } + } + + nvgpu_kfree(g, ecc->ltc.ecc_sec_count); + ecc->ltc.ecc_sec_count = NULL; + + nvgpu_kfree(g, ecc->ltc.ecc_ded_count); + ecc->ltc.ecc_ded_count = NULL; +} diff --git a/drivers/gpu/nvgpu/common/mc/mc.c b/drivers/gpu/nvgpu/common/mc/mc.c new file mode 100644 index 000000000..6dbc33998 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mc/mc.c @@ -0,0 +1,84 @@ +/* + * GK20A Master Control + * + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +int nvgpu_mc_reset_units(struct gk20a *g, u32 units) +{ + int err; + + err = g->ops.mc.enable_units(g, units, false); + if (err != 0) { + nvgpu_log(g, gpu_dbg_info, "Unit disable failed"); + return err; + } + + err = g->ops.mc.enable_units(g, units, true); + if (err != 0) { + nvgpu_log(g, gpu_dbg_info, "Unit disable failed"); + return err; + } + return 0; +} + +int nvgpu_mc_reset_dev(struct gk20a *g, const struct nvgpu_device *dev) +{ + int err; + + err = g->ops.mc.enable_dev(g, dev, false); + if (err != 0) { + nvgpu_device_dump_dev(g, dev); + return err; + } + + err = g->ops.mc.enable_dev(g, dev, true); + if (err != 0) { + nvgpu_device_dump_dev(g, dev); + return err; + } + return 0; +} + +int nvgpu_mc_reset_devtype(struct gk20a *g, u32 devtype) +{ + int err; + + err = g->ops.mc.enable_devtype(g, devtype, false); + if (err != 0) { + nvgpu_log(g, gpu_dbg_info, "Devtype:%u disable failed", + devtype); + return err; + } + + err = g->ops.mc.enable_devtype(g, devtype, true); + if (err != 0) { + nvgpu_log(g, gpu_dbg_info, "Devtype:%u enable failed", + devtype); + return err; + } + return 0; +} + diff --git a/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c new file mode 100644 index 000000000..25976a48f --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator.c @@ -0,0 +1,514 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "bitmap_allocator_priv.h" + +static u64 nvgpu_bitmap_alloc_length(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + + return ba->length; +} + +static u64 nvgpu_bitmap_alloc_base(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + + return ba->base; +} + +static bool nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + bool inited = ba->inited; + + nvgpu_smp_rmb(); + return inited; +} + +static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + + return nvgpu_safe_add_u64(ba->base, ba->length); +} + +/* + * @page_size is ignored. 
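+ *
+ * Worked example (hypothetical numbers): with blk_size = 4K
+ * (blk_shift = 12) and bit_offs = 0x10, a request for base = 0x13000
+ * and len = 0x2800 maps to bit offset 0x13 - 0x10 = 3 and a rounded-up
+ * length of 3 blocks, so bits 3..5 must all be clear for the call to
+ * succeed and return @base.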
+ */ +static u64 nvgpu_bitmap_balloc_fixed(struct nvgpu_allocator *na, + u64 base, u64 len, u32 page_size) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(na); + u64 blks, offs, ret; + + /* Compute the bit offset and make sure it's aligned to a block. */ + offs = base >> a->blk_shift; + if (nvgpu_safe_mult_u64(offs, a->blk_size) != base) { + return 0; + } + + offs = nvgpu_safe_sub_u64(offs, a->bit_offs); + + blks = len >> a->blk_shift; + if (nvgpu_safe_mult_u64(blks, a->blk_size) != len) { + blks++; + } + nvgpu_assert(blks <= U32_MAX); + + alloc_lock(na); + + /* Check if the space requested is already occupied. */ + ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, + (u32)blks, 0UL); + if (ret != offs) { + goto fail; + } + + nvgpu_assert(blks <= U32_MAX); + nvgpu_bitmap_set(a->bitmap, (u32)offs, U32(blks)); + + a->bytes_alloced = nvgpu_safe_add_u64(a->bytes_alloced, + nvgpu_safe_mult_u64(blks, a->blk_size)); +NVGPU_COV_WHITELIST(false_positive, NVGPU_MISRA(Rule, 14_3), "Bug 2615925") + nvgpu_assert(a->nr_fixed_allocs < U64_MAX); + a->nr_fixed_allocs++; + alloc_unlock(na); + + alloc_dbg(na, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]", + base, len, blks, blks); + return base; + +fail: + alloc_unlock(na); + alloc_dbg(na, "Alloc-fixed failed! (0x%llx)", base); + return 0; +} + +/* + * Two possibilities for this function: either we are freeing a fixed allocation + * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined. + * + * Note: this function won't do much error checking. Thus you could really + * confuse the allocator if you misuse this function. + */ +static void nvgpu_bitmap_free_fixed(struct nvgpu_allocator *na, + u64 base, u64 len) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(na); + u64 blks, offs; + + offs = base >> a->blk_shift; + if (nvgpu_safe_mult_u64(offs, a->blk_size) != base) { + nvgpu_do_assert(); + return; + } + + offs = nvgpu_safe_sub_u64(offs, a->bit_offs); + + blks = len >> a->blk_shift; + if (nvgpu_safe_mult_u64(blks, a->blk_size) != len) { + blks++; + } + + alloc_lock(na); + nvgpu_assert(offs <= U32_MAX); + nvgpu_assert(blks <= (u32)INT_MAX); + nvgpu_bitmap_clear(a->bitmap, (u32)offs, (u32)blks); + a->bytes_freed = nvgpu_safe_add_u64(a->bytes_freed, + nvgpu_safe_mult_u64(blks, a->blk_size)); + alloc_unlock(na); + + alloc_dbg(na, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]", + base, len, blks, blks); +} + +/* + * Add the passed alloc to the tree of stored allocations. + */ +static void insert_alloc_metadata(struct nvgpu_bitmap_allocator *a, + struct nvgpu_bitmap_alloc *alloc) +{ + alloc->alloc_entry.key_start = alloc->base; + alloc->alloc_entry.key_end = nvgpu_safe_add_u64(alloc->base, + alloc->length); + + nvgpu_rbtree_insert(&alloc->alloc_entry, &a->allocs); +} + +/* + * Find and remove meta-data from the outstanding allocations. + */ +static struct nvgpu_bitmap_alloc *find_alloc_metadata( + struct nvgpu_bitmap_allocator *a, u64 addr) +{ + struct nvgpu_bitmap_alloc *alloc; + struct nvgpu_rbtree_node *node = NULL; + + nvgpu_rbtree_search(addr, &node, a->allocs); + if (node == NULL) { + return NULL; + } + + alloc = nvgpu_bitmap_alloc_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &a->allocs); + + return alloc; +} + +/* + * Tree of alloc meta data stores the address of the alloc not the bit offset. 
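+ *
+ * For instance (hypothetical numbers): a 3-block allocation at byte
+ * address 0x13000 with blk_size = 4K is stored with key_start = 0x13000
+ * and key_end = 0x16000; nvgpu_bitmap_free() looks the node up by that
+ * address and only then converts it back into a bit offset.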
+ */ +static int nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a, + u64 addr, u64 len) +{ + struct nvgpu_bitmap_alloc *alloc = + nvgpu_kmem_cache_alloc(a->meta_data_cache); + + if (alloc == NULL) { + return -ENOMEM; + } + + alloc->base = addr; + alloc->length = len; + + insert_alloc_metadata(a, alloc); + + return 0; +} + +/* + * @len is in bytes. This routine will figure out the right number of bits to + * actually allocate. The return is the address in bytes as well. + * + * This is a find-first-fit allocator. + * Check the input parameter validity. + * Acquire the alloc_lock. + * Searche a bitmap for the first space that is large enough to satisfy the + * requested size of bits by walking the next available free blocks by + * bitmap_find_next_zero_area(). + * Release the alloc_lock. + */ +static u64 nvgpu_bitmap_balloc(struct nvgpu_allocator *na, u64 len) +{ + u64 tmp_u64, addr; + u32 blks; + unsigned long offs, adjusted_offs, limit; + struct nvgpu_bitmap_allocator *a = bitmap_allocator(na); + + if (len == 0ULL) { + alloc_dbg(na, "len = 0, Alloc failed!"); + return 0; + } + + tmp_u64 = len >> a->blk_shift; + nvgpu_assert(tmp_u64 <= U32_MAX); + blks = (u32)tmp_u64; + + if (nvgpu_safe_mult_u64(blks, a->blk_size) != len) { + blks++; + } + + alloc_lock(na); + + /* + * First look from next_blk and onwards... + */ + offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits, + a->next_blk, blks, 0); + if (offs >= a->num_bits) { + /* + * If that didn't work try the remaining area. Since there can + * be available space that spans across a->next_blk we need to + * search up to the first set bit after that. + */ + limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk); + offs = bitmap_find_next_zero_area(a->bitmap, limit, + 0, blks, 0); + if (offs >= a->next_blk) { + goto fail; + } + } + + nvgpu_assert(offs <= U32_MAX); + nvgpu_bitmap_set(a->bitmap, (u32)offs, blks); + a->next_blk = offs + blks; + + adjusted_offs = nvgpu_safe_add_u64(offs, a->bit_offs); + addr = nvgpu_safe_mult_u64(((u64)adjusted_offs), a->blk_size); + + /* + * Only do meta-data storage if we are allowed to allocate storage for + * that meta-data. The issue with using malloc and friends is that + * in latency and success critical paths an alloc_page() call can either + * sleep for potentially a long time or fail. Since we might not want + * either of these possibilities assume that the caller will keep what + * data it needs around to successfully free this allocation. 
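+ *
+ * Practical consequence: when GPU_ALLOC_NO_ALLOC_PAGE is set the normal
+ * free op refuses to run (see nvgpu_bitmap_free()) and memory has to be
+ * released through the fixed-free path with the original length,
+ * roughly (illustrative sketch; nvgpu_alloc()/nvgpu_free_fixed() are
+ * assumed to be the generic wrappers that dispatch to the ops table):
+ *
+ *   u64 addr = nvgpu_alloc(na, size);      (no metadata recorded)
+ *   ...
+ *   nvgpu_free_fixed(na, addr, size);      (caller supplies the size)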
+ */ + if ((a->flags & GPU_ALLOC_NO_ALLOC_PAGE) == 0ULL) { + if (nvgpu_bitmap_store_alloc(a, addr, + blks * a->blk_size) != 0) { + goto fail_reset_bitmap; + } + } + + alloc_dbg(na, "Alloc 0x%-10llx 0x%-5llx [bits=0x%x (%u)]", + addr, len, blks, blks); + +NVGPU_COV_WHITELIST(false_positive, NVGPU_MISRA(Rule, 14_3), "Bug 2615925") + nvgpu_assert(a->nr_allocs < U64_MAX); + a->nr_allocs++; + a->bytes_alloced = nvgpu_safe_add_u64(a->bytes_alloced, + nvgpu_safe_mult_u64(blks, a->blk_size)); + alloc_unlock(na); + + return addr; + +fail_reset_bitmap: + nvgpu_assert(blks <= (u32)INT_MAX); + nvgpu_assert(offs <= U32_MAX); + nvgpu_bitmap_clear(a->bitmap, (u32)offs, blks); +fail: + a->next_blk = 0; + alloc_unlock(na); + alloc_dbg(na, "Alloc failed!"); + return 0; +} + +static void nvgpu_bitmap_free(struct nvgpu_allocator *na, u64 addr) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(na); + struct nvgpu_bitmap_alloc *alloc = NULL; + u64 offs, adjusted_offs, blks; + + alloc_lock(na); + + if ((a->flags & GPU_ALLOC_NO_ALLOC_PAGE) != 0ULL) { + (void) WARN(true, + "Using wrong free for NO_ALLOC_PAGE bitmap allocator"); + goto done; + } + + alloc = find_alloc_metadata(a, addr); + if (alloc == NULL) { + goto done; + } + + /* + * Address comes from adjusted offset (i.e the bit offset with + * a->bit_offs added. So start with that and then work out the real + * offs into the bitmap. + */ + adjusted_offs = addr >> a->blk_shift; + offs = adjusted_offs - a->bit_offs; + blks = alloc->length >> a->blk_shift; + + nvgpu_assert(blks <= (u32)INT_MAX); + nvgpu_assert(offs <= U32_MAX); + nvgpu_bitmap_clear(a->bitmap, (u32)offs, (u32)blks); + alloc_dbg(na, "Free 0x%-10llx", addr); + + a->bytes_freed = nvgpu_safe_add_u64(a->bytes_freed, alloc->length); + +done: + if ((a->meta_data_cache != NULL) && (alloc != NULL)) { + nvgpu_kmem_cache_free(a->meta_data_cache, alloc); + } + alloc_unlock(na); +} + +static void nvgpu_bitmap_alloc_destroy(struct nvgpu_allocator *na) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(na); + struct nvgpu_bitmap_alloc *alloc; + struct nvgpu_rbtree_node *node = NULL; + + /* + * Kill any outstanding allocations. + */ + nvgpu_rbtree_enum_start(0, &node, a->allocs); + while (node != NULL) { + alloc = nvgpu_bitmap_alloc_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &a->allocs); + nvgpu_kmem_cache_free(a->meta_data_cache, alloc); + + nvgpu_rbtree_enum_start(0, &node, a->allocs); + } + + nvgpu_kmem_cache_destroy(a->meta_data_cache); + nvgpu_kfree(nvgpu_alloc_to_gpu(na), a->bitmap); + nvgpu_kfree(nvgpu_alloc_to_gpu(na), a); +} + +#ifdef __KERNEL__ +static void nvgpu_bitmap_print_stats(struct nvgpu_allocator *na, + struct seq_file *s, int lock) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(na); + + alloc_pstat(s, na, "Bitmap allocator params:"); + alloc_pstat(s, na, " start = 0x%llx", a->base); + alloc_pstat(s, na, " end = 0x%llx", a->base + a->length); + alloc_pstat(s, na, " blks = 0x%llx", a->num_bits); + + /* Actual stats. 
*/ + alloc_pstat(s, na, "Stats:"); + alloc_pstat(s, na, " Number allocs = 0x%llx", a->nr_allocs); + alloc_pstat(s, na, " Number fixed = 0x%llx", a->nr_fixed_allocs); + alloc_pstat(s, na, " Bytes alloced = 0x%llx", a->bytes_alloced); + alloc_pstat(s, na, " Bytes freed = 0x%llx", a->bytes_freed); + alloc_pstat(s, na, " Outstanding = 0x%llx", + a->bytes_alloced - a->bytes_freed); +} +#endif + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 8_7), "Bug 2823817") +static const struct nvgpu_allocator_ops bitmap_ops = { +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7)) + .alloc = nvgpu_bitmap_balloc, + .free_alloc = nvgpu_bitmap_free, + + .alloc_fixed = nvgpu_bitmap_balloc_fixed, + .free_fixed = nvgpu_bitmap_free_fixed, + + .base = nvgpu_bitmap_alloc_base, + .length = nvgpu_bitmap_alloc_length, + .end = nvgpu_bitmap_alloc_end, + .inited = nvgpu_bitmap_alloc_inited, + + .fini = nvgpu_bitmap_alloc_destroy, + +#ifdef __KERNEL__ + .print_stats = nvgpu_bitmap_print_stats, +#endif +}; + +static int nvgpu_bitmap_check_argument_limits(u64 base, u64 length, + u64 blk_size) +{ + bool is_blk_size_pwr_2; + bool is_base_aligned; + bool is_length_aligned; + + nvgpu_assert(blk_size > 0ULL); + is_blk_size_pwr_2 = (blk_size & (blk_size - 1ULL)) == 0ULL; + is_base_aligned = (base & (blk_size - 1ULL)) == 0ULL; + is_length_aligned = (length & (blk_size - 1ULL)) == 0ULL; + + if (!is_blk_size_pwr_2) { + nvgpu_do_assert(); + return -EINVAL; + } + + if (!is_base_aligned || !is_length_aligned) { + return -EINVAL; + } + + if (length == 0ULL) { + return -EINVAL; + } + + return 0; +} + +int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *na, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags) +{ + int err; + struct nvgpu_bitmap_allocator *a; + + err = nvgpu_bitmap_check_argument_limits(base, length, blk_size); + if (err != 0) { + return err; + } + + if (base == 0ULL) { + base = blk_size; + length = nvgpu_safe_sub_u64(length, blk_size); + } + + a = nvgpu_kzalloc(g, sizeof(struct nvgpu_bitmap_allocator)); + if (a == NULL) { + return -ENOMEM; + } + + err = nvgpu_alloc_common_init(na, g, name, a, false, &bitmap_ops); + if (err != 0) { + goto fail; + } + + if ((flags & GPU_ALLOC_NO_ALLOC_PAGE) == 0ULL) { + a->meta_data_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_bitmap_alloc)); + if (a->meta_data_cache == NULL) { + err = -ENOMEM; + goto fail; + } + } + + a->base = base; + a->length = length; + a->blk_size = blk_size; + a->blk_shift = nvgpu_safe_sub_u64(nvgpu_ffs(a->blk_size), 1UL); + a->num_bits = length >> a->blk_shift; + a->bit_offs = a->base >> a->blk_shift; + a->flags = flags; + a->allocs = NULL; + + a->bitmap = nvgpu_kcalloc(g, BITS_TO_LONGS(a->num_bits), + sizeof(*a->bitmap)); + if (a->bitmap == NULL) { + err = -ENOMEM; + goto fail; + } + + nvgpu_smp_wmb(); + a->inited = true; + +#ifdef CONFIG_DEBUG_FS + nvgpu_init_alloc_debug(g, na); +#endif + alloc_dbg(na, "New allocator: type bitmap"); + alloc_dbg(na, " base 0x%llx", a->base); + alloc_dbg(na, " bit_offs 0x%llx", a->bit_offs); + alloc_dbg(na, " size 0x%llx", a->length); + alloc_dbg(na, " blk_size 0x%llx", a->blk_size); + alloc_dbg(na, " flags 0x%llx", a->flags); + + return 0; + +fail: + if (a->meta_data_cache != NULL) { + nvgpu_kmem_cache_destroy(a->meta_data_cache); + } + nvgpu_kfree(g, a); + return err; +} diff --git a/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator_priv.h new file mode 100644 index 000000000..f0bf8dd90 
--- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/allocators/bitmap_allocator_priv.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef BITMAP_ALLOCATOR_PRIV_H +#define BITMAP_ALLOCATOR_PRIV_H + +/** + * @file + * + * Implementation of a bitmap allocator. + */ + +#include +#include + +struct nvgpu_allocator; + +/** + * Structure to hold the implementation details of the bitmap allocator. + */ +struct nvgpu_bitmap_allocator { + /** + * Pointer to the common allocator structure. + */ + struct nvgpu_allocator *owner; + + /** + * Base address of the space. + */ + u64 base; + + /** + * Length of the space. + */ + u64 length; + + /** + * Size that corresponds to 1 bit. + */ + u64 blk_size; + + /** + * Bit shift to divide by blk_size. + */ + u64 blk_shift; + + /** + * Number of allocatable bits. + */ + u64 num_bits; + + /** + * Offset of bitmap. + */ + u64 bit_offs; + + /** + * Optimization for making repeated allocations faster. Keep track of + * the next bit after the most recent allocation. This is where the next + * search will start from. This should make allocation faster in cases + * where lots of allocations get made one after another. It shouldn't + * have a negative impact on the case where the allocator is fragmented. + */ + u64 next_blk; + + /** + * The actual bitmap used for allocations. + */ + unsigned long *bitmap; + + /** + * Tree of outstanding allocations. + */ + struct nvgpu_rbtree_node *allocs; + + /** + * Metadata cache of allocations (contains address and size of + * allocations). + */ + struct nvgpu_kmem_cache *meta_data_cache; + + /** + * Configuration flags of the allocator. See \a GPU_ALLOC_* flags. + */ + u64 flags; + + /** + * Boolean to indicate if the allocator has been fully initialized. + */ + bool inited; + + /** + * Statistics: track the number of non-fixed allocations. + */ + u64 nr_allocs; + + /** + * Statistics: track the number of fixed allocations. + */ + u64 nr_fixed_allocs; + + /** + * Statistics: total number of bytes allocated for both fixed and non- + * fixed allocations. + */ + u64 bytes_alloced; + + /** + * Statistics: total number of bytes freed for both fixed and non-fixed + * allocations. + */ + u64 bytes_freed; +}; + +/** + * Structure to hold the allocation metadata. + */ +struct nvgpu_bitmap_alloc { + /** + * Base address of the allocation. + */ + u64 base; + + /** + * Size of the allocation. 
+ */ + u64 length; + + /** + * RB tree of allocations. + */ + struct nvgpu_rbtree_node alloc_entry; +}; + +/** + * @brief Given a tree node, retrieve the metdata of the allocation. + * + * @param[in] node Pointer to the tree node. + * + * @return pointer to the struct nvgpu_bitmap_alloc of the node. + */ +static inline struct nvgpu_bitmap_alloc * +nvgpu_bitmap_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) +{ + return (struct nvgpu_bitmap_alloc *) + ((uintptr_t)node - offsetof(struct nvgpu_bitmap_alloc, alloc_entry)); +}; + +/** + * @brief Given a generic allocator context, retrieve a pointer to the bitmap + * allocator context structure. + * + * @param[in] a Pointer to nvgpu allocator. + * + * @return pointer to the struct nvgpu_bitmap_allocator. + */ +static inline struct nvgpu_bitmap_allocator *bitmap_allocator( + struct nvgpu_allocator *a) +{ + return (struct nvgpu_bitmap_allocator *)(a)->priv; +} + + +#endif diff --git a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c new file mode 100644 index 000000000..b4211a2cd --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c @@ -0,0 +1,1569 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "buddy_allocator_priv.h" + +/* Some other buddy allocator functions. */ +static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a, + u64 addr); +static void balloc_coalesce(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b); +static void balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *falloc); + +/* + * This function is not present in older kernel's list.h code. + */ +#ifndef list_last_entry +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) +#endif + +/* + * GPU buddy allocator for various address spaces. + * + * Current limitations: + * o A fixed allocation could potentially be made that borders PDEs with + * different PTE sizes. This would require that fixed buffer to have + * different sized PTEs for different parts of the allocation. Probably + * best to just require PDE alignment for fixed address allocs. + * + * o It is currently possible to make an allocator that has a buddy alignment + * out of sync with the PDE block size alignment. 
A simple example is a + * 32GB address space starting at byte 1. Every buddy is shifted off by 1 + * which means each buddy corresponf to more than one actual GPU page. The + * best way to fix this is probably just require PDE blocksize alignment + * for the start of the address space. At the moment all allocators are + * easily PDE aligned so this hasn't been a problem. + */ + +static u32 nvgpu_balloc_page_size_to_pte_size(struct nvgpu_buddy_allocator *a, + u32 page_size) +{ + if ((a->flags & GPU_ALLOC_GVA_SPACE) == 0ULL) { + return BALLOC_PTE_SIZE_ANY; + } + + /* + * Make sure the page size is actually valid! + */ + if (page_size == a->vm->big_page_size) { + return BALLOC_PTE_SIZE_BIG; + } else if (page_size == SZ_4K) { + return BALLOC_PTE_SIZE_SMALL; + } else if (page_size == BALLOC_PTE_SIZE_ANY) { + /* With gva_space enabled, only 2 types of PTE sizes allowed */ + return BALLOC_PTE_SIZE_SMALL; + } else { + return BALLOC_PTE_SIZE_INVALID; + } +} + +/* + * Pick a suitable maximum order for this allocator. + * + * Hueristic: Just guessing that the best max order is the largest single + * block that will fit in the address space. + */ +static void balloc_compute_max_order(struct nvgpu_buddy_allocator *a) +{ + u64 true_max_order = ilog2(a->blks); + + if (true_max_order > GPU_BALLOC_MAX_ORDER) { + alloc_dbg(balloc_owner(a), + "Oops: Can't manage more than 1 Exabyte memory"); + nvgpu_do_assert(); + } + + if ((a->max_order == 0ULL) || (a->max_order > true_max_order)) { + a->max_order = true_max_order; + } +} + +/* + * Since we can only allocate in chucks of a->blk_size we need to trim off + * any excess data that is not aligned to a->blk_size. + */ +static void balloc_allocator_align(struct nvgpu_buddy_allocator *a) +{ + a->start = NVGPU_ALIGN(a->base, a->blk_size); +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(a->start != a->base); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + nvgpu_assert(a->blk_size > 0ULL); + a->end = nvgpu_safe_add_u64(a->base, a->length) & + ~(a->blk_size - 1U); + a->count = nvgpu_safe_sub_u64(a->end, a->start); + a->blks = a->count >> a->blk_shift; +} + +/* + * Pass NULL for parent if you want a top level buddy. + */ +static struct nvgpu_buddy *balloc_new_buddy(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *parent, + u64 start, u64 order) +{ + struct nvgpu_buddy *new_buddy; + + new_buddy = nvgpu_kmem_cache_alloc(a->buddy_cache); + if (new_buddy == NULL) { + return NULL; + } + + (void) memset(new_buddy, 0, sizeof(struct nvgpu_buddy)); + + new_buddy->parent = parent; + new_buddy->start = start; + new_buddy->order = order; + new_buddy->end = nvgpu_safe_mult_u64(U64(1) << order, a->blk_size); + new_buddy->end = nvgpu_safe_add_u64(new_buddy->end, start); + new_buddy->pte_size = BALLOC_PTE_SIZE_ANY; + + return new_buddy; +} + +static void balloc_buddy_list_do_add(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b, + struct nvgpu_list_node *list) +{ + if (buddy_is_in_list(b)) { + alloc_dbg(balloc_owner(a), + "Oops: adding added buddy (%llu:0x%llx)", + b->order, b->start); + nvgpu_do_assert(); + } + + /* + * Add big PTE blocks to the tail, small to the head for GVA spaces. 
+ * This lets the code that checks if there are available blocks check + * without cycling through the entire list. + */ + if (((a->flags & GPU_ALLOC_GVA_SPACE) != 0ULL) && + (b->pte_size == BALLOC_PTE_SIZE_BIG)) { + nvgpu_list_add_tail(&b->buddy_entry, list); + } else { + nvgpu_list_add(&b->buddy_entry, list); + } + + buddy_set_in_list(b); +} + +static void balloc_buddy_list_do_rem(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + if (!buddy_is_in_list(b)) { + alloc_dbg(balloc_owner(a), + "Oops: removing removed buddy (%llu:0x%llx)", + b->order, b->start); + nvgpu_do_assert(); + } + + nvgpu_list_del(&b->buddy_entry); + buddy_clr_in_list(b); +} + +/* + * Add a buddy to one of the buddy lists and deal with the necessary + * book keeping. Adds the buddy to the list specified by the buddy's order. + */ +static void balloc_blist_add(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + balloc_buddy_list_do_add(a, b, balloc_get_order_list(a, b->order)); + a->buddy_list_len[b->order] = + nvgpu_safe_add_u64(a->buddy_list_len[b->order], 1ULL); +} + +static void balloc_blist_rem(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + balloc_buddy_list_do_rem(a, b); + nvgpu_assert(a->buddy_list_len[b->order] > 0ULL); + a->buddy_list_len[b->order]--; +} + +static u64 balloc_get_order(struct nvgpu_buddy_allocator *a, u64 len) +{ + if (len == 0U) { + return 0; + } + + len--; + len >>= a->blk_shift; + + return nvgpu_fls(len); +} + +static u64 balloc_max_order_in(struct nvgpu_buddy_allocator *a, + u64 start, u64 end) +{ + u64 size = nvgpu_safe_sub_u64(end, start) >> a->blk_shift; + + if (size > 0U) { + return min_t(u64, ilog2(size), a->max_order); + } else { + return GPU_BALLOC_MAX_ORDER; + } +} + +/* + * Initialize the buddy lists. + */ +static int balloc_init_lists(struct nvgpu_buddy_allocator *a) +{ + u32 i; + u64 bstart, bend, order; + struct nvgpu_buddy *buddy; + + bstart = a->start; + bend = a->end; + + /* First make sure the LLs are valid. */ + for (i = 0U; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + nvgpu_init_list_node(balloc_get_order_list(a, i)); + } + + while (bstart < bend) { + order = balloc_max_order_in(a, bstart, bend); + + buddy = balloc_new_buddy(a, NULL, bstart, order); + if (buddy == NULL) { + goto cleanup; + } + + balloc_blist_add(a, buddy); + bstart = nvgpu_safe_add_u64(bstart, + balloc_order_to_len(a, order)); + } + + return 0; + +cleanup: + for (i = 0U; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + if (!nvgpu_list_empty(balloc_get_order_list(a, i))) { + buddy = nvgpu_list_first_entry( + balloc_get_order_list(a, i), + nvgpu_buddy, buddy_entry); + balloc_blist_rem(a, buddy); + nvgpu_kmem_cache_free(a->buddy_cache, buddy); + } + } + + return -ENOMEM; +} + +/* + * Clean up and destroy the passed allocator. + * Walk the allocator for any pending allocations. + * Free up all pending allocations. + * Free any memory allocated a allocator init time. + * Destroy the lock and bzero the allocator completely. + */ +static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *na) +{ + u32 i; + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_buddy *bud; + struct nvgpu_fixed_alloc *falloc; + struct nvgpu_buddy_allocator *a = buddy_allocator(na); + + alloc_lock(na); + +#ifdef CONFIG_DEBUG_FS + nvgpu_fini_alloc_debug(na); +#endif + + /* + * Free the fixed allocs first. 
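+ * (They are drained before the alloced_buddies tree below because
+ * balloc_alloc_buddy() registered their buddies in that tree as well;
+ * releasing the fixed allocs first keeps the next loop from unlinking
+ * buddies that still sit on a falloc's buddy list.)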
+ */ + nvgpu_rbtree_enum_start(0, &node, a->fixed_allocs); + while (node != NULL) { + falloc = nvgpu_fixed_alloc_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &a->fixed_allocs); + balloc_do_free_fixed(a, falloc); + + nvgpu_rbtree_enum_start(0, &node, a->fixed_allocs); + } + + /* + * And now free all outstanding allocations. + */ + nvgpu_rbtree_enum_start(0, &node, a->alloced_buddies); + while (node != NULL) { + bud = nvgpu_buddy_from_rbtree_node(node); + + (void) balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + balloc_coalesce(a, bud); + + nvgpu_rbtree_enum_start(0, &node, a->alloced_buddies); + } + + /* + * Now clean up the unallocated buddies. + */ + for (i = 0U; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + BUG_ON(a->buddy_list_alloced[i] != 0U); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + while (!nvgpu_list_empty(balloc_get_order_list(a, i))) { + bud = nvgpu_list_first_entry( + balloc_get_order_list(a, i), + nvgpu_buddy, buddy_entry); + balloc_blist_rem(a, bud); + nvgpu_kmem_cache_free(a->buddy_cache, bud); + } + + if (a->buddy_list_len[i] != 0U) { + nvgpu_info(na->g, + "Excess buddies!!! (%d: %llu)", + i, a->buddy_list_len[i]); + nvgpu_do_assert(); + } + if (a->buddy_list_split[i] != 0U) { + nvgpu_info(na->g, + "Excess split nodes!!! (%d: %llu)", + i, a->buddy_list_split[i]); + nvgpu_do_assert(); + } + if (a->buddy_list_alloced[i] != 0U) { + nvgpu_info(na->g, + "Excess alloced nodes!!! (%d: %llu)", + i, a->buddy_list_alloced[i]); + nvgpu_do_assert(); + } + } + + nvgpu_kmem_cache_destroy(a->buddy_cache); + nvgpu_kfree(nvgpu_alloc_to_gpu(na), a); + + alloc_unlock(na); +} + +/* + * Combine the passed buddy if possible. The pointer in @b may not be valid + * after this as the buddy may be freed. + * + * @a must be locked. + */ +static void balloc_coalesce(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + struct nvgpu_buddy *parent; + + while (!buddy_is_alloced(b) && !buddy_is_split(b)) { + /* + * If both our buddy and I are both not allocated and not split + * then we can coalesce ourselves. + */ + if (b->buddy == NULL) { + return; + } + if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) { + return; + } + + parent = b->parent; + + balloc_blist_rem(a, b); + balloc_blist_rem(a, b->buddy); + + buddy_clr_split(parent); + nvgpu_assert(a->buddy_list_split[parent->order] > 0ULL); + a->buddy_list_split[parent->order]--; + balloc_blist_add(a, parent); + + /* Clean up the remains. */ + nvgpu_kmem_cache_free(a->buddy_cache, b->buddy); + nvgpu_kmem_cache_free(a->buddy_cache, b); + + b = parent; + } +} + +/* + * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. + * + * @a must be locked. 
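+ *
+ * Worked example (hypothetical numbers): with blk_size = 4K, splitting
+ * an order-3 buddy that starts at 0x8000 produces two order-2 buddies,
+ * left at 0x8000 and right at 0xC000, each spanning 16K; the parent is
+ * marked split and only returns to the free lists when both children
+ * coalesce again.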
+ */ +static int balloc_split_buddy(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b, u32 pte_size) +{ + struct nvgpu_buddy *left, *right; + u64 half; + u64 new_buddy_start; + + nvgpu_assert(b->order > 0ULL); + left = balloc_new_buddy(a, b, b->start, b->order - 1U); + if (left == NULL) { + return -ENOMEM; + } + + half = nvgpu_safe_sub_u64(b->end, b->start) / 2U; + + new_buddy_start = nvgpu_safe_add_u64(b->start, half); + right = balloc_new_buddy(a, b, new_buddy_start, b->order - 1U); + if (right == NULL) { + nvgpu_kmem_cache_free(a->buddy_cache, left); + return -ENOMEM; + } + + buddy_set_split(b); + a->buddy_list_split[b->order] = + nvgpu_safe_add_u64(a->buddy_list_split[b->order], 1ULL); + + b->left = left; + b->right = right; + left->buddy = right; + right->buddy = left; + left->parent = b; + right->parent = b; + + /* + * Potentially assign a PTE size to the new buddies. The obvious case is + * when we don't have a GPU VA space; just leave it alone. When we do + * have a GVA space we need to assign the passed PTE size to the buddy + * only if the buddy is less than the PDE block size. This is because if + * the buddy is less than the PDE block size then the buddy's parent + * may already have a PTE size. Thus we can only allocate this buddy to + * mappings with that PTE size (due to the large/small PTE separation + * requirement). + * + * When the buddy size is greater than or equal to the block size then + * we can leave the buddies PTE field alone since the PDE block has yet + * to be assigned a PTE size. + */ + if (((a->flags & GPU_ALLOC_GVA_SPACE) != 0ULL) && + (left->order < a->pte_blk_order)) { + left->pte_size = pte_size; + right->pte_size = pte_size; + } + + balloc_blist_rem(a, b); + balloc_blist_add(a, left); + balloc_blist_add(a, right); + + return 0; +} + +/* + * Place the passed buddy into the RB tree for allocated buddies. Never fails + * unless the passed entry is a duplicate which is a bug. + * + * @a must be locked. + */ +static void balloc_alloc_buddy(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + b->alloced_entry.key_start = b->start; + b->alloced_entry.key_end = b->end; + + nvgpu_rbtree_insert(&b->alloced_entry, &a->alloced_buddies); + + buddy_set_alloced(b); + a->buddy_list_alloced[b->order] = + nvgpu_safe_add_u64(a->buddy_list_alloced[b->order], 1ULL); +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a, + u64 addr) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_buddy *bud; + + nvgpu_rbtree_search(addr, &node, a->alloced_buddies); + if (node == NULL) { + return NULL; + } + + bud = nvgpu_buddy_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &a->alloced_buddies); + buddy_clr_alloced(bud); + nvgpu_assert(a->buddy_list_alloced[bud->order] > 0ULL); + a->buddy_list_alloced[bud->order]--; + + return bud; +} + +/* + * Find a suitable buddy for the given order and PTE type (big or little). 
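+ *
+ * Only the list for the exact requested order is consulted here; if it
+ * is empty, the caller (balloc_do_alloc()) retries with progressively
+ * larger orders and splits whatever it finds back down. For GVA spaces,
+ * big-PTE requests are served from the tail of the list and small-PTE
+ * requests from the head, matching the insertion policy in
+ * balloc_buddy_list_do_add().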
+ */ +static struct nvgpu_buddy *balloc_find_buddy(struct nvgpu_buddy_allocator *a, + u64 order, u32 pte_size) +{ + struct nvgpu_buddy *bud; + + if ((order > a->max_order) || + nvgpu_list_empty(balloc_get_order_list(a, order))) { + return NULL; + } + + if (((a->flags & GPU_ALLOC_GVA_SPACE) != 0ULL) && + (pte_size == BALLOC_PTE_SIZE_BIG)) { + bud = nvgpu_list_last_entry(balloc_get_order_list(a, order), + nvgpu_buddy, buddy_entry); + } else { + bud = nvgpu_list_first_entry(balloc_get_order_list(a, order), + nvgpu_buddy, buddy_entry); + } + + if ((pte_size != BALLOC_PTE_SIZE_ANY) && + (pte_size != bud->pte_size) && + (bud->pte_size != BALLOC_PTE_SIZE_ANY)) { + return NULL; + } + + return bud; +} + +/* + * Allocate a suitably sized buddy. If no suitable buddy exists split higher + * order buddies until we have a suitable buddy to allocate. + * + * For PDE grouping add an extra check to see if a buddy is suitable: that the + * buddy exists in a PDE who's PTE size is reasonable + * + * @a must be locked. + */ +static u64 balloc_do_alloc(struct nvgpu_buddy_allocator *a, + u64 order, u32 pte_size) +{ + u64 split_order; + struct nvgpu_buddy *bud = NULL; + + for (split_order = order; split_order <= a->max_order; split_order++) { + bud = balloc_find_buddy(a, split_order, pte_size); + if (bud != NULL) { + break; + } + } + + /* Out of memory! */ + if (bud == NULL) { + return 0; + } + + while (bud->order != order) { + if (balloc_split_buddy(a, bud, pte_size) != 0) { + balloc_coalesce(a, bud); + return 0; /* No mem... */ + } + bud = bud->left; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + + return bud->start; +} + +/* + * See if the passed range is actually available for allocation. If so, then + * return 1, otherwise return 0. + * + * TODO: Right now this uses the unoptimal approach of going through all + * outstanding allocations and checking their base/ends. This could be better. + */ +static bool balloc_is_range_free(struct nvgpu_buddy_allocator *a, + u64 base, u64 end) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_buddy *bud; + + nvgpu_rbtree_enum_start(0, &node, a->alloced_buddies); + if (node == NULL) { + return true; /* No allocs yet. */ + } + + bud = nvgpu_buddy_from_rbtree_node(node); + + while (bud->start < end) { + if (((bud->start > base) && (bud->start < end)) || + ((bud->end > base) && (bud->end < end))) { + return false; + } + + nvgpu_rbtree_enum_next(&node, node); + if (node == NULL) { + break; + } + bud = nvgpu_buddy_from_rbtree_node(node); + } + + return true; +} + +static void balloc_alloc_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *f) +{ + f->alloced_entry.key_start = f->start; + f->alloced_entry.key_end = f->end; + + nvgpu_rbtree_insert(&f->alloced_entry, &a->fixed_allocs); +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct nvgpu_fixed_alloc *balloc_free_fixed( + struct nvgpu_buddy_allocator *a, u64 addr) +{ + struct nvgpu_fixed_alloc *falloc; + struct nvgpu_rbtree_node *node = NULL; + + nvgpu_rbtree_search(addr, &node, a->fixed_allocs); + if (node == NULL) { + return NULL; + } + + falloc = nvgpu_fixed_alloc_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &a->fixed_allocs); + + return falloc; +} + +/* + * Find the parent range - doesn't necessarily need the parent to actually exist + * as a buddy. Finding an existing parent comes later... 
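+ *
+ * Worked example (hypothetical numbers): with blk_size = 4K, an order-0
+ * buddy at shifted base 0xB000 gets *porder = 1 and a shifted parent
+ * base of 0xA000, i.e. the 8K-aligned range 0xA000-0xC000 that would
+ * contain it (unshifted back into the allocator's address space before
+ * being returned).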
+ */ +static void balloc_get_parent_range(struct nvgpu_buddy_allocator *a, + u64 base, u64 order, + u64 *pbase, u64 *porder) +{ + u64 base_mask; + u64 shifted_base = balloc_base_shift(a, base); + + nvgpu_assert(order < 63U); + order++; + base_mask = ~nvgpu_safe_sub_u64((a->blk_size << order), 1U); + + shifted_base &= base_mask; + + *pbase = balloc_base_unshift(a, shifted_base); + *porder = order; +} + +static struct nvgpu_buddy *balloc_get_target_buddy( + struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *bud, + u64 base, u64 order, u32 pte_size) +{ + /* Split this buddy as necessary until we get the target buddy. */ + while ((bud->start != base) || (bud->order != order)) { + if (balloc_split_buddy(a, bud, pte_size) != 0) { + alloc_dbg(balloc_owner(a), + "split buddy failed? {0x%llx, %llu}", + bud->start, bud->order); + balloc_coalesce(a, bud); + return NULL; + } + + if (base < bud->right->start) { + bud = bud->left; + } else { + bud = bud->right; + } + } + + return bud; +} + +/* + * Makes a buddy at the passed address. This will make all parent buddies + * necessary for this buddy to exist as well. + */ +static struct nvgpu_buddy *balloc_make_fixed_buddy( + struct nvgpu_buddy_allocator *a, u64 base, u64 order, u32 pte_size) +{ + struct nvgpu_buddy *bud = NULL; + struct nvgpu_list_node *order_list; + u64 cur_order = order, cur_base = base; + + /* + * Algo: + * 1. Keep jumping up a buddy order until we find the real buddy that + * this buddy exists in. + * 2. Then work our way down through the buddy tree until we hit a dead + * end. + * 3. Start splitting buddies until we split to the one we need to + * make. + */ + while (cur_order <= a->max_order) { + bool found = false; + + order_list = balloc_get_order_list(a, cur_order); + nvgpu_list_for_each_entry(bud, order_list, + nvgpu_buddy, buddy_entry) { + if (bud->start == cur_base) { + /* + * Make sure page size matches if it's smaller + * than a PDE sized buddy. + */ + if ((bud->order <= a->pte_blk_order) && + (bud->pte_size != + BALLOC_PTE_SIZE_ANY) && + (bud->pte_size != pte_size)) { + /* Welp, that's the end of that. */ + alloc_dbg(balloc_owner(a), + "Fixed buddy PTE " + "size mismatch!"); + return NULL; + } + + found = true; + break; + } + } + + if (found) { + break; + } + + balloc_get_parent_range(a, cur_base, cur_order, + &cur_base, &cur_order); + } + + if (cur_order > a->max_order) { + alloc_dbg(balloc_owner(a), "No buddy for range ???"); + return NULL; + } + + /* Get target buddy */ + bud = balloc_get_target_buddy(a, bud, base, order, pte_size); + + return bud; +} + +static u64 balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *falloc, + u64 base, u64 len, u32 pte_size) +{ + u64 shifted_base, inc_base; + u64 align_order; + + /* + * Ensure that we have a valid PTE size here (ANY is a valid size). If + * this is INVALID then we are going to experience imminent corruption + * in the lists that hold buddies. This leads to some very strange + * crashes. 
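+ *
+ * The alignment math below chooses, at each step, the largest buddy
+ * order that both the current base and the remaining length permit.
+ * Worked example (hypothetical numbers, 4K blocks): shifted_base =
+ * 0x6000 and len = 0x4000 give align_order = min(1, 2) = 1, so the
+ * range is stitched together from two order-1 (8K) buddies at 0x6000
+ * and 0x8000.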
+ */ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + BUG_ON(pte_size == BALLOC_PTE_SIZE_INVALID); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + shifted_base = balloc_base_shift(a, base); + if (shifted_base == 0U) { + align_order = nvgpu_safe_sub_u64( + nvgpu_ffs(len >> a->blk_shift), 1UL); + } else { + u64 shifted_base_order = + nvgpu_safe_sub_u64( + nvgpu_ffs(shifted_base >> a->blk_shift), 1UL); + u64 len_order = + nvgpu_safe_sub_u64( + nvgpu_ffs(len >> a->blk_shift), 1UL); + align_order = min_t(u64, shifted_base_order, len_order); + } + + if (align_order > a->max_order) { + alloc_dbg(balloc_owner(a), + "Align order too big: %llu > %llu", + align_order, a->max_order); + return 0; + } + + /* + * Generate a list of buddies that satisfy this allocation. + */ + inc_base = shifted_base; + while (inc_base < nvgpu_safe_add_u64(shifted_base, len)) { + u64 order_len = balloc_order_to_len(a, align_order); + u64 remaining; + struct nvgpu_buddy *bud; + + bud = balloc_make_fixed_buddy(a, + balloc_base_unshift(a, inc_base), + align_order, pte_size); + if (bud == NULL) { + alloc_dbg(balloc_owner(a), + "Fixed buddy failed: {0x%llx, %llu}!", + balloc_base_unshift(a, inc_base), + align_order); + goto err_and_cleanup; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + balloc_buddy_list_do_add(a, bud, &falloc->buddies); + + /* Book keeping. */ + inc_base = nvgpu_safe_add_u64(inc_base, order_len); + remaining = (shifted_base + len) - inc_base; + align_order = nvgpu_safe_sub_u64( + nvgpu_ffs(inc_base >> a->blk_shift), 1UL); + + /* If we don't have much left - trim down align_order. */ + if (balloc_order_to_len(a, align_order) > remaining) { + align_order = balloc_max_order_in(a, inc_base, + nvgpu_safe_add_u64(inc_base, + remaining)); + } + } + + return base; + +err_and_cleanup: + while (!nvgpu_list_empty(&falloc->buddies)) { + struct nvgpu_buddy *bud = nvgpu_list_first_entry( + &falloc->buddies, + nvgpu_buddy, buddy_entry); + + balloc_buddy_list_do_rem(a, bud); + (void) balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + } + + return 0; +} + +static void balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *falloc) +{ + struct nvgpu_buddy *bud; + + while (!nvgpu_list_empty(&falloc->buddies)) { + bud = nvgpu_list_first_entry(&falloc->buddies, + nvgpu_buddy, + buddy_entry); + balloc_buddy_list_do_rem(a, bud); + + (void) balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + a->bytes_freed = nvgpu_safe_add_u64(a->bytes_freed, + balloc_order_to_len(a, bud->order)); + + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + } + + nvgpu_kfree(nvgpu_alloc_to_gpu(a->owner), falloc); +} + +/* + * Allocate memory from the passed allocator. + * Acquire the allocator lock. + * Compute the order by calling balloc_get_order(). + * Compute the pte size supported for this allocation by calling + * nvgpu_balloc_page_size_to_pte_size(). + * If we could not satisfy the required size buddy then call + * balloc_split_buddy() to get the requiredsize by dividing the large size buddy. + * Free the remaining buddy to the respective list. + * Release the alloc_lock. 
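+ *
+ * Callers normally reach this through the generic allocator interface
+ * rather than invoking the op directly. A minimal sketch, assuming the
+ * usual nvgpu_alloc_pte()/nvgpu_free() wrappers and a 64K big-page
+ * request (names and sizes here are illustrative, not mandated):
+ *
+ *   struct nvgpu_allocator *va = ...;     (a buddy-backed VA allocator)
+ *   u64 gpu_va = nvgpu_alloc_pte(va, size, SZ_64K);
+ *   if (gpu_va == 0ULL)
+ *           return -ENOMEM;
+ *   ...
+ *   nvgpu_free(va, gpu_va);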
+ */ +static u64 nvgpu_buddy_balloc_pte(struct nvgpu_allocator *na, u64 len, + u32 page_size) +{ + u64 order, addr; + u32 pte_size; + struct nvgpu_buddy_allocator *a = buddy_allocator(na); + + if (len == 0ULL) { + alloc_dbg(balloc_owner(a), "Alloc fail"); + return 0; + } + + alloc_lock(na); + + order = balloc_get_order(a, len); + + if (order > a->max_order) { + alloc_unlock(na); + alloc_dbg(balloc_owner(a), "Alloc fail"); + return 0; + } + + pte_size = nvgpu_balloc_page_size_to_pte_size(a, page_size); + if (pte_size == BALLOC_PTE_SIZE_INVALID) { + alloc_unlock(na); + return 0; + } + + addr = balloc_do_alloc(a, order, pte_size); + + if (addr != 0ULL) { + a->bytes_alloced += len; + a->bytes_alloced_real += balloc_order_to_len(a, order); + alloc_dbg(balloc_owner(a), + "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s", + addr, order, len, + (pte_size == BALLOC_PTE_SIZE_BIG) ? "big" : + (pte_size == BALLOC_PTE_SIZE_SMALL) ? "small" : + "NA/any"); + a->alloc_made = true; + } else { + alloc_dbg(balloc_owner(a), "Alloc failed: no mem!"); + } + + alloc_unlock(na); + + return addr; +} + +static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) +{ + return nvgpu_buddy_balloc_pte(na, len, BALLOC_PTE_SIZE_ANY); +} + +/* + * Check the input parameter validity. + * Acquire the alloc_lock. + * Compute the order with respective to the input size. + * Compute the pte_size for the given page size and return error for + * invalid pte size. + * Call balloc_is_range_free() to get the free range with the address given. + * Call balloc_make_fixed_buddy() to generate the list of buddies. + * Make the book keeping of allocated objects to the respective lists. + * Release the alloc_lock. + */ +static u64 nvgpu_balloc_fixed_buddy_locked(struct nvgpu_allocator *na, + u64 base, u64 len, u32 page_size) +{ + u32 pte_size; + u64 ret, real_bytes = 0; + struct nvgpu_buddy *bud; + struct nvgpu_fixed_alloc *falloc = NULL; + struct nvgpu_buddy_allocator *a = buddy_allocator(na); + + /* If base isn't aligned to an order 0 block, fail. */ + nvgpu_assert(a->blk_size > 0ULL); + if ((base & (a->blk_size - 1ULL)) != 0ULL) { + goto fail; + } + + if (len == 0ULL) { + goto fail; + } + + if (base < a->start) { + goto fail; + } + + if (a->end < nvgpu_safe_add_u64(base, len)) { + goto fail; + } + + pte_size = nvgpu_balloc_page_size_to_pte_size(a, page_size); + if (pte_size == BALLOC_PTE_SIZE_INVALID) { + goto fail; + } + + falloc = nvgpu_kmalloc(nvgpu_alloc_to_gpu(na), sizeof(*falloc)); + if (falloc == NULL) { + goto fail; + } + + nvgpu_init_list_node(&falloc->buddies); + falloc->start = base; + falloc->end = base + len; + + if (!balloc_is_range_free(a, base, base + len)) { + alloc_dbg(balloc_owner(a), + "Range not free: 0x%llx -> 0x%llx", + base, base + len); + goto fail; + } + + ret = balloc_do_alloc_fixed(a, falloc, base, len, pte_size); + if (ret == 0ULL) { + alloc_dbg(balloc_owner(a), + "Alloc-fixed failed ?? 0x%llx -> 0x%llx", + base, base + len); + goto fail; + } + + balloc_alloc_fixed(a, falloc); + + nvgpu_list_for_each_entry(bud, &falloc->buddies, + nvgpu_buddy, buddy_entry) { + real_bytes = nvgpu_safe_add_u64(real_bytes, + nvgpu_safe_sub_u64(bud->end, bud->start)); + } + + a->bytes_alloced = nvgpu_safe_add_u64(a->bytes_alloced, len); + a->bytes_alloced_real = nvgpu_safe_add_u64(a->bytes_alloced_real, + real_bytes); + + alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx", base); + + return base; + +fail: + nvgpu_kfree(nvgpu_alloc_to_gpu(na), falloc); + return 0; +} + +/* + * Allocate a fixed address allocation. 
The address of the allocation is @base + * and the length is @len. This is not a typical buddy allocator operation and + * as such has a high posibility of failure if the address space is heavily in + * use. + * + * Please do not use this function unless _absolutely_ necessary. + */ +static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, + u64 base, u64 len, u32 page_size) +{ + u64 alloc; + struct nvgpu_buddy_allocator *a = buddy_allocator(na); + + alloc_lock(na); + alloc = nvgpu_balloc_fixed_buddy_locked(na, base, len, page_size); + + if (alloc != 0ULL) { + a->alloc_made = true; + } + + alloc_unlock(na); + + return alloc; +} + +/* + * @a must be locked. + */ +static void nvgpu_buddy_bfree_locked(struct nvgpu_allocator *na, u64 addr) +{ + struct nvgpu_buddy *bud; + struct nvgpu_fixed_alloc *falloc; + struct nvgpu_buddy_allocator *a = buddy_allocator(na); + + /* + * First see if this is a fixed alloc. If not fall back to a regular + * buddy. + */ + falloc = balloc_free_fixed(a, addr); + if (falloc != NULL) { + balloc_do_free_fixed(a, falloc); + goto done; + } + + bud = balloc_free_buddy(a, addr); + if (bud == NULL) { + goto done; + } + + balloc_blist_add(a, bud); + a->bytes_freed = nvgpu_safe_add_u64(a->bytes_freed, + balloc_order_to_len(a, bud->order)); + + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + +done: + alloc_dbg(balloc_owner(a), "Free 0x%llx", addr); +} + +/* + * Free the passed allocation. + */ +static void nvgpu_buddy_bfree(struct nvgpu_allocator *na, u64 addr) +{ + if (addr == 0ULL) { + return; + } + + alloc_lock(na); + nvgpu_buddy_bfree_locked(na, addr); + alloc_unlock(na); +} + +static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + struct nvgpu_alloc_carveout *tmp; + u64 co_base, co_end; + + co_base = co->base; + co_end = nvgpu_safe_add_u64(co->base, co->length); + + /* + * Not the fastest approach but we should not have that many carveouts + * for any reasonable allocator. + */ + nvgpu_list_for_each_entry(tmp, &a->co_list, + nvgpu_alloc_carveout, co_entry) { + u64 tmp_end = nvgpu_safe_add_u64(tmp->base, tmp->length); + + if (((co_base >= tmp->base) && (co_base < tmp_end)) || + ((co_end >= tmp->base) && (co_end < tmp_end))) { + return false; + } + } + + return true; +} + +/* + * Carveouts can only be reserved before any regular allocations have been + * made. + * - Check the validity of input paramemters. + * - Acquire the allocator lock. + * - Call nvgpu_balloc_fixed_buddy_locked() to reserve the object + * with \a co.base and \a co.length. + * - Add the allocated object to the book keeping list. + * - Release the allocator lock. + */ +static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *na, + struct nvgpu_alloc_carveout *co) +{ + struct nvgpu_buddy_allocator *a = buddy_allocator(na); + u64 addr; + int err = 0; + + if (co->base < a->start) { + return -EINVAL; + } + if (nvgpu_safe_add_u64(co->base, co->length) > a->end) { + return -EINVAL; + } + if (a->alloc_made) { + return -EINVAL; + } + + alloc_lock(na); + + if (!nvgpu_buddy_reserve_is_possible(a, co)) { + err = -EBUSY; + goto done; + } + + /* Should not be possible to fail... 
*/ + addr = nvgpu_balloc_fixed_buddy_locked(na, co->base, co->length, + BALLOC_PTE_SIZE_ANY); + if (addr == 0ULL) { + err = -ENOMEM; + nvgpu_warn(na->g, + "%s: Failed to reserve a valid carveout!", + __func__); + goto done; + } + + nvgpu_list_add(&co->co_entry, &a->co_list); + +done: + alloc_unlock(na); + return err; +} + +/* + * Carveouts can be release at any time. + * - Acquire the allocator lock. + * - Remove the carve out from the allocator list. + * - Call nvgpu_buddy_bfree_locked() to free the carve out + * - nvgpu_buddy_bfree_locked() will first check the address is fixed + * or not by calling balloc_free_fixed(). + * - If the address is fixed then free it by calling balloc_do_free_fixed(). + * - Else free it through balloc_free_buddy(). + * - Recompute the size of the allocator and coalesce the objects. + * - Release the lock. + */ +static void nvgpu_buddy_release_co(struct nvgpu_allocator *na, + struct nvgpu_alloc_carveout *co) +{ + alloc_lock(na); + + nvgpu_list_del(&co->co_entry); + nvgpu_buddy_bfree_locked(na, co->base); + + alloc_unlock(na); +} + +static u64 nvgpu_buddy_alloc_length(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = buddy_allocator(a); + + return ba->length; +} + +static u64 nvgpu_buddy_alloc_base(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = buddy_allocator(a); + + return ba->start; +} + +static bool nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = buddy_allocator(a); + bool inited = ba->initialized; + + nvgpu_smp_rmb(); + return inited; +} + +static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = buddy_allocator(a); + + return ba->end; +} +/* + * - Acquire the allocator lock. + * - Check the availability of space between start and end of + * the allocator. + * - Release the allocator lock. + */ +static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = buddy_allocator(a); + u64 space; + + alloc_lock(a); + space = nvgpu_safe_sub_u64(ba->end, ba->start); + space = nvgpu_safe_sub_u64(space, + nvgpu_safe_sub_u64(ba->bytes_alloced_real, + ba->bytes_freed)); + alloc_unlock(a); + + return space; +} + +#ifdef __KERNEL__ +/* + * Print the buddy allocator top level stats. If you pass @s as NULL then the + * stats are printed to the kernel log. This lets this code be used for + * debugging purposes internal to the allocator. 
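+ *
+ * The counters reported here are the same ones nvgpu_buddy_alloc_space()
+ * relies on above: the free space is the managed range minus the net
+ * outstanding "real" bytes. A minimal sketch of that bookkeeping (names
+ * ours, illustration only):
+ *
+ *   static u64 free_space(u64 start, u64 end, u64 alloced_real, u64 freed)
+ *   {
+ *           return (end - start) - (alloced_real - freed);
+ *   }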
+ */ +static void nvgpu_buddy_print_stats(struct nvgpu_allocator *na, + struct seq_file *s, int lock) +{ + int i = 0; + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_fixed_alloc *falloc; + struct nvgpu_alloc_carveout *tmp; + struct nvgpu_buddy_allocator *a = buddy_allocator(na); + + alloc_pstat(s, na, "base = %llu, limit = %llu, blk_size = %llu", + a->base, a->length, a->blk_size); + alloc_pstat(s, na, "Internal params:"); + alloc_pstat(s, na, " start = 0x%llx", a->start); + alloc_pstat(s, na, " end = 0x%llx", a->end); + alloc_pstat(s, na, " count = 0x%llx", a->count); + alloc_pstat(s, na, " blks = 0x%llx", a->blks); + alloc_pstat(s, na, " max_order = %llu", a->max_order); + + if (lock) + alloc_lock(na); + + if (!nvgpu_list_empty(&a->co_list)) { + alloc_pstat(s, na, ""); + alloc_pstat(s, na, "Carveouts:"); + nvgpu_list_for_each_entry(tmp, &a->co_list, + nvgpu_alloc_carveout, co_entry) + alloc_pstat(s, na, + " CO %2d: %-20s 0x%010llx + 0x%llx", + i++, tmp->name, tmp->base, tmp->length); + } + + alloc_pstat(s, na, ""); + alloc_pstat(s, na, "Buddy blocks:"); + alloc_pstat(s, na, " Order Free Alloced Split"); + alloc_pstat(s, na, " ----- ---- ------- -----"); + + for (i = a->max_order; i >= 0; i--) { + if (a->buddy_list_len[i] == 0 && + a->buddy_list_alloced[i] == 0 && + a->buddy_list_split[i] == 0) + continue; + + alloc_pstat(s, na, " %3d %-7llu %-9llu %llu", i, + a->buddy_list_len[i], + a->buddy_list_alloced[i], + a->buddy_list_split[i]); + } + + alloc_pstat(s, na, ""); + + nvgpu_rbtree_enum_start(0, &node, a->fixed_allocs); + i = 1; + while (node) { + falloc = nvgpu_fixed_alloc_from_rbtree_node(node); + + alloc_pstat(s, na, "Fixed alloc (%d): [0x%llx -> 0x%llx]", + i, falloc->start, falloc->end); + + nvgpu_rbtree_enum_next(&node, a->fixed_allocs); + } + + alloc_pstat(s, na, ""); + alloc_pstat(s, na, "Bytes allocated: %llu", + a->bytes_alloced); + alloc_pstat(s, na, "Bytes allocated (real): %llu", + a->bytes_alloced_real); + alloc_pstat(s, na, "Bytes freed: %llu", + a->bytes_freed); + + if (lock) + alloc_unlock(na); +} +#endif + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 8_7), "Bug 2823817") +static const struct nvgpu_allocator_ops buddy_ops = { +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7)) + .alloc = nvgpu_buddy_balloc, + .alloc_pte = nvgpu_buddy_balloc_pte, + .free_alloc = nvgpu_buddy_bfree, + + .alloc_fixed = nvgpu_balloc_fixed_buddy, + /* .free_fixed not needed. */ + + .reserve_carveout = nvgpu_buddy_reserve_co, + .release_carveout = nvgpu_buddy_release_co, + + .base = nvgpu_buddy_alloc_base, + .length = nvgpu_buddy_alloc_length, + .end = nvgpu_buddy_alloc_end, + .inited = nvgpu_buddy_alloc_inited, + .space = nvgpu_buddy_alloc_space, + + .fini = nvgpu_buddy_allocator_destroy, + +#ifdef __KERNEL__ + .print_stats = nvgpu_buddy_print_stats, +#endif +}; + +static int nvgpu_buddy_check_argument_limits(struct vm_gk20a *vm, u64 size, + u64 blk_size, u64 max_order, u64 flags) +{ + bool is_blk_size_pwr_2; + bool is_gva_space = (flags & GPU_ALLOC_GVA_SPACE) != 0ULL; + + /* blk_size must be greater than 0 and a power of 2. */ + if (blk_size == 0U) { + return -EINVAL; + } + + is_blk_size_pwr_2 = (blk_size & (blk_size - 1ULL)) == 0ULL; + if (!is_blk_size_pwr_2) { + return -EINVAL; + } + + if (max_order > GPU_BALLOC_MAX_ORDER) { + return -EINVAL; + } + + if (size == 0U) { + return -EINVAL; + } + + /* If this is to manage a GVA space we need a VM. 
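+ * (As an aside, the blk_size check above relies on the usual identity that a
+ * non-zero x is a power of two exactly when (x & (x - 1)) == 0; for example
+ * 0x1000 & 0x0fff == 0, while 0x3000 & 0x2fff == 0x2000, so 0x3000 is
+ * rejected.)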
*/ + if (is_gva_space && (vm == NULL)) { + return -EINVAL; + } + + return 0; +} + +static int nvgpu_buddy_set_attributes(struct nvgpu_buddy_allocator *a, + struct nvgpu_allocator *na, struct vm_gk20a *vm, + u64 base, u64 size, u64 blk_size, u64 max_order, + u64 flags) +{ + bool is_gva_space = (flags & GPU_ALLOC_GVA_SPACE) != 0ULL; + + a->base = base; + a->length = size; + a->blk_size = blk_size; + a->blk_shift = nvgpu_safe_sub_u64(nvgpu_ffs(blk_size), 1UL); + a->owner = na; + + /* + * If base is 0 then modfy base to be the size of one block so that we + * can return errors by returning addr == 0. + */ + if (a->base == 0U) { + a->base = a->blk_size; + a->length = nvgpu_safe_sub_u64(a->length, a->blk_size); + } + + a->vm = vm; + if (is_gva_space) { + u64 pde_size_mask, base_pde_align, size_pde_align; + u64 pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count( + gk20a_from_vm(vm), vm->big_page_size)); + a->pte_blk_order = balloc_get_order(a, pde_size); + + /* + * When we have a GVA space with big_pages enabled the size and + * base must be PDE aligned. If big_pages are not enabled then + * this requirement is not necessary. + */ + + pde_size_mask = nvgpu_safe_sub_u64(pde_size, U64(1)); + base_pde_align = a->base & pde_size_mask; + size_pde_align = a->length & pde_size_mask; + if (vm->big_pages && + ((base_pde_align != 0ULL) || + (size_pde_align != 0ULL))) { + return -EINVAL; + } + } + + a->flags = flags; + a->max_order = max_order; + return 0; +} + +/* + * Initialize a buddy allocator. Returns 0 on success. This allocator does + * not necessarily manage bytes. It manages distinct ranges of resources. This + * allows the allocator to work for things like comp_tags, semaphores, etc. + * + * @allocator: Ptr to an allocator struct to init. + * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to + * get PTE size for GVA spaces. + * @name: Name of the allocator. Doesn't have to be static storage. + * @base: The base address of the resource pool being managed. + * @size: Number of resources in the pool. + * @blk_size: Minimum number of resources to allocate at once. For things like + * semaphores this is 1. For GVA this might be as much as 64k. This + * corresponds to order 0. Must be power of 2. + * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator + * will try and pick a reasonable max order. + * @flags: Extra flags necessary. See GPU_BALLOC_*. 
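+ *
+ * A hedged usage sketch (values are illustrative: a 1 MB range of 4 KB
+ * blocks based at 0x1000, no VM, default max_order, no flags; assumes a
+ * valid struct gk20a pointer g; a real caller would also check the returned
+ * address before using it):
+ *
+ *   struct nvgpu_allocator na;
+ *   int err;
+ *
+ *   err = nvgpu_buddy_allocator_init(g, &na, NULL, "example",
+ *                                    0x1000, 0x100000, 0x1000,
+ *                                    0ULL, 0ULL);
+ *   if (err == 0) {
+ *           u64 addr = nvgpu_alloc(&na, 0x10000);
+ *
+ *           nvgpu_free(&na, addr);
+ *           nvgpu_alloc_destroy(&na);
+ *   }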
+ */ +int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *na, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, + u64 max_order, u64 flags) +{ + int err; + bool is_gva_space = (flags & GPU_ALLOC_GVA_SPACE) != 0ULL; + struct nvgpu_buddy_allocator *a; + + err = nvgpu_buddy_check_argument_limits(vm, size, blk_size, max_order, + flags); + if (err != 0) { + return err; + } + + a = nvgpu_kzalloc(g, sizeof(struct nvgpu_buddy_allocator)); + if (a == NULL) { + return -ENOMEM; + } + + err = nvgpu_alloc_common_init(na, g, name, a, false, &buddy_ops); + if (err != 0) { + goto fail; + } + + err = nvgpu_buddy_set_attributes(a, na, vm, base, size, blk_size, + max_order, flags); + if (err != 0) { + goto fail; + } + + balloc_allocator_align(a); + balloc_compute_max_order(a); + + a->buddy_cache = nvgpu_kmem_cache_create(g, sizeof(struct nvgpu_buddy)); + if (a->buddy_cache == NULL) { + err = -ENOMEM; + goto fail; + } + + a->alloced_buddies = NULL; + a->fixed_allocs = NULL; + nvgpu_init_list_node(&a->co_list); + err = balloc_init_lists(a); + if (err != 0) { + goto fail; + } + + nvgpu_smp_wmb(); + a->initialized = true; + +#ifdef CONFIG_DEBUG_FS + nvgpu_init_alloc_debug(g, na); +#endif + alloc_dbg(na, "New allocator: type buddy"); + alloc_dbg(na, " base 0x%llx", a->base); + alloc_dbg(na, " size 0x%llx", a->length); + alloc_dbg(na, " blk_size 0x%llx", a->blk_size); + if (is_gva_space) { + alloc_dbg(balloc_owner(a), + " pde_size 0x%llx", + balloc_order_to_len(a, a->pte_blk_order)); + } + alloc_dbg(na, " max_order %llu", a->max_order); + alloc_dbg(na, " flags 0x%llx", a->flags); + + return 0; + +fail: + if (a->buddy_cache != NULL) { + nvgpu_kmem_cache_destroy(a->buddy_cache); + } + nvgpu_kfree(g, a); + return err; +} diff --git a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator_priv.h new file mode 100644 index 000000000..bd34477fa --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator_priv.h @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_MM_BUDDY_ALLOCATOR_PRIV_H +#define NVGPU_MM_BUDDY_ALLOCATOR_PRIV_H + +/** + * @file + * + * Implementation of the buddy allocator. 
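+ *
+ * The core idea, independent of the types declared below: an order-k buddy
+ * covers blk_size << k bytes and splits into two order-(k-1) halves. The
+ * driver tracks a buddy's sibling with an explicit pointer (see struct
+ * nvgpu_buddy), but in terms of addresses the relationship is just a single
+ * toggled bit (standalone sketch, name ours):
+ *
+ *   static u64 sibling_base(u64 shifted_base, u64 order, u64 blk_size)
+ *   {
+ *           return shifted_base ^ (blk_size << order);
+ *   }
+ *
+ * e.g. with blk_size = 0x1000, the order-0 buddy at shifted base 0x3000 is
+ * paired with the one at 0x2000 inside their order-1 parent.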
+ */ + +#include +#include +#include + +struct nvgpu_kmem_cache; +struct nvgpu_allocator; +struct vm_gk20a; + +/** + * Structure that defines each buddy as an element in a binary tree. + */ +struct nvgpu_buddy { + /** + * Parent node. + */ + struct nvgpu_buddy *parent; + + /** + * This node's buddy. + */ + struct nvgpu_buddy *buddy; + + /** + * Lower address sub-node. + */ + struct nvgpu_buddy *left; + + /** + * Higher address sub-node. + */ + struct nvgpu_buddy *right; + + /** + * List entry for various lists. + */ + struct nvgpu_list_node buddy_entry; + + /** + * RB tree of allocations. + */ + struct nvgpu_rbtree_node alloced_entry; + + /** + * Start address of this buddy. + */ + u64 start; + + /** + * End address of this buddy. + */ + u64 end; + + /** + * Buddy order. + */ + u64 order; + + /** + * Possible flags to use in the buddy allocator. Set in the #flags + * member. + * @addtogroup BALLOC_BUDDY_FLAGS + * @{ + */ +#define BALLOC_BUDDY_ALLOCED 0x1U +#define BALLOC_BUDDY_SPLIT 0x2U +#define BALLOC_BUDDY_IN_LIST 0x4U + /**@}*/ + + /** + * Buddy flags among the @ref BALLOC_BUDDY_FLAGS + */ + u32 flags; + + + /** + * Possible PDE sizes. This allows for grouping like sized allocations + * into the same PDE. Set in the #pte_size member. + * @addtogroup BALLOC_PTE_SIZE + * @{ + */ +#define BALLOC_PTE_SIZE_ANY (~0U) +#define BALLOC_PTE_SIZE_INVALID 0U +#define BALLOC_PTE_SIZE_SMALL 1U +#define BALLOC_PTE_SIZE_BIG 2U + /**@}*/ + + /** + * Size of the PDE this buddy is using. Possible values in + * @ref BALLOC_PTE_SIZE + */ + u32 pte_size; +}; + +/** + * @brief Given a list node, retrieve the buddy. + * + * @param[in] node Pointer to the list node. + * + * @return pointer to the struct nvgpu_buddy of the node. + */ +static inline struct nvgpu_buddy * +nvgpu_buddy_from_buddy_entry(struct nvgpu_list_node *node) +{ + return (struct nvgpu_buddy *) + ((uintptr_t)node - offsetof(struct nvgpu_buddy, buddy_entry)); +}; + +/** + * @brief Given a tree node, retrieve the buddy. + * + * @param[in] node Pointer to the tree node. + * + * @return pointer to the struct nvgpu_buddy of the node. + */ +static inline struct nvgpu_buddy * +nvgpu_buddy_from_rbtree_node(struct nvgpu_rbtree_node *node) +{ + return (struct nvgpu_buddy *) + ((uintptr_t)node - offsetof(struct nvgpu_buddy, alloced_entry)); +}; + +/** + * @brief Macro generator to create is/set/clr operations for each of the + * flags in @ref BALLOC_BUDDY_FLAGS. 
+ * + * The created functions are: + * + * bool buddy_is_alloced(struct nvgpu_buddy *b); + * void buddy_set_alloced(struct nvgpu_buddy *b); + * void buddy_clr_alloced(struct nvgpu_buddy *b); + * + * bool buddy_is_split(struct nvgpu_buddy *b); + * void buddy_set_split(struct nvgpu_buddy *b); + * void buddy_clr_split(struct nvgpu_buddy *b); + * + * bool buddy_is_in_list(struct nvgpu_buddy *b); + * void buddy_set_in_list(struct nvgpu_buddy *b); + * void buddy_clr_in_list(struct nvgpu_buddy *b); + * + * @param[in] flag One of is, set or clr + * @param[in] flag_up One of the @ref BALLOC_BUDDY_FLAGS + * + * @{ + */ +#define nvgpu_buddy_allocator_flag_ops(flag, flag_up) \ + static inline bool buddy_is_ ## flag(struct nvgpu_buddy *b) \ + { \ + return (b->flags & BALLOC_BUDDY_ ## flag_up) != 0U; \ + } \ + static inline void buddy_set_ ## flag(struct nvgpu_buddy *b) \ + { \ + b->flags |= BALLOC_BUDDY_ ## flag_up; \ + } \ + static inline void buddy_clr_ ## flag(struct nvgpu_buddy *b) \ + { \ + b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ + } + +nvgpu_buddy_allocator_flag_ops(alloced, ALLOCED); +nvgpu_buddy_allocator_flag_ops(split, SPLIT); +nvgpu_buddy_allocator_flag_ops(in_list, IN_LIST); +/**@} */ + +/** + * Structure to keep information for a fixed allocation. + */ +struct nvgpu_fixed_alloc { + /** + * List of buddies. + */ + struct nvgpu_list_node buddies; + /** + * RB tree of fixed allocations. + */ + struct nvgpu_rbtree_node alloced_entry; + /** + * Start of fixed block. + */ + u64 start; + /** + * End address. + */ + u64 end; +}; + +/** + * @brief Given a tree node, retrieve the fixed allocation. + * + * @param[in] node Pointer to the tree node. + * + * @return pointer to the struct nvgpu_fixed_alloc of the node. + */ +static inline struct nvgpu_fixed_alloc * +nvgpu_fixed_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) +{ + return (struct nvgpu_fixed_alloc *) + ((uintptr_t)node - offsetof(struct nvgpu_fixed_alloc, alloced_entry)); +}; + +/** + * GPU buddy allocator for the various GPU address spaces. Each addressable unit + * doesn't have to correspond to a byte. In some cases each unit is a more + * complex object such as a comp_tag line or the like. + * + * The max order is computed based on the size of the minimum order and the size + * of the address space. + * + * #blk_size is the size of an order 0 buddy. + */ +struct nvgpu_buddy_allocator { + /** + * Pointer to the common allocator structure. + */ + struct nvgpu_allocator *owner; + /** + * Parent VM - can be NULL. + */ + struct vm_gk20a *vm; + + /** + * Base address of the space. + */ + u64 base; + /** + * Length of the space. + */ + u64 length; + /** + * Size of order 0 allocation. + */ + u64 blk_size; + /** + * Shift to divide by blk_size. + */ + u64 blk_shift; + + /** + * Internal: real start (aligned to #blk_size). + */ + u64 start; + /** + * Internal: real end, trimmed if needed. + */ + u64 end; + /** + * Internal: count of objects in space. + */ + u64 count; + /** + * Internal: count of blks in the space. + */ + u64 blks; + /** + * Internal: specific maximum order. + */ + u64 max_order; + + /** + * Outstanding allocations. + */ + struct nvgpu_rbtree_node *alloced_buddies; + /** + * Outstanding fixed allocations. + */ + struct nvgpu_rbtree_node *fixed_allocs; + + /** + * List of carveouts. + */ + struct nvgpu_list_node co_list; + + /** + * Cache of allocations (contains address and size of allocations). + */ + struct nvgpu_kmem_cache *buddy_cache; + + /** + * Impose an upper bound on the maximum order. 
+ */ +#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1U) + + /** + * List of buddies. + */ + struct nvgpu_list_node buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; + /** + * Length of the buddy list. + */ + u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; + /** + * Number of split nodes. + */ + u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; + /** + * Number of allocated nodes. + */ + u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; + + /** + * This is for when the allocator is managing a GVA space (the + * #GPU_ALLOC_GVA_SPACE bit is set in #flags). This requires + * that we group like sized allocations into PDE blocks. + */ + u64 pte_blk_order; + + /** + * Boolean to indicate if the allocator has been fully initialized. + */ + bool initialized; + /** + * Boolean set to true after the first allocation is made. + */ + bool alloc_made; + + /** + * Flags in used by the allocator as defined by @ref GPU_ALLOC_FLAGS + */ + u64 flags; + + /** + * Statistics: total number of bytes allocated. + */ + u64 bytes_alloced; + /** + * Statistics: total number of bytes allocated taking into account the + * buddy order. + */ + u64 bytes_alloced_real; + /** + * Statistics: total number of bytes freed. + */ + u64 bytes_freed; +}; + +/** + * @brief Given a generic allocator context, retrieve a pointer to the buddy + * allocator context structure. + * + * @param[in] a Pointer to nvgpu allocator. + * + * @return pointer to the struct nvgpu_bitmap_allocator. + */ +static inline struct nvgpu_buddy_allocator *buddy_allocator( + struct nvgpu_allocator *a) +{ + return (struct nvgpu_buddy_allocator *)(a)->priv; +} + +/** + * @brief Given a buddy allocator, retrieve the list of buddies of the chosen + * order. + * + * @param[in] a Pointer to the buddy allocator. + * @param[in] order Buddy order. + * + * @return list of buddies whose order is \a order. + */ +static inline struct nvgpu_list_node *balloc_get_order_list( + struct nvgpu_buddy_allocator *a, u64 order) +{ + return &a->buddy_list[order]; +} + +/** + * @brief Convert a buddy order to a length in bytes, based on the block size. + * + * @param[in] a Pointer to the buddy allocator. + * @param[in] order Buddy order. + * + * @return length in bytes. + */ +static inline u64 balloc_order_to_len(struct nvgpu_buddy_allocator *a, + u64 order) +{ + return nvgpu_safe_mult_u64(BIT64(order), a->blk_size); +} + +/** + * @brief Given a base address, shift it by the base address of the buddy. + * + * @param[in] a Pointer to the buddy allocator. + * @param[in] order Base address. + * + * @return shifted address. + */ +static inline u64 balloc_base_shift(struct nvgpu_buddy_allocator *a, + u64 base) +{ + return nvgpu_safe_sub_u64(base, a->start); +} + +/** + * @brief Given a shifted address, unshift it by the base address of the buddy. + * + * @param[in] a Pointer to the buddy allocator. + * @param[in] order Shifted address. + * + * @return unshifted address. + */ +static inline u64 balloc_base_unshift(struct nvgpu_buddy_allocator *a, + u64 base) +{ + return nvgpu_safe_add_u64(base, a->start); +} + +/** + * @brief Given a buddy allocator context, retrieve a pointer to the generic + * allocator context structure. + * + * @param[in] a Pointer to nvgpu buddy allocator. + * + * @return pointer to the struct nvgpu_allocator. 
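+ *
+ * (A quick worked example for the helpers above, assuming blk_size = 0x1000
+ * and start = 0x100000: balloc_order_to_len(a, 3) is 0x8000,
+ * balloc_base_shift(a, 0x104000) is 0x4000, and unshifting that value gives
+ * back the original 0x104000.)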
+ */ +static inline struct nvgpu_allocator *balloc_owner( + struct nvgpu_buddy_allocator *a) +{ + return a->owner; +} + +#endif /* NVGPU_MM_BUDDY_ALLOCATOR_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/mm/allocators/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/nvgpu_allocator.c new file mode 100644 index 000000000..fbcb08afb --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/allocators/nvgpu_allocator.c @@ -0,0 +1,216 @@ +/* + * gk20a allocator + * + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + + +u64 nvgpu_alloc_length(struct nvgpu_allocator *a) +{ + if (a->ops->length != NULL) { + return a->ops->length(a); + } + + return 0; +} + +u64 nvgpu_alloc_base(struct nvgpu_allocator *a) +{ + if (a->ops->base != NULL) { + return a->ops->base(a); + } + + return 0; +} + +bool nvgpu_alloc_initialized(struct nvgpu_allocator *a) +{ + if ((a->ops == NULL) || (a->ops->inited == NULL)) { + return false; + } + + return a->ops->inited(a); +} + +u64 nvgpu_alloc_end(struct nvgpu_allocator *a) +{ + if (a->ops->end != NULL) { + return a->ops->end(a); + } + + return 0; +} + +u64 nvgpu_alloc_space(struct nvgpu_allocator *a) +{ + if (a->ops->space != NULL) { + return a->ops->space(a); + } + + return 0; +} + +u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len) +{ + return a->ops->alloc(a, len); +} + +u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size) +{ + return a->ops->alloc_pte(a, len, page_size); +} + +void nvgpu_free(struct nvgpu_allocator *a, u64 addr) +{ + a->ops->free_alloc(a, addr); +} + +u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len, + u32 page_size) +{ + if ((U64_MAX - base) < len) { + return 0ULL; + } + + if (a->ops->alloc_fixed != NULL) { + return a->ops->alloc_fixed(a, base, len, page_size); + } + + return 0; +} + +void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len) +{ + /* + * If this operation is not defined for the allocator then just do + * nothing. The alternative would be to fall back on the regular + * free but that may be harmful in unexpected ways. 
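+ * (This NULL probe is the pattern used throughout this file: the mandatory
+ * ops -- alloc, free_alloc and fini -- are validated once in
+ * nvgpu_alloc_common_init() below and then called unconditionally, while
+ * every optional op degrades to a no-op or a default return value when the
+ * backing allocator does not provide it.)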
+ */ + if (a->ops->free_fixed != NULL) { + a->ops->free_fixed(a, base, len); + } +} + +int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + if (a->ops->reserve_carveout != NULL) { + return a->ops->reserve_carveout(a, co); + } + + return -ENODEV; +} + +void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + if (a->ops->release_carveout != NULL) { + a->ops->release_carveout(a, co); + } +} + +void nvgpu_alloc_destroy(struct nvgpu_allocator *a) +{ + a->ops->fini(a); + nvgpu_mutex_destroy(&a->lock); + (void) memset(a, 0, sizeof(*a)); +} + +#ifdef __KERNEL__ +void nvgpu_alloc_print_stats(struct nvgpu_allocator *na, + struct seq_file *s, int lock) +{ + na->ops->print_stats(na, s, lock); +} +#endif + +/* + * Handle the common init stuff for a nvgpu_allocator. + */ +int nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g, + const char *name, void *priv, bool dbg, + const struct nvgpu_allocator_ops *ops) +{ + if (ops == NULL) { + return -EINVAL; + } + + /* + * This is the bare minimum operations required for a sensible + * allocator. + */ + if ((ops->alloc == NULL) || (ops->free_alloc == NULL) || + (ops->fini == NULL)) { + return -EINVAL; + } + + nvgpu_mutex_init(&a->lock); + + a->g = g; + a->ops = ops; + a->priv = priv; + a->debug = dbg; + + (void) strncpy(a->name, name, sizeof(a->name)); + a->name[sizeof(a->name) - 1U] = '\0'; + + return 0; +} + +/* + * Initialize requested type of allocator + */ + +int nvgpu_allocator_init(struct gk20a *g, struct nvgpu_allocator *na, + struct vm_gk20a *vm, const char *name, + u64 base, u64 length, u64 blk_size, u64 max_order, + u64 flags, enum nvgpu_allocator_type alloc_type) +{ + int err = -EINVAL; + + switch (alloc_type) { + case BUDDY_ALLOCATOR: + err = nvgpu_buddy_allocator_init(g, na, vm, name, base, length, + blk_size, max_order, flags); + break; +#ifdef CONFIG_NVGPU_DGPU + case PAGE_ALLOCATOR: + err = nvgpu_page_allocator_init(g, na, name, base, length, + blk_size, flags); + break; +#endif + case BITMAP_ALLOCATOR: + err = nvgpu_bitmap_allocator_init(g, na, name, base, length, + blk_size, flags); + break; + default: + nvgpu_err(g, "Incorrect allocator type, couldn't initialize"); + break; + } + + if (err < 0) { + nvgpu_err(g, "Failed!"); + } + return err; +} diff --git a/drivers/gpu/nvgpu/common/mm/allocators/page_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/page_allocator.c new file mode 100644 index 000000000..790869910 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/allocators/page_allocator.c @@ -0,0 +1,1141 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "buddy_allocator_priv.h" + +#define palloc_dbg(a, fmt, arg...) \ + alloc_dbg(palloc_owner(a), fmt, ##arg) + +/* + * Since some Linux headers are still leaked into common code this is necessary + * for some builds. + */ +#ifdef PAGE_SIZE +#undef PAGE_SIZE +#endif + +#ifdef PAGE_ALIGN +#undef PAGE_ALIGN +#endif + +/* + * VIDMEM page size is 4k. + */ +#define PAGE_SIZE 0x1000U +#define PAGE_ALIGN(addr) (nvgpu_safe_add_u64(addr, (PAGE_SIZE - 1U)) & \ + ((typeof(addr)) ~(PAGE_SIZE - 1U))) + +/* + * Handle the book-keeping for these operations. + */ +static inline void add_slab_page_to_empty(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page_ptr) +{ + BUG_ON(page_ptr->state != SP_NONE); + nvgpu_list_add(&page_ptr->list_entry, &slab->empty); + nvgpu_assert(slab->nr_empty < U32_MAX); + slab->nr_empty++; + page_ptr->state = SP_EMPTY; +} +static inline void add_slab_page_to_partial(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page_ptr) +{ + BUG_ON(page_ptr->state != SP_NONE); + nvgpu_list_add(&page_ptr->list_entry, &slab->partial); + nvgpu_assert(slab->nr_partial < U32_MAX); + slab->nr_partial++; + page_ptr->state = SP_PARTIAL; +} +static inline void add_slab_page_to_full(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page_ptr) +{ + BUG_ON(page_ptr->state != SP_NONE); + nvgpu_list_add(&page_ptr->list_entry, &slab->full); + nvgpu_assert(slab->nr_full < U32_MAX); + slab->nr_full++; + page_ptr->state = SP_FULL; +} + +static inline void del_slab_page_from_empty(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page_ptr) +{ + nvgpu_list_del(&page_ptr->list_entry); + nvgpu_assert(slab->nr_empty > 0U); + slab->nr_empty--; + page_ptr->state = SP_NONE; +} +static inline void del_slab_page_from_partial(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page_ptr) +{ + nvgpu_list_del(&page_ptr->list_entry); + nvgpu_assert(slab->nr_partial > 0U); + slab->nr_partial--; + page_ptr->state = SP_NONE; +} +static inline void del_slab_page_from_full(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page_ptr) +{ + nvgpu_list_del(&page_ptr->list_entry); + nvgpu_assert(slab->nr_full > 0U); + slab->nr_full--; + page_ptr->state = SP_NONE; +} + +static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_length(&va->source_allocator); +} + +static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_base(&va->source_allocator); +} + +static bool nvgpu_page_alloc_inited(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_initialized(&va->source_allocator); +} + +static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_end(&va->source_allocator); +} + +static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_space(&va->source_allocator); +} + +static int nvgpu_page_reserve_co(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + struct 
nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_reserve_carveout(&va->source_allocator, co); +} + +static void nvgpu_page_release_co(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + struct nvgpu_page_allocator *va = a->priv; + + nvgpu_alloc_release_carveout(&va->source_allocator, co); +} + +static void *nvgpu_page_alloc_sgl_next(void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return (void *)sgl_impl->next; +} + +static u64 nvgpu_page_alloc_sgl_phys(struct gk20a *g, void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static u64 nvgpu_page_alloc_sgl_ipa_to_pa(struct gk20a *g, + void *sgl, u64 ipa, u64 *pa_len) +{ + return ipa; +} + +static u64 nvgpu_page_alloc_sgl_dma(void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->dma; +} + +static u64 nvgpu_page_alloc_sgl_length(void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->length; +} + +static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + /* + * No-op here. The free is handled by the page_alloc free() functions. + */ +} + +/* + * These implement the generic scatter gather ops for pages allocated + * by the page allocator. however, the primary aim for this, is of course, + * vidmem. + */ +static const struct nvgpu_sgt_ops page_alloc_sgl_ops = { + .sgl_next = nvgpu_page_alloc_sgl_next, + .sgl_phys = nvgpu_page_alloc_sgl_phys, + .sgl_ipa = nvgpu_page_alloc_sgl_phys, + .sgl_ipa_to_pa = nvgpu_page_alloc_sgl_ipa_to_pa, + .sgl_dma = nvgpu_page_alloc_sgl_dma, + .sgl_length = nvgpu_page_alloc_sgl_length, + .sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr, + .sgt_free = nvgpu_page_alloc_sgt_free, +}; + +/* + * This actually frees the sgl memory. Used by the page_alloc free() functions. 
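+ *
+ * For reference, the chain being freed is a plain singly linked list of
+ * struct nvgpu_mem_sgl. Given a struct nvgpu_page_alloc pointer, a consumer
+ * typically walks it through the generic accessors rather than touching the
+ * fields directly (sketch only):
+ *
+ *   void *sgl = alloc->sgt.sgl;
+ *   u64 total = 0ULL;
+ *
+ *   while (sgl != NULL) {
+ *           total += nvgpu_sgt_get_length(&alloc->sgt, sgl);
+ *           sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
+ *   }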
+ */ +static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g, + struct nvgpu_mem_sgl *sgl) +{ + struct nvgpu_mem_sgl *next; + + while (sgl != NULL) { + next = sgl->next; + nvgpu_kfree(g, sgl); + sgl = next; + } +} + +static void nvgpu_page_alloc_free_pages(struct nvgpu_page_allocator *a, + struct nvgpu_page_alloc *alloc, + bool free_buddy_alloc) +{ + void *sgl = alloc->sgt.sgl; + struct gk20a *g = a->owner->g; + + if (free_buddy_alloc) { + while (sgl != NULL) { + nvgpu_free(&a->source_allocator, + nvgpu_sgt_get_phys(g, &alloc->sgt, sgl)); + sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); + } + } + + nvgpu_page_alloc_sgl_proper_free(a->owner->g, + (struct nvgpu_mem_sgl *)sgl); + nvgpu_kmem_cache_free(a->alloc_cache, alloc); +} + +static void insert_page_alloc(struct nvgpu_page_allocator *a, + struct nvgpu_page_alloc *alloc) +{ + alloc->tree_entry.key_start = alloc->base; + alloc->tree_entry.key_end = nvgpu_safe_add_u64(alloc->base, + alloc->length); + + nvgpu_rbtree_insert(&alloc->tree_entry, &a->allocs); +} + +static struct nvgpu_page_alloc *find_page_alloc( + struct nvgpu_page_allocator *a, + u64 addr) +{ + struct nvgpu_page_alloc *alloc; + struct nvgpu_rbtree_node *node = NULL; + + nvgpu_rbtree_search(addr, &node, a->allocs); + if (node == NULL) { + return NULL; + } + + alloc = nvgpu_page_alloc_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &a->allocs); + + return alloc; +} + +static struct page_alloc_slab_page *alloc_slab_page( + struct nvgpu_page_allocator *a, + struct page_alloc_slab *slab) +{ + struct page_alloc_slab_page *slab_page; + + slab_page = nvgpu_kmem_cache_alloc(a->slab_page_cache); + if (slab_page == NULL) { + palloc_dbg(a, "OOM: unable to alloc slab_page struct!"); + return NULL; + } + + (void) memset(slab_page, 0, sizeof(*slab_page)); + + slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size); + if (slab_page->page_addr == 0ULL) { + nvgpu_kmem_cache_free(a->slab_page_cache, slab_page); + palloc_dbg(a, "OOM: vidmem is full!"); + return NULL; + } + + nvgpu_init_list_node(&slab_page->list_entry); + slab_page->slab_size = slab->slab_size; + slab_page->nr_objects = nvgpu_safe_cast_u64_to_u32(a->page_size) / + slab->slab_size; + slab_page->nr_objects_alloced = 0; + slab_page->owner = slab; + slab_page->state = SP_NONE; + + nvgpu_assert(a->pages_alloced < U64_MAX); + a->pages_alloced++; + + palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u", + slab_page->page_addr, slab_page->slab_size); + + return slab_page; +} + +static void free_slab_page(struct nvgpu_page_allocator *a, + struct page_alloc_slab_page *slab_page) +{ + palloc_dbg(a, "Freeing slab page @ 0x%012llx", slab_page->page_addr); + + BUG_ON(((slab_page->state != SP_NONE) && + (slab_page->state != SP_EMPTY)) || + slab_page->nr_objects_alloced != 0U || + slab_page->bitmap != 0U); + + nvgpu_free(&a->source_allocator, slab_page->page_addr); + nvgpu_assert(a->pages_freed < U64_MAX); + a->pages_freed++; + + nvgpu_kmem_cache_free(a->slab_page_cache, slab_page); +} + +/* + * This expects @alloc to have 1 empty sgl_entry ready for usage. + */ +static int do_slab_alloc(struct nvgpu_page_allocator *a, + struct page_alloc_slab *slab, + struct nvgpu_page_alloc *alloc) +{ + struct page_alloc_slab_page *slab_page = NULL; + struct nvgpu_mem_sgl *sgl; + unsigned long offs; + + /* + * Check the partial and empty lists to see if we have some space + * readily available. Take the slab_page out of what ever list it + * was in since it may be put back into a different list later. 
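+ *
+ * The resulting life cycle of a slab page is simple: after the allocation
+ * below it sits on the partial list (some objects used) or the full list
+ * (every object used); frees move it back towards partial and eventually
+ * empty, at which point nvgpu_free_slab() keeps at most one cached empty
+ * page and returns any further ones to the buddy allocator via
+ * free_slab_page(). With a 64K page and an 8K slab_size, for instance, each
+ * slab page holds 8 objects.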
+ */ + if (!nvgpu_list_empty(&slab->partial)) { + slab_page = nvgpu_list_first_entry(&slab->partial, + page_alloc_slab_page, + list_entry); + del_slab_page_from_partial(slab, slab_page); + } else { + if (!nvgpu_list_empty(&slab->empty)) { + slab_page = nvgpu_list_first_entry(&slab->empty, + page_alloc_slab_page, + list_entry); + del_slab_page_from_empty(slab, slab_page); + } + } + + if (slab_page == NULL) { + slab_page = alloc_slab_page(a, slab); + if (slab_page == NULL) { + return -ENOMEM; + } + } + + /* + * We now have a slab_page. Do the alloc. + */ + offs = bitmap_find_next_zero_area(&slab_page->bitmap, + slab_page->nr_objects, + 0, 1, 0); + if (offs >= slab_page->nr_objects) { + WARN(true, "Empty/partial slab with no free objects?"); + + /* Add the buggy page to the full list... This isn't ideal. */ + add_slab_page_to_full(slab, slab_page); + return -ENOMEM; + } + + nvgpu_assert(offs <= U64(U32_MAX)); + nvgpu_bitmap_set(&slab_page->bitmap, U32(offs), 1U); + nvgpu_assert(slab_page->nr_objects_alloced < U32_MAX); + slab_page->nr_objects_alloced++; + + if (slab_page->nr_objects_alloced < slab_page->nr_objects) { + add_slab_page_to_partial(slab, slab_page); + } else if (slab_page->nr_objects_alloced == slab_page->nr_objects) { + add_slab_page_to_full(slab, slab_page); + } else { + BUG(); /* Should be impossible to hit this. */ + } + + /* + * Handle building the nvgpu_page_alloc struct. We expect one sgl + * to be present. + */ + alloc->slab_page = slab_page; + alloc->nr_chunks = 1; + alloc->length = slab_page->slab_size; + alloc->base = nvgpu_safe_add_u64(slab_page->page_addr, + nvgpu_safe_mult_u64(offs, slab_page->slab_size)); + + sgl = (struct nvgpu_mem_sgl *)alloc->sgt.sgl; + sgl->phys = alloc->base; + sgl->dma = alloc->base; + sgl->length = alloc->length; + sgl->next = NULL; + + return 0; +} + +/* + * Allocate from a slab instead of directly from the page allocator. + */ +static struct nvgpu_page_alloc *nvgpu_alloc_slab( + struct nvgpu_page_allocator *a, u64 len) +{ + int err; + u64 slab_nr; + struct page_alloc_slab *slab; + struct nvgpu_page_alloc *alloc = NULL; + struct nvgpu_mem_sgl *sgl = NULL; + + /* + * Align the length to a page and then divide by the page size (4k for + * this code). ilog2() of that then gets us the correct slab to use. 
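+ *
+ * Worked example: len = 0x1800 (6K) aligns up to 0x2000, 0x2000 >> 12 is 2
+ * and ilog2(2) is 1, so slab index 1 -- the 8K bucket, since slab_size is
+ * BIT32(index + 12) -- is used. Requests of 4K or less land in index 0.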
+ */ + slab_nr = ilog2(PAGE_ALIGN(len) >> 12); + slab = &a->slabs[slab_nr]; + + alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); + if (alloc == NULL) { + palloc_dbg(a, "OOM: could not alloc page_alloc struct!"); + goto fail; + } + + alloc->sgt.ops = &page_alloc_sgl_ops; + + sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); + if (sgl == NULL) { + palloc_dbg(a, "OOM: could not alloc sgl struct!"); + goto fail; + } + + alloc->sgt.sgl = (void *)sgl; + err = do_slab_alloc(a, slab, alloc); + if (err != 0) { + goto fail; + } + + palloc_dbg(a, "Alloc 0x%04llx sr=%llu id=0x%010llx [slab]", + len, slab_nr, alloc->base); + nvgpu_assert(a->nr_slab_allocs < U64_MAX); + a->nr_slab_allocs++; + + return alloc; + +fail: + if (alloc != NULL) { + nvgpu_kmem_cache_free(a->alloc_cache, alloc); + } + if (sgl != NULL) { + nvgpu_kfree(a->owner->g, sgl); + } + return NULL; +} + +static void nvgpu_free_slab(struct nvgpu_page_allocator *a, + struct nvgpu_page_alloc *alloc) +{ + struct page_alloc_slab_page *slab_page = alloc->slab_page; + struct page_alloc_slab *slab = slab_page->owner; + enum slab_page_state new_state; + u32 offs; + + offs = nvgpu_safe_cast_u64_to_u32(nvgpu_safe_sub_u64(alloc->base, + slab_page->page_addr)) / + slab_page->slab_size; + nvgpu_bitmap_clear(&slab_page->bitmap, offs, 1U); + + nvgpu_assert(slab_page->nr_objects_alloced < U32_MAX); + slab_page->nr_objects_alloced--; + + if (slab_page->nr_objects_alloced == 0U) { + new_state = SP_EMPTY; + } else { + new_state = SP_PARTIAL; + } + + /* + * Need to migrate the page to a different list. + */ + if (new_state != slab_page->state) { + /* Delete - can't be in empty. */ + if (slab_page->state == SP_PARTIAL) { + del_slab_page_from_partial(slab, slab_page); + } else { + del_slab_page_from_full(slab, slab_page); + } + + /* And add. */ + if (new_state == SP_EMPTY) { + if (nvgpu_list_empty(&slab->empty)) { + add_slab_page_to_empty(slab, slab_page); + } else { + free_slab_page(a, slab_page); + } + } else { + add_slab_page_to_partial(slab, slab_page); + } + } + + /* + * Now handle the page_alloc. + */ + nvgpu_page_alloc_free_pages(a, alloc, false); + nvgpu_assert(a->nr_slab_frees < U64_MAX); + a->nr_slab_frees++; + + return; +} + +/* + * Allocate physical pages. Since the underlying allocator is a buddy allocator + * the returned pages are always contiguous. However, since there could be + * fragmentation in the space this allocator will collate smaller non-contiguous + * allocations together if necessary. + */ +static struct nvgpu_page_alloc *do_nvgpu_alloc_pages( + struct nvgpu_page_allocator *a, u64 pages) +{ + struct nvgpu_page_alloc *alloc; + struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL; + u64 max_chunk_len = pages << a->page_shift; + int i = 0; + + alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); + if (alloc == NULL) { + goto fail; + } + + (void) memset(alloc, 0, sizeof(*alloc)); + + alloc->length = pages << a->page_shift; + alloc->sgt.ops = &page_alloc_sgl_ops; + + while (pages != 0ULL) { + u64 chunk_addr = 0; + u64 chunk_pages = (u64)1 << (nvgpu_fls(pages) - 1UL); + u64 chunk_len = chunk_pages << a->page_shift; + + /* + * Take care of the possibility that the allocation must be + * contiguous. If this is not the first iteration then that + * means the first iteration failed to alloc the entire + * requested size. The buddy allocator guarantees any given + * single alloc is contiguous. 
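+ *
+ * The loop around this point is a simple greedy strategy: try the largest
+ * power-of-two chunk that fits the remaining page count
+ * (1 << (nvgpu_fls(pages) - 1)), and on failure keep halving the chunk until
+ * it either succeeds or drops below the allocator's page size. A 13-page
+ * request, for example, is attempted as an 8-page chunk first and the
+ * remaining 5 pages as 4 + 1, assuming no allocation failures along the way.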
+ */ + if ((a->flags & GPU_ALLOC_FORCE_CONTIG) != 0ULL && i != 0) { + goto fail_cleanup; + } + + if (chunk_len > max_chunk_len) { + chunk_len = max_chunk_len; + } + + /* + * Keep attempting to allocate in smaller chunks until the alloc + * either succeeds or is smaller than the page_size of the + * allocator (i.e the allocator is OOM). + */ + do { + chunk_addr = nvgpu_alloc(&a->source_allocator, + chunk_len); + + /* Divide by 2 and try again */ + if (chunk_addr == 0ULL) { + palloc_dbg(a, "balloc failed: 0x%llx", + chunk_len); + chunk_len >>= 1; + max_chunk_len = chunk_len; + } + } while (chunk_addr == 0ULL && chunk_len >= a->page_size); + + chunk_pages = chunk_len >> a->page_shift; + + if (chunk_addr == 0ULL) { + palloc_dbg(a, "bailing @ 0x%llx", chunk_len); + goto fail_cleanup; + } + + sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); + if (sgl == NULL) { + nvgpu_free(&a->source_allocator, chunk_addr); + goto fail_cleanup; + } + + pages = nvgpu_safe_sub_u64(pages, chunk_pages); + + sgl->phys = chunk_addr; + sgl->dma = chunk_addr; + sgl->length = chunk_len; + + /* + * Build the singly linked list with a head node that is part of + * the list. + */ + if (prev_sgl != NULL) { + prev_sgl->next = sgl; + } else { + alloc->sgt.sgl = (void *)sgl; + } + + prev_sgl = sgl; + + i++; + } + + alloc->nr_chunks = i; + alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys; + + return alloc; + +fail_cleanup: + sgl = (struct nvgpu_mem_sgl *)alloc->sgt.sgl; + while (sgl != NULL) { + struct nvgpu_mem_sgl *next = sgl->next; + + nvgpu_free(&a->source_allocator, sgl->phys); + nvgpu_kfree(a->owner->g, sgl); + + sgl = next; + } + + nvgpu_kmem_cache_free(a->alloc_cache, alloc); +fail: + return NULL; +} + +static struct nvgpu_page_alloc *nvgpu_alloc_pages( + struct nvgpu_page_allocator *a, u64 len) +{ + struct gk20a *g = a->owner->g; + struct nvgpu_page_alloc *alloc = NULL; + void *sgl; + u64 pages; + u32 i = 0; + + pages = NVGPU_ALIGN(len, a->page_size) >> a->page_shift; + + alloc = do_nvgpu_alloc_pages(a, pages); + if (alloc == NULL) { + palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)", + pages << a->page_shift, pages); + return NULL; + } + + palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx", + pages << a->page_shift, pages, alloc->base); + sgl = alloc->sgt.sgl; + while (sgl != NULL) { + nvgpu_assert(i < U32_MAX); + palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx", + i++, + nvgpu_sgt_get_phys(g, &alloc->sgt, sgl), + nvgpu_sgt_get_length(&alloc->sgt, sgl)); + sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); + } + palloc_dbg(a, "Alloc done"); + + return alloc; +} + +/* + * Allocate enough pages to satisfy @len. Page size is determined at + * initialization of the allocator. + * + * The return is actually a pointer to a struct nvgpu_page_alloc pointer. This + * is because it doesn't make a lot of sense to return the address of the first + * page in the list of pages (since they could be discontiguous). This has + * precedent in the dma_alloc APIs, though, it's really just an annoying + * artifact of the fact that the nvgpu_alloc() API requires a u64 return type. + */ +static u64 nvgpu_page_palloc(struct nvgpu_allocator *na, u64 len) +{ + struct nvgpu_page_allocator *a = page_allocator(na); + struct nvgpu_page_alloc *alloc = NULL; + u64 real_len; + + /* + * If we want contig pages we have to round up to a power of two. It's + * easier to do that here than in the buddy allocator. + */ + real_len = ((a->flags & GPU_ALLOC_FORCE_CONTIG) != 0ULL) ? 
+ roundup_pow_of_two(len) : len; + + alloc_lock(na); + if ((a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) != 0ULL && + real_len <= (a->page_size / 2U)) { + alloc = nvgpu_alloc_slab(a, real_len); + } else { + alloc = nvgpu_alloc_pages(a, real_len); + } + + if (alloc == NULL) { + alloc_unlock(na); + return 0; + } + + insert_page_alloc(a, alloc); + + nvgpu_assert(a->nr_allocs < U64_MAX); + a->nr_allocs++; + if (real_len > a->page_size / 2U) { + a->pages_alloced += alloc->length >> a->page_shift; + } + alloc_unlock(na); + + if ((a->flags & GPU_ALLOC_NO_SCATTER_GATHER) != 0ULL) { + return alloc->base; + } else { + return (u64) (uintptr_t) alloc; + } +} + +/* + * Note: this will remove the nvgpu_page_alloc struct from the RB tree + * if it's found. + */ +static void nvgpu_page_free(struct nvgpu_allocator *na, u64 base) +{ + struct nvgpu_page_allocator *a = page_allocator(na); + struct nvgpu_page_alloc *alloc; + + alloc_lock(na); + + if ((a->flags & GPU_ALLOC_NO_SCATTER_GATHER) != 0ULL) { + alloc = find_page_alloc(a, base); + } else { + alloc = find_page_alloc(a, + ((struct nvgpu_page_alloc *)(uintptr_t)base)->base); + } + + if (alloc == NULL) { + palloc_dbg(a, "Hrm, found no alloc?"); + goto done; + } + + nvgpu_assert(a->nr_frees < U64_MAX); + a->nr_frees++; + + palloc_dbg(a, "Free 0x%llx id=0x%010llx", + alloc->length, alloc->base); + + /* + * Frees *alloc. + */ + if (alloc->slab_page != NULL) { + nvgpu_free_slab(a, alloc); + } else { + a->pages_freed = nvgpu_safe_add_u64(a->pages_freed, + alloc->length >> a->page_shift); + nvgpu_page_alloc_free_pages(a, alloc, true); + } + +done: + alloc_unlock(na); +} + +static struct nvgpu_page_alloc *nvgpu_alloc_pages_fixed( + struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) +{ + struct nvgpu_page_alloc *alloc; + struct nvgpu_mem_sgl *sgl; + + alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); + sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); + if (alloc == NULL || sgl == NULL) { + goto fail; + } + + alloc->sgt.ops = &page_alloc_sgl_ops; + alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); + if (alloc->base == 0ULL) { + WARN(true, "nvgpu: failed to fixed alloc pages @ 0x%010llx", + base); + goto fail; + } + + alloc->nr_chunks = 1; + alloc->length = length; + alloc->sgt.sgl = (void *)sgl; + + sgl->phys = alloc->base; + sgl->dma = alloc->base; + sgl->length = length; + sgl->next = NULL; + + return alloc; + +fail: + if (sgl != NULL) { + nvgpu_kfree(a->owner->g, sgl); + } + if (alloc != NULL) { + nvgpu_kmem_cache_free(a->alloc_cache, alloc); + } + return NULL; +} + +/* + * @page_size is ignored. 
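+ *
+ * As with nvgpu_page_palloc() above, the value handed back to the caller is
+ * either the physical base (when GPU_ALLOC_NO_SCATTER_GATHER is set) or the
+ * struct nvgpu_page_alloc pointer cast to u64. A hedged sketch of the
+ * scatter-gather case (parameter values are illustrative, and a real caller
+ * checks handle != 0 first):
+ *
+ *   u64 handle = nvgpu_alloc_fixed(&na, base, len, 0U);
+ *   struct nvgpu_page_alloc *pa =
+ *           (struct nvgpu_page_alloc *)(uintptr_t)handle;
+ *
+ * and the same handle is later passed to nvgpu_free_fixed(&na, handle, len).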
+ */ +static u64 nvgpu_page_palloc_fixed(struct nvgpu_allocator *na, + u64 base, u64 len, u32 page_size) +{ + struct nvgpu_page_allocator *a = page_allocator(na); + struct nvgpu_page_alloc *alloc = NULL; + void *sgl; + struct gk20a *g = a->owner->g; + u64 aligned_len, pages; + u32 i = 0; + + aligned_len = NVGPU_ALIGN(len, a->page_size); + pages = aligned_len >> a->page_shift; + + alloc_lock(na); + + alloc = nvgpu_alloc_pages_fixed(a, base, aligned_len, 0); + if (alloc == NULL) { + alloc_unlock(na); + return 0; + } + + insert_page_alloc(a, alloc); + alloc_unlock(na); + + palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)", + alloc->base, aligned_len, pages); + sgl = alloc->sgt.sgl; + while (sgl != NULL) { + nvgpu_assert(i < U32_MAX); + palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx", + i++, + nvgpu_sgt_get_phys(g, &alloc->sgt, sgl), + nvgpu_sgt_get_length(&alloc->sgt, sgl)); + sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); + } + + nvgpu_assert(a->nr_fixed_allocs < U64_MAX); + a->nr_fixed_allocs++; + a->pages_alloced = nvgpu_safe_add_u64(a->pages_alloced, pages); + + if ((a->flags & GPU_ALLOC_NO_SCATTER_GATHER) != 0ULL) { + return alloc->base; + } else { + return (u64) (uintptr_t) alloc; + } +} + +static void nvgpu_page_free_fixed(struct nvgpu_allocator *na, + u64 base, u64 len) +{ + struct nvgpu_page_allocator *a = page_allocator(na); + struct nvgpu_page_alloc *alloc; + + alloc_lock(na); + + if ((a->flags & GPU_ALLOC_NO_SCATTER_GATHER) != 0ULL) { + alloc = find_page_alloc(a, base); + if (alloc == NULL) { + goto done; + } + } else { + alloc = (struct nvgpu_page_alloc *) (uintptr_t) base; + } + + palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx", + alloc->base, alloc->length); + + nvgpu_assert(a->nr_fixed_frees < U64_MAX); + a->nr_fixed_frees++; + a->pages_freed = nvgpu_safe_add_u64(a->pages_freed, + alloc->length >> a->page_shift); + + /* + * This works for the time being since the buddy allocator + * uses the same free function for both fixed and regular + * allocs. This would have to be updated if the underlying + * allocator were to change. + */ + nvgpu_page_alloc_free_pages(a, alloc, true); + +done: + alloc_unlock(na); +} + +static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *na) +{ + struct nvgpu_page_allocator *a = page_allocator(na); + + alloc_lock(na); + nvgpu_kfree(nvgpu_alloc_to_gpu(na), a); + na->priv = NULL; + alloc_unlock(na); +} + +#ifdef __KERNEL__ +static void nvgpu_page_print_stats(struct nvgpu_allocator *na, + struct seq_file *s, int lock) +{ + struct nvgpu_page_allocator *a = page_allocator(na); + u32 i; + + if (lock) + alloc_lock(na); + + alloc_pstat(s, na, "Page allocator:"); + alloc_pstat(s, na, " allocs %lld", a->nr_allocs); + alloc_pstat(s, na, " frees %lld", a->nr_frees); + alloc_pstat(s, na, " fixed_allocs %lld", a->nr_fixed_allocs); + alloc_pstat(s, na, " fixed_frees %lld", a->nr_fixed_frees); + alloc_pstat(s, na, " slab_allocs %lld", a->nr_slab_allocs); + alloc_pstat(s, na, " slab_frees %lld", a->nr_slab_frees); + alloc_pstat(s, na, " pages alloced %lld", a->pages_alloced); + alloc_pstat(s, na, " pages freed %lld", a->pages_freed); + alloc_pstat(s, na, ""); + + alloc_pstat(s, na, "Page size: %lld KB", + a->page_size >> 10); + alloc_pstat(s, na, "Total pages: %lld (%lld MB)", + a->length / a->page_size, + a->length >> 20); + alloc_pstat(s, na, "Available pages: %lld (%lld MB)", + nvgpu_alloc_space(&a->source_allocator) / a->page_size, + nvgpu_alloc_space(&a->source_allocator) >> 20); + alloc_pstat(s, na, ""); + + /* + * Slab info. 
+ */
+ if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
+ alloc_pstat(s, na, "Slabs:");
+ alloc_pstat(s, na, " size empty partial full");
+ alloc_pstat(s, na, " ---- ----- ------- ----");
+
+ for (i = 0; i < a->nr_slabs; i++) {
+ struct page_alloc_slab *slab = &a->slabs[i];
+
+ alloc_pstat(s, na, " %-9u %-9d %-9u %u",
+ slab->slab_size,
+ slab->nr_empty, slab->nr_partial,
+ slab->nr_full);
+ }
+ alloc_pstat(s, na, "");
+ }
+
+ alloc_pstat(s, na, "Source alloc: %s",
+ a->source_allocator.name);
+ nvgpu_alloc_print_stats(&a->source_allocator, s, lock);
+
+ if (lock)
+ alloc_unlock(na);
+}
+#endif
+
+static const struct nvgpu_allocator_ops page_ops = {
+ .alloc = nvgpu_page_palloc,
+ .free_alloc = nvgpu_page_free,
+
+ .alloc_fixed = nvgpu_page_palloc_fixed,
+ .free_fixed = nvgpu_page_free_fixed,
+
+ .reserve_carveout = nvgpu_page_reserve_co,
+ .release_carveout = nvgpu_page_release_co,
+
+ .base = nvgpu_page_alloc_base,
+ .length = nvgpu_page_alloc_length,
+ .end = nvgpu_page_alloc_end,
+ .inited = nvgpu_page_alloc_inited,
+ .space = nvgpu_page_alloc_space,
+
+ .fini = nvgpu_page_allocator_destroy,
+
+#ifdef __KERNEL__
+ .print_stats = nvgpu_page_print_stats,
+#endif
+};
+
+/*
+ * nr_slabs is computed as follows: divide page_size by 4096 to get number of
+ * 4k pages in page_size. Then take the base 2 log of that to get number of
+ * slabs. For 64k page_size that works out like:
+ *
+ * 1024*64 / 1024*4 = 16
+ * ilog2(16) = 4
+ *
+ * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
+ */
+static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a)
+{
+ /* Use temp var for MISRA 10.8 */
+ unsigned long tmp_nr_slabs = ilog2(a->page_size >> 12);
+ u32 nr_slabs = nvgpu_safe_cast_u64_to_u32(tmp_nr_slabs);
+ u32 i;
+
+ /*
+ * As slab_size is 32 bits wide, the maximum possible slab_size
+ * is 2^32, i.e. 4 GB.
+ * So we can have at most 20 buckets of slabs, starting at 4K.
+ * Return error if number of slabs is greater than 20.
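+ *
+ * Worked examples of the math above (page sizes hypothetical):
+ *
+ *   page_size =  64K: ilog2( 64K >> 12) = ilog2(16) = 4 slabs (4K..32K)
+ *   page_size = 128K: ilog2(128K >> 12) = ilog2(32) = 5 slabs (4K..64K)
+ *
+ * At the 20 slab limit the largest bucket would be BIT32(19 + 12) = 2 GB,
+ * the biggest slab that still fits in the 32-bit slab_size field.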
+ */ + if (nr_slabs > 20U) { + return -EINVAL; + } + + a->slabs = nvgpu_kcalloc(nvgpu_alloc_to_gpu(a->owner), + (size_t)nr_slabs, + sizeof(struct page_alloc_slab)); + if (a->slabs == NULL) { + return -ENOMEM; + } + a->nr_slabs = nr_slabs; + + for (i = 0; i < nr_slabs; i++) { + struct page_alloc_slab *slab = &a->slabs[i]; + + /* Slab_size starts from 4K */ + slab->slab_size = BIT32(i + 12U); + nvgpu_init_list_node(&slab->empty); + nvgpu_init_list_node(&slab->partial); + nvgpu_init_list_node(&slab->full); + slab->nr_empty = 0; + slab->nr_partial = 0; + slab->nr_full = 0; + } + + return 0; +} + +int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *na, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags) +{ + struct nvgpu_page_allocator *a; + char buddy_name[sizeof(na->name)]; + int err; + + if (blk_size < SZ_4K) { + return -EINVAL; + } + + a = nvgpu_kzalloc(g, sizeof(struct nvgpu_page_allocator)); + if (a == NULL) { + return -ENOMEM; + } + + err = nvgpu_alloc_common_init(na, g, name, a, false, &page_ops); + if (err != 0) { + goto fail; + } + + a->alloc_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_page_alloc)); + a->slab_page_cache = nvgpu_kmem_cache_create(g, + sizeof(struct page_alloc_slab_page)); + if (a->alloc_cache == NULL || a->slab_page_cache == NULL) { + err = -ENOMEM; + goto fail; + } + + a->base = base; + a->length = length; + a->page_size = blk_size; + a->page_shift = nvgpu_safe_cast_u64_to_u32((nvgpu_ffs(blk_size) - 1UL)); + a->allocs = NULL; + a->owner = na; + a->flags = flags; + + if ((flags & GPU_ALLOC_4K_VIDMEM_PAGES) != 0ULL && + blk_size > SZ_4K) { + err = nvgpu_page_alloc_init_slabs(a); + if (err != 0) { + goto fail; + } + } + + (void) strncpy(buddy_name, name, + (sizeof(buddy_name)) - (sizeof("-src"))); + (void) strcat(buddy_name, "-src"); + + err = nvgpu_buddy_allocator_init(g, &a->source_allocator, NULL, + buddy_name, base, length, blk_size, + 0ULL, 0ULL); + if (err != 0) { + goto fail; + } + +#ifdef CONFIG_DEBUG_FS + nvgpu_init_alloc_debug(g, na); +#endif + palloc_dbg(a, "New allocator: type page"); + palloc_dbg(a, " base 0x%llx", a->base); + palloc_dbg(a, " size 0x%llx", a->length); + palloc_dbg(a, " page_size 0x%llx", a->page_size); + palloc_dbg(a, " flags 0x%llx", a->flags); + palloc_dbg(a, " slabs: %d", a->nr_slabs); + + return 0; + +fail: + if (a->slabs != NULL) { + nvgpu_kfree(g, a->slabs); + } + if (a->alloc_cache != NULL) { + nvgpu_kmem_cache_destroy(a->alloc_cache); + } + if (a->slab_page_cache != NULL) { + nvgpu_kmem_cache_destroy(a->slab_page_cache); + } + nvgpu_kfree(g, a); + return err; +} diff --git a/drivers/gpu/nvgpu/common/mm/as.c b/drivers/gpu/nvgpu/common/mm/as.c new file mode 100644 index 000000000..1b3fc1fdb --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/as.c @@ -0,0 +1,245 @@ +/* + * GK20A Address Spaces + * + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define VM_NAME_PREFIX "as_" + +/* dumb allocator... */ +static int generate_as_share_id(struct gk20a_as *as) +{ + struct gk20a *g = gk20a_from_as(as); + + nvgpu_log_fn(g, " "); + as->last_share_id = nvgpu_safe_add_s32(as->last_share_id, 1); + return as->last_share_id; +} +/* still dumb */ +static void release_as_share_id(struct gk20a_as_share *as_share) +{ + struct gk20a *g = gk20a_from_as(as_share->as); + + nvgpu_log_fn(g, " "); + return; +} + +/* address space interfaces for the gk20a module */ +static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, + u32 big_page_size, u32 flags, + u64 va_range_start, u64 va_range_end, + u64 va_range_split) +{ + struct gk20a_as *as = as_share->as; + struct gk20a *g = gk20a_from_as(as); + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm; + char name[NVGPU_VM_NAME_LEN] = VM_NAME_PREFIX; + char *p; + u64 user_size; + u64 kernel_size = mm->channel.kernel_size; + u64 pde_size, pde_size_mask; + bool big_pages; + const bool userspace_managed = + (flags & NVGPU_AS_ALLOC_USERSPACE_MANAGED) != 0U; + const bool unified_va = + nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES) || + ((flags & NVGPU_AS_ALLOC_UNIFIED_VA) != 0U); + + nvgpu_log_fn(g, " "); + + if (big_page_size == 0U) { + big_pages = false; + big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + } else { + if (!is_power_of_2(big_page_size)) { + return -EINVAL; + } + + if ((big_page_size & + nvgpu_mm_get_available_big_page_sizes(g)) == 0U) { + return -EINVAL; + } + big_pages = true; + } + + pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(g, big_page_size)); + pde_size_mask = nvgpu_safe_sub_u64(pde_size, U64(1)); + + if ((va_range_start == 0ULL) || + ((va_range_start & pde_size_mask) != 0ULL)) { + return -EINVAL; + } + + if ((va_range_end == 0ULL) || + ((va_range_end & pde_size_mask) != 0ULL)) { + return -EINVAL; + } + + if (va_range_start >= va_range_end) { + return -EINVAL; + } + + user_size = nvgpu_safe_sub_u64(va_range_end, va_range_start); + + if (unified_va || !big_pages) { + if (va_range_split != 0ULL) { + return -EINVAL; + } + } else { + /* non-unified VA: split required */ + if ((va_range_split == 0ULL) || + ((va_range_split & pde_size_mask) != 0ULL)) { + return -EINVAL; + } + + /* non-unified VA: split range checks */ + if ((va_range_split <= va_range_start) || + (va_range_split >= va_range_end)) { + return -EINVAL; + } + } + + nvgpu_log_info(g, + "vm: low_hole=0x%llx, user_size=0x%llx, kernel_size=0x%llx", + va_range_start, user_size, kernel_size); + + p = name + strlen(name); + (void) nvgpu_strnadd_u32(p, nvgpu_safe_cast_s32_to_u32(as_share->id), + sizeof(name) - sizeof(VM_NAME_PREFIX), 10U); + + vm = nvgpu_vm_init(g, big_page_size, + va_range_start, + user_size, + kernel_size, + va_range_split, + big_pages, userspace_managed, unified_va, name); + if (vm == NULL) { + return -ENOMEM; + } + + as_share->vm = vm; + vm->as_share = as_share; + vm->enable_ctag = true; + + return 0; +} + +int 
gk20a_as_alloc_share(struct gk20a *g, + u32 big_page_size, u32 flags, u64 va_range_start, + u64 va_range_end, u64 va_range_split, + struct gk20a_as_share **out) +{ + struct gk20a_as_share *as_share; + int err = 0; + + nvgpu_log_fn(g, " "); + g = nvgpu_get(g); + if (g == NULL) { + return -ENODEV; + } + + *out = NULL; + as_share = nvgpu_kzalloc(g, sizeof(*as_share)); + if (as_share == NULL) { + return -ENOMEM; + } + + as_share->as = &g->as; + as_share->id = generate_as_share_id(as_share->as); + + /* this will set as_share->vm. */ + err = gk20a_busy(g); + if (err != 0) { + goto failed; + } + err = gk20a_vm_alloc_share(as_share, big_page_size, flags, + va_range_start, va_range_end, va_range_split); + gk20a_idle(g); + + if (err != 0) { + goto failed; + } + + *out = as_share; + return 0; + +failed: + nvgpu_kfree(g, as_share); + return err; +} + +int gk20a_vm_release_share(struct gk20a_as_share *as_share) +{ + struct vm_gk20a *vm = as_share->vm; + struct gk20a *g = gk20a_from_vm(vm); + + nvgpu_log_fn(g, " "); + + vm->as_share = NULL; + as_share->vm = NULL; + + nvgpu_vm_put(vm); + + return 0; +} + +/* + * channels and the device nodes call this to release. + * once the ref_cnt hits zero the share is deleted. + */ +int gk20a_as_release_share(struct gk20a_as_share *as_share) +{ + struct gk20a *g = as_share->vm->mm->g; + int err; + + nvgpu_log_fn(g, " "); + + err = gk20a_busy(g); + + if (err != 0) { + goto release_fail; + } + + err = gk20a_vm_release_share(as_share); + + gk20a_idle(g); + +release_fail: + release_as_share_id(as_share); + nvgpu_put(g); + nvgpu_kfree(g, as_share); + + return err; +} + +struct gk20a *gk20a_from_as(struct gk20a_as *as) +{ + return (struct gk20a *)((uintptr_t)as - offsetof(struct gk20a, as)); +} diff --git a/drivers/gpu/nvgpu/common/mm/comptags.c b/drivers/gpu/nvgpu/common/mm/comptags.c new file mode 100644 index 000000000..13d594a94 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/comptags.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, + u32 *offset, u32 len) +{ + unsigned long addr; + int err = 0; + + if (allocator->size == 0UL) { + return -EINVAL; + } + + nvgpu_mutex_acquire(&allocator->lock); + addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size, + 0, len, 0); + if (addr < allocator->size) { + /* number zero is reserved; bitmap base is 1 */ + nvgpu_assert(addr < U64(U32_MAX)); + *offset = 1U + U32(addr); + nvgpu_bitmap_set(allocator->bitmap, U32(addr), len); + } else { + err = -ENOMEM; + } + nvgpu_mutex_release(&allocator->lock); + + return err; +} + +void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator, + u32 offset, u32 len) +{ + /* number zero is reserved; bitmap base is 1 */ + u32 addr = offset - 1U; + + if (allocator->size == 0UL) { + return; + } + + WARN_ON(offset == 0U); + WARN_ON(addr > allocator->size); + WARN_ON((unsigned long)addr + (unsigned long)len > allocator->size); + + nvgpu_mutex_acquire(&allocator->lock); + nvgpu_bitmap_clear(allocator->bitmap, addr, len); + nvgpu_mutex_release(&allocator->lock); +} + +int gk20a_comptag_allocator_init(struct gk20a *g, + struct gk20a_comptag_allocator *allocator, + unsigned long size) +{ + nvgpu_mutex_init(&allocator->lock); + + /* + * 0th comptag is special and is never used. The base for this bitmap + * is 1, and its size is one less than the size of comptag store. + */ + size--; + allocator->bitmap = nvgpu_vzalloc(g, + BITS_TO_LONGS(size) * sizeof(long)); + if (allocator->bitmap == NULL) { + return -ENOMEM; + } + + allocator->size = size; + + return 0; +} + +void gk20a_comptag_allocator_destroy(struct gk20a *g, + struct gk20a_comptag_allocator *allocator) +{ + /* + * called only when exiting the driver (gk20a_remove, or unwinding the + * init stage); no users should be active, so taking the mutex is + * unnecessary here. + */ + allocator->size = 0; + nvgpu_vfree(g, allocator->bitmap); +} diff --git a/drivers/gpu/nvgpu/common/mm/dma.c b/drivers/gpu/nvgpu/common/mm/dma.c new file mode 100644 index 000000000..3a07b0574 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/dma.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags(g, 0, size, mem); +} + +int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, + struct nvgpu_mem *mem) +{ +#ifdef CONFIG_NVGPU_DGPU + if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + * + * Incoming flags are ignored here, since bits other than the + * no-kernel-mapping flag are ignored by the vidmem mapping + * functions anyway. + */ + int err = nvgpu_dma_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (err == 0) { + return 0; + } + + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. + */ + } +#endif + + return nvgpu_dma_alloc_flags_sys(g, flags, size, mem); +} + +int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_sys(g, 0, size, mem); +} + +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0); +} + +int nvgpu_dma_alloc_vid_at(struct gk20a *g, + size_t size, struct nvgpu_mem *mem, u64 at) +{ + return nvgpu_dma_alloc_flags_vid_at(g, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem, at); +} +#endif + +int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + int err = nvgpu_dma_alloc_map_flags(vm, 0, size, mem); + + if (err < 0) { + nvgpu_err(vm->mm->g, "Failed!"); + } + return err; +} + +int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = 0; + +#ifdef CONFIG_NVGPU_DGPU + if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using nvgpu_dma_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + */ + err = nvgpu_dma_alloc_map_flags_vid(vm, + flags | NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (err == 0) { + return 0; + } + + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. 
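+ *
+ * For reference, a minimal kernel-internal usage sketch (buffer name and
+ * size made up) that ends up in this fallback logic:
+ *
+ *   struct nvgpu_mem scratch;
+ *
+ *   if (nvgpu_dma_alloc_map(vm, SZ_4K, &scratch) == 0) {
+ *       ... use scratch.gpu_va ...
+ *       nvgpu_dma_unmap_free(vm, &scratch);
+ *   }
+ *
+ * The flag forcing and the vidmem-to-sysmem fallback both happen underneath
+ * that one call.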
+ */ + } +#endif + + err = nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem); + if (err < 0) { + nvgpu_err(vm->mm->g, "Failed!"); + } + return err; +} + +int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + int err = 0; + + err = nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem); + if (err < 0) { + nvgpu_err(vm->mm->g, "Failed!"); + } + return err; +} + +int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem); + + if (err != 0) { + return err; + } + + mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (mem->gpu_va == 0ULL) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + nvgpu_dma_free(vm->mm->g, mem); + return err; +} + +#ifdef CONFIG_NVGPU_DGPU +int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_map_flags_vid(vm, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem); + + if (err != 0) { + return err; + } + + mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (mem->gpu_va == 0ULL) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + nvgpu_dma_free(vm->mm->g, mem); + return err; +} +#endif + +void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem) +{ + switch (mem->aperture) { + case APERTURE_SYSMEM: + nvgpu_dma_free_sys(g, mem); + break; +#ifdef CONFIG_NVGPU_DGPU + case APERTURE_VIDMEM: + nvgpu_dma_free_vid(g, mem); + break; +#endif + default: + /* like free() on "null" memory */ + break; + } +} + +void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) +{ + if (mem->gpu_va != 0ULL) { + nvgpu_gmmu_unmap(vm, mem, mem->gpu_va); + } + mem->gpu_va = 0; + + nvgpu_dma_free(vm->mm->g, mem); +} diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c new file mode 100644 index 000000000..4fe32b526 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c @@ -0,0 +1,1254 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +#include +#endif + +#ifdef CONFIG_NVGPU_TRACE +#define nvgpu_gmmu_dbg(g, attrs, fmt, args...) \ + do { \ + if ((attrs)->debug) { \ + nvgpu_info(g, fmt, ##args); \ + } else { \ + nvgpu_log(g, gpu_dbg_map, fmt, ##args); \ + } \ +NVGPU_COV_WHITELIST(false_positive, NVGPU_MISRA(Rule, 14_4), "Bug 2623654") \ + } while (false) + +#define nvgpu_gmmu_dbg_v(g, attrs, fmt, args...) \ + do { \ + if ((attrs)->debug) { \ + nvgpu_info(g, fmt, ##args); \ + } else { \ + nvgpu_log(g, gpu_dbg_map_v, fmt, ##args); \ + } \ +NVGPU_COV_WHITELIST(false_positive, NVGPU_MISRA(Rule, 14_4), "Bug 2623654") \ + } while (false) +#endif + +static int pd_allocate(struct vm_gk20a *vm, + struct nvgpu_gmmu_pd *pd, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_attrs *attrs); +static u32 pd_get_size(const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_attrs *attrs); +/* + * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU + * VA will be allocated for you. If addr is non-zero then the buffer will be + * mapped at @addr. + */ +static u64 nvgpu_gmmu_map_core(struct vm_gk20a *vm, + struct nvgpu_mem *mem, + u64 addr, + u64 size, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool priv, + enum nvgpu_aperture aperture) +{ + struct gk20a *g = gk20a_from_vm(vm); + u64 vaddr; + + struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem); + + if (sgt == NULL) { + return 0; + } + + /* + * Later on, when we free this nvgpu_mem's GPU mapping, we are going to + * potentially have to free the GPU VA space. If the address passed in + * is non-zero then this API is not expected to manage the VA space and + * therefor we should not try and free it. But otherwise, if we do + * manage the VA alloc, we obviously must free it. + */ + if (addr != 0U) { + mem->free_gpu_va = false; + } else { + mem->free_gpu_va = true; + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + vaddr = g->ops.mm.gmmu.map(vm, addr, + sgt, /* sg list */ + 0, /* sg offset */ + size, + GMMU_PAGE_SIZE_KERNEL, + 0, /* kind */ + 0, /* ctag_offset */ + flags, rw_flag, + false, /* clear_ctags */ + false, /* sparse */ + priv, /* priv */ + NULL, /* mapping_batch handle */ + aperture); + nvgpu_mutex_release(&vm->update_gmmu_lock); + + nvgpu_sgt_free(g, sgt); + + if (vaddr == 0ULL) { + nvgpu_err(g, "failed to map buffer!"); + return 0; + } + + return vaddr; +} + +/* + * Map a nvgpu_mem into the GMMU. This is for kernel space to use. + */ +u64 nvgpu_gmmu_map(struct vm_gk20a *vm, + struct nvgpu_mem *mem, + u64 size, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool priv, + enum nvgpu_aperture aperture) +{ + return nvgpu_gmmu_map_core(vm, mem, 0, size, flags, rw_flag, priv, + aperture); +} + +/* + * Like nvgpu_gmmu_map() except this can work on a fixed address. 
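+ *
+ * Side by side, with a made-up fixed address:
+ *
+ *   va = nvgpu_gmmu_map(vm, mem, mem->size, 0,
+ *                       gk20a_mem_flag_none, false, mem->aperture);
+ *   va = nvgpu_gmmu_map_fixed(vm, mem, 0x40000000ULL, mem->size, 0,
+ *                             gk20a_mem_flag_none, false, mem->aperture);
+ *
+ * The first call lets the VM choose the GPU VA; the second asks for the
+ * mapping to land exactly at 0x40000000.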
+ */ +u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, + struct nvgpu_mem *mem, + u64 addr, + u64 size, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool priv, + enum nvgpu_aperture aperture) +{ + return nvgpu_gmmu_map_core(vm, mem, addr, size, flags, rw_flag, priv, + aperture); +} + +void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va) +{ + struct gk20a *g = gk20a_from_vm(vm); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + g->ops.mm.gmmu.unmap(vm, + gpu_va, + mem->size, + GMMU_PAGE_SIZE_KERNEL, + mem->free_gpu_va, + gk20a_mem_flag_none, + false, + NULL); + + nvgpu_mutex_release(&vm->update_gmmu_lock); +} + +int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm) +{ + u32 pdb_size; + int err; + + /* + * Need this just for page size. Everything else can be ignored. Also + * note that we can just use pgsz 0 (i.e small pages) since the number + * of bits present in the top level PDE are the same for small/large + * page VMs. + */ + struct nvgpu_gmmu_attrs attrs = { + .pgsz = 0, + }; + + /* + * PDB size here must be at least 4096 bytes so that its address is 4K + * aligned. Although lower PDE tables can be aligned at 256B boundaries + * the PDB must be 4K aligned. + * + * Currently NVGPU_CPU_PAGE_SIZE is used, even when 64K, to work around an issue + * with the PDB TLB invalidate code not being pd_cache aware yet. + * + * Similarly, we can't use nvgpu_pd_alloc() here, because the top-level + * PD must have mem_offs be 0 for the invalidate code to work, so we + * can't use the PD cache. + */ + pdb_size = NVGPU_ALIGN(pd_get_size(&vm->mmu_levels[0], &attrs), NVGPU_CPU_PAGE_SIZE); + + err = nvgpu_pd_cache_alloc_direct(vm->mm->g, &vm->pdb, pdb_size); + if (err != 0) { + return err; + } + vm->pdb.pd_size = pdb_size; + + /* + * One nvgpu_mb() is done after all mapping operations. Don't need + * individual barriers for each PD write. + */ + vm->pdb.mem->skip_wmb = true; + + return err; +} + +/* + * Return the aligned length based on the page size in attrs. + */ +static u64 nvgpu_align_map_length(struct vm_gk20a *vm, u64 length, + struct nvgpu_gmmu_attrs *attrs) +{ + u64 page_size = vm->gmmu_page_sizes[attrs->pgsz]; + + return NVGPU_ALIGN(length, page_size); +} + +static u32 pd_entries(const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_attrs *attrs) +{ + /* + * Number of entries in a PD is easy to compute from the number of bits + * used to index the page directory. That is simply 2 raised to the + * number of bits. + */ + + u32 bit; + + bit = nvgpu_safe_sub_u32(l->hi_bit[attrs->pgsz], + l->lo_bit[attrs->pgsz]); + bit = nvgpu_safe_add_u32(bit, 1U); + return BIT32(bit); +} + +/* + * Computes the size of a PD table (in bytes). + */ +static u32 pd_get_size(const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_attrs *attrs) +{ + return nvgpu_safe_mult_u32(pd_entries(l, attrs), l->entry_size); +} + +/* + * Allocate a physically contiguous region big enough for a gmmu page table + * of the specified level and page size. The whole range is zeroed so that any + * accesses will fault until proper values are programmed. + */ +static int pd_allocate(struct vm_gk20a *vm, + struct nvgpu_gmmu_pd *pd, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_attrs *attrs) +{ + int err; + + /* + * Same basic logic as in pd_allocate_children() except we (re)allocate + * the underlying DMA memory here. 
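+ *
+ * To put a number on pd_get_size(): for a hypothetical level with
+ * hi_bit = 20, lo_bit = 12 and entry_size = 8, pd_entries() gives
+ * 2^(20 - 12 + 1) = 512 entries and pd_get_size() gives 512 * 8 = 4096
+ * bytes, so the allocation below would ask for one 4K chunk.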
+ */ + if (pd->mem != NULL) { + if (pd->pd_size >= pd_get_size(l, attrs)) { + return 0; + } + nvgpu_pd_free(vm, pd); + pd->mem = NULL; + } + + err = nvgpu_pd_alloc(vm, pd, pd_get_size(l, attrs)); + if (err != 0) { + nvgpu_info(vm->mm->g, "error allocating page directory!"); + return err; + } + + /* + * One nvgpu_mb() is done after all mapping operations. Don't need + * individual barriers for each PD write. + */ + pd->mem->skip_wmb = true; + + return 0; +} + +/* + * Compute what page directory index at the passed level the passed virtual + * address corresponds to. @attrs is necessary for determining the page size + * which is used to pick the right bit offsets for the GMMU level. + */ +static u32 pd_index(const struct gk20a_mmu_level *l, u64 virt, + struct nvgpu_gmmu_attrs *attrs) +{ + u64 pd_mask; + u32 pd_shift; + u64 tmp_index; + + nvgpu_assert(attrs->pgsz < ARRAY_SIZE(l->lo_bit)); + pd_shift = l->lo_bit[attrs->pgsz]; + + pd_mask = BIT64(nvgpu_safe_add_u64((u64)l->hi_bit[attrs->pgsz], 1ULL)); + pd_mask = nvgpu_safe_sub_u64(pd_mask, 1ULL); + + /* + * For convenience we don't bother computing the lower bound of the + * mask; it's easier to just shift it off. + */ + tmp_index = (virt & pd_mask) >> pd_shift; + nvgpu_assert(tmp_index <= U64(U32_MAX)); + return U32(tmp_index); +} + +static int pd_allocate_children(struct vm_gk20a *vm, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + + /* + * Check that we have already allocated enough pd_entries for this + * page directory. There's 4 possible cases: + * + * 1. This pd is new and therefor has no entries. + * 2. This pd does not have enough entries. + * 3. This pd has exactly the right number of entries. + * 4. This pd has more than enough entries. + * + * (3) and (4) are easy: just return. Case (1) is also straight forward: + * just allocate enough space for the number of pd_entries. + * + * Case (2) is rare but can happen. It occurs when we have a PD that has + * already been allocated for some VA range with a page size of 64K. If + * later on we free that VA range and then remap that VA range with a + * 4K page size map then we now need more pd space. As such we need to + * reallocate this pd entry array. + * + * Critically case (2) should _only_ ever happen when the PD is not in + * use. Obviously blowing away a bunch of previous PDEs would be + * catastrophic. But the buddy allocator logic prevents mixing page + * sizes within a single last level PD range. Therefor we should only + * ever see this once the entire PD range has been freed - otherwise + * there would be mixing (which, remember, is prevented by the buddy + * allocator). + */ + if (pd->num_entries >= pd_entries(l, attrs)) { + return 0; + } + + if (pd->entries != NULL) { + nvgpu_vfree(g, pd->entries); + } + + pd->num_entries = pd_entries(l, attrs); + pd->entries = nvgpu_vzalloc(g, + nvgpu_safe_mult_u64(sizeof(struct nvgpu_gmmu_pd), + (unsigned long)pd->num_entries)); + if (pd->entries == NULL) { + pd->num_entries = 0; + return -ENOMEM; + } + + return 0; +} + +/* + * If the next level has an update_entry function then we know + * that _this_ level points to PDEs (not PTEs). Thus we need to + * have a bunch of children PDs. 
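+ *
+ * Schematically, the per-chip level tables driving this look roughly like
+ * (names and number of PDE levels illustrative only):
+ *
+ *   static const struct gk20a_mmu_level example_levels[] = {
+ *       { .update_entry = update_pde0, ... },   upper page directories
+ *       { .update_entry = update_pde1, ... },
+ *       { .update_entry = update_pte,  ... },   final PTE level
+ *       { .update_entry = NULL },               terminator
+ *   };
+ *
+ * so "next_l->update_entry != NULL" is exactly the "this level holds PDEs"
+ * test used here and in nvgpu_pte_words().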
+ */ +static int nvgpu_set_pd_level_is_next_level_pde(struct vm_gk20a *vm, + struct nvgpu_gmmu_pd *pd, + struct nvgpu_gmmu_pd **next_pd_ptr, + const struct gk20a_mmu_level *l, + const struct gk20a_mmu_level *next_l, + u32 pd_idx, + struct nvgpu_gmmu_attrs *attrs) +{ + struct nvgpu_gmmu_pd *next_pd = *next_pd_ptr; + + if (next_l->update_entry != NULL) { + int err = 0; + + if (pd_allocate_children(vm, l, pd, attrs) != 0) { + return -ENOMEM; + } + + /* + * Get the next PD so that we know what to put in this + * current PD. If the next level is actually PTEs then + * we don't need this - we will just use the real + * physical target. + */ + next_pd = &pd->entries[pd_idx]; + + /* + * Allocate the backing memory for next_pd. + */ + err = pd_allocate(vm, next_pd, next_l, attrs); + if (err != 0) { + return err; + } + } + *next_pd_ptr = next_pd; + return 0; +} + +/* + * This function programs the GMMU based on two ranges: a physical range and a + * GPU virtual range. The virtual is mapped to the physical. Physical in this + * case can mean either a real physical sysmem address or a IO virtual address + * (for instance when a system has an IOMMU running). + * + * The rest of the parameters are for describing the actual mapping itself. + * + * This function recursively calls itself for handling PDEs. At the final level + * a PTE handler is called. The phys and virt ranges are adjusted for each + * recursion so that each invocation of this function need only worry about the + * range it is passed. + * + * phys_addr will always point to a contiguous range - the discontiguous nature + * of DMA buffers is taken care of at the layer above this. + */ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(deviate, 1, NVGPU_MISRA(Rule, 17_2), "TID-278") +static int nvgpu_set_pd_level(struct vm_gk20a *vm, + struct nvgpu_gmmu_pd *pd, + u32 lvl, + u64 phys_addr, + u64 virt_addr, u64 length, + struct nvgpu_gmmu_attrs *attrs) +{ + int err = 0; + u64 pde_range; + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_gmmu_pd *next_pd = NULL; + const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; + const struct gk20a_mmu_level *next_l = + &vm->mmu_levels[nvgpu_safe_add_u32(lvl, 1)]; + + /* + * 5 levels for Pascal+. For pre-pascal we only have 2. This puts + * offsets into the page table debugging code which makes it easier to + * see what level prints are from. + */ +#ifdef CONFIG_NVGPU_TRACE + static const char *lvl_debug[] = { + "", /* L=0 */ + " ", /* L=1 */ + " ", /* L=2 */ + " ", /* L=3 */ + " ", /* L=4 */ + }; + + nvgpu_gmmu_dbg_v(g, attrs, + "L=%d %sGPU virt %#-12llx +%#-9llx -> phys %#-12llx", + lvl, + lvl_debug[lvl], + virt_addr, + length, + phys_addr); +#endif /* CONFIG_NVGPU_TRACE */ + + /* This limits recursion */ + nvgpu_assert(lvl < g->ops.mm.gmmu.get_max_page_table_levels(g)); + + pde_range = 1ULL << (u64)l->lo_bit[attrs->pgsz]; + + /* + * Iterate across the mapping in chunks the size of this level's PDE. + * For each of those chunks program our level's PDE and then, if there's + * a next level, program the next level's PDEs/PTEs. + */ + while (length != 0ULL) { + u32 pd_idx = pd_index(l, virt_addr, attrs); + u64 chunk_size; + u64 target_addr; + u64 tmp_len; + + /* + * Truncate the pde_range when the virtual address does not + * start at a PDE boundary. 
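+ *
+ * Quick numeric example (values hypothetical): with a 2 MB pde_range
+ * (0x200000) and virt_addr = 0x340000,
+ *
+ *   virt_addr & (pde_range - 1U) = 0x140000
+ *   tmp_len = 0x200000 - 0x140000 = 0xc0000
+ *
+ * so the first chunk covers only the 768 KB up to the next PDE boundary
+ * and the loop carries on from there.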
+ */ + nvgpu_assert(pde_range >= 1ULL); + tmp_len = nvgpu_safe_sub_u64(pde_range, + virt_addr & (pde_range - 1U)); + chunk_size = min(length, tmp_len); + + err = nvgpu_set_pd_level_is_next_level_pde(vm, pd, &next_pd, + l, next_l, pd_idx, attrs); + if (err != 0) { + return err; + } + + /* + * This is the address we want to program into the actual PDE/ + * PTE. When the next level is PDEs we need the target address + * to be the table of PDEs. When the next level is PTEs the + * target addr is the real physical address we are aiming for. + */ + target_addr = (next_pd != NULL) ? + nvgpu_pd_gpu_addr(g, next_pd) : phys_addr; + + l->update_entry(vm, l, + pd, pd_idx, + virt_addr, + target_addr, + attrs); + + if (next_l->update_entry != NULL) { + err = nvgpu_set_pd_level(vm, next_pd, + nvgpu_safe_add_u32(lvl, 1U), + phys_addr, + virt_addr, + chunk_size, + attrs); + + if (err != 0) { + return err; + } + } + + virt_addr = nvgpu_safe_add_u64(virt_addr, chunk_size); + + /* + * Only add to phys_addr if it's non-zero. A zero value implies + * we are unmapping as as a result we don't want to place + * non-zero phys addresses in the PTEs. A non-zero phys-addr + * would also confuse the lower level PTE programming code. + */ + if (phys_addr != 0ULL) { + phys_addr += chunk_size; + } + length -= chunk_size; + } + +#ifdef CONFIG_NVGPU_TRACE + nvgpu_gmmu_dbg_v(g, attrs, "L=%d %s%s", lvl, lvl_debug[lvl], + "ret!"); +#endif /* CONFIG_NVGPU_TRACE */ + + return 0; +} +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 17_2)) + +static int nvgpu_gmmu_do_update_page_table_sgl(struct vm_gk20a *vm, + struct nvgpu_sgt *sgt, void *sgl, + u64 *space_to_skip_ptr, + u64 *virt_addr_ptr, u64 *length_ptr, + u64 phys_addr_val, u64 ipa_addr_val, + u64 phys_length_val, u64 sgl_length_val, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + u64 space_to_skip = *space_to_skip_ptr; + u64 virt_addr = *virt_addr_ptr; + u64 length = *length_ptr; + u64 phys_addr = phys_addr_val; + u64 ipa_addr = ipa_addr_val; + u64 phys_length = phys_length_val; + u64 sgl_length = sgl_length_val; + int err; + + while ((sgl_length > 0ULL) && (length > 0ULL)) { + /* + * Holds the size of the portion of SGL that is backed + * with physically contiguous memory. + */ + u64 sgl_contiguous_length; + /* + * Number of bytes of the SGL entry that is actually + * mapped after accounting for space_to_skip. + */ + u64 mapped_sgl_length; + + /* + * For virtualized OSes translate IPA to PA. Retrieve + * the size of the underlying physical memory chunk to + * which SGL has been mapped. + */ + phys_addr = nvgpu_sgt_ipa_to_pa(g, sgt, sgl, ipa_addr, + &phys_length); + phys_addr = nvgpu_safe_add_u64( + g->ops.mm.gmmu.gpu_phys_addr(g, attrs, + phys_addr), + space_to_skip); + + /* + * For virtualized OSes when phys_length is less than + * sgl_length check if space_to_skip exceeds phys_length + * if so skip this memory chunk + */ + if (space_to_skip >= phys_length) { + space_to_skip -= phys_length; + ipa_addr = nvgpu_safe_add_u64(ipa_addr, + phys_length); + sgl_length -= phys_length; + continue; + } + + sgl_contiguous_length = min(phys_length, sgl_length); + mapped_sgl_length = min(length, sgl_contiguous_length - + space_to_skip); + + err = nvgpu_set_pd_level(vm, &vm->pdb, + 0U, + phys_addr, + virt_addr, + mapped_sgl_length, + attrs); + if (err != 0) { + return err; + } + + /* + * Update the map pointer and the remaining length. 
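+ *
+ * Example iteration with made-up numbers: space_to_skip = 0x1000,
+ * sgl_length = phys_length = 0x3000 and length = 0x4000 give
+ * mapped_sgl_length = min(0x4000, 0x3000 - 0x1000) = 0x2000; afterwards
+ * length drops to 0x2000, sgl_length drops to 0 (this SGL entry is done)
+ * and space_to_skip is cleared for the following entries.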
+ */ + virt_addr = nvgpu_safe_add_u64(virt_addr, + mapped_sgl_length); + length = nvgpu_safe_sub_u64(length, + mapped_sgl_length); + sgl_length = nvgpu_safe_sub_u64(sgl_length, + nvgpu_safe_add_u64(mapped_sgl_length, + space_to_skip)); + ipa_addr = nvgpu_safe_add_u64(ipa_addr, + nvgpu_safe_add_u64(mapped_sgl_length, + space_to_skip)); + + /* + * Space has been skipped so zero this for future + * chunks. + */ + space_to_skip = 0; + } + *space_to_skip_ptr = space_to_skip; + *virt_addr_ptr = virt_addr; + *length_ptr = length; + return 0; +} + +static int nvgpu_gmmu_do_update_page_table_no_iommu(struct vm_gk20a *vm, + struct nvgpu_sgt *sgt, u64 space_to_skip_val, + u64 virt_addr_val, u64 length_val, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + void *sgl; + u64 space_to_skip = space_to_skip_val; + u64 virt_addr = virt_addr_val; + u64 length = length_val; + int err; + + nvgpu_sgt_for_each_sgl(sgl, sgt) { + /* + * ipa_addr == phys_addr for non virtualized OSes. + */ + u64 phys_addr = 0ULL; + u64 ipa_addr = 0ULL; + /* + * For non virtualized OSes SGL entries are contiguous in + * physical memory (sgl_length == phys_length). For virtualized + * OSes SGL entries are mapped to intermediate physical memory + * which may subsequently point to discontiguous physical + * memory. Therefore phys_length may not be equal to sgl_length. + */ + u64 phys_length = 0ULL; + u64 sgl_length = 0ULL; + + /* + * Cut out sgl ents for space_to_skip. + */ + if ((space_to_skip != 0ULL) && + (space_to_skip >= nvgpu_sgt_get_length(sgt, sgl))) { + space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); + continue; + } + + /* + * IPA and PA have 1:1 mapping for non virtualized OSes. + */ + ipa_addr = nvgpu_sgt_get_ipa(g, sgt, sgl); + + /* + * For non-virtualized OSes SGL entries are contiguous and hence + * sgl_length == phys_length. For virtualized OSes the + * phys_length will be updated by nvgpu_sgt_ipa_to_pa. + */ + sgl_length = nvgpu_sgt_get_length(sgt, sgl); + phys_length = sgl_length; + + err = nvgpu_gmmu_do_update_page_table_sgl(vm, sgt, sgl, + &space_to_skip, &virt_addr, &length, + phys_addr, ipa_addr, + phys_length, sgl_length, + attrs); + if (err != 0) { + return err; + } + + if (length == 0ULL) { + break; + } + } + return 0; +} + +static int nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm, + struct nvgpu_sgt *sgt, + u64 space_to_skip, + u64 virt_addr, + u64 length, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + bool is_iommuable, sgt_is_iommuable; + int err = 0; + + if (sgt == NULL) { + /* + * This is considered an unmap. Just pass in 0 as the physical + * address for the entire GPU range. + */ + nvgpu_assert(virt_addr != 0ULL); + + err = nvgpu_set_pd_level(vm, &vm->pdb, + 0U, + 0, + virt_addr, length, + attrs); + if (err != 0) { + nvgpu_err(g, "Failed!"); + } + return err; + } + + /* + * At this point we have a scatter-gather list pointing to some number + * of discontiguous chunks of memory. We must iterate over that list and + * generate a GMMU map call for each chunk. There are several + * possibilities: + * + * 1. IOMMU enabled, IOMMU addressing (typical iGPU) + * 2. IOMMU enabled, IOMMU bypass (NVLINK bypasses SMMU) + * 3. IOMMU disabled (less common but still supported) + * 4. VIDMEM + * + * For (1) we can assume that there's really only one actual SG chunk + * since the IOMMU gives us a single contiguous address range. However, + * for (2), (3) and (4) we have to actually go through each SG entry and + * map each chunk individually. 
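+ *
+ * In code terms the split below boils down to (pseudo-code):
+ *
+ *   if (sysmem aperture && nvgpu_iommuable(g) && nvgpu_sgt_iommuable(g, sgt))
+ *       one nvgpu_set_pd_level() call on the IOMMU address    -- case (1)
+ *   else
+ *       per-SGL walk in the _no_iommu() helper                -- cases (2)-(4)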
+ */ + is_iommuable = nvgpu_iommuable(g); + sgt_is_iommuable = nvgpu_sgt_iommuable(g, sgt); + if (nvgpu_aperture_is_sysmem(attrs->aperture) && + is_iommuable && sgt_is_iommuable) { + u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs); + + io_addr = nvgpu_safe_add_u64(io_addr, space_to_skip); + + err = nvgpu_set_pd_level(vm, &vm->pdb, + 0U, + io_addr, + virt_addr, + length, + attrs); + + return err; + } + + /* + * Handle cases (2), (3), and (4): do the no-IOMMU mapping. In this case + * we really are mapping physical pages directly. + */ + err = nvgpu_gmmu_do_update_page_table_no_iommu(vm, sgt, space_to_skip, + virt_addr, length, attrs); + + if (err < 0) { + nvgpu_err(g, "Failed!"); + } + return err; +} + +/* + * This is the true top level GMMU mapping logic. This breaks down the incoming + * scatter gather table and does actual programming of GPU virtual address to + * physical* address. + * + * The update of each level of the page tables is farmed out to chip specific + * implementations. But the logic around that is generic to all chips. Every + * chip has some number of PDE levels and then a PTE level. + * + * Each chunk of the incoming SGL is sent to the chip specific implementation + * of page table update. + * + * [*] Note: the "physical" address may actually be an IO virtual address in the + * case of SMMU usage. + */ +static void nvgpu_gmmu_update_page_table_dbg_print(struct gk20a *g, + struct nvgpu_gmmu_attrs *attrs, struct vm_gk20a *vm, + struct nvgpu_sgt *sgt, u64 space_to_skip, + u64 virt_addr, u64 length, u32 page_size) +{ +#ifdef CONFIG_NVGPU_TRACE + nvgpu_gmmu_dbg(g, attrs, + "vm=%s " + "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " + "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " + "kind=%#02x APT=%-6s %c%c%c%c%c", + vm->name, + (sgt != NULL) ? "MAP" : "UNMAP", + virt_addr, + length, + (sgt != NULL) ? + nvgpu_sgt_get_phys(g, sgt, sgt->sgl) : 0ULL, + space_to_skip, + page_size >> 10, + nvgpu_gmmu_perm_str(attrs->rw_flag), + attrs->kind_v, + nvgpu_aperture_str(attrs->aperture), + attrs->cacheable ? 'C' : '-', + attrs->sparse ? 'S' : '-', + attrs->priv ? 'P' : '-', + attrs->valid ? 'V' : '-', + attrs->platform_atomic ? 'A' : '-'); +#endif /* CONFIG_NVGPU_TRACE */ +} + +static int nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, + struct nvgpu_sgt *sgt, + u64 space_to_skip, + u64 virt_addr, + u64 length, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + u32 page_size; + int err; + + /* note: here we need to map kernel to small, since the + * low-level mmu code assumes 0 is small and 1 is big pages */ + if (attrs->pgsz == GMMU_PAGE_SIZE_KERNEL) { + attrs->pgsz = GMMU_PAGE_SIZE_SMALL; + } + + page_size = vm->gmmu_page_sizes[attrs->pgsz]; + + if ((page_size == 0U) || + ((space_to_skip & (U64(page_size) - U64(1))) != 0ULL)) { + return -EINVAL; + } + + /* + * Update length to be aligned to the passed page size. + */ + length = nvgpu_align_map_length(vm, length, attrs); + + nvgpu_gmmu_update_page_table_dbg_print(g, attrs, vm, sgt, + space_to_skip, virt_addr, length, page_size); + + err = nvgpu_gmmu_do_update_page_table(vm, + sgt, + space_to_skip, + virt_addr, + length, + attrs); + if (err != 0) { + nvgpu_err(g, "nvgpu_gmmu_do_update_page_table returned error"); + } + + nvgpu_mb(); + +#ifdef CONFIG_NVGPU_TRACE + nvgpu_gmmu_dbg(g, attrs, "%-5s Done!", + (sgt != NULL) ? "MAP" : "UNMAP"); +#endif /* CONFIG_NVGPU_TRACE */ + + return err; +} + +/** + * nvgpu_gmmu_map_locked - Map a buffer into the GMMU + * + * This is for non-vGPU chips. 
It's part of the HAL at the moment but really + * should not be. Chip specific stuff is handled at the PTE/PDE programming + * layer. The rest of the logic is essentially generic for all chips. + * + * To call this function you must have locked the VM lock: vm->update_gmmu_lock. + * However, note: this function is not called directly. It's used through the + * mm.gmmu_lock() HAL. So before calling the mm.gmmu_lock() HAL make sure you + * have the update_gmmu_lock aquired. + */ +u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm, + u64 vaddr, + struct nvgpu_sgt *sgt, + u64 buffer_offset, + u64 size, + u32 pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool clear_ctags, + bool sparse, + bool priv, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture) +{ + struct gk20a *g = gk20a_from_vm(vm); + int err = 0; + bool allocated = false; + struct nvgpu_gmmu_attrs attrs = { + .pgsz = pgsz_idx, + .kind_v = kind_v, + .cacheable = ((flags & NVGPU_VM_MAP_CACHEABLE) != 0U), + .rw_flag = rw_flag, + .sparse = sparse, + .priv = priv, + .valid = (flags & NVGPU_VM_MAP_UNMAPPED_PTE) == 0U, + .aperture = aperture, + .platform_atomic = (flags & NVGPU_VM_MAP_PLATFORM_ATOMIC) != 0U + }; +#ifdef CONFIG_NVGPU_COMPRESSION + u64 ctag_granularity = g->ops.fb.compression_page_size(g); + + attrs.ctag = (u64)ctag_offset * ctag_granularity; + /* + * We need to add the buffer_offset within compression_page_size so that + * the programmed ctagline gets increased at compression_page_size + * boundaries. + */ + if (attrs.ctag != 0ULL) { + nvgpu_assert(ctag_granularity >= 1ULL); + attrs.ctag = nvgpu_safe_add_u64(attrs.ctag, + buffer_offset & (ctag_granularity - U64(1))); + } + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + attrs.cbc_comptagline_mode = + g->ops.fb.is_comptagline_mode_enabled != NULL ? + g->ops.fb.is_comptagline_mode_enabled(g) : true; +#endif +#endif + + attrs.l3_alloc = ((flags & NVGPU_VM_MAP_L3_ALLOC) != 0U); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_3288192) && + (attrs.l3_alloc)) { + nvgpu_gmmu_dbg_v(g, &attrs, + "L3 alloc is requested when L3 cache is not supported"); + attrs.l3_alloc = false; + } +#endif + + /* + * Only allocate a new GPU VA range if we haven't already been passed a + * GPU VA range. This facilitates fixed mappings. 
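+ *
+ * A fixed-offset mapping would reach this code roughly as follows (the
+ * address is hypothetical):
+ *
+ *   va = g->ops.mm.gmmu.map(vm, 0x80000000ULL, sgt, 0, size,
+ *                           GMMU_PAGE_SIZE_KERNEL, 0, 0, flags, rw_flag,
+ *                           false, false, false, NULL, aperture);
+ *
+ * in which case the branch below is skipped, no VA is allocated here, and
+ * the error paths must not free the caller-owned range.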
+ */ + if (vaddr == 0ULL) { + vaddr = nvgpu_vm_alloc_va(vm, size, pgsz_idx); + if (vaddr == 0ULL) { + nvgpu_err(g, "failed to allocate va space"); + err = -ENOMEM; + goto fail_alloc; + } + allocated = true; + } + + err = nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, + vaddr, size, &attrs); + if (err != 0) { + nvgpu_err(g, "failed to update ptes on map"); + goto fail_validate; + } + + if (batch == NULL) { + err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem); + if (err != 0) { + nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err); + goto fail_validate; + } + } else { + batch->need_tlb_invalidate = true; + } + + return vaddr; + +fail_validate: + if (allocated) { + nvgpu_vm_free_va(vm, vaddr, pgsz_idx); + } +fail_alloc: + nvgpu_err(g, "%s: failed with err=%d", __func__, err); + return 0; +} + +void nvgpu_gmmu_unmap_locked(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + u32 pgsz_idx, + bool va_allocated, + enum gk20a_mem_rw_flag rw_flag, + bool sparse, + struct vm_gk20a_mapping_batch *batch) +{ + int err = 0; + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_gmmu_attrs attrs = { + .pgsz = pgsz_idx, + .kind_v = 0, +#ifdef CONFIG_NVGPU_COMPRESSION + .ctag = 0, +#endif + .cacheable = false, + .rw_flag = rw_flag, + .sparse = sparse, + .priv = false, + .valid = false, + .aperture = APERTURE_INVALID, + }; +#ifdef CONFIG_NVGPU_COMPRESSION +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + attrs.cbc_comptagline_mode = + g->ops.fb.is_comptagline_mode_enabled != NULL ? + g->ops.fb.is_comptagline_mode_enabled(g) : true; +#endif +#endif + if (va_allocated) { + nvgpu_vm_free_va(vm, vaddr, pgsz_idx); + } + + /* unmap here needs to know the page size we assigned at mapping */ + err = nvgpu_gmmu_update_page_table(vm, NULL, 0, + vaddr, size, &attrs); + if (err != 0) { + nvgpu_err(g, "failed to update gmmu ptes on unmap"); + } + + if (batch == NULL) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { + nvgpu_err(g, "gk20a_mm_l2_flush[1] failed"); + } + err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem); + if (err != 0) { + nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err); + } + } else { + if (!batch->gpu_l2_flushed) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { + nvgpu_err(g, "gk20a_mm_l2_flush[2] failed"); + } + batch->gpu_l2_flushed = true; + } + batch->need_tlb_invalidate = true; + } +} + +u32 nvgpu_pte_words(struct gk20a *g) +{ + const struct gk20a_mmu_level *l = + g->ops.mm.gmmu.get_mmu_levels(g, SZ_64K); + const struct gk20a_mmu_level *next_l; + + /* + * Iterate to the bottom GMMU level - the PTE level. The levels array + * is always NULL terminated (by the update_entry function). + */ + do { + next_l = l + 1; + if (next_l->update_entry == NULL) { + break; + } + + l++; + } while (true); + + return l->entry_size / (u32)sizeof(u32); +} + +/* Walk last level of page table to find PTE */ +static int nvgpu_locate_pte_last_level(struct gk20a *g, + struct nvgpu_gmmu_pd *pd, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd **pd_out, + u32 *pd_idx_out, u32 *pd_offs_out, + u32 *data, u32 pd_idx) +{ + u32 pte_base; + u32 pte_size; + u32 idx; + + if (pd->mem == NULL) { + return -EINVAL; + } + + /* + * Take into account the real offset into the nvgpu_mem + * since the PD may be located at an offset other than 0 + * (due to PD packing). 
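+ *
+ * Worked example (hypothetical packing): with pd->mem_offs = 0x1000,
+ * entry_size = 8 and pd_idx = 5,
+ *
+ *   pte_base = 0x1000 / 4 + (5 * 8) / 4 = 1024 + 10 = 1034 words
+ *   pte_size = 8 / 4 = 2 words
+ *
+ * so the readback below copies words 1034 and 1035 out of the backing
+ * nvgpu_mem.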
+ */ + pte_base = nvgpu_safe_add_u32( + pd->mem_offs / (u32)sizeof(u32), + nvgpu_pd_offset_from_index(l, pd_idx)); + pte_size = l->entry_size / (u32)sizeof(u32); + + if (data != NULL) { + for (idx = 0; idx < pte_size; idx++) { + u32 tmp_word = nvgpu_safe_add_u32(idx, + pte_base); + data[idx] = nvgpu_mem_rd32(g, pd->mem, + tmp_word); + } + } + + if (pd_out != NULL) { + *pd_out = pd; + } + + if (pd_idx_out != NULL) { + *pd_idx_out = pd_idx; + } + + if (pd_offs_out != NULL) { + *pd_offs_out = nvgpu_pd_offset_from_index(l, + pd_idx); + } + return 0; +} + +/* + * Recursively walk the pages tables to find the PTE. + */ +static int nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm, + struct nvgpu_gmmu_pd *pd, + u64 vaddr, u32 lvl, + struct nvgpu_gmmu_attrs *attrs, + u32 *data, + struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out, + u32 *pd_offs_out) +{ + const struct gk20a_mmu_level *l; + const struct gk20a_mmu_level *next_l; + u32 pd_idx; + bool done = false; + + do { + l = &vm->mmu_levels[lvl]; + next_l = &vm->mmu_levels[nvgpu_safe_add_u32(lvl, 1)]; + pd_idx = pd_index(l, vaddr, attrs); + /* + * If this isn't the final level (i.e there's a valid next level) + * then find the next level PD and recurse. + */ + if (next_l->update_entry != NULL) { + struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx; + + /* Invalid entry! */ + if (pd_next->mem == NULL) { + return -EINVAL; + } + + attrs->pgsz = l->get_pgsz(g, l, pd, pd_idx); + + if (attrs->pgsz >= GMMU_NR_PAGE_SIZES) { + return -EINVAL; + } + + pd = pd_next; + lvl = nvgpu_safe_add_u32(lvl, 1); + } else { + int err = nvgpu_locate_pte_last_level(g, pd, l, pd_out, + pd_idx_out, pd_offs_out, data, pd_idx); + if (err != 0) { + return err; + } + + done = true; + } + } while (!done); + + return 0; +} + +int nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) +{ + int err = 0; + struct nvgpu_gmmu_attrs attrs = { + .pgsz = 0, + }; + + err = nvgpu_locate_pte(g, vm, &vm->pdb, + vaddr, 0U, &attrs, + pte, NULL, NULL, NULL); + if (err < 0) { + nvgpu_err(g, "Failed!"); + } + return err; +} + +int nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) +{ + struct nvgpu_gmmu_pd *pd = NULL; + u32 pd_idx = 0; + u32 pd_offs = 0; + u32 pte_size, i; + int err; + struct nvgpu_gmmu_attrs attrs = { + .pgsz = 0, + }; +#ifdef CONFIG_NVGPU_TRACE + struct nvgpu_gmmu_attrs *attrs_ptr = &attrs; +#endif /* CONFIG_NVGPU_TRACE */ + + err = nvgpu_locate_pte(g, vm, &vm->pdb, + vaddr, 0U, &attrs, + NULL, &pd, &pd_idx, &pd_offs); + if (err != 0) { + return err; + } + + pte_size = nvgpu_pte_words(g); + + for (i = 0; i < pte_size; i++) { + nvgpu_pd_write(g, pd, (size_t)pd_offs + (size_t)i, pte[i]); + +#ifdef CONFIG_NVGPU_TRACE + pte_dbg(g, attrs_ptr, + "PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]); +#endif /* CONFIG_NVGPU_TRACE */ + } + + /* + * Ensures the pd_write()s are done. The pd_write() does not do this + * since generally there's lots of pd_write()s called one after another. + * There probably also needs to be a TLB invalidate as well but we leave + * that to the caller of this function. + */ + nvgpu_wmb(); + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c new file mode 100644 index 000000000..ff41343ff --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c @@ -0,0 +1,514 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pd_cache_priv.h" + +static inline struct nvgpu_pd_mem_entry * +nvgpu_pd_mem_entry_from_list_entry(struct nvgpu_list_node *node) +{ + return (struct nvgpu_pd_mem_entry *) + ((uintptr_t)node - + offsetof(struct nvgpu_pd_mem_entry, list_entry)); +}; + +static inline struct nvgpu_pd_mem_entry * +nvgpu_pd_mem_entry_from_tree_entry(struct nvgpu_rbtree_node *node) +{ + return (struct nvgpu_pd_mem_entry *) + ((uintptr_t)node - + offsetof(struct nvgpu_pd_mem_entry, tree_entry)); +}; + +static u32 nvgpu_pd_cache_nr(u32 bytes) +{ + unsigned long tmp = ilog2((unsigned long)bytes >> + ((unsigned long)NVGPU_PD_CACHE_MIN_SHIFT - 1UL)); + + nvgpu_assert(tmp <= U32_MAX); + return (u32)tmp; +} + +static u32 nvgpu_pd_cache_get_nr_entries(struct nvgpu_pd_mem_entry *pentry) +{ + BUG_ON(pentry->pd_size == 0); + + return (nvgpu_safe_cast_u64_to_u32(NVGPU_PD_CACHE_SIZE)) / + pentry->pd_size; +} + +/* + * Return the _physical_ address of a page directory. + */ +u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd) +{ + u64 page_addr; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { + page_addr = nvgpu_mem_get_phys_addr(g, pd->mem); + } else { + page_addr = nvgpu_mem_get_addr(g, pd->mem); + } + + return nvgpu_safe_add_u64(page_addr, U64(pd->mem_offs)); +} + +u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx) +{ + return nvgpu_safe_mult_u32(pd_idx, l->entry_size) / U32(sizeof(u32)); +} + +void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd, + size_t w, u32 data) +{ + u64 tmp_offset = nvgpu_safe_add_u64((pd->mem_offs / sizeof(u32)), w); + + nvgpu_mem_wr32(g, pd->mem, + nvgpu_safe_cast_u64_to_u32(tmp_offset), + data); +} + +int nvgpu_pd_cache_init(struct gk20a *g) +{ + struct nvgpu_pd_cache *cache; + u32 i; + + /* + * This gets called from finalize_poweron() so we need to make sure we + * don't reinit the pd_cache over and over. 
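+ *
+ * In other words, repeated power cycles are expected to behave like this
+ * sketch:
+ *
+ *   nvgpu_pd_cache_init(g);   first poweron: allocates g->mm.pd_cache
+ *   nvgpu_pd_cache_init(g);   later poweron: cache already there, returns 0
+ *   nvgpu_pd_cache_fini(g);   teardown: frees it and clears the pointer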
+ */ + if (g->mm.pd_cache != NULL) { + return 0; + } + + cache = nvgpu_kzalloc(g, sizeof(*cache)); + if (cache == NULL) { + nvgpu_err(g, "Failed to alloc pd_cache!"); + return -ENOMEM; + } + + for (i = 0U; i < NVGPU_PD_CACHE_COUNT; i++) { + nvgpu_init_list_node(&cache->full[i]); + nvgpu_init_list_node(&cache->partial[i]); + } + + cache->mem_tree = NULL; + + nvgpu_mutex_init(&cache->lock); + + g->mm.pd_cache = cache; + + pd_dbg(g, "PD cache initialized!"); + + return 0; +} + +void nvgpu_pd_cache_fini(struct gk20a *g) +{ + u32 i; + struct nvgpu_pd_cache *cache = g->mm.pd_cache; + + if (cache == NULL) { + return; + } + + for (i = 0U; i < NVGPU_PD_CACHE_COUNT; i++) { + nvgpu_assert(nvgpu_list_empty(&cache->full[i])); + nvgpu_assert(nvgpu_list_empty(&cache->partial[i])); + } + + nvgpu_kfree(g, g->mm.pd_cache); + g->mm.pd_cache = NULL; +} + +/* + * This is the simple pass-through for greater than page or page sized PDs. + * + * Note: this does not need the cache lock since it does not modify any of the + * PD cache data structures. + */ +int nvgpu_pd_cache_alloc_direct(struct gk20a *g, + struct nvgpu_gmmu_pd *pd, u32 bytes) +{ + int err; + unsigned long flags = 0; + + pd_dbg(g, "PD-Alloc [D] %u bytes", bytes); + + pd->mem = nvgpu_kzalloc(g, sizeof(*pd->mem)); + if (pd->mem == NULL) { + nvgpu_err(g, "OOM allocating nvgpu_mem struct!"); + return -ENOMEM; + } + + /* + * If bytes == NVGPU_CPU_PAGE_SIZE then it's impossible to get a discontiguous DMA + * allocation. Some DMA implementations may, despite this fact, still + * use the contiguous pool for page sized allocations. As such only + * request explicitly contiguous allocs if the page directory is larger + * than the page size. Also, of course, this is all only revelant for + * GPUs not using an IOMMU. If there is an IOMMU DMA allocs are always + * going to be virtually contiguous and we don't have to force the + * underlying allocations to be physically contiguous as well. + */ + if (!nvgpu_iommuable(g) && (bytes > NVGPU_CPU_PAGE_SIZE)) { + flags = NVGPU_DMA_PHYSICALLY_ADDRESSED; + } + + err = nvgpu_dma_alloc_flags(g, flags, bytes, pd->mem); + if (err != 0) { + nvgpu_err(g, "OOM allocating page directory!"); + nvgpu_kfree(g, pd->mem); + return -ENOMEM; + } + + pd->cached = false; + pd->mem_offs = 0; + + return 0; +} + +/* + * Make a new nvgpu_pd_cache_entry and allocate a PD from it. Update the passed + * pd to reflect this allocation. + */ +static int nvgpu_pd_cache_alloc_new(struct gk20a *g, + struct nvgpu_pd_cache *cache, + struct nvgpu_gmmu_pd *pd, + u32 bytes) +{ + struct nvgpu_pd_mem_entry *pentry; + u64 flags = 0UL; + int32_t err; + + pd_dbg(g, "PD-Alloc [C] New: offs=0"); + + pentry = nvgpu_kzalloc(g, sizeof(*pentry)); + if (pentry == NULL) { + nvgpu_err(g, "OOM allocating pentry!"); + return -ENOMEM; + } + + if (!nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > NVGPU_CPU_PAGE_SIZE)) { + flags = NVGPU_DMA_PHYSICALLY_ADDRESSED; + } + + err = nvgpu_dma_alloc_flags(g, flags, + NVGPU_PD_CACHE_SIZE, &pentry->mem); + if (err != 0) { + nvgpu_kfree(g, pentry); + + /* Not enough contiguous space, but a direct + * allocation may work + */ + if (err == -ENOMEM) { + return nvgpu_pd_cache_alloc_direct(g, pd, bytes); + } + nvgpu_err(g, "Unable to DMA alloc!"); + return -ENOMEM; + } + + pentry->pd_size = bytes; + nvgpu_list_add(&pentry->list_entry, + &cache->partial[nvgpu_pd_cache_nr(bytes)]); + + /* + * This allocates the very first PD table in the set of tables in this + * nvgpu_pd_mem_entry. 
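The contiguity decision in nvgpu_pd_cache_alloc_direct() above is worth calling out; condensed into a helper it would look like the sketch below (illustrative only, same condition as the code).

static unsigned long example_pd_dma_flags(struct gk20a *g, u32 bytes)
{
	/*
	 * Only force a physically contiguous allocation when there is no
	 * IOMMU and the PD spans more than one CPU page; with an IOMMU the
	 * DMA mapping is virtually contiguous anyway.
	 */
	if (!nvgpu_iommuable(g) && (bytes > NVGPU_CPU_PAGE_SIZE)) {
		return NVGPU_DMA_PHYSICALLY_ADDRESSED;
	}

	return 0UL;
}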
+ */ + nvgpu_set_bit(0U, pentry->alloc_map); + pentry->allocs = 1; + + /* + * Now update the nvgpu_gmmu_pd to reflect this allocation. + */ + pd->mem = &pentry->mem; + pd->mem_offs = 0; + pd->cached = true; + + pentry->tree_entry.key_start = (u64)(uintptr_t)&pentry->mem; + nvgpu_rbtree_insert(&pentry->tree_entry, &cache->mem_tree); + + return 0; +} + +static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g, + struct nvgpu_pd_cache *cache, + struct nvgpu_pd_mem_entry *pentry, + struct nvgpu_gmmu_pd *pd) +{ + u32 bit_offs; + u32 mem_offs; + u32 nr_bits = nvgpu_pd_cache_get_nr_entries(pentry); + + /* + * Find and allocate an open PD. + */ + bit_offs = nvgpu_safe_cast_u64_to_u32( + find_first_zero_bit(pentry->alloc_map, nr_bits)); + mem_offs = nvgpu_safe_mult_u32(bit_offs, pentry->pd_size); + + pd_dbg(g, "PD-Alloc [C] Partial: offs=%u nr_bits=%d src=0x%p", + bit_offs, nr_bits, pentry); + + /* Bit map full. Somethings wrong. */ + nvgpu_assert(bit_offs < nr_bits); + + nvgpu_set_bit(bit_offs, pentry->alloc_map); + pentry->allocs = nvgpu_safe_add_u32(pentry->allocs, 1U); + + /* + * First update the pd. + */ + pd->mem = &pentry->mem; + pd->mem_offs = mem_offs; + pd->cached = true; + + /* + * Now make sure the pentry is in the correct list (full vs partial). + */ + if (pentry->allocs >= nr_bits) { + pd_dbg(g, "Adding pentry to full list!"); + nvgpu_list_del(&pentry->list_entry); + nvgpu_list_add(&pentry->list_entry, + &cache->full[nvgpu_pd_cache_nr(pentry->pd_size)]); + } + + return 0; +} + +/* + * Get a partially full nvgpu_pd_mem_entry. Returns NULL if there is no partial + * nvgpu_pd_mem_entry's. + */ +static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_get_partial( + struct nvgpu_pd_cache *cache, u32 bytes) +{ + struct nvgpu_list_node *list = + &cache->partial[nvgpu_pd_cache_nr(bytes)]; + + if (nvgpu_list_empty(list)) { + return NULL; + } + + return nvgpu_list_first_entry(list, + nvgpu_pd_mem_entry, + list_entry); +} + +/* + * Allocate memory from an nvgpu_mem for the page directory. + */ +static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache, + struct nvgpu_gmmu_pd *pd, u32 bytes) +{ + struct nvgpu_pd_mem_entry *pentry; + int err; + bool bytes_valid; + + pd_dbg(g, "PD-Alloc [C] %u bytes", bytes); + + bytes_valid = bytes >= NVGPU_PD_CACHE_MIN; + if (bytes_valid) { + bytes_valid = (bytes & nvgpu_safe_sub_u32(bytes, 1U)) == 0U; + } + if (!bytes_valid) { + pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes); + return -EINVAL; + } + + nvgpu_assert(bytes < NVGPU_PD_CACHE_SIZE); + + pentry = nvgpu_pd_cache_get_partial(cache, bytes); + if (pentry == NULL) { + err = nvgpu_pd_cache_alloc_new(g, cache, pd, bytes); + } else { + err = nvgpu_pd_cache_alloc_from_partial(g, cache, pentry, pd); + } + + if (err != 0) { + nvgpu_err(g, "PD-Alloc [C] Failed!"); + } + + return err; +} + +/* + * Allocate the DMA memory for a page directory. This handles the necessary PD + * cache logistics. Since on Parker and later GPUs some of the page directories + * are smaller than a page packing these PDs together saves a lot of memory. + */ +int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes) +{ + struct gk20a *g = gk20a_from_vm(vm); + int err; + + /* + * Simple case: PD is bigger than a page so just do a regular DMA + * alloc. 
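The size validation in nvgpu_pd_cache_alloc() above combines a minimum-size check with the usual power-of-two idiom, and nvgpu_pd_cache_nr() then maps 256 bytes to list 0, 512 to 1, 1024 to 2, 2048 to 3, and so on. A condensed sketch of the validity check (illustrative only):

static bool example_pd_size_ok(u32 bytes)
{
	return (bytes >= NVGPU_PD_CACHE_MIN) &&
		((bytes & (bytes - 1U)) == 0U) &&	/* power of two */
		(bytes < NVGPU_PD_CACHE_SIZE);		/* else allocate directly */
}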
+ */ + if (bytes >= NVGPU_PD_CACHE_SIZE) { + err = nvgpu_pd_cache_alloc_direct(g, pd, bytes); + if (err != 0) { + return err; + } + pd->pd_size = bytes; + + return 0; + } + + if (g->mm.pd_cache == NULL) { + nvgpu_do_assert(); + return -ENOMEM; + } + + nvgpu_mutex_acquire(&g->mm.pd_cache->lock); + err = nvgpu_pd_cache_alloc(g, g->mm.pd_cache, pd, bytes); + if (err == 0) { + pd->pd_size = bytes; + } + nvgpu_mutex_release(&g->mm.pd_cache->lock); + + return err; +} + +static void nvgpu_pd_cache_free_direct(struct gk20a *g, + struct nvgpu_gmmu_pd *pd) +{ + pd_dbg(g, "PD-Free [D] 0x%p", pd->mem); + + if (pd->mem == NULL) { + return; + } + + nvgpu_dma_free(g, pd->mem); + nvgpu_kfree(g, pd->mem); + pd->mem = NULL; +} + +static void nvgpu_pd_cache_free_mem_entry(struct gk20a *g, + struct nvgpu_pd_cache *cache, + struct nvgpu_pd_mem_entry *pentry) +{ + nvgpu_dma_free(g, &pentry->mem); + nvgpu_list_del(&pentry->list_entry); + nvgpu_rbtree_unlink(&pentry->tree_entry, &cache->mem_tree); + nvgpu_kfree(g, pentry); +} + +static void nvgpu_pd_cache_do_free(struct gk20a *g, + struct nvgpu_pd_cache *cache, + struct nvgpu_pd_mem_entry *pentry, + struct nvgpu_gmmu_pd *pd) +{ + u32 bit = pd->mem_offs / pentry->pd_size; + + /* Mark entry as free. */ + nvgpu_clear_bit(bit, pentry->alloc_map); + pentry->allocs = nvgpu_safe_sub_u32(pentry->allocs, 1U); + + if (pentry->allocs > 0U) { + /* + * Partially full still. If it was already on the partial list + * this just re-adds it. + * + * Since the memory used for the entries is still mapped, if + * igpu make sure the entries are invalidated so that the hw + * doesn't acccidentally try to prefetch non-existent fb memory. + * + * As IOMMU prefetching of invalid pd entries cause the IOMMU fault, + * fill them with zero. + */ + if ((nvgpu_iommuable(g)) && + (NVGPU_PD_CACHE_SIZE > NVGPU_CPU_SMALL_PAGE_SIZE) && + (pd->mem->cpu_va != NULL)) { + (void)memset(((u8 *)pd->mem->cpu_va + pd->mem_offs), 0, + pd->pd_size); + } + + nvgpu_list_del(&pentry->list_entry); + nvgpu_list_add(&pentry->list_entry, + &cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]); + } else { + /* Empty now so free it. */ + nvgpu_pd_cache_free_mem_entry(g, cache, pentry); + } + + pd->mem = NULL; +} + +static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up( + struct nvgpu_pd_cache *cache, + struct nvgpu_gmmu_pd *pd) +{ + struct nvgpu_rbtree_node *node = NULL; + + nvgpu_rbtree_search((u64)(uintptr_t)pd->mem, &node, + cache->mem_tree); + if (node == NULL) { + return NULL; + } + + return nvgpu_pd_mem_entry_from_tree_entry(node); +} + +static void nvgpu_pd_cache_free(struct gk20a *g, struct nvgpu_pd_cache *cache, + struct nvgpu_gmmu_pd *pd) +{ + struct nvgpu_pd_mem_entry *pentry; + + pd_dbg(g, "PD-Free [C] 0x%p", pd->mem); + + pentry = nvgpu_pd_cache_look_up(cache, pd); + if (pentry == NULL) { + nvgpu_do_assert_print(g, "Attempting to free non-existent pd"); + return; + } + + nvgpu_pd_cache_do_free(g, cache, pentry, pd); +} + +void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd) +{ + struct gk20a *g = gk20a_from_vm(vm); + + /* + * Simple case: just DMA free. 
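On the free side above, the rbtree is keyed on the address of the slab's embedded nvgpu_mem, which is exactly what pd->mem points at for a cached PD, so nvgpu_pd_cache_look_up() recovers the owning slab from the pd alone. The slot index then falls out of the byte offset, as in this illustrative helper:

static u32 example_pd_slot(struct nvgpu_pd_mem_entry *pentry,
			   struct nvgpu_gmmu_pd *pd)
{
	/* Same math as nvgpu_pd_cache_do_free(). */
	return pd->mem_offs / pentry->pd_size;
}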
+ */ + if (!pd->cached) { + return nvgpu_pd_cache_free_direct(g, pd); + } + + nvgpu_mutex_acquire(&g->mm.pd_cache->lock); + nvgpu_pd_cache_free(g, g->mm.pd_cache, pd); + nvgpu_mutex_release(&g->mm.pd_cache->lock); +} diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache_priv.h b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache_priv.h new file mode 100644 index 000000000..c17f15987 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache_priv.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GMMU_PD_CACHE_PRIV_H +#define NVGPU_GMMU_PD_CACHE_PRIV_H + +/** + * @file + * + * Page directory cache private interface + * -------------------------------------- + * + * To save memory when using sub-page sized PD levels in Pascal and beyond a way + * of packing PD tables together is necessary. If a PD table only requires 1024 + * bytes, then it is possible to have 4 of these PDs in one page. This is even + * more pronounced for 256 byte PD tables. + * + * This also matters for page directories on any chip when using a 64K page + * granule. Having 4K PDs packed into a 64K page saves a bunch of memory. Even + * more so for the 256B PDs on Pascal+. + * + * The pd cache is basially just a slab allocator. Each instance of the nvgpu + * driver makes one of these structs: + * + * struct nvgpu_pd_cache { + * struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT]; + * struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT]; + * + * struct nvgpu_rbtree_node *mem_tree; + * }; + * + * There are two sets of lists, the full and the partial. The full lists contain + * pages of memory for which all the memory in that page is in use. The partial + * lists contain partially full pages of memory which can be used for more PD + * allocations. There a couple of assumptions here: + * + * 1. PDs greater than or equal to the page size bypass the pd cache. + * 2. PDs are always power of 2 and greater than %NVGPU_PD_CACHE_MIN bytes. + * + * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial + * lists. For a 4Kb page NVGPU_PD_CACHE_COUNT is 4. This is enough space for + * 256, 512, 1024, and 2048 byte PDs. + * + * nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check if the PD + * size is page size or larger and choose the correct allocation scheme - either + * from the PD cache or directly. Similarly nvgpu_pd_free() will free a PD + * allocated by nvgpu_pd_alloc(). 
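To round out the interface described above, a minimal lifecycle sketch (illustrative only; the stack-allocated pd and the 1024-byte size are arbitrary) pairing nvgpu_pd_alloc() with nvgpu_pd_free():

static int example_pd_lifecycle(struct vm_gk20a *vm)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct nvgpu_gmmu_pd pd;
	int err;

	(void) memset(&pd, 0, sizeof(pd));

	err = nvgpu_pd_alloc(vm, &pd, 1024U);	/* power of two, >= 256 bytes */
	if (err != 0) {
		return err;
	}

	nvgpu_pd_write(g, &pd, 0, 0x0U);	/* ...program the entries... */

	nvgpu_pd_free(vm, &pd);
	return 0;
}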
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args) + +/** + * Minimum size of a cache. The number of different caches in the nvgpu_pd_cache + * structure is of course depending on this. + */ +#define NVGPU_PD_CACHE_MIN 256UL +/** + * MIN_SHIFT is the right number of bits to shift to determine + * which list to use in the array of lists. + */ +#define NVGPU_PD_CACHE_MIN_SHIFT 9UL + +/** + * Maximum PD cache count. This specifies the number of slabs; since each + * slab represents a PO2 increase in size a count of 8 leads to: + * + * NVGPU_PD_CACHE_SIZE = 256B * 2^8 = 64KB + * + * For Linux with 4K pages, if the cache size is larger than 4KB then we + * need to allocate from CMA. This puts a lot of pressure on the CMA space. + * For kernel with a PAGE_SIZE of 64K this isn't the case, so allow the + * PD cache size to be 64K if PAGE_SIZE > 4K (i.e PAGE_SIZE == 64K). + */ +#ifdef __KERNEL__ +# if NVGPU_CPU_PAGE_SIZE > 4096 +# define NVGPU_PD_CACHE_COUNT 8UL +# else +# define NVGPU_PD_CACHE_COUNT 4UL +# endif +#else +#define NVGPU_PD_CACHE_COUNT 8UL +#endif + +#define NVGPU_PD_CACHE_SIZE (NVGPU_PD_CACHE_MIN * \ + (1UL << NVGPU_PD_CACHE_COUNT)) + +/** + * This structure describes a slab within the slab allocator. + */ +struct nvgpu_pd_mem_entry { + /** + * Structure for storing the PD memory information. + */ + struct nvgpu_mem mem; + + /** + * Size of the page directories (not the mem). + */ + u32 pd_size; + /** + * alloc_map is a bitmap showing which PDs have been allocated. + * The size of mem will always + * be one page. pd_size will always be a power of 2. + */ + DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN); + /** + * Total number of allocations in this PD. + */ + u32 allocs; + + /** + * This is a list node within the list. The list node will be either from + * a full or partial list in #nvgpu_pd_cache. + */ + struct nvgpu_list_node list_entry; + /** + * This is a tree node within the node. + */ + struct nvgpu_rbtree_node tree_entry; +}; + +/** + * A cache for allocating PD memory. This enables smaller PDs to be packed + * into single pages. + */ +struct nvgpu_pd_cache { + /** + * Array of lists of full nvgpu_pd_mem_entries and partially full + * nvgpu_pd_mem_entries. + */ + struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT]; + /** + * Array of lists of empty nvgpu_pd_mem_entries and partially + * empty nvgpu_pd_mem_entries. + */ + struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT]; + + /** + * Tree of all allocated struct nvgpu_mem's for fast look up. + */ + struct nvgpu_rbtree_node *mem_tree; + + /** + * All access to the cache much be locked. This protects the lists and + * the rb tree. + */ + struct nvgpu_mutex lock; +}; + +#endif /* NVGPU_GMMU_PD_CACHE_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pte.c b/drivers/gpu/nvgpu/common/mm/gmmu/pte.c new file mode 100644 index 000000000..870e137ff --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/gmmu/pte.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +u32 nvgpu_gmmu_default_big_page_size(void) +{ + return U32(SZ_64K); +} + +/* + * MSS NVLINK HW settings are in force_snoop mode. + * This will force all the GPU mappings to be coherent. + * By default the mem aperture is set to sysmem_non_coherent and will use L2 + * atomics. + * Change target pte aperture to sysmem_coherent if mem attribute requests for + * platform atomics to use rmw atomic capability. + * + */ +u32 nvgpu_gmmu_aperture_mask(struct gk20a *g, + enum nvgpu_aperture mem_ap, + bool platform_atomic_attr, + u32 sysmem_mask, + u32 sysmem_coh_mask, + u32 vidmem_mask) +{ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) && + platform_atomic_attr) { + mem_ap = APERTURE_SYSMEM_COH; + } + + return nvgpu_aperture_mask_raw(g, mem_ap, + sysmem_mask, + sysmem_coh_mask, + vidmem_mask); +} + +static char *map_attrs_to_str(char *dest, struct nvgpu_gmmu_attrs *attrs) +{ + dest[0] = attrs->cacheable ? 'C' : '-'; + dest[1] = attrs->sparse ? 'S' : '-'; + dest[2] = attrs->priv ? 'P' : '-'; + dest[3] = attrs->valid ? 'V' : '-'; + dest[4] = attrs->platform_atomic ? 'A' : '-'; + dest[5] = '\0'; + + return dest; +} + +void nvgpu_pte_dbg_print(struct gk20a *g, + struct nvgpu_gmmu_attrs *attrs, + const char *vm_name, u32 pd_idx, u32 mmu_level_entry_size, + u64 virt_addr, u64 phys_addr, u32 page_size, u32 *pte_w) +{ + char attrs_str[6]; + char ctag_str[32] = "\0"; + const char *aperture_str = nvgpu_aperture_str(attrs->aperture); + const char *perm_str = nvgpu_gmmu_perm_str(attrs->rw_flag); +#ifdef CONFIG_NVGPU_COMPRESSION + u64 ctag_tmp = attrs->ctag; + u32 str_len = 0U; + u32 ctag_num = 0U; + + /* + * attrs->ctag is incremented to count current page size as well. + * Subtract to get this page's ctag line number. 
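A quick illustration of nvgpu_gmmu_aperture_mask() above (the mask values are placeholders, not real register fields): when platform atomics are supported and requested, a SYSMEM mapping is promoted to SYSMEM_COH, so the coherent mask is returned instead of the non-coherent one.

static u32 example_aperture_mask(struct gk20a *g)
{
	return nvgpu_gmmu_aperture_mask(g, APERTURE_SYSMEM,
					true,	/* platform_atomic_attr */
					0x1U,	/* sysmem_mask (placeholder) */
					0x2U,	/* sysmem_coh_mask (placeholder) */
					0x3U);	/* vidmem_mask (placeholder) */
}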
+ */ + if (ctag_tmp != 0ULL) { + ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size); + } + + ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp / + g->ops.fb.compression_page_size(g)); + (void)strcpy(ctag_str, "ctag=0x\0"); + str_len = (u32)strlen(ctag_str); + (void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num, + nvgpu_safe_sub_u32(31U, str_len), 16U); +#endif + (void)map_attrs_to_str(attrs_str, attrs); + pte_dbg(g, attrs, + "vm=%s " + "PTE: i=%-4u size=%-2u | " + "GPU %#-12llx phys %#-12llx " + "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %-5s " + "%s " + "[0x%08x, 0x%08x]", + vm_name, + pd_idx, mmu_level_entry_size, + virt_addr, phys_addr, + page_size >> 10, + perm_str, + attrs->kind_v, + aperture_str, + attrs_str, + ctag_str, + pte_w[1], pte_w[0]); +} diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c new file mode 100644 index 000000000..db8abb017 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -0,0 +1,710 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int nvgpu_mm_suspend(struct gk20a *g) +{ + int err; + + nvgpu_log_info(g, "MM suspend running..."); + +#ifdef CONFIG_NVGPU_DGPU + nvgpu_vidmem_thread_pause_sync(&g->mm); +#endif + +#ifdef CONFIG_NVGPU_COMPRESSION + g->ops.mm.cache.cbc_clean(g); +#endif + err = g->ops.mm.cache.l2_flush(g, false); + if (err != 0) { + nvgpu_err(g, "l2_flush failed"); + return err; + } + + if (g->ops.fb.intr.disable != NULL) { + g->ops.fb.intr.disable(g); + } + + if (g->ops.mm.mmu_fault.disable_hw != NULL) { + g->ops.mm.mmu_fault.disable_hw(g); + } + + nvgpu_log_info(g, "MM suspend done!"); + + return err; +} + +u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { + return nvgpu_mem_get_phys_addr(g, inst_block); + } else { + return nvgpu_mem_get_addr(g, inst_block); + } +} + +u32 nvgpu_inst_block_ptr(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + u64 addr = nvgpu_inst_block_addr(g, inst_block) >> + g->ops.ramin.base_shift(); + + nvgpu_assert(u64_hi32(addr) == 0U); + return u64_lo32(addr); +} + +void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + if (nvgpu_mem_is_valid(inst_block)) { + nvgpu_dma_free(g, inst_block); + } +} + +int nvgpu_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) +{ + int err; + + nvgpu_log_fn(g, " "); + + err = nvgpu_dma_alloc(g, g->ops.ramin.alloc_size(), inst_block); + if (err != 0) { + nvgpu_err(g, "%s: memory allocation failed", __func__); + return err; + } + + nvgpu_log_fn(g, "done"); + return 0; +} + +static int nvgpu_alloc_sysmem_flush(struct gk20a *g) +{ + return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); +} + +#ifdef CONFIG_NVGPU_DGPU +static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + + if (mm->vidmem.ce_ctx_id != NVGPU_CE_INVAL_CTX_ID) { + nvgpu_ce_app_delete_context(g, mm->vidmem.ce_ctx_id); + } + mm->vidmem.ce_ctx_id = NVGPU_CE_INVAL_CTX_ID; + + nvgpu_vm_put(mm->ce.vm); +} +#endif + +static void nvgpu_remove_mm_support(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + + nvgpu_dma_free(g, &mm->mmu_wr_mem); + nvgpu_dma_free(g, &mm->mmu_rd_mem); + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (nvgpu_fb_vab_teardown_hal(g) != 0) { + nvgpu_err(g, "failed to teardown VAB"); + } + +#endif + + if (g->ops.mm.mmu_fault.info_mem_destroy != NULL) { + g->ops.mm.mmu_fault.info_mem_destroy(g); + } + + if (g->ops.mm.remove_bar2_vm != NULL) { + g->ops.mm.remove_bar2_vm(g); + } + + nvgpu_free_inst_block(g, &mm->bar1.inst_block); + nvgpu_vm_put(mm->bar1.vm); + + nvgpu_free_inst_block(g, &mm->pmu.inst_block); + nvgpu_free_inst_block(g, &mm->hwpm.inst_block); + nvgpu_vm_put(mm->pmu.vm); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) { + nvgpu_free_inst_block(g, &mm->sec2.inst_block); + nvgpu_vm_put(mm->sec2.vm); + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) { + nvgpu_free_inst_block(g, &mm->gsp.inst_block); + nvgpu_vm_put(mm->gsp.vm); + } + + if (g->has_cde) { + nvgpu_vm_put(mm->cde.vm); + } + +#ifdef CONFIG_NVGPU_SW_SEMAPHORE + nvgpu_semaphore_sea_destroy(g); +#endif +#ifdef CONFIG_NVGPU_DGPU + nvgpu_vidmem_destroy(g); + + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_INIT_PDB_CACHE)) { + g->ops.ramin.deinit_pdb_cache_errata(g); + } +#endif + nvgpu_pd_cache_fini(g); +} + +/* pmu vm, share 
channel_vm interfaces */ +static int nvgpu_init_system_vm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->pmu.inst_block; + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + u64 low_hole, aperture_size; + + /* + * For some reason the maxwell PMU code is dependent on the large page + * size. No reason AFAICT for this. Probably a bug somewhere. + */ + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM)) { + big_page_size = nvgpu_safe_cast_u64_to_u32(SZ_128K); + } + + /* + * No user region - so we will pass that as zero sized. + */ + low_hole = SZ_4K * 16UL; + aperture_size = GK20A_PMU_VA_SIZE; + + mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; + nvgpu_log_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size); + + mm->pmu.vm = nvgpu_vm_init(g, big_page_size, + low_hole, + 0ULL, + nvgpu_safe_sub_u64(aperture_size, low_hole), + 0ULL, + true, + false, + false, + "system"); + if (mm->pmu.vm == NULL) { + return -ENOMEM; + } + + err = nvgpu_alloc_inst_block(g, inst_block); + if (err != 0) { + goto clean_up_vm; + } + g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size); + + return 0; + +clean_up_vm: + nvgpu_vm_put(mm->pmu.vm); + return err; +} + +static int nvgpu_init_hwpm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->hwpm.inst_block; + + err = nvgpu_alloc_inst_block(g, inst_block); + if (err != 0) { + return err; + } + g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0); + + return 0; +} + +static int nvgpu_init_cde_vm(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + u64 user_size, kernel_size; + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + + g->ops.mm.get_default_va_sizes(NULL, &user_size, &kernel_size); + + mm->cde.vm = nvgpu_vm_init(g, big_page_size, + U64(big_page_size) << U64(10), + nvgpu_safe_sub_u64(user_size, + U64(big_page_size) << U64(10)), + kernel_size, + 0ULL, + false, false, false, "cde"); + if (mm->cde.vm == NULL) { + return -ENOMEM; + } + return 0; +} + +static int nvgpu_init_ce_vm(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + u64 user_size, kernel_size; + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + + g->ops.mm.get_default_va_sizes(NULL, &user_size, &kernel_size); + + mm->ce.vm = nvgpu_vm_init(g, big_page_size, + U64(big_page_size) << U64(10), + nvgpu_safe_sub_u64(user_size, + U64(big_page_size) << U64(10)), + kernel_size, + 0ULL, + false, false, false, "ce"); + if (mm->ce.vm == NULL) { + return -ENOMEM; + } + return 0; +} + +static int nvgpu_init_mmu_debug(struct mm_gk20a *mm) +{ + struct gk20a *g = gk20a_from_mm(mm); + int err; + + if (!nvgpu_mem_is_valid(&mm->mmu_wr_mem)) { + err = nvgpu_dma_alloc_sys(g, SZ_4K, &mm->mmu_wr_mem); + if (err != 0) { + goto err; + } + } + + if (!nvgpu_mem_is_valid(&mm->mmu_rd_mem)) { + err = nvgpu_dma_alloc_sys(g, SZ_4K, &mm->mmu_rd_mem); + if (err != 0) { + goto err_free_wr_mem; + } + } + return 0; + + err_free_wr_mem: + nvgpu_dma_free(g, &mm->mmu_wr_mem); + err: + return -ENOMEM; +} + +#if defined(CONFIG_NVGPU_DGPU) +void nvgpu_init_mm_ce_context(struct gk20a *g) +{ + if (g->mm.vidmem.size > 0U && + (g->mm.vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID)) { + g->mm.vidmem.ce_ctx_id = + nvgpu_ce_app_create_context(g, + nvgpu_engine_get_fast_ce_runlist_id(g), + -1, + -1); + + if (g->mm.vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID) { + nvgpu_err(g, + "Failed to allocate CE context for vidmem page clearing support"); + 
} + } +} +#endif + +static int nvgpu_init_bar1_vm(struct mm_gk20a *mm) +{ + int err; + struct gk20a *g = gk20a_from_mm(mm); + struct nvgpu_mem *inst_block = &mm->bar1.inst_block; + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + + mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; + nvgpu_log_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size); + mm->bar1.vm = nvgpu_vm_init(g, + big_page_size, + SZ_64K, + 0ULL, + nvgpu_safe_sub_u64(mm->bar1.aperture_size, SZ_64K), + 0ULL, + true, false, false, + "bar1"); + if (mm->bar1.vm == NULL) { + return -ENOMEM; + } + + err = nvgpu_alloc_inst_block(g, inst_block); + if (err != 0) { + goto clean_up_vm; + } + g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size); + + return 0; + +clean_up_vm: + nvgpu_vm_put(mm->bar1.vm); + return err; +} + +static int nvgpu_init_engine_ucode_vm(struct gk20a *g, + struct engine_ucode *ucode, const char *address_space_name) +{ + int err; + struct nvgpu_mem *inst_block = &ucode->inst_block; + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + + /* ucode aperture size is 32MB */ + ucode->aperture_size = U32(32) << 20U; + nvgpu_log_info(g, "%s vm size = 0x%x", address_space_name, + ucode->aperture_size); + + ucode->vm = nvgpu_vm_init(g, big_page_size, SZ_4K, + 0ULL, nvgpu_safe_sub_u64(ucode->aperture_size, SZ_4K), 0ULL, + false, false, false, + address_space_name); + if (ucode->vm == NULL) { + return -ENOMEM; + } + + /* allocate instance mem for engine ucode */ + err = nvgpu_alloc_inst_block(g, inst_block); + if (err != 0) { + goto clean_up_va; + } + + g->ops.mm.init_inst_block(inst_block, ucode->vm, big_page_size); + + return 0; + +clean_up_va: + nvgpu_vm_put(ucode->vm); + return err; +} + +static int nvgpu_init_mm_setup_bar(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int err; + + err = nvgpu_init_bar1_vm(mm); + if (err != 0) { + return err; + } + + if (g->ops.mm.init_bar2_vm != NULL) { + err = g->ops.mm.init_bar2_vm(g); + if (err != 0) { + return err; + } + } + err = nvgpu_init_system_vm(mm); + if (err != 0) { + return err; + } + + err = nvgpu_init_hwpm(mm); + if (err != 0) { + return err; + } + + return err; +} + +static int nvgpu_init_mm_setup_vm(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int err; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) { + err = nvgpu_init_engine_ucode_vm(g, &mm->sec2, "sec2"); + if (err != 0) { + return err; + } + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) { + err = nvgpu_init_engine_ucode_vm(g, &mm->gsp, "gsp"); + if (err != 0) { + return err; + } + } + + if (g->has_cde) { + err = nvgpu_init_cde_vm(mm); + if (err != 0) { + return err; + } + } + + err = nvgpu_init_ce_vm(mm); + if (err != 0) { + return err; + } + + return err; +} + +static int nvgpu_init_mm_components(struct gk20a *g) +{ + int err = 0; + struct mm_gk20a *mm = &g->mm; + + err = nvgpu_alloc_sysmem_flush(g); + if (err != 0) { + return err; + } + + err = nvgpu_init_mm_setup_bar(g); + if (err != 0) { + return err; + } + + err = nvgpu_init_mm_setup_vm(g); + if (err != 0) { + return err; + } + + err = nvgpu_init_mmu_debug(mm); + if (err != 0) { + return err; + } + + /* + * Some chips support replayable MMU faults. For such chips make sure + * SW is initialized. 
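The per-VM bring-up functions above (BAR1, the system/PMU VM, and the engine-ucode VMs) all follow the same pattern; a condensed, illustrative sketch of that pattern, with an arbitrary 32 MB aperture, is:

static int example_vm_with_inst_block(struct gk20a *g,
				      struct vm_gk20a **vm_out,
				      struct nvgpu_mem *inst_block,
				      u32 big_page_size)
{
	u64 aperture = U64(32) << 20;	/* 32 MB, mirroring the ucode VMs */
	int err;

	*vm_out = nvgpu_vm_init(g, big_page_size, SZ_4K, 0ULL,
				nvgpu_safe_sub_u64(aperture, SZ_4K), 0ULL,
				false, false, false, "example");
	if (*vm_out == NULL) {
		return -ENOMEM;
	}

	err = nvgpu_alloc_inst_block(g, inst_block);
	if (err != 0) {
		nvgpu_vm_put(*vm_out);	/* unwind the VM on failure */
		return err;
	}

	g->ops.mm.init_inst_block(inst_block, *vm_out, big_page_size);
	return 0;
}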
+ */ + if (g->ops.mm.mmu_fault.setup_sw != NULL) { + err = g->ops.mm.mmu_fault.setup_sw(g); + if (err != 0) { + return err; + } + } + + return 0; +} + +static int nvgpu_init_mm_setup_sw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int err = 0; + + if (mm->sw_ready) { + nvgpu_log_info(g, "skip init"); + return 0; + } + + mm->g = g; + nvgpu_mutex_init(&mm->l2_op_lock); + + /*TBD: make channel vm size configurable */ + g->ops.mm.get_default_va_sizes(NULL, &mm->channel.user_size, + &mm->channel.kernel_size); + + nvgpu_log_info(g, "channel vm size: user %uMB kernel %uMB", + nvgpu_safe_cast_u64_to_u32(mm->channel.user_size >> U64(20)), + nvgpu_safe_cast_u64_to_u32(mm->channel.kernel_size >> U64(20))); + +#ifdef CONFIG_NVGPU_DGPU + mm->vidmem.ce_ctx_id = NVGPU_CE_INVAL_CTX_ID; + + nvgpu_init_pramin(mm); + + err = nvgpu_vidmem_init(mm); + if (err != 0) { + return err; + } + + /* + * this requires fixed allocations in vidmem which must be + * allocated before all other buffers + */ + + if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY) && + nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + err = nvgpu_acr_alloc_blob_prerequisite(g, g->acr, 0); + if (err != 0) { + return err; + } + } +#endif + + err = nvgpu_init_mm_components(g); + if (err != 0) { + return err; + } + + if ((g->ops.fb.ecc.init != NULL) && !g->ecc.initialized) { + err = g->ops.fb.ecc.init(g); + if (err != 0) { + return err; + } + } + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (nvgpu_fb_vab_init_hal(g) != 0) { + nvgpu_err(g, "failed to init VAB"); + } +#endif + + mm->remove_support = nvgpu_remove_mm_support; +#ifdef CONFIG_NVGPU_DGPU + mm->remove_ce_support = nvgpu_remove_mm_ce_support; +#endif + + mm->sw_ready = true; + + return 0; +} + +#ifdef CONFIG_NVGPU_DGPU +static int nvgpu_init_mm_pdb_cache_errata(struct gk20a *g) +{ + int err; + + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_INIT_PDB_CACHE)) { + err = g->ops.ramin.init_pdb_cache_errata(g); + if (err != 0) { + return err; + } + } + + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_FB_PDB_CACHE)) { + err = g->ops.fb.apply_pdb_cache_errata(g); + if (err != 0) { + return err; + } + } + + return 0; +} +#endif + +/* + * Called through the HAL to handle vGPU: the vGPU doesn't have HW to initialize + * here. 
+ */ +int nvgpu_mm_setup_hw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int err; + + nvgpu_log_fn(g, " "); + + if (g->ops.fb.set_mmu_page_size != NULL) { + g->ops.fb.set_mmu_page_size(g); + } + +#ifdef CONFIG_NVGPU_COMPRESSION + if (g->ops.fb.set_use_full_comp_tag_line != NULL) { + mm->use_full_comp_tag_line = + g->ops.fb.set_use_full_comp_tag_line(g); + } +#endif + + g->ops.fb.init_hw(g); + + if (g->ops.bus.bar1_bind != NULL) { + err = g->ops.bus.bar1_bind(g, &mm->bar1.inst_block); + if (err != 0) { + return err; + } + } + + if (g->ops.bus.bar2_bind != NULL) { + err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block); + if (err != 0) { + return err; + } + } + + if ((g->ops.mm.cache.fb_flush(g) != 0) || + (g->ops.mm.cache.fb_flush(g) != 0)) { + return -EBUSY; + } + + if (g->ops.mm.mmu_fault.setup_hw != NULL) { + g->ops.mm.mmu_fault.setup_hw(g); + } + + nvgpu_log_fn(g, "done"); + return 0; +} + +int nvgpu_init_mm_support(struct gk20a *g) +{ + int err; + +#ifdef CONFIG_NVGPU_DGPU + err = nvgpu_init_mm_pdb_cache_errata(g); + if (err != 0) { + return err; + } +#endif + + err = nvgpu_init_mm_setup_sw(g); + if (err != 0) { + return err; + } + + if (g->ops.mm.setup_hw != NULL) { + err = g->ops.mm.setup_hw(g); + } + + return err; +} + +u32 nvgpu_mm_get_default_big_page_size(struct gk20a *g) +{ + u32 big_page_size; + + big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + + if (g->mm.disable_bigpage) { + big_page_size = 0; + } + + return big_page_size; +} + +u32 nvgpu_mm_get_available_big_page_sizes(struct gk20a *g) +{ + u32 available_big_page_sizes = 0; + + if (g->mm.disable_bigpage) { + return available_big_page_sizes; + } + + available_big_page_sizes = g->ops.mm.gmmu.get_default_big_page_size(); + if (g->ops.mm.gmmu.get_big_page_sizes != NULL) { + available_big_page_sizes |= g->ops.mm.gmmu.get_big_page_sizes(); + } + + return available_big_page_sizes; +} diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c new file mode 100644 index 000000000..8a2df3f7a --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Make sure to use the right coherency aperture if you use this function! This + * will not add any checks. 
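Back in mm.c above, nvgpu_mm_get_available_big_page_sizes() returns the supported sizes OR'd together as a bitmask (0 when big pages are disabled), so a caller can test for a particular size as in this illustrative helper; the 64 KiB value is just an example.

static bool example_big_page_supported(struct gk20a *g)
{
	return (nvgpu_mm_get_available_big_page_sizes(g) & U32(SZ_64K)) != 0U;
}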
If you want to simply use the default coherency then + * use nvgpu_aperture_mask(). + */ +u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture, + u32 sysmem_mask, u32 sysmem_coh_mask, + u32 vidmem_mask) +{ + u32 ret_mask = 0; + + if ((aperture == APERTURE_INVALID) || (aperture >= APERTURE_MAX_ENUM)) { + nvgpu_do_assert_print(g, "Bad aperture"); + return 0; + } + + /* + * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the + * "sysmem" aperture should really be translated to VIDMEM. + */ + if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)) { + aperture = APERTURE_VIDMEM; + } + + switch (aperture) { + case APERTURE_SYSMEM_COH: + ret_mask = sysmem_coh_mask; + break; + case APERTURE_SYSMEM: + ret_mask = sysmem_mask; + break; + case APERTURE_VIDMEM: + ret_mask = vidmem_mask; + break; + default: + nvgpu_do_assert_print(g, "Bad aperture"); + ret_mask = 0; + break; + } + return ret_mask; +} + +u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, + u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) +{ + enum nvgpu_aperture ap = mem->aperture; + + return nvgpu_aperture_mask_raw(g, ap, + sysmem_mask, + sysmem_coh_mask, + vidmem_mask); +} + +bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap) +{ + return (ap == APERTURE_SYSMEM_COH) || (ap == APERTURE_SYSMEM); +} + +bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem) +{ + return nvgpu_aperture_is_sysmem(mem->aperture); +} + +u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys) +{ + /* ensure it is not vidmem allocation */ +#ifdef CONFIG_NVGPU_DGPU + WARN_ON(nvgpu_addr_is_vidmem_page_alloc(phys)); +#endif + + if (nvgpu_iommuable(g) && (g->ops.mm.gmmu.get_iommu_bit != NULL)) { + return phys | (1ULL << g->ops.mm.gmmu.get_iommu_bit(g)); + } + + return phys; +} + +u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u64 w) +{ + u32 data = 0; + + if (mem->aperture == APERTURE_SYSMEM) { + u32 *ptr = mem->cpu_va; + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(ptr == NULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + data = ptr[w]; + } +#ifdef CONFIG_NVGPU_DGPU + else if (mem->aperture == APERTURE_VIDMEM) { + nvgpu_pramin_rd_n(g, mem, w * (u64)sizeof(u32), + (u64)sizeof(u32), &data); + } +#endif + else { + nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem"); + } + + return data; +} + +u64 nvgpu_mem_rd32_pair(struct gk20a *g, struct nvgpu_mem *mem, u32 lo, u32 hi) +{ + u64 lo_data = U64(nvgpu_mem_rd32(g, mem, lo)); + u64 hi_data = U64(nvgpu_mem_rd32(g, mem, hi)); + + return lo_data | (hi_data << 32ULL); +} + +u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u64 offset) +{ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON((offset & 3ULL) != 0ULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + return nvgpu_mem_rd32(g, mem, offset / (u64)sizeof(u32)); +} + +void 
nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, + u64 offset, void *dest, u64 size) +{ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON((offset & 3ULL) != 0ULL); + WARN_ON((size & 3ULL) != 0ULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *src = (u8 *)mem->cpu_va + offset; + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(mem->cpu_va == NULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + nvgpu_memcpy((u8 *)dest, src, size); + } +#ifdef CONFIG_NVGPU_DGPU + else if (mem->aperture == APERTURE_VIDMEM) { + nvgpu_pramin_rd_n(g, mem, offset, size, dest); + } +#endif + else { + nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem"); + } +} + +void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u64 w, u32 data) +{ + if (mem->aperture == APERTURE_SYSMEM) { + u32 *ptr = mem->cpu_va; + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(ptr == NULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + ptr[w] = data; + } +#ifdef CONFIG_NVGPU_DGPU + else if (mem->aperture == APERTURE_VIDMEM) { + nvgpu_pramin_wr_n(g, mem, w * (u64)sizeof(u32), + (u64)sizeof(u32), &data); + + if (!mem->skip_wmb) { + nvgpu_wmb(); + } + } +#endif + else { + nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem"); + } +} + +void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, u32 data) +{ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON((offset & 3ULL) != 0ULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + nvgpu_mem_wr32(g, mem, offset / (u64)sizeof(u32), data); +} + +void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, + void *src, u64 size) +{ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON((offset & 3ULL) != 0ULL); + WARN_ON((size & 3ULL) != 0ULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) 
+NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *dest = (u8 *)mem->cpu_va + offset; + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(mem->cpu_va == NULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + nvgpu_memcpy(dest, (u8 *)src, size); + } +#ifdef CONFIG_NVGPU_DGPU + else if (mem->aperture == APERTURE_VIDMEM) { + nvgpu_pramin_wr_n(g, mem, offset, size, src); + if (!mem->skip_wmb) { + nvgpu_wmb(); + } + } +#endif + else { + nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem"); + } +} + +void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, + u32 c, u64 size) +{ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 3, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 3, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 3, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON((offset & 3ULL) != 0ULL); + WARN_ON((size & 3ULL) != 0ULL); + WARN_ON((c & ~0xffU) != 0U); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + c &= 0xffU; + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *dest = (u8 *)mem->cpu_va + offset; + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(mem->cpu_va == NULL); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + (void) memset(dest, (int)c, size); + } +#ifdef CONFIG_NVGPU_DGPU + else if (mem->aperture == APERTURE_VIDMEM) { + u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); + + nvgpu_pramin_memset(g, mem, offset, size, repeat_value); + if (!mem->skip_wmb) { + nvgpu_wmb(); + } + } +#endif + else { + nvgpu_do_assert_print(g, "Accessing unallocated nvgpu_mem"); + } +} + +static void *nvgpu_mem_phys_sgl_next(void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return (void *)(void *)sgl_impl->next; +} + +/* + * Provided for compatibility - the DMA address is the same as the phys address + * for these nvgpu_mem's. 
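The nvgpu_mem accessors above come in word-indexed (nvgpu_mem_rd32()/nvgpu_mem_wr32()) and byte-offset (nvgpu_mem_rd()/nvgpu_mem_wr()) flavours; the byte-offset forms simply divide by sizeof(u32), so the two writes in this illustrative helper hit the same location.

static void example_mem_accessors(struct gk20a *g, struct nvgpu_mem *mem)
{
	nvgpu_mem_wr32(g, mem, 4ULL, 0xcafeU);	/* word index 4 */
	nvgpu_mem_wr(g, mem, 16ULL, 0xcafeU);	/* byte offset 16 == word 4 */
}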
+ */ +static u64 nvgpu_mem_phys_sgl_dma(void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static u64 nvgpu_mem_phys_sgl_phys(struct gk20a *g, void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static u64 nvgpu_mem_phys_sgl_ipa_to_pa(struct gk20a *g, + void *sgl, u64 ipa, u64 *pa_len) +{ + return ipa; +} + +static u64 nvgpu_mem_phys_sgl_length(void *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->length; +} + +static u64 nvgpu_mem_phys_sgl_gpu_addr(struct gk20a *g, void *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static void nvgpu_mem_phys_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + /* + * No-op here. The free is handled by freeing the nvgpu_mem itself. + */ +} + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 8_7), "Bug 2823817") +static const struct nvgpu_sgt_ops nvgpu_mem_phys_ops = { +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7)) + .sgl_next = nvgpu_mem_phys_sgl_next, + .sgl_dma = nvgpu_mem_phys_sgl_dma, + .sgl_phys = nvgpu_mem_phys_sgl_phys, + .sgl_ipa = nvgpu_mem_phys_sgl_phys, + .sgl_ipa_to_pa = nvgpu_mem_phys_sgl_ipa_to_pa, + .sgl_length = nvgpu_mem_phys_sgl_length, + .sgl_gpu_addr = nvgpu_mem_phys_sgl_gpu_addr, + .sgt_free = nvgpu_mem_phys_sgt_free, + + /* + * The physical nvgpu_mems are never IOMMU'able by definition. + */ + .sgt_iommuable = NULL +}; + +int nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, + u64 src_phys, u64 nr_pages) +{ + int ret = 0; + struct nvgpu_sgt *sgt; + struct nvgpu_mem_sgl *sgl; + + /* + * Do the two operations that can fail before touching *dest. + */ + sgt = nvgpu_kzalloc(g, sizeof(*sgt)); + sgl = nvgpu_kzalloc(g, sizeof(*sgl)); + if ((sgt == NULL) || (sgl == NULL)) { + nvgpu_kfree(g, sgt); + nvgpu_kfree(g, sgl); + return -ENOMEM; + } + + (void) memset(dest, 0, sizeof(*dest)); + + dest->aperture = APERTURE_SYSMEM; + dest->size = nvgpu_safe_mult_u64(nr_pages, + (u64)NVGPU_CPU_PAGE_SIZE); + dest->aligned_size = dest->size; + dest->mem_flags = NVGPU_MEM_FLAG_NO_DMA; + dest->phys_sgt = sgt; + + sgl->next = NULL; + sgl->phys = src_phys; + sgl->length = dest->size; + sgt->sgl = (void *)sgl; + sgt->ops = &nvgpu_mem_phys_ops; + + return ret; +} diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c b/drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c new file mode 100644 index 000000000..d21775ecc --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) +{ + return sgt->ops->sgl_next(sgl); +} + +u64 nvgpu_sgt_get_phys(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl) +{ + return sgt->ops->sgl_phys(g, sgl); +} + +u64 nvgpu_sgt_get_ipa(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl) +{ + return sgt->ops->sgl_ipa(g, sgl); +} + +u64 nvgpu_sgt_ipa_to_pa(struct gk20a *g, struct nvgpu_sgt *sgt, + void *sgl, u64 ipa, u64 *pa_len) +{ + return sgt->ops->sgl_ipa_to_pa(g, sgl, ipa, pa_len); +} + +u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl) +{ + return sgt->ops->sgl_dma(sgl); +} + +u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl) +{ + return sgt->ops->sgl_length(sgl); +} + +u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + return sgt->ops->sgl_gpu_addr(g, sgl, attrs); +} + +bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + if (sgt->ops->sgt_iommuable != NULL) { + return sgt->ops->sgt_iommuable(g, sgt); + } + return false; +} + +void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + if ((sgt != NULL) && (sgt->ops->sgt_free != NULL)) { + sgt->ops->sgt_free(g, sgt); + } +} + +/* + * Determine alignment for a passed buffer. Necessary since the buffer may + * appear big enough to map with large pages but the SGL may have chunks that + * are not aligned on a 64/128kB large page boundary. There's also the + * possibility chunks are odd sizes which will necessitate small page mappings + * to correctly glue them together into a contiguous virtual mapping. + */ +u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + u64 align = 0, chunk_align = 0; + void *sgl; + + /* + * If this SGT is iommuable and we want to use the IOMMU address then + * the SGT's first entry has the IOMMU address. We will align on this + * and double check length of buffer later. Also, since there's an + * IOMMU we know that this DMA address is contiguous. + */ + if (nvgpu_iommuable(g) && + nvgpu_sgt_iommuable(g, sgt) && + (nvgpu_sgt_get_dma(sgt, sgt->sgl) != 0ULL)) { + return 1ULL << (nvgpu_ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl)) + - 1UL); + } + + /* + * Otherwise the buffer is not iommuable (VIDMEM, for example) or we are + * bypassing the IOMMU and need to use the underlying physical entries + * of the SGT. + */ + nvgpu_sgt_for_each_sgl(sgl, sgt) { + chunk_align = 1ULL << nvgpu_safe_sub_u64(nvgpu_ffs( + nvgpu_sgt_get_phys(g, sgt, sgl) | + nvgpu_sgt_get_length(sgt, sgl)), 1UL); + + if (align != 0ULL) { + align = min(align, chunk_align); + } else { + align = chunk_align; + } + } + + return align; +} + +struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, + struct nvgpu_mem *mem) +{ + if ((mem->mem_flags & NVGPU_MEM_FLAG_NO_DMA) != 0U) { + return mem->phys_sgt; + } + + return nvgpu_sgt_os_create_from_mem(g, mem); +} diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c new file mode 100644 index 000000000..c79b45e49 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
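A worked example for nvgpu_sgt_alignment() above (illustrative numbers): a chunk with phys = 0x10000 and length = 0x2000 gives phys | length = 0x12000, whose lowest set bit is 0x2000, so that chunk only guarantees 8 KiB alignment and the buffer cannot be mapped with 64 KiB large pages. The per-chunk formula, assuming a non-zero value, is:

static u64 example_chunk_align(u64 phys, u64 length)
{
	/* e.g. 0x10000 | 0x2000 = 0x12000 -> 0x2000 (8 KiB) */
	return 1ULL << (nvgpu_ffs(phys | length) - 1UL);
}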
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * This is expected to be called from the shutdown path (or the error path in + * the vidmem init code). As such we do not expect new vidmem frees to be + * enqueued. + */ +void nvgpu_vidmem_destroy(struct gk20a *g) +{ + struct nvgpu_timeout timeout; + int err; + + if (g->ops.fb.get_vidmem_size == NULL) { + return; + } + + err = nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER); + if (err != 0) { + nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err); + } + + /* + * Ensure that the thread runs one last time to flush anything in the + * queue. + */ + nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond); + + /* + * Wait for at most 1 second before just continuing on. It doesn't make + * sense to hang the system over some potential memory leaks. + */ + do { + bool empty; + + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + + if (empty) { + break; + } + + nvgpu_msleep(10); + } while (nvgpu_timeout_expired(&timeout) == 0); + + /* + * Kill the vidmem clearing thread now. This will wake the thread up + * automatically and cause the wait_interruptible condition trigger. 
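The bounded wait in nvgpu_vidmem_destroy() above is a common nvgpu idiom: initialize a retry timer, poll a condition, and give up once the timeout expires. A generic, illustrative sketch (condition() is a placeholder):

static void example_bounded_poll(struct gk20a *g,
				 bool (*condition)(struct gk20a *g))
{
	struct nvgpu_timeout timeout;

	if (nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER) != 0) {
		return;
	}

	do {
		if (condition(g)) {
			break;
		}
		nvgpu_msleep(10);
	} while (nvgpu_timeout_expired(&timeout) == 0);
}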
+ */ + nvgpu_thread_stop(&g->mm.vidmem.clearing_thread); + + if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) { + nvgpu_alloc_destroy(&g->mm.vidmem.allocator); + } + + if (nvgpu_alloc_initialized(&g->mm.vidmem.bootstrap_allocator)) { + nvgpu_alloc_destroy(&g->mm.vidmem.bootstrap_allocator); + } +} + +static int nvgpu_vidmem_clear_fence_wait(struct gk20a *g, + struct nvgpu_fence_type *fence_out) +{ + struct nvgpu_timeout timeout; + bool done; + int err; + + err = nvgpu_timeout_init(g, &timeout, + nvgpu_get_poll_timeout(g), + NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err); + return err; + } + + do { + err = nvgpu_fence_wait(g, fence_out, + nvgpu_get_poll_timeout(g)); + if (err != -ERESTARTSYS) { + done = true; + } else if (nvgpu_timeout_expired(&timeout) != 0) { + done = true; + } else { + done = false; + } + } while (!done); + + nvgpu_fence_put(fence_out); + if (err != 0) { + nvgpu_err(g, + "fence wait failed for CE execute ops"); + return err; + } + + return 0; +} + +static int nvgpu_vidmem_do_clear_all(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + struct nvgpu_fence_type *fence_out = NULL; + int err = 0; + + if (mm->vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID) { + return -EINVAL; + } + + vidmem_dbg(g, "Clearing all VIDMEM:"); + +#ifdef CONFIG_NVGPU_DGPU + err = nvgpu_ce_execute_ops(g, + mm->vidmem.ce_ctx_id, + 0, + mm->vidmem.base, + mm->vidmem.bootstrap_base - mm->vidmem.base, + 0x00000000, + NVGPU_CE_DST_LOCATION_LOCAL_FB, + NVGPU_CE_MEMSET, + 0, + &fence_out); + if (err != 0) { + nvgpu_err(g, + "Failed to clear vidmem : %d", err); + return err; + } +#else + /* fail due to lack of ce app support */ + return -ENOSYS; +#endif + + if (fence_out != NULL) { + err = nvgpu_vidmem_clear_fence_wait(g, fence_out); + if (err != 0) { + return err; + } + } + + mm->vidmem.cleared = true; + + vidmem_dbg(g, "Done!"); + + return 0; +} + +void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm) +{ + /* + * On the first increment of the pause_count (0 -> 1) take the pause + * lock and prevent the vidmem clearing thread from processing work + * items. + * + * Otherwise the increment is all that's needed - it's essentially a + * ref-count for the number of pause() calls. + * + * The sync component is implemented by waiting for the lock to be + * released by the clearing thread in case the thread is currently + * processing work items. + */ + if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1) { + nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock); + } + + vidmem_dbg(mm->g, "Clearing thread paused; new count=%d", + nvgpu_atomic_read(&mm->vidmem.pause_count)); +} + +void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm) +{ + vidmem_dbg(mm->g, "Unpausing clearing thread; current count=%d", + nvgpu_atomic_read(&mm->vidmem.pause_count)); + + /* + * And on the last decrement (1 -> 0) release the pause lock and let + * the vidmem clearing thread continue. + */ + if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0) { + nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); + vidmem_dbg(mm->g, " > Clearing thread really unpaused!"); + } +} + +int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct mm_gk20a *mm = &g->mm; + + /* + * Crap. Can't enqueue new vidmem bufs! CE may be gone! + * + * However, an errant app can hold a vidmem dma_buf FD open past when + * the nvgpu driver has exited. 
Thus when the FD does get closed + * eventually the dma_buf release function will try to call the vidmem + * free function which will attempt to enqueue the vidmem into the + * vidmem clearing thread. + */ + if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + return -ENOSYS; + } + + nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); + nvgpu_list_add_tail(&mem->clear_list_entry, + &mm->vidmem.clear_list_head); + nvgpu_atomic64_add((long)mem->aligned_size, &mm->vidmem.bytes_pending); + nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); + + nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond); + + return 0; +} + +static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm) +{ + struct nvgpu_mem *mem = NULL; + + nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex); + if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) { + mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head, + nvgpu_mem, clear_list_entry); + nvgpu_list_del(&mem->clear_list_entry); + } + nvgpu_mutex_release(&mm->vidmem.clear_list_mutex); + + return mem; +} + +static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm) +{ + struct gk20a *g = mm->g; + struct nvgpu_mem *mem; + int err; + + vidmem_dbg(g, "Running VIDMEM clearing thread:"); + + while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) { + err = nvgpu_vidmem_clear(g, mem); + if (err != 0) { + nvgpu_err(g, "nvgpu_vidmem_clear() failed err=%d", err); + } + + WARN_ON(nvgpu_atomic64_sub_return((long)mem->aligned_size, + &g->mm.vidmem.bytes_pending) < 0); + mem->size = 0; + mem->aperture = APERTURE_INVALID; + + nvgpu_mem_free_vidmem_alloc(g, mem); + nvgpu_kfree(g, mem); + } + + vidmem_dbg(g, "Done!"); +} + +static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr) +{ + struct mm_gk20a *mm = mm_ptr; + + /* + * Simple thread who's sole job is to periodically clear userspace + * vidmem allocations that have been recently freed. + * + * Since it doesn't make sense to run unless there's pending work a + * condition field is used to wait for work. When the DMA API frees a + * userspace vidmem buf it enqueues it into the clear list and alerts us + * that we have some work to do. + */ + + while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) { + int ret; + + /* + * Wait for work but also make sure we should not be paused. + */ + ret = NVGPU_COND_WAIT_INTERRUPTIBLE( + &mm->vidmem.clearing_thread_cond, + nvgpu_thread_should_stop( + &mm->vidmem.clearing_thread) || + !nvgpu_list_empty(&mm->vidmem.clear_list_head), + 0U); + if (ret == -ERESTARTSYS) { + continue; + } + + /* + * Use this lock to implement a pause mechanism. By taking this + * lock some other code can prevent this thread from processing + * work items. + */ + if (nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock) + == 0) { + continue; + } + + nvgpu_vidmem_clear_pending_allocs(mm); + + nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock); + } + + return 0; +} + +int nvgpu_vidmem_init(struct mm_gk20a *mm) +{ + struct gk20a *g = mm->g; + u64 bootstrap_base, base; + u64 bootstrap_size = SZ_512M; + u64 default_page_size = SZ_64K; + size_t size; + int err; + static struct nvgpu_alloc_carveout bootstrap_co = + NVGPU_CARVEOUT("bootstrap-region", 0, 0); + + if (g->ops.fb.get_vidmem_size == NULL) { + + /* + * As it is a common function, the return value + * need to be handled for igpu. 
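The clearing thread above (nvgpu_vidmem_clear_pending_allocs_thr) is a standard condition-variable consumer: it sleeps until it is told to stop or the clear list becomes non-empty, and the enqueue path wakes it after adding a buffer. A compact POSIX sketch of that producer/consumer shape, using a boolean in place of the list (struct clear_worker and its fields are illustrative stand-ins, not the nvgpu primitives):

#include <pthread.h>
#include <stdbool.h>

struct clear_worker {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        bool            stop;
        bool            have_work;   /* stands in for "clear list not empty" */
};

/* Producer: queue a freed buffer for clearing and wake the worker. */
static void clear_worker_enqueue(struct clear_worker *w)
{
        pthread_mutex_lock(&w->lock);
        w->have_work = true;
        pthread_cond_signal(&w->cond);
        pthread_mutex_unlock(&w->lock);
}

/* Consumer: sleep until there is work or a stop request. */
static void *clear_worker_fn(void *arg)
{
        struct clear_worker *w = arg;

        pthread_mutex_lock(&w->lock);
        while (!w->stop) {
                while (!w->stop && !w->have_work)
                        pthread_cond_wait(&w->cond, &w->lock);

                if (w->have_work) {
                        w->have_work = false;
                        /* drain and zero the pending buffers here */
                }
        }
        pthread_mutex_unlock(&w->lock);
        return NULL;
}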
+ */ + return 0; + } else { + size = g->ops.fb.get_vidmem_size(g); + if (size == 0UL) { + nvgpu_err(g, "Found zero vidmem"); + return -ENOMEM; + } + } + + vidmem_dbg(g, "init begin"); + +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + bootstrap_size = SZ_32M; + } +#endif + + bootstrap_co.base = size - bootstrap_size; + bootstrap_co.length = bootstrap_size; + + bootstrap_base = bootstrap_co.base; + base = default_page_size; + + /* + * Bootstrap allocator for use before the CE is initialized (CE + * initialization requires vidmem but we want to use the CE to zero + * out vidmem before allocating it... + */ + err = nvgpu_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, + NULL, "vidmem-bootstrap", bootstrap_base, + bootstrap_size, SZ_4K, 0ULL, + GPU_ALLOC_FORCE_CONTIG, PAGE_ALLOCATOR); + + err = nvgpu_allocator_init(g, &g->mm.vidmem.allocator, NULL, + "vidmem", base, size - base, default_page_size, 0ULL, + GPU_ALLOC_4K_VIDMEM_PAGES, PAGE_ALLOCATOR); + if (err != 0) { + nvgpu_err(g, "Failed to register vidmem for size %zu: %d", + size, err); + return err; + } + + /* Reserve bootstrap region in vidmem allocator */ + err = nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, + &bootstrap_co); + if (err != 0) { + nvgpu_err(g, "nvgpu_alloc_reserve_carveout() failed err=%d", + err); + goto fail; + } + + mm->vidmem.base = base; + mm->vidmem.size = size - base; + mm->vidmem.bootstrap_base = bootstrap_base; + mm->vidmem.bootstrap_size = bootstrap_size; + + err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond); + if (err != 0) { + goto fail; + } + + nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); + nvgpu_init_list_node(&mm->vidmem.clear_list_head); + + nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); + nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock); + nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); + + nvgpu_atomic_set(&mm->vidmem.pause_count, 0); + + /* + * Start the thread off in the paused state. The thread doesn't have to + * be running for this to work. It will be woken up later on in + * finalize_poweron(). We won't necessarily have a CE context yet + * either, so hypothetically one could cause a race where we try to + * clear a vidmem struct before we have a CE context to do so. 
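The pause mechanism invoked just below (nvgpu_vidmem_thread_pause_sync and nvgpu_vidmem_thread_unpause, defined earlier) is essentially a recursive pause count guarding one lock: only the 0 -> 1 transition takes the lock and only the 1 -> 0 transition releases it, while the worker skips its work items whenever a trylock on that lock fails. A minimal sketch with C11 atomics and pthreads, assuming pause and unpause are issued from the same controlling thread (the names are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct pauser {
        atomic_int      pause_count;
        pthread_mutex_t work_lock;      /* held while paused */
};

/* First pause (count 0 -> 1) takes the lock so the worker cannot enter
 * its critical section; later calls only bump the count. */
static void pause_worker(struct pauser *p)
{
        if (atomic_fetch_add(&p->pause_count, 1) == 0)
                pthread_mutex_lock(&p->work_lock);
}

/* Last unpause (count 1 -> 0) drops the lock again. */
static void unpause_worker(struct pauser *p)
{
        if (atomic_fetch_sub(&p->pause_count, 1) == 1)
                pthread_mutex_unlock(&p->work_lock);
}

/* Worker side: skip the work items while the pause lock is held. */
static bool worker_try_process(struct pauser *p)
{
        if (pthread_mutex_trylock(&p->work_lock) != 0)
                return false;           /* paused */
        /* ... process queued clears here ... */
        pthread_mutex_unlock(&p->work_lock);
        return true;
}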
+ */ + nvgpu_vidmem_thread_pause_sync(mm); + + err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm, + nvgpu_vidmem_clear_pending_allocs_thr, + "vidmem-clear"); + if (err != 0) { + goto fail; + } + + vidmem_dbg(g, "VIDMEM Total: %zu MB", size >> 20); + vidmem_dbg(g, "VIDMEM Ranges:"); + vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Primary", + mm->vidmem.base, mm->vidmem.base + mm->vidmem.size); + vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Bootstrap", + mm->vidmem.bootstrap_base, + mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size); + vidmem_dbg(g, "VIDMEM carveouts:"); + vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx %s", + bootstrap_co.base, bootstrap_co.base + bootstrap_co.length, + bootstrap_co.name); + + return 0; + +fail: + nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond); + nvgpu_vidmem_destroy(g); + return err; +} + +int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space) +{ + struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator; + + nvgpu_log_fn(g, " "); + + if (!nvgpu_alloc_initialized(allocator)) { + return -ENOSYS; + } + + *space = nvgpu_alloc_space(allocator) + + U64(nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending)); + return 0; +} + +int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct nvgpu_fence_type *fence_out = NULL; + struct nvgpu_fence_type *last_fence = NULL; + struct nvgpu_page_alloc *alloc = NULL; + void *sgl = NULL; + int err = 0; + + if (g->mm.vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID) { + return -EINVAL; + } + + alloc = mem->vidmem_alloc; + + nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) { + if (last_fence != NULL) { + nvgpu_fence_put(last_fence); + } + +#ifdef CONFIG_NVGPU_DGPU + err = nvgpu_ce_execute_ops(g, + g->mm.vidmem.ce_ctx_id, + 0, + nvgpu_sgt_get_phys(g, &alloc->sgt, sgl), + nvgpu_sgt_get_length(&alloc->sgt, sgl), + 0x00000000, + NVGPU_CE_DST_LOCATION_LOCAL_FB, + NVGPU_CE_MEMSET, + 0, + &fence_out); +#else + /* fail due to lack of ce app support */ + err = -ENOSYS; +#endif + + if (err != 0) { +#ifdef CONFIG_NVGPU_DGPU + nvgpu_err(g, + "Failed nvgpu_ce_execute_ops[%d]", err); +#endif + return err; + } + + vidmem_dbg(g, " > [0x%llx +0x%llx]", + nvgpu_sgt_get_phys(g, &alloc->sgt, sgl), + nvgpu_sgt_get_length(&alloc->sgt, sgl)); + + last_fence = fence_out; + } + + if (last_fence != NULL) { + err = nvgpu_vidmem_clear_fence_wait(g, last_fence); + if (err != 0) { + return err; + } + } + + vidmem_dbg(g, " Done"); + + return err; +} + +static int nvgpu_vidmem_clear_all(struct gk20a *g) +{ + int err; + + if (g->mm.vidmem.cleared) { + return 0; + } + + nvgpu_mutex_acquire(&g->mm.vidmem.first_clear_mutex); + if (!g->mm.vidmem.cleared) { + err = nvgpu_vidmem_do_clear_all(g); + if (err != 0) { + nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex); + nvgpu_err(g, "failed to clear whole vidmem"); + return err; + } + } + nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex); + + return 0; +} + +int nvgpu_vidmem_user_alloc(struct gk20a *g, size_t bytes, + struct nvgpu_vidmem_buf **vidmem_buf) +{ + struct nvgpu_vidmem_buf *buf; + int err; + + if (vidmem_buf == NULL) { + return -EINVAL; + } + + err = nvgpu_vidmem_clear_all(g); + if (err != 0) { + return -ENOMEM; + } + + buf = nvgpu_kzalloc(g, sizeof(*buf)); + if (buf == NULL) { + return -ENOMEM; + } + + buf->g = g; + buf->mem = nvgpu_kzalloc(g, sizeof(*buf->mem)); + if (buf->mem == NULL) { + err = -ENOMEM; + goto fail; + } + + err = nvgpu_dma_alloc_vid(g, bytes, buf->mem); + if (err != 0) { + goto fail; + } + + /* + * Alerts the DMA API that when we free this vidmem buf we have to + * clear it to avoid 
leaking data to userspace. + */ + buf->mem->mem_flags |= NVGPU_MEM_FLAG_USER_MEM; + + *vidmem_buf = buf; + + return 0; + +fail: + /* buf will never be NULL here. */ + nvgpu_kfree(g, buf->mem); + nvgpu_kfree(g, buf); + return err; +} + +void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf) +{ + /* + * In some error paths it's convenient to be able to "free" a NULL buf. + */ + if (buf == NULL) { + return; + } + + nvgpu_dma_free(g, buf->mem); + + /* + * We don't free buf->mem here. This is handled by nvgpu_dma_free()! + * Since these buffers are cleared in the background the nvgpu_mem + * struct must live on through that. We transfer ownership here to the + * DMA API and let the DMA API free the buffer. + */ + nvgpu_kfree(g, buf); +} diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c new file mode 100644 index 000000000..b62574eea --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -0,0 +1,1730 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nvgpu_ctag_buffer_info { + u64 size; + u32 pgsz_idx; + u32 flags; + +#ifdef CONFIG_NVGPU_COMPRESSION + s16 compr_kind; +#endif + s16 incompr_kind; + + u32 ctag_offset; +}; + +#ifdef CONFIG_NVGPU_COMPRESSION +static int nvgpu_vm_compute_compression(struct vm_gk20a *vm, + struct nvgpu_ctag_buffer_info *binfo); +#endif + +static void nvgpu_vm_do_unmap(struct nvgpu_mapped_buf *mapped_buffer, + struct vm_gk20a_mapping_batch *batch); + +/* + * Attempt to find a reserved memory area to determine PTE size for the passed + * mapping. If no reserved area can be found use small pages. + */ +static u32 nvgpu_vm_get_pte_size_fixed_map(struct vm_gk20a *vm, u64 base) +{ + struct nvgpu_vm_area *vm_area; + + vm_area = nvgpu_vm_area_find(vm, base); + if (vm_area == NULL) { + return GMMU_PAGE_SIZE_SMALL; + } + + return vm_area->pgsz_idx; +} + +/* + * This is for when the address space does not support unified address spaces. 
+ */ +static u32 nvgpu_vm_get_pte_size_split_addr(struct vm_gk20a *vm, + u64 base, u64 size) +{ + if (base == 0ULL) { + if (size >= vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]) { + return GMMU_PAGE_SIZE_BIG; + } + return GMMU_PAGE_SIZE_SMALL; + } else { + if (base < nvgpu_gmmu_va_small_page_limit()) { + return GMMU_PAGE_SIZE_SMALL; + } else { + return GMMU_PAGE_SIZE_BIG; + } + } +} + +/* + * This determines the PTE size for a given alloc. Used by both the GVA space + * allocator and the mm core code so that agreement can be reached on how to + * map allocations. + * + * The page size of a buffer is this: + * + * o If the VM doesn't support large pages then obviously small pages + * must be used. + * o If the base address is non-zero (fixed address map): + * - Attempt to find a reserved memory area and use the page size + * based on that. + * - If no reserved page size is available, default to small pages. + * o If the base is zero and we have an SMMU: + * - If the size is larger than or equal to the big page size, use big + * pages. + * - Otherwise use small pages. + * o If there's no SMMU: + * - Regardless of buffer size use small pages since we have no + * - guarantee of contiguity. + */ +static u32 nvgpu_vm_get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) +{ + struct gk20a *g = gk20a_from_vm(vm); + + if (!vm->big_pages) { + return GMMU_PAGE_SIZE_SMALL; + } + + if (!vm->unified_va) { + return nvgpu_vm_get_pte_size_split_addr(vm, base, size); + } + + if (base != 0ULL) { + return nvgpu_vm_get_pte_size_fixed_map(vm, base); + } + + if ((size >= vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]) && + nvgpu_iommuable(g)) { + return GMMU_PAGE_SIZE_BIG; + } + return GMMU_PAGE_SIZE_SMALL; +} + +int vm_aspace_id(struct vm_gk20a *vm) +{ + return (vm->as_share != NULL) ? vm->as_share->id : -1; +} + +int nvgpu_vm_bind_channel(struct vm_gk20a *vm, struct nvgpu_channel *ch) +{ + if (ch == NULL) { + return -EINVAL; + } + + nvgpu_log_fn(ch->g, " "); + + nvgpu_vm_get(vm); + ch->vm = vm; + nvgpu_channel_commit_va(ch); + + nvgpu_log(gk20a_from_vm(vm), gpu_dbg_map, "Binding ch=%d -> VM:%s", + ch->chid, vm->name); + + return 0; +} + +/* + * Determine how many bits of the address space each last level PDE covers. For + * example, for gp10b, with a last level address bit PDE range of 28 to 21 the + * amount of memory each last level PDE addresses is 21 bits - i.e 2MB. + */ +u32 nvgpu_vm_pde_coverage_bit_count(struct gk20a *g, u64 big_page_size) +{ + int final_pde_level = 0; + const struct gk20a_mmu_level *mmu_levels = + g->ops.mm.gmmu.get_mmu_levels(g, big_page_size); + + /* + * Find the second to last level of the page table programming + * heirarchy: the last level is PTEs so we really want the level + * before that which is the last level of PDEs. 
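To make the coverage computation concrete: with the gp10b numbers quoted above (last-level PDEs spanning VA bits 28..21), each such PDE maps 1 << 21 bytes, i.e. 2 MB, and that is the granularity the later big-page feasibility check compares both base and size against. A small sketch of that mask test (the function name is illustrative):

#include <stdbool.h>
#include <stdint.h>

static bool fits_big_pages(uint64_t base, uint64_t size,
                           unsigned int pde_cover_bits)
{
        uint64_t mask = (UINT64_C(1) << pde_cover_bits) - 1ULL;

        /* Both the GPU VA base and the size must be PDE aligned. */
        return ((base | size) & mask) == 0ULL;
}

/* fits_big_pages(0x200000, 0x400000, 21) -> true  (2 MB aligned)
 * fits_big_pages(0x210000, 0x400000, 21) -> false (base misaligned) */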
+ */ + while (mmu_levels[final_pde_level + 2].update_entry != NULL) { + final_pde_level++; + } + + return mmu_levels[final_pde_level].lo_bit[0]; +} + +NVGPU_COV_WHITELIST_BLOCK_BEGIN(deviate, 1, NVGPU_MISRA(Rule, 17_2), "TID-278") +static void nvgpu_vm_do_free_entries(struct vm_gk20a *vm, + struct nvgpu_gmmu_pd *pd, + u32 level) +{ + struct gk20a *g = gk20a_from_vm(vm); + u32 i; + + /* This limits recursion */ + nvgpu_assert(level < g->ops.mm.gmmu.get_max_page_table_levels(g)); + + if (pd->mem != NULL) { + nvgpu_pd_free(vm, pd); + pd->mem = NULL; + } + + if (pd->entries != NULL) { + for (i = 0; i < pd->num_entries; i++) { + nvgpu_assert(level < U32_MAX); + nvgpu_vm_do_free_entries(vm, &pd->entries[i], + level + 1U); + } + nvgpu_vfree(vm->mm->g, pd->entries); + pd->entries = NULL; + } +} +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 17_2)) + +static void nvgpu_vm_free_entries(struct vm_gk20a *vm, + struct nvgpu_gmmu_pd *pdb) +{ + struct gk20a *g = vm->mm->g; + u32 i; + + nvgpu_pd_free(vm, pdb); + + if (pdb->entries == NULL) { + return; + } + + for (i = 0; i < pdb->num_entries; i++) { + nvgpu_vm_do_free_entries(vm, &pdb->entries[i], 1U); + } + + nvgpu_vfree(g, pdb->entries); + pdb->entries = NULL; +} + +u64 nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size, u32 pgsz_idx) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_allocator *vma = NULL; + u64 addr; + u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; + + vma = vm->vma[pgsz_idx]; + + if (vm->guest_managed) { + nvgpu_err(g, "Illegal GPU allocation on behalf of guest OS"); + return 0; + } + + if (pgsz_idx >= GMMU_NR_PAGE_SIZES) { + nvgpu_err(g, "(%s) invalid page size requested", vma->name); + return 0; + } + + if ((pgsz_idx == GMMU_PAGE_SIZE_BIG) && !vm->big_pages) { + nvgpu_err(g, "(%s) unsupportd page size requested", vma->name); + return 0; + } + + /* Be certain we round up to page_size if needed */ + size = NVGPU_ALIGN(size, page_size); + + addr = nvgpu_alloc_pte(vma, size, page_size); + if (addr == 0ULL) { + nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size); + return 0; + } + + return addr; +} + +void nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr, u32 pgsz_idx) +{ + struct nvgpu_allocator *vma = vm->vma[pgsz_idx]; + + nvgpu_free(vma, addr); +} + +void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch) +{ + (void) memset(mapping_batch, 0, sizeof(*mapping_batch)); + mapping_batch->gpu_l2_flushed = false; + mapping_batch->need_tlb_invalidate = false; +} + +void nvgpu_vm_mapping_batch_finish_locked( + struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch) +{ + int err; + + /* hanging kref_put batch pointer? 
*/ +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") +NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") + WARN_ON(vm->kref_put_batch == mapping_batch); +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) +NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) + + if (mapping_batch->need_tlb_invalidate) { + struct gk20a *g = gk20a_from_vm(vm); + err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem); + if (err != 0) { + nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err); + } + } +} + +void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm, + struct vm_gk20a_mapping_batch *mapping_batch) +{ + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + nvgpu_vm_mapping_batch_finish_locked(vm, mapping_batch); + nvgpu_mutex_release(&vm->update_gmmu_lock); +} + +/* + * Determine if the passed address space can support big pages or not. + */ +bool nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size) +{ + u64 pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count( + gk20a_from_vm(vm), vm->big_page_size)); + u64 mask = nvgpu_safe_sub_u64(pde_size, 1ULL); + u64 base_big_page = base & mask; + u64 size_big_page = size & mask; + + if ((base_big_page != 0ULL) || (size_big_page != 0ULL)) { + return false; + } + return true; +} + +#ifdef CONFIG_NVGPU_SW_SEMAPHORE +/* + * Initialize a semaphore pool. Just return successfully if we do not need + * semaphores (i.e when sync-pts are active). + */ +static int nvgpu_init_sema_pool(struct vm_gk20a *vm) +{ + struct nvgpu_semaphore_sea *sema_sea; + struct mm_gk20a *mm = vm->mm; + struct gk20a *g = mm->g; + int err; + + /* + * Don't waste the memory on semaphores if we don't need them. + */ + if (nvgpu_has_syncpoints(g)) { + return 0; + } + + if (vm->sema_pool != NULL) { + return 0; + } + + sema_sea = nvgpu_semaphore_sea_create(g); + if (sema_sea == NULL) { + return -ENOMEM; + } + + err = nvgpu_semaphore_pool_alloc(sema_sea, &vm->sema_pool); + if (err != 0) { + return err; + } + + /* + * Allocate a chunk of GPU VA space for mapping the semaphores. We will + * do a fixed alloc in the kernel VM so that all channels have the same + * RO address range for the semaphores. + * + * !!! TODO: cleanup. + */ + nvgpu_semaphore_sea_allocate_gpu_va(sema_sea, &vm->kernel, + nvgpu_safe_sub_u64(vm->va_limit, + mm->channel.kernel_size), + 512U * NVGPU_CPU_PAGE_SIZE, + nvgpu_safe_cast_u64_to_u32(SZ_4K)); + if (nvgpu_semaphore_sea_get_gpu_va(sema_sea) == 0ULL) { + nvgpu_free(&vm->kernel, + nvgpu_semaphore_sea_get_gpu_va(sema_sea)); + nvgpu_vm_put(vm); + return -ENOMEM; + } + + err = nvgpu_semaphore_pool_map(vm->sema_pool, vm); + if (err != 0) { + nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); + nvgpu_free(vm->vma[GMMU_PAGE_SIZE_SMALL], + nvgpu_semaphore_pool_gpu_va(vm->sema_pool, false)); + return err; + } + + return 0; +} +#endif + +static int nvgpu_vm_init_user_vma(struct gk20a *g, struct vm_gk20a *vm, + u64 user_vma_start, u64 user_vma_limit, + const char *name) +{ + int err = 0; + char alloc_name[NVGPU_ALLOC_NAME_LEN]; + size_t name_len; + + name_len = strlen("gk20a_") + strlen(name); + if (name_len >= NVGPU_ALLOC_NAME_LEN) { + nvgpu_err(g, "Invalid MAX_NAME_SIZE %lu %u", name_len, + NVGPU_ALLOC_NAME_LEN); + return -EINVAL; + } + + /* + * User VMA. 
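The VMA initializers here first check that the composed allocator name fits and then build it with strcpy/strcat (or strncat). A more compact equivalent, offered only as an illustrative alternative and not as what the driver does, is snprintf with its return value checked for truncation:

#include <stddef.h>
#include <stdio.h>

/* Returns 0 on success, -1 if the composed name would not fit. */
static int build_alloc_name(char *buf, size_t buflen,
                            const char *prefix, const char *name,
                            const char *suffix)
{
        int n = snprintf(buf, buflen, "%s%s%s", prefix, name, suffix);

        return (n < 0 || (size_t)n >= buflen) ? -1 : 0;
}

/* e.g. build_alloc_name(buf, sizeof(buf), "gk20a_", vm_name, "_lp") */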
+ */ + if (user_vma_start < user_vma_limit) { + (void) strcpy(alloc_name, "gk20a_"); + (void) strcat(alloc_name, name); + err = nvgpu_allocator_init(g, &vm->user, + vm, alloc_name, + user_vma_start, + user_vma_limit - + user_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_ALLOC_GVA_SPACE, + BUDDY_ALLOCATOR); + if (err != 0) { + return err; + } + } else { + /* + * Make these allocator pointers point to the kernel allocator + * since we still use the legacy notion of page size to choose + * the allocator. + */ + vm->vma[0] = &vm->kernel; + vm->vma[1] = &vm->kernel; + } + return 0; +} + +static int nvgpu_vm_init_user_lp_vma(struct gk20a *g, struct vm_gk20a *vm, + u64 user_lp_vma_start, u64 user_lp_vma_limit, + const char *name) +{ + int err = 0; + char alloc_name[NVGPU_VM_NAME_LEN]; + size_t name_len; + const size_t prefix_len = strlen("gk20a_"); + + name_len = nvgpu_safe_add_u64(nvgpu_safe_add_u64(prefix_len, + strlen(name)), strlen("_lp")); + if (name_len >= NVGPU_VM_NAME_LEN) { + nvgpu_err(g, "Invalid MAX_NAME_SIZE %lu %u", name_len, + NVGPU_VM_NAME_LEN); + return -EINVAL; + } + + /* + * User VMA for large pages when a split address range is used. + */ + if (user_lp_vma_start < user_lp_vma_limit) { + (void) strcpy(alloc_name, "gk20a_"); + (void) strncat(alloc_name, name, nvgpu_safe_sub_u64( + NVGPU_VM_NAME_LEN, prefix_len)); + (void) strcat(alloc_name, "_lp"); + err = nvgpu_allocator_init(g, &vm->user_lp, + vm, alloc_name, + user_lp_vma_start, + user_lp_vma_limit - + user_lp_vma_start, + vm->big_page_size, + GPU_BALLOC_MAX_ORDER, + GPU_ALLOC_GVA_SPACE, + BUDDY_ALLOCATOR); + if (err != 0) { + return err; + } + } + return 0; +} + +static int nvgpu_vm_init_kernel_vma(struct gk20a *g, struct vm_gk20a *vm, + u64 kernel_vma_start, u64 kernel_vma_limit, + u64 kernel_vma_flags, const char *name) +{ + int err = 0; + char alloc_name[NVGPU_VM_NAME_LEN]; + size_t name_len; + const size_t prefix_len = strlen("gk20a_"); + + name_len = nvgpu_safe_add_u64(nvgpu_safe_add_u64(prefix_len, + strlen(name)),strlen("-sys")); + if (name_len >= NVGPU_VM_NAME_LEN) { + nvgpu_err(g, "Invalid MAX_NAME_SIZE %lu %u", name_len, + NVGPU_VM_NAME_LEN); + return -EINVAL; + } + + /* + * Kernel VMA. 
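The combined initializer further below tears down whichever allocators were successfully set up when a later step fails. A generic sketch of that unwind idiom, using plain malloc/free and cascading labels instead of the driver's initialized-state checks (purely illustrative):

#include <stdlib.h>

static int init_three(void **a, void **b, void **c)
{
        *a = malloc(16);
        if (*a == NULL)
                goto fail_a;
        *b = malloc(16);
        if (*b == NULL)
                goto fail_b;
        *c = malloc(16);
        if (*c == NULL)
                goto fail_c;
        return 0;

fail_c:
        free(*b);               /* undo step two */
fail_b:
        free(*a);               /* undo step one */
fail_a:
        return -1;
}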
+ */ + if (kernel_vma_start < kernel_vma_limit) { + (void) strcpy(alloc_name, "gk20a_"); + (void) strncat(alloc_name, name, nvgpu_safe_sub_u64( + NVGPU_VM_NAME_LEN, prefix_len)); + (void) strcat(alloc_name, "-sys"); + err = nvgpu_allocator_init(g, &vm->kernel, + vm, alloc_name, + kernel_vma_start, + kernel_vma_limit - + kernel_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + kernel_vma_flags, + BUDDY_ALLOCATOR); + if (err != 0) { + return err; + } + } + return 0; +} + +static int nvgpu_vm_init_vma_allocators(struct gk20a *g, struct vm_gk20a *vm, + u64 user_vma_start, u64 user_vma_limit, + u64 user_lp_vma_start, u64 user_lp_vma_limit, + u64 kernel_vma_start, u64 kernel_vma_limit, + u64 kernel_vma_flags, const char *name) +{ + int err = 0; + + err = nvgpu_vm_init_user_vma(g, vm, + user_vma_start, user_vma_limit, name); + if (err != 0) { + return err; + } + + err = nvgpu_vm_init_user_lp_vma(g, vm, + user_lp_vma_start, user_lp_vma_limit, name); + if (err != 0) { + goto clean_up_allocators; + } + + err = nvgpu_vm_init_kernel_vma(g, vm, kernel_vma_start, + kernel_vma_limit, kernel_vma_flags, name); + if (err != 0) { + goto clean_up_allocators; + } + + return 0; + +clean_up_allocators: + if (nvgpu_alloc_initialized(&vm->kernel)) { + nvgpu_alloc_destroy(&vm->kernel); + } + if (nvgpu_alloc_initialized(&vm->user)) { + nvgpu_alloc_destroy(&vm->user); + } + if (nvgpu_alloc_initialized(&vm->user_lp)) { + nvgpu_alloc_destroy(&vm->user_lp); + } + return err; +} + +static void nvgpu_vm_init_check_big_pages(struct vm_gk20a *vm, + u64 user_vma_start, u64 user_vma_limit, + u64 user_lp_vma_start, u64 user_lp_vma_limit, + bool big_pages, bool unified_va) +{ + /* + * Determine if big pages are possible in this VM. If a split address + * space is used then check the user_lp vma instead of the user vma. + */ + if (!big_pages) { + vm->big_pages = false; + } else { + if (unified_va) { + vm->big_pages = nvgpu_big_pages_possible(vm, + user_vma_start, + nvgpu_safe_sub_u64(user_vma_limit, + user_vma_start)); + } else { + vm->big_pages = nvgpu_big_pages_possible(vm, + user_lp_vma_start, + nvgpu_safe_sub_u64(user_lp_vma_limit, + user_lp_vma_start)); + } + } +} + +static int nvgpu_vm_init_check_vma_limits(struct gk20a *g, struct vm_gk20a *vm, + u64 user_vma_start, u64 user_vma_limit, + u64 user_lp_vma_start, u64 user_lp_vma_limit, + u64 kernel_vma_start, u64 kernel_vma_limit) +{ + if ((user_vma_start > user_vma_limit) || + (user_lp_vma_start > user_lp_vma_limit) || + (!vm->guest_managed && + (kernel_vma_start >= kernel_vma_limit))) { + nvgpu_err(g, "Invalid vm configuration"); + nvgpu_do_assert(); + return -EINVAL; + } + + /* + * A "user" area only makes sense for the GVA spaces. For VMs where + * there is no "user" area user_vma_start will be equal to + * user_vma_limit (i.e a 0 sized space). In such a situation the kernel + * area must be non-zero in length. + */ + if ((user_vma_start >= user_vma_limit) && + (kernel_vma_start >= kernel_vma_limit)) { + return -EINVAL; + } + + return 0; +} + +static int nvgpu_vm_init_vma(struct gk20a *g, struct vm_gk20a *vm, + u64 user_reserved, + u64 kernel_reserved, + u64 small_big_split, + bool big_pages, + bool unified_va, + const char *name) +{ + int err = 0; + u64 kernel_vma_flags = 0ULL; + u64 user_vma_start, user_vma_limit; + u64 user_lp_vma_start, user_lp_vma_limit; + u64 kernel_vma_start, kernel_vma_limit; + + /* Setup vma limits. 
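The limit setup that follows carves the aperture into user, optional user-large-page and kernel ranges. With purely hypothetical inputs, say va_start = 1 MB, va_limit = 1 TB, kernel_reserved = 4 GB and small_big_split = 512 GB for a split VA, the ranges fall out of this small sketch of the same arithmetic:

#include <stdint.h>

struct va_ranges {
        uint64_t user_start, user_limit;
        uint64_t user_lp_start, user_lp_limit;
        uint64_t kernel_start, kernel_limit;
};

/* Mirrors the carve-up below for the case user_reserved > 0. */
static struct va_ranges carve_va(uint64_t va_start, uint64_t va_limit,
                                 uint64_t kernel_reserved,
                                 uint64_t small_big_split, int split_va)
{
        struct va_ranges r;
        uint64_t user_end = va_limit - kernel_reserved;

        r.user_start = va_start;
        r.user_limit = split_va ? small_big_split : user_end;
        r.user_lp_start = r.user_limit;
        r.user_lp_limit = user_end;
        r.kernel_start = user_end;
        r.kernel_limit = va_limit;
        return r;
}

/* Unified VA: user = [1 MB, 1 TB - 4 GB), user_lp empty,
 *             kernel = [1 TB - 4 GB, 1 TB).
 * Split VA:   user = [1 MB, 512 GB), user_lp = [512 GB, 1 TB - 4 GB). */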
*/ + if (user_reserved > 0ULL) { + kernel_vma_flags = GPU_ALLOC_GVA_SPACE; + /* + * If big_pages are disabled for this VM then it only makes + * sense to make one VM, same as if the unified address flag + * is set. + */ + if (!big_pages || unified_va) { + user_vma_start = vm->va_start; + user_vma_limit = nvgpu_safe_sub_u64(vm->va_limit, + kernel_reserved); + user_lp_vma_start = user_vma_limit; + user_lp_vma_limit = user_vma_limit; + } else { + /* + * Ensure small_big_split falls between user vma + * start and end. + */ + if ((small_big_split <= vm->va_start) || + (small_big_split >= + nvgpu_safe_sub_u64(vm->va_limit, + kernel_reserved))) { + return -EINVAL; + } + + user_vma_start = vm->va_start; + user_vma_limit = small_big_split; + user_lp_vma_start = small_big_split; + user_lp_vma_limit = nvgpu_safe_sub_u64(vm->va_limit, + kernel_reserved); + } + } else { + user_vma_start = 0; + user_vma_limit = 0; + user_lp_vma_start = 0; + user_lp_vma_limit = 0; + } + kernel_vma_start = nvgpu_safe_sub_u64(vm->va_limit, kernel_reserved); + kernel_vma_limit = vm->va_limit; + + nvgpu_log_info(g, "user_vma [0x%llx,0x%llx)", + user_vma_start, user_vma_limit); + if (!unified_va) { + nvgpu_log_info(g, "user_lp_vma [0x%llx,0x%llx)", + user_lp_vma_start, user_lp_vma_limit); + } + nvgpu_log_info(g, "kernel_vma [0x%llx,0x%llx)", + kernel_vma_start, kernel_vma_limit); + + err = nvgpu_vm_init_check_vma_limits(g, vm, + user_vma_start, user_vma_limit, + user_lp_vma_start, user_lp_vma_limit, + kernel_vma_start, kernel_vma_limit); + if (err != 0) { + goto clean_up_page_tables; + } + + nvgpu_vm_init_check_big_pages(vm, user_vma_start, user_vma_limit, + user_lp_vma_start, user_lp_vma_limit, + big_pages, unified_va); + + err = nvgpu_vm_init_vma_allocators(g, vm, + user_vma_start, user_vma_limit, + user_lp_vma_start, user_lp_vma_limit, + kernel_vma_start, kernel_vma_limit, + kernel_vma_flags, name); + if (err != 0) { + goto clean_up_page_tables; + } + + return 0; + +clean_up_page_tables: + /* Cleans up nvgpu_gmmu_init_page_table() */ + nvgpu_pd_free(vm, &vm->pdb); + return err; +} + +static int nvgpu_vm_init_attributes(struct mm_gk20a *mm, + struct vm_gk20a *vm, + u32 big_page_size, + u64 low_hole, + u64 user_reserved, + u64 kernel_reserved, + bool big_pages, + bool userspace_managed, + bool unified_va, + const char *name) +{ + struct gk20a *g = gk20a_from_mm(mm); + u64 aperture_size; + u64 default_aperture_size; + + g->ops.mm.get_default_va_sizes(&default_aperture_size, NULL, NULL); + + aperture_size = nvgpu_safe_add_u64(kernel_reserved, + nvgpu_safe_add_u64(user_reserved, low_hole)); + + if (aperture_size > default_aperture_size) { + nvgpu_do_assert_print(g, + "Overlap between user and kernel spaces"); + return -ENOMEM; + } + + if (vm->guest_managed && (kernel_reserved != 0U)) { + nvgpu_do_assert_print(g, + "Cannot use guest managed VM with kernel space"); + return -EINVAL; + } + + nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, " + "LP size=0x%x lowhole=0x%llx", + name, aperture_size, + (unsigned int)big_page_size, low_hole); + + vm->mm = mm; + + vm->gmmu_page_sizes[GMMU_PAGE_SIZE_SMALL] = + nvgpu_safe_cast_u64_to_u32(SZ_4K); + vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG] = big_page_size; + vm->gmmu_page_sizes[GMMU_PAGE_SIZE_KERNEL] = + nvgpu_safe_cast_u64_to_u32(NVGPU_CPU_PAGE_SIZE); + + /* Set up vma pointers. 
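The vma[] setup that follows routes each page-size index to an allocator; only the big-page index depends on whether the VA space is unified. A condensed sketch of that routing (the enum and function names are illustrative):

enum { PG_SMALL, PG_BIG, PG_KERNEL, PG_NR };

struct alloc;   /* opaque allocator handle, for illustration only */

static void route_vmas(struct alloc *vma[PG_NR],
                       struct alloc *user, struct alloc *user_lp,
                       struct alloc *kernel, int unified_va)
{
        vma[PG_SMALL]  = user;
        vma[PG_BIG]    = unified_va ? user : user_lp;
        vma[PG_KERNEL] = kernel;
}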
*/ + vm->vma[GMMU_PAGE_SIZE_SMALL] = &vm->user; + vm->vma[GMMU_PAGE_SIZE_BIG] = &vm->user; + vm->vma[GMMU_PAGE_SIZE_KERNEL] = &vm->kernel; + if (!unified_va) { + vm->vma[GMMU_PAGE_SIZE_BIG] = &vm->user_lp; + } + + vm->va_start = low_hole; + vm->va_limit = aperture_size; + + vm->big_page_size = vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]; + vm->userspace_managed = userspace_managed; + vm->unified_va = unified_va; + vm->mmu_levels = + g->ops.mm.gmmu.get_mmu_levels(g, vm->big_page_size); + +#ifdef CONFIG_NVGPU_GR_VIRTUALIZATION + if (g->is_virtual && userspace_managed) { + nvgpu_err(g, "vGPU: no userspace managed addr space support"); + return -ENOSYS; + } +#endif + return 0; +} + +/* + * Initialize a preallocated vm. + */ +int nvgpu_vm_do_init(struct mm_gk20a *mm, + struct vm_gk20a *vm, + u32 big_page_size, + u64 low_hole, + u64 user_reserved, + u64 kernel_reserved, + u64 small_big_split, + bool big_pages, + bool userspace_managed, + bool unified_va, + const char *name) +{ + struct gk20a *g = gk20a_from_mm(mm); + int err = 0; + + err = nvgpu_vm_init_attributes(mm, vm, big_page_size, low_hole, + user_reserved, kernel_reserved, big_pages, userspace_managed, + unified_va, name); + if (err != 0) { + return err; + } + + if (g->ops.mm.vm_as_alloc_share != NULL) { + err = g->ops.mm.vm_as_alloc_share(g, vm); + if (err != 0) { + nvgpu_err(g, "Failed to init gpu vm!"); + return err; + } + } + + /* Initialize the page table data structures. */ + (void) strncpy(vm->name, name, + min(strlen(name), (size_t)(sizeof(vm->name)-1ULL))); + err = nvgpu_gmmu_init_page_table(vm); + if (err != 0) { + goto clean_up_gpu_vm; + } + + err = nvgpu_vm_init_vma(g, vm, user_reserved, kernel_reserved, + small_big_split, big_pages, unified_va, name); + if (err != 0) { + goto clean_up_gpu_vm; + } + + vm->mapped_buffers = NULL; + + nvgpu_mutex_init(&vm->syncpt_ro_map_lock); + nvgpu_mutex_init(&vm->update_gmmu_lock); + + nvgpu_ref_init(&vm->ref); + nvgpu_init_list_node(&vm->vm_area_list); + +#ifdef CONFIG_NVGPU_SW_SEMAPHORE + /* + * This is only necessary for channel address spaces. The best way to + * distinguish channel address spaces from other address spaces is by + * size - if the address space is 4GB or less, it's not a channel. + */ + if (vm->va_limit > 4ULL * SZ_1G) { + err = nvgpu_init_sema_pool(vm); + if (err != 0) { + goto clean_up_gmmu_lock; + } + } +#endif + + return 0; + +#ifdef CONFIG_NVGPU_SW_SEMAPHORE +clean_up_gmmu_lock: + nvgpu_mutex_destroy(&vm->update_gmmu_lock); + nvgpu_mutex_destroy(&vm->syncpt_ro_map_lock); +#endif +clean_up_gpu_vm: + if (g->ops.mm.vm_as_free_share != NULL) { + g->ops.mm.vm_as_free_share(vm); + } + return err; +} + +/** + * nvgpu_init_vm() - Initialize an address space. + * + * @mm - Parent MM. + * @vm - The VM to init. + * @big_page_size - Size of big pages associated with this VM. + * @low_hole - The size of the low hole (unaddressable memory at the bottom of + * the address space). + * @user_reserved - Space reserved for user allocations.. + * @kernel_reserved - Space reserved for kernel only allocations. + * @big_pages - If true then big pages are possible in the VM. Note this does + * not guarantee that big pages will be possible. + * @name - Name of the address space. + * + * This function initializes an address space according to the following map: + * + * +--+ 0x0 + * | | + * +--+ @low_hole + * | | + * ~ ~ This is the "user" section. + * | | + * +--+ @aperture_size - @kernel_reserved + * | | + * ~ ~ This is the "kernel" section. 
+ * | | + * +--+ @aperture_size + * + * The user section is therefor what ever is left over after the @low_hole and + * @kernel_reserved memory have been portioned out. The @kernel_reserved is + * always persent at the top of the memory space and the @low_hole is always at + * the bottom. + * + * For certain address spaces a "user" section makes no sense (bar1, etc) so in + * such cases the @kernel_reserved and @low_hole should sum to exactly + * @aperture_size. + */ +struct vm_gk20a *nvgpu_vm_init(struct gk20a *g, + u32 big_page_size, + u64 low_hole, + u64 user_reserved, + u64 kernel_reserved, + u64 small_big_split, + bool big_pages, + bool userspace_managed, + bool unified_va, + const char *name) +{ + struct vm_gk20a *vm = nvgpu_kzalloc(g, sizeof(*vm)); + int err; + + if (vm == NULL) { + return NULL; + } + + err = nvgpu_vm_do_init(&g->mm, vm, big_page_size, low_hole, + user_reserved, kernel_reserved, small_big_split, + big_pages, userspace_managed, unified_va, name); + if (err != 0) { + nvgpu_kfree(g, vm); + return NULL; + } + + return vm; +} + +/* + * Cleanup the VM! + */ +static void nvgpu_vm_remove(struct vm_gk20a *vm) +{ + struct nvgpu_mapped_buf *mapped_buffer; + struct nvgpu_vm_area *vm_area; + struct nvgpu_rbtree_node *node = NULL; + struct gk20a *g = vm->mm->g; + bool done; + +#ifdef CONFIG_NVGPU_SW_SEMAPHORE + /* + * Do this outside of the update_gmmu_lock since unmapping the semaphore + * pool involves unmapping a GMMU mapping which means aquiring the + * update_gmmu_lock. + */ + if (!nvgpu_has_syncpoints(g)) { + if (vm->sema_pool != NULL) { + nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); + nvgpu_semaphore_pool_put(vm->sema_pool); + } + } +#endif + + if (nvgpu_mem_is_valid(&g->syncpt_mem) && + (vm->syncpt_ro_map_gpu_va != 0ULL)) { + nvgpu_gmmu_unmap(vm, &g->syncpt_mem, + vm->syncpt_ro_map_gpu_va); + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); + while (node != NULL) { + mapped_buffer = mapped_buffer_from_rbtree_node(node); + nvgpu_vm_do_unmap(mapped_buffer, NULL); + nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); + } + + /* destroy remaining reserved memory areas */ + done = false; + do { + if (nvgpu_list_empty(&vm->vm_area_list)) { + done = true; + } else { + vm_area = nvgpu_list_first_entry(&vm->vm_area_list, + nvgpu_vm_area, + vm_area_list); + nvgpu_list_del(&vm_area->vm_area_list); + nvgpu_kfree(vm->mm->g, vm_area); + } + } while (!done); + + if (nvgpu_alloc_initialized(&vm->kernel)) { + nvgpu_alloc_destroy(&vm->kernel); + } + if (nvgpu_alloc_initialized(&vm->user)) { + nvgpu_alloc_destroy(&vm->user); + } + if (nvgpu_alloc_initialized(&vm->user_lp)) { + nvgpu_alloc_destroy(&vm->user_lp); + } + + nvgpu_vm_free_entries(vm, &vm->pdb); + + if (g->ops.mm.vm_as_free_share != NULL) { + g->ops.mm.vm_as_free_share(vm); + } + + nvgpu_mutex_release(&vm->update_gmmu_lock); + nvgpu_mutex_destroy(&vm->update_gmmu_lock); + + nvgpu_mutex_destroy(&vm->syncpt_ro_map_lock); + nvgpu_kfree(g, vm); +} + +static struct vm_gk20a *vm_gk20a_from_ref(struct nvgpu_ref *ref) +{ + return (struct vm_gk20a *) + ((uintptr_t)ref - offsetof(struct vm_gk20a, ref)); +} + +static void nvgpu_vm_remove_ref(struct nvgpu_ref *ref) +{ + struct vm_gk20a *vm = vm_gk20a_from_ref(ref); + + nvgpu_vm_remove(vm); +} + +void nvgpu_vm_get(struct vm_gk20a *vm) +{ + nvgpu_ref_get(&vm->ref); +} + +void nvgpu_vm_put(struct vm_gk20a *vm) +{ + nvgpu_ref_put(&vm->ref, nvgpu_vm_remove_ref); +} + +void nvgpu_insert_mapped_buf(struct vm_gk20a *vm, + struct 
nvgpu_mapped_buf *mapped_buffer) +{ + mapped_buffer->node.key_start = mapped_buffer->addr; + mapped_buffer->node.key_end = nvgpu_safe_add_u64(mapped_buffer->addr, + mapped_buffer->size); + + nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers); + nvgpu_assert(vm->num_user_mapped_buffers < U32_MAX); + vm->num_user_mapped_buffers++; +} + +static void nvgpu_remove_mapped_buf(struct vm_gk20a *vm, + struct nvgpu_mapped_buf *mapped_buffer) +{ + nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers); + nvgpu_assert(vm->num_user_mapped_buffers > 0U); + vm->num_user_mapped_buffers--; +} + +struct nvgpu_mapped_buf *nvgpu_vm_find_mapped_buf( + struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_search(addr, &node, root); + if (node == NULL) { + return NULL; + } + + return mapped_buffer_from_rbtree_node(node); +} + +struct nvgpu_mapped_buf *nvgpu_vm_find_mapped_buf_range( + struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_range_search(addr, &node, root); + if (node == NULL) { + return NULL; + } + + return mapped_buffer_from_rbtree_node(node); +} + +struct nvgpu_mapped_buf *nvgpu_vm_find_mapped_buf_less_than( + struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_less_than_search(addr, &node, root); + if (node == NULL) { + return NULL; + } + + return mapped_buffer_from_rbtree_node(node); +} + +int nvgpu_vm_get_buffers(struct vm_gk20a *vm, + struct nvgpu_mapped_buf ***mapped_buffers, + u32 *num_buffers) +{ + struct nvgpu_mapped_buf *mapped_buffer; + struct nvgpu_mapped_buf **buffer_list; + struct nvgpu_rbtree_node *node = NULL; + u32 i = 0; + + if (vm->userspace_managed) { + *mapped_buffers = NULL; + *num_buffers = 0; + return 0; + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + if (vm->num_user_mapped_buffers == 0U) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return 0; + } + + buffer_list = nvgpu_big_zalloc(vm->mm->g, + nvgpu_safe_mult_u64(sizeof(*buffer_list), + vm->num_user_mapped_buffers)); + if (buffer_list == NULL) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return -ENOMEM; + } + + nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); + while (node != NULL) { + mapped_buffer = mapped_buffer_from_rbtree_node(node); + buffer_list[i] = mapped_buffer; + nvgpu_ref_get(&mapped_buffer->ref); + nvgpu_assert(i < U32_MAX); + i++; + nvgpu_rbtree_enum_next(&node, node); + } + + if (i != vm->num_user_mapped_buffers) { + BUG(); + } + + *num_buffers = vm->num_user_mapped_buffers; + *mapped_buffers = buffer_list; + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + return 0; +} + +void nvgpu_vm_put_buffers(struct vm_gk20a *vm, + struct nvgpu_mapped_buf **mapped_buffers, + u32 num_buffers) +{ + u32 i; + struct vm_gk20a_mapping_batch batch; + + if (num_buffers == 0U) { + return; + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + nvgpu_vm_mapping_batch_start(&batch); + vm->kref_put_batch = &batch; + + for (i = 0U; i < num_buffers; ++i) { + nvgpu_ref_put(&mapped_buffers[i]->ref, + nvgpu_vm_unmap_ref_internal); + } + + vm->kref_put_batch = NULL; + nvgpu_vm_mapping_batch_finish_locked(vm, &batch); + nvgpu_mutex_release(&vm->update_gmmu_lock); + + nvgpu_big_free(vm->mm->g, mapped_buffers); +} + +static int nvgpu_vm_do_map(struct vm_gk20a *vm, + struct nvgpu_os_buffer *os_buf, + struct nvgpu_sgt *sgt, 
+ u64 *map_addr_ptr, + u64 map_size, + u64 phys_offset, + enum gk20a_mem_rw_flag rw, + u32 flags, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture, + struct nvgpu_ctag_buffer_info *binfo_ptr) +{ + struct gk20a *g = gk20a_from_vm(vm); + int err = 0; + bool clear_ctags = false; + u32 ctag_offset = 0; + u64 map_addr = *map_addr_ptr; + /* + * The actual GMMU PTE kind + */ + u8 pte_kind; + +#ifdef CONFIG_NVGPU_COMPRESSION + err = nvgpu_vm_compute_compression(vm, binfo_ptr); + if (err != 0) { + nvgpu_err(g, "failure setting up compression"); + goto ret_err; + } + + if ((binfo_ptr->compr_kind != NVGPU_KIND_INVALID) && + ((flags & NVGPU_VM_MAP_FIXED_OFFSET) != 0U)) { + /* + * Fixed-address compressible mapping is + * requested. Make sure we're respecting the alignment + * requirement for virtual addresses and buffer + * offsets. + * + * This check must be done before we may fall back to + * the incompressible kind. + */ + + const u64 offset_mask = g->ops.fb.compression_align_mask(g); + + if ((map_addr & offset_mask) != (phys_offset & offset_mask)) { + nvgpu_log(g, gpu_dbg_map, + "Misaligned compressible-kind fixed-address " + "mapping"); + err = -EINVAL; + goto ret_err; + } + } + + if (binfo_ptr->compr_kind != NVGPU_KIND_INVALID) { + struct gk20a_comptags comptags = { 0 }; + + /* + * Get the comptags state, alloc if necessary + */ + err = gk20a_alloc_or_get_comptags(g, os_buf, + &g->cbc->comp_tags, + &comptags); + if (err != 0) { + /* + * This is an irrecoverable failure and we need to + * abort. In particular, it is not safe to proceed with + * the incompressible fallback, since we cannot not mark + * our alloc failure anywere. Later we would retry + * allocation and break compressible map aliasing. + */ + nvgpu_err(g, "Error %d setting up comptags", err); + goto ret_err; + } + + /* + * Newly allocated comptags needs to be cleared + */ + if (comptags.needs_clear) { + if (g->ops.cbc.ctrl != NULL) { + if (gk20a_comptags_start_clear(os_buf)) { + err = g->ops.cbc.ctrl( + g, nvgpu_cbc_op_clear, + comptags.offset, + (comptags.offset + + comptags.lines - 1U)); + gk20a_comptags_finish_clear( + os_buf, err == 0); + if (err != 0) { + goto ret_err; + } + } + } else { + /* + * Cleared as part of gmmu map + */ + clear_ctags = true; + } + } + + /* + * Store the ctag offset for later use if we got the comptags + */ + if (comptags.lines != 0U) { + ctag_offset = comptags.offset; + } + } + + /* + * Figure out the kind and ctag offset for the GMMU page tables + */ + if (binfo_ptr->compr_kind != NVGPU_KIND_INVALID && ctag_offset != 0U) { + /* + * Adjust the ctag_offset as per the buffer map offset + */ + ctag_offset += (u32)(phys_offset >> + ilog2(g->ops.fb.compression_page_size(g))); + nvgpu_assert((binfo_ptr->compr_kind >= 0) && + (binfo_ptr->compr_kind <= (s16)U8_MAX)); + pte_kind = (u8)binfo_ptr->compr_kind; + binfo_ptr->ctag_offset = ctag_offset; + } else +#endif + if ((binfo_ptr->incompr_kind >= 0) && + (binfo_ptr->incompr_kind <= (s16)U8_MAX)) { + /* + * Incompressible kind, ctag offset will not be programmed + */ + ctag_offset = 0; + pte_kind = (u8)binfo_ptr->incompr_kind; + } else { + /* + * Caller required compression, but we cannot provide it + */ + nvgpu_err(g, "No comptags and no incompressible fallback kind"); + err = -ENOMEM; + goto ret_err; + } + +#ifdef CONFIG_NVGPU_COMPRESSION + if (clear_ctags) { + clear_ctags = gk20a_comptags_start_clear(os_buf); + } +#endif + + map_addr = g->ops.mm.gmmu.map(vm, + map_addr, + sgt, + phys_offset, + map_size, + binfo_ptr->pgsz_idx, + pte_kind, 
+ ctag_offset, + binfo_ptr->flags, + rw, + clear_ctags, + false, + false, + batch, + aperture); + +#ifdef CONFIG_NVGPU_COMPRESSION + if (clear_ctags) { + gk20a_comptags_finish_clear(os_buf, map_addr != 0U); + } +#endif + + if (map_addr == 0ULL) { + err = -ENOMEM; + goto ret_err; + } + + *map_addr_ptr = map_addr; + +ret_err: + return err; +} + +static int nvgpu_vm_new_mapping(struct vm_gk20a *vm, + struct nvgpu_os_buffer *os_buf, + struct nvgpu_mapped_buf *mapped_buffer, + struct nvgpu_sgt *sgt, + struct nvgpu_ctag_buffer_info *binfo_ptr, + u64 map_addr, u64 *map_size_ptr, + u64 phys_offset, s16 map_key_kind, + struct nvgpu_mapped_buf **mapped_buffer_arg) +{ + struct gk20a *g = gk20a_from_vm(vm); + u64 align; + u64 map_size = *map_size_ptr; + + /* + * Check if this buffer is already mapped. + */ + if (!vm->userspace_managed) { + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + mapped_buffer = nvgpu_vm_find_mapping(vm, + os_buf, + map_addr, + binfo_ptr->flags, + map_key_kind); + + if (mapped_buffer != NULL) { + nvgpu_ref_get(&mapped_buffer->ref); + nvgpu_mutex_release(&vm->update_gmmu_lock); + *mapped_buffer_arg = mapped_buffer; + return 1; + } + nvgpu_mutex_release(&vm->update_gmmu_lock); + } + + /* + * Generate a new mapping! + */ + mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); + if (mapped_buffer == NULL) { + nvgpu_warn(g, "oom allocating tracking buffer"); + return -ENOMEM; + } + *mapped_buffer_arg = mapped_buffer; + + align = nvgpu_sgt_alignment(g, sgt); + if (g->mm.disable_bigpage) { + binfo_ptr->pgsz_idx = GMMU_PAGE_SIZE_SMALL; + } else { + binfo_ptr->pgsz_idx = nvgpu_vm_get_pte_size(vm, map_addr, + min_t(u64, binfo_ptr->size, align)); + } + map_size = (map_size != 0ULL) ? map_size : binfo_ptr->size; + map_size = NVGPU_ALIGN(map_size, SZ_4K); + + if ((map_size > binfo_ptr->size) || + (phys_offset > (binfo_ptr->size - map_size))) { + return -EINVAL; + } + + *map_size_ptr = map_size; + return 0; +} + +static int nvgpu_vm_map_check_attributes(struct vm_gk20a *vm, + struct nvgpu_os_buffer *os_buf, + struct nvgpu_ctag_buffer_info *binfo_ptr, + u32 flags, + s16 compr_kind, + s16 incompr_kind, + s16 *map_key_kind_ptr) +{ + struct gk20a *g = gk20a_from_vm(vm); + + if (vm->userspace_managed && + ((flags & NVGPU_VM_MAP_FIXED_OFFSET) == 0U)) { + nvgpu_err(g, + "non-fixed-offset mapping not available on " + "userspace managed address spaces"); + return -EINVAL; + } + + binfo_ptr->flags = flags; + binfo_ptr->size = nvgpu_os_buf_get_size(os_buf); + if (binfo_ptr->size == 0UL) { + nvgpu_err(g, "Invalid buffer size"); + return -EINVAL; + } + binfo_ptr->incompr_kind = incompr_kind; + +#ifdef CONFIG_NVGPU_COMPRESSION + if (vm->enable_ctag && compr_kind != NVGPU_KIND_INVALID) { + binfo_ptr->compr_kind = compr_kind; + } else { + binfo_ptr->compr_kind = NVGPU_KIND_INVALID; + } + + if (compr_kind != NVGPU_KIND_INVALID) { + *map_key_kind_ptr = compr_kind; + } else { + *map_key_kind_ptr = incompr_kind; + } +#else + *map_key_kind_ptr = incompr_kind; +#endif + return 0; +} + +int nvgpu_vm_map(struct vm_gk20a *vm, + struct nvgpu_os_buffer *os_buf, + struct nvgpu_sgt *sgt, + u64 map_addr, + u64 map_size, + u64 phys_offset, + enum gk20a_mem_rw_flag rw, + u32 flags, + s16 compr_kind, + s16 incompr_kind, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture, + struct nvgpu_mapped_buf **mapped_buffer_arg) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_mapped_buf *mapped_buffer = NULL; + struct nvgpu_ctag_buffer_info binfo = { 0 }; + struct nvgpu_vm_area *vm_area = NULL; + int err 
= 0; + bool va_allocated = true; + + /* + * The kind used as part of the key for map caching. HW may + * actually be programmed with the fallback kind in case the + * key kind is compressible but we're out of comptags. + */ + s16 map_key_kind; + + *mapped_buffer_arg = NULL; + + err = nvgpu_vm_map_check_attributes(vm, os_buf, &binfo, flags, + compr_kind, incompr_kind, &map_key_kind); + if (err != 0) { + return err; + } + + err = nvgpu_vm_new_mapping(vm, os_buf, mapped_buffer, sgt, &binfo, + map_addr, &map_size, phys_offset, map_key_kind, + mapped_buffer_arg); + + mapped_buffer = *mapped_buffer_arg; + if (err < 0) { + goto clean_up_nolock; + } + if (err == 1) { + return 0; + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + /* + * Check if we should use a fixed offset for mapping this buffer. + */ + if ((flags & NVGPU_VM_MAP_FIXED_OFFSET) != 0U) { + err = nvgpu_vm_area_validate_buffer(vm, + map_addr, + map_size, + binfo.pgsz_idx, + &vm_area); + if (err != 0) { + goto clean_up; + } + + va_allocated = false; + } + + err = nvgpu_vm_do_map(vm, os_buf, sgt, &map_addr, + map_size, phys_offset, rw, flags, batch, + aperture, &binfo); + if (err != 0) { + goto clean_up; + } + + nvgpu_init_list_node(&mapped_buffer->buffer_list); + nvgpu_ref_init(&mapped_buffer->ref); + mapped_buffer->addr = map_addr; + mapped_buffer->size = map_size; + mapped_buffer->pgsz_idx = binfo.pgsz_idx; + mapped_buffer->vm = vm; + mapped_buffer->flags = binfo.flags; + mapped_buffer->kind = map_key_kind; + mapped_buffer->va_allocated = va_allocated; + mapped_buffer->vm_area = vm_area; + mapped_buffer->ctag_offset = binfo.ctag_offset; + mapped_buffer->rw_flag = rw; + mapped_buffer->aperture = aperture; + + nvgpu_insert_mapped_buf(vm, mapped_buffer); + + if (vm_area != NULL) { + nvgpu_list_add_tail(&mapped_buffer->buffer_list, + &vm_area->buffer_list_head); + mapped_buffer->vm_area = vm_area; + } + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + return 0; + +clean_up: + nvgpu_mutex_release(&vm->update_gmmu_lock); +clean_up_nolock: + nvgpu_kfree(g, mapped_buffer); + + return err; +} + +/* + * Really unmap. This does the real GMMU unmap and removes the mapping from the + * VM map tracking tree (and vm_area list if necessary). + */ +static void nvgpu_vm_do_unmap(struct nvgpu_mapped_buf *mapped_buffer, + struct vm_gk20a_mapping_batch *batch) +{ + struct vm_gk20a *vm = mapped_buffer->vm; + struct gk20a *g = vm->mm->g; + + g->ops.mm.gmmu.unmap(vm, + mapped_buffer->addr, + mapped_buffer->size, + mapped_buffer->pgsz_idx, + mapped_buffer->va_allocated, + gk20a_mem_flag_none, + (mapped_buffer->vm_area != NULL) ? + mapped_buffer->vm_area->sparse : false, + batch); + + /* + * Remove from mapped buffer tree. Then delete the buffer from the + * linked list of mapped buffers; though note: not all mapped buffers + * are part of a vm_area. + */ + nvgpu_remove_mapped_buf(vm, mapped_buffer); + nvgpu_list_del(&mapped_buffer->buffer_list); + + /* + * OS specific freeing. This is after the generic freeing incase the + * generic freeing relies on some component of the OS specific + * nvgpu_mapped_buf in some abstraction or the like. 
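Mapped buffers are torn down through a reference count: the last put runs a release callback (nvgpu_vm_unmap_ref_internal, defined below) that performs the real GMMU unmap, so a buffer still held by a get_buffers() caller survives until that caller drops it. A generic sketch of the kref-style pattern with C11 atomics (names are illustrative):

#include <stdatomic.h>

struct refcnt {
        atomic_int count;
        void (*release)(struct refcnt *r);
};

static void ref_get(struct refcnt *r)
{
        atomic_fetch_add(&r->count, 1);
}

/* When the last reference is dropped the release callback runs; for a
 * mapped buffer that is where the real unmap would happen. */
static void ref_put(struct refcnt *r)
{
        if (atomic_fetch_sub(&r->count, 1) == 1)
                r->release(r);
}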
+ */ + nvgpu_vm_unmap_system(mapped_buffer); + + nvgpu_kfree(g, mapped_buffer); +} + +static struct nvgpu_mapped_buf *nvgpu_mapped_buf_from_ref(struct nvgpu_ref *ref) +{ + return (struct nvgpu_mapped_buf *) + ((uintptr_t)ref - offsetof(struct nvgpu_mapped_buf, ref)); +} + +/* + * Note: the update_gmmu_lock of the VM that owns this buffer must be locked + * before calling nvgpu_ref_put() with this function as the unref function + * argument since this can modify the tree of maps. + */ +void nvgpu_vm_unmap_ref_internal(struct nvgpu_ref *ref) +{ + struct nvgpu_mapped_buf *mapped_buffer = nvgpu_mapped_buf_from_ref(ref); + + nvgpu_vm_do_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch); +} + +/* + * For fixed-offset buffers we must sync the buffer. That means we wait for the + * buffer to hit a ref-count of 1 before proceeding. + * + * Note: this requires the update_gmmu_lock to be held since we release it and + * re-aquire it in this function. + */ +static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm, + struct nvgpu_mapped_buf *mapped_buffer) +{ + struct nvgpu_timeout timeout; + int ret = 0; + bool done = false; + + /* + * 100ms timer. + */ + ret = nvgpu_timeout_init(vm->mm->g, &timeout, 100, + NVGPU_TIMER_CPU_TIMER); + if (ret != 0) { + nvgpu_err(vm->mm->g, "timeout_init failed (%d)", ret); + return ret; + } + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + do { + if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) <= 1) { + done = true; + } else if (nvgpu_timeout_expired_msg(&timeout, + "sync-unmap failed on 0x%llx", + mapped_buffer->addr) != 0) { + done = true; + } else { + nvgpu_msleep(10); + } + } while (!done); + + if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) > 1) { + ret = -ETIMEDOUT; + } + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + return ret; +} + +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset, + struct vm_gk20a_mapping_batch *batch) +{ + struct nvgpu_mapped_buf *mapped_buffer; + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + mapped_buffer = nvgpu_vm_find_mapped_buf(vm, offset); + if (mapped_buffer == NULL) { + goto done; + } + + if ((mapped_buffer->flags & NVGPU_VM_MAP_FIXED_OFFSET) != 0U) { + if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer) != 0) { + nvgpu_warn(vm->mm->g, "%d references remaining on 0x%llx", + nvgpu_atomic_read(&mapped_buffer->ref.refcount), + mapped_buffer->addr); + } + } + + /* + * Make sure we have access to the batch if we end up calling through to + * the unmap_ref function. 
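The kref_put_batch handoff used below lets a series of unmaps share a single TLB invalidate instead of flushing once per buffer. A stripped-down sketch of the batching idea (these names are illustrative, not the nvgpu API):

#include <stdbool.h>

struct unmap_batch {
        bool need_tlb_invalidate;
};

static void batch_start(struct unmap_batch *b)
{
        b->need_tlb_invalidate = false;
}

static void batch_mark_unmap(struct unmap_batch *b)
{
        b->need_tlb_invalidate = true;   /* set by each real unmap */
}

static void batch_finish(struct unmap_batch *b, void (*tlb_invalidate)(void))
{
        if (b->need_tlb_invalidate)
                tlb_invalidate();        /* one flush for the whole batch */
}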
+ */ + vm->kref_put_batch = batch; + nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_ref_internal); + vm->kref_put_batch = NULL; + +done: + nvgpu_mutex_release(&vm->update_gmmu_lock); + return; +} + +#ifdef CONFIG_NVGPU_COMPRESSION +static int nvgpu_vm_compute_compression(struct vm_gk20a *vm, + struct nvgpu_ctag_buffer_info *binfo) +{ + bool kind_compressible = (binfo->compr_kind != NVGPU_KIND_INVALID); + struct gk20a *g = gk20a_from_vm(vm); + + if (kind_compressible && + vm->gmmu_page_sizes[binfo->pgsz_idx] < + g->ops.fb.compressible_page_size(g)) { + /* + * Let's double check that there is a fallback kind + */ + if (binfo->incompr_kind == NVGPU_KIND_INVALID) { + nvgpu_err(g, + "Unsupported page size for compressible " + "kind, but no fallback kind"); + return -EINVAL; + } else { + nvgpu_log(g, gpu_dbg_map, + "Unsupported page size for compressible " + "kind, demoting to incompressible"); + binfo->compr_kind = NVGPU_KIND_INVALID; + } + } + + return 0; +} +#endif diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c new file mode 100644 index 000000000..2fb7593e0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vm_area.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
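The compression fallback above reduces to a small rule: a compressible kind survives only if the mapping's page size is at least the chip's compressible page size; below that threshold the request demotes to the incompressible fallback, or fails outright when no fallback was given. A hypothetical distillation of that rule (the threshold a real chip uses comes from g->ops.fb.compressible_page_size(), not from this sketch):

static int example_resolve_compression(u32 map_page_size, u32 compr_page_size,
				       s16 *compr_kind, s16 incompr_kind)
{
	if ((*compr_kind != NVGPU_KIND_INVALID) &&
	    (map_page_size < compr_page_size)) {
		if (incompr_kind == NVGPU_KIND_INVALID) {
			/* Compressible kind, small pages, no fallback. */
			return -EINVAL;
		}
		/* Demote: map without compression, keep the mapping usable. */
		*compr_kind = NVGPU_KIND_INVALID;
	}

	return 0;
}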
+ */ + +#include +#include +#include +#include +#include + +struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_vm_area *vm_area; + + nvgpu_list_for_each_entry(vm_area, &vm->vm_area_list, + nvgpu_vm_area, vm_area_list) { + if (addr >= vm_area->addr) { + if (addr < nvgpu_safe_add_u64(vm_area->addr, + vm_area->size)) { + return vm_area; + } + } + } + + return NULL; +} + +int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm, + u64 map_addr, u64 map_size, u32 pgsz_idx, + struct nvgpu_vm_area **pvm_area) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_vm_area *vm_area; + struct nvgpu_mapped_buf *buffer; + u64 map_end; + + /* can wrap around with insane map_size; zero is disallowed too */ + if (((U64_MAX - map_size) < map_addr) || (map_size == 0ULL)) { + nvgpu_warn(g, "fixed offset mapping with invalid map_size"); + return -EINVAL; + } + map_end = map_addr + map_size; + + if ((map_addr & + nvgpu_safe_sub_u64(U64(vm->gmmu_page_sizes[pgsz_idx]), U64(1))) + != 0ULL) { + nvgpu_err(g, "map offset must be buffer page size aligned 0x%llx", + map_addr); + return -EINVAL; + } + + /* Find the space reservation, but it's ok to have none for + * userspace-managed address spaces */ + vm_area = nvgpu_vm_area_find(vm, map_addr); + if ((vm_area == NULL) && !vm->userspace_managed) { + nvgpu_warn(g, "fixed offset mapping without space allocation"); + return -EINVAL; + } + + /* Mapped area should fit inside va, if there's one */ + if (vm_area != NULL) { + if (map_end > nvgpu_safe_add_u64(vm_area->addr, + vm_area->size)) { + nvgpu_warn(g, + "fixed offset mapping size overflows va node"); + return -EINVAL; + } + } + + /* check that this mapping does not collide with existing + * mappings by checking the buffer with the highest GPU VA + * that is less than our buffer end */ + buffer = nvgpu_vm_find_mapped_buf_less_than( + vm, map_end); + if (buffer != NULL) { + if (nvgpu_safe_add_u64(buffer->addr, buffer->size) > map_addr) { + nvgpu_warn(g, "overlapping buffer map requested"); + return -EINVAL; + } + } + + *pvm_area = vm_area; + + return 0; +} + +static int nvgpu_vm_area_alloc_get_pagesize_index(struct vm_gk20a *vm, + u32 *pgsz_idx_ptr, u32 page_size) +{ + u32 pgsz_idx = *pgsz_idx_ptr; + + for (; pgsz_idx < GMMU_NR_PAGE_SIZES; pgsz_idx++) { + if (vm->gmmu_page_sizes[pgsz_idx] == page_size) { + break; + } + } + + *pgsz_idx_ptr = pgsz_idx; + + if (pgsz_idx > GMMU_PAGE_SIZE_BIG) { + return -EINVAL; + } + + /* + * pgsz_idx isn't likely to get too crazy, since it starts at 0 and + * increments but this ensures that we still have a definitely valid + * page size before proceeding. 
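Two standard integer tricks carry the buffer validation above: map_addr & (page_size - 1) is zero exactly when the address is aligned to a power-of-two page size, and (U64_MAX - map_size) < map_addr detects that map_addr + map_size would wrap before the addition is ever performed. A self-contained illustration with example values (the 64 KiB page size is an assumption for the example, not a statement about any particular chip):

#include <stdbool.h>
#include <stdint.h>

/* True when [addr, addr + size) is non-empty, non-wrapping and aligned. */
static bool example_fixed_range_ok(uint64_t addr, uint64_t size,
				   uint64_t page_size /* power of two */)
{
	if ((size == 0ULL) || ((UINT64_MAX - size) < addr)) {
		return false;	/* empty or wrapping range */
	}

	return (addr & (page_size - 1ULL)) == 0ULL;
}

/*
 * example_fixed_range_ok(0x100010000ULL, 0x20000ULL, 0x10000ULL) -> true
 * example_fixed_range_ok(0x100011000ULL, 0x20000ULL, 0x10000ULL) -> false
 * (0x...11000 is not 64 KiB aligned)
 */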
+ */ + nvgpu_speculation_barrier(); + + if (!vm->big_pages && (pgsz_idx == GMMU_PAGE_SIZE_BIG)) { + return -EINVAL; + } + + return 0; +} + +static int nvgpu_vm_area_alloc_memory(struct nvgpu_allocator *vma, u64 our_addr, + u64 pages, u32 page_size, u32 flags, + u64 *vaddr_start_ptr) +{ + u64 vaddr_start = 0; + + if ((flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) != 0U) { + vaddr_start = nvgpu_alloc_fixed(vma, our_addr, + pages * + (u64)page_size, + page_size); + } else { + vaddr_start = nvgpu_alloc_pte(vma, + pages * + (u64)page_size, + page_size); + } + + if (vaddr_start == 0ULL) { + return -ENOMEM; + } + + *vaddr_start_ptr = vaddr_start; + return 0; +} + +static int nvgpu_vm_area_alloc_gmmu_map(struct vm_gk20a *vm, + struct nvgpu_vm_area *vm_area, u64 vaddr_start, + u32 pgsz_idx, u32 flags) +{ + struct gk20a *g = vm->mm->g; + + if ((flags & NVGPU_VM_AREA_ALLOC_SPARSE) != 0U) { + u64 map_addr = g->ops.mm.gmmu.map(vm, vaddr_start, + NULL, + 0, + vm_area->size, + pgsz_idx, + 0, + 0, + flags, + gk20a_mem_flag_none, + false, + true, + false, + NULL, + APERTURE_INVALID); + if (map_addr == 0ULL) { + return -ENOMEM; + } + + vm_area->sparse = true; + } + nvgpu_list_add_tail(&vm_area->vm_area_list, &vm->vm_area_list); + + return 0; +} + +int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u64 pages, u32 page_size, + u64 *addr, u32 flags) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_allocator *vma; + struct nvgpu_vm_area *vm_area; + u64 vaddr_start = 0; + u64 our_addr = *addr; + u32 pgsz_idx = GMMU_PAGE_SIZE_SMALL; + + /* + * If we have a fixed address then use the passed address in *addr. This + * corresponds to the o_a field in the IOCTL. But since we do not + * support specific alignments in the buddy allocator we ignore the + * field if it isn't a fixed offset. 
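Seen from a caller, the reserve/map/free flow is straightforward: reserve a (possibly sparse) stretch of GPU VA, map buffers into it, then release it, which also drops the mappings still parked on the area's buffer list. A usage sketch with illustrative sizes and a made-up fixed offset (none of the values below are required ones):

static int example_reserve_sparse_area(struct vm_gk20a *vm)
{
	u64 gpu_va = 0x200000000ULL;	/* example fixed GPU VA */
	u64 pages = 256ULL;
	u32 page_size = 4096U;	/* must match a GMMU page size of this VM */
	int err;

	err = nvgpu_vm_area_alloc(vm, pages, page_size, &gpu_va,
				  NVGPU_VM_AREA_ALLOC_FIXED_OFFSET |
				  NVGPU_VM_AREA_ALLOC_SPARSE);
	if (err != 0) {
		return err;
	}

	/*
	 * ... map buffers with NVGPU_VM_MAP_FIXED_OFFSET inside
	 * [gpu_va, gpu_va + pages * page_size) ...
	 */

	return nvgpu_vm_area_free(vm, gpu_va);
}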
+ */ + if ((flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) != 0U) { + our_addr = *addr; + } + + nvgpu_log(g, gpu_dbg_map, + "ADD vm_area: pgsz=%#-8x pages=%-9llu a/o=%#-14llx flags=0x%x", + page_size, pages, our_addr, flags); + + if (nvgpu_vm_area_alloc_get_pagesize_index(vm, &pgsz_idx, + page_size) != 0) { + return -EINVAL; + } + + vm_area = nvgpu_kzalloc(g, sizeof(*vm_area)); + if (vm_area == NULL) { + return -ENOMEM; + } + + vma = vm->vma[pgsz_idx]; + if (nvgpu_vm_area_alloc_memory(vma, our_addr, pages, + page_size, flags, &vaddr_start) != 0) { + goto free_vm_area; + } + + vm_area->flags = flags; + vm_area->addr = vaddr_start; + vm_area->size = (u64)page_size * pages; + vm_area->pgsz_idx = pgsz_idx; + nvgpu_init_list_node(&vm_area->buffer_list_head); + nvgpu_init_list_node(&vm_area->vm_area_list); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + if (nvgpu_vm_area_alloc_gmmu_map(vm, vm_area, vaddr_start, + pgsz_idx, flags) != 0) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + goto free_vaddr; + } + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + *addr = vaddr_start; + return 0; + +free_vaddr: + nvgpu_free(vma, vaddr_start); +free_vm_area: + nvgpu_kfree(g, vm_area); + return -ENOMEM; +} + +int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_mapped_buf *buffer; + struct nvgpu_vm_area *vm_area; + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + vm_area = nvgpu_vm_area_find(vm, addr); + if (vm_area == NULL) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return 0; + } + nvgpu_list_del(&vm_area->vm_area_list); + + nvgpu_log(g, gpu_dbg_map, + "DEL vm_area: pgsz=%#-8x pages=%-9llu " + "addr=%#-14llx flags=0x%x", + vm->gmmu_page_sizes[vm_area->pgsz_idx], + vm_area->size / vm->gmmu_page_sizes[vm_area->pgsz_idx], + vm_area->addr, + vm_area->flags); + + /* Decrement the ref count on all buffers in this vm_area. This + * allows userspace to let the kernel free mappings that are + * only used by this vm_area. */ + while (!nvgpu_list_empty(&vm_area->buffer_list_head)) { + buffer = nvgpu_list_first_entry(&vm_area->buffer_list_head, + nvgpu_mapped_buf, buffer_list); + nvgpu_list_del(&buffer->buffer_list); + nvgpu_ref_put(&buffer->ref, nvgpu_vm_unmap_ref_internal); + } + + /* if this was a sparse mapping, free the va */ + if (vm_area->sparse) { + g->ops.mm.gmmu.unmap(vm, + vm_area->addr, + vm_area->size, + vm_area->pgsz_idx, + false, + gk20a_mem_flag_none, + true, + NULL); + } + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + nvgpu_free(vm->vma[vm_area->pgsz_idx], vm_area->addr); + nvgpu_kfree(g, vm_area); + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/netlist/netlist.c b/drivers/gpu/nvgpu/common/netlist/netlist.c new file mode 100644 index 000000000..ab62f9fc0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/netlist/netlist.c @@ -0,0 +1,1077 @@ +/* + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +#include "nvgpu/nvgpu_next_netlist.h" +#endif + +#include "netlist_priv.h" +#include "netlist_defs.h" + +/* + * Need to support multiple ARCH in same GPU family + * then need to provide path like ARCH/NETIMAGE to + * point to correct netimage within GPU family, + * Example, gm20x can support gm204 or gm206,so path + * for netimage is gm204/NETC_img.bin, and '/' char + * will inserted at null terminator char of "GAxxx" + * to get complete path like gm204/NETC_img.bin + */ + +#define MAX_NETLIST_NAME (sizeof("GAxxx/") + sizeof("NET?_img.bin")) + +struct netlist_av *nvgpu_netlist_alloc_av_list(struct gk20a *g, + struct netlist_av_list *avl) +{ + avl->l = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(avl->count, + sizeof(*avl->l))); + return avl->l; +} + +struct netlist_av64 *nvgpu_netlist_alloc_av64_list(struct gk20a *g, + struct netlist_av64_list *av64l) +{ + av64l->l = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(av64l->count, + sizeof(*av64l->l))); + return av64l->l; +} + +struct netlist_aiv *nvgpu_netlist_alloc_aiv_list(struct gk20a *g, + struct netlist_aiv_list *aivl) +{ + aivl->l = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(aivl->count, + sizeof(*aivl->l))); + return aivl->l; +} + +u32 *nvgpu_netlist_alloc_u32_list(struct gk20a *g, + struct netlist_u32_list *u32l) +{ + u32l->l = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(u32l->count, + sizeof(*u32l->l))); + return u32l->l; +} + +static int nvgpu_netlist_alloc_load_u32_list(struct gk20a *g, u8 *src, u32 len, + struct netlist_u32_list *u32_list) +{ + u32_list->count = nvgpu_safe_add_u32(len, + nvgpu_safe_cast_u64_to_u32(sizeof(u32) - 1UL)) + / U32(sizeof(u32)); + if (nvgpu_netlist_alloc_u32_list(g, u32_list) == NULL) { + return -ENOMEM; + } + + nvgpu_memcpy((u8 *)u32_list->l, src, len); + + return 0; +} + +static int nvgpu_netlist_alloc_load_av_list(struct gk20a *g, u8 *src, u32 len, + struct netlist_av_list *av_list) +{ + av_list->count = len / U32(sizeof(struct netlist_av)); + if (nvgpu_netlist_alloc_av_list(g, av_list) == NULL) { + return -ENOMEM; + } + + nvgpu_memcpy((u8 *)av_list->l, src, len); + + return 0; +} + +static int nvgpu_netlist_alloc_load_av_list64(struct gk20a *g, u8 *src, u32 len, + struct netlist_av64_list *av64_list) +{ + av64_list->count = len / U32(sizeof(struct netlist_av64)); + if (nvgpu_netlist_alloc_av64_list(g, av64_list) == NULL) { + return -ENOMEM; + } + + nvgpu_memcpy((u8 *)av64_list->l, src, len); + + return 0; +} + +static int nvgpu_netlist_alloc_load_aiv_list(struct gk20a *g, u8 *src, u32 len, + struct netlist_aiv_list *aiv_list) +{ + aiv_list->count = len / U32(sizeof(struct netlist_aiv)); + if (nvgpu_netlist_alloc_aiv_list(g, aiv_list) == NULL) { + return -ENOMEM; + } + + nvgpu_memcpy((u8 *)aiv_list->l, src, len); + + return 0; +} + +static bool nvgpu_netlist_handle_ucode_region_id(struct gk20a *g, + u32 region_id, u8 *src, u32 size, + struct nvgpu_netlist_vars 
*netlist_vars, int *err_code) +{ + int err = 0; + bool handled = true; + + switch (region_id) { + case NETLIST_REGIONID_FECS_UCODE_DATA: + nvgpu_log_info(g, "NETLIST_REGIONID_FECS_UCODE_DATA"); + err = nvgpu_netlist_alloc_load_u32_list(g, + src, size, &netlist_vars->ucode.fecs.data); + break; + case NETLIST_REGIONID_FECS_UCODE_INST: + nvgpu_log_info(g, "NETLIST_REGIONID_FECS_UCODE_INST"); + err = nvgpu_netlist_alloc_load_u32_list(g, + src, size, &netlist_vars->ucode.fecs.inst); + break; + case NETLIST_REGIONID_GPCCS_UCODE_DATA: + nvgpu_log_info(g, "NETLIST_REGIONID_GPCCS_UCODE_DATA"); + err = nvgpu_netlist_alloc_load_u32_list(g, + src, size, &netlist_vars->ucode.gpccs.data); + break; + case NETLIST_REGIONID_GPCCS_UCODE_INST: + nvgpu_log_info(g, "NETLIST_REGIONID_GPCCS_UCODE_INST"); + err = nvgpu_netlist_alloc_load_u32_list(g, + src, size, &netlist_vars->ucode.gpccs.inst); + break; + default: + handled = false; + break; + } + + *err_code = err; + + return handled; +} + +static bool nvgpu_netlist_handle_sw_bundles_region_id(struct gk20a *g, + u32 region_id, u8 *src, u32 size, + struct nvgpu_netlist_vars *netlist_vars, int *err_code) +{ + int err = 0; + bool handled = true; + + switch (region_id) { + case NETLIST_REGIONID_SW_BUNDLE_INIT: + nvgpu_log_info(g, "NETLIST_REGIONID_SW_BUNDLE_INIT"); + err = nvgpu_netlist_alloc_load_av_list(g, + src, size, &netlist_vars->sw_bundle_init); + break; + case NETLIST_REGIONID_SW_METHOD_INIT: + nvgpu_log_info(g, "NETLIST_REGIONID_SW_METHOD_INIT"); + err = nvgpu_netlist_alloc_load_av_list(g, + src, size, &netlist_vars->sw_method_init); + break; + case NETLIST_REGIONID_SW_CTX_LOAD: + nvgpu_log_info(g, "NETLIST_REGIONID_SW_CTX_LOAD"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->sw_ctx_load); + break; + case NETLIST_REGIONID_SW_NON_CTX_LOAD: + nvgpu_log_info(g, "NETLIST_REGIONID_SW_NON_CTX_LOAD"); + err = nvgpu_netlist_alloc_load_av_list(g, + src, size, &netlist_vars->sw_non_ctx_load); + break; + case NETLIST_REGIONID_SWVEIDBUNDLEINIT: + nvgpu_log_info(g, "NETLIST_REGIONID_SW_VEID_BUNDLE_INIT"); + err = nvgpu_netlist_alloc_load_av_list(g, + src, size, &netlist_vars->sw_veid_bundle_init); + break; + case NETLIST_REGIONID_SW_BUNDLE64_INIT: + nvgpu_log_info(g, "NETLIST_REGIONID_SW_BUNDLE64_INIT"); + err = nvgpu_netlist_alloc_load_av_list64(g, + src, size, &netlist_vars->sw_bundle64_init); + break; + default: + handled = false; +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + handled = nvgpu_next_netlist_handle_sw_bundles_region_id(g, + region_id, src, size, netlist_vars, &err); +#endif + break; + } + + *err_code = err; + + return handled; +} + +static bool nvgpu_netlist_handle_generic_region_id(struct gk20a *g, + u32 region_id, u8 *src, u32 size, + u32 *major_v, u32 *netlist_num, + struct nvgpu_netlist_vars *netlist_vars) +{ + bool handled = true; + + switch (region_id) { + case NETLIST_REGIONID_BUFFER_SIZE: + nvgpu_memcpy((u8 *)&netlist_vars->buffer_size, + src, sizeof(u32)); + nvgpu_log_info(g, "NETLIST_REGIONID_BUFFER_SIZE : %d", + netlist_vars->buffer_size); + break; + case NETLIST_REGIONID_CTXSW_REG_BASE_INDEX: + nvgpu_memcpy((u8 *)&netlist_vars->regs_base_index, + src, sizeof(u32)); + nvgpu_log_info(g, "NETLIST_REGIONID_CTXSW_REG_BASE_INDEX : %u", + netlist_vars->regs_base_index); + break; + case NETLIST_REGIONID_MAJORV: + nvgpu_memcpy((u8 *)major_v, src, sizeof(u32)); + nvgpu_log_info(g, "NETLIST_REGIONID_MAJORV : %d", *major_v); + break; + case NETLIST_REGIONID_NETLIST_NUM: + nvgpu_memcpy((u8 *)netlist_num, 
src, sizeof(u32)); + nvgpu_log_info(g, "NETLIST_REGIONID_NETLIST_NUM : %d", + *netlist_num); + break; + default: + handled = false; + break; + } + + return handled; +} + +#ifdef CONFIG_NVGPU_DEBUGGER +static bool nvgpu_netlist_handle_debugger_region_id(struct gk20a *g, + u32 region_id, u8 *src, u32 size, + struct nvgpu_netlist_vars *netlist_vars, int *err_code) +{ + int err = 0; + bool handled = true; + + switch (region_id) { + case NETLIST_REGIONID_CTXREG_PM_SYS: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_SYS"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_sys); + break; + case NETLIST_REGIONID_CTXREG_PM_GPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_GPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_gpc); + break; + case NETLIST_REGIONID_CTXREG_PM_TPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PM_TPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_tpc); + break; + case NETLIST_REGIONID_CTXREG_PMPPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMPPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_ppc); + break; + case NETLIST_REGIONID_NVPERF_CTXREG_SYS: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_CTXREG_SYS"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_sys); + break; + case NETLIST_REGIONID_NVPERF_FBP_CTXREGS: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_CTXREGS"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.fbp); + break; + case NETLIST_REGIONID_NVPERF_CTXREG_GPC: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_CTXREG_GPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_gpc); + break; + case NETLIST_REGIONID_NVPERF_FBP_ROUTER: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_ROUTER"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.fbp_router); + break; + case NETLIST_REGIONID_NVPERF_GPC_ROUTER: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_GPC_ROUTER"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.gpc_router); + break; + case NETLIST_REGIONID_CTXREG_PMLTC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMLTC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_ltc); + break; + case NETLIST_REGIONID_CTXREG_PMFBPA: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMFBPA"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_fbpa); + break; + case NETLIST_REGIONID_NVPERF_SYS_ROUTER: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_SYS_ROUTER"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_sys_router); + break; + case NETLIST_REGIONID_NVPERF_PMA: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMA"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_pma); + break; + case NETLIST_REGIONID_CTXREG_PMUCGPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMUCGPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_ucgpc); + break; + case NETLIST_REGIONID_NVPERF_PMCAU: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMCAU"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_cau); + break; + case NETLIST_REGIONID_NVPERF_SYS_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_SYS_CONTROL"); + err = 
nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_sys_control); + break; + case NETLIST_REGIONID_NVPERF_FBP_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_CONTROL"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_fbp_control); + break; + case NETLIST_REGIONID_NVPERF_GPC_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_GPC_CONTROL"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_gpc_control); + break; + case NETLIST_REGIONID_NVPERF_PMA_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMA_CONTROL"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_pma_control); + break; + default: + handled = false; +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + handled = nvgpu_next_netlist_handle_debugger_region_id(g, + region_id, src, size, netlist_vars, &err); +#endif + break; + } + + if ((handled == false) && (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG))) { + handled = true; + switch (region_id) { + case NETLIST_REGIONID_CTXREG_SYS: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_SYS"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.sys); + break; + case NETLIST_REGIONID_CTXREG_GPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_GPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.gpc); + break; + case NETLIST_REGIONID_CTXREG_TPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_TPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.tpc); + break; +#ifdef CONFIG_NVGPU_GRAPHICS + case NETLIST_REGIONID_CTXREG_ZCULL_GPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_ZCULL_GPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.zcull_gpc); + break; +#endif + case NETLIST_REGIONID_CTXREG_PPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.ppc); + break; + case NETLIST_REGIONID_CTXREG_PMROP: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_PMROP"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.pm_rop); + break; + case NETLIST_REGIONID_CTXREG_ETPC: + nvgpu_log_info(g, "NETLIST_REGIONID_CTXREG_ETPC"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.etpc); + break; + default: + handled = false; + break; + } + } + *err_code = err; + + return handled; +} +#endif /* CONFIG_NVGPU_DEBUGGER */ + +static int nvgpu_netlist_handle_region_id(struct gk20a *g, + u32 region_id, u8 *src, u32 size, + u32 *major_v, u32 *netlist_num, + struct nvgpu_netlist_vars *netlist_vars) +{ + bool handled; + int err = 0; + + handled = nvgpu_netlist_handle_ucode_region_id(g, region_id, + src, size, netlist_vars, &err); + if ((err != 0) || handled) { + goto clean_up; + } + handled = nvgpu_netlist_handle_sw_bundles_region_id(g, region_id, + src, size, netlist_vars, &err); + if ((err != 0) || handled) { + goto clean_up; + } + handled = nvgpu_netlist_handle_generic_region_id(g, region_id, + src, size, major_v, netlist_num, + netlist_vars); + if (handled) { + goto clean_up; + } +#ifdef CONFIG_NVGPU_DEBUGGER + handled = nvgpu_netlist_handle_debugger_region_id(g, region_id, + src, size, netlist_vars, &err); + if ((err != 0) || handled) { + goto clean_up; + } +#endif /* CONFIG_NVGPU_DEBUGGER */ + + /* region id command not handled */ + nvgpu_log_info(g, "unrecognized 
region %d skipped", region_id); + +clean_up: + return err; +} + +static bool nvgpu_netlist_is_valid(int net, u32 major_v, u32 major_v_hw) +{ + if ((net != NETLIST_FINAL) && (major_v != major_v_hw)) { + return false; + } + return true; +} + +static int nvgpu_netlist_init_ctx_vars_fw(struct gk20a *g) +{ + struct nvgpu_netlist_vars *netlist_vars = g->netlist_vars; + struct nvgpu_firmware *netlist_fw; + struct netlist_image *netlist = NULL; + char name[MAX_NETLIST_NAME]; + u32 i, major_v = ~U32(0U), major_v_hw, netlist_num; + int net, max_netlist_num, err = -ENOENT; + + nvgpu_log_fn(g, " "); + + if (g->ops.netlist.is_fw_defined()) { + net = NETLIST_FINAL; + max_netlist_num = 0; + major_v_hw = ~U32(0U); + netlist_vars->dynamic = false; + } else { + net = NETLIST_SLOT_A; + max_netlist_num = MAX_NETLIST; + major_v_hw = + g->ops.gr.falcon.get_fecs_ctx_state_store_major_rev_id(g); + netlist_vars->dynamic = true; + } + + for (; net < max_netlist_num; net++) { + if (g->ops.netlist.get_netlist_name(g, net, name) != 0) { + nvgpu_warn(g, "invalid netlist index %d", net); + continue; + } + + netlist_fw = nvgpu_request_firmware(g, name, 0); + if (netlist_fw == NULL) { + nvgpu_warn(g, "failed to load netlist %s", name); + continue; + } + + netlist = (struct netlist_image *)(uintptr_t)netlist_fw->data; + + for (i = 0; i < netlist->header.regions; i++) { + u8 *src = ((u8 *)netlist + netlist->regions[i].data_offset); + u32 size = netlist->regions[i].data_size; + + err = nvgpu_netlist_handle_region_id(g, + netlist->regions[i].region_id, + src, size, &major_v, &netlist_num, + netlist_vars); + if (err != 0) { + goto clean_up; + } + } + + if (!nvgpu_netlist_is_valid(net, major_v, major_v_hw)) { + nvgpu_log_info(g, "skip %s: major_v 0x%08x doesn't match hw 0x%08x", + name, major_v, major_v_hw); + goto clean_up; + } + + g->netlist_valid = true; + + nvgpu_release_firmware(g, netlist_fw); + nvgpu_log_fn(g, "done"); + goto done; + +clean_up: + g->netlist_valid = false; + nvgpu_kfree(g, netlist_vars->ucode.fecs.inst.l); + nvgpu_kfree(g, netlist_vars->ucode.fecs.data.l); + nvgpu_kfree(g, netlist_vars->ucode.gpccs.inst.l); + nvgpu_kfree(g, netlist_vars->ucode.gpccs.data.l); + nvgpu_kfree(g, netlist_vars->sw_bundle_init.l); + nvgpu_kfree(g, netlist_vars->sw_bundle64_init.l); + nvgpu_kfree(g, netlist_vars->sw_veid_bundle_init.l); + nvgpu_kfree(g, netlist_vars->sw_method_init.l); + nvgpu_kfree(g, netlist_vars->sw_ctx_load.l); + nvgpu_kfree(g, netlist_vars->sw_non_ctx_load.l); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_netlist_deinit_ctx_vars(g); +#endif +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_kfree(g, netlist_vars->ctxsw_regs.sys.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.gpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.tpc.l); +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_kfree(g, netlist_vars->ctxsw_regs.zcull_gpc.l); +#endif + nvgpu_kfree(g, netlist_vars->ctxsw_regs.ppc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_sys.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_gpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_tpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ppc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.fbp.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.fbp_router.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.gpc_router.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ltc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_fbpa.l); + nvgpu_kfree(g, 
netlist_vars->ctxsw_regs.perf_sys_router.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_rop.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ucgpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.etpc.l); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_netlist_deinit_ctxsw_regs(g); +#endif + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_cau.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_fbp_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma_control.l); +#endif /* CONFIG_NVGPU_DEBUGGER */ + nvgpu_release_firmware(g, netlist_fw); + err = -ENOENT; + } + +done: + if (g->netlist_valid) { + nvgpu_log_info(g, "netlist image %s loaded", name); + return 0; + } else { + nvgpu_err(g, "failed to load netlist image!!"); + return err; + } +} + +int nvgpu_netlist_init_ctx_vars(struct gk20a *g) +{ + int err; + + if (g->netlist_valid == true) { + return 0; + } + + g->netlist_vars = nvgpu_kzalloc(g, sizeof(*g->netlist_vars)); + if (g->netlist_vars == NULL) { + return -ENOMEM; + } + +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + err = nvgpu_init_sim_netlist_ctx_vars(g); + if (err != 0) { + nvgpu_err(g, "nvgpu_init_sim_netlist_ctx_vars failed!"); + } + } else +#endif + { + err = nvgpu_netlist_init_ctx_vars_fw(g); + if (err != 0) { + nvgpu_err(g, "nvgpu_netlist_init_ctx_vars_fw failed!"); + } + } +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_netlist_print_ctxsw_reg_info(g); +#endif + + return err; +} + +void nvgpu_netlist_deinit_ctx_vars(struct gk20a *g) +{ + struct nvgpu_netlist_vars *netlist_vars = g->netlist_vars; + + if (netlist_vars == NULL) { + return; + } + + g->netlist_valid = false; + nvgpu_kfree(g, netlist_vars->ucode.fecs.inst.l); + nvgpu_kfree(g, netlist_vars->ucode.fecs.data.l); + nvgpu_kfree(g, netlist_vars->ucode.gpccs.inst.l); + nvgpu_kfree(g, netlist_vars->ucode.gpccs.data.l); + nvgpu_kfree(g, netlist_vars->sw_bundle_init.l); + nvgpu_kfree(g, netlist_vars->sw_bundle64_init.l); + nvgpu_kfree(g, netlist_vars->sw_veid_bundle_init.l); + nvgpu_kfree(g, netlist_vars->sw_method_init.l); + nvgpu_kfree(g, netlist_vars->sw_ctx_load.l); + nvgpu_kfree(g, netlist_vars->sw_non_ctx_load.l); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_netlist_deinit_ctx_vars(g); +#endif +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_kfree(g, netlist_vars->ctxsw_regs.sys.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.gpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.tpc.l); +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_kfree(g, netlist_vars->ctxsw_regs.zcull_gpc.l); +#endif + nvgpu_kfree(g, netlist_vars->ctxsw_regs.ppc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_sys.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_gpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_tpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ppc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.fbp.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.fbp_router.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.gpc_router.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ltc.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_fbpa.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys_router.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_rop.l); + 
nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ucgpc.l); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_netlist_deinit_ctxsw_regs(g); +#endif + nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_cau.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_fbp_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma_control.l); +#endif /* CONFIG_NVGPU_DEBUGGER */ + + nvgpu_kfree(g, netlist_vars); + g->netlist_vars = NULL; +} + +struct netlist_av_list *nvgpu_netlist_get_sw_non_ctx_load_av_list( + struct gk20a *g) +{ + return &g->netlist_vars->sw_non_ctx_load; +} + +struct netlist_aiv_list *nvgpu_netlist_get_sw_ctx_load_aiv_list( + struct gk20a *g) +{ + return &g->netlist_vars->sw_ctx_load; +} + +struct netlist_av_list *nvgpu_netlist_get_sw_method_init_av_list( + struct gk20a *g) +{ + return &g->netlist_vars->sw_method_init; +} + +struct netlist_av_list *nvgpu_netlist_get_sw_bundle_init_av_list( + struct gk20a *g) +{ + return &g->netlist_vars->sw_bundle_init; +} + +struct netlist_av_list *nvgpu_netlist_get_sw_veid_bundle_init_av_list( + struct gk20a *g) +{ + return &g->netlist_vars->sw_veid_bundle_init; +} + +struct netlist_av64_list *nvgpu_netlist_get_sw_bundle64_init_av64_list( + struct gk20a *g) +{ + return &g->netlist_vars->sw_bundle64_init; +} + +u32 nvgpu_netlist_get_fecs_inst_count(struct gk20a *g) +{ + return g->netlist_vars->ucode.fecs.inst.count; +} + +u32 nvgpu_netlist_get_fecs_data_count(struct gk20a *g) +{ + return g->netlist_vars->ucode.fecs.data.count; +} + +u32 nvgpu_netlist_get_gpccs_inst_count(struct gk20a *g) +{ + return g->netlist_vars->ucode.gpccs.inst.count; +} + +u32 nvgpu_netlist_get_gpccs_data_count(struct gk20a *g) +{ + return g->netlist_vars->ucode.gpccs.data.count; +} + +u32 *nvgpu_netlist_get_fecs_inst_list(struct gk20a *g) +{ + return g->netlist_vars->ucode.fecs.inst.l; +} + +u32 *nvgpu_netlist_get_fecs_data_list(struct gk20a *g) +{ + return g->netlist_vars->ucode.fecs.data.l; +} + +u32 *nvgpu_netlist_get_gpccs_inst_list(struct gk20a *g) +{ + return g->netlist_vars->ucode.gpccs.inst.l; +} + +u32 *nvgpu_netlist_get_gpccs_data_list(struct gk20a *g) +{ + return g->netlist_vars->ucode.gpccs.data.l; +} + +#ifdef CONFIG_NVGPU_DEBUGGER +struct netlist_aiv_list *nvgpu_netlist_get_sys_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.sys; +} + +struct netlist_aiv_list *nvgpu_netlist_get_gpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.gpc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_tpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.tpc; +} + +#ifdef CONFIG_NVGPU_GRAPHICS +struct netlist_aiv_list *nvgpu_netlist_get_zcull_gpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.zcull_gpc; +} +#endif + +struct netlist_aiv_list *nvgpu_netlist_get_ppc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.ppc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_sys_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_sys; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_gpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_gpc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_tpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_tpc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_ppc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_ppc; +} 
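These accessors are the boundary between the netlist parser and the gr init sequences that replay the lists. A consumer-side sketch for one of them, assuming struct netlist_av carries an address/value pair as its name suggests (that layout, and the write helper below, are assumptions for illustration only):

/* Placeholder standing in for the real gr method/bundle write path. */
static void example_reg_write(struct gk20a *g, u32 addr, u32 value)
{
	(void)g; (void)addr; (void)value;
}

static void example_replay_sw_method_init(struct gk20a *g)
{
	struct netlist_av_list *avl =
		nvgpu_netlist_get_sw_method_init_av_list(g);
	u32 i;

	for (i = 0U; i < avl->count; i++) {
		example_reg_write(g, avl->l[i].addr, avl->l[i].value);
	}
}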
+ +struct netlist_aiv_list *nvgpu_netlist_get_perf_sys_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_sys; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_gpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_gpc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_fbp_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.fbp; +} + +struct netlist_aiv_list *nvgpu_netlist_get_fbp_router_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.fbp_router; +} + +struct netlist_aiv_list *nvgpu_netlist_get_gpc_router_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.gpc_router; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_ltc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_ltc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_fbpa_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_fbpa; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_sys_router_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_sys_router; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_pma_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_pma; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_rop_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_rop; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_ucgpc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_etpc_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.etpc; +} + +struct netlist_aiv_list *nvgpu_netlist_get_pm_cau_ctxsw_regs(struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.pm_cau; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_sys_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_sys_control; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_fbp_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_fbp_control; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_gpc_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_gpc_control; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_pma_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_pma_control; +} + +u32 nvgpu_netlist_get_ppc_ctxsw_regs_count(struct gk20a *g) +{ + u32 count = nvgpu_netlist_get_ppc_ctxsw_regs(g)->count; + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (count == 0U) { + count = nvgpu_next_netlist_get_ppc_ctxsw_regs_count(g); + } +#endif + return count; +} + +u32 nvgpu_netlist_get_gpc_ctxsw_regs_count(struct gk20a *g) +{ + u32 count = nvgpu_netlist_get_gpc_ctxsw_regs(g)->count; + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (count == 0U) { + count = nvgpu_next_netlist_get_gpc_ctxsw_regs_count(g); + } +#endif + return count; +} + +u32 nvgpu_netlist_get_tpc_ctxsw_regs_count(struct gk20a *g) +{ + u32 count = nvgpu_netlist_get_tpc_ctxsw_regs(g)->count; + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (count == 0U) { + count = nvgpu_next_netlist_get_tpc_ctxsw_regs_count(g); + } +#endif + return count; +} + +u32 nvgpu_netlist_get_etpc_ctxsw_regs_count(struct gk20a *g) +{ + u32 count = nvgpu_netlist_get_etpc_ctxsw_regs(g)->count; + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (count == 0U) { + count = 
nvgpu_next_netlist_get_etpc_ctxsw_regs_count(g); + } +#endif + return count; +} + +void nvgpu_netlist_print_ctxsw_reg_info(struct gk20a *g) +{ + nvgpu_log_info(g, "<<<<---------- CTXSW'ed register info ---------->>>>"); + nvgpu_log_info(g, "GRCTX_REG_LIST_SYS_COUNT :%d", + nvgpu_netlist_get_sys_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_GPC_COUNT :%d", + nvgpu_netlist_get_gpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_TPC_COUNT :%d", + nvgpu_netlist_get_tpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT :%d", + nvgpu_netlist_get_zcull_gpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_SYS_COUNT :%d", + nvgpu_netlist_get_pm_sys_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_GPC_COUNT :%d", + nvgpu_netlist_get_pm_gpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_TPC_COUNT :%d", + nvgpu_netlist_get_pm_tpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PPC_COUNT :%d", + nvgpu_netlist_get_ppc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_ETPC_COUNT :%d", + nvgpu_netlist_get_etpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_PPC_COUNT :%d", + nvgpu_netlist_get_pm_ppc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_SYS_COUNT :%d", + nvgpu_netlist_get_perf_sys_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_SYSROUTER_COUNT :%d", + nvgpu_netlist_get_perf_sys_router_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_SYS_CONTROL_COUNT :%d", + nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_PMA_COUNT :%d", + nvgpu_netlist_get_perf_pma_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_FBP_COUNT :%d", + nvgpu_netlist_get_fbp_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_FBPROUTER_COUNT :%d", + nvgpu_netlist_get_fbp_router_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_GPC_COUNT :%d", + nvgpu_netlist_get_perf_gpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_GPCROUTER_COUNT :%d", + nvgpu_netlist_get_gpc_router_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_LTC_COUNT :%d", + nvgpu_netlist_get_pm_ltc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_ROP_COUNT :%d", + nvgpu_netlist_get_pm_rop_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_UNICAST_GPC_COUNT :%d", + nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_CAU_COUNT :%d", + nvgpu_netlist_get_pm_cau_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PM_FBPA_COUNT :%d", + nvgpu_netlist_get_pm_fbpa_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_FBP_CONTROL_COUNT :%d", + nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_GPC_CONTROL_COUNT :%d", + nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(g)->count); + nvgpu_log_info(g, "GRCTX_REG_LIST_PERF_PMA_CONTROL_COUNT :%d", + nvgpu_netlist_get_perf_pma_control_ctxsw_regs(g)->count); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_netlist_print_ctxsw_reg_info(g); +#endif +} + +#endif /* CONFIG_NVGPU_DEBUGGER */ + +#ifdef CONFIG_NVGPU_NON_FUSA +void nvgpu_netlist_set_fecs_inst_count(struct gk20a *g, u32 count) +{ + g->netlist_vars->ucode.fecs.inst.count = count; +} + +void nvgpu_netlist_set_fecs_data_count(struct gk20a *g, u32 count) +{ + g->netlist_vars->ucode.fecs.data.count = count; +} + +void 
nvgpu_netlist_set_gpccs_inst_count(struct gk20a *g, u32 count) +{ + g->netlist_vars->ucode.gpccs.inst.count = count; +} + +void nvgpu_netlist_set_gpccs_data_count(struct gk20a *g, u32 count) +{ + g->netlist_vars->ucode.gpccs.data.count = count; +} + +struct netlist_u32_list *nvgpu_netlist_get_fecs_inst(struct gk20a *g) +{ + return &g->netlist_vars->ucode.fecs.inst; +} + +struct netlist_u32_list *nvgpu_netlist_get_fecs_data(struct gk20a *g) +{ + return &g->netlist_vars->ucode.fecs.data; +} + +struct netlist_u32_list *nvgpu_netlist_get_gpccs_inst(struct gk20a *g) +{ + return &g->netlist_vars->ucode.gpccs.inst; +} + +struct netlist_u32_list *nvgpu_netlist_get_gpccs_data(struct gk20a *g) +{ + return &g->netlist_vars->ucode.gpccs.data; +} + + +void nvgpu_netlist_vars_set_dynamic(struct gk20a *g, bool set) +{ + g->netlist_vars->dynamic = set; +} + +void nvgpu_netlist_vars_set_buffer_size(struct gk20a *g, u32 size) +{ + g->netlist_vars->buffer_size = size; +} + +void nvgpu_netlist_vars_set_regs_base_index(struct gk20a *g, u32 index) +{ + g->netlist_vars->regs_base_index = index; +} +#endif diff --git a/drivers/gpu/nvgpu/common/netlist/netlist_defs.h b/drivers/gpu/nvgpu/common/netlist/netlist_defs.h new file mode 100644 index 000000000..219531aa2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/netlist/netlist_defs.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_NETLIST_DEFS_H +#define NVGPU_NETLIST_DEFS_H + +#include + +/* emulation netlists, match majorV with HW */ +#define NVGPU_NETLIST_IMAGE_A "NETA_img.bin" +#define NVGPU_NETLIST_IMAGE_B "NETB_img.bin" +#define NVGPU_NETLIST_IMAGE_C "NETC_img.bin" +#define NVGPU_NETLIST_IMAGE_D "NETD_img.bin" + +/* index for emulation netlists */ +#define NETLIST_FINAL -1 +#define NETLIST_SLOT_A 0 +#define NETLIST_SLOT_B 1 +#define NETLIST_SLOT_C 2 +#define NETLIST_SLOT_D 3 +#define MAX_NETLIST 4 + +#endif /* NVGPU_NETLIST_DEFS_H */ diff --git a/drivers/gpu/nvgpu/common/netlist/netlist_priv.h b/drivers/gpu/nvgpu/common/netlist/netlist_priv.h new file mode 100644 index 000000000..d46c15950 --- /dev/null +++ b/drivers/gpu/nvgpu/common/netlist/netlist_priv.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
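For the emulation slots defined above, the per-chip get_netlist_name() HALs essentially map a slot index to one of these image names, optionally prefixed by a chip directory such as "gm204/" as the MAX_NETLIST_NAME comment in netlist.c describes. A hypothetical version of that mapping (the real HALs are chip-specific and not reproduced here):

static const char *example_netlist_image_name(int net)
{
	switch (net) {
	case NETLIST_SLOT_A:
		return NVGPU_NETLIST_IMAGE_A;	/* "NETA_img.bin" */
	case NETLIST_SLOT_B:
		return NVGPU_NETLIST_IMAGE_B;
	case NETLIST_SLOT_C:
		return NVGPU_NETLIST_IMAGE_C;
	case NETLIST_SLOT_D:
		return NVGPU_NETLIST_IMAGE_D;
	default:
		return NULL;	/* NETLIST_FINAL has no emulation image */
	}
}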
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_NETLIST_PRIV_H +#define NVGPU_NETLIST_PRIV_H + +#include + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +#include "common/netlist/nvgpu_next_netlist_priv.h" +#endif + +struct netlist_u32_list; +struct netlist_av_list; +struct netlist_av64_list; +struct netlist_aiv_list; + +/* netlist regions */ +#define NETLIST_REGIONID_FECS_UCODE_DATA 0 +#define NETLIST_REGIONID_FECS_UCODE_INST 1 +#define NETLIST_REGIONID_GPCCS_UCODE_DATA 2 +#define NETLIST_REGIONID_GPCCS_UCODE_INST 3 +#define NETLIST_REGIONID_SW_BUNDLE_INIT 4 +#define NETLIST_REGIONID_SW_CTX_LOAD 5 +#define NETLIST_REGIONID_SW_NON_CTX_LOAD 6 +#define NETLIST_REGIONID_SW_METHOD_INIT 7 +#ifdef CONFIG_NVGPU_DEBUGGER +#define NETLIST_REGIONID_CTXREG_SYS 8 +#define NETLIST_REGIONID_CTXREG_GPC 9 +#define NETLIST_REGIONID_CTXREG_TPC 10 +#define NETLIST_REGIONID_CTXREG_ZCULL_GPC 11 +#define NETLIST_REGIONID_CTXREG_PM_SYS 12 +#define NETLIST_REGIONID_CTXREG_PM_GPC 13 +#define NETLIST_REGIONID_CTXREG_PM_TPC 14 +#endif +#define NETLIST_REGIONID_MAJORV 15 +#define NETLIST_REGIONID_BUFFER_SIZE 16 +#define NETLIST_REGIONID_CTXSW_REG_BASE_INDEX 17 +#define NETLIST_REGIONID_NETLIST_NUM 18 +#ifdef CONFIG_NVGPU_DEBUGGER +#define NETLIST_REGIONID_CTXREG_PPC 19 +#define NETLIST_REGIONID_CTXREG_PMPPC 20 +#define NETLIST_REGIONID_NVPERF_CTXREG_SYS 21 +#define NETLIST_REGIONID_NVPERF_FBP_CTXREGS 22 +#define NETLIST_REGIONID_NVPERF_CTXREG_GPC 23 +#define NETLIST_REGIONID_NVPERF_FBP_ROUTER 24 +#define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25 +#define NETLIST_REGIONID_CTXREG_PMLTC 26 +#define NETLIST_REGIONID_CTXREG_PMFBPA 27 +#endif +#define NETLIST_REGIONID_SWVEIDBUNDLEINIT 28 +#ifdef CONFIG_NVGPU_DEBUGGER +#define NETLIST_REGIONID_NVPERF_SYS_ROUTER 29 +#define NETLIST_REGIONID_NVPERF_PMA 30 +#define NETLIST_REGIONID_CTXREG_PMROP 31 +#define NETLIST_REGIONID_CTXREG_PMUCGPC 32 +#define NETLIST_REGIONID_CTXREG_ETPC 33 +#endif +#define NETLIST_REGIONID_SW_BUNDLE64_INIT 34 +#ifdef CONFIG_NVGPU_DEBUGGER +#define NETLIST_REGIONID_NVPERF_PMCAU 35 +#define NETLIST_REGIONID_NVPERF_SYS_CONTROL 52 +#define NETLIST_REGIONID_NVPERF_FBP_CONTROL 53 +#define NETLIST_REGIONID_NVPERF_GPC_CONTROL 54 +#define NETLIST_REGIONID_NVPERF_PMA_CONTROL 55 +#endif + +struct netlist_region { + u32 region_id; + u32 data_size; + u32 data_offset; +}; + +struct netlist_image_header { + u32 version; + u32 regions; +}; + +struct netlist_image { + struct netlist_image_header header; 
+ struct netlist_region regions[1]; +}; + +struct netlist_gr_ucode { + struct { + struct netlist_u32_list inst; + struct netlist_u32_list data; + } gpccs, fecs; +}; + +struct nvgpu_netlist_vars { + bool dynamic; + + u32 regs_base_index; + u32 buffer_size; + + struct netlist_gr_ucode ucode; + + struct netlist_av_list sw_bundle_init; + struct netlist_av64_list sw_bundle64_init; + struct netlist_av_list sw_method_init; + struct netlist_aiv_list sw_ctx_load; + struct netlist_av_list sw_non_ctx_load; +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + struct nvgpu_next_netlist_vars nvgpu_next; +#endif + struct netlist_av_list sw_veid_bundle_init; +#ifdef CONFIG_NVGPU_DEBUGGER + struct { + struct netlist_aiv_list sys; + struct netlist_aiv_list gpc; + struct netlist_aiv_list tpc; +#ifdef CONFIG_NVGPU_GRAPHICS + struct netlist_aiv_list zcull_gpc; +#endif + struct netlist_aiv_list ppc; + struct netlist_aiv_list pm_sys; + struct netlist_aiv_list pm_gpc; + struct netlist_aiv_list pm_tpc; + struct netlist_aiv_list pm_ppc; + struct netlist_aiv_list perf_sys; + struct netlist_aiv_list perf_gpc; + struct netlist_aiv_list fbp; + struct netlist_aiv_list fbp_router; + struct netlist_aiv_list gpc_router; + struct netlist_aiv_list pm_ltc; + struct netlist_aiv_list pm_fbpa; + struct netlist_aiv_list perf_sys_router; + struct netlist_aiv_list perf_pma; + struct netlist_aiv_list pm_rop; + struct netlist_aiv_list pm_ucgpc; + struct netlist_aiv_list etpc; + struct netlist_aiv_list pm_cau; + struct netlist_aiv_list perf_sys_control; + struct netlist_aiv_list perf_fbp_control; + struct netlist_aiv_list perf_gpc_control; + struct netlist_aiv_list perf_pma_control; +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + struct nvgpu_next_ctxsw_regs nvgpu_next; +#endif + } ctxsw_regs; +#endif /* CONFIG_NVGPU_DEBUGGER */ +}; + +#endif /* NVGPU_NETLIST_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/nvlink/init/device_reginit.c b/drivers/gpu/nvgpu/common/nvlink/init/device_reginit.c new file mode 100644 index 000000000..e541a3e02 --- /dev/null +++ b/drivers/gpu/nvgpu/common/nvlink/init/device_reginit.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
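struct netlist_image uses the classic trailing-array idiom: header.regions says how many struct netlist_region descriptors follow, and each descriptor locates its payload by a byte offset from the start of the image, which is exactly how the parsing loop in nvgpu_netlist_init_ctx_vars_fw() addresses it. A minimal sketch of that addressing:

/* Locate the i-th region's payload inside a loaded netlist image. */
static void example_region_payload(struct netlist_image *img, u32 i,
				   u8 **src, u32 *size)
{
	*src = (u8 *)img + img->regions[i].data_offset;
	*size = img->regions[i].data_size;
}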
+ */ + +#include +#include + +#ifdef CONFIG_NVGPU_NVLINK +int nvgpu_nvlink_reg_init(struct gk20a *g) +{ + int err; + + err = g->ops.nvlink.reg_init(g); + + return err; +} +#endif diff --git a/drivers/gpu/nvgpu/common/nvlink/init/device_reginit_gv100.c b/drivers/gpu/nvgpu/common/nvlink/init/device_reginit_gv100.c new file mode 100644 index 000000000..215b04174 --- /dev/null +++ b/drivers/gpu/nvgpu/common/nvlink/init/device_reginit_gv100.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include "device_reginit_gv100.h" + +#ifdef CONFIG_NVGPU_NVLINK +struct nvlink_reginit { + u32 addr; + u32 value; +}; + +static const struct nvlink_reginit nvlink_reginit_per_link_tegra[] = { + /* NVTLC when connected to Tegra */ + { 0x300U, 0x00800040U }, + { 0x304U, 0x00000000U }, + { 0x308U, 0x00000000U }, + { 0x30CU, 0x00000000U }, + { 0x310U, 0x00000000U }, + { 0x314U, 0x00800040U }, + { 0x318U, 0x00000000U }, + { 0x31CU, 0x00000000U }, + { 0x200U, 0x007F003FU }, + { 0x204U, 0x007F003FU }, + { 0x208U, 0x007F003FU }, + { 0x20CU, 0x007F003FU }, + { 0x210U, 0x007F003FU }, + { 0x214U, 0x00FF007FU }, + { 0x218U, 0x00FF007FU }, + { 0x21CU, 0x00FF007FU }, + { 0xB00U, 0x010000C0U }, + { 0xB04U, 0x00000000U }, + { 0xB08U, 0x00000000U }, + { 0xB0CU, 0x00000000U }, + { 0xB10U, 0x00000000U }, + { 0xB14U, 0x010000C0U }, + { 0xB18U, 0x00000000U }, + { 0xB1CU, 0x00000000U }, + { 0xA00U, 0x00FF00BFU }, + { 0xA04U, 0x00FF00BFU }, + { 0xA08U, 0x00FF00BFU }, + { 0xA0CU, 0x00FF00BFU }, + { 0xA10U, 0x00FF00BFU }, + { 0xA14U, 0x01FF017FU }, + { 0xA18U, 0x01FF017FU }, + { 0xA1CU, 0x01FF017FU }, + { 0x400U, 0x00000001U }, + { 0xC00U, 0x00000001U }, +}; + +static const struct nvlink_reginit nvlink_reginit_per_link_gpu[] = { + /* NVTLC for PEER GPU */ + { 0x300U, 0x00800040U }, + { 0x304U, 0x00000000U }, + { 0x308U, 0x00000000U }, + { 0x30CU, 0x00000000U }, + { 0x310U, 0x00000000U }, + { 0x314U, 0x00800040U }, + { 0x318U, 0x00000000U }, + { 0x31CU, 0x00000000U }, + { 0x200U, 0x007F003FU }, + { 0x204U, 0x007F003FU }, + { 0x208U, 0x007F003FU }, + { 0x20CU, 0x007F003FU }, + { 0x210U, 0x007F003FU }, + { 0x214U, 0x00FF007FU }, + { 0x218U, 0x00FF007FU }, + { 0x21CU, 0x00FF007FU }, + { 0xB00U, 0x010000C0U }, + { 0xB04U, 0x00000000U }, + { 0xB08U, 0x00000000U }, + { 0xB0CU, 0x00000000U }, + { 0xB10U, 0x00000000U }, + { 0xB14U, 0x010000C0U }, + { 0xB18U, 0x00000000U }, + { 0xB1CU, 
0x00000000U }, + { 0xA00U, 0x00FF00BFU }, + { 0xA04U, 0x00FF00BFU }, + { 0xA08U, 0x00FF00BFU }, + { 0xA0CU, 0x00FF00BFU }, + { 0xA10U, 0x00FF00BFU }, + { 0xA14U, 0x01FF017FU }, + { 0xA18U, 0x01FF017FU }, + { 0xA1CU, 0x01FF017FU }, + { 0xF04U, 0x00FFFFFFU }, + { 0xF0CU, 0x00FFFFFFU }, + { 0xF1CU, 0x003FFFFFU }, + { 0xF24U, 0x003FFFFFU }, + { 0x704U, 0x003FFFFFU }, + { 0x70CU, 0x003FFFFFU }, + { 0x400U, 0x00000001U }, + { 0xC00U, 0x00000001U }, +}; + +static int gv100_nvlink_get_tlc_reginit(enum nvgpu_nvlink_endp endp, + const struct nvlink_reginit **reg, u32 *count) +{ + int ret = 0; + + switch(endp) { + case nvgpu_nvlink_endp_tegra: + *reg = (const struct nvlink_reginit *) + nvlink_reginit_per_link_tegra; + *count = (u32)ARRAY_SIZE(nvlink_reginit_per_link_tegra); + break; + case nvgpu_nvlink_endp_gpu: + *reg = (const struct nvlink_reginit *) + nvlink_reginit_per_link_gpu; + *count = (u32)ARRAY_SIZE(nvlink_reginit_per_link_gpu); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +int gv100_nvlink_reg_init(struct gk20a *g) +{ + u32 i = 0; + u32 count = 0; + const struct nvlink_reginit *reg; + enum nvgpu_nvlink_endp endp; + int err; + u32 link_id; + unsigned long mask = g->nvlink.enabled_links; + struct nvgpu_nvlink_link *link; + unsigned long bit; + + /* Apply automated reg init flow for PROD settings */ + for_each_set_bit(bit, &mask, NVLINK_MAX_LINKS_SW) { + link_id = (u32)bit; + link = &g->nvlink.links[link_id]; + if (!link->remote_info.is_connected) { + continue; + } + + endp = link->remote_info.device_type; + err = gv100_nvlink_get_tlc_reginit(endp, ®, &count); + if (err != 0) { + nvgpu_err(g, "no reginit for endp=%u", endp); + continue; + } + + for (i = 0; i < count; i++) { + TLC_REG_WR32(g, link_id, reg->addr, reg->value); + reg++; + } + } + return 0; +} +#endif /* CONFIG_NVGPU_NVLINK */ diff --git a/drivers/gpu/nvgpu/common/nvlink/init/device_reginit_gv100.h b/drivers/gpu/nvgpu/common/nvlink/init/device_reginit_gv100.h new file mode 100644 index 000000000..52a55cc24 --- /dev/null +++ b/drivers/gpu/nvgpu/common/nvlink/init/device_reginit_gv100.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
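The PROD sequence above is purely table-driven: each remote endpoint type selects one read-only array of {offset, value} pairs, and gv100_nvlink_reg_init() replays that table over every connected link through TLC_REG_WR32(). The replay itself, condensed into a sketch (the helper name is illustrative):

static void example_apply_tlc_table(struct gk20a *g, u32 link_id,
				    const struct nvlink_reginit *tbl,
				    u32 count)
{
	u32 i;

	for (i = 0U; i < count; i++) {
		/* Per-link TLC write of one PROD setting. */
		TLC_REG_WR32(g, link_id, tbl[i].addr, tbl[i].value);
	}
}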
+ */ + +#ifndef DEVICE_REGINIT_GV100_H +#define DEVICE_REGINIT_GV100_H + +struct gk20a; + +int gv100_nvlink_reg_init(struct gk20a *g); + +#endif /* DEVICE_REGINIT_GV100_H */ diff --git a/drivers/gpu/nvgpu/common/nvlink/link_mode_transitions.c b/drivers/gpu/nvgpu/common/nvlink/link_mode_transitions.c new file mode 100644 index 000000000..81a97fd84 --- /dev/null +++ b/drivers/gpu/nvgpu/common/nvlink/link_mode_transitions.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#ifdef CONFIG_NVGPU_NVLINK +/* + * Fix: use this function to find detault link, as only one is supported + * on the library for now + * Returns NVLINK_MAX_LINKS_SW on failure + */ +static u32 nvgpu_nvlink_get_link(struct gk20a *g) +{ + u32 link_id; + + if (g == NULL) { + return NVLINK_MAX_LINKS_SW; + } + + /* Lets find the detected link */ + if (g->nvlink.initialized_links != 0U) { + link_id = (u32)(nvgpu_ffs(g->nvlink.initialized_links) - 1UL); + } else { + return NVLINK_MAX_LINKS_SW; + } + + if (g->nvlink.links[link_id].remote_info.is_connected) { + return link_id; + } + + return NVLINK_MAX_LINKS_SW; +} + +enum nvgpu_nvlink_link_mode nvgpu_nvlink_get_link_mode(struct gk20a *g) +{ + u32 link_id; + + link_id = nvgpu_nvlink_get_link(g); + if (link_id == NVLINK_MAX_LINKS_SW) { + return nvgpu_nvlink_link__last; + } + + return g->ops.nvlink.link_mode_transitions.get_link_mode(g, link_id); +} + +u32 nvgpu_nvlink_get_link_state(struct gk20a *g) +{ + u32 link_id; + + link_id = nvgpu_nvlink_get_link(g); + if (link_id == NVLINK_MAX_LINKS_SW) { + /* 0xff is an undefined link_state */ + return U32_MAX; + } + + return g->ops.nvlink.link_mode_transitions.get_link_state(g, link_id); +} + +int nvgpu_nvlink_set_link_mode(struct gk20a *g, + enum nvgpu_nvlink_link_mode mode) +{ + + u32 link_id; + + link_id = nvgpu_nvlink_get_link(g); + if (link_id == NVLINK_MAX_LINKS_SW) { + return -EINVAL; + } + + return g->ops.nvlink.link_mode_transitions.set_link_mode(g, link_id, + mode); +} + +void nvgpu_nvlink_get_tx_sublink_state(struct gk20a *g, u32 *state) +{ + u32 link_id; + + link_id = nvgpu_nvlink_get_link(g); + if (link_id == NVLINK_MAX_LINKS_SW) { + return; + } + if (state != NULL) { + *state = g->ops.nvlink.link_mode_transitions. 
+ get_tx_sublink_state(g, link_id); + } +} + +void nvgpu_nvlink_get_rx_sublink_state(struct gk20a *g, u32 *state) +{ + u32 link_id; + + link_id = nvgpu_nvlink_get_link(g); + if (link_id == NVLINK_MAX_LINKS_SW) { + return; + } + if (state != NULL) { + *state = g->ops.nvlink.link_mode_transitions. + get_rx_sublink_state(g, link_id); + } +} + +enum nvgpu_nvlink_sublink_mode nvgpu_nvlink_get_sublink_mode(struct gk20a *g, + bool is_rx_sublink) +{ + u32 link_id; + + link_id = nvgpu_nvlink_get_link(g); + if (link_id == NVLINK_MAX_LINKS_SW) { + return nvgpu_nvlink_sublink_rx__last; + } + + return g->ops.nvlink.link_mode_transitions.get_sublink_mode(g, + link_id, is_rx_sublink); +} + +int nvgpu_nvlink_set_sublink_mode(struct gk20a *g, + bool is_rx_sublink, enum nvgpu_nvlink_sublink_mode mode) +{ + u32 link_id; + + link_id = nvgpu_nvlink_get_link(g); + if (link_id == NVLINK_MAX_LINKS_SW) { + return -EINVAL; + } + + return g->ops.nvlink.link_mode_transitions.set_sublink_mode(g, link_id, + is_rx_sublink, mode); +} + +#endif diff --git a/drivers/gpu/nvgpu/common/nvlink/minion.c b/drivers/gpu/nvgpu/common/nvlink/minion.c new file mode 100644 index 000000000..7e3f25fae --- /dev/null +++ b/drivers/gpu/nvgpu/common/nvlink/minion.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#ifdef CONFIG_NVGPU_NVLINK + +/* Extract a WORD from the MINION ucode */ +u32 nvgpu_nvlink_minion_extract_word(struct nvgpu_firmware *fw, u32 idx) +{ + u32 out_data = 0U; + u8 byte = 0U; + u32 i = 0U; + + for (i = 0U; i < 4U; i++) { + byte = fw->data[idx + i]; + out_data |= ((u32)byte) << (8U * i); + } + + return out_data; +} + +/* + * Load minion FW and set up bootstrap + */ +int nvgpu_nvlink_minion_load(struct gk20a *g) +{ + int err = 0; + struct nvgpu_firmware *nvgpu_minion_fw = NULL; + struct nvgpu_timeout timeout; + u32 delay = POLL_DELAY_MIN_US; + bool boot_cmplte; + + nvgpu_log_fn(g, " "); + + if (g->ops.nvlink.minion.is_running(g)) { + return 0; + } + + /* Get minion ucode binary */ + if (g->ops.nvlink.minion.is_debug_mode != NULL) { + if (g->ops.nvlink.minion.is_debug_mode(g)) { + nvgpu_minion_fw = nvgpu_request_firmware(g, + "dgpu_minion_debug.bin", 0); + } else { + nvgpu_minion_fw = nvgpu_request_firmware(g, + "dgpu_minion_prod.bin", 0); + } + } + + if (nvgpu_minion_fw == NULL) { + nvgpu_err(g, "minion ucode get fail"); + err = -ENOENT; + goto exit; + } + + /* Minion reset */ + err = nvgpu_falcon_reset(&g->minion_flcn); + if (err != 0) { + nvgpu_err(g, "Minion reset failed"); + goto exit; + } + + /* Clear interrupts */ + g->ops.nvlink.minion.clear_intr(g); + + err = nvgpu_nvlink_minion_load_ucode(g, nvgpu_minion_fw); + if (err != 0) { + goto exit; + } + + /* set BOOTVEC to start of non-secure code */ + err = nvgpu_falcon_bootstrap(&g->minion_flcn, 0x0); + if (err != 0) { + nvgpu_err(g, "Minion bootstrap failed"); + goto exit; + } + + err = nvgpu_timeout_init(g, &timeout, nvgpu_get_poll_timeout(g), + NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + nvgpu_err(g, "Minion boot timeout init failed"); + goto exit; + } + + do { + err = g->ops.nvlink.minion.is_boot_complete(g, &boot_cmplte); + if (err != 0) { + goto exit; + } + if (boot_cmplte) { + nvgpu_log(g, gpu_dbg_nvlink,"MINION boot successful"); + break; + } + nvgpu_usleep_range(delay, delay * 2U); + delay = min_t(unsigned int, + delay << 1, POLL_DELAY_MAX_US); + } while (nvgpu_timeout_expired_msg(&timeout, + "minion boot timeout") == 0); + + /* Service interrupts */ + g->ops.nvlink.minion.falcon_isr(g); + + if (nvgpu_timeout_peek_expired(&timeout)) { + err = -ETIMEDOUT; + goto exit; + } + + g->ops.nvlink.minion.init_intr(g); + + nvgpu_release_firmware(g, nvgpu_minion_fw); + + return err; + +exit: + nvgpu_nvlink_free_minion_used_mem(g, nvgpu_minion_fw); + return err; +} + +#endif + diff --git a/drivers/gpu/nvgpu/common/nvlink/nvlink.c b/drivers/gpu/nvgpu/common/nvlink/nvlink.c new file mode 100644 index 000000000..16f56a569 --- /dev/null +++ b/drivers/gpu/nvgpu/common/nvlink/nvlink.c @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_NVGPU_NVLINK + +static int nvgpu_nvlink_enable_links_pre_top(struct gk20a *g, + unsigned long links) +{ + u32 link_id; + int err; + unsigned long bit; + + nvgpu_log(g, gpu_dbg_nvlink, " enabling 0x%lx links", links); + for_each_set_bit(bit, &links, NVLINK_MAX_LINKS_SW) { + link_id = (u32)bit; + + /* Take links out of reset */ + g->ops.nvlink.clear_link_reset(g, link_id); + + /* Before doing any link initialization, run RXDET to check + * if link is connected on other end. + */ + if (g->ops.nvlink.rxdet != NULL) { + err = g->ops.nvlink.rxdet(g, link_id); + if (err != 0) { + return err; + } + } + + /* Enable Link DLPL for AN0 */ + g->ops.nvlink.enable_link_an0(g, link_id); + + /* This should be done by the NVLINK API */ + err = g->ops.nvlink.link_mode_transitions.set_sublink_mode(g, + link_id, false, nvgpu_nvlink_sublink_tx_common); + if (err != 0) { + nvgpu_err(g, "Failed to init phy of link: %u", link_id); + return err; + } + + err = g->ops.nvlink.link_mode_transitions.set_sublink_mode(g, + link_id, true, nvgpu_nvlink_sublink_rx_rxcal); + if (err != 0) { + nvgpu_err(g, "Failed to RXcal on link: %u", link_id); + return err; + } + + err = g->ops.nvlink.link_mode_transitions.set_sublink_mode(g, + link_id, false, nvgpu_nvlink_sublink_tx_data_ready); + if (err != 0) { + nvgpu_err(g, "Failed to set data ready link:%u", + link_id); + return err; + } + + g->nvlink.enabled_links |= BIT32(link_id); + } + + nvgpu_log(g, gpu_dbg_nvlink, "enabled_links=0x%08x", + g->nvlink.enabled_links); + + if (g->nvlink.enabled_links != 0U) { + return 0; + } + + nvgpu_err(g, "No links were enabled"); + return -EINVAL; +} + +static int nvgpu_nvlink_enable_links_post_top(struct gk20a *g, + unsigned long links) +{ + u32 link_id; + unsigned long bit; + unsigned long enabled_links = (links & g->nvlink.enabled_links) & + ~g->nvlink.initialized_links; + + for_each_set_bit(bit, &enabled_links, NVLINK_MAX_LINKS_SW) { + link_id = (u32)bit; + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_1888034)) { + g->ops.nvlink.set_sw_errata(g, link_id); + } + g->ops.nvlink.intr.init_link_err_intr(g, link_id); + g->ops.nvlink.intr.enable_link_err_intr(g, link_id, true); + + g->nvlink.initialized_links |= BIT32(link_id); + }; + + return 0; +} + +/* + * Main Nvlink init function. 
Calls into the Nvlink core API + */ +int nvgpu_nvlink_init(struct gk20a *g) +{ + int err = 0; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { + return -ENODEV; + } + + err = nvgpu_nvlink_enumerate(g); + if (err != 0) { + nvgpu_err(g, "failed to enumerate nvlink"); + goto fail; + } + + /* Set HSHUB and SG_PHY */ + nvgpu_set_enabled(g, NVGPU_MM_USE_PHYSICAL_SG, true); + + err = g->ops.fb.enable_nvlink(g); + if (err != 0) { + nvgpu_err(g, "failed switch to nvlink sysmem"); + goto fail; + } + + return err; + +fail: + nvgpu_set_enabled(g, NVGPU_MM_USE_PHYSICAL_SG, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); + return err; +} + + +/* + * Query IOCTRL for device discovery + */ +static int nvgpu_nvlink_discover_ioctrl(struct gk20a *g) +{ + u32 i; + u32 ioctrl_num_entries = 0U; + struct nvgpu_nvlink_ioctrl_list *ioctrl_table; + + ioctrl_num_entries = nvgpu_device_count(g, NVGPU_DEVTYPE_IOCTRL); + nvgpu_log_info(g, "ioctrl_num_entries: %d", ioctrl_num_entries); + + if (ioctrl_num_entries == 0U) { + nvgpu_err(g, "No NVLINK IOCTRL entry found in dev_info table"); + return -EINVAL; + } + + ioctrl_table = nvgpu_kzalloc(g, ioctrl_num_entries * + sizeof(struct nvgpu_nvlink_ioctrl_list)); + if (ioctrl_table == NULL) { + nvgpu_err(g, "Failed to allocate memory for nvlink io table"); + return -ENOMEM; + } + + for (i = 0U; i < ioctrl_num_entries; i++) { + const struct nvgpu_device *dev; + + dev = nvgpu_device_get(g, NVGPU_DEVTYPE_IOCTRL, i); + if (dev == NULL) { + nvgpu_err(g, "Failed to parse dev_info table IOCTRL dev (%d)", + NVGPU_DEVTYPE_IOCTRL); + nvgpu_kfree(g, ioctrl_table); + return -EINVAL; + } + + ioctrl_table[i].valid = true; + ioctrl_table[i].intr_enum = dev->intr_id; + ioctrl_table[i].reset_enum = dev->reset_id; + ioctrl_table[i].pri_base_addr = dev->pri_base; + nvgpu_log(g, gpu_dbg_nvlink, + "Dev %d: Pri_Base = 0x%0x Intr = %d Reset = %d", + i, ioctrl_table[i].pri_base_addr, + ioctrl_table[i].intr_enum, + ioctrl_table[i].reset_enum); + } + g->nvlink.ioctrl_table = ioctrl_table; + g->nvlink.io_num_entries = ioctrl_num_entries; + + return 0; +} + + +/* + * Performs nvlink device level initialization by discovering the topology + * taking device out of reset, boot minion, set clocks up and common interrupts + */ +int nvgpu_nvlink_early_init(struct gk20a *g) +{ + int err = 0; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { + return -EINVAL; + } + + err = nvgpu_bios_get_nvlink_config_data(g); + if (err != 0) { + nvgpu_err(g, "failed to read nvlink vbios data"); + goto exit; + } + + err = nvgpu_nvlink_discover_ioctrl(g); + if (err != 0) { + goto exit; + } + + /* Enable NVLINK in MC */ + nvgpu_log(g, gpu_dbg_nvlink, "mc_reset_nvlink_mask: 0x%x", + BIT32(g->nvlink.ioctrl_table[0].reset_enum)); + err = nvgpu_mc_reset_units(g, NVGPU_UNIT_NVLINK); + if (err != 0) { + nvgpu_err(g, "Failed to reset NVLINK unit"); + } + + nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_NVLINK, + NVGPU_CIC_INTR_ENABLE); + + err = g->ops.nvlink.discover_link(g); + if ((err != 0) || (g->nvlink.discovered_links == 0U)) { + nvgpu_err(g, "No links available"); + goto exit; + } + + err = nvgpu_falcon_sw_init(g, FALCON_ID_MINION); + if (err != 0) { + nvgpu_err(g, "failed to sw init FALCON_ID_MINION"); + goto exit; + } + + g->nvlink.discovered_links &= ~g->nvlink.link_disable_mask; + nvgpu_log(g, gpu_dbg_nvlink, "link_disable_mask = 0x%08x (from VBIOS)", + g->nvlink.link_disable_mask); + + /* Links in reset should be removed from initialized link sw state */ + g->nvlink.initialized_links &= 
g->ops.nvlink.get_link_reset_mask(g); + + /* VBIOS link_disable_mask should be sufficient to find the connected + * links. As VBIOS is not updated with correct mask, we parse the DT + * node where we hardcode the link_id. DT method is not scalable as same + * DT node is used for different dGPUs connected over PCIE. + * Remove the DT parsing of link id and use HAL to get link_mask based + * on the GPU. This is temporary fix while we get the VBIOS updated with + * correct mask. + */ + if (nvgpu_is_errata_present(g, NVGPU_ERRATA_VBIOS_NVLINK_MASK)) { + g->ops.nvlink.get_connected_link_mask( + &(g->nvlink.connected_links)); + } + + nvgpu_log(g, gpu_dbg_nvlink, "connected_links = 0x%08x", + g->nvlink.connected_links); + + /* Track only connected links */ + g->nvlink.discovered_links &= g->nvlink.connected_links; + + nvgpu_log(g, gpu_dbg_nvlink, "discovered_links = 0x%08x (combination)", + g->nvlink.discovered_links); + + if (hweight32(g->nvlink.discovered_links) > 1U) { + nvgpu_err(g, "more than one link enabled"); + err = -EINVAL; + goto nvlink_init_exit; + } + + g->nvlink.speed = nvgpu_nvlink_speed_20G; + err = nvgpu_nvlink_minion_load(g); + if (err != 0) { + nvgpu_err(g, "Failed Nvlink state load"); + goto nvlink_init_exit; + } + err = g->ops.nvlink.configure_ac_coupling(g, + g->nvlink.ac_coupling_mask, true); + if (err != 0) { + nvgpu_err(g, "Failed AC coupling configuration"); + goto nvlink_init_exit; + } + + /* Program clocks */ + g->ops.nvlink.prog_alt_clk(g); + +nvlink_init_exit: + nvgpu_falcon_sw_free(g, FALCON_ID_MINION); +exit: + return err; +} + +int nvgpu_nvlink_link_early_init(struct gk20a *g) +{ + u32 link_id; + int ret = 0; + /* + * First check the topology and setup connectivity + * HACK: we are only enabling one link for now!!! + */ + link_id = (u32)(nvgpu_ffs(g->nvlink.discovered_links) - 1UL); + g->nvlink.links[link_id].remote_info.is_connected = true; + g->nvlink.links[link_id].remote_info.device_type = + nvgpu_nvlink_endp_tegra; + + ret = nvgpu_nvlink_enable_links_pre_top(g, BIT32(link_id)); + if (ret != 0) { + nvgpu_err(g, "Pre topology failed for link"); + return ret; + } + ret = nvgpu_nvlink_enable_links_post_top(g, BIT32(link_id)); + if (ret != 0) { + nvgpu_err(g, "Post topology failed for link"); + return ret; + } + return ret; +} + +int nvgpu_nvlink_interface_init(struct gk20a *g) +{ + int err; + + err = g->ops.fb.init_nvlink(g); + if (err != 0) { + nvgpu_err(g, "failed to setup nvlinks for sysmem"); + return err; + } + + return 0; +} + +int nvgpu_nvlink_interface_disable(struct gk20a *g) +{ + return 0; +} + +int nvgpu_nvlink_dev_shutdown(struct gk20a *g) +{ + nvgpu_falcon_sw_free(g, FALCON_ID_MINION); + return 0; +} +#endif + +int nvgpu_nvlink_remove(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_NVLINK + int err; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { + return -ENODEV; + } + + nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); + + err = nvgpu_nvlink_unregister_link(g); + if (err != 0) { + nvgpu_err(g, "failed on nvlink link unregistration"); + return err; + } + + err = nvgpu_nvlink_unregister_device(g); + if (err != 0) { + nvgpu_err(g, "failed on nvlink device unregistration"); + return err; + } + + nvgpu_kfree(g, g->nvlink.priv); + + return 0; +#else + return -ENODEV; +#endif +} diff --git a/drivers/gpu/nvgpu/common/nvlink/probe.c b/drivers/gpu/nvgpu/common/nvlink/probe.c new file mode 100644 index 000000000..4d0ae2e44 --- /dev/null +++ b/drivers/gpu/nvgpu/common/nvlink/probe.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +int nvgpu_nvlink_probe(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_NVLINK + int err; + + err = nvgpu_nvlink_setup_ndev(g); + if (err != 0) { + return err; + } + + err = nvgpu_nvlink_read_dt_props(g); + if (err != 0) { + goto free_ndev; + } + + err = nvgpu_nvlink_init_ops(g); + if (err != 0) { + goto free_ndev; + } + + /* Register device with core driver*/ + err = nvgpu_nvlink_register_device(g); + if (err != 0) { + nvgpu_err(g, "failed on nvlink device registration"); + goto free_ndev; + } + + /* Register link with core driver */ + err = nvgpu_nvlink_register_link(g); + if (err != 0) { + nvgpu_err(g, "failed on nvlink link registration"); + goto unregister_ndev; + } + + /* Enable NVLINK support */ + nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, true); + return 0; + +unregister_ndev: + err = nvgpu_nvlink_unregister_device(g); + +free_ndev: + nvgpu_kfree(g, g->nvlink.priv); + g->nvlink.priv = NULL; + return err; +#else + return -ENODEV; +#endif +} + diff --git a/drivers/gpu/nvgpu/common/perf/cyclestats_snapshot.c b/drivers/gpu/nvgpu/common/perf/cyclestats_snapshot.c new file mode 100644 index 000000000..0b084186b --- /dev/null +++ b/drivers/gpu/nvgpu/common/perf/cyclestats_snapshot.c @@ -0,0 +1,606 @@ +/* + * Cycle stats snapshots support + * + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* check client for pointed perfmon ownership */ +#define CONTAINS_PERFMON(cl, pm) \ + ((cl)->perfmon_start <= (pm) && \ + ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) + +/* address of fifo entry by offset */ +#define CSS_FIFO_ENTRY(fifo, offs) \ + ((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs))) + +/* calculate area capacity in number of fifo entries */ +#define CSS_FIFO_ENTRY_CAPACITY(s) \ + (((s) - sizeof(struct gk20a_cs_snapshot_fifo)) \ + / sizeof(struct gk20a_cs_snapshot_fifo_entry)) + +/* reserved to indicate failures with data */ +#define CSS_FIRST_PERFMON_ID 32 +/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ +#define CSS_MAX_PERFMON_IDS 256 + +/* reports whether the hw queue overflowed */ +bool nvgpu_css_get_overflow_status(struct gk20a *g) +{ + return g->ops.perf.get_membuf_overflow_status(g); +} + +/* returns how many pending snapshot entries are pending */ +u32 nvgpu_css_get_pending_snapshots(struct gk20a *g) +{ + return g->ops.perf.get_membuf_pending_bytes(g) / + U32(sizeof(struct gk20a_cs_snapshot_fifo_entry)); +} + +/* informs hw how many snapshots have been processed (frees up fifo space) */ +void nvgpu_css_set_handled_snapshots(struct gk20a *g, u32 done) +{ + if (done > 0) { + g->ops.perf.set_membuf_handled_bytes(g, done, + sizeof(struct gk20a_cs_snapshot_fifo_entry)); + } +} + +/* + * WARNING: all css_gr_XXX functions are local and expected to be called + * from locked context (protected by cs_lock) + */ + +static int css_gr_create_shared_data(struct gk20a *g) +{ + struct gk20a_cs_snapshot *data; + + if (g->cs_data) { + return 0; + } + + data = nvgpu_kzalloc(g, sizeof(*data)); + if (!data) { + return -ENOMEM; + } + + nvgpu_init_list_node(&data->clients); + g->cs_data = data; + + return 0; +} + +int nvgpu_css_enable_snapshot(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + struct gk20a *g = ch->g; + struct gk20a_cs_snapshot *data = g->cs_data; + u32 snapshot_size = cs_client->snapshot_size; + int ret; + + if (data->hw_snapshot) { + return 0; + } + + if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE) { + snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE; + } + + ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size, + &data->hw_memdesc); + if (ret != 0) { + return ret; + } + + /* perf output buffer may not cross a 4GB boundary - with a separate */ + /* va smaller than that, it won't but check anyway */ + if (!data->hw_memdesc.cpu_va || + data->hw_memdesc.size < snapshot_size || + data->hw_memdesc.gpu_va + u64_lo32(snapshot_size) > SZ_4G) { + ret = -EFAULT; + goto failed_allocation; + } + + data->hw_snapshot = + (struct gk20a_cs_snapshot_fifo_entry *)data->hw_memdesc.cpu_va; + data->hw_end = data->hw_snapshot + + snapshot_size / sizeof(struct gk20a_cs_snapshot_fifo_entry); + data->hw_get = data->hw_snapshot; + (void) memset(data->hw_snapshot, 0xff, snapshot_size); + + g->ops.perf.membuf_reset_streaming(g); + g->ops.perf.init_inst_block(g, &g->mm.hwpm.inst_block); + g->ops.perf.enable_membuf(g, snapshot_size, data->hw_memdesc.gpu_va); + + nvgpu_log_info(g, "cyclestats: buffer for 
hardware snapshots enabled\n"); + + return 0; + +failed_allocation: + if (data->hw_memdesc.size) { + nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc); + (void) memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc)); + } + data->hw_snapshot = NULL; + + return ret; +} + +void nvgpu_css_disable_snapshot(struct gk20a *g) +{ + struct gk20a_cs_snapshot *data = g->cs_data; + + if (!data->hw_snapshot) { + return; + } + + g->ops.perf.membuf_reset_streaming(g); + g->ops.perf.disable_membuf(g); + g->ops.perf.deinit_inst_block(g); + + nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc); + (void) memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc)); + data->hw_snapshot = NULL; + + nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots disabled\n"); +} + +static void css_gr_free_shared_data(struct gk20a *g) +{ + if (g->cs_data) { + /* the clients list is expected to be empty */ + g->ops.css.disable_snapshot(g); + + /* release the objects */ + nvgpu_kfree(g, g->cs_data); + g->cs_data = NULL; + } +} + + +struct gk20a_cs_snapshot_client * +nvgpu_css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon) +{ + struct gk20a_cs_snapshot_client *client; + + nvgpu_list_for_each_entry(client, clients, + gk20a_cs_snapshot_client, list) { + if (CONTAINS_PERFMON(client, perfmon)) { + return client; + } + } + + return NULL; +} + +static int css_gr_flush_snapshots(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + struct gk20a_cs_snapshot *css = g->cs_data; + struct gk20a_cs_snapshot_client *cur; + u32 pending, completed; + bool hw_overflow; + int err; + + /* variables for iterating over HW entries */ + u32 sid; + struct gk20a_cs_snapshot_fifo_entry *src; + + /* due to data sharing with userspace we allowed update only */ + /* overflows and put field in the fifo header */ + struct gk20a_cs_snapshot_fifo *dst; + struct gk20a_cs_snapshot_fifo_entry *dst_get; + struct gk20a_cs_snapshot_fifo_entry *dst_put; + struct gk20a_cs_snapshot_fifo_entry *dst_nxt; + struct gk20a_cs_snapshot_fifo_entry *dst_head; + struct gk20a_cs_snapshot_fifo_entry *dst_tail; + + if (!css) { + return -EINVAL; + } + + if (nvgpu_list_empty(&css->clients)) { + return -EBADF; + } + + /* check data available */ + err = g->ops.css.check_data_available(ch, &pending, &hw_overflow); + if (err != 0) { + return err; + } + + if (!pending) { + return 0; + } + + if (hw_overflow) { + nvgpu_list_for_each_entry(cur, &css->clients, + gk20a_cs_snapshot_client, list) { + cur->snapshot->hw_overflow_events_occured++; + } + + nvgpu_warn(g, "cyclestats: hardware overflow detected"); + } + + /* process all items in HW buffer */ + sid = 0; + completed = 0; + cur = NULL; + dst = NULL; + dst_put = NULL; + src = css->hw_get; + + /* proceed all completed records */ + while (sid < pending && 0 == src->zero0) { + /* we may have a new perfmon_id which required to */ + /* switch to a new client -> let's forget current */ + if (cur && !CONTAINS_PERFMON(cur, src->perfmon_id)) { + s64 tmp_ptr = (char *)dst_put - (char *)dst; + + nvgpu_assert(tmp_ptr < (s64)U32_MAX); + dst->put = U32(tmp_ptr); + dst = NULL; + cur = NULL; + } + + /* now we have to select a new current client */ + /* the client selection rate depends from experiment */ + /* activity but on Android usually happened 1-2 times */ + if (!cur) { + cur = nvgpu_css_gr_search_client(&css->clients, + src->perfmon_id); + if (cur) { + /* found - setup all required data */ + dst = cur->snapshot; + dst_get = CSS_FIFO_ENTRY(dst, dst->get); + dst_put = CSS_FIFO_ENTRY(dst, dst->put); + dst_head = 
CSS_FIFO_ENTRY(dst, dst->start); + dst_tail = CSS_FIFO_ENTRY(dst, dst->end); + + dst_nxt = dst_put + 1; + if (dst_nxt == dst_tail) { + dst_nxt = dst_head; + } + } else { + /* client not found - skipping this entry */ + nvgpu_warn(g, "cyclestats: orphaned perfmon %u", + src->perfmon_id); + goto next_hw_fifo_entry; + } + } + + /* check for software overflows */ + if (dst_nxt == dst_get) { + /* no data copy, no pointer updates */ + dst->sw_overflow_events_occured++; + nvgpu_warn(g, "cyclestats: perfmon %u soft overflow", + src->perfmon_id); + } else { + *dst_put = *src; + completed++; + + dst_put = dst_nxt++; + + if (dst_nxt == dst_tail) { + dst_nxt = dst_head; + } + } + +next_hw_fifo_entry: + sid++; + if (++src >= css->hw_end) { + src = css->hw_snapshot; + } + } + + /* update client put pointer if necessary */ + if (cur && dst) { + s64 tmp_ptr = (char *)dst_put - (char *)dst; + + nvgpu_assert(tmp_ptr < (s64)U32_MAX); + dst->put = U32(tmp_ptr); + } + + /* re-set HW buffer after processing taking wrapping into account */ + if (css->hw_get < src) { + (void) memset(css->hw_get, 0xff, + (src - css->hw_get) * sizeof(*src)); + } else { + (void) memset(css->hw_snapshot, 0xff, + (src - css->hw_snapshot) * sizeof(*src)); + (void) memset(css->hw_get, 0xff, + (css->hw_end - css->hw_get) * sizeof(*src)); + } + g->cs_data->hw_get = src; + + if (g->ops.css.set_handled_snapshots) { + g->ops.css.set_handled_snapshots(g, sid); + } + + if (completed != sid) { + /* not all entries proceed correctly. some of problems */ + /* reported as overflows, some as orphaned perfmons, */ + /* but it will be better notify with summary about it */ + nvgpu_warn(g, "cyclestats: completed %u from %u entries", + completed, pending); + } + + return 0; +} + +u32 nvgpu_css_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, + u32 count) +{ + unsigned long *pids = data->perfmon_ids; + unsigned int f; + + f = U32(bitmap_find_next_zero_area(pids, CSS_MAX_PERFMON_IDS, + CSS_FIRST_PERFMON_ID, count, 0)); + if (f > CSS_MAX_PERFMON_IDS) { + f = 0; + } else { + nvgpu_bitmap_set(pids, f, count); + } + + return f; +} + +u32 nvgpu_css_release_perfmon_ids(struct gk20a_cs_snapshot *data, + u32 start, + u32 count) +{ + unsigned long *pids = data->perfmon_ids; + u32 end = start + count; + u32 cnt = 0; + + if (start >= CSS_FIRST_PERFMON_ID && end <= CSS_MAX_PERFMON_IDS) { + nvgpu_bitmap_clear(pids, start, count); + cnt = count; + } + + return cnt; +} + + +static int css_gr_free_client_data(struct gk20a *g, + struct gk20a_cs_snapshot *data, + struct gk20a_cs_snapshot_client *client) +{ + int ret = 0; + + if (client->list.next && client->list.prev) { + nvgpu_list_del(&client->list); + } + + if (client->perfmon_start && client->perfmon_count + && g->ops.css.release_perfmon_ids) { + if (client->perfmon_count != g->ops.css.release_perfmon_ids(data, + client->perfmon_start, client->perfmon_count)) { + ret = -EINVAL; + } + } + + return ret; +} + +static int css_gr_create_client_data(struct gk20a *g, + struct gk20a_cs_snapshot *data, + u32 perfmon_count, + struct gk20a_cs_snapshot_client *cur) +{ + /* + * Special handling in-case of rm-server + * + * client snapshot buffer will not be mapped + * in-case of rm-server its only mapped in + * guest side + */ + if (cur->snapshot) { + (void) memset(cur->snapshot, 0, sizeof(*cur->snapshot)); + cur->snapshot->start = U32(sizeof(*cur->snapshot)); + /* we should be ensure that can fit all fifo entries here */ + cur->snapshot->end = + U32(CSS_FIFO_ENTRY_CAPACITY(cur->snapshot_size) + * sizeof(struct 
gk20a_cs_snapshot_fifo_entry) + + sizeof(struct gk20a_cs_snapshot_fifo)); + cur->snapshot->get = cur->snapshot->start; + cur->snapshot->put = cur->snapshot->start; + } + + cur->perfmon_count = perfmon_count; + + /* In virtual case, perfmon ID allocation is handled by the server + * at the time of the attach (allocate_perfmon_ids is NULL in this case) + */ + if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) { + cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data, + cur->perfmon_count); + if (!cur->perfmon_start) { + return -ENOENT; + } + } + + nvgpu_list_add_tail(&cur->list, &data->clients); + + return 0; +} + + +int nvgpu_css_attach(struct nvgpu_channel *ch, + u32 perfmon_count, + u32 *perfmon_start, + struct gk20a_cs_snapshot_client *cs_client) +{ + int ret = 0; + struct gk20a *g = ch->g; + + /* we must have a placeholder to store pointer to client structure */ + if (!cs_client) { + return -EINVAL; + } + + if (!perfmon_count || + perfmon_count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID) { + return -EINVAL; + } + + nvgpu_speculation_barrier(); + + nvgpu_mutex_acquire(&g->cs_lock); + + ret = css_gr_create_shared_data(g); + if (ret != 0) { + goto failed; + } + + ret = css_gr_create_client_data(g, g->cs_data, + perfmon_count, + cs_client); + if (ret != 0) { + goto failed; + } + + ret = g->ops.css.enable_snapshot(ch, cs_client); + if (ret != 0) { + goto failed; + } + + if (perfmon_start) { + *perfmon_start = cs_client->perfmon_start; + } + + nvgpu_mutex_release(&g->cs_lock); + + return 0; + +failed: + if (g->cs_data) { + if (cs_client) { + css_gr_free_client_data(g, g->cs_data, cs_client); + cs_client = NULL; + } + + if (nvgpu_list_empty(&g->cs_data->clients)) { + css_gr_free_shared_data(g); + } + } + nvgpu_mutex_release(&g->cs_lock); + + if (perfmon_start) { + *perfmon_start = 0; + } + + return ret; +} + +int nvgpu_css_detach(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + int ret = 0; + struct gk20a *g = ch->g; + + if (!cs_client) { + return -EINVAL; + } + + nvgpu_mutex_acquire(&g->cs_lock); + if (g->cs_data) { + struct gk20a_cs_snapshot *data = g->cs_data; + + if (g->ops.css.detach_snapshot) { + g->ops.css.detach_snapshot(ch, cs_client); + } + + ret = css_gr_free_client_data(g, data, cs_client); + if (nvgpu_list_empty(&data->clients)) { + css_gr_free_shared_data(g); + } + } else { + ret = -EBADF; + } + nvgpu_mutex_release(&g->cs_lock); + + return ret; +} + +int nvgpu_css_flush(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + int ret = 0; + struct gk20a *g = ch->g; + + if (!cs_client) { + return -EINVAL; + } + + nvgpu_mutex_acquire(&g->cs_lock); + ret = css_gr_flush_snapshots(ch); + nvgpu_mutex_release(&g->cs_lock); + + return ret; +} + +/* helper function with locking to cleanup snapshot code code in gr_gk20a.c */ +void nvgpu_free_cyclestats_snapshot_data(struct gk20a *g) +{ + nvgpu_mutex_acquire(&g->cs_lock); + css_gr_free_shared_data(g); + nvgpu_mutex_release(&g->cs_lock); + nvgpu_mutex_destroy(&g->cs_lock); +} + +int nvgpu_css_check_data_available(struct nvgpu_channel *ch, u32 *pending, + bool *hw_overflow) +{ + struct gk20a *g = ch->g; + struct gk20a_cs_snapshot *css = g->cs_data; + + if (!css->hw_snapshot) { + return -EINVAL; + } + + *pending = nvgpu_css_get_pending_snapshots(g); + if (!*pending) { + return 0; + } + + *hw_overflow = nvgpu_css_get_overflow_status(g); + return 0; +} + +u32 nvgpu_css_get_max_buffer_size(struct gk20a *g) +{ + return 0xffffffffU; +} diff --git a/drivers/gpu/nvgpu/common/perf/perfbuf.c 
b/drivers/gpu/nvgpu/common/perf/perfbuf.c new file mode 100644 index 000000000..85554e49e --- /dev/null +++ b/drivers/gpu/nvgpu/common/perf/perfbuf.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_FIXED_GPU_VA 0x4000000ULL +#define PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_MAX_SIZE NVGPU_CPU_PAGE_SIZE + +int nvgpu_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) +{ + int err; + + err = gk20a_busy(g); + if (err != 0) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + g->ops.perf.membuf_reset_streaming(g); + g->ops.perf.enable_membuf(g, size, offset); + + gk20a_idle(g); + + return 0; +} + +int nvgpu_perfbuf_disable_locked(struct gk20a *g) +{ + int err = gk20a_busy(g); + if (err != 0) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + g->ops.perf.membuf_reset_streaming(g); + g->ops.perf.disable_membuf(g); + + gk20a_idle(g); + + return 0; +} + +int nvgpu_perfbuf_init_inst_block(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + int err; + + err = nvgpu_alloc_inst_block(g, &mm->perfbuf.inst_block); + if (err != 0) { + return err; + } + + g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); + g->ops.perf.init_inst_block(g, &mm->perfbuf.inst_block); + + return 0; +} + +int nvgpu_perfbuf_init_vm(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); + int err; + u64 user_size, kernel_size; + + g->ops.mm.get_default_va_sizes(NULL, &user_size, &kernel_size); + + mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, SZ_4K, + nvgpu_safe_sub_u64(user_size, SZ_4K), + kernel_size, + 0ULL, + false, false, false, "perfbuf"); + if (mm->perfbuf.vm == NULL) { + return -ENOMEM; + } + + /* + * PMA available byte buffer GPU_VA needs to fit in 32 bit + * register, hence use a fixed GPU_VA to map it. + * Only one PMA stream is allowed right now so this works. + * This should be updated later to support multiple PMA streams. 
+ */ + mm->perfbuf.pma_bytes_available_buffer_gpu_va = + PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_FIXED_GPU_VA; + + err = nvgpu_vm_area_alloc(mm->perfbuf.vm, + PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_MAX_SIZE / SZ_4K, + SZ_4K, &mm->perfbuf.pma_bytes_available_buffer_gpu_va, + NVGPU_VM_AREA_ALLOC_FIXED_OFFSET); + if (err != 0) { + nvgpu_vm_put(mm->perfbuf.vm); + return err; + } + + err = g->ops.perfbuf.init_inst_block(g); + if (err != 0) { + nvgpu_vm_put(mm->perfbuf.vm); + return err; + } + + return 0; +} + +void nvgpu_perfbuf_deinit_inst_block(struct gk20a *g) +{ + g->ops.perf.deinit_inst_block(g); + nvgpu_free_inst_block(g, &g->mm.perfbuf.inst_block); +} + +void nvgpu_perfbuf_deinit_vm(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + + g->ops.perfbuf.deinit_inst_block(g); + + nvgpu_vm_area_free(mm->perfbuf.vm, + mm->perfbuf.pma_bytes_available_buffer_gpu_va); + nvgpu_vm_put(g->mm.perfbuf.vm); +} + +int nvgpu_perfbuf_update_get_put(struct gk20a *g, u64 bytes_consumed, + u64 *bytes_available, void *cpuva, bool wait, + u64 *put_ptr, bool *overflowed) +{ + struct nvgpu_timeout timeout; + int err; + bool update_available_bytes = (bytes_available == NULL) ? false : true; + volatile u32 *available_bytes_va = (u32 *)cpuva; + + if (update_available_bytes && available_bytes_va != NULL) { + *available_bytes_va = 0xffffffff; + } + + err = g->ops.perf.update_get_put(g, bytes_consumed, + update_available_bytes, put_ptr, overflowed); + if (err != 0) { + return err; + } + + if (update_available_bytes && wait && available_bytes_va != NULL) { + err = nvgpu_timeout_init(g, &timeout, 10000, NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + nvgpu_err(g, "nvgpu_timeout_init() failed err=%d", err); + return err; + } + + do { + if (*available_bytes_va != 0xffffffff) { + break; + } + + nvgpu_msleep(10); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (*available_bytes_va == 0xffffffff) { + nvgpu_err(g, "perfbuf update get put timed out"); + return -ETIMEDOUT; + } + + *bytes_available = *available_bytes_va; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/pmu/allocator.c b/drivers/gpu/nvgpu/common/pmu/allocator.c new file mode 100644 index 000000000..ff9566ff3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/allocator.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +void nvgpu_pmu_allocator_dmem_init(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_allocator *dmem, + union pmu_init_msg_pmu *init) +{ + struct pmu_fw_ver_ops *fw_ops = &pmu->fw->ops; + + if (!nvgpu_alloc_initialized(dmem)) { + /* Align start and end addresses */ + u32 start = + NVGPU_ALIGN(U32(fw_ops->get_init_msg_sw_mngd_area_off(init)), + PMU_DMEM_ALLOC_ALIGNMENT); + u32 end = (U32(fw_ops->get_init_msg_sw_mngd_area_off(init)) + + U32(fw_ops->get_init_msg_sw_mngd_area_size(init))) & + ~(PMU_DMEM_ALLOC_ALIGNMENT - 1U); + u32 size = end - start; + + if (size != 0U) { + nvgpu_allocator_init(g, dmem, NULL, "gk20a_pmu_dmem", + start, size, PMU_DMEM_ALLOC_ALIGNMENT, 0ULL, 0ULL, + BITMAP_ALLOCATOR); + } else { + dmem->priv = NULL; + } + } +} + +void nvgpu_pmu_allocator_dmem_destroy(struct nvgpu_allocator *dmem) +{ + if (nvgpu_alloc_initialized(dmem)) { + nvgpu_alloc_destroy(dmem); + } +} + +void nvgpu_pmu_allocator_surface_free(struct gk20a *g, struct nvgpu_mem *mem) +{ + if (nvgpu_mem_is_valid(mem)) { + nvgpu_dma_free(g, mem); + } +} + +void nvgpu_pmu_allocator_surface_describe(struct gk20a *g, struct nvgpu_mem *mem, + struct flcn_mem_desc_v0 *fb) +{ + fb->address.lo = u64_lo32(mem->gpu_va); + fb->address.hi = u64_hi32(mem->gpu_va); + fb->params = ((u32)mem->size & 0xFFFFFFU); + fb->params |= (GK20A_PMU_DMAIDX_VIRT << 24U); +} + +int nvgpu_pmu_allocator_sysmem_surface_alloc(struct gk20a *g, + struct nvgpu_mem *mem, u32 size) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + int err; + + err = nvgpu_dma_alloc_map_sys(vm, size, mem); + if (err != 0) { + nvgpu_err(g, "failed to allocate memory\n"); + return -ENOMEM; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/pmu/boardobj/boardobj.c b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobj.c new file mode 100644 index 000000000..ae2d74024 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobj.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "boardobj.h" + +/* +* Destructor for the base board object. Called by each device-Specific +* implementation of the BOARDOBJ interface to destroy the board object. +* This has to be explicitly set by each device that extends from the +* board object. 
+*/ +static int destruct_super(struct pmu_board_obj *obj) +{ + if (obj == NULL) { + return -EINVAL; + } + + nvgpu_list_del(&obj->node); + if (obj->allocated) { + nvgpu_kfree(obj->g, obj); + } + + return 0; +} + +/* +* check whether the specified BOARDOBJ object implements the queried +* type/class enumeration. +*/ +static bool implements_super(struct gk20a *g, struct pmu_board_obj *obj, + u8 type) +{ + nvgpu_log_info(g, " "); + + return (0U != (obj->type_mask & BIT32(type))); +} + +int pmu_board_obj_pmu_data_init_super(struct gk20a *g, + struct pmu_board_obj *obj, struct nv_pmu_boardobj *pmu_obj) +{ + nvgpu_log_info(g, " "); + if (obj == NULL) { + return -EINVAL; + } + if (pmu_obj == NULL) { + return -EINVAL; + } + pmu_obj->type = obj->type; + nvgpu_log_info(g, " Done"); + return 0; +} + +int pmu_board_obj_construct_super(struct gk20a *g, + struct pmu_board_obj *obj, void *args) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)args; + + nvgpu_log_info(g, " "); + + if ((obj_tmp == NULL) || (obj == NULL)) { + return -EINVAL; + } + + obj->allocated = true; + obj->g = g; + obj->type = obj_tmp->type; + obj->idx = CTRL_BOARDOBJ_IDX_INVALID; + obj->type_mask = + BIT32(obj->type) | obj_tmp->type_mask; + obj->implements = implements_super; + obj->destruct = destruct_super; + obj->pmudatainit = pmu_board_obj_pmu_data_init_super; + nvgpu_list_add(&obj->node, &g->boardobj_head); + return 0; +} + +u8 pmu_board_obj_get_type(void *obj) +{ + return (((struct pmu_board_obj *)(obj))->type); +} + +u8 pmu_board_obj_get_idx(void *obj) +{ + return (((struct pmu_board_obj *)(obj))->idx); +} + diff --git a/drivers/gpu/nvgpu/common/pmu/boardobj/boardobj.h b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobj.h new file mode 100644 index 000000000..8404ffd00 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobj.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_BOARDOBJ_H +#define NVGPU_BOARDOBJ_H + +struct pmu_board_obj; +struct nvgpu_list_node; +struct gk20a; +struct nv_pmu_boardobj; + +/* +* Base Class for all physical or logical device on the PCB. +* Contains fields common to all devices on the board. Specific types of +* devices may extend this object adding any details specific to that +* device or device-type. 
+*/ + +struct pmu_board_obj { + struct gk20a *g; + + u8 type; /*type of the device*/ + u8 idx; /*index of boardobj within in its group*/ + /* true if allocated in constructor. destructor should free */ + bool allocated; + u32 type_mask; /*mask of types this boardobjimplements*/ + bool (*implements)(struct gk20a *g, struct pmu_board_obj *obj, + u8 type); + int (*destruct)(struct pmu_board_obj *obj); + /* + * Access interface apis which will be overridden by the devices + * that inherit from BOARDOBJ + */ + int (*pmudatainit)(struct gk20a *g, struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj); + struct nvgpu_list_node node; +}; + +#define HIGHESTBITIDX_32(n32) \ +{ \ + u32 count = 0U; \ + while (((n32) >>= 1U) != 0U) { \ + count++; \ + } \ + (n32) = count; \ +} + +#define LOWESTBIT(x) ((x) & (((x)-1U) ^ (x))) + +#define HIGHESTBIT(n32) \ +{ \ + HIGHESTBITIDX_32(n32); \ + n32 = NVBIT(n32); \ +} + +#define ONEBITSET(x) ((x) && (((x) & ((x)-1U)) == 0U)) + +#define LOWESTBITIDX_32(n32) \ +{ \ + n32 = LOWESTBIT(n32); \ + IDX_32(n32); \ +} + +#define NUMSETBITS_32(n32) \ +{ \ + (n32) = (n32) - (((n32) >> 1U) & 0x55555555U); \ + (n32) = ((n32) & 0x33333333U) + (((n32) >> 2U) & 0x33333333U); \ + (n32) = ((((n32) + ((n32) >> 4U)) & 0x0F0F0F0FU) * 0x01010101U) >> 24U;\ +} + +#define IDX_32(n32) \ +{ \ + u32 idx = 0U; \ + if (((n32) & 0xFFFF0000U) != 0U) { \ + idx += 16U; \ + } \ + if (((n32) & 0xFF00FF00U) != 0U) { \ + idx += 8U; \ + } \ + if (((n32) & 0xF0F0F0F0U) != 0U) { \ + idx += 4U; \ + } \ + if (((n32) & 0xCCCCCCCCU) != 0U) { \ + idx += 2U; \ + } \ + if (((n32) & 0xAAAAAAAAU) != 0U) { \ + idx += 1U; \ + } \ + (n32) = idx; \ +} + +/* +* Fills out the appropriate the nv_pmu_xxxx_device_desc_ driver->PMU +* description structure, describing this BOARDOBJ board device to the PMU. +* +*/ +int pmu_board_obj_pmu_data_init_super(struct gk20a *g, struct pmu_board_obj + *obj, struct nv_pmu_boardobj *pmu_obj); + +/* +* Constructor for the base Board Object. Called by each device-specific +* implementation of the BOARDOBJ interface to initialize the board object. +*/ +int pmu_board_obj_construct_super(struct gk20a *g, struct pmu_board_obj + *obj, void *args); + +static inline struct pmu_board_obj * +boardobj_from_node(struct nvgpu_list_node *node) +{ + return (struct pmu_board_obj *) + ((uintptr_t)node - offsetof(struct pmu_board_obj, node)); +}; + +u8 pmu_board_obj_get_type(void *obj); +u8 pmu_board_obj_get_idx(void *obj); + +#endif /* NVGPU_BOARDOBJ_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp.c b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp.c new file mode 100644 index 000000000..9fd51dd16 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp.c @@ -0,0 +1,760 @@ +/* +* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. +* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include + +#include "boardobj.h" + +static int check_boardobjgrp_param(struct gk20a *g, + struct boardobjgrp *pboardobjgrp) +{ + if (pboardobjgrp == NULL) { + return -EINVAL; + } + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + if (pboardobjgrp->pmu.unitid == BOARDOBJGRP_UNIT_ID_INVALID) { + return -EINVAL; + } + + if (pboardobjgrp->classid == BOARDOBJGRP_GRP_CLASS_ID_INVALID) { + return -EINVAL; + } + + /* If no objects in the group, return early */ + if (BOARDOBJGRP_IS_EMPTY(pboardobjgrp)) { + return -EINVAL; + } + + return 0; +} + +/* + * Inserts a previously constructed Board Object into a Board Object Group for + * tracking. Objects are inserted in the array based on the given index. + */ +static int +obj_insert_final(struct boardobjgrp *pboardobjgrp, + struct pmu_board_obj *obj, u8 index) +{ + struct gk20a *g = pboardobjgrp->g; + + nvgpu_log_info(g, " "); + + if (pboardobjgrp == NULL) { + return -EINVAL; + } + + if (obj == NULL) { + return -EINVAL; + } + + if (index > pboardobjgrp->objslots) { + return -EINVAL; + } + + if (pboardobjgrp->ppobjects[index] != NULL) { + return -EINVAL; + } + + /* + * Check that this BOARDOBJ has not already been added to a + * BOARDOBJGRP + */ + if (obj->idx != CTRL_BOARDOBJ_IDX_INVALID) { + return -EINVAL; + } + + pboardobjgrp->ppobjects[index] = obj; + pboardobjgrp->objmaxidx = (u8)(BOARDOBJGRP_IS_EMPTY(pboardobjgrp) ? + index : max(pboardobjgrp->objmaxidx, index)); + obj->idx = index; + + pboardobjgrp->objmask |= BIT32(index); + + nvgpu_log_info(g, " Done"); + + return nvgpu_boardobjgrpmask_bit_set(pboardobjgrp->mask, index); +} + +/* + * Retrieves a Board Object from a Board Object Group using the group's index. + */ +static struct pmu_board_obj *obj_get_by_idx_final( + struct boardobjgrp *pboardobjgrp, u8 index) +{ + if (!boardobjgrp_idxisvalid(pboardobjgrp, index)) { + return NULL; + } + return pboardobjgrp->ppobjects[index]; +} + +/* + * Retrieve Board Object immediately following one pointed by @ref currentindex + * filtered out by the provided mask. If (mask == NULL) => no filtering. + */ +static struct pmu_board_obj *obj_get_next_final( + struct boardobjgrp *pboardobjgrp, u8 *currentindex, + struct boardobjgrpmask *mask) +{ + struct pmu_board_obj *obj_next = NULL; + u8 objmaxidx; + u8 index; + + if (currentindex == NULL) { + return NULL; + } + + if (pboardobjgrp == NULL) { + return NULL; + } + + /* Search from next element unless first object was requested */ + index = (*currentindex != CTRL_BOARDOBJ_IDX_INVALID) ? 
+ (*currentindex + 1U) : 0U; + + /* For the cases below in which we have to return NULL */ + *currentindex = CTRL_BOARDOBJ_IDX_INVALID; + + + /* Validate provided mask */ + if (mask != NULL) { + if (!(nvgpu_boardobjgrpmask_sizeeq(pboardobjgrp->mask, mask))) { + return NULL; + } + } + + objmaxidx = pboardobjgrp->objmaxidx; + + if (objmaxidx != CTRL_BOARDOBJ_IDX_INVALID) { + for (; index <= objmaxidx; index++) { + obj_next = pboardobjgrp->ppobjects[index]; + if (obj_next != NULL) { + /* Filter results using client provided mask.*/ + if (mask != NULL) { + if (!nvgpu_boardobjgrpmask_bit_get(mask, + index)) { + obj_next = NULL; + continue; + } + } + *currentindex = index; + break; + } + } + } + + return obj_next; +} + +static int pmu_data_inst_get_stub(struct gk20a *g, + struct nv_pmu_boardobjgrp *boardobjgrppmu, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + nvgpu_log_info(g, " "); + return -EINVAL; +} + + +static int pmu_status_inst_get_stub(struct gk20a *g, + void *pboardobjgrppmu, + struct nv_pmu_boardobj_query **obj_pmu_status, u8 idx) +{ + nvgpu_log_info(g, " "); + return -EINVAL; +} + +static int obj_remove_and_destroy_final( + struct boardobjgrp *pboardobjgrp, + u8 index) +{ + int status = 0; + int stat; + struct gk20a *g = pboardobjgrp->g; + + nvgpu_log_info(g, " "); + + if (!boardobjgrp_idxisvalid(pboardobjgrp, index)) { + return -EINVAL; + } + + if (pboardobjgrp->objmaxidx == CTRL_BOARDOBJ_IDX_INVALID) { + return -EINVAL; + } + + status = pboardobjgrp->ppobjects[index]->destruct( + pboardobjgrp->ppobjects[index]); + + pboardobjgrp->ppobjects[index] = NULL; + + pboardobjgrp->objmask &= ~BIT32(index); + + stat = nvgpu_boardobjgrpmask_bit_clr(pboardobjgrp->mask, index); + if (stat != 0) { + if (status == 0) { + status = stat; + } + } + + /* objmaxidx requires update only if that very object was removed */ + if (pboardobjgrp->objmaxidx == index) { + pboardobjgrp->objmaxidx = + nvgpu_boardobjgrpmask_bit_idx_highest( + pboardobjgrp->mask); + } + + return status; +} + +static int pmu_cmd_destroy_impl(struct gk20a *g, + struct boardobjgrp_pmu_cmd *cmd) +{ + struct nvgpu_mem *mem = &cmd->surf.sysmem_desc; + + nvgpu_pmu_allocator_surface_free(g, mem); + return 0; +} + +static int destruct_super(struct boardobjgrp *pboardobjgrp) +{ + struct pmu_board_obj *obj; + struct gk20a *g = pboardobjgrp->g; + int status = 0; + int stat; + u8 index; + + nvgpu_log_info(g, " "); + + if (pboardobjgrp->mask == NULL) { + return -EINVAL; + } + if (pboardobjgrp->ppobjects == NULL) { + return -EINVAL; + } + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + stat = pboardobjgrp->objremoveanddestroy(pboardobjgrp, index); + if (status == 0) { + status = stat; + } + + pboardobjgrp->ppobjects[index] = NULL; + pboardobjgrp->objmask &= ~BIT32(index); + } + + pboardobjgrp->objmask = 0; + + if (pboardobjgrp->objmaxidx != CTRL_BOARDOBJ_IDX_INVALID) { + if (status == 0) { + status = -EINVAL; + } + + WARN_ON(true); + } + + /* Destroy the PMU CMD data */ + stat = pmu_cmd_destroy_impl(g, &pboardobjgrp->pmu.set); + if (status == 0) { + status = stat; + } + + stat = pmu_cmd_destroy_impl(g, &pboardobjgrp->pmu.getstatus); + if (status == 0) { + status = stat; + } + + nvgpu_list_del(&pboardobjgrp->node); + + pboardobjgrp->bconstructed = false; + + return status; +} + +static int is_pmu_cmd_id_valid(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct boardobjgrp_pmu_cmd *cmd) +{ + int err = 0; + + if (pboardobjgrp->pmu.rpc_func_id == + BOARDOBJGRP_GRP_RPC_FUNC_ID_INVALID) { + err = -EINVAL; + } + 
+ return err; +} + +static int pmu_cmd_pmu_init_handle_impl(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct boardobjgrp_pmu_cmd *pcmd) +{ + int status = 0; + struct nvgpu_mem *sysmem_desc = &pcmd->surf.sysmem_desc; + + nvgpu_log_info(g, " "); + + if (is_pmu_cmd_id_valid(g, + pboardobjgrp, pcmd) != 0) { + goto pmu_cmd_pmu_init_handle_impl_exit; + } + + if (pcmd->fbsize == 0U) { + goto pmu_cmd_pmu_init_handle_impl_exit; + } + + status = nvgpu_pmu_allocator_sysmem_surface_alloc(g, sysmem_desc, pcmd->fbsize); + if (status != 0) { + nvgpu_err(g, "failed to allocate memory\n"); + return -ENOMEM; + } + + /* we only have got sysmem later this will get copied to vidmem + surface*/ + pcmd->surf.vidmem_desc.size = 0; + + pcmd->buf = (struct nv_pmu_boardobjgrp_super *)sysmem_desc->cpu_va; + + pmu_cmd_pmu_init_handle_impl_exit: + return status; +} + +static int pmu_init_handle_impl(struct gk20a *g, + struct boardobjgrp *pboardobjgrp) +{ + int status = 0; + + nvgpu_log_info(g, " "); + + status = pmu_cmd_pmu_init_handle_impl(g, pboardobjgrp, + &pboardobjgrp->pmu.set); + if (status != 0) { + nvgpu_err(g, "failed to init pmu set cmd"); + goto pmu_init_handle_impl_exit; + } + + status = pmu_cmd_pmu_init_handle_impl(g, pboardobjgrp, + &pboardobjgrp->pmu.getstatus); + if (status != 0) { + nvgpu_err(g, "failed to init get status command"); + goto pmu_init_handle_impl_exit; + } + + /* If the GRP_SET CMD has not been allocated, nothing left to do. */ + if ((is_pmu_cmd_id_valid(g, + pboardobjgrp, &pboardobjgrp->pmu.set) != 0)|| + (BOARDOBJGRP_IS_EMPTY(pboardobjgrp))) { + goto pmu_init_handle_impl_exit; + } + + /* Send the BOARDOBJGRP to the pmu via RM_PMU_BOARDOBJ_CMD_GRP. */ + status = pboardobjgrp->pmuset(g, pboardobjgrp); + if (status != 0) { + nvgpu_err(g, "failed to send boardobg grp to PMU"); + } + + pmu_init_handle_impl_exit: + return status; +} + +static int pmu_cmd_send_rpc(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct boardobjgrp_pmu_cmd *pcmd, + bool copy_out) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_board_obj_grp_cmd rpc; + int status = 0; + + nvgpu_log_fn(g, " "); + + (void) memset(&rpc, 0, + sizeof(struct nv_pmu_rpc_struct_board_obj_grp_cmd)); + + rpc.class_id = pboardobjgrp->classid; + rpc.command_id = copy_out ? + NV_PMU_BOARDOBJGRP_CMD_GET_STATUS : + NV_PMU_BOARDOBJGRP_CMD_SET; + + rpc.hdr.unit_id = pboardobjgrp->pmu.unitid; + rpc.hdr.function = pboardobjgrp->pmu.rpc_func_id; + rpc.hdr.flags = 0x0; + + status = nvgpu_pmu_rpc_execute(pmu, &(rpc.hdr), + U16(sizeof(rpc) - sizeof(rpc.scratch)), + pcmd->dmem_buffer_size, + NULL, NULL, copy_out); + + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + } + + return status; +} + +/* +* Sends a BOARDOBJGRP to the PMU via the PMU_BOARDOBJ_CMD_GRP interface. +* This interface leverages @ref boardobjgrp_pmudatainit to populate the +* structure. +*/ +static int pmu_set_impl(struct gk20a *g, + struct boardobjgrp *pboardobjgrp) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status = 0; + struct boardobjgrp_pmu_cmd *pcmd = + (struct boardobjgrp_pmu_cmd *)(&pboardobjgrp->pmu.set); + + nvgpu_log_info(g, " "); + + if (check_boardobjgrp_param(g, pboardobjgrp) != 0) { + return -EINVAL; + } + + if ((pcmd->buf == NULL) && + (pboardobjgrp->pmu.rpc_func_id == + BOARDOBJGRP_GRP_RPC_FUNC_ID_INVALID)) { + return -EINVAL; + } + + /* Initialize PMU buffer with BOARDOBJGRP data. 
*/ + (void) memset(pcmd->buf, 0x0, pcmd->fbsize); + status = pboardobjgrp->pmudatainit(g, pboardobjgrp, + pcmd->buf); + if (status != 0) { + nvgpu_err(g, "could not parse pmu data"); + goto pmu_set_impl_exit; + } + + /* + * Reset the boolean that indicates set status + * for most recent instance of BOARDOBJGRP. + */ + pboardobjgrp->pmu.bset = false; + + /* + * copy constructed pmu boardobjgrp data from + * sysmem to pmu super surface present in FB + */ + nvgpu_mem_wr_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + pcmd->super_surface_offset, pcmd->buf, + pcmd->fbsize); + + /* Send the SET PMU CMD to the PMU using RPC*/ + status = pmu_cmd_send_rpc(g, pboardobjgrp, + pcmd, false); + if (status != 0) { + nvgpu_err(g, "could not send SET CMD to PMU"); + goto pmu_set_impl_exit; + } + + pboardobjgrp->pmu.bset = true; + + pmu_set_impl_exit: + return status; +} + +/* +* Gets the dynamic status of the PMU BOARDOBJGRP via the +* PMU_BOARDOBJ_CMD_GRP GET_STATUS interface. +*/ +static int +pmu_get_status_impl(struct gk20a *g, struct boardobjgrp *pboardobjgrp, + struct boardobjgrpmask *mask) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status = 0; + struct boardobjgrp_pmu_cmd *pcmd = + (struct boardobjgrp_pmu_cmd *)(&pboardobjgrp->pmu.getstatus); + + nvgpu_log_info(g, " "); + + if (check_boardobjgrp_param(g, pboardobjgrp) != 0) { + return -EINVAL; + } + + if ((pcmd->buf == NULL) && + (pboardobjgrp->pmu.rpc_func_id == + BOARDOBJGRP_GRP_RPC_FUNC_ID_INVALID)) { + return -EINVAL; + } + + /* + * Can only GET_STATUS if the BOARDOBJGRP has been + * previously SET to the PMU + */ + if (!pboardobjgrp->pmu.bset) { + return -EINVAL; + } + + /* + * Initialize PMU buffer with the mask of + * BOARDOBJGRPs for which to retrieve status + */ + (void) memset(pcmd->buf, 0x0, pcmd->fbsize); + status = pboardobjgrp->pmuhdrdatainit(g, pboardobjgrp, + pcmd->buf, mask); + if (status != 0) { + nvgpu_err(g, "could not init PMU HDR data"); + goto pmu_get_status_impl_exit; + } + + /* + * copy constructed pmu boardobjgrp data from + * sysmem to pmu super surface present in FB + */ + nvgpu_mem_wr_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + pcmd->super_surface_offset, + pcmd->buf, pcmd->fbsize); + /* Send the GET_STATUS PMU CMD to the PMU */ + status = pmu_cmd_send_rpc(g, pboardobjgrp, + pcmd, true); + if (status != 0) { + nvgpu_err(g, "could not send GET_STATUS cmd to PMU"); + goto pmu_get_status_impl_exit; + } + + /*copy the data back to sysmem buffer that belongs to command*/ + nvgpu_mem_rd_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + pcmd->super_surface_offset, + pcmd->buf, pcmd->fbsize); + + pmu_get_status_impl_exit: + return status; +} + +int nvgpu_boardobjgrp_construct_super(struct gk20a *g, + struct boardobjgrp *pboardobjgrp) +{ + nvgpu_log_info(g, " "); + + if (pboardobjgrp == NULL) { + return -EINVAL; + } + + if (pboardobjgrp->ppobjects == NULL) { + return -EINVAL; + } + + if (pboardobjgrp->mask == NULL) { + return -EINVAL; + } + + pboardobjgrp->g = g; + pboardobjgrp->objmask = 0; + + pboardobjgrp->classid = BOARDOBJGRP_GRP_CLASS_ID_INVALID; + pboardobjgrp->pmu.unitid = BOARDOBJGRP_UNIT_ID_INVALID; + pboardobjgrp->pmu.bset = false; + pboardobjgrp->pmu.rpc_func_id = BOARDOBJGRP_GRP_RPC_FUNC_ID_INVALID; + pboardobjgrp->pmu.set.id = BOARDOBJGRP_GRP_CMD_ID_INVALID; + pboardobjgrp->pmu.getstatus.id = BOARDOBJGRP_GRP_CMD_ID_INVALID; + + /* Initialize basic interfaces */ + pboardobjgrp->destruct = destruct_super; + pboardobjgrp->objinsert = obj_insert_final; + 
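	/*
	 * The hooks installed below form the group's virtual interface.
	 * construct_super() wires in generic defaults (and stubs for the
	 * per-instance getters); sub-class constructors and unit code are
	 * expected to override them afterwards. A minimal sketch of how a
	 * unit typically does this, using names from clk_domain_sw_setup()
	 * further down, roughly:
	 *
	 *   nvgpu_boardobjgrp_construct_e32(g, &pclkdomainobjs->super);
	 *   pboardobjgrp->pmudatainit    = _clk_domains_pmudatainit_3x;
	 *   pboardobjgrp->pmudatainstget = _clk_domains_pmudata_instget;
	 */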
pboardobjgrp->objgetbyidx = obj_get_by_idx_final; + pboardobjgrp->objgetnext = obj_get_next_final; + pboardobjgrp->objremoveanddestroy = + obj_remove_and_destroy_final; + + pboardobjgrp->pmuinithandle = pmu_init_handle_impl; + pboardobjgrp->pmuhdrdatainit = nvgpu_boardobjgrp_pmu_hdr_data_init_super; + pboardobjgrp->pmudatainit = nvgpu_boardobjgrp_pmu_data_init_super; + pboardobjgrp->pmuset = pmu_set_impl; + pboardobjgrp->pmugetstatus = pmu_get_status_impl; + + pboardobjgrp->pmudatainstget = pmu_data_inst_get_stub; + pboardobjgrp->pmustatusinstget = pmu_status_inst_get_stub; + + pboardobjgrp->objmaxidx = CTRL_BOARDOBJ_IDX_INVALID; + pboardobjgrp->bconstructed = true; + + nvgpu_list_add(&pboardobjgrp->node, &g->boardobjgrp_head); + + return 0; +} + + +int nvgpu_boardobjgrp_pmucmd_construct_impl(struct gk20a *g, struct boardobjgrp + *pboardobjgrp, struct boardobjgrp_pmu_cmd *cmd, u8 id, u8 msgid, + u16 hdrsize, u16 entrysize, u32 fbsize, u32 ss_offset, u8 rpc_func_id) +{ + nvgpu_log_fn(g, " "); + + /* Copy the parameters into the CMD*/ + cmd->dmem_buffer_size = ((hdrsize > entrysize) ? hdrsize : entrysize); + cmd->super_surface_offset = ss_offset; + pboardobjgrp->pmu.rpc_func_id = rpc_func_id; + cmd->fbsize = fbsize; + + nvgpu_log_fn(g, "DONE"); + return 0; +} + +int nvgpu_boardobjgrp_pmu_hdr_data_init_super(struct gk20a *g, struct boardobjgrp + *pboardobjgrp, struct nv_pmu_boardobjgrp_super *pboardobjgrppmu, + struct boardobjgrpmask *mask) +{ + nvgpu_log_info(g, " "); + + if (pboardobjgrp == NULL) { + return -EINVAL; + } + if (pboardobjgrppmu == NULL) { + return -EINVAL; + } + pboardobjgrppmu->type = pboardobjgrp->type; + pboardobjgrppmu->class_id = pboardobjgrp->classid; + pboardobjgrppmu->obj_slots = BOARDOBJGRP_PMU_SLOTS_GET(pboardobjgrp); + pboardobjgrppmu->flags = 0; + + nvgpu_log_info(g, " Done"); + return 0; +} + +int nvgpu_boardobjgrp_pmu_data_init_legacy(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + int status = 0; + struct pmu_board_obj *obj = NULL; + struct nv_pmu_boardobj *pmu_obj = NULL; + u8 index; + + nvgpu_log_info(g, " "); + + if (pboardobjgrp == NULL) { + return -EINVAL; + } + if (pboardobjgrppmu == NULL) { + return -EINVAL; + } + + nvgpu_boardobjgrp_e32_hdr_set((struct nv_pmu_boardobjgrp *) + (void *)pboardobjgrppmu, pboardobjgrp->objmask); + + BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK(32, index, pboardobjgrp->objmask) { + /* Obtain pointer to the current instance of the + * Object from the Group */ + obj = pboardobjgrp->objgetbyidx(pboardobjgrp, index); + if (NULL == obj) { + nvgpu_err(g, "could not get object instance"); + status = -EINVAL; + goto nvgpu_boardobjgrp_pmu_data_init_legacy_exit; + } + + status = pboardobjgrp->pmudatainstget(g, + (struct nv_pmu_boardobjgrp *) + (void *)pboardobjgrppmu, + &pmu_obj, index); + if (status != 0) { + nvgpu_err(g, "could not get object instance"); + goto nvgpu_boardobjgrp_pmu_data_init_legacy_exit; + } + + /* Initialize the PMU Data */ + status = obj->pmudatainit(g, obj, pmu_obj); + if (status != 0) { + nvgpu_err(g, + "could not parse pmu for device %d", index); + goto nvgpu_boardobjgrp_pmu_data_init_legacy_exit; + } + } + BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK_END + + nvgpu_boardobjgrp_pmu_data_init_legacy_exit: + nvgpu_log_info(g, " Done"); + return status; +} + + +int nvgpu_boardobjgrp_pmu_data_init_super(struct gk20a *g, struct boardobjgrp + *pboardobjgrp, struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + int status = 0; + struct pmu_board_obj *obj = NULL; + struct 
nv_pmu_boardobj *pmu_obj = NULL; + u8 index; + + nvgpu_log_info(g, " "); + + if (pboardobjgrp == NULL) { + return -EINVAL; + } + if (pboardobjgrppmu == NULL) { + return -EINVAL; + } + + /* Initialize the PMU HDR data.*/ + status = pboardobjgrp->pmuhdrdatainit(g, pboardobjgrp, pboardobjgrppmu, + pboardobjgrp->mask); + if (status != 0) { + nvgpu_err(g, "unable to init boardobjgrp pmuhdr data"); + goto boardobjgrp_pmu_data_init_super_exit; + } + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + status = pboardobjgrp->pmudatainstget(g, + (struct nv_pmu_boardobjgrp *) + (void *)pboardobjgrppmu, &pmu_obj, index); + if (status != 0) { + nvgpu_err(g, "could not get object instance"); + goto boardobjgrp_pmu_data_init_super_exit; + } + + /* Initialize the PMU Data and send to PMU */ + status = obj->pmudatainit(g, obj, pmu_obj); + if (status != 0) { + nvgpu_err(g, + "could not parse pmu for device %d", index); + goto boardobjgrp_pmu_data_init_super_exit; + } + } + + boardobjgrp_pmu_data_init_super_exit: + nvgpu_log_info(g, " Done"); + return status; +} + +void nvgpu_boardobjgrp_e32_hdr_set(struct nv_pmu_boardobjgrp *hdr, u32 objmask) +{ + u32 slots = objmask; + + HIGHESTBITIDX_32(slots); + slots++; + + hdr->super.type = CTRL_BOARDOBJGRP_TYPE_E32; + hdr->super.class_id = 0; + hdr->super.obj_slots = (u8)slots; + hdr->obj_mask = objmask; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp_e255.c b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp_e255.c new file mode 100644 index 000000000..895d63f1c --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp_e255.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +static int boardobjgrp_pmu_hdr_data_init_e255(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu, + struct boardobjgrpmask *mask) +{ + struct nv_pmu_boardobjgrp_e255 *pgrpe255 = + (struct nv_pmu_boardobjgrp_e255 *)(void *)pboardobjgrppmu; + int status; + + nvgpu_log_info(g, " "); + + if (pboardobjgrp == NULL) { + return -EINVAL; + } + + if (pboardobjgrppmu == NULL) { + return -EINVAL; + } + + status = nvgpu_boardobjgrpmask_export(mask, + mask->bitcount, + &pgrpe255->obj_mask.super); + if (status != 0) { + nvgpu_err(g, "e255 init:failed export grpmask"); + return status; + } + + return nvgpu_boardobjgrp_pmu_hdr_data_init_super(g, + pboardobjgrp, pboardobjgrppmu, mask); +} + +int nvgpu_boardobjgrp_construct_e255(struct gk20a *g, + struct boardobjgrp_e255 *pboardobjgrp_e255) +{ + int status = 0; + u8 objslots; + + nvgpu_log_info(g, " "); + + objslots = 255; + status = boardobjgrpmask_e255_init(&pboardobjgrp_e255->mask, NULL); + if (status != 0) { + goto nvgpu_boardobjgrpconstruct_e255_exit; + } + + pboardobjgrp_e255->super.type = CTRL_BOARDOBJGRP_TYPE_E255; + pboardobjgrp_e255->super.ppobjects = pboardobjgrp_e255->objects; + pboardobjgrp_e255->super.objslots = objslots; + pboardobjgrp_e255->super.mask = &(pboardobjgrp_e255->mask.super); + + status = nvgpu_boardobjgrp_construct_super(g, &pboardobjgrp_e255->super); + if (status != 0) { + goto nvgpu_boardobjgrpconstruct_e255_exit; + } + + pboardobjgrp_e255->super.pmuhdrdatainit = + boardobjgrp_pmu_hdr_data_init_e255; + +nvgpu_boardobjgrpconstruct_e255_exit: + return status; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp_e32.c b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp_e32.c new file mode 100644 index 000000000..7d733943f --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrp_e32.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include + +static int boardobjgrp_pmu_hdr_data_init_e32(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu, + struct boardobjgrpmask *mask) +{ + struct nv_pmu_boardobjgrp_e32 *pgrpe32 = + (struct nv_pmu_boardobjgrp_e32 *)(void *)pboardobjgrppmu; + int status; + + nvgpu_log_info(g, " "); + + if (pboardobjgrp == NULL) { + return -EINVAL; + } + + if (pboardobjgrppmu == NULL) { + return -EINVAL; + } + status = nvgpu_boardobjgrpmask_export(mask, + mask->bitcount, + &pgrpe32->obj_mask.super); + if (status != 0) { + nvgpu_err(g, "e32 init:failed export grpmask"); + return status; + } + + return nvgpu_boardobjgrp_pmu_hdr_data_init_super(g, + pboardobjgrp, pboardobjgrppmu, mask); +} + +int nvgpu_boardobjgrp_construct_e32(struct gk20a *g, + struct boardobjgrp_e32 *pboardobjgrp_e32) +{ + int status; + u8 objslots; + + nvgpu_log_info(g, " "); + objslots = 32; + + status = boardobjgrpmask_e32_init(&pboardobjgrp_e32->mask, NULL); + if (status != 0) { + goto nvgpu_boardobjgrpconstruct_e32_exit; + } + + pboardobjgrp_e32->super.type = CTRL_BOARDOBJGRP_TYPE_E32; + pboardobjgrp_e32->super.ppobjects = pboardobjgrp_e32->objects; + pboardobjgrp_e32->super.objslots = objslots; + pboardobjgrp_e32->super.mask = &(pboardobjgrp_e32->mask.super); + + status = nvgpu_boardobjgrp_construct_super(g, &pboardobjgrp_e32->super); + if (status != 0) { + goto nvgpu_boardobjgrpconstruct_e32_exit; + } + + pboardobjgrp_e32->super.pmuhdrdatainit = + boardobjgrp_pmu_hdr_data_init_e32; + +nvgpu_boardobjgrpconstruct_e32_exit: + return status; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrpmask.c b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrpmask.c new file mode 100644 index 000000000..06d648913 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/boardobj/boardobjgrpmask.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include "boardobj.h" + +/* +* Assures that unused bits (size .. (maskDataCount * 32 - 1)) are always zero. 
+*/ +#define BOARDOBJGRPMASK_NORMALIZE(_pmask) \ + ((_pmask)->data[(_pmask)->maskdatacount-1U] &= (_pmask)->lastmaskfilter) + +static int import_mask_data(struct boardobjgrpmask *mask, u8 bitsize, + struct ctrl_boardobjgrp_mask *extmask) +{ + u8 index; + + if (mask == NULL) { + return -EINVAL; + } + if (extmask == NULL) { + return -EINVAL; + } + if (mask->bitcount != bitsize) { + return -EINVAL; + } + + for (index = 0; index < mask->maskdatacount; index++) { + mask->data[index] = extmask->data[index]; + } + + BOARDOBJGRPMASK_NORMALIZE(mask); + + return 0; +} + + +static int clr_mask_data(struct boardobjgrpmask *mask) +{ + u8 index; + + if (mask == NULL) { + return -EINVAL; + } + for (index = 0; index < mask->maskdatacount; index++) { + mask->data[index] = 0; + } + + return 0; +} + +int nvgpu_boardobjgrpmask_init(struct boardobjgrpmask *mask, u8 bitsize, + struct ctrl_boardobjgrp_mask *extmask) +{ + if (mask == NULL) { + return -EINVAL; + } + if ((bitsize != CTRL_BOARDOBJGRP_E32_MAX_OBJECTS) && + (bitsize != CTRL_BOARDOBJGRP_E255_MAX_OBJECTS)) { + return -EINVAL; + } + + mask->bitcount = bitsize; + mask->maskdatacount = CTRL_BOARDOBJGRP_MASK_DATA_SIZE(bitsize); + mask->lastmaskfilter = U32(bitsize) % + CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_BIT_SIZE; + + mask->lastmaskfilter = (mask->lastmaskfilter == 0U) ? + 0xFFFFFFFFU : (BIT32(mask->lastmaskfilter) - 1U); + + return (extmask == NULL) ? + clr_mask_data(mask) : + import_mask_data(mask, bitsize, extmask); +} + +bool nvgpu_boardobjgrpmask_bit_get(struct boardobjgrpmask *mask, u8 bitidx) +{ + u8 index; + u8 offset; + + if (mask == NULL) { + return false; + } + if (bitidx >= mask->bitcount) { + return false; + } + + index = CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_INDEX(bitidx); + offset = CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_OFFSET(bitidx); + + return (mask->data[index] & BIT32(offset)) != 0U; +} + +int nvgpu_boardobjgrpmask_export(struct boardobjgrpmask *mask, u8 bitsize, + struct ctrl_boardobjgrp_mask *extmask) +{ + u8 index; + + if (mask == NULL) { + return -EINVAL; + } + if (extmask == NULL) { + return -EINVAL; + } + if (mask->bitcount != bitsize) { + return -EINVAL; + } + + for (index = 0; index < mask->maskdatacount; index++) { + extmask->data[index] = mask->data[index]; + } + + return 0; +} + +u8 nvgpu_boardobjgrpmask_bit_set_count(struct boardobjgrpmask *mask) +{ + u8 index; + u8 result = 0; + + if (mask == NULL) { + return result; + } + + for (index = 0; index < mask->maskdatacount; index++) { + u32 m = mask->data[index]; + + NUMSETBITS_32(m); + result += (u8)m; + } + + return result; +} + +u8 nvgpu_boardobjgrpmask_bit_idx_highest(struct boardobjgrpmask *mask) +{ + u8 index; + u8 result = CTRL_BOARDOBJ_IDX_INVALID; + + if (mask == NULL) { + return result; + } + + for (index = 0; index < mask->maskdatacount; index++) { + u32 m = mask->data[index]; + + if (m != 0U) { + HIGHESTBITIDX_32(m); + result = (u8)m + index * + CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_BIT_SIZE; + break; + } + } + + return result; +} + +int nvgpu_boardobjgrpmask_bit_clr(struct boardobjgrpmask *mask, u8 bitidx) +{ + u8 index; + u8 offset; + + if (mask == NULL) { + return -EINVAL; + } + if (bitidx >= mask->bitcount) { + return -EINVAL; + } + + index = CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_INDEX(bitidx); + offset = CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_OFFSET(bitidx); + + mask->data[index] &= ~BIT32(offset); + + return 0; +} + +int nvgpu_boardobjgrpmask_bit_set(struct boardobjgrpmask *mask, u8 bitidx) +{ + u8 index; + u8 offset; + + if (mask == NULL) { + return -EINVAL; + } + if (bitidx >= 
mask->bitcount) { + return -EINVAL; + } + + index = CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_INDEX(bitidx); + offset = CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_OFFSET(bitidx); + + mask->data[index] |= BIT32(offset); + + return 0; +} + +bool nvgpu_boardobjgrpmask_sizeeq(struct boardobjgrpmask *op1, + struct boardobjgrpmask *op2) +{ + if (op1 == NULL) { + return false; + } + if (op2 == NULL) { + return false; + } + + return op1->bitcount == op2->bitcount; +} + +int nvgpu_boardobjmask_or(struct boardobjgrpmask *dst, + struct boardobjgrpmask *mask1, struct boardobjgrpmask *mask2) +{ + u8 idx; + + for (idx = 0; idx < dst->maskdatacount; idx++) { + dst->data[idx] = mask1->data[idx] | mask2->data[idx]; + } + + return 0; + +} + +int nvgpu_boardobjmask_and(struct boardobjgrpmask *dst, + struct boardobjgrpmask *mask1, struct boardobjgrpmask *mask2) +{ + u8 idx; + + for (idx = 0; idx < dst->maskdatacount; idx++) { + dst->data[idx] = mask1->data[idx] & mask2->data[idx]; + } + + return 0; + +} + diff --git a/drivers/gpu/nvgpu/common/pmu/boardobj/ucode_boardobj_inf.h b/drivers/gpu/nvgpu/common/pmu/boardobj/ucode_boardobj_inf.h new file mode 100644 index 000000000..755fbdd0f --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/boardobj/ucode_boardobj_inf.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PMUIF_BOARDOBJ_H +#define NVGPU_PMUIF_BOARDOBJ_H + +#include +#include + +/* board object group command id's. 
*/ +#define NV_PMU_BOARDOBJGRP_CMD_SET 0x00U +#define NV_PMU_BOARDOBJGRP_CMD_GET_STATUS 0x01U + +#define NV_PMU_RPC_ID_CLK_BOARD_OBJ_GRP_CMD 0x00U +#define NV_PMU_RPC_ID_FAN_BOARD_OBJ_GRP_CMD 0x00U +#define NV_PMU_RPC_ID_PERF_BOARD_OBJ_GRP_CMD 0x00U +#define NV_PMU_RPC_ID_PERF_CF_BOARD_OBJ_GRP_CMD 0x00U +#define NV_PMU_RPC_ID_PMGR_BOARD_OBJ_GRP_CMD 0x00U +#define NV_PMU_RPC_ID_THERM_BOARD_OBJ_GRP_CMD 0x00U +#define NV_PMU_RPC_ID_VOLT_BOARD_OBJ_GRP_CMD 0x00U + +#define CTRL_BOARDOBJGRP_TYPE_INVALID 0x00U +#define CTRL_BOARDOBJGRP_TYPE_E32 0x01U +#define CTRL_BOARDOBJGRP_TYPE_E255 0x02U + +#define CTRL_BOARDOBJGRP_E32_MAX_OBJECTS 32U +#define CTRL_BOARDOBJGRP_E255_MAX_OBJECTS 255U + +#define CTRL_BOARDOBJ_MAX_BOARD_OBJECTS \ + CTRL_BOARDOBJGRP_E32_MAX_OBJECTS + +#define CTRL_BOARDOBJ_IDX_INVALID 255U + +#define CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_BIT_SIZE 32U + +#define CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_INDEX(_bit) \ + ((_bit) / CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_BIT_SIZE) + +#define CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_OFFSET(_bit) \ + ((_bit) % CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_BIT_SIZE) + +#define CTRL_BOARDOBJGRP_MASK_DATA_SIZE(_bits) \ + (CTRL_BOARDOBJGRP_MASK_MASK_ELEMENT_INDEX((_bits) - 1U) + 1U) + +#define CTRL_BOARDOBJGRP_MASK_ARRAY_START_SIZE 1U +#define CTRL_BOARDOBJGRP_MASK_ARRAY_EXTENSION_SIZE(_bits) \ + (CTRL_BOARDOBJGRP_MASK_DATA_SIZE(_bits) - \ + CTRL_BOARDOBJGRP_MASK_ARRAY_START_SIZE) + +struct ctrl_boardobj { + u8 type; +}; + +struct ctrl_boardobjgrp_mask { + u32 data[1]; +}; + +struct ctrl_boardobjgrp_mask_e32 { + struct ctrl_boardobjgrp_mask super; +}; + +struct ctrl_boardobjgrp_mask_e255 { + struct ctrl_boardobjgrp_mask super; + u32 data_e255[7]; +}; + +struct ctrl_boardobjgrp_super { + struct ctrl_boardobjgrp_mask obj_mask; +}; + +struct ctrl_boardobjgrp_e32 { + struct ctrl_boardobjgrp_mask_e32 obj_mask; +}; + +struct CTRL_boardobjgrp_e255 { + struct ctrl_boardobjgrp_mask_e255 obj_mask; +}; + +struct ctrl_boardobjgrp { + u32 obj_mask; +}; + +/* + * Base structure describing a BOARDOBJ for communication between Kernel and + * PMU. + */ +struct nv_pmu_boardobj { + u8 type; + u8 grp_idx; +}; + +/* + * Base structure describing a BOARDOBJ for Query interface between Kernel and + * PMU. + */ +struct nv_pmu_boardobj_query { + u8 type; + u8 grp_idx; +}; + +/* + * Virtual base structure describing a BOARDOBJGRP interface between Kernel and + * PMU. 
+ */ +struct nv_pmu_boardobjgrp_super { + u8 type; + u8 class_id; + u8 obj_slots; + u8 flags; +}; + +struct nv_pmu_boardobjgrp { + struct nv_pmu_boardobjgrp_super super; + u32 obj_mask; +}; + +struct nv_pmu_boardobjgrp_e32 { + struct nv_pmu_boardobjgrp_super super; + struct ctrl_boardobjgrp_mask_e32 obj_mask; +}; + +struct nv_pmu_boardobjgrp_e255 { + struct nv_pmu_boardobjgrp_super super; + struct ctrl_boardobjgrp_mask_e255 obj_mask; +}; + +struct nv_pmu_boardobj_cmd_grp_payload { + struct pmu_allocation_v3 dmem_buf; + struct flcn_mem_desc_v0 fb; + u8 hdr_size; + u8 entry_size; +}; + +struct nv_pmu_boardobj_cmd_grp { + u8 cmd_type; + u8 pad[2]; + u8 class_id; + struct nv_pmu_boardobj_cmd_grp_payload grp; +}; + +#define NV_PMU_BOARDOBJ_GRP_ALLOC_OFFSET \ + (NV_OFFSETOF(NV_PMU_BOARDOBJ_CMD_GRP, grp)) + +struct nv_pmu_boardobj_cmd { + union { + u8 cmd_type; + struct nv_pmu_boardobj_cmd_grp grp; + struct nv_pmu_boardobj_cmd_grp grp_set; + struct nv_pmu_boardobj_cmd_grp grp_get_status; + }; +}; + +struct nv_pmu_boardobj_msg_grp { + u8 msg_type; + bool b_success; + falcon_status flcn_status; + u8 class_id; +}; + +struct nv_pmu_boardobj_msg { + union { + u8 msg_type; + struct nv_pmu_boardobj_msg_grp grp; + struct nv_pmu_boardobj_msg_grp grp_set; + struct nv_pmu_boardobj_msg_grp grp_get_status; + }; +}; + +/* + * Macro generating structures describing classes which implement + * NV_PMU_BOARDOBJGRP via the NV_PMU_BOARDBOBJ_CMD_GRP SET interface. + * + * @para _eng Name of implementing engine in which this structure is + * found. + * @param _class Class ID of Objects within Board Object Group. + * @param _slots Max number of elements this group can contain. + */ +#define NV_PMU_BOARDOBJ_GRP_SET_MAKE(_eng, _class, _slots) \ + NV_PMU_MAKE_ALIGNED_STRUCT( \ + nv_pmu_##_eng##_##_class##_boardobjgrp_set_header, one_structure); \ + NV_PMU_MAKE_ALIGNED_UNION( \ + nv_pmu_##_eng##_##_class##_boardobj_set_union, one_union); \ + struct nv_pmu_##_eng##_##_class##_boardobj_grp_set { \ + union nv_pmu_##_eng##_##_class##_boardobjgrp_set_header_aligned \ + hdr; \ + union nv_pmu_##_eng##_##_class##_boardobj_set_union_aligned \ + objects[(_slots)]; \ + } + +/* + * Macro generating structures describing classes which implement + * NV_PMU_BOARDOBJGRP_E32 via the NV_PMU_BOARDBOBJ_CMD_GRP SET interface. + * + * @para _eng Name of implementing engine in which this structure is + * found. + * @param _class Class ID of Objects within Board Object Group. + */ +#define NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(_eng, _class) \ + NV_PMU_BOARDOBJ_GRP_SET_MAKE(_eng, _class, \ + CTRL_BOARDOBJGRP_E32_MAX_OBJECTS) + +/* + * Macro generating structures describing classes which implement + * NV_PMU_BOARDOBJGRP_E255 via the NV_PMU_BOARDBOBJ_CMD_GRP SET interface. + * + * @para _eng Name of implementing engine in which this structure is + * found. + * @param _class Class ID of Objects within Board Object Group. + */ +#define NV_PMU_BOARDOBJ_GRP_SET_MAKE_E255(_eng, _class) \ + NV_PMU_BOARDOBJ_GRP_SET_MAKE(_eng, _class, \ + CTRL_BOARDOBJGRP_E255_MAX_OBJECTS) + +/* + * Macro generating structures for querying dynamic state for classes which + * implement NV_PMU_BOARDOBJGRP via the NV_PMU_BOARDOBJ_CMD_GRP GET_STATUS + * interface. + * + * @para _eng Name of implementing engine in which this structure is + * found. + * @param _class Class ID of Objects within Board Object Group. + * @param _slots Max number of elements this group can contain. 
+ */ +#define NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE(_eng, _class, _slots) \ + NV_PMU_MAKE_ALIGNED_STRUCT( \ + nv_pmu_##_eng##_##_class##_boardobjgrp_get_status_header, struct); \ + NV_PMU_MAKE_ALIGNED_UNION( \ + nv_pmu_##_eng##_##_class##_boardobj_get_status_union, union); \ + struct nv_pmu_##_eng##_##_class##_boardobj_grp_get_status { \ + union nv_pmu_##_eng##_##_class##_boardobjgrp_get_status_header_aligned \ + hdr; \ + union nv_pmu_##_eng##_##_class##_boardobj_get_status_union_aligned \ + objects[(_slots)]; \ + } + +/* + * Macro generating structures for querying dynamic state for classes which + * implement NV_PMU_BOARDOBJGRP_E32 via the NV_PMU_BOARDOBJ_CMD_GRP GET_STATUS + * interface. + * + * @para _eng Name of implementing engine in which this structure is + * found. + * @param _class Class ID of Objects within Board Object Group. + */ +#define NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E32(_eng, _class) \ + NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE(_eng, _class, \ + CTRL_BOARDOBJGRP_E32_MAX_OBJECTS) + +/* + * Macro generating structures for querying dynamic state for classes which + * implement NV_PMU_BOARDOBJGRP_E255 via the NV_PMU_BOARDOBJ_CMD_GRP GET_STATUS + * interface. + * + * @para _eng Name of implementing engine in which this structure is + * found. + * @param _class Class ID of Objects within Board Object Group. + */ +#define NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E255(_eng, _class) \ + NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE(_eng, _class, \ + CTRL_BOARDOBJGRP_E255_MAX_OBJECTS) + +/* RPC */ + +/* + * structure that holds data used to + * execute BOARD_OBJ_GRP_CMD RPC. + */ +struct nv_pmu_rpc_struct_board_obj_grp_cmd { + /* [IN/OUT] Must be first field in RPC structure */ + struct nv_pmu_rpc_header hdr; + /* [IN] BOARDOBJGRP class IDs. */ + u8 class_id; + /* [IN] Requested command ID (@ref NV_PMU_BOARDOBJGRP_CMD_***)*/ + u8 command_id; + u32 scratch[1]; +}; + +#endif /* NVGPU_PMUIF_BOARDOBJ_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk.c b/drivers/gpu/nvgpu/common/pmu/clk/clk.c new file mode 100644 index 000000000..05a76db42 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_clk_inf.h" +#include "clk_domain.h" +#include "clk_prog.h" +#include "clk_vin.h" +#include "clk_fll.h" +#include "clk_vf_point.h" +#include "clk.h" + +static int clk_init_pmupstate(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmu->clk_pmu != NULL) { + return 0; + } + + g->pmu->clk_pmu = nvgpu_kzalloc(g, sizeof(*g->pmu->clk_pmu)); + if (g->pmu->clk_pmu == NULL) { + return -ENOMEM; + } + + return 0; +} + +static void clk_free_pmupstate(struct gk20a *g) +{ + nvgpu_kfree(g, g->pmu->clk_pmu); + g->pmu->clk_pmu = NULL; +} + +u32 nvgpu_pmu_clk_mon_init_domains(struct gk20a *g) +{ + u32 domain_mask; + + domain_mask = (CTRL_CLK_DOMAIN_MCLK | + CTRL_CLK_DOMAIN_XBARCLK | + CTRL_CLK_DOMAIN_SYSCLK | + CTRL_CLK_DOMAIN_HUBCLK | + CTRL_CLK_DOMAIN_GPCCLK | + CTRL_CLK_DOMAIN_HOSTCLK | + CTRL_CLK_DOMAIN_UTILSCLK | + CTRL_CLK_DOMAIN_PWRCLK | + CTRL_CLK_DOMAIN_NVDCLK | + CTRL_CLK_DOMAIN_XCLK | + CTRL_CLK_DOMAIN_NVL_COMMON | + CTRL_CLK_DOMAIN_PEX_REFCLK ); + return domain_mask; +} + +int nvgpu_pmu_clk_pmu_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = clk_domain_pmu_setup(g); + if (err != 0) { + return err; + } + + err = clk_prog_pmu_setup(g); + if (err != 0) { + return err; + } + + err = clk_vin_pmu_setup(g); + if (err != 0) { + return err; + } + + err = clk_fll_pmu_setup(g); + if (err != 0) { + return err; + } + + if (g->ops.clk.support_vf_point) { + err = clk_vf_point_pmu_setup(g); + if (err != 0) { + return err; + } + } + + err = clk_pmu_vin_load(g); + if (err != 0) { + return err; + } + + err = clk_pmu_clk_domains_load(g); + if (err != 0) { + return err; + } + + return 0; +} + +int nvgpu_pmu_clk_sw_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = clk_vin_sw_setup(g); + if (err != 0) { + clk_vin_free_pmupstate(g); + return err; + } + + err = clk_fll_sw_setup(g); + if (err != 0) { + clk_fll_free_pmupstate(g); + return err; + } + + err = clk_domain_sw_setup(g); + if (err != 0) { + clk_domain_free_pmupstate(g); + return err; + } + + if (g->ops.clk.support_vf_point) { + err = clk_vf_point_sw_setup(g); + if (err != 0) { + clk_vf_point_free_pmupstate(g); + return err; + } + } + + err = clk_prog_sw_setup(g); + if (err != 0) { + clk_prog_free_pmupstate(g); + return err; + } + + return 0; +} +int nvgpu_pmu_clk_init(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = clk_init_pmupstate(g); + if (err != 0) { + clk_free_pmupstate(g); + return err; + } + + err = clk_domain_init_pmupstate(g); + if (err != 0) { + clk_domain_free_pmupstate(g); + return err; + } + + err = clk_prog_init_pmupstate(g); + if (err != 0) { + clk_prog_free_pmupstate(g); + return err; + } + + err = clk_vf_point_init_pmupstate(g); + if (err != 0) { + clk_vf_point_free_pmupstate(g); + return err; + } + + err = clk_vin_init_pmupstate(g); + if (err != 0) { + clk_vin_free_pmupstate(g); + return err; + } + + err = clk_fll_init_pmupstate(g); + if (err != 0) { + clk_fll_free_pmupstate(g); + return err; + } + + return 0; +} +void nvgpu_pmu_clk_deinit(struct gk20a *g) +{ + if ((g->pmu != NULL) && (g->pmu->clk_pmu != NULL)) { + clk_domain_free_pmupstate(g); + clk_prog_free_pmupstate(g); + clk_vf_point_free_pmupstate(g); + clk_fll_free_pmupstate(g); + clk_vin_free_pmupstate(g); + clk_free_pmupstate(g); + } +} diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk.h b/drivers/gpu/nvgpu/common/pmu/clk/clk.h new file mode 100644 index 
000000000..a2199661f --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk.h @@ -0,0 +1,84 @@ +/* +* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. +*/ + +#ifndef NVGPU_CLK_H +#define NVGPU_CLK_H + +#include +#include "ucode_clk_inf.h" + +#define CTRL_CLK_FLL_REGIME_ID_INVALID ((u8)0x00000000) +#define CTRL_CLK_FLL_REGIME_ID_FFR ((u8)0x00000001) +#define CTRL_CLK_FLL_REGIME_ID_FR ((u8)0x00000002) + +#define CTRL_CLK_FLL_LUT_VSELECT_LOGIC (0x00000000U) +#define CTRL_CLK_FLL_LUT_VSELECT_MIN (0x00000001U) +#define CTRL_CLK_FLL_LUT_VSELECT_SRAM (0x00000002U) + +#define CTRL_CLK_VIN_SW_OVERRIDE_VIN_USE_HW_REQ (0x00000000U) +#define CTRL_CLK_VIN_SW_OVERRIDE_VIN_USE_MIN (0x00000001U) +#define CTRL_CLK_VIN_SW_OVERRIDE_VIN_USE_SW_REQ (0x00000003U) + +#define CTRL_CLK_VIN_STEP_SIZE_UV (6250U) +#define CTRL_CLK_LUT_MIN_VOLTAGE_UV (450000U) +#define CTRL_CLK_FLL_TYPE_DISABLED (0U) + +struct nvgpu_clk_pmupstate { + struct nvgpu_avfsvinobjs *avfs_vinobjs; + struct clk_avfs_fll_objs *avfs_fllobjs; + struct nvgpu_clk_domains *clk_domainobjs; + struct nvgpu_clk_progs *clk_progobjs; + struct nvgpu_clk_vf_points *clk_vf_pointobjs; +}; + +struct clk_vf_point { + struct pmu_board_obj super; + u8 vfe_equ_idx; + u8 volt_rail_idx; + struct ctrl_clk_vf_pair pair; +}; + +struct clk_vf_point_volt { + struct clk_vf_point super; + u32 source_voltage_uv; + struct ctrl_clk_freq_delta freq_delta; +}; + +struct clk_vf_point_freq { + struct clk_vf_point super; + int volt_delta_uv; +}; + +struct nvgpu_clk_vf_points { + struct boardobjgrp_e255 super; +}; + +struct clk_vf_point *nvgpu_construct_clk_vf_point(struct gk20a *g, + void *pargs); + +u32 nvgpu_pmu_clk_fll_get_lut_min_volt(struct nvgpu_clk_pmupstate *pclk); +u8 clk_get_fll_lut_vf_num_entries(struct nvgpu_clk_pmupstate *pclk); +struct clk_vin_device *clk_get_vin_from_index( + struct nvgpu_avfsvinobjs *pvinobjs, u8 idx); +int clk_domain_clk_prog_link(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk); +#endif /* NVGPU_CLK_VIN_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_domain.c b/drivers/gpu/nvgpu/common/pmu/clk/clk_domain.c new file mode 100644 index 000000000..be8165b8b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_domain.c @@ -0,0 +1,1746 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_clk_inf.h" +#include "clk_domain.h" +#include "clk_prog.h" +#include "clk.h" + +static struct nvgpu_clk_domain *construct_clk_domain(struct gk20a *g, + void *pargs); + +static int devinit_get_clocks_table(struct gk20a *g, + struct nvgpu_clk_domains *pclkdomainobjs); + +static int clk_domain_pmudatainit_super(struct gk20a *g, struct pmu_board_obj + *obj, struct nv_pmu_boardobj *pmu_obj); + +struct vbios_clocks_table_1x_hal_clock_entry { + u32 domain; + bool b_noise_aware_capable; + u8 clk_vf_curve_count; +}; + +static struct vbios_clocks_table_1x_hal_clock_entry + vbiosclktbl1xhalentry_gv[] = { + { CLKWHICH_GPCCLK, true, 1, }, + { CLKWHICH_XBARCLK, true, 1, }, + { CLKWHICH_MCLK, false, 1, }, + { CLKWHICH_SYSCLK, true, 1, }, + { CLKWHICH_HUBCLK, false, 1, }, + { CLKWHICH_NVDCLK, true, 1, }, + { CLKWHICH_PWRCLK, false, 1, }, + { CLKWHICH_DISPCLK, false, 1, }, + { CLKWHICH_PCIEGENCLK, false, 1, }, + { CLKWHICH_HOSTCLK, true, 1, } +}; + +static u32 clktranslatehalmumsettoapinumset(u32 clkhaldomains) +{ + u32 clkapidomains = 0; + + if ((clkhaldomains & BIT32(CLKWHICH_GPCCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_GPCCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_XBARCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_XBARCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_SYSCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_SYSCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_HUBCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_HUBCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_HOSTCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_HOSTCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_GPC2CLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_GPC2CLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_XBAR2CLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_XBAR2CLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_SYS2CLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_SYS2CLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_HUB2CLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_HUB2CLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_PWRCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_PWRCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_PCIEGENCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_PCIEGENCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_MCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_MCLK; + } + if 
((clkhaldomains & BIT32(CLKWHICH_NVDCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_NVDCLK; + } + if ((clkhaldomains & BIT32(CLKWHICH_DISPCLK)) != 0U) { + clkapidomains |= CTRL_CLK_DOMAIN_DISPCLK; + } + + return clkapidomains; +} + +static struct nvgpu_clk_domain *clk_get_clk_domain_from_index( + struct nvgpu_clk_pmupstate *pclk, u8 idx) +{ + return (struct nvgpu_clk_domain *)(void *)BOARDOBJGRP_OBJ_GET_BY_IDX( + &(pclk->clk_domainobjs->super.super), idx); +} + +static int _clk_domains_pmudatainit_3x(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + struct nv_pmu_clk_clk_domain_boardobjgrp_set_header *pset = + (struct nv_pmu_clk_clk_domain_boardobjgrp_set_header *) + (void *)pboardobjgrppmu; + struct nvgpu_clk_domains *pdomains = + (struct nvgpu_clk_domains *)(void *)pboardobjgrp; + int status = 0; + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for clk domain 0x%x", + status); + goto done; + } + + pset->vbios_domains = pdomains->vbios_domains; + pset->cntr_sampling_periodms = pdomains->cntr_sampling_periodms; + pset->clkmon_refwin_usec = pdomains->clkmon_refwin_usec; + pset->version = pdomains->version; + pset->b_override_o_v_o_c = false; + pset->b_debug_mode = false; + pset->b_enforce_vf_monotonicity = pdomains->b_enforce_vf_monotonicity; + pset->b_enforce_vf_smoothening = pdomains->b_enforce_vf_smoothening; + if (g->ops.clk.split_rail_support) { + pset->volt_rails_max = 2; + } else { + pset->volt_rails_max = 1; + } + status = nvgpu_boardobjgrpmask_export( + &pdomains->master_domains_mask.super, + pdomains->master_domains_mask.super.bitcount, + &pset->master_domains_mask.super); + if (status != 0) { + nvgpu_err(g, "Error exporting Clk master domains masks"); + return status; + } + + status = nvgpu_boardobjgrpmask_export( + &pdomains->prog_domains_mask.super, + pdomains->prog_domains_mask.super.bitcount, + &pset->prog_domains_mask.super); + if (status != 0) { + nvgpu_err(g, "Error exporting Clk prog domains masks"); + return status; + } + + status = nvgpu_boardobjgrpmask_export( + &pdomains->clkmon_domains_mask.super, + pdomains->clkmon_domains_mask.super.bitcount, + &pset->clkmon_domains_mask.super); + if (status != 0) { + nvgpu_err(g, "Error exporting Clk monitor domains masks"); + return status; + } + nvgpu_memcpy((u8 *)&pset->deltas, (u8 *)&pdomains->deltas, + (sizeof(struct ctrl_clk_clk_delta))); + +done: + return status; +} + +static int _clk_domains_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + struct nv_pmu_clk_clk_domain_boardobj_grp_set *pgrp_set = + (struct nv_pmu_clk_clk_domain_boardobj_grp_set *)(void *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, " Done"); + return 0; +} + +int clk_domain_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct nvgpu_clk_domains *pclkdomainobjs; + struct nvgpu_clk_domain *pdomain; + struct clk_domain_35_master *pdomain_master_35; + struct clk_domain_35_slave *pdomain_slave_35; + struct clk_domain_35_prog *pdomain_prog_35; + u8 i; + + nvgpu_log_info(g, " "); + + status = 
nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->clk_pmu->clk_domainobjs->super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for clk domain, status - 0x%x", + status); + goto done; + } + + pboardobjgrp = &g->pmu->clk_pmu->clk_domainobjs->super.super; + pclkdomainobjs = g->pmu->clk_pmu->clk_domainobjs; + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, CLK_DOMAIN); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + clk, CLK, clk_domain, CLK_DOMAIN); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + pboardobjgrp->pmudatainit = _clk_domains_pmudatainit_3x; + pboardobjgrp->pmudatainstget = _clk_domains_pmudata_instget; + + /* Initialize mask to zero.*/ + status = boardobjgrpmask_e32_init(&pclkdomainobjs->prog_domains_mask, + NULL); + if (status != 0) { + nvgpu_err(g, "boardobjgrpmask_e32_init(prog) failed err=%d", + status); + goto done; + } + status = boardobjgrpmask_e32_init(&pclkdomainobjs->master_domains_mask, + NULL); + if (status != 0) { + nvgpu_err(g, "boardobjgrpmask_e32_init(master) failed err=%d", + status); + goto done; + } + status = boardobjgrpmask_e32_init(&pclkdomainobjs->clkmon_domains_mask, + NULL); + if (status != 0) { + nvgpu_err(g, "boardobjgrpmask_e32_init(clkmon) failed err=%d", + status); + goto done; + } + + pclkdomainobjs->b_enforce_vf_monotonicity = true; + pclkdomainobjs->b_enforce_vf_smoothening = true; + + (void) memset(&pclkdomainobjs->ordered_noise_aware_list, 0, + sizeof(pclkdomainobjs->ordered_noise_aware_list)); + + (void) memset(&pclkdomainobjs->ordered_noise_unaware_list, 0, + sizeof(pclkdomainobjs->ordered_noise_unaware_list)); + + (void) memset(&pclkdomainobjs->deltas, 0, + sizeof(struct ctrl_clk_clk_delta)); + + status = devinit_get_clocks_table(g, pclkdomainobjs); + if (status != 0) { + goto done; + } + + BOARDOBJGRP_FOR_EACH(&(pclkdomainobjs->super.super), + struct nvgpu_clk_domain *, pdomain, i) { + pdomain_master_35 = NULL; + + if (pdomain->super.implements(g, &pdomain->super, + CTRL_CLK_CLK_DOMAIN_TYPE_35_PROG)) { + pdomain_prog_35 = + (struct clk_domain_35_prog *)(void *)pdomain; + status = nvgpu_boardobjgrpmask_bit_set( + &pclkdomainobjs->prog_domains_mask.super, i); + if (status != 0) { + goto done; + } + + /* Create the mask of clk monitors that are supported */ + if ((pdomain_prog_35-> + clkmon_info.high_threshold_vfe_idx != + CLK_CLKMON_VFE_INDEX_INVALID) || + (pdomain_prog_35-> + clkmon_info.low_threshold_vfe_idx != + CLK_CLKMON_VFE_INDEX_INVALID)) { + status = nvgpu_boardobjgrpmask_bit_set( + &pclkdomainobjs-> + clkmon_domains_mask.super, i); + if (status != 0) { + nvgpu_err(g, + "Error setting Clk monitor masks"); + return status; + } + } + } + + if (pdomain->super.implements(g, &pdomain->super, + CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER)) { + status = nvgpu_boardobjgrpmask_bit_set( + &pclkdomainobjs->master_domains_mask.super, i); + if (status != 0) { + goto done; + } + pdomain_master_35 = + (struct clk_domain_35_master *)(void *)pdomain; + status = nvgpu_boardobjgrpmask_bit_set( + &pdomain_master_35-> + master_slave_domains_grp_mask.super, i); + if (status != 0) { + goto done; + } + } + + if (pdomain->super.implements(g, &pdomain->super, + CTRL_CLK_CLK_DOMAIN_TYPE_35_SLAVE)) { + pdomain_slave_35 = + (struct clk_domain_35_slave *)(void *)pdomain; + pdomain_master_35 = (struct clk_domain_35_master *) + (void *) + (clk_get_clk_domain_from_index( + (g->pmu->clk_pmu), + pdomain_slave_35->slave.master_idx)); + 
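			/*
			 * Link the slave back into its master's bookkeeping:
			 * the statements below record this slave's bit in the
			 * master's slave_idxs_mask, derive the slave's
			 * position (clk_pos) from how many master/slave bits
			 * are already set, and finally add the slave to the
			 * master's master_slave_domains_grp_mask. As a
			 * hypothetical example, if the master sits at index 0
			 * and this slave at index 3, the master ends up with
			 * BIT32(3) set in slave_idxs_mask and the slave's
			 * clk_pos reflects its order within that group.
			 */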
pdomain_master_35->master.slave_idxs_mask |= BIT32(i); + pdomain_slave_35->super.clk_pos = + nvgpu_boardobjgrpmask_bit_set_count( + &pdomain_master_35-> + master_slave_domains_grp_mask.super); + status = nvgpu_boardobjgrpmask_bit_set( + &pdomain_master_35-> + master_slave_domains_grp_mask.super, i); + if (status != 0) { + goto done; + } + } + + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int clk_domain_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->clk_pmu->clk_domainobjs->super.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +static int devinit_get_clocks_table_35(struct gk20a *g, + struct nvgpu_clk_domains *pclkdomainobjs, u8 *clocks_table_ptr) +{ + int status = 0; + struct vbios_clocks_table_35_header clocks_table_header = { 0 }; + struct vbios_clocks_table_35_entry clocks_table_entry = { 0 }; + struct vbios_clocks_table_1x_hal_clock_entry *vbiosclktbl1xhalentry; + u8 *clocks_tbl_entry_ptr = NULL; + u32 index = 0; + bool done = false; + struct nvgpu_clk_domain *pclkdomain_dev; + union { + struct pmu_board_obj obj; + struct nvgpu_clk_domain clk_domain; + struct clk_domain_3x v3x; + struct clk_domain_3x_fixed v3x_fixed; + struct clk_domain_35_prog v35_prog; + struct clk_domain_35_master v35_master; + struct clk_domain_35_slave v35_slave; + } clk_domain_data; + + nvgpu_log_info(g, " "); + pclkdomainobjs->version = CLK_DOMAIN_BOARDOBJGRP_VERSION_35; + + nvgpu_memcpy((u8 *)&clocks_table_header, clocks_table_ptr, + VBIOS_CLOCKS_TABLE_35_HEADER_SIZE_09); + if (clocks_table_header.header_size < + (u8) VBIOS_CLOCKS_TABLE_35_HEADER_SIZE_09) { + status = -EINVAL; + goto done; + } + + if (clocks_table_header.entry_size < + (u8) VBIOS_CLOCKS_TABLE_35_ENTRY_SIZE_11) { + status = -EINVAL; + goto done; + } + + switch (clocks_table_header.clocks_hal) { + case CLK_TABLE_HAL_ENTRY_GV: + vbiosclktbl1xhalentry = vbiosclktbl1xhalentry_gv; + break; + default: + status = -EINVAL; + break; + } + + if (status == -EINVAL) { + goto done; + } + + pclkdomainobjs->cntr_sampling_periodms = + (u16)clocks_table_header.cntr_sampling_periodms; + pclkdomainobjs->clkmon_refwin_usec = + (u16)clocks_table_header.reference_window; + + /* Read table entries*/ + clocks_tbl_entry_ptr = clocks_table_ptr + + clocks_table_header.header_size; + for (index = 0; index < clocks_table_header.entry_count; index++) { + nvgpu_memcpy((u8 *)&clocks_table_entry, + clocks_tbl_entry_ptr, clocks_table_header.entry_size); + clk_domain_data.clk_domain.domain = + (u8) vbiosclktbl1xhalentry[index].domain; + clk_domain_data.clk_domain.api_domain = + clktranslatehalmumsettoapinumset( + (u32) BIT(clk_domain_data.clk_domain.domain)); + clk_domain_data.v3x.b_noise_aware_capable = + vbiosclktbl1xhalentry[index].b_noise_aware_capable; + + switch (BIOS_GET_FIELD(u32, clocks_table_entry.flags0, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE)) { + case NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_FIXED: + { + clk_domain_data.obj.type = + CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED; + clk_domain_data.v3x_fixed.freq_mhz = BIOS_GET_FIELD(u16, + clocks_table_entry.param1, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_FIXED_FREQUENCY_MHZ); + break; + } + + case NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_MASTER: + { + clk_domain_data.obj.type = + CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER; + 
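			/*
			 * The assignments that follow unpack bit-fields from
			 * the VBIOS clocks-table entry words (param0..param3)
			 * with BIOS_GET_FIELD(): the first/last clk_prog
			 * indices come from param0 and the clock-monitor
			 * threshold VFE indices from param3. They are staged
			 * in the local clk_domain_data union before the board
			 * object is constructed and inserted into the group.
			 */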
clk_domain_data.v35_prog.super.clk_prog_idx_first = + BIOS_GET_FIELD(u8, clocks_table_entry.param0, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_FIRST); + clk_domain_data.v35_prog.super.clk_prog_idx_last = + BIOS_GET_FIELD(u8, clocks_table_entry.param0, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_LAST); + clk_domain_data.v35_prog.super.noise_unaware_ordering_index = + BIOS_GET_FIELD(u8, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_UNAWARE_ORDERING_IDX); + if (clk_domain_data.v3x.b_noise_aware_capable) { + clk_domain_data.v35_prog.super.b_force_noise_unaware_ordering = + BIOS_GET_FIELD(bool, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING); + + } else { + clk_domain_data.v35_prog.super.noise_aware_ordering_index = + CTRL_CLK_CLK_DOMAIN_3X_PROG_ORDERING_INDEX_INVALID; + clk_domain_data.v35_prog.super.b_force_noise_unaware_ordering = + false; + } + clk_domain_data.v35_prog.pre_volt_ordering_index = + BIOS_GET_FIELD(u8, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM2_PROG_PRE_VOLT_ORDERING_IDX); + + clk_domain_data.v35_prog.post_volt_ordering_index = + BIOS_GET_FIELD(u8, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM2_PROG_POST_VOLT_ORDERING_IDX); + + clk_domain_data.v35_prog.super.factory_delta.data.delta_khz = 0; + clk_domain_data.v35_prog.super.factory_delta.type = 0; + + clk_domain_data.v35_prog.super.freq_delta_min_mhz = + BIOS_GET_FIELD(s16, clocks_table_entry.param1, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MIN_MHZ); + + clk_domain_data.v35_prog.super.freq_delta_max_mhz = + BIOS_GET_FIELD(s16, clocks_table_entry.param1, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MAX_MHZ); + clk_domain_data.v35_prog.clk_vf_curve_count = + vbiosclktbl1xhalentry[index].clk_vf_curve_count; + + clk_domain_data.v35_prog.clkmon_info.low_threshold_vfe_idx = + BIOS_GET_FIELD(u8, clocks_table_entry.param3, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM3_CLK_MONITOR_THRESHOLD_MIN); + clk_domain_data.v35_prog.clkmon_info.high_threshold_vfe_idx = + BIOS_GET_FIELD(u8, clocks_table_entry.param3, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM3_CLK_MONITOR_THRESHOLD_MAX); + break; + } + + case NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_SLAVE: + { + clk_domain_data.obj.type = + CTRL_CLK_CLK_DOMAIN_TYPE_35_SLAVE; + clk_domain_data.v35_prog.super.clk_prog_idx_first = + BIOS_GET_FIELD(u8, clocks_table_entry.param0, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_FIRST); + clk_domain_data.v35_prog.super.clk_prog_idx_last = + BIOS_GET_FIELD(u8, clocks_table_entry.param0, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_LAST); + clk_domain_data.v35_prog.super.noise_unaware_ordering_index = + BIOS_GET_FIELD(u8, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_UNAWARE_ORDERING_IDX); + + if (clk_domain_data.v3x.b_noise_aware_capable) { + clk_domain_data.v35_prog.super.b_force_noise_unaware_ordering = + BIOS_GET_FIELD(bool, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING); + + } else { + clk_domain_data.v35_prog.super.noise_aware_ordering_index = + CTRL_CLK_CLK_DOMAIN_3X_PROG_ORDERING_INDEX_INVALID; + clk_domain_data.v35_prog.super.b_force_noise_unaware_ordering = + false; + } + clk_domain_data.v35_prog.pre_volt_ordering_index = + BIOS_GET_FIELD(u8, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM2_PROG_PRE_VOLT_ORDERING_IDX); + + 
clk_domain_data.v35_prog.post_volt_ordering_index = + BIOS_GET_FIELD(u8, clocks_table_entry.param2, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM2_PROG_POST_VOLT_ORDERING_IDX); + + clk_domain_data.v35_prog.super.factory_delta.data.delta_khz = 0; + clk_domain_data.v35_prog.super.factory_delta.type = 0; + clk_domain_data.v35_prog.super.freq_delta_min_mhz = 0; + clk_domain_data.v35_prog.super.freq_delta_max_mhz = 0; + clk_domain_data.v35_slave.slave.master_idx = + BIOS_GET_FIELD(u8, clocks_table_entry.param1, + NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_SLAVE_MASTER_DOMAIN); + + clk_domain_data.v35_prog.clkmon_info.low_threshold_vfe_idx = + BIOS_GET_FIELD(u8, clocks_table_entry.param3, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM3_CLK_MONITOR_THRESHOLD_MIN); + clk_domain_data.v35_prog.clkmon_info.high_threshold_vfe_idx = + BIOS_GET_FIELD(u8, clocks_table_entry.param3, + NV_VBIOS_CLOCKS_TABLE_35_ENTRY_PARAM3_CLK_MONITOR_THRESHOLD_MAX); + break; + } + + default: + { + nvgpu_err(g, + "error reading clock domain entry %d", index); + status = -EINVAL; + done = true; + break; + } + + } + /* + * Previously we were doing "goto done" from the default case of + * the switch-case block above. MISRA however, gets upset about + * this because it wants a break statement in the default case. + * That's why we had to move the goto statement outside of the + * switch-case block. + */ + if(done) { + goto done; + } + + pclkdomain_dev = construct_clk_domain(g, + (void *)&clk_domain_data); + if (pclkdomain_dev == NULL) { + nvgpu_err(g, + "unable to construct clock domain boardobj for %d", + index); + status = -EINVAL; + goto done; + } + status = boardobjgrp_objinsert(&pclkdomainobjs->super.super, + (struct pmu_board_obj *)(void *) + pclkdomain_dev, index); + if (status != 0) { + nvgpu_err(g, + "unable to insert clock domain boardobj for %d", index); + status = -EINVAL; + goto done; + } + clocks_tbl_entry_ptr += clocks_table_header.entry_size; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +static int devinit_get_clocks_table(struct gk20a *g, + struct nvgpu_clk_domains *pclkdomainobjs) +{ + int status = 0; + u8 *clocks_table_ptr = NULL; + struct vbios_clocks_table_35_header clocks_table_header = { 0 }; + nvgpu_log_info(g, " "); + + clocks_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_CLOCK_TOKEN), + CLOCKS_TABLE); + if (clocks_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + nvgpu_memcpy((u8 *)&clocks_table_header, clocks_table_ptr, + VBIOS_CLOCKS_TABLE_35_HEADER_SIZE_09); + + status = devinit_get_clocks_table_35(g, pclkdomainobjs, + clocks_table_ptr); + +done: + return status; + +} + +static int clk_domain_construct_super(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct nvgpu_clk_domain *pdomain; + struct nvgpu_clk_domain *ptmpdomain = (struct nvgpu_clk_domain *)pargs; + int status = 0; + + pdomain = nvgpu_kzalloc(g, size); + if (pdomain == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pdomain, pargs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pdomain; + + pdomain->super.pmudatainit = + clk_domain_pmudatainit_super; + + pdomain->api_domain = ptmpdomain->api_domain; + pdomain->domain = ptmpdomain->domain; + pdomain->perf_domain_grp_idx = + ptmpdomain->perf_domain_grp_idx; + + return status; +} + +static int _clk_domain_pmudatainit_3x(struct gk20a *g, + struct pmu_board_obj *obj, + struct 
nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_domain_3x *pclk_domain_3x; + struct nv_pmu_clk_clk_domain_3x_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = clk_domain_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_domain_3x = (struct clk_domain_3x *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_domain_3x_boardobj_set *)(void *)pmu_obj; + + pset->b_noise_aware_capable = pclk_domain_3x->b_noise_aware_capable; + + return status; +} + +static int clk_domain_construct_3x(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_domain_3x *pdomain; + struct clk_domain_3x *ptmpdomain = + (struct clk_domain_3x *)pargs; + int status = 0; + + obj_tmp->type_mask = BIT32(CTRL_CLK_CLK_DOMAIN_TYPE_3X); + status = clk_domain_construct_super(g, obj, + size, pargs); + if (status != 0) { + return -EINVAL; + } + + pdomain = (struct clk_domain_3x *)(void *)*obj; + + pdomain->super.super.pmudatainit = + _clk_domain_pmudatainit_3x; + + pdomain->b_noise_aware_capable = ptmpdomain->b_noise_aware_capable; + + return status; +} + +static int clkdomainclkproglink_3x_prog(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain) +{ + int status = 0; + struct clk_domain_3x_prog *p3xprog = + (struct clk_domain_3x_prog *)(void *)pdomain; + struct clk_prog *pprog = NULL; + u8 i; + + nvgpu_log_info(g, " "); + + for (i = p3xprog->clk_prog_idx_first; + i <= p3xprog->clk_prog_idx_last; + i++) { + pprog = CLK_CLK_PROG_GET(pclk, i); + if (pprog == NULL) { + status = -EINVAL; + } + } + return status; +} + +static int clkdomaingetslaveclk(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain, + u16 *pclkmhz, + u16 masterclkmhz) +{ + int status = 0; + struct clk_prog *pprog = NULL; + struct clk_prog_1x_master *pprog1xmaster = NULL; + u8 slaveidx; + struct clk_domain_35_master *p35master; + nvgpu_log_info(g, " "); + + if (pclkmhz == NULL) { + return -EINVAL; + } + if (masterclkmhz == 0U) { + return -EINVAL; + } + slaveidx = pmu_board_obj_get_idx(pdomain); + p35master = (struct clk_domain_35_master *)(void *) + clk_get_clk_domain_from_index(pclk, + ((struct clk_domain_35_slave *) + (void *)pdomain)->slave.master_idx); + pprog = CLK_CLK_PROG_GET(pclk, p35master-> + master.super.clk_prog_idx_first); + pprog1xmaster = (struct clk_prog_1x_master *)(void *)pprog; + + status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster, + slaveidx, pclkmhz, masterclkmhz, &pdomain->ratio_domain); + + return status; +} + +static int clkdomainvfsearch(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain, + u16 *pclkmhz, + u32 *pvoltuv, + u8 rail) +{ + int status = 0; + struct clk_domain_3x_master *p3xmaster = + (struct clk_domain_3x_master *)(void *)pdomain; + struct clk_prog *pprog = NULL; + struct clk_prog_1x_master *pprog1xmaster = NULL; + u8 i; + u8 *pslaveidx = NULL; + u8 slaveidx; + u16 clkmhz; + u32 voltuv; + u16 bestclkmhz; + u32 bestvoltuv; + + nvgpu_log_info(g, " "); + + if ((pclkmhz == NULL) || (pvoltuv == NULL)) { + return -EINVAL; + } + + if ((*pclkmhz != 0U) && (*pvoltuv != 0U)) { + return -EINVAL; + } + + bestclkmhz = *pclkmhz; + bestvoltuv = *pvoltuv; + + if (pdomain->super.implements(g, &pdomain->super, + CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE)) { + slaveidx = pmu_board_obj_get_idx(pdomain); + pslaveidx = &slaveidx; + p3xmaster = (struct clk_domain_3x_master *)(void *) + 
clk_get_clk_domain_from_index(pclk, + ((struct clk_domain_3x_slave *)(void *) + pdomain)->master_idx); + } + /* Iterate over the set of CLK_PROGs pointed at by this domain.*/ + for (i = p3xmaster->super.clk_prog_idx_first; + i <= p3xmaster->super.clk_prog_idx_last; + i++) { + clkmhz = *pclkmhz; + voltuv = *pvoltuv; + pprog = CLK_CLK_PROG_GET(pclk, i); + + /* MASTER CLK_DOMAINs must point to MASTER CLK_PROGs.*/ + if (!pprog->super.implements(g, &pprog->super, + CTRL_CLK_CLK_PROG_TYPE_1X_MASTER)) { + status = -EINVAL; + goto done; + } + + pprog1xmaster = (struct clk_prog_1x_master *)(void *)pprog; + status = pprog1xmaster->vflookup(g, pclk, pprog1xmaster, + pslaveidx, &clkmhz, &voltuv, rail); + /* if look up has found the V or F value matching to other + exit */ + if (status == 0) { + if (*pclkmhz == 0U) { + bestclkmhz = clkmhz; + } else { + bestvoltuv = voltuv; + break; + } + } + } + /* clk and volt sent as zero to print vf table */ + if ((*pclkmhz == 0U) && (*pvoltuv == 0U)) { + status = 0; + goto done; + } + /* atleast one search found a matching value? */ + if ((bestvoltuv != 0U) && (bestclkmhz != 0U)) { + *pclkmhz = bestclkmhz; + *pvoltuv = bestvoltuv; + status = 0; + goto done; + } +done: + nvgpu_log_info(g, "done status %x", status); + return status; +} + +static int clkdomaingetfpoints +( + struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz, + u8 rail +) +{ + int status = 0; + struct clk_domain_3x_master *p3xmaster = + (struct clk_domain_3x_master *)(void *)pdomain; + struct clk_prog *pprog = NULL; + struct clk_prog_1x_master *pprog1xmaster = NULL; + u32 fpointscount = 0; + u32 remainingcount; + u32 totalcount; + u16 *freqpointsdata; + u8 i; + + nvgpu_log_info(g, " "); + + if (pfpointscount == NULL) { + return -EINVAL; + } + + if ((pfreqpointsinmhz == NULL) && (*pfpointscount != 0U)) { + return -EINVAL; + } + + if (pdomain->super.implements(g, &pdomain->super, + CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE)) { + return -EINVAL; + } + + freqpointsdata = pfreqpointsinmhz; + totalcount = 0; + fpointscount = *pfpointscount; + remainingcount = fpointscount; + /* Iterate over the set of CLK_PROGs pointed at by this domain.*/ + for (i = p3xmaster->super.clk_prog_idx_first; + i <= p3xmaster->super.clk_prog_idx_last; + i++) { + pprog = CLK_CLK_PROG_GET(pclk, i); + pprog1xmaster = (struct clk_prog_1x_master *)(void *)pprog; + status = pprog1xmaster->getfpoints(g, pclk, pprog1xmaster, + &fpointscount, &freqpointsdata, rail); + if (status != 0) { + *pfpointscount = 0; + goto done; + } + totalcount += fpointscount; + if (*pfpointscount != 0U) { + remainingcount -= fpointscount; + fpointscount = remainingcount; + } else { + fpointscount = 0; + } + + } + + *pfpointscount = totalcount; +done: + nvgpu_log_info(g, "done status %x", status); + return status; +} + +static int clk_domain_pmudatainit_35_prog(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_domain_35_prog *pclk_domain_35_prog; + struct clk_domain_3x_prog *pclk_domain_3x_prog; + struct nv_pmu_clk_clk_domain_35_prog_boardobj_set *pset; + struct nvgpu_clk_domains *pdomains = g->pmu->clk_pmu->clk_domainobjs; + + nvgpu_log_info(g, " "); + + status = _clk_domain_pmudatainit_3x(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_domain_35_prog = (struct clk_domain_35_prog *)(void *)obj; + pclk_domain_3x_prog = &pclk_domain_35_prog->super; + + pset = (struct 
nv_pmu_clk_clk_domain_35_prog_boardobj_set *) + (void *)pmu_obj; + + pset->super.clk_prog_idx_first = + pclk_domain_3x_prog->clk_prog_idx_first; + pset->super.clk_prog_idx_last = pclk_domain_3x_prog->clk_prog_idx_last; + pset->super.b_force_noise_unaware_ordering = + pclk_domain_3x_prog->b_force_noise_unaware_ordering; + pset->super.factory_delta = pclk_domain_3x_prog->factory_delta; + pset->super.freq_delta_min_mhz = + pclk_domain_3x_prog->freq_delta_min_mhz; + pset->super.freq_delta_max_mhz = + pclk_domain_3x_prog->freq_delta_max_mhz; + nvgpu_memcpy((u8 *)&pset->super.deltas, (u8 *)&pdomains->deltas, + (sizeof(struct ctrl_clk_clk_delta))); + pset->pre_volt_ordering_index = + pclk_domain_35_prog->pre_volt_ordering_index; + pset->post_volt_ordering_index = + pclk_domain_35_prog->post_volt_ordering_index; + pset->clk_pos = pclk_domain_35_prog->clk_pos; + pset->clk_vf_curve_count = pclk_domain_35_prog->clk_vf_curve_count; + pset->clkmon_info.high_threshold_vfe_idx = pclk_domain_35_prog-> + clkmon_info.high_threshold_vfe_idx; + pset->clkmon_info.low_threshold_vfe_idx = pclk_domain_35_prog-> + clkmon_info.low_threshold_vfe_idx; + pset->clkmon_ctrl.high_threshold_override = pclk_domain_35_prog-> + clkmon_ctrl.high_threshold_override; + pset->clkmon_ctrl.low_threshold_override = pclk_domain_35_prog-> + clkmon_ctrl.low_threshold_override; + + return status; +} + +static int clk_domain_construct_35_prog(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_domain_35_prog *pdomain; + struct clk_domain_35_prog *ptmpdomain = + (struct clk_domain_35_prog *)pargs; + int status = 0; + + obj_tmp->type_mask |= BIT32(CTRL_CLK_CLK_DOMAIN_TYPE_35_PROG); + status = clk_domain_construct_3x(g, obj, size, pargs); + if (status != 0) + { + return -EINVAL; + } + + pdomain = (struct clk_domain_35_prog *)(void *) *obj; + + pdomain->super.super.super.super.type_mask |= + BIT32(CTRL_CLK_CLK_DOMAIN_TYPE_35_PROG); + + pdomain->super.super.super.super.pmudatainit = + clk_domain_pmudatainit_35_prog; + + pdomain->super.super.super.clkdomainclkproglink = + clkdomainclkproglink_3x_prog; + + pdomain->super.super.super.clkdomainclkvfsearch = + clkdomainvfsearch; + + pdomain->super.super.super.clkdomainclkgetfpoints = + clkdomaingetfpoints; + + pdomain->super.clk_prog_idx_first = + ptmpdomain->super.clk_prog_idx_first; + pdomain->super.clk_prog_idx_last = ptmpdomain->super.clk_prog_idx_last; + pdomain->super.noise_unaware_ordering_index = + ptmpdomain->super.noise_unaware_ordering_index; + pdomain->super.noise_aware_ordering_index = + ptmpdomain->super.noise_aware_ordering_index; + pdomain->super.b_force_noise_unaware_ordering = + ptmpdomain->super.b_force_noise_unaware_ordering; + pdomain->super.factory_delta = ptmpdomain->super.factory_delta; + pdomain->super.freq_delta_min_mhz = + ptmpdomain->super.freq_delta_min_mhz; + pdomain->super.freq_delta_max_mhz = + ptmpdomain->super.freq_delta_max_mhz; + pdomain->pre_volt_ordering_index = ptmpdomain->pre_volt_ordering_index; + pdomain->post_volt_ordering_index = + ptmpdomain->post_volt_ordering_index; + pdomain->clk_pos = ptmpdomain->clk_pos; + pdomain->clk_vf_curve_count = ptmpdomain->clk_vf_curve_count; + pdomain->clkmon_info.high_threshold_vfe_idx = ptmpdomain-> + clkmon_info.high_threshold_vfe_idx; + pdomain->clkmon_info.low_threshold_vfe_idx = ptmpdomain-> + clkmon_info.low_threshold_vfe_idx; + + return status; +} + +static int _clk_domain_pmudatainit_35_slave(struct gk20a *g, + 
struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_domain_35_slave *pclk_domain_35_slave; + struct nv_pmu_clk_clk_domain_35_slave_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = clk_domain_pmudatainit_35_prog(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_domain_35_slave = (struct clk_domain_35_slave *) + (void *)obj; + + pset = (struct nv_pmu_clk_clk_domain_35_slave_boardobj_set *) + (void *)pmu_obj; + + pset->slave.master_idx = pclk_domain_35_slave->slave.master_idx; + + return status; +} + +static int clk_domain_construct_35_slave(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_domain_35_slave *pdomain; + struct clk_domain_35_slave *ptmpdomain = + (struct clk_domain_35_slave *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != + (u8) CTRL_CLK_CLK_DOMAIN_TYPE_35_SLAVE) { + return -EINVAL; + } + + obj_tmp->type_mask |= BIT32(CTRL_CLK_CLK_DOMAIN_TYPE_35_SLAVE); + status = clk_domain_construct_35_prog(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pdomain = (struct clk_domain_35_slave *)(void *)*obj; + + pdomain->super.super.super.super.super.pmudatainit = + _clk_domain_pmudatainit_35_slave; + + pdomain->slave.master_idx = ptmpdomain->slave.master_idx; + + pdomain->slave.clkdomainclkgetslaveclk = + clkdomaingetslaveclk; + + return status; +} + +static int clkdomainclkproglink_3x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain) +{ + int status = 0; + struct clk_domain_3x_master *p3xmaster = + (struct clk_domain_3x_master *)(void *)pdomain; + struct clk_prog *pprog = NULL; + struct clk_prog_1x_master *pprog1xmaster = NULL; + u16 freq_max_last_mhz = 0; + u8 i; + + nvgpu_log_info(g, " "); + + status = clkdomainclkproglink_3x_prog(g, pclk, pdomain); + if (status != 0) { + goto done; + } + + /* Iterate over the set of CLK_PROGs pointed at by this domain.*/ + for (i = p3xmaster->super.clk_prog_idx_first; + i <= p3xmaster->super.clk_prog_idx_last; + i++) { + pprog = CLK_CLK_PROG_GET(pclk, i); + + /* MASTER CLK_DOMAINs must point to MASTER CLK_PROGs.*/ + if (!pprog->super.implements(g, &pprog->super, + CTRL_CLK_CLK_PROG_TYPE_1X_MASTER)) { + status = -EINVAL; + goto done; + } + + pprog1xmaster = (struct clk_prog_1x_master *)(void *)pprog; + status = pprog1xmaster->vfflatten(g, pclk, pprog1xmaster, + pmu_board_obj_get_idx(p3xmaster), &freq_max_last_mhz); + if (status != 0) { + goto done; + } + } +done: + nvgpu_log_info(g, "done status %x", status); + return status; +} + +static int clk_domain_pmudatainit_35_master(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_domain_35_master *pclk_domain_35_master; + struct nv_pmu_clk_clk_domain_35_master_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = clk_domain_pmudatainit_35_prog(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_domain_35_master = (struct clk_domain_35_master *) + (void *)obj; + + pset = (struct nv_pmu_clk_clk_domain_35_master_boardobj_set *) + (void *)pmu_obj; + + pset->master.slave_idxs_mask = + pclk_domain_35_master->master.slave_idxs_mask; + + status = nvgpu_boardobjgrpmask_export( + &pclk_domain_35_master->master_slave_domains_grp_mask.super, + pclk_domain_35_master-> + master_slave_domains_grp_mask.super.bitcount, + &pset->master_slave_domains_grp_mask.super); + + return 
status; +} + +static int clk_domain_construct_35_master(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_domain_35_master *pdomain; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != + (u8) CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER) { + return -EINVAL; + } + + obj_tmp->type_mask |= BIT32(CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER); + status = clk_domain_construct_35_prog(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pdomain = (struct clk_domain_35_master *)(void *) *obj; + + pdomain->super.super.super.super.super.pmudatainit = + clk_domain_pmudatainit_35_master; + pdomain->super.super.super.super.clkdomainclkproglink = + clkdomainclkproglink_3x_master; + + pdomain->master.slave_idxs_mask = 0; + pdomain->super.clk_pos = 0; + + status = boardobjgrpmask_e32_init( + &pdomain->master_slave_domains_grp_mask, NULL); + + return status; +} + +static int clkdomainclkproglink_fixed(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain) +{ + nvgpu_log_info(g, " "); + return 0; +} + +static int _clk_domain_pmudatainit_3x_fixed(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_domain_3x_fixed *pclk_domain_3x_fixed; + struct nv_pmu_clk_clk_domain_3x_fixed_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = _clk_domain_pmudatainit_3x(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_domain_3x_fixed = (struct clk_domain_3x_fixed *) + (void *)obj; + + pset = (struct nv_pmu_clk_clk_domain_3x_fixed_boardobj_set *) + (void *)pmu_obj; + + pset->freq_mhz = pclk_domain_3x_fixed->freq_mhz; + + return status; +} + +static int clk_domain_construct_3x_fixed(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_domain_3x_fixed *pdomain; + struct clk_domain_3x_fixed *ptmpdomain = + (struct clk_domain_3x_fixed *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED) { + return -EINVAL; + } + + obj_tmp->type_mask |= BIT32(CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED); + status = clk_domain_construct_3x(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pdomain = (struct clk_domain_3x_fixed *)(void *)*obj; + + pdomain->super.super.super.pmudatainit = + _clk_domain_pmudatainit_3x_fixed; + + pdomain->super.super.clkdomainclkproglink = + clkdomainclkproglink_fixed; + + pdomain->freq_mhz = ptmpdomain->freq_mhz; + + return status; +} + +static struct nvgpu_clk_domain *construct_clk_domain(struct gk20a *g, + void *pargs) +{ + struct pmu_board_obj *obj = NULL; + int status; + + nvgpu_log_info(g, " %d", (pmu_board_obj_get_type(pargs))); + switch (pmu_board_obj_get_type(pargs)) { + case CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED: + status = clk_domain_construct_3x_fixed(g, &obj, + sizeof(struct clk_domain_3x_fixed), pargs); + break; + + case CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER: + status = clk_domain_construct_35_master(g, &obj, + sizeof(struct clk_domain_35_master), pargs); + break; + + case CTRL_CLK_CLK_DOMAIN_TYPE_35_SLAVE: + status = clk_domain_construct_35_slave(g, &obj, + sizeof(struct clk_domain_35_slave), pargs); + break; + + default: + nvgpu_err(g, "Unsupported Clk domain type"); + status = -EINVAL; + break; + } + + if (status != 0) { + return NULL; + } + + nvgpu_log_info(g, " Done"); + + return (struct nvgpu_clk_domain *)(void *)obj; 
+} + +static int clk_domain_pmudatainit_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct nvgpu_clk_domain *pclk_domain; + struct nv_pmu_clk_clk_domain_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_domain = (struct nvgpu_clk_domain *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_domain_boardobj_set *)(void *)pmu_obj; + + pset->domain = pclk_domain->domain; + pset->api_domain = pclk_domain->api_domain; + pset->perf_domain_grp_idx = pclk_domain->perf_domain_grp_idx; + + return status; +} + +int clk_domain_clk_prog_link(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk) +{ + int status = 0; + struct nvgpu_clk_domain *pdomain; + u8 i; + + /* Iterate over all CLK_DOMAINs and flatten their VF curves.*/ + BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs->super.super), + struct nvgpu_clk_domain *, pdomain, i) { + status = pdomain->clkdomainclkproglink(g, pclk, pdomain); + if (status != 0) { + nvgpu_err(g, + "error flattening VF for CLK DOMAIN - 0x%x", + pdomain->domain); + goto done; + } + } + +done: + return status; +} + +int clk_pmu_clk_domains_load(struct gk20a *g) +{ + int status; + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_clk_load clk_load_rpc; + + (void) memset(&clk_load_rpc, 0, + sizeof(struct nv_pmu_rpc_struct_clk_load)); + + clk_load_rpc.clk_load.feature = NV_NV_PMU_CLK_LOAD_FEATURE_CLK_DOMAIN; + + /* Continue with PMU setup, assume FB map is done */ + PMU_RPC_EXECUTE_CPB(status, pmu, CLK, LOAD, &clk_load_rpc, 0); + if (status != 0) { + nvgpu_err(g, + "Failed to execute Clock domain Load RPC status=0x%x", + status); + } + + return status; +} + +#ifdef CONFIG_NVGPU_CLK_ARB +int clk_get_fll_clks_per_clk_domain(struct gk20a *g, + struct nvgpu_clk_slave_freq *setfllclk) +{ + int status = -EINVAL; + struct nvgpu_clk_domain *pdomain; + u8 i; + struct nvgpu_clk_pmupstate *pclk = g->pmu->clk_pmu; + unsigned long bit; + u16 clkmhz = 0; + struct clk_domain_35_master *p35master; + struct clk_domain_35_slave *p35slave; + unsigned long slaveidxmask; + + if (setfllclk->gpc_mhz == 0U) { + return -EINVAL; + } + + BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs->super.super), + struct nvgpu_clk_domain *, pdomain, i) { + + if (pdomain->api_domain == CTRL_CLK_DOMAIN_GPCCLK) { + if (!pdomain->super.implements(g, &pdomain->super, + CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER)) { + status = -EINVAL; + goto done; + } + p35master = (struct clk_domain_35_master *) + (void *)pdomain; + slaveidxmask = p35master->master.slave_idxs_mask; + for_each_set_bit(bit, &slaveidxmask, 32U) { + i = (u8)bit; + p35slave = (struct clk_domain_35_slave *) + (void *) + clk_get_clk_domain_from_index(pclk, i); + + clkmhz = 0; + status = p35slave-> + slave.clkdomainclkgetslaveclk(g, + pclk, (struct nvgpu_clk_domain *) + (void *)p35slave, + &clkmhz, setfllclk->gpc_mhz); + if (status != 0) { + status = -EINVAL; + goto done; + } + if (p35slave->super.super.super.super. + api_domain == CTRL_CLK_DOMAIN_XBARCLK) { + setfllclk->xbar_mhz = clkmhz; + } + if (p35slave->super.super.super.super. + api_domain == CTRL_CLK_DOMAIN_SYSCLK) { + setfllclk->sys_mhz = clkmhz; + } + if (p35slave->super.super.super.super. + api_domain == CTRL_CLK_DOMAIN_NVDCLK) { + setfllclk->nvd_mhz = clkmhz; + } + if (p35slave->super.super.super.super. 
+ api_domain == CTRL_CLK_DOMAIN_HOSTCLK) { + setfllclk->host_mhz = clkmhz; + } + } + } + } +done: + return status; +} + +void clk_set_p0_clk_per_domain(struct gk20a *g, u8 *gpcclk_domain, + u32 *gpcclk_clkmhz, + struct nvgpu_clk_slave_freq *vf_point, + struct nvgpu_pmu_perf_change_input_clk_info *change_input) +{ + struct nvgpu_clk_domain *pclk_domain; + struct nvgpu_pmu_perf_pstate_clk_info *p0_info; + u32 max_clkmhz; + u16 max_ratio; + u8 i = 0; + + BOARDOBJGRP_FOR_EACH(&(g->pmu->clk_pmu->clk_domainobjs->super.super), + struct nvgpu_clk_domain *, pclk_domain, i) { + + switch (pclk_domain->api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + *gpcclk_domain = i; + *gpcclk_clkmhz = vf_point->gpc_mhz; + + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_GPCCLK); + if (p0_info == NULL) { + nvgpu_err(g, "failed to get GPCCLK P0 info"); + break; + } + if (vf_point->gpc_mhz < p0_info->min_mhz) { + vf_point->gpc_mhz = p0_info->min_mhz; + } + if (vf_point->gpc_mhz > p0_info->max_mhz) { + vf_point->gpc_mhz = p0_info->max_mhz; + } + change_input->clk[i].clk_freq_khz = + (u32)vf_point->gpc_mhz * 1000U; + change_input->clk_domains_mask.super.data[0] |= + (u32) BIT(i); + break; + case CTRL_CLK_DOMAIN_XBARCLK: + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_XBARCLK); + if (p0_info == NULL) { + nvgpu_err(g, "failed to get XBARCLK P0 info"); + break; + } + max_ratio = pclk_domain->ratio_domain; + + if (vf_point->xbar_mhz < p0_info->min_mhz) { + vf_point->xbar_mhz = p0_info->min_mhz; + } + if (vf_point->xbar_mhz > p0_info->max_mhz) { + vf_point->xbar_mhz = p0_info->max_mhz; + } + change_input->clk[i].clk_freq_khz = + (u32)vf_point->xbar_mhz * 1000U; + change_input->clk_domains_mask.super.data[0] |= + (u32) BIT(i); + if (vf_point->gpc_mhz < vf_point->xbar_mhz) { + max_clkmhz = (((u32)vf_point->xbar_mhz * 100U) / + (u32)max_ratio); + if (*gpcclk_clkmhz < max_clkmhz) { + *gpcclk_clkmhz = max_clkmhz; + } + } + break; + case CTRL_CLK_DOMAIN_SYSCLK: + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_SYSCLK); + if (p0_info == NULL) { + nvgpu_err(g, "failed to get SYSCLK P0 info"); + break; + } + max_ratio = pclk_domain->ratio_domain; + if (vf_point->sys_mhz < p0_info->min_mhz) { + vf_point->sys_mhz = p0_info->min_mhz; + } + if (vf_point->sys_mhz > p0_info->max_mhz) { + vf_point->sys_mhz = p0_info->max_mhz; + } + change_input->clk[i].clk_freq_khz = + (u32)vf_point->sys_mhz * 1000U; + change_input->clk_domains_mask.super.data[0] |= + (u32) BIT(i); + if (vf_point->gpc_mhz < vf_point->sys_mhz) { + max_clkmhz = (((u32)vf_point->sys_mhz * 100U) / + (u32)max_ratio); + if (*gpcclk_clkmhz < max_clkmhz) { + *gpcclk_clkmhz = max_clkmhz; + } + } + break; + case CTRL_CLK_DOMAIN_NVDCLK: + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_NVDCLK); + if (p0_info == NULL) { + nvgpu_err(g, "failed to get NVDCLK P0 info"); + break; + } + max_ratio = pclk_domain->ratio_domain; + if (vf_point->nvd_mhz < p0_info->min_mhz) { + vf_point->nvd_mhz = p0_info->min_mhz; + } + if (vf_point->nvd_mhz > p0_info->max_mhz) { + vf_point->nvd_mhz = p0_info->max_mhz; + } + change_input->clk[i].clk_freq_khz = + (u32)vf_point->nvd_mhz * 1000U; + change_input->clk_domains_mask.super.data[0] |= + (u32) BIT(i); + if (vf_point->gpc_mhz < vf_point->nvd_mhz) { + max_clkmhz = (((u32)vf_point->nvd_mhz * 100U) / + (u32)max_ratio); + if (*gpcclk_clkmhz < max_clkmhz) { + *gpcclk_clkmhz = max_clkmhz; + } + } + break; + case 
CTRL_CLK_DOMAIN_HOSTCLK: + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, CLKWHICH_HOSTCLK); + if (p0_info == NULL) { + nvgpu_err(g, "failed to get HOSTCLK P0 info"); + break; + } + max_ratio = pclk_domain->ratio_domain; + if (vf_point->host_mhz < p0_info->min_mhz) { + vf_point->host_mhz = p0_info->min_mhz; + } + if (vf_point->host_mhz > p0_info->max_mhz) { + vf_point->host_mhz = p0_info->max_mhz; + } + change_input->clk[i].clk_freq_khz = + (u32)vf_point->host_mhz * 1000U; + change_input->clk_domains_mask.super.data[0] |= + (u32) BIT(i); + if (vf_point->gpc_mhz < vf_point->host_mhz) { + max_clkmhz = (((u32)vf_point->host_mhz * 100U) / + (u32)max_ratio); + if (*gpcclk_clkmhz < max_clkmhz) { + *gpcclk_clkmhz = max_clkmhz; + } + } + break; + default: + nvgpu_pmu_dbg(g, "Fixed clock domain"); + break; + } + } +} +#endif + +int clk_domain_init_pmupstate(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmu->clk_pmu->clk_domainobjs != NULL) { + return 0; + } + + g->pmu->clk_pmu->clk_domainobjs = nvgpu_kzalloc(g, + sizeof(*g->pmu->clk_pmu->clk_domainobjs)); + if (g->pmu->clk_pmu->clk_domainobjs == NULL) { + return -ENOMEM; + } + + return 0; +} + +void clk_domain_free_pmupstate(struct gk20a *g) +{ + nvgpu_kfree(g, g->pmu->clk_pmu->clk_domainobjs); + g->pmu->clk_pmu->clk_domainobjs = NULL; +} + +int nvgpu_pmu_clk_domain_get_from_index(struct gk20a *g, u32 *domain, u32 index) +{ + struct nvgpu_clk_domain *clk_domain; + + clk_domain = (struct nvgpu_clk_domain *) BOARDOBJGRP_OBJ_GET_BY_IDX( + &g->pmu->clk_pmu->clk_domainobjs->super.super, index); + if (clk_domain == NULL) { + return -EINVAL; + } + + *domain = clk_domain->domain; + return 0; +} + +int nvgpu_pmu_clk_domain_get_f_points(struct gk20a *g, + u32 clkapidomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz) +{ + int status = -EINVAL; + struct nvgpu_clk_domain *pdomain; + struct nvgpu_clk_pmupstate *pclk = g->pmu->clk_pmu; + u8 i; + + BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs->super.super), + struct nvgpu_clk_domain *, pdomain, i) { + if (pdomain->api_domain == clkapidomain) { + status = pdomain->clkdomainclkgetfpoints(g, pclk, + pdomain, pfpointscount, + pfreqpointsinmhz, + CLK_PROG_VFE_ENTRY_LOGIC); + return status; + } + } + return status; +} + +u8 nvgpu_pmu_clk_domain_update_clk_info(struct gk20a *g, + struct ctrl_clk_clk_domain_list *clk_list) +{ + struct nvgpu_pmu_perf_pstate_clk_info *p0_info; + struct nvgpu_clk_domain *pdomain; + u8 i = 0U, num_domains = 0U; + + BOARDOBJGRP_FOR_EACH(&(g->pmu->clk_pmu->clk_domainobjs->super.super), + struct nvgpu_clk_domain *, pdomain, i) { + + p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, + CTRL_PERF_PSTATE_P0, pdomain->domain); + if (p0_info == NULL) { + nvgpu_err(g, "Unable to get P0 info"); + return num_domains; + } + clk_list->clk_domains[i].clk_domain = + pdomain->api_domain; + + clk_list->clk_domains[i].clk_freq_khz = + p0_info->nominal_mhz * 1000U; + + /* VBIOS always boots with FFR*/ + clk_list->clk_domains[i].regime_id = + CTRL_CLK_FLL_REGIME_ID_FFR; + + num_domains = + nvgpu_safe_cast_u32_to_u8(nvgpu_safe_add_u32(num_domains, 1U)); + + nvgpu_pmu_dbg(g, "Domain %x, Nom Freq = %d Max Freq =%d," + "regime %d", pdomain->api_domain, p0_info->nominal_mhz, + p0_info->max_mhz, CTRL_CLK_FLL_REGIME_ID_FFR); + } + return num_domains; + +} + +int nvgpu_pmu_clk_domain_freq_to_volt(struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx) +{ + + struct nvgpu_clk_vf_points *pclk_vf_points; + struct boardobjgrp *pboardobjgrp; 
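+	/*
+	 * The search below returns the voltage of the first VF point whose
+	 * frequency is at or above the requested frequency.
+	 */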
+ struct pmu_board_obj *obj = NULL; + int status = -EINVAL; + struct clk_vf_point *pclk_vf_point; + u8 index; + + nvgpu_log_info(g, " "); + pclk_vf_points = g->pmu->clk_pmu->clk_vf_pointobjs; + pboardobjgrp = &pclk_vf_points->super.super; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + pclk_vf_point = (struct clk_vf_point *)(void *)obj; + if((*pclkmhz) <= pclk_vf_point->pair.freq_mhz) { + *pvoltuv = pclk_vf_point->pair.voltage_uv; + return 0; + } + } + return status; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_domain.h b/drivers/gpu/nvgpu/common/pmu/clk/clk_domain.h new file mode 100644 index 000000000..b2074b2ed --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_domain.h @@ -0,0 +1,157 @@ +/* +* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. +* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+*/ + +#ifndef NVGPU_CLK_DOMAIN_H +#define NVGPU_CLK_DOMAIN_H + +#include +#include + +#define CLK_DOMAIN_BOARDOBJGRP_VERSION 0x30 +#define CLK_DOMAIN_BOARDOBJGRP_VERSION_35 0x35 + +#define CLK_TABLE_HAL_ENTRY_GP 0x02 +#define CLK_TABLE_HAL_ENTRY_GV 0x03 + +#define CLK_CLKMON_VFE_INDEX_INVALID 0xFF + +typedef int nvgpu_clkproglink(struct gk20a *g, struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain); + +typedef int nvgpu_clkvfsearch(struct gk20a *g, struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain, u16 *clkmhz, + u32 *voltuv, u8 rail); + +typedef int nvgpu_clkgetfpoints(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, struct nvgpu_clk_domain *pdomain, + u32 *pfpointscount, u16 *pfreqpointsinmhz, u8 rail); + +struct nvgpu_clk_domain { + struct pmu_board_obj super; + u32 api_domain; + u32 part_mask; + u32 domain; + u8 perf_domain_index; + u8 perf_domain_grp_idx; + u8 ratio_domain; + u8 usage; + nvgpu_clkproglink *clkdomainclkproglink; + nvgpu_clkvfsearch *clkdomainclkvfsearch; + nvgpu_clkgetfpoints *clkdomainclkgetfpoints; +}; + +struct nvgpu_clk_domains { + struct boardobjgrp_e32 super; + u8 n_num_entries; + u8 version; + bool b_enforce_vf_monotonicity; + bool b_enforce_vf_smoothening; + bool b_override_o_v_o_c; + bool b_debug_mode; + u32 vbios_domains; + u16 cntr_sampling_periodms; + u16 clkmon_refwin_usec; + struct boardobjgrpmask_e32 prog_domains_mask; + struct boardobjgrpmask_e32 master_domains_mask; + struct boardobjgrpmask_e32 clkmon_domains_mask; + struct ctrl_clk_clk_delta deltas; + + struct nvgpu_clk_domain + *ordered_noise_aware_list[CTRL_BOARDOBJ_MAX_BOARD_OBJECTS]; + + struct nvgpu_clk_domain + *ordered_noise_unaware_list[CTRL_BOARDOBJ_MAX_BOARD_OBJECTS]; +}; + +typedef int clkgetslaveclk(struct gk20a *g, struct nvgpu_clk_pmupstate *pclk, + struct nvgpu_clk_domain *pdomain, u16 *clkmhz, + u16 masterclkmhz); + +struct clk_domain_3x { + struct nvgpu_clk_domain super; + bool b_noise_aware_capable; +}; + +struct clk_domain_3x_fixed { + struct clk_domain_3x super; + u16 freq_mhz; +}; + +struct clk_domain_3x_prog { + struct clk_domain_3x super; + u8 clk_prog_idx_first; + u8 clk_prog_idx_last; + bool b_force_noise_unaware_ordering; + struct ctrl_clk_freq_delta factory_delta; + short freq_delta_min_mhz; + short freq_delta_max_mhz; + struct ctrl_clk_clk_delta deltas; + u8 noise_unaware_ordering_index; + u8 noise_aware_ordering_index; +}; + +struct clk_domain_35_prog { + struct clk_domain_3x_prog super; + u8 pre_volt_ordering_index; + u8 post_volt_ordering_index; + u8 clk_pos; + u8 clk_vf_curve_count; + struct ctrl_clk_domain_info_35_prog_clk_mon clkmon_info; + struct ctrl_clk_domain_control_35_prog_clk_mon clkmon_ctrl; + u32 por_volt_delta_uv[CTRL_VOLT_VOLT_RAIL_CLIENT_MAX_RAILS]; +}; + +struct clk_domain_3x_master { + struct clk_domain_3x_prog super; + u32 slave_idxs_mask; +}; + +struct clk_domain_35_master { + struct clk_domain_35_prog super; + struct clk_domain_3x_master master; + struct boardobjgrpmask_e32 master_slave_domains_grp_mask; +}; + +struct clk_domain_3x_slave { + struct clk_domain_3x_prog super; + u8 master_idx; + clkgetslaveclk *clkdomainclkgetslaveclk; +}; + +struct clk_domain_30_slave { + u8 rsvd; + u8 master_idx; + clkgetslaveclk *clkdomainclkgetslaveclk; +}; + +struct clk_domain_35_slave { + struct clk_domain_35_prog super; + struct clk_domain_30_slave slave; +}; + +int clk_domain_init_pmupstate(struct gk20a *g); +void clk_domain_free_pmupstate(struct gk20a *g); +int clk_pmu_clk_domains_load(struct gk20a *g); +int 
clk_domain_sw_setup(struct gk20a *g); +int clk_domain_pmu_setup(struct gk20a *g); + +#endif /* NVGPU_CLK_DOMAIN_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_fll.c b/drivers/gpu/nvgpu/common/pmu/clk/clk_fll.c new file mode 100644 index 000000000..af06e5eff --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_fll.c @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_clk_inf.h" +#include "clk_fll.h" +#include "clk_vin.h" +#include "clk.h" + +#define NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_SKIP 0x10U +#define NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_MASK 0x1FU + +static int devinit_get_fll_device_table(struct gk20a *g, + struct clk_avfs_fll_objs *pfllobjs); +static struct fll_device *construct_fll_device(struct gk20a *g, + void *pargs); +static int fll_device_init_pmudata_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj); + +static u32 clk_get_vbios_clk_domain(u32 vbios_domain); + +u8 clk_get_fll_lut_vf_num_entries(struct nvgpu_clk_pmupstate *pclk) +{ + return ((pclk)->avfs_fllobjs->lut_num_entries); +} + +u32 nvgpu_pmu_clk_fll_get_lut_min_volt(struct nvgpu_clk_pmupstate *pclk) +{ + return ((pclk)->avfs_fllobjs->lut_min_voltage_uv); +} + +u32 nvgpu_pmu_clk_fll_get_lut_step_size(struct nvgpu_clk_pmupstate *pclk) +{ + return ((pclk)->avfs_fllobjs->lut_step_size_uv); +} + +static int _clk_fll_devgrp_pmudatainit_super(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + struct nv_pmu_clk_clk_fll_device_boardobjgrp_set_header *pset = + (struct nv_pmu_clk_clk_fll_device_boardobjgrp_set_header *) + pboardobjgrppmu; + struct clk_avfs_fll_objs *pfll_objs = (struct clk_avfs_fll_objs *) + pboardobjgrp; + int status = 0; + + nvgpu_log_info(g, " "); + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, "failed to init fll pmuobjgrp"); + return status; + } + pset->lut_num_entries = pfll_objs->lut_num_entries; + pset->lut_step_size_uv = pfll_objs->lut_step_size_uv; + pset->lut_min_voltage_uv = pfll_objs->lut_min_voltage_uv; + pset->max_min_freq_mhz = pfll_objs->max_min_freq_mhz; + + status = nvgpu_boardobjgrpmask_export( + &pfll_objs->lut_prog_master_mask.super, + pfll_objs->lut_prog_master_mask.super.bitcount, + &pset->lut_prog_master_mask.super); 
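+	/*
+	 * The exported mask marks the FLL devices that act as LUT
+	 * programming masters for their clock domain.
+	 */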
+ + nvgpu_log_info(g, " Done"); + return status; +} + +static int _clk_fll_devgrp_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + struct nv_pmu_clk_clk_fll_device_boardobj_grp_set *pgrp_set = + (struct nv_pmu_clk_clk_fll_device_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, " Done"); + return 0; +} + +static int _clk_fll_devgrp_pmustatus_instget(struct gk20a *g, + void *pboardobjgrppmu, + struct nv_pmu_boardobj_query **obj_pmu_status, u8 idx) +{ + struct nv_pmu_clk_clk_fll_device_boardobj_grp_get_status + *pgrp_get_status = + (struct nv_pmu_clk_clk_fll_device_boardobj_grp_get_status *) + pboardobjgrppmu; + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_get_status->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *obj_pmu_status = (struct nv_pmu_boardobj_query *) + &pgrp_get_status->objects[idx].data.obj; + return 0; +} + +int clk_fll_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct clk_avfs_fll_objs *pfllobjs; + struct fll_device *pfll; + struct fll_device *pfll_master; + struct fll_device *pfll_local; + u8 i; + u8 j; + + nvgpu_log_info(g, " "); + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->clk_pmu->avfs_fllobjs->super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for fll, status - 0x%x", status); + goto done; + } + pfllobjs = g->pmu->clk_pmu->avfs_fllobjs; + pboardobjgrp = &(g->pmu->clk_pmu->avfs_fllobjs->super.super); + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, FLL_DEVICE); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + clk, CLK, clk_fll_device, CLK_FLL_DEVICE); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + pboardobjgrp->pmudatainit = _clk_fll_devgrp_pmudatainit_super; + pboardobjgrp->pmudatainstget = _clk_fll_devgrp_pmudata_instget; + pboardobjgrp->pmustatusinstget = _clk_fll_devgrp_pmustatus_instget; + pfllobjs = (struct clk_avfs_fll_objs *)pboardobjgrp; + pfllobjs->lut_num_entries = g->ops.clk.lut_num_entries; + pfllobjs->lut_step_size_uv = CTRL_CLK_VIN_STEP_SIZE_UV; + pfllobjs->lut_min_voltage_uv = CTRL_CLK_LUT_MIN_VOLTAGE_UV; + + /* Initialize lut prog master mask to zero.*/ + status = boardobjgrpmask_e32_init(&pfllobjs->lut_prog_master_mask, + NULL); + if (status != 0) { + nvgpu_err(g, "boardobjgrpmask_e32_init failed err=%d", status); + goto done; + } + + status = devinit_get_fll_device_table(g, pfllobjs); + if (status != 0) { + goto done; + } + + status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, + &g->pmu->clk_pmu->avfs_fllobjs->super.super, + clk, CLK, clk_fll_device, CLK_FLL_DEVICE); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + BOARDOBJGRP_FOR_EACH(&(pfllobjs->super.super), + struct fll_device *, pfll, i) { + pfll_master = NULL; + j = 0; + BOARDOBJGRP_ITERATOR(&(pfllobjs->super.super), + struct fll_device *, pfll_local, j, + &pfllobjs->lut_prog_master_mask.super) { + if (pfll_local->clk_domain == pfll->clk_domain) { + pfll_master = pfll_local; + break; + } + 
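+			/*
+			 * If the search above found no master for this FLL's
+			 * clock domain, the current FLL is registered as the
+			 * LUT programming master below.
+			 */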
} + + if (pfll_master == NULL) { + status = nvgpu_boardobjgrpmask_bit_set( + &pfllobjs->lut_prog_master_mask.super, + pmu_board_obj_get_idx(pfll)); + if (status != 0) { + nvgpu_err(g, "err setting lutprogmask"); + goto done; + } + pfll_master = pfll; + } + status = pfll_master->lut_broadcast_slave_register( + g, pfllobjs, pfll_master, pfll); + + if (status != 0) { + nvgpu_err(g, "err setting lutslavemask"); + goto done; + } + } +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int clk_fll_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->clk_pmu->avfs_fllobjs->super.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +static int devinit_get_fll_device_table(struct gk20a *g, + struct clk_avfs_fll_objs *pfllobjs) +{ + int status = 0; + u8 *fll_table_ptr = NULL; + struct fll_descriptor_header fll_desc_table_header_sz = { 0 }; + struct fll_descriptor_header_10 fll_desc_table_header = { 0 }; + struct fll_descriptor_entry_10 fll_desc_table_entry = { 0 }; + u8 *fll_tbl_entry_ptr = NULL; + u32 index = 0; + struct fll_device fll_dev_data; + struct fll_device *pfll_dev; + struct clk_vin_device *pvin_dev; + u32 desctablesize; + u32 vbios_domain = NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_SKIP; + struct nvgpu_avfsvinobjs *pvinobjs = g->pmu->clk_pmu->avfs_vinobjs; + + nvgpu_log_info(g, " "); + + fll_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_CLOCK_TOKEN), + FLL_TABLE); + if (fll_table_ptr == NULL) { + status = -1; + goto done; + } + + nvgpu_memcpy((u8 *)&fll_desc_table_header_sz, fll_table_ptr, + sizeof(struct fll_descriptor_header)); + if (fll_desc_table_header_sz.size >= FLL_DESCRIPTOR_HEADER_10_SIZE_7) { + desctablesize = FLL_DESCRIPTOR_HEADER_10_SIZE_7; + } else { + if (fll_desc_table_header_sz.size == + FLL_DESCRIPTOR_HEADER_10_SIZE_6) { + desctablesize = FLL_DESCRIPTOR_HEADER_10_SIZE_6; + } else { + nvgpu_err(g, "Invalid FLL_DESCRIPTOR_HEADER size"); + return -EINVAL; + } + } + + nvgpu_memcpy((u8 *)&fll_desc_table_header, fll_table_ptr, + desctablesize); + + pfllobjs->max_min_freq_mhz = + fll_desc_table_header.max_min_freq_mhz; + pfllobjs->freq_margin_vfe_idx = + fll_desc_table_header.freq_margin_vfe_idx; + + /* Read table entries*/ + fll_tbl_entry_ptr = fll_table_ptr + desctablesize; + for (index = 0; index < fll_desc_table_header.entry_count; index++) { + u32 fll_id; + + nvgpu_memcpy((u8 *)&fll_desc_table_entry, fll_tbl_entry_ptr, + sizeof(struct fll_descriptor_entry_10)); + + if (fll_desc_table_entry.fll_device_type == + CTRL_CLK_FLL_TYPE_DISABLED) { + continue; + } + + fll_id = fll_desc_table_entry.fll_device_id; + + if ((u8)fll_desc_table_entry.vin_idx_logic != + CTRL_CLK_VIN_ID_UNDEFINED) { + pvin_dev = clk_get_vin_from_index(pvinobjs, + (u8)fll_desc_table_entry.vin_idx_logic); + if (pvin_dev == NULL) { + return -EINVAL; + } else { + pvin_dev->flls_shared_mask |= BIT32(fll_id); + } + } else { + nvgpu_err(g, "Invalid Logic ID"); + return -EINVAL; + } + + fll_dev_data.lut_device.vselect_mode = + BIOS_GET_FIELD(u8, fll_desc_table_entry.lut_params, + NV_FLL_DESC_LUT_PARAMS_VSELECT); + + if ((u8)fll_desc_table_entry.vin_idx_sram != + CTRL_CLK_VIN_ID_UNDEFINED) { + pvin_dev = clk_get_vin_from_index(pvinobjs, + (u8)fll_desc_table_entry.vin_idx_sram); + if (pvin_dev == NULL) { + return -EINVAL; + } else { + 
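+				/* Record that this FLL shares the SRAM VIN device. */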
pvin_dev->flls_shared_mask |= BIT32(fll_id); + } + } else { + /* Make sure VSELECT mode is set correctly to _LOGIC*/ + if (fll_dev_data.lut_device.vselect_mode != + CTRL_CLK_FLL_LUT_VSELECT_LOGIC) { + return -EINVAL; + } + } + + fll_dev_data.super.type = + (u8)fll_desc_table_entry.fll_device_type; + fll_dev_data.id = (u8)fll_desc_table_entry.fll_device_id; + fll_dev_data.mdiv = BIOS_GET_FIELD(u8, + fll_desc_table_entry.fll_params, + NV_FLL_DESC_FLL_PARAMS_MDIV); + fll_dev_data.input_freq_mhz = + (u16)fll_desc_table_entry.ref_freq_mhz; + fll_dev_data.min_freq_vfe_idx = + (u8)fll_desc_table_entry.min_freq_vfe_idx; + fll_dev_data.freq_ctrl_idx = CTRL_BOARDOBJ_IDX_INVALID; + + vbios_domain = U32(fll_desc_table_entry.clk_domain) & + U32(NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_MASK); + fll_dev_data.clk_domain = + clk_get_vbios_clk_domain(vbios_domain); + + fll_dev_data.rail_idx_for_lut = 0; + fll_dev_data.vin_idx_logic = + (u8)fll_desc_table_entry.vin_idx_logic; + fll_dev_data.vin_idx_sram = + (u8)fll_desc_table_entry.vin_idx_sram; + fll_dev_data.b_skip_pldiv_below_dvco_min = + BIOS_GET_FIELD(bool, fll_desc_table_entry.fll_params, + NV_FLL_DESC_FLL_PARAMS_SKIP_PLDIV_BELOW_DVCO_MIN); + fll_dev_data.lut_device.hysteresis_threshold = + BIOS_GET_FIELD(u16, fll_desc_table_entry.lut_params, + NV_FLL_DESC_LUT_PARAMS_HYSTERISIS_THRESHOLD); + fll_dev_data.regime_desc.regime_id = + CTRL_CLK_FLL_REGIME_ID_FFR; + fll_dev_data.regime_desc.fixed_freq_regime_limit_mhz = + (u16)fll_desc_table_entry.ffr_cutoff_freq_mhz; + if (fll_desc_table_entry.fll_device_type == 0x1U) { + fll_dev_data.regime_desc.target_regime_id_override = 0U; + fll_dev_data.b_dvco_1x = false; + } else { + fll_dev_data.regime_desc.target_regime_id_override = + CTRL_CLK_FLL_REGIME_ID_FFR; + fll_dev_data.b_dvco_1x = true; + } + + /*construct fll device*/ + pfll_dev = construct_fll_device(g, (void *)&fll_dev_data); + + status = boardobjgrp_objinsert(&pfllobjs->super.super, + (struct pmu_board_obj *)pfll_dev, index); + fll_tbl_entry_ptr += fll_desc_table_header.entry_size; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +static u32 clk_get_vbios_clk_domain(u32 vbios_domain) +{ + if (vbios_domain == 0U) { + return CTRL_CLK_DOMAIN_GPCCLK; + } else if (vbios_domain == 1U) { + return CTRL_CLK_DOMAIN_XBARCLK; + } else if (vbios_domain == 3U) { + return CTRL_CLK_DOMAIN_SYSCLK; + } else if (vbios_domain == 5U) { + return CTRL_CLK_DOMAIN_NVDCLK; + } else if (vbios_domain == 9U) { + return CTRL_CLK_DOMAIN_HOSTCLK; + } else { + return 0; + } +} + +static int lutbroadcastslaveregister(struct gk20a *g, + struct clk_avfs_fll_objs *pfllobjs, struct fll_device *pfll, + struct fll_device *pfll_slave) +{ + if (pfll->clk_domain != pfll_slave->clk_domain) { + return -EINVAL; + } + + return nvgpu_boardobjgrpmask_bit_set(&pfll-> + lut_prog_broadcast_slave_mask.super, + pmu_board_obj_get_idx(pfll_slave)); +} + +static struct fll_device *construct_fll_device(struct gk20a *g, + void *pargs) +{ + struct pmu_board_obj *obj = NULL; + struct fll_device *pfll_dev; + struct fll_device *board_obj_fll_ptr = NULL; + int status; + + nvgpu_log_info(g, " "); + + board_obj_fll_ptr = nvgpu_kzalloc(g, sizeof(struct fll_device)); + if (board_obj_fll_ptr == NULL) { + return NULL; + } + obj = (struct pmu_board_obj *)(void *)board_obj_fll_ptr; + + status = pmu_board_obj_construct_super(g, obj, pargs); + if (status != 0) { + return NULL; + } + + pfll_dev = (struct fll_device *)pargs; + obj->pmudatainit = fll_device_init_pmudata_super; + 
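+	/*
+	 * Copy the FLL parameters parsed from the VBIOS descriptor into the
+	 * newly constructed board object.
+	 */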
board_obj_fll_ptr->lut_broadcast_slave_register = + lutbroadcastslaveregister; + board_obj_fll_ptr->id = pfll_dev->id; + board_obj_fll_ptr->mdiv = pfll_dev->mdiv; + board_obj_fll_ptr->rail_idx_for_lut = pfll_dev->rail_idx_for_lut; + board_obj_fll_ptr->input_freq_mhz = pfll_dev->input_freq_mhz; + board_obj_fll_ptr->clk_domain = pfll_dev->clk_domain; + board_obj_fll_ptr->vin_idx_logic = pfll_dev->vin_idx_logic; + board_obj_fll_ptr->vin_idx_sram = pfll_dev->vin_idx_sram; + board_obj_fll_ptr->min_freq_vfe_idx = + pfll_dev->min_freq_vfe_idx; + board_obj_fll_ptr->freq_ctrl_idx = pfll_dev->freq_ctrl_idx; + board_obj_fll_ptr->b_skip_pldiv_below_dvco_min = + pfll_dev->b_skip_pldiv_below_dvco_min; + nvgpu_memcpy((u8 *)&board_obj_fll_ptr->lut_device, + (u8 *)&pfll_dev->lut_device, + sizeof(struct nv_pmu_clk_lut_device_desc)); + nvgpu_memcpy((u8 *)&board_obj_fll_ptr->regime_desc, + (u8 *)&pfll_dev->regime_desc, + sizeof(struct nv_pmu_clk_regime_desc)); + board_obj_fll_ptr->b_dvco_1x=pfll_dev->b_dvco_1x; + + status = boardobjgrpmask_e32_init( + &board_obj_fll_ptr->lut_prog_broadcast_slave_mask, NULL); + if (status != 0) { + nvgpu_err(g, "boardobjgrpmask_e32_init failed err=%d", status); + status = obj->destruct(obj); + if (status != 0) { + nvgpu_err(g, "destruct failed err=%d", status); + } + return NULL; + } + + nvgpu_log_info(g, " Done"); + + return (struct fll_device *)(void *)obj; +} + +static int fll_device_init_pmudata_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct fll_device *pfll_dev; + struct nv_pmu_clk_clk_fll_device_boardobj_set *perf_pmu_data; + + nvgpu_log_info(g, " "); + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pfll_dev = (struct fll_device *)(void *)obj; + perf_pmu_data = (struct nv_pmu_clk_clk_fll_device_boardobj_set *) + pmu_obj; + + perf_pmu_data->id = pfll_dev->id; + perf_pmu_data->mdiv = pfll_dev->mdiv; + perf_pmu_data->rail_idx_for_lut = pfll_dev->rail_idx_for_lut; + perf_pmu_data->input_freq_mhz = pfll_dev->input_freq_mhz; + perf_pmu_data->vin_idx_logic = pfll_dev->vin_idx_logic; + perf_pmu_data->vin_idx_sram = pfll_dev->vin_idx_sram; + perf_pmu_data->clk_domain = pfll_dev->clk_domain; + perf_pmu_data->min_freq_vfe_idx = + pfll_dev->min_freq_vfe_idx; + perf_pmu_data->freq_ctrl_idx = pfll_dev->freq_ctrl_idx; + perf_pmu_data->b_skip_pldiv_below_dvco_min = + pfll_dev->b_skip_pldiv_below_dvco_min; + perf_pmu_data->b_dvco_1x = pfll_dev->b_dvco_1x; + nvgpu_memcpy((u8 *)&perf_pmu_data->lut_device, + (u8 *)&pfll_dev->lut_device, + sizeof(struct nv_pmu_clk_lut_device_desc)); + nvgpu_memcpy((u8 *)&perf_pmu_data->regime_desc, + (u8 *)&pfll_dev->regime_desc, + sizeof(struct nv_pmu_clk_regime_desc)); + + status = nvgpu_boardobjgrpmask_export( + &pfll_dev->lut_prog_broadcast_slave_mask.super, + pfll_dev->lut_prog_broadcast_slave_mask.super.bitcount, + &perf_pmu_data->lut_prog_broadcast_slave_mask.super); + + nvgpu_log_info(g, " Done"); + + return status; +} + + +u8 nvgpu_pmu_clk_fll_get_fmargin_idx(struct gk20a *g) +{ + struct clk_avfs_fll_objs *pfllobjs = g->pmu->clk_pmu->avfs_fllobjs; + u8 fmargin_idx; + + fmargin_idx = pfllobjs->freq_margin_vfe_idx; + if (fmargin_idx == 255U) { + return 0; + } + return fmargin_idx; +} + +u16 nvgpu_pmu_clk_fll_get_min_max_freq(struct gk20a *g) +{ + if ((g->pmu->clk_pmu != NULL) && + (g->pmu->clk_pmu->avfs_fllobjs != NULL)) { + return (g->pmu->clk_pmu->avfs_fllobjs->max_min_freq_mhz); + } + return 0; +} + +int 
clk_fll_init_pmupstate(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmu->clk_pmu->avfs_fllobjs != NULL) { + return 0; + } + + g->pmu->clk_pmu->avfs_fllobjs = nvgpu_kzalloc(g, + sizeof(*g->pmu->clk_pmu->avfs_fllobjs)); + if (g->pmu->clk_pmu->avfs_fllobjs == NULL) { + return -ENOMEM; + } + + return 0; +} + +void clk_fll_free_pmupstate(struct gk20a *g) +{ + nvgpu_kfree(g, g->pmu->clk_pmu->avfs_fllobjs); + g->pmu->clk_pmu->avfs_fllobjs = NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_fll.h b/drivers/gpu/nvgpu/common/pmu/clk/clk_fll.h new file mode 100644 index 000000000..c41105776 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_fll.h @@ -0,0 +1,69 @@ +/* + * general clock structures & definitions + * + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_CLK_FLL_H +#define NVGPU_CLK_FLL_H + +struct gk20a; +struct fll_device; + +struct clk_avfs_fll_objs { + struct boardobjgrp_e32 super; + struct boardobjgrpmask_e32 lut_prog_master_mask; + u32 lut_step_size_uv; + u32 lut_min_voltage_uv; + u8 lut_num_entries; + u16 max_min_freq_mhz; + u8 freq_margin_vfe_idx; +}; + +typedef int fll_lut_broadcast_slave_register(struct gk20a *g, + struct clk_avfs_fll_objs *pfllobjs, + struct fll_device *pfll, + struct fll_device *pfll_slave); + +struct fll_device { + struct pmu_board_obj super; + u8 id; + u8 mdiv; + u16 input_freq_mhz; + u32 clk_domain; + u8 vin_idx_logic; + u8 vin_idx_sram; + u8 rail_idx_for_lut; + struct nv_pmu_clk_lut_device_desc lut_device; + struct nv_pmu_clk_regime_desc regime_desc; + u8 min_freq_vfe_idx; + u8 freq_ctrl_idx; + u8 target_regime_id_override; + bool b_skip_pldiv_below_dvco_min; + bool b_dvco_1x; + struct boardobjgrpmask_e32 lut_prog_broadcast_slave_mask; + fll_lut_broadcast_slave_register *lut_broadcast_slave_register; +}; + +int clk_fll_init_pmupstate(struct gk20a *g); +void clk_fll_free_pmupstate(struct gk20a *g); +int clk_fll_sw_setup(struct gk20a *g); +int clk_fll_pmu_setup(struct gk20a *g); +#endif /* NVGPU_CLK_FLL_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_prog.c b/drivers/gpu/nvgpu/common/pmu/clk/clk_prog.c new file mode 100644 index 000000000..0457894ba --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_prog.c @@ -0,0 +1,1409 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_clk_inf.h" +#include "clk_prog.h" +#include "clk.h" + +static struct clk_prog *construct_clk_prog(struct gk20a *g, void *pargs); +static int devinit_get_clk_prog_table(struct gk20a *g, + struct nvgpu_clk_progs *pprogobjs); +static int vfflatten_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 clk_domain_idx, u16 *pfreqmaxlastmhz); +static int vflookup_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 *slave_clk_domain, + u16 *pclkmhz, + u32 *pvoltuv, + u8 rail); +static int getfpoints_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u32 *pfpointscount, + u16 **ppfreqpointsinmhz, + u8 rail); +static int getslaveclk_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 slave_clk_domain, + u16 *pclkmhz, + u16 masterclkmhz, u8 *ratio); + +static int _clk_progs_pmudatainit(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + struct nv_pmu_clk_clk_prog_boardobjgrp_set_header *pset = + (struct nv_pmu_clk_clk_prog_boardobjgrp_set_header *) + (void *)pboardobjgrppmu; + struct nvgpu_clk_progs *pprogs = (struct nvgpu_clk_progs *) + (void *)pboardobjgrp; + int status = 0; + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, "error updating pmu boardobjgrp for clk prog 0x%x", + status); + goto done; + } + pset->slave_entry_count = pprogs->slave_entry_count; + pset->vf_entry_count = pprogs->vf_entry_count; + pset->vf_sec_entry_count = pprogs->vf_sec_entry_count; + +done: + return status; +} + +static int _clk_progs_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_clk_clk_prog_boardobj_grp_set *pgrp_set = + (struct nv_pmu_clk_clk_prog_boardobj_grp_set *) + (void *)pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + 
nvgpu_log_info(g, " Done"); + return 0; +} + +int clk_prog_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct nvgpu_clk_progs *pclkprogobjs; + + nvgpu_log_info(g, " "); + + status = nvgpu_boardobjgrp_construct_e255(g, + &g->pmu->clk_pmu->clk_progobjs->super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for clk prog, status- 0x%x", + status); + goto done; + } + + pboardobjgrp = &g->pmu->clk_pmu->clk_progobjs->super.super; + pclkprogobjs = g->pmu->clk_pmu->clk_progobjs; + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, CLK_PROG); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + clk, CLK, clk_prog, CLK_PROG); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + pboardobjgrp->pmudatainit = _clk_progs_pmudatainit; + pboardobjgrp->pmudatainstget = _clk_progs_pmudata_instget; + + status = devinit_get_clk_prog_table(g, pclkprogobjs); + if (status != 0) { + nvgpu_err(g, "Error parsing the clk prog Vbios tables"); + goto done; + } + + status = clk_domain_clk_prog_link(g, g->pmu->clk_pmu); + if (status != 0) { + nvgpu_err(g, "error constructing VF point board objects"); + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int clk_prog_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->clk_pmu->clk_progobjs->super.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +static int devinit_get_clk_prog_table_35(struct gk20a *g, + struct nvgpu_clk_progs *pclkprogobjs, + u8 *clkprogs_tbl_ptr) +{ + int status = 0; + struct vbios_clock_programming_table_35_header header = { 0 }; + struct vbios_clock_programming_table_1x_entry prog = { 0 }; + struct vbios_clock_programming_table_1x_slave_entry slaveprog = { 0 }; + struct vbios_clock_programming_table_35_vf_entry vfprog = { 0 }; + struct vbios_clock_programming_table_35_vf_sec_entry vfsecprog = { 0 }; + u8 *entry = NULL; + u8 *slaveentry = NULL; + u8 *vfentry = NULL; + u8 *vfsecentry = NULL; + u32 i, j, k = 0; + struct clk_prog *pprog; + u8 prog_type; + u8 src_type; + u32 szfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_0D; + u32 hszfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_35_HEADER_SIZE_0A; + u32 slaveszfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_SIZE_03; + u32 vfszfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_35_VF_ENTRY_SIZE_01; + u32 vfsecszfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_35_VF_SEC_ENTRY_SIZE_02; + struct ctrl_clk_clk_prog_1x_master_vf_entry + vfentries[CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES]; + struct ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail + voltrailsecvfentries[ + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES]; + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry + ratioslaveentries[CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES]; + struct ctrl_clk_clk_prog_1x_master_table_slave_entry + tableslaveentries[CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES]; + struct ctrl_clk_clk_prog_1x_source_pll *source_pll; + union { + struct pmu_board_obj obj; + struct clk_prog clkprog; + struct clk_prog_1x v1x; + struct clk_prog_35_master v35_master; + struct clk_prog_35_master_ratio v35_master_ratio; + struct clk_prog_35_master_table v35_master_table; + } prog_data; + + nvgpu_log_info(g, " "); + + if (clkprogs_tbl_ptr == NULL) { + 
status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&header, clkprogs_tbl_ptr, hszfmt); + if (header.header_size < hszfmt) { + status = -EINVAL; + goto done; + } + hszfmt = header.header_size; + + if (header.entry_size < szfmt) { + status = -EINVAL; + goto done; + } + szfmt = header.entry_size; + + if (header.vf_entry_size < vfszfmt) { + status = -EINVAL; + goto done; + } + vfszfmt = header.vf_entry_size; + + if (header.slave_entry_size < slaveszfmt) { + status = -EINVAL; + goto done; + } + slaveszfmt = header.slave_entry_size; + + if (header.vf_entry_count > CTRL_CLK_CLK_DELTA_MAX_VOLT_RAILS) { + status = -EINVAL; + goto done; + } + + if (header.vf_sec_entry_size < vfsecszfmt) { + status = -EINVAL; + goto done; + } + vfsecszfmt = header.vf_sec_entry_size; + + pclkprogobjs->slave_entry_count = header.slave_entry_count; + pclkprogobjs->vf_entry_count = header.vf_entry_count; + /* VFE Secondary entry is not supported for auto profile */ + pclkprogobjs->vf_sec_entry_count = 0U; + + for (i = 0; i < header.entry_count; i++) { + (void) memset(&prog_data, 0x0, (u32)sizeof(prog_data)); + + /* Read table entries*/ + entry = clkprogs_tbl_ptr + hszfmt + + (i * (szfmt + (header.slave_entry_count * slaveszfmt) + + (header.vf_entry_count * vfszfmt) + + (header.vf_sec_entry_count * vfsecszfmt))); + + nvgpu_memcpy((u8 *)&prog, entry, szfmt); + (void) memset(vfentries, 0xFF, + sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) * + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES); + (void) memset(voltrailsecvfentries, 0xFF, + sizeof(struct ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail) * + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES); + (void) memset(ratioslaveentries, 0xFF, + sizeof(struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry) * + CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES); + (void) memset(tableslaveentries, 0xFF, + sizeof(struct ctrl_clk_clk_prog_1x_master_table_slave_entry) * + CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES); + + prog_type = BIOS_GET_FIELD(u8, prog.flags0, + NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE); + nvgpu_log_info(g, "Prog_type (master, slave type): 0x%x", + prog_type); + if (prog_type == NV_VBIOS_CLOCK_PROGRAMMING_TABLE_35_ENTRY_FLAGS0_TYPE_DISABLED) { + nvgpu_log_info(g, "Skipped Entry"); + continue; + } + + src_type = BIOS_GET_FIELD(u8, prog.flags0, + NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE); + nvgpu_log_info(g, "source type: 0x%x", src_type); + switch (src_type) { + case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_PLL: + nvgpu_log_info(g, "Source type is PLL"); + prog_data.v1x.source = CTRL_CLK_PROG_1X_SOURCE_PLL; + source_pll = &prog_data.v1x.source_data.source_pll; + source_pll->pll_idx = + BIOS_GET_FIELD(u8, prog.param0, + NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM0_PLL_PLL_INDEX); + source_pll->freq_step_size_mhz = + BIOS_GET_FIELD(u8, prog.param1, + NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM1_PLL_FREQ_STEP_SIZE); + nvgpu_log_info(g, "pll_index: 0x%x freq_step_size: %d", + source_pll->pll_idx, + source_pll->freq_step_size_mhz); + break; + + case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_ONE_SOURCE: + nvgpu_log_info(g, "Source type is ONE_SOURCE"); + prog_data.v1x.source = CTRL_CLK_PROG_1X_SOURCE_ONE_SOURCE; + break; + + case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_FLL: + nvgpu_log_info(g, "Source type is FLL"); + prog_data.v1x.source = CTRL_CLK_PROG_1X_SOURCE_FLL; + break; + + default: + nvgpu_err(g, "invalid source %d", prog_type); + status = -EINVAL; + break; + } + + if 
(status != 0) { + goto done; + } + prog_data.v1x.freq_max_mhz = (u16)prog.freq_max_mhz; + nvgpu_log_info(g, "Max freq: %d", prog_data.v1x.freq_max_mhz); + + slaveentry = entry + szfmt; + vfentry = entry + szfmt + header.slave_entry_count * slaveszfmt; + vfsecentry = entry + szfmt + + header.slave_entry_count * slaveszfmt + + header.vf_entry_count * vfszfmt; + + switch (prog_type) { + case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_RATIO: + case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_TABLE: + prog_data.v35_master.master.b_o_c_o_v_enabled = false; + for (j = 0; j < header.vf_entry_count; j++) { + nvgpu_memcpy((u8 *)&vfprog, vfentry, vfszfmt); + + vfentries[j].vfe_idx = (u8)vfprog.vfe_idx; + vfentries[j].gain_vfe_idx = CTRL_BOARDOBJ_IDX_INVALID; + vfentry += vfszfmt; + + for (k = 0; k < header.vf_sec_entry_count; k++) { + nvgpu_memcpy((u8 *)&vfsecprog, + vfsecentry, vfsecszfmt); + + voltrailsecvfentries[j].sec_vf_entries[k].vfe_idx = (u8)vfsecprog.sec_vfe_idx; + if (prog_data.v1x.source == CTRL_CLK_PROG_1X_SOURCE_FLL) { + voltrailsecvfentries[j].sec_vf_entries[k].dvco_offset_vfe_idx = + BIOS_GET_FIELD(u8, + vfsecprog.param0, + NV_VBIOS_CLOCK_PROGRAMMING_TABLE_35_SEC_VF_ENTRY_PARAM0_FLL_DVCO_OFFSET_VFE_IDX); + } else { + voltrailsecvfentries[j].sec_vf_entries[k].dvco_offset_vfe_idx = CTRL_BOARDOBJ_IDX_INVALID; + } + vfsecentry += vfsecszfmt; + nvgpu_log_info(g, "Sec_VF_entry %d: vfe_idx: 0x%x " + "dcvo_offset_vfe_idx: 0x%x", j, + voltrailsecvfentries[j].sec_vf_entries[k].vfe_idx, + voltrailsecvfentries[j].sec_vf_entries[k].dvco_offset_vfe_idx); + } + } + prog_data.v35_master.master.p_vf_entries = vfentries; + prog_data.v35_master.p_voltrail_sec_vf_entries = voltrailsecvfentries; + + for (j = 0; j < header.slave_entry_count; j++) { + nvgpu_memcpy((u8 *)&slaveprog, slaveentry, + slaveszfmt); + if (prog_type == NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_RATIO) { + ratioslaveentries[j].clk_dom_idx = + (u8)slaveprog.clk_dom_idx; + ratioslaveentries[j].ratio = + BIOS_GET_FIELD(u8, + slaveprog.param0, + NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_RATIO_RATIO); + } else { + tableslaveentries[j].clk_dom_idx = + (u8)slaveprog.clk_dom_idx; + tableslaveentries[j].freq_mhz = + BIOS_GET_FIELD(u16, + slaveprog.param0, + NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_TABLE_FREQ); + } + slaveentry += slaveszfmt; + } + + if (prog_type == NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_RATIO) { + prog_data.obj.type = CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO; + prog_data.v35_master_ratio.ratio.p_slave_entries = + ratioslaveentries; + } else { + prog_data.obj.type = CTRL_CLK_CLK_PROG_TYPE_35_MASTER_TABLE; + + prog_data.v35_master_table.table.p_slave_entries = + tableslaveentries; + } + break; + + case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_SLAVE: + prog_data.obj.type = CTRL_CLK_CLK_PROG_TYPE_35; + break; + + default: + nvgpu_err(g, "Wrong Prog entry type %d", prog_type); + status = -EINVAL; + break; + } + + if (status != 0) { + goto done; + } + pprog = construct_clk_prog(g, (void *)&prog_data); + if (pprog == NULL) { + nvgpu_err(g, + "error constructing clk_prog boardobj %d", i); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&pclkprogobjs->super.super, + (struct pmu_board_obj *)(void *)pprog, i); + if (status != 0) { + nvgpu_err(g, "error adding clk_prog boardobj %d", i); + status = -EINVAL; + goto done; + } + } +done: + nvgpu_log_info(g, " done status %x", status); + 
return status; +} + +static int devinit_get_clk_prog_table(struct gk20a *g, + struct nvgpu_clk_progs *pprogobjs) +{ + int status = 0; + u8 *clkprogs_tbl_ptr = NULL; + struct vbios_clock_programming_table_1x_header header = { 0 }; + nvgpu_log_info(g, " "); + + clkprogs_tbl_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_CLOCK_TOKEN), + CLOCK_PROGRAMMING_TABLE); + if (clkprogs_tbl_ptr == NULL) { + return -EINVAL; + } + nvgpu_memcpy((u8 *)&header, clkprogs_tbl_ptr, + VBIOS_CLOCK_PROGRAMMING_TABLE_1X_HEADER_SIZE_08); + + if (header.version == + VBIOS_CLOCK_PROGRAMMING_TABLE_35_HEADER_VERSION) { + status = devinit_get_clk_prog_table_35(g, pprogobjs, + clkprogs_tbl_ptr); + } else { + nvgpu_err(g, "Invalid Clock Prog Table Header version\n"); + status = -EINVAL; + } + + return status; +} + +static int clk_prog_pmudatainit_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + + nvgpu_log_info(g, " "); + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + return status; +} + +static int clk_prog_pmudatainit_1x(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_prog_1x *pclk_prog_1x; + struct nv_pmu_clk_clk_prog_1x_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = clk_prog_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_prog_1x = (struct clk_prog_1x *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_prog_1x_boardobj_set *)(void *) + pmu_obj; + + pset->source = pclk_prog_1x->source; + pset->freq_max_mhz = pclk_prog_1x->freq_max_mhz; + pset->source_data = pclk_prog_1x->source_data; + + return status; +} + +static int clk_prog_pmudatainit_1x_master(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_prog_1x_master *pclk_prog_1x_master; + struct nv_pmu_clk_clk_prog_1x_master_boardobj_set *pset; + size_t vfsize = sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) * + g->pmu->clk_pmu->clk_progobjs->vf_entry_count; + + nvgpu_log_info(g, " "); + + status = clk_prog_pmudatainit_1x(g, obj, pmu_obj); + + pclk_prog_1x_master = + (struct clk_prog_1x_master *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_prog_1x_master_boardobj_set *)(void *) + pmu_obj; + + nvgpu_memcpy((u8 *)pset->vf_entries, + (u8 *)pclk_prog_1x_master->p_vf_entries, vfsize); + + pset->b_o_c_o_v_enabled = pclk_prog_1x_master->b_o_c_o_v_enabled; + pset->source_data = pclk_prog_1x_master->source_data; + + nvgpu_memcpy((u8 *)&pset->deltas, (u8 *)&pclk_prog_1x_master->deltas, + (u32) sizeof(struct ctrl_clk_clk_delta)); + + return status; +} + +static int clk_prog_pmudatainit_35_master(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_prog_35_master *pclk_prog_35_master; + struct nv_pmu_clk_clk_prog_35_master_boardobj_set *pset; + size_t voltrail_sec_vfsize = + sizeof(struct ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail) + * g->pmu->clk_pmu->clk_progobjs->vf_sec_entry_count; + + nvgpu_log_info(g, " "); + + status = clk_prog_pmudatainit_1x_master(g, obj, pmu_obj); + + pclk_prog_35_master = + (struct clk_prog_35_master *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_prog_35_master_boardobj_set *)(void *) + pmu_obj; + + nvgpu_memcpy((u8 *)pset->voltrail_sec_vf_entries, + (u8 *)pclk_prog_35_master->p_voltrail_sec_vf_entries, + voltrail_sec_vfsize); + + return status; +} + +static int 
clk_prog_pmudatainit_35_master_ratio(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_prog_35_master_ratio *pclk_prog_35_master_ratio; + struct nv_pmu_clk_clk_prog_35_master_ratio_boardobj_set *pset; + size_t slavesize = sizeof(struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry) * + g->pmu->clk_pmu->clk_progobjs->slave_entry_count; + + nvgpu_log_info(g, " "); + + status = clk_prog_pmudatainit_35_master(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_prog_35_master_ratio = + (struct clk_prog_35_master_ratio *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_prog_35_master_ratio_boardobj_set *) + (void *)pmu_obj; + + nvgpu_memcpy((u8 *)pset->ratio.slave_entries, + (u8 *)pclk_prog_35_master_ratio->ratio.p_slave_entries, + slavesize); + + return status; +} + +static int clk_prog_pmudatainit_35_master_table(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_prog_35_master_table *pclk_prog_35_master_table; + struct nv_pmu_clk_clk_prog_35_master_table_boardobj_set *pset; + size_t slavesize = sizeof( + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry) * + g->pmu->clk_pmu->clk_progobjs->slave_entry_count; + + nvgpu_log_info(g, " "); + + status = clk_prog_pmudatainit_35_master(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_prog_35_master_table = + (struct clk_prog_35_master_table *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_prog_35_master_table_boardobj_set *) + (void *)pmu_obj; + nvgpu_memcpy((u8 *)pset->table.slave_entries, + (u8 *)pclk_prog_35_master_table->table.p_slave_entries, + slavesize); + + return status; +} + +static int _clk_prog_1x_master_rail_construct_vf_point(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_rail, + struct clk_vf_point *p_vf_point_tmp, u8 *p_vf_point_idx) +{ + struct clk_vf_point *p_vf_point; + int status; + + nvgpu_log_info(g, " "); + + p_vf_point = nvgpu_construct_clk_vf_point(g, (void *)p_vf_point_tmp); + if (p_vf_point == NULL) { + status = -ENOMEM; + goto done; + } + status = pclk->clk_vf_pointobjs->super.super.objinsert( + &pclk->clk_vf_pointobjs->super.super, + &p_vf_point->super, + *p_vf_point_idx); + if (status != 0) { + goto done; + } + + p_vf_rail->vf_point_idx_last = (*p_vf_point_idx)++; + +done: + nvgpu_log_info(g, "done status %x", status); + return status; +} + +static int clk_prog_construct_super(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct clk_prog *pclkprog; + int status = 0; + + pclkprog = nvgpu_kzalloc(g, size); + if (pclkprog == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pclkprog, pargs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pclkprog; + + pclkprog->super.pmudatainit = + clk_prog_pmudatainit_super; + return status; +} + + +static int clk_prog_construct_1x(struct gk20a *g, struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_prog_1x *pclkprog; + struct clk_prog_1x *ptmpprog = + (struct clk_prog_1x *)pargs; + int status = 0; + + nvgpu_log_info(g, " "); + obj_tmp->type_mask |= (u32)BIT(CTRL_CLK_CLK_PROG_TYPE_1X); + status = clk_prog_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkprog = (struct 
clk_prog_1x *)(void *)*obj; + + pclkprog->super.super.pmudatainit = + clk_prog_pmudatainit_1x; + + pclkprog->source = ptmpprog->source; + pclkprog->freq_max_mhz = ptmpprog->freq_max_mhz; + pclkprog->source_data = ptmpprog->source_data; + + return status; +} + +static int clk_prog_construct_35(struct gk20a *g, struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_prog_1x *pclkprog; + struct clk_prog_1x *ptmpprog = + (struct clk_prog_1x *)pargs; + int status = 0; + + nvgpu_log_info(g, " "); + obj_tmp->type_mask |= (u32)BIT(CTRL_CLK_CLK_PROG_TYPE_35); + status = clk_prog_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkprog = (struct clk_prog_1x *)(void *)*obj; + + pclkprog->super.super.pmudatainit = + clk_prog_pmudatainit_1x; + + pclkprog->source = ptmpprog->source; + pclkprog->freq_max_mhz = ptmpprog->freq_max_mhz; + pclkprog->source_data = ptmpprog->source_data; + + return status; +} + +static int clk_prog_construct_1x_master(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_prog_1x_master *pclkprog; + struct clk_prog_1x_master *ptmpprog = + (struct clk_prog_1x_master *)pargs; + int status = 0; + size_t vfsize = sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) * + g->pmu->clk_pmu->clk_progobjs->vf_entry_count; + u8 railidx; + + nvgpu_log_info(g, " type - %x", pmu_board_obj_get_type(pargs)); + + obj_tmp->type_mask |= (u32)BIT(CTRL_CLK_CLK_PROG_TYPE_1X_MASTER); + status = clk_prog_construct_1x(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkprog = (struct clk_prog_1x_master *)(void *)*obj; + + pclkprog->super.super.super.pmudatainit = + clk_prog_pmudatainit_1x_master; + + pclkprog->vfflatten = + vfflatten_prog_1x_master; + + pclkprog->vflookup = + vflookup_prog_1x_master; + + pclkprog->getfpoints = + getfpoints_prog_1x_master; + + pclkprog->getslaveclk = + getslaveclk_prog_1x_master; + + pclkprog->p_vf_entries = (struct ctrl_clk_clk_prog_1x_master_vf_entry *) + nvgpu_kzalloc(g, vfsize); + + nvgpu_memcpy((u8 *)pclkprog->p_vf_entries, + (u8 *)ptmpprog->p_vf_entries, vfsize); + + pclkprog->b_o_c_o_v_enabled = ptmpprog->b_o_c_o_v_enabled; + + for (railidx = 0; + railidx < g->pmu->clk_pmu->clk_progobjs->vf_entry_count; + railidx++) { + pclkprog->p_vf_entries[railidx].vf_point_idx_first = + CTRL_CLK_CLK_VF_POINT_IDX_INVALID; + pclkprog->p_vf_entries[railidx].vf_point_idx_last = + CTRL_CLK_CLK_VF_POINT_IDX_INVALID; + } + + return status; +} + +static int clk_prog_construct_35_master(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_prog_35_master *pclkprog; + struct clk_prog_35_master *ptmpprog = + (struct clk_prog_35_master *)pargs; + int status = 0; + size_t voltrail_sec_vfsize = + sizeof(struct ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail) + * CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES; + + nvgpu_log_info(g, " type - %x", pmu_board_obj_get_type(pargs)); + + obj_tmp->type_mask |= (u32)BIT(CTRL_CLK_CLK_PROG_TYPE_35_MASTER); + status = clk_prog_construct_1x_master(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkprog = (struct clk_prog_35_master *)(void *)*obj; + + pclkprog->super.super.super.pmudatainit = + clk_prog_pmudatainit_35_master; + + pclkprog->p_voltrail_sec_vf_entries = + (struct 
ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail *) + nvgpu_kzalloc(g, voltrail_sec_vfsize); + if (pclkprog->p_voltrail_sec_vf_entries == NULL) { + return -ENOMEM; + } + + (void) memset(pclkprog->p_voltrail_sec_vf_entries, + CTRL_CLK_CLK_DOMAIN_INDEX_INVALID, voltrail_sec_vfsize); + + nvgpu_memcpy((u8 *)pclkprog->p_voltrail_sec_vf_entries, + (u8 *)ptmpprog->p_voltrail_sec_vf_entries, voltrail_sec_vfsize); + + return status; +} + +static int clk_prog_construct_35_master_ratio(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_prog_35_master_ratio *pclkprog; + struct clk_prog_35_master_ratio *ptmpprog = + (struct clk_prog_35_master_ratio *)pargs; + int status = 0; + size_t slavesize = sizeof( + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry) * + g->pmu->clk_pmu->clk_progobjs->slave_entry_count; + + if (pmu_board_obj_get_type(pargs) != CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO); + status = clk_prog_construct_35_master(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkprog = (struct clk_prog_35_master_ratio *)(void *)*obj; + + pclkprog->super.super.super.super.pmudatainit = + clk_prog_pmudatainit_35_master_ratio; + + pclkprog->ratio.p_slave_entries = + (struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *) + nvgpu_kzalloc(g, slavesize); + if (pclkprog->ratio.p_slave_entries == NULL) { + return -ENOMEM; + } + + (void) memset(pclkprog->ratio.p_slave_entries, + CTRL_CLK_CLK_DOMAIN_INDEX_INVALID, slavesize); + + nvgpu_memcpy((u8 *)pclkprog->ratio.p_slave_entries, + (u8 *)ptmpprog->ratio.p_slave_entries, slavesize); + + return status; +} + +static int clk_prog_construct_35_master_table(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_prog_35_master_table *pclkprog; + struct clk_prog_35_master_table *ptmpprog = + (struct clk_prog_35_master_table *)pargs; + int status = 0; + size_t slavesize = + sizeof(struct ctrl_clk_clk_prog_1x_master_table_slave_entry) * + g->pmu->clk_pmu->clk_progobjs->slave_entry_count; + + nvgpu_log_info(g, "type - %x", pmu_board_obj_get_type(pargs)); + + if (pmu_board_obj_get_type(pargs) != CTRL_CLK_CLK_PROG_TYPE_35_MASTER_TABLE) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_CLK_CLK_PROG_TYPE_35_MASTER_TABLE); + status = clk_prog_construct_35_master(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkprog = (struct clk_prog_35_master_table *)(void *)*obj; + + pclkprog->super.super.super.super.pmudatainit = + clk_prog_pmudatainit_35_master_table; + + pclkprog->table.p_slave_entries = + (struct ctrl_clk_clk_prog_1x_master_table_slave_entry *) + nvgpu_kzalloc(g, slavesize); + + if (pclkprog->table.p_slave_entries == NULL) { + status = -ENOMEM; + goto exit; + } + + (void) memset(pclkprog->table.p_slave_entries, + CTRL_CLK_CLK_DOMAIN_INDEX_INVALID, slavesize); + + nvgpu_memcpy((u8 *)pclkprog->table.p_slave_entries, + (u8 *)ptmpprog->table.p_slave_entries, slavesize); + +exit: + if (status != 0) { + status = (*obj)->destruct(*obj); + } + + return status; +} + +static struct clk_vf_point *get_vf_point_by_idx( + struct nvgpu_clk_pmupstate *pclk, u32 idx) +{ + return (struct clk_vf_point *)BOARDOBJGRP_OBJ_GET_BY_IDX( + &pclk->clk_vf_pointobjs->super.super, (u8)(idx)); +} + +static struct clk_prog 
*construct_clk_prog(struct gk20a *g, void *pargs) +{ + struct pmu_board_obj *obj = NULL; + int status; + + nvgpu_log_info(g, " type - %x", pmu_board_obj_get_type(pargs)); + switch (pmu_board_obj_get_type(pargs)) { + case CTRL_CLK_CLK_PROG_TYPE_35: + status = clk_prog_construct_35(g, &obj, + sizeof(struct clk_prog_1x), pargs); + break; + + case CTRL_CLK_CLK_PROG_TYPE_35_MASTER_TABLE: + status = clk_prog_construct_35_master_table(g, &obj, + sizeof(struct clk_prog_35_master_table), pargs); + break; + + case CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO: + status = clk_prog_construct_35_master_ratio(g, &obj, + sizeof(struct clk_prog_35_master_ratio), pargs); + break; + default: + nvgpu_err(g, "Unsupported Clk_prog type in Vbios table"); + status = -EINVAL; + break; + } + + if (status != 0) { + if (obj != NULL) { + status = obj->destruct(obj); + if (status != 0) { + nvgpu_err(g, "destruct failed err=%d", status); + } + } + return NULL; + } + + nvgpu_log_info(g, " Done"); + + return (struct clk_prog *)(void *)obj; +} + +static int vfflatten_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 clk_domain_idx, u16 *pfreqmaxlastmhz) +{ + struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_rail; + struct ctrl_clk_clk_prog_1x_source_pll *source_pll; + union { + struct pmu_board_obj obj; + struct clk_vf_point vf_point; + struct clk_vf_point_freq freq; + struct clk_vf_point_volt volt; + } vf_point_data; + int status = 0; + u8 step_count; + u8 freq_step_size_mhz = 0; + u8 vf_point_idx; + u8 vf_rail_idx; + + nvgpu_log_info(g, " "); + (void) memset(&vf_point_data, 0x0, sizeof(vf_point_data)); + + vf_point_idx = BOARDOBJGRP_NEXT_EMPTY_IDX( + &pclk->clk_vf_pointobjs->super.super); + + for (vf_rail_idx = 0; + vf_rail_idx < pclk->clk_progobjs->vf_entry_count; + vf_rail_idx++) { + u32 voltage_min_uv; + u32 voltage_step_size_uv; + u8 i; + + p_vf_rail = &p1xmaster->p_vf_entries[vf_rail_idx]; + if (p_vf_rail->vfe_idx == CTRL_BOARDOBJ_IDX_INVALID) { + continue; + } + + p_vf_rail->vf_point_idx_first = vf_point_idx; + + vf_point_data.vf_point.vfe_equ_idx = p_vf_rail->vfe_idx; + vf_point_data.vf_point.volt_rail_idx = vf_rail_idx; + + step_count = 0; + + switch (p1xmaster->super.source) { + case CTRL_CLK_PROG_1X_SOURCE_PLL: + source_pll = &p1xmaster->super.source_data.source_pll; + freq_step_size_mhz = source_pll->freq_step_size_mhz; + step_count = (freq_step_size_mhz == 0U) ? 
0U : + (u8)(p1xmaster->super.freq_max_mhz - + *pfreqmaxlastmhz - 1U) / + freq_step_size_mhz; + /* Intentional fall-through.*/ + + case CTRL_CLK_PROG_1X_SOURCE_ONE_SOURCE: + vf_point_data.obj.type = + CTRL_CLK_CLK_VF_POINT_TYPE_35_FREQ; + do { + vf_point_data.vf_point.pair.freq_mhz = + p1xmaster->super.freq_max_mhz - + U16(step_count) * + U16(freq_step_size_mhz); + + status = _clk_prog_1x_master_rail_construct_vf_point(g, pclk, + p1xmaster, p_vf_rail, + &vf_point_data.vf_point, &vf_point_idx); + if (status != 0) { + goto done; + } + } while (step_count-- > 0U); + break; + + case CTRL_CLK_PROG_1X_SOURCE_FLL: + voltage_min_uv = + nvgpu_pmu_clk_fll_get_lut_min_volt(pclk); + voltage_step_size_uv = + nvgpu_pmu_clk_fll_get_lut_step_size(pclk); + step_count = + clk_get_fll_lut_vf_num_entries(pclk); + + /* FLL sources use a voltage-based VF_POINT.*/ + vf_point_data.obj.type = + CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT_PRI; + for (i = 0; i < step_count; i++) { + vf_point_data.volt.source_voltage_uv = + voltage_min_uv + i * voltage_step_size_uv; + + status = _clk_prog_1x_master_rail_construct_vf_point(g, pclk, + p1xmaster, p_vf_rail, + &vf_point_data.vf_point, &vf_point_idx); + if (status != 0) { + goto done; + } + } + break; + default: + break; + } + } + + *pfreqmaxlastmhz = p1xmaster->super.freq_max_mhz; + +done: + nvgpu_log_info(g, "done status %x", status); + return status; +} + +static int vflookup_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, u8 *slave_clk_domain, + u16 *pclkmhz, u32 *pvoltuv, u8 rail) +{ + u32 j; + struct ctrl_clk_clk_prog_1x_master_vf_entry + *pvfentry; + struct clk_vf_point *pvfpoint; + struct nvgpu_clk_progs *pclkprogobjs; + struct clk_prog_1x_master_ratio *p1xmasterratio; + u16 clkmhz; + u32 voltuv; + u8 slaveentrycount; + u32 i; + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *pslaveents; + + if ((*pclkmhz != 0U) && (*pvoltuv != 0U)) { + return -EINVAL; + } + + pclkprogobjs = pclk->clk_progobjs; + + slaveentrycount = pclkprogobjs->slave_entry_count; + + if (pclkprogobjs->vf_entry_count > + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES) { + return -EINVAL; + } + + if (rail >= pclkprogobjs->vf_entry_count) { + return -EINVAL; + } + + pvfentry = p1xmaster->p_vf_entries; + + pvfentry = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)(void *)( + (u8 *)pvfentry + + (sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) * + rail)); + + clkmhz = *pclkmhz; + voltuv = *pvoltuv; + + /*if domain is slave domain and freq is input + then derive master clk */ + if ((slave_clk_domain != NULL) && (*pclkmhz != 0U)) { + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) { + + p1xmasterratio = + (struct clk_prog_1x_master_ratio *)(void *)p1xmaster; + pslaveents = p1xmasterratio->p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + *slave_clk_domain) { + break; + } + pslaveents++; + } + if (i == slaveentrycount) { + return -EINVAL; + } + clkmhz = (clkmhz * 100U)/pslaveents->ratio; + } else { + /* only support ratio for now */ + return -EINVAL; + } + } + + /* if both volt and clks are zero simply print*/ + if ((*pvoltuv == 0U) && (*pclkmhz == 0U)) { + for (j = pvfentry->vf_point_idx_first; + j <= pvfentry->vf_point_idx_last; j++) { + pvfpoint = get_vf_point_by_idx(pclk, j); + nvgpu_err(g, "v %x c %x", + pvfpoint->pair.voltage_uv, + pvfpoint->pair.freq_mhz); + } + return -EINVAL; + } + /* start looking up f for v for v 
for f */ + /* looking for volt? */ + if (*pvoltuv == 0U) { + pvfpoint = get_vf_point_by_idx(pclk, + pvfentry->vf_point_idx_last); + /* above range? */ + if (clkmhz > pvfpoint->pair.freq_mhz) { + return -EINVAL; + } + + for (j = pvfentry->vf_point_idx_last; + j >= pvfentry->vf_point_idx_first; j--) { + pvfpoint = get_vf_point_by_idx(pclk, j); + if (clkmhz <= pvfpoint->pair.freq_mhz) { + voltuv = pvfpoint->pair.voltage_uv; + } else { + break; + } + } + } else { /* looking for clk? */ + + pvfpoint = get_vf_point_by_idx(pclk, + pvfentry->vf_point_idx_first); + /* below range? */ + if (voltuv < pvfpoint->pair.voltage_uv) { + return -EINVAL; + } + + for (j = pvfentry->vf_point_idx_first; + j <= pvfentry->vf_point_idx_last; j++) { + pvfpoint = get_vf_point_by_idx(pclk, j); + if (voltuv >= pvfpoint->pair.voltage_uv) { + clkmhz = pvfpoint->pair.freq_mhz; + } else { + break; + } + } + } + + /*if domain is slave domain and freq was looked up + then derive slave clk */ + if ((slave_clk_domain != NULL) && (*pclkmhz == 0U)) { + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) { + + p1xmasterratio = + (struct clk_prog_1x_master_ratio *)(void *)p1xmaster; + pslaveents = p1xmasterratio->p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + *slave_clk_domain) { + break; + } + pslaveents++; + } + if (i == slaveentrycount) { + return -EINVAL; + } + clkmhz = (clkmhz * pslaveents->ratio)/100U; + } else { + /* only support ratio for now */ + return -EINVAL; + } + } + *pclkmhz = clkmhz; + *pvoltuv = voltuv; + if ((clkmhz == 0U) || (voltuv == 0U)) { + return -EINVAL; + } + return 0; +} + +static int getfpoints_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u32 *pfpointscount, u16 **ppfreqpointsinmhz, u8 rail) +{ + + struct ctrl_clk_clk_prog_1x_master_vf_entry + *pvfentry; + struct clk_vf_point *pvfpoint; + struct nvgpu_clk_progs *pclkprogobjs; + u8 j; + u32 fpointscount = 0; + + if (pfpointscount == NULL) { + return -EINVAL; + } + + pclkprogobjs = pclk->clk_progobjs; + + if (pclkprogobjs->vf_entry_count > + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES) { + return -EINVAL; + } + + if (rail >= pclkprogobjs->vf_entry_count) { + return -EINVAL; + } + + pvfentry = p1xmaster->p_vf_entries; + + pvfentry = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)(void *)( + (u8 *)pvfentry + + ((u8)sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) * + rail)); + + fpointscount = (u32)pvfentry->vf_point_idx_last - + (u32)pvfentry->vf_point_idx_first + 1U; + + /* if pointer for freq data is NULL simply return count */ + if (*ppfreqpointsinmhz == NULL) { + goto done; + } + + if (fpointscount > *pfpointscount) { + return -ENOMEM; + } + for (j = pvfentry->vf_point_idx_first; + j <= pvfentry->vf_point_idx_last; j++) { + pvfpoint = get_vf_point_by_idx(pclk, j); + **ppfreqpointsinmhz = pvfpoint->pair.freq_mhz; + (*ppfreqpointsinmhz)++; + } +done: + *pfpointscount = fpointscount; + return 0; +} + +static int getslaveclk_prog_1x_master(struct gk20a *g, + struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 slave_clk_domain, u16 *pclkmhz, u16 masterclkmhz, u8 *ratio +) +{ + struct nvgpu_clk_progs *pclkprogobjs; + struct clk_prog_1x_master_ratio *p1xmasterratio; + struct clk_prog_35_master_ratio *p35masterratio; + u8 slaveentrycount; + u8 i; + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *pslaveents; + u32 ver = g->params.gpu_arch + 
g->params.gpu_impl; + if (pclkmhz == NULL) { + return -EINVAL; + } + + if (masterclkmhz == 0U) { + return -EINVAL; + } + + *pclkmhz = 0; + pclkprogobjs = pclk->clk_progobjs; + + slaveentrycount = pclkprogobjs->slave_entry_count; + if(ver == NVGPU_GPUID_GV100) { + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) { + p1xmasterratio = + (struct clk_prog_1x_master_ratio *)(void *)p1xmaster; + pslaveents = p1xmasterratio->p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + slave_clk_domain) { + break; + } + pslaveents++; + } + if (i == slaveentrycount) { + return -EINVAL; + } + *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; + } else { + /* only support ratio for now */ + return -EINVAL; + } + } else { + if (p1xmaster->super.super.super.implements(g, + &p1xmaster->super.super.super, + CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO)) { + p35masterratio = + (struct clk_prog_35_master_ratio *)(void *)p1xmaster; + pslaveents = p35masterratio->ratio.p_slave_entries; + for (i = 0; i < slaveentrycount; i++) { + if (pslaveents->clk_dom_idx == + slave_clk_domain) { + break; + } + pslaveents++; + } + if (i == slaveentrycount) { + return -EINVAL; + } + *pclkmhz = (masterclkmhz * pslaveents->ratio)/100U; + /* Floor/Quantize all the slave clocks to the multiple of step size*/ + *pclkmhz = (*pclkmhz / FREQ_STEP_SIZE_MHZ) * FREQ_STEP_SIZE_MHZ; + *ratio = pslaveents->ratio; + } else { + /* only support ratio for now */ + return -EINVAL; + } + } + return 0; +} + +int clk_prog_init_pmupstate(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmu->clk_pmu->clk_progobjs != NULL) { + return 0; + } + + g->pmu->clk_pmu->clk_progobjs = nvgpu_kzalloc(g, + sizeof(*g->pmu->clk_pmu->clk_progobjs)); + if (g->pmu->clk_pmu->clk_progobjs == NULL) { + return -ENOMEM; + } + + return 0; +} + +void clk_prog_free_pmupstate(struct gk20a *g) +{ + nvgpu_kfree(g, g->pmu->clk_pmu->clk_progobjs); + g->pmu->clk_pmu->clk_progobjs = NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_prog.h b/drivers/gpu/nvgpu/common/pmu/clk/clk_prog.h new file mode 100644 index 000000000..ece024474 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_prog.h @@ -0,0 +1,139 @@ +/* +* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. +* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+*/ + +#ifndef NVGPU_CLK_PROG_H +#define NVGPU_CLK_PROG_H + +#include +#include +#include +#include +#include + +struct clk_prog_1x_master; + +typedef int vf_flatten(struct gk20a *g, struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 clk_domain_idx, u16 *pfreqmaxlastmhz); + +typedef int vf_lookup(struct gk20a *g, struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 *slave_clk_domain_idx, u16 *pclkmhz, + u32 *pvoltuv, u8 rail); + +typedef int get_slaveclk(struct gk20a *g, struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u8 slave_clk_domain_idx, u16 *pclkmhz, + u16 masterclkmhz, u8 *ratio); + +typedef int get_fpoints(struct gk20a *g, struct nvgpu_clk_pmupstate *pclk, + struct clk_prog_1x_master *p1xmaster, + u32 *pfpointscount, + u16 **ppfreqpointsinmhz, u8 rail); + + +struct clk_prog { + struct pmu_board_obj super; +}; + +struct clk_prog_1x { + struct clk_prog super; + u8 source; + u16 freq_max_mhz; + union ctrl_clk_clk_prog_1x_source_data source_data; +}; + +struct clk_prog_1x_master { + struct clk_prog_1x super; + bool b_o_c_o_v_enabled; + struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_entries; + struct ctrl_clk_clk_delta deltas; + union ctrl_clk_clk_prog_1x_master_source_data source_data; + vf_flatten *vfflatten; + vf_lookup *vflookup; + get_fpoints *getfpoints; + get_slaveclk *getslaveclk; +}; + +struct clk_prog_1x_master_ratio { + struct clk_prog_1x_master super; + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *p_slave_entries; +}; + +struct clk_prog_1x_master_table { + struct clk_prog_1x_master super; + struct ctrl_clk_clk_prog_1x_master_table_slave_entry *p_slave_entries; +}; + +struct clk_prog_3x_master { + bool b_o_c_o_v_enabled; + struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_entries; + struct ctrl_clk_clk_delta deltas; + union ctrl_clk_clk_prog_1x_master_source_data source_data; + vf_flatten *vfflatten; + vf_lookup *vflookup; + get_fpoints *getfpoints; + get_slaveclk *getslaveclk; +}; + +struct clk_prog_3x_master_ratio { + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *p_slave_entries; +}; + +struct clk_prog_3x_master_table { + struct ctrl_clk_clk_prog_1x_master_table_slave_entry *p_slave_entries; +}; + +struct clk_prog_35_master { + struct clk_prog_1x super; + struct clk_prog_3x_master master; + struct ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail + *p_voltrail_sec_vf_entries; +}; + +struct clk_prog_35_master_ratio { + struct clk_prog_35_master super; + struct clk_prog_3x_master_ratio ratio; +}; + +struct clk_prog_35_master_table { + struct clk_prog_35_master super; + struct clk_prog_3x_master_table table; +}; + +struct nvgpu_clk_progs { + struct boardobjgrp_e255 super; + u8 slave_entry_count; + u8 vf_entry_count; + u8 vf_sec_entry_count; +}; + +#define CLK_CLK_PROG_GET(pclk, idx)\ + ((struct clk_prog *)(void *)BOARDOBJGRP_OBJ_GET_BY_IDX(\ + &pclk->clk_progobjs->super.super, (u8)(idx))) + + +int clk_prog_init_pmupstate(struct gk20a *g); +void clk_prog_free_pmupstate(struct gk20a *g); +int clk_prog_sw_setup(struct gk20a *g); +int clk_prog_pmu_setup(struct gk20a *g); +#endif /* NVGPU_CLK_PROG_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_vf_point.c b/drivers/gpu/nvgpu/common/pmu/clk/clk_vf_point.c new file mode 100644 index 000000000..9e329e67f --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_vf_point.c @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_clk_inf.h" +#include "clk_vf_point.h" +#include "clk.h" + +int nvgpu_clk_domain_volt_to_freq(struct gk20a *g, u8 clkdomain_idx, + u32 *pclkmhz, u32 *pvoltuv, u8 railidx) +{ + struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt rpc; + struct nvgpu_pmu *pmu = g->pmu; + int status = -EINVAL; + + (void)memset(&rpc, 0, + sizeof(struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt)); + rpc.volt_rail_idx = + nvgpu_pmu_volt_rail_volt_domain_convert_to_idx(g, railidx); + rpc.clk_domain_idx = clkdomain_idx; + rpc.voltage_type = CTRL_VOLT_DOMAIN_LOGIC; + rpc.input.value = *pvoltuv; + PMU_RPC_EXECUTE_CPB(status, pmu, CLK, + CLK_DOMAIN_35_PROG_VOLT_TO_FREQ, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute Freq to Volt RPC status=0x%x", + status); + } + *pclkmhz = rpc.output.value; + return status; +} + +static int _clk_vf_point_pmudatainit_super(struct gk20a *g, struct pmu_board_obj + *obj, struct nv_pmu_boardobj *pmu_obj); + +static int _clk_vf_points_pmudatainit(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + int status = 0; + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for clk vfpoint 0x%x", + status); + goto done; + } + +done: + return status; +} + +static int _clk_vf_points_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + struct nv_pmu_clk_clk_vf_point_boardobj_grp_set *pgrp_set = + (struct nv_pmu_clk_clk_vf_point_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (idx >= CTRL_BOARDOBJGRP_E255_MAX_OBJECTS) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, " Done"); + return 0; +} + +static int _clk_vf_points_pmustatus_instget(struct gk20a *g, + void *pboardobjgrppmu, + struct nv_pmu_boardobj_query **obj_pmu_status, u8 idx) +{ + struct nv_pmu_clk_clk_vf_point_boardobj_grp_get_status + *pgrp_get_status = + (struct nv_pmu_clk_clk_vf_point_boardobj_grp_get_status *) + pboardobjgrppmu; + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (idx >= 
CTRL_BOARDOBJGRP_E255_MAX_OBJECTS) { + return -EINVAL; + } + + *obj_pmu_status = (struct nv_pmu_boardobj_query *)(void *) + &pgrp_get_status->objects[idx].data.obj; + return 0; +} + +int clk_vf_point_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + status = nvgpu_boardobjgrp_construct_e255(g, + &g->pmu->clk_pmu->clk_vf_pointobjs->super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for clk vfpoint, status - 0x%x", + status); + goto done; + } + + pboardobjgrp = &g->pmu->clk_pmu->clk_vf_pointobjs->super.super; + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, CLK_VF_POINT); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + clk, CLK, clk_vf_point, CLK_VF_POINT); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET - 0x%x", + status); + goto done; + } + + status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, + &g->pmu->clk_pmu->clk_vf_pointobjs->super.super, + clk, CLK, clk_vf_point, CLK_VF_POINT); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET - 0x%x", + status); + goto done; + } + + pboardobjgrp->pmudatainit = _clk_vf_points_pmudatainit; + pboardobjgrp->pmudatainstget = _clk_vf_points_pmudata_instget; + pboardobjgrp->pmustatusinstget = _clk_vf_points_pmustatus_instget; + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int clk_vf_point_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->clk_pmu->clk_vf_pointobjs->super.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +static int clk_vf_point_construct_super(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct clk_vf_point *pclkvfpoint; + struct clk_vf_point *ptmpvfpoint = + (struct clk_vf_point *)pargs; + int status = 0; + + pclkvfpoint = nvgpu_kzalloc(g, size); + if (pclkvfpoint == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pclkvfpoint, pargs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pclkvfpoint; + + pclkvfpoint->super.pmudatainit = + _clk_vf_point_pmudatainit_super; + + pclkvfpoint->vfe_equ_idx = ptmpvfpoint->vfe_equ_idx; + pclkvfpoint->volt_rail_idx = ptmpvfpoint->volt_rail_idx; + + return status; +} + +static int _clk_vf_point_pmudatainit_volt(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_vf_point_volt *pclk_vf_point_volt; + struct nv_pmu_clk_clk_vf_point_volt_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = _clk_vf_point_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_vf_point_volt = + (struct clk_vf_point_volt *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_vf_point_volt_boardobj_set *) + pmu_obj; + + pset->source_voltage_uv = pclk_vf_point_volt->source_voltage_uv; + pset->freq_delta.data = pclk_vf_point_volt->freq_delta.data; + pset->freq_delta.type = pclk_vf_point_volt->freq_delta.type; + + return status; +} + +static int _clk_vf_point_pmudatainit_freq(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_vf_point_freq *pclk_vf_point_freq; + struct nv_pmu_clk_clk_vf_point_freq_boardobj_set 
*pset; + + nvgpu_log_info(g, " "); + + status = _clk_vf_point_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_vf_point_freq = + (struct clk_vf_point_freq *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_vf_point_freq_boardobj_set *) + pmu_obj; + + pset->freq_mhz = pclk_vf_point_freq->super.pair.freq_mhz; + + pset->volt_delta_uv = pclk_vf_point_freq->volt_delta_uv; + + return status; +} + +static int clk_vf_point_construct_volt_35(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_vf_point_volt *pclkvfpoint; + struct clk_vf_point_volt *ptmpvfpoint = + (struct clk_vf_point_volt *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != + CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT_PRI) { + return -EINVAL; + } + + obj_tmp->type_mask = (u32) BIT(CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT_PRI); + status = clk_vf_point_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkvfpoint = (struct clk_vf_point_volt *) (void *) *obj; + + pclkvfpoint->super.super.pmudatainit = + _clk_vf_point_pmudatainit_volt; + + pclkvfpoint->source_voltage_uv = ptmpvfpoint->source_voltage_uv; + pclkvfpoint->freq_delta = ptmpvfpoint->freq_delta; + + return status; +} + +static int clk_vf_point_construct_freq_35(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct clk_vf_point_freq *pclkvfpoint; + struct clk_vf_point_freq *ptmpvfpoint = + (struct clk_vf_point_freq *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_CLK_CLK_VF_POINT_TYPE_35_FREQ) { + return -EINVAL; + } + + obj_tmp->type_mask = (u32) BIT(CTRL_CLK_CLK_VF_POINT_TYPE_35_FREQ); + status = clk_vf_point_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pclkvfpoint = (struct clk_vf_point_freq *)(void *) *obj; + + pclkvfpoint->super.super.pmudatainit = + _clk_vf_point_pmudatainit_freq; + + pclkvfpoint->super.pair.freq_mhz = ptmpvfpoint->super.pair.freq_mhz; + + return status; +} + +struct clk_vf_point *nvgpu_construct_clk_vf_point(struct gk20a *g, void *pargs) +{ + struct pmu_board_obj *obj = NULL; + int status; + + nvgpu_log_info(g, " "); + switch (pmu_board_obj_get_type(pargs)) { + + case CTRL_CLK_CLK_VF_POINT_TYPE_35_FREQ: + status = clk_vf_point_construct_freq_35(g, &obj, + sizeof(struct clk_vf_point_freq), pargs); + break; + + case CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT_PRI: + status = clk_vf_point_construct_volt_35(g, &obj, + sizeof(struct clk_vf_point_volt), pargs); + break; + + default: + status = -EINVAL; + break; + } + + if (status != 0) { + return NULL; + } + + nvgpu_log_info(g, " Done"); + + return (struct clk_vf_point *)(void *)obj; +} + +static int _clk_vf_point_pmudatainit_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_vf_point *pclk_vf_point; + struct nv_pmu_clk_clk_vf_point_boardobj_set *pset; + + nvgpu_log_info(g, " "); + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pclk_vf_point = + (struct clk_vf_point *)(void *)obj; + + pset = (struct nv_pmu_clk_clk_vf_point_boardobj_set *) + pmu_obj; + + + pset->vfe_equ_idx = pclk_vf_point->vfe_equ_idx; + pset->volt_rail_idx = pclk_vf_point->volt_rail_idx; + return status; +} + +#ifdef CONFIG_NVGPU_CLK_ARB +int nvgpu_clk_arb_find_slave_points(struct 
nvgpu_clk_arb *arb, + struct nvgpu_clk_slave_freq *vf_point) +{ + + u16 gpc2clk_target; + struct nvgpu_clk_vf_table *table; + u32 index; + int status = 0; + do { + gpc2clk_target = vf_point->gpc_mhz; + + table = NV_READ_ONCE(arb->current_vf_table); + /* pointer to table can be updated by callback */ + nvgpu_smp_rmb(); + + if (table == NULL) { + continue; + } + if ((table->gpc2clk_num_points == 0U)) { + nvgpu_err(arb->g, "found empty table"); + status = -EINVAL; ; + } + + /* round up the freq requests */ + for (index = 0; index < table->gpc2clk_num_points; index++) { + if ((table->gpc2clk_points[index].gpc_mhz >= + gpc2clk_target)) { + gpc2clk_target = + table->gpc2clk_points[index].gpc_mhz; + vf_point->sys_mhz = + table->gpc2clk_points[index].sys_mhz; + vf_point->xbar_mhz = + table->gpc2clk_points[index].xbar_mhz; + vf_point->nvd_mhz = + table->gpc2clk_points[index].nvd_mhz; + vf_point->host_mhz = + table->gpc2clk_points[index].host_mhz; + break; + } + } + /* + * If the requested freq is lower than available + * one in VF table, use the VF table freq + */ + if (gpc2clk_target > vf_point->gpc_mhz) { + vf_point->gpc_mhz = gpc2clk_target; + } + } while ((table == NULL) || + (NV_READ_ONCE(arb->current_vf_table) != table)); + + return status; + +} + +/*get latest vf point data from PMU */ +int nvgpu_clk_vf_point_cache(struct gk20a *g) +{ + struct nvgpu_clk_vf_points *pclk_vf_points; + struct boardobjgrp *pboardobjgrp; + struct pmu_board_obj *obj = NULL; + int status; + struct clk_vf_point *pclk_vf_point; + u8 index; + u32 voltage_min_uv,voltage_step_size_uv; + u32 gpcclk_clkmhz=0, gpcclk_voltuv=0; + + nvgpu_log_info(g, " "); + pclk_vf_points = g->pmu->clk_pmu->clk_vf_pointobjs; + pboardobjgrp = &pclk_vf_points->super.super; + + voltage_min_uv = nvgpu_pmu_clk_fll_get_lut_min_volt(g->pmu->clk_pmu); + voltage_step_size_uv = + nvgpu_pmu_clk_fll_get_lut_step_size(g->pmu->clk_pmu); + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + pclk_vf_point = (struct clk_vf_point *)(void *)obj; + gpcclk_voltuv = + voltage_min_uv + index * voltage_step_size_uv; + status = nvgpu_clk_domain_volt_to_freq(g, 0, &gpcclk_clkmhz, + &gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); + if (status != 0) { + nvgpu_err(g, + "Failed to get freq for requested voltage"); + return status; + } + + pclk_vf_point->pair.freq_mhz = (u16)gpcclk_clkmhz; + pclk_vf_point->pair.voltage_uv = gpcclk_voltuv; + } + return status; +} +#endif + +int clk_vf_point_init_pmupstate(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmu->clk_pmu->clk_vf_pointobjs != NULL) { + return 0; + } + + g->pmu->clk_pmu->clk_vf_pointobjs = nvgpu_kzalloc(g, + sizeof(*g->pmu->clk_pmu->clk_vf_pointobjs)); + if (g->pmu->clk_pmu->clk_vf_pointobjs == NULL) { + return -ENOMEM; + } + + return 0; +} + +void clk_vf_point_free_pmupstate(struct gk20a *g) +{ + nvgpu_kfree(g, g->pmu->clk_pmu->clk_vf_pointobjs); + g->pmu->clk_pmu->clk_vf_pointobjs = NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_vf_point.h b/drivers/gpu/nvgpu/common/pmu/clk/clk_vf_point.h new file mode 100644 index 000000000..a5c846ed2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_vf_point.h @@ -0,0 +1,33 @@ +/* + * general clock structures & definitions + * + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CLK_VF_POINT_H +#define NVGPU_CLK_VF_POINT_H + +int clk_vf_point_init_pmupstate(struct gk20a *g); +void clk_vf_point_free_pmupstate(struct gk20a *g); +int clk_vf_point_sw_setup(struct gk20a *g); +int clk_vf_point_pmu_setup(struct gk20a *g); + +#endif /* NVGPU_CLK_VF_POINT_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_vin.c b/drivers/gpu/nvgpu/common/pmu/clk/clk_vin.c new file mode 100644 index 000000000..acbd072fd --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_vin.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_clk_inf.h" +#include "clk_vin.h" +#include "clk.h" + +static int devinit_get_vin_device_table(struct gk20a *g, + struct nvgpu_avfsvinobjs *pvinobjs); + +static int vin_device_construct_v20(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs); +static int vin_device_construct_super(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs); +static struct clk_vin_device *construct_vin_device( + struct gk20a *g, void *pargs); + +static int vin_device_init_pmudata_v20(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj); +static int vin_device_init_pmudata_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj); + +struct clk_vin_device *clk_get_vin_from_index( + struct nvgpu_avfsvinobjs *pvinobjs, u8 idx) +{ + return ((struct clk_vin_device *)BOARDOBJGRP_OBJ_GET_BY_IDX( + ((struct boardobjgrp *)&(pvinobjs->super.super)), idx)); +} + +static int nvgpu_clk_avfs_get_vin_cal_fuse_v20(struct gk20a *g, + struct nvgpu_avfsvinobjs *pvinobjs, + struct vin_device_v20 *pvindev) +{ + int status = 0; + s8 gain, offset; + u8 i; + + if (pvinobjs->calibration_rev_vbios == + g->ops.fuse.read_vin_cal_fuse_rev(g)) { + BOARDOBJGRP_FOR_EACH(&(pvinobjs->super.super), + struct vin_device_v20 *, pvindev, i) { + gain = 0; + offset = 0; + pvindev = (struct vin_device_v20 *)(void *) + clk_get_vin_from_index(pvinobjs, i); + status = g->ops.fuse.read_vin_cal_gain_offset_fuse(g, + pvindev->super.id, &gain, &offset); + if (status != 0) { + nvgpu_err(g, + "err reading vin cal for id %x", pvindev->super.id); + return status; + } + pvindev->data.vin_cal.cal_v20.gain = gain; + pvindev->data.vin_cal.cal_v20.offset = offset; + } + } + return status; + +} + +static int _clk_vin_devgrp_pmudatainit_super(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + struct nv_pmu_clk_clk_vin_device_boardobjgrp_set_header *pset = + (struct nv_pmu_clk_clk_vin_device_boardobjgrp_set_header *) + pboardobjgrppmu; + struct nvgpu_avfsvinobjs *pvin_obbj = (struct nvgpu_avfsvinobjs *) + (void *)pboardobjgrp; + int status = 0; + + nvgpu_log_info(g, " "); + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + + pset->b_vin_is_disable_allowed = pvin_obbj->vin_is_disable_allowed; + pset->version = pvin_obbj->version; + + nvgpu_log_info(g, " Done"); + return status; +} + +static int _clk_vin_devgrp_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + struct nv_pmu_clk_clk_vin_device_boardobj_grp_set *pgrp_set = + (struct nv_pmu_clk_clk_vin_device_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, " Done"); + return 0; +} + +static int _clk_vin_devgrp_pmustatus_instget(struct gk20a *g, + void *pboardobjgrppmu, + struct nv_pmu_boardobj_query **obj_pmu_status, u8 idx) +{ + struct nv_pmu_clk_clk_vin_device_boardobj_grp_get_status + *pgrp_get_status = + (struct nv_pmu_clk_clk_vin_device_boardobj_grp_get_status *) + pboardobjgrppmu; + + /*check whether pmuboardobjgrp has a valid boardobj in 
index*/ + if (((u32)BIT(idx) & + pgrp_get_status->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *obj_pmu_status = (struct nv_pmu_boardobj_query *) + &pgrp_get_status->objects[idx].data.obj; + return 0; +} + +int clk_vin_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct vin_device_v20 *pvindev = NULL; + struct nvgpu_avfsvinobjs *pvinobjs; + + nvgpu_log_info(g, " "); + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->clk_pmu->avfs_vinobjs->super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for clk vin, statu - 0x%x", + status); + goto done; + } + + pboardobjgrp = &g->pmu->clk_pmu->avfs_vinobjs->super.super; + pvinobjs = g->pmu->clk_pmu->avfs_vinobjs; + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, VIN_DEVICE); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + clk, CLK, clk_vin_device, CLK_VIN_DEVICE); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + pboardobjgrp->pmudatainit = _clk_vin_devgrp_pmudatainit_super; + pboardobjgrp->pmudatainstget = _clk_vin_devgrp_pmudata_instget; + pboardobjgrp->pmustatusinstget = _clk_vin_devgrp_pmustatus_instget; + + status = devinit_get_vin_device_table(g, g->pmu->clk_pmu->avfs_vinobjs); + if (status != 0) { + goto done; + } + + /*update vin calibration to fuse */ + status = nvgpu_clk_avfs_get_vin_cal_fuse_v20(g, pvinobjs, pvindev); + if (status != 0) { + nvgpu_err(g, "clk_avfs_get_vin_cal_fuse_v20 failed err=%d", + status); + goto done; + } + + status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, + &g->pmu->clk_pmu->avfs_vinobjs->super.super, + clk, CLK, clk_vin_device, CLK_VIN_DEVICE); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int clk_vin_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->clk_pmu->avfs_vinobjs->super.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +static int devinit_get_vin_device_table(struct gk20a *g, + struct nvgpu_avfsvinobjs *pvinobjs) +{ + int status = 0; + u8 *vin_table_ptr = NULL; + struct vin_descriptor_header_10 vin_desc_table_header = { 0 }; + struct vin_descriptor_entry_10 vin_desc_table_entry = { 0 }; + u8 *vin_tbl_entry_ptr = NULL; + u32 index = 0; + s8 offset = 0, gain = 0; + struct clk_vin_device *pvin_dev; + u32 cal_type; + + union { + struct pmu_board_obj obj; + struct clk_vin_device vin_device; + struct vin_device_v20 vin_device_v20; + } vin_device_data; + + nvgpu_log_info(g, " "); + + vin_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_CLOCK_TOKEN), + VIN_TABLE); + if (vin_table_ptr == NULL) { + status = -1; + goto done; + } + + nvgpu_memcpy((u8 *)&vin_desc_table_header, vin_table_ptr, + sizeof(struct vin_descriptor_header_10)); + /* Right now we support 0x10 version only */ + pvinobjs->version = (vin_desc_table_header.version == 0x10U) ? 
+ NV2080_CTRL_CLK_VIN_DEVICES_V10 : + NV2080_CTRL_CLK_VIN_DEVICES_DISABLED; + pvinobjs->calibration_rev_vbios = + BIOS_GET_FIELD(u8, vin_desc_table_header.flags0, + NV_VIN_DESC_FLAGS0_VIN_CAL_REVISION); + pvinobjs->vin_is_disable_allowed = + BIOS_GET_FIELD(bool, vin_desc_table_header.flags0, + NV_VIN_DESC_FLAGS0_DISABLE_CONTROL); + cal_type = BIOS_GET_FIELD(u32, vin_desc_table_header.flags0, + NV_VIN_DESC_FLAGS0_VIN_CAL_TYPE); + if (cal_type != CTRL_CLK_VIN_CAL_TYPE_V20) { + nvgpu_err(g, "Unsupported Vin calibration type"); + status = -1; + goto done; + } + + offset = BIOS_GET_FIELD(s8, vin_desc_table_header.vin_cal, + NV_VIN_DESC_VIN_CAL_OFFSET); + gain = BIOS_GET_FIELD(s8, vin_desc_table_header.vin_cal, + NV_VIN_DESC_VIN_CAL_GAIN); + + /* Read table entries*/ + vin_tbl_entry_ptr = vin_table_ptr + vin_desc_table_header.header_sizee; + for (index = 0; index < vin_desc_table_header.entry_count; index++) { + nvgpu_memcpy((u8 *)&vin_desc_table_entry, vin_tbl_entry_ptr, + sizeof(struct vin_descriptor_entry_10)); + + if (vin_desc_table_entry.vin_device_type == + CTRL_CLK_VIN_TYPE_DISABLED) { + continue; + } + + vin_device_data.obj.type = + (u8)vin_desc_table_entry.vin_device_type; + vin_device_data.vin_device.id = + (u8)vin_desc_table_entry.vin_device_id; + vin_device_data.vin_device.volt_domain_vbios = + (u8)vin_desc_table_entry.volt_domain_vbios; + vin_device_data.vin_device.flls_shared_mask = 0; + vin_device_data.vin_device.por_override_mode = + CTRL_CLK_VIN_SW_OVERRIDE_VIN_USE_HW_REQ; + vin_device_data.vin_device.override_mode = + CTRL_CLK_VIN_SW_OVERRIDE_VIN_USE_HW_REQ; + vin_device_data.vin_device_v20.data.cal_type = (u8) cal_type; + vin_device_data.vin_device_v20.data.vin_cal.cal_v20.offset = + offset; + vin_device_data.vin_device_v20.data.vin_cal.cal_v20.gain = + gain; + vin_device_data.vin_device_v20.data.vin_cal.cal_v20.offset_vfe_idx = + CTRL_CLK_VIN_VFE_IDX_INVALID; + + pvin_dev = construct_vin_device(g, (void *)&vin_device_data); + + status = boardobjgrp_objinsert(&pvinobjs->super.super, + (struct pmu_board_obj *)pvin_dev, index); + + vin_tbl_entry_ptr += vin_desc_table_header.entry_size; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +static int vin_device_construct_v20(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vin_device_v20 *pvin_device_v20; + struct vin_device_v20 *ptmpvin_device_v20 = (struct vin_device_v20 *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_CLK_VIN_TYPE_V20) { + return -EINVAL; + } + + obj_tmp->type_mask |= BIT32(CTRL_CLK_VIN_TYPE_V20); + status = vin_device_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvin_device_v20 = (struct vin_device_v20 *)(void *)*obj; + + pvin_device_v20->super.super.pmudatainit = + vin_device_init_pmudata_v20; + + pvin_device_v20->data.cal_type = ptmpvin_device_v20->data.cal_type; + pvin_device_v20->data.vin_cal.cal_v20.offset = + ptmpvin_device_v20->data.vin_cal.cal_v20.offset; + pvin_device_v20->data.vin_cal.cal_v20.gain = + ptmpvin_device_v20->data.vin_cal.cal_v20.gain; + pvin_device_v20->data.vin_cal.cal_v20.offset_vfe_idx = + ptmpvin_device_v20->data.vin_cal.cal_v20.offset_vfe_idx; + + return status; +} +static int vin_device_construct_super(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct clk_vin_device *pvin_device; + struct clk_vin_device *ptmpvin_device = + (struct clk_vin_device *)pargs; 
+ int status = 0; + + pvin_device = nvgpu_kzalloc(g, size); + if (pvin_device == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pvin_device, pargs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pvin_device; + + pvin_device->super.pmudatainit = + vin_device_init_pmudata_super; + + pvin_device->id = ptmpvin_device->id; + pvin_device->volt_domain_vbios = ptmpvin_device->volt_domain_vbios; + pvin_device->flls_shared_mask = ptmpvin_device->flls_shared_mask; + pvin_device->volt_domain = CTRL_VOLT_DOMAIN_LOGIC; + pvin_device->por_override_mode = ptmpvin_device->por_override_mode; + pvin_device->override_mode = ptmpvin_device->override_mode; + + return status; +} +static struct clk_vin_device *construct_vin_device( + struct gk20a *g, void *pargs) +{ + struct pmu_board_obj *obj = NULL; + int status; + + nvgpu_log_info(g, " %d", pmu_board_obj_get_type(pargs)); + + status = vin_device_construct_v20(g, &obj, + sizeof(struct vin_device_v20), pargs); + + if (status != 0) { + return NULL; + } + + nvgpu_log_info(g, " Done"); + + return (struct clk_vin_device *)(void *)obj; +} + +static int vin_device_init_pmudata_v20(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vin_device_v20 *pvin_dev_v20; + struct nv_pmu_clk_clk_vin_device_v20_boardobj_set *perf_pmu_data; + + nvgpu_log_info(g, " "); + + status = vin_device_init_pmudata_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvin_dev_v20 = (struct vin_device_v20 *)(void *)obj; + perf_pmu_data = (struct nv_pmu_clk_clk_vin_device_v20_boardobj_set *) + pmu_obj; + + perf_pmu_data->data.cal_type = pvin_dev_v20->data.cal_type; + perf_pmu_data->data.vin_cal.cal_v20.offset = + pvin_dev_v20->data.vin_cal.cal_v20.offset; + perf_pmu_data->data.vin_cal.cal_v20.gain = + pvin_dev_v20->data.vin_cal.cal_v20.gain; + perf_pmu_data->data.vin_cal.cal_v20.offset_vfe_idx = + pvin_dev_v20->data.vin_cal.cal_v20.offset_vfe_idx; + + nvgpu_log_info(g, " Done"); + + return status; +} + +static int vin_device_init_pmudata_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct clk_vin_device *pvin_dev; + struct nv_pmu_clk_clk_vin_device_boardobj_set *perf_pmu_data; + + nvgpu_log_info(g, " "); + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvin_dev = (struct clk_vin_device *)(void *)obj; + perf_pmu_data = (struct nv_pmu_clk_clk_vin_device_boardobj_set *) + pmu_obj; + + perf_pmu_data->id = pvin_dev->id; + perf_pmu_data->volt_rail_idx = + nvgpu_pmu_volt_rail_volt_domain_convert_to_idx( + g, pvin_dev->volt_domain); + perf_pmu_data->flls_shared_mask = pvin_dev->flls_shared_mask; + perf_pmu_data->por_override_mode = pvin_dev->por_override_mode; + perf_pmu_data->override_mode = pvin_dev->override_mode; + + nvgpu_log_info(g, " Done"); + + return status; +} + +int clk_pmu_vin_load(struct gk20a *g) +{ + int status; + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_clk_load clk_load_rpc; + + (void) memset(&clk_load_rpc, 0, + sizeof(struct nv_pmu_rpc_struct_clk_load)); + + clk_load_rpc.clk_load.feature = NV_NV_PMU_CLK_LOAD_FEATURE_VIN; + clk_load_rpc.clk_load.action_mask = + NV_NV_PMU_CLK_LOAD_ACTION_MASK_VIN_HW_CAL_PROGRAM_YES << 4; + + /* Continue with PMU setup, assume FB map is done */ + PMU_RPC_EXECUTE_CPB(status, pmu, CLK, LOAD, &clk_load_rpc, 0); + if (status != 0) { + 
nvgpu_err(g, + "Failed to execute Clock Load RPC status=0x%x", + status); + } + + return status; +} + +int clk_vin_init_pmupstate(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmu->clk_pmu->avfs_vinobjs != NULL) { + return 0; + } + + g->pmu->clk_pmu->avfs_vinobjs = nvgpu_kzalloc(g, + sizeof(*g->pmu->clk_pmu->avfs_vinobjs)); + if (g->pmu->clk_pmu->avfs_vinobjs == NULL) { + return -ENOMEM; + } + + return 0; +} + +void clk_vin_free_pmupstate(struct gk20a *g) +{ + nvgpu_kfree(g, g->pmu->clk_pmu->avfs_vinobjs); + g->pmu->clk_pmu->avfs_vinobjs = NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/clk/clk_vin.h b/drivers/gpu/nvgpu/common/pmu/clk/clk_vin.h new file mode 100644 index 000000000..8a8f8e97b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/clk_vin.h @@ -0,0 +1,60 @@ +/* +* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. +* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. +*/ + +#ifndef NVGPU_CLK_VIN_H +#define NVGPU_CLK_VIN_H + +#include +#include +#include + +typedef u32 vin_device_state_load(struct gk20a *g, + struct nvgpu_clk_pmupstate *clk, struct clk_vin_device *pdev); + +struct clk_vin_device { + struct pmu_board_obj super; + u8 id; + u8 volt_domain; + u8 volt_domain_vbios; + u8 por_override_mode; + u8 override_mode; + u32 flls_shared_mask; + vin_device_state_load *state_load; +}; + +struct vin_device_v20 { + struct clk_vin_device super; + struct ctrl_clk_vin_device_info_data_v20 data; +}; +struct nvgpu_avfsvinobjs { + struct boardobjgrp_e32 super; + u8 calibration_rev_vbios; + u8 calibration_rev_fused; + u8 version; + bool vin_is_disable_allowed; +}; +int clk_vin_init_pmupstate(struct gk20a *g); +void clk_vin_free_pmupstate(struct gk20a *g); +int clk_pmu_vin_load(struct gk20a *g); +int clk_vin_sw_setup(struct gk20a *g); +int clk_vin_pmu_setup(struct gk20a *g); +#endif /* NVGPU_CLK_VIN_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/clk/ucode_clk_inf.h b/drivers/gpu/nvgpu/common/pmu/clk/ucode_clk_inf.h new file mode 100644 index 000000000..f65158910 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/clk/ucode_clk_inf.h @@ -0,0 +1,765 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PMUIF_CLK_H +#define NVGPU_PMUIF_CLK_H + +#include +#include +#include +#include + +/*! + * Various types of VIN calibration that the GPU can support + */ +#define CTRL_CLK_VIN_CAL_TYPE_V20 (0x00000001U) +#define CTRL_CLK_VIN_VFE_IDX_INVALID (0xFFU) + +/*! + * Various Vin device table versions that are supported + */ +#define NV2080_CTRL_CLK_VIN_DEVICES_DISABLED (0x00000000U) +#define NV2080_CTRL_CLK_VIN_DEVICES_V10 (0x00000001U) +#define NV2080_CTRL_CLK_VIN_DEVICES_V20 (0x00000002U) + +/*! + * Enumeration of CLK_DOMAIN types. + */ +#define CTRL_CLK_CLK_DOMAIN_TYPE_3X 0x01U +#define CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED 0x02U +#define CTRL_CLK_CLK_DOMAIN_TYPE_3X_PROG 0x03U +#define CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER 0x04U +#define CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE 0x05U +#define CTRL_CLK_CLK_DOMAIN_TYPE_30_PROG 0x06U +#define CTRL_CLK_CLK_DOMAIN_TYPE_35_MASTER 0x07U +#define CTRL_CLK_CLK_DOMAIN_TYPE_35_SLAVE 0x08U +#define CTRL_CLK_CLK_DOMAIN_TYPE_35_PROG 0x09U +#define CTRL_CLK_CLK_DOMAIN_3X_PROG_ORDERING_INDEX_INVALID 0xFFU +#define CTRL_CLK_CLK_DOMAIN_INDEX_INVALID 0xFFU + +/*! + * Enumeration of CLK_PROG types. + */ +#define CTRL_CLK_CLK_PROG_TYPE_3X 0x00U +#define CTRL_CLK_CLK_PROG_TYPE_1X 0x01U +#define CTRL_CLK_CLK_PROG_TYPE_1X_MASTER 0x02U +#define CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO 0x03U +#define CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_TABLE 0x04U +#define CTRL_CLK_CLK_PROG_TYPE_35 0x05U +#define CTRL_CLK_CLK_PROG_TYPE_35_MASTER 0x06U +#define CTRL_CLK_CLK_PROG_TYPE_35_MASTER_RATIO 0x07U +#define CTRL_CLK_CLK_PROG_TYPE_35_MASTER_TABLE 0x08U +#define CTRL_CLK_CLK_PROG_TYPE_UNKNOWN 0xFFU +#define CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES 0x4U +#define CTRL_CLK_CLK_PROG_35_MASTER_SEC_VF_ENTRY_VOLTRAIL_MAX 0x1U +#define CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES 0x6U +/*! + * Enumeration of CLK_PROG source types. 
+ */ +#define CTRL_CLK_PROG_1X_SOURCE_PLL 0x00U +#define CTRL_CLK_PROG_1X_SOURCE_ONE_SOURCE 0x01U +#define CTRL_CLK_PROG_1X_SOURCE_FLL 0x02U +#define CTRL_CLK_PROG_1X_SOURCE_INVALID 0xFFU + +#define CTRL_CLK_CLK_VF_POINT_TYPE_FREQ 0x01U +#define CTRL_CLK_CLK_VF_POINT_TYPE_VOLT 0x02U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35 0x04U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35_FREQ 0x05U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT 0x06U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT_PRI 0x07U +#define CTRL_CLK_CLK_VF_POINT_TYPE_35_VOLT_SEC 0x08U +#define CTRL_CLK_CLK_VF_POINT_IDX_INVALID 0xFFU +#define CTRL_CLK_CLK_VF_POINT_FREQ_TUPLE_MAX_SIZE 0x5U + +#define NV_PMU_RPC_ID_CLK_CNTR_SAMPLE_DOMAIN 0x01U +#define NV_PMU_RPC_ID_CLK_CLK_DOMAIN_35_PROG_VOLT_TO_FREQ 0x02U +#define NV_PMU_RPC_ID_CLK_CLK_DOMAIN_35_PROG_FREQ_TO_VOLT 0x03U +#define NV_PMU_RPC_ID_CLK_CLK_DOMAIN_35_PROG_FREQ_QUANTIZE 0x04U +#define NV_PMU_RPC_ID_CLK_CLK_DOMAIN_35_PROG_CLIENT_FREQ_DELTA_ADJ 0x05U +#define NV_PMU_RPC_ID_CLK_FREQ_EFFECTIVE_AVG 0x06U +#define NV_PMU_RPC_ID_CLK_LOAD 0x07U +#define NV_PMU_RPC_ID_CLK_VF_CHANGE_INJECT 0x08U +#define NV_PMU_RPC_ID_CLK_MCLK_SWITCH 0x09U +#define NV_PMU_RPC_ID_CLK__COUNT 0x0AU + +/*! + * Macros for the @ref feature parameter in the @ref NV_PMU_CLK_LOAD structure + */ +#define NV_NV_PMU_CLK_LOAD_FEATURE_INVALID (0x00000000U) +#define NV_NV_PMU_CLK_LOAD_FEATURE_FLL (0x00000001U) +#define NV_NV_PMU_CLK_LOAD_FEATURE_VIN (0x00000002U) +#define NV_NV_PMU_CLK_LOAD_FEATURE_FREQ_CONTROLLER (0x00000003U) +#define NV_NV_PMU_CLK_LOAD_FEATURE_FREQ_EFFECTIVE_AVG (0x00000004U) +#define NV_NV_PMU_CLK_LOAD_FEATURE_CLK_DOMAIN (0x00000005U) +#define NV_NV_PMU_CLK_LOAD_FEATURE_CLK_CONTROLLER (0x00000006U) + +/* CLK CMD ID definitions.*/ +#define NV_PMU_CLK_CMD_ID_BOARDOBJ_GRP_SET (0x00000001U) +#define NV_PMU_CLK_CMD_ID_BOARDOBJ_GRP_GET_STATUS (0x00000002U) +/* CLK MSG ID definitions */ +#define NV_PMU_CLK_MSG_ID_BOARDOBJ_GRP_SET (0x00000001U) +#define NV_PMU_CLK_MSG_ID_BOARDOBJ_GRP_GET_STATUS (0x00000002U) +#define NV_NV_PMU_CLK_LOAD_ACTION_MASK_VIN_HW_CAL_PROGRAM_YES (0x00000001U) + +#define CTRL_CLK_CLK_DELTA_MAX_VOLT_RAILS 4U + +union ctrl_clk_freq_delta_data { + s32 delta_khz; + s16 delta_percent; +}; + +struct ctrl_clk_freq_delta { + u8 type; + union ctrl_clk_freq_delta_data data; +}; + +struct ctrl_clk_clk_delta { + struct ctrl_clk_freq_delta freq_delta; + int volt_deltauv[CTRL_CLK_CLK_DELTA_MAX_VOLT_RAILS]; +}; + +struct ctrl_clk_domain_control_35_prog_clk_mon { + u32 flags; + u32 low_threshold_override; + u32 high_threshold_override; +}; + +struct ctrl_clk_domain_info_35_prog_clk_mon { + u8 low_threshold_vfe_idx; + u8 high_threshold_vfe_idx; +}; + +struct ctrl_clk_clk_prog_1x_master_source_fll { + u32 base_vfsmooth_volt_uv; + u32 max_vf_ramprate; + u32 max_freq_stepsize_mhz; +}; + +union ctrl_clk_clk_prog_1x_master_source_data { + struct ctrl_clk_clk_prog_1x_master_source_fll fll; +}; + +struct ctrl_clk_clk_vf_point_info_freq { + u16 freq_mhz; +}; + +struct ctrl_clk_clk_vf_point_info_volt { + u32 sourceVoltageuV; + u8 vfGainVfeEquIdx; + u8 clkDomainIdx; +}; + +struct ctrl_clk_clk_prog_1x_master_vf_entry { + u8 vfe_idx; + u8 gain_vfe_idx; + u8 vf_point_idx_first; + u8 vf_point_idx_last; +}; + +struct ctrl_clk_clk_prog_35_master_sec_vf_entry { + u8 vfe_idx; + u8 dvco_offset_vfe_idx; + u8 vf_point_idx_first; + u8 vf_point_idx_last; +}; + +struct ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail { + struct ctrl_clk_clk_prog_35_master_sec_vf_entry sec_vf_entries[ + 
CTRL_CLK_CLK_PROG_35_MASTER_SEC_VF_ENTRY_VOLTRAIL_MAX]; +}; + +struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry { + u8 clk_dom_idx; + u8 ratio; +}; + +struct ctrl_clk_clk_prog_1x_master_table_slave_entry { + u8 clk_dom_idx; + u16 freq_mhz; +}; + +struct ctrl_clk_clk_prog_1x_source_pll { + u8 pll_idx; + u8 freq_step_size_mhz; +}; + +struct ctrl_clk_vin_v10 { + u32 slope; + u32 intercept; +}; + +struct ctrl_clk_vin_v20 { + s8 offset; + s8 gain; + u8 coarse_control; + u8 offset_vfe_idx; +}; + +union ctrl_clk_vin_data_v20 { + struct ctrl_clk_vin_v10 cal_v10; + struct ctrl_clk_vin_v20 cal_v20; +}; + +struct ctrl_clk_vin_device_info_data_v10 { + struct ctrl_clk_vin_v10 vin_cal; +}; + +struct ctrl_clk_vin_device_info_data_v20 { + u8 cal_type; + union ctrl_clk_vin_data_v20 vin_cal; +}; + +union ctrl_clk_clk_prog_1x_source_data { + struct ctrl_clk_clk_prog_1x_source_pll source_pll; +}; + +struct ctrl_clk_vf_point_freq_tuple { + u16 freqMHz; +}; + +struct ctrl_clk_vf_point_base_vf_tuple { + struct ctrl_clk_vf_point_freq_tuple + freqTuple[CTRL_CLK_CLK_VF_POINT_FREQ_TUPLE_MAX_SIZE]; + u32 voltageuV; +}; + +struct ctrl_clk_vf_point_base_vf_tuple_sec { + struct ctrl_clk_vf_point_base_vf_tuple super; + u8 dvco_offset_code; +}; + +struct ctrl_clk_vf_point_vf_tuple { + u16 freqMHz; + u32 voltageuV; +}; + +struct ctrl_clk_vf_input { + u8 flags; + u32 value; +}; + +struct ctrl_clk_vf_output { + u32 input_best_match; + u32 value; +}; + +struct nv_pmu_rpc_clk_domain_35_prog_freq_to_volt { + /* + * [IN/OUT] Must be first field in RPC structure + */ + struct nv_pmu_rpc_header hdr; + u8 clk_domain_idx; + u8 volt_rail_idx; + u8 voltage_type; + struct ctrl_clk_vf_input input; + struct ctrl_clk_vf_output output; + u32 scratch[1]; +}; + +/* + * CLK_DOMAIN BOARDOBJGRP Header structure. Describes global state about the + * CLK_DOMAIN feature. 
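+ * The prog/master/clkmon masks track the programmable, master and
+ * clock-monitored domain subsets of the E32 group, while deltas carries
+ * the group-wide frequency delta plus per-rail voltage offsets
+ * (struct ctrl_clk_clk_delta above).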
+ */ +struct nv_pmu_clk_clk_domain_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; + u32 vbios_domains; + struct ctrl_boardobjgrp_mask_e32 prog_domains_mask; + struct ctrl_boardobjgrp_mask_e32 master_domains_mask; + struct ctrl_boardobjgrp_mask_e32 clkmon_domains_mask; + u16 cntr_sampling_periodms; + u16 clkmon_refwin_usec; + u8 version; + bool b_override_o_v_o_c; + bool b_debug_mode; + bool b_enforce_vf_monotonicity; + bool b_enforce_vf_smoothening; + u8 volt_rails_max; + struct ctrl_clk_clk_delta deltas; +}; + +struct nv_pmu_clk_clk_domain_boardobj_set { + struct nv_pmu_boardobj super; + u32 domain; + u32 api_domain; + u8 perf_domain_grp_idx; +}; + +struct nv_pmu_clk_clk_domain_3x_boardobj_set { + struct nv_pmu_clk_clk_domain_boardobj_set super; + bool b_noise_aware_capable; +}; + +struct nv_pmu_clk_clk_domain_3x_fixed_boardobj_set { + struct nv_pmu_clk_clk_domain_3x_boardobj_set super; + u16 freq_mhz; +}; + +struct nv_pmu_clk_clk_domain_3x_prog_boardobj_set { + struct nv_pmu_clk_clk_domain_3x_boardobj_set super; + u8 clk_prog_idx_first; + u8 clk_prog_idx_last; + bool b_force_noise_unaware_ordering; + struct ctrl_clk_freq_delta factory_delta; + short freq_delta_min_mhz; + short freq_delta_max_mhz; + struct ctrl_clk_clk_delta deltas; +}; + +struct nv_pmu_clk_clk_domain_30_prog_boardobj_set { + struct nv_pmu_clk_clk_domain_3x_prog_boardobj_set super; + u8 noise_unaware_ordering_index; + u8 noise_aware_ordering_index; +}; + +struct nv_pmu_clk_clk_domain_3x_master_boardobj_set { + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + u32 slave_idxs_mask; +}; + +struct nv_pmu_clk_clk_domain_30_master_boardobj_set { + struct nv_pmu_clk_clk_domain_30_prog_boardobj_set super; + struct nv_pmu_clk_clk_domain_3x_master_boardobj_set master; +}; + +struct nv_pmu_clk_clk_domain_3x_slave_boardobj_set { + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + u8 master_idx; +}; + +struct nv_pmu_clk_clk_domain_30_slave_boardobj_set { + struct nv_pmu_clk_clk_domain_30_prog_boardobj_set super; + struct nv_pmu_clk_clk_domain_3x_slave_boardobj_set slave; +}; + +struct nv_pmu_clk_clk_domain_35_prog_boardobj_set { + struct nv_pmu_clk_clk_domain_3x_prog_boardobj_set super; + u8 pre_volt_ordering_index; + u8 post_volt_ordering_index; + u8 clk_pos; + u8 clk_vf_curve_count; + struct ctrl_clk_domain_info_35_prog_clk_mon clkmon_info; + struct ctrl_clk_domain_control_35_prog_clk_mon clkmon_ctrl; + u32 por_volt_delta_uv[CTRL_VOLT_VOLT_RAIL_CLIENT_MAX_RAILS]; +}; + +struct nv_pmu_clk_clk_domain_35_master_boardobj_set { + struct nv_pmu_clk_clk_domain_35_prog_boardobj_set super; + struct nv_pmu_clk_clk_domain_3x_master_boardobj_set master; + struct ctrl_boardobjgrp_mask_e32 master_slave_domains_grp_mask; +}; + + +struct nv_pmu_clk_clk_domain_35_slave_boardobj_set { + struct nv_pmu_clk_clk_domain_35_prog_boardobj_set super; + struct nv_pmu_clk_clk_domain_3x_slave_boardobj_set slave; +}; + +union nv_pmu_clk_clk_domain_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_clk_clk_domain_boardobj_set super; + struct nv_pmu_clk_clk_domain_3x_boardobj_set v3x; + struct nv_pmu_clk_clk_domain_3x_fixed_boardobj_set v3x_fixed; + struct nv_pmu_clk_clk_domain_3x_prog_boardobj_set v3x_prog; + struct nv_pmu_clk_clk_domain_30_prog_boardobj_set v30_prog; + struct nv_pmu_clk_clk_domain_30_master_boardobj_set v30_master; + struct nv_pmu_clk_clk_domain_30_slave_boardobj_set v30_slave; + struct nv_pmu_clk_clk_domain_35_prog_boardobj_set v35_prog; + struct nv_pmu_clk_clk_domain_35_master_boardobj_set 
v35_master; + struct nv_pmu_clk_clk_domain_35_slave_boardobj_set v35_slave; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(clk, clk_domain); + +struct nv_pmu_clk_clk_prog_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e255 super; + u8 slave_entry_count; + u8 vf_entry_count; + u8 vf_sec_entry_count; +}; + +struct nv_pmu_clk_clk_prog_boardobj_set { + struct nv_pmu_boardobj super; +}; + +struct nv_pmu_clk_clk_prog_1x_boardobj_set { + struct nv_pmu_clk_clk_prog_boardobj_set super; + u8 source; + u16 freq_max_mhz; + union ctrl_clk_clk_prog_1x_source_data source_data; +}; + +struct nv_pmu_clk_clk_prog_1x_master_boardobj_set { + struct nv_pmu_clk_clk_prog_1x_boardobj_set super; + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + bool b_o_c_o_v_enabled; + struct ctrl_clk_clk_prog_1x_master_vf_entry vf_entries[ + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES]; + struct ctrl_clk_clk_delta deltas; + union ctrl_clk_clk_prog_1x_master_source_data source_data; +}; + +struct nv_pmu_clk_clk_prog_1x_master_ratio_boardobj_set { + struct nv_pmu_clk_clk_prog_1x_master_boardobj_set super; + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry slave_entries[ + CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES]; +}; + +struct nv_pmu_clk_clk_prog_1x_master_table_boardobj_set { + struct nv_pmu_clk_clk_prog_1x_master_boardobj_set super; + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + struct ctrl_clk_clk_prog_1x_master_table_slave_entry + slave_entries[CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES]; +}; + +struct nv_pmu_clk_clk_prog_3x_master_boardobj_set { + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + bool b_o_c_o_v_enabled; + struct ctrl_clk_clk_prog_1x_master_vf_entry vf_entries[ + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES]; + struct ctrl_clk_clk_delta deltas; + union ctrl_clk_clk_prog_1x_master_source_data source_data; +}; + +struct nv_pmu_clk_clk_prog_3x_master_ratio_boardobj_set { + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry slave_entries[ + CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES]; +}; + +struct nv_pmu_clk_clk_prog_3x_master_table_boardobj_set { + u8 rsvd; /* Stubbing for RM_PMU_BOARDOBJ_INTERFACE */ + struct ctrl_clk_clk_prog_1x_master_table_slave_entry slave_entries[ + CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES]; +}; + +struct nv_pmu_clk_clk_prog_35_master_boardobj_set { + struct nv_pmu_clk_clk_prog_1x_boardobj_set super; + struct nv_pmu_clk_clk_prog_3x_master_boardobj_set master; + struct ctrl_clk_clk_prog_35_master_sec_vf_entry_voltrail + voltrail_sec_vf_entries[ + CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES]; +}; + +struct nv_pmu_clk_clk_prog_35_master_ratio_boardobj_set { + struct nv_pmu_clk_clk_prog_35_master_boardobj_set super; + struct nv_pmu_clk_clk_prog_3x_master_ratio_boardobj_set ratio; +}; + +struct nv_pmu_clk_clk_prog_35_master_table_boardobj_set { + struct nv_pmu_clk_clk_prog_35_master_boardobj_set super; + struct nv_pmu_clk_clk_prog_3x_master_table_boardobj_set table; +}; + +union nv_pmu_clk_clk_prog_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_clk_clk_prog_boardobj_set super; + struct nv_pmu_clk_clk_prog_1x_boardobj_set v1x; + struct nv_pmu_clk_clk_prog_1x_master_boardobj_set v1x_master; + struct nv_pmu_clk_clk_prog_1x_master_ratio_boardobj_set + v1x_master_ratio; + struct nv_pmu_clk_clk_prog_1x_master_table_boardobj_set + v1x_master_table; + struct nv_pmu_clk_clk_prog_35_master_boardobj_set v35_master; + struct 
nv_pmu_clk_clk_prog_35_master_ratio_boardobj_set + v35_master_ratio; + struct nv_pmu_clk_clk_prog_35_master_table_boardobj_set + v35_master_table; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E255(clk, clk_prog); + +struct nv_pmu_clk_clk_fll_device_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; + struct ctrl_boardobjgrp_mask_e32 lut_prog_master_mask; + u32 lut_step_size_uv; + u32 lut_min_voltage_uv; + u8 lut_num_entries; + u16 max_min_freq_mhz; +}; + +struct nv_pmu_clk_lut_device_desc { + u8 vselect_mode; + u16 hysteresis_threshold; +}; + +struct nv_pmu_clk_regime_desc { + u8 regime_id; + u8 target_regime_id_override; + u16 fixed_freq_regime_limit_mhz; +}; + +struct nv_pmu_clk_clk_fll_device_boardobj_set { + struct nv_pmu_boardobj super; + u8 id; + u8 mdiv; + u8 vin_idx_logic; + u8 vin_idx_sram; + u8 rail_idx_for_lut; + u16 input_freq_mhz; + u32 clk_domain; + struct nv_pmu_clk_lut_device_desc lut_device; + struct nv_pmu_clk_regime_desc regime_desc; + u8 min_freq_vfe_idx; + u8 freq_ctrl_idx; + bool b_skip_pldiv_below_dvco_min; + bool b_dvco_1x; + struct ctrl_boardobjgrp_mask_e32 lut_prog_broadcast_slave_mask; +}; + +union nv_pmu_clk_clk_fll_device_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_clk_clk_fll_device_boardobj_set super; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(clk, clk_fll_device); + +struct nv_pmu_clk_clk_vin_device_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; + u8 version; + bool b_vin_is_disable_allowed; + u8 reserved[13]; +}; + +struct nv_pmu_clk_clk_vin_device_boardobj_set { + struct nv_pmu_boardobj super; + u8 id; + u8 volt_rail_idx; + u8 por_override_mode; + u8 override_mode; + u32 flls_shared_mask; +}; + +struct nv_pmu_clk_clk_vin_device_v20_boardobj_set { + struct nv_pmu_clk_clk_vin_device_boardobj_set super; + struct ctrl_clk_vin_device_info_data_v20 data; +}; + +union nv_pmu_clk_clk_vin_device_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_clk_clk_vin_device_boardobj_set super; + struct nv_pmu_clk_clk_vin_device_v20_boardobj_set v20; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(clk, clk_vin_device); + +struct nv_pmu_clk_clk_vf_point_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e255 super; +}; + +struct nv_pmu_clk_clk_vf_point_sec_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e255 super; +}; +struct nv_pmu_clk_clk_vf_point_boardobj_set { + struct nv_pmu_boardobj super; + u8 vfe_equ_idx; + u8 volt_rail_idx; +}; + +struct nv_pmu_clk_clk_vf_point_freq_boardobj_set { + struct nv_pmu_clk_clk_vf_point_boardobj_set super; + u16 freq_mhz; + int volt_delta_uv; +}; + +struct nv_pmu_clk_clk_vf_point_volt_boardobj_set { + struct nv_pmu_clk_clk_vf_point_boardobj_set super; + u32 source_voltage_uv; + struct ctrl_clk_freq_delta freq_delta; +}; + +struct nv_pmu_clk_clk_vf_point_volt_35_sec_boardobj_set { + struct nv_pmu_clk_clk_vf_point_volt_boardobj_set super; + u8 dvco_offset_code_override; +}; + +union nv_pmu_clk_clk_vf_point_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_clk_clk_vf_point_boardobj_set super; + struct nv_pmu_clk_clk_vf_point_freq_boardobj_set freq; + struct nv_pmu_clk_clk_vf_point_volt_boardobj_set volt; +}; + +union nv_pmu_clk_clk_vf_point_sec_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_clk_clk_vf_point_boardobj_set super; + struct nv_pmu_clk_clk_vf_point_freq_boardobj_set freq; + struct nv_pmu_clk_clk_vf_point_volt_boardobj_set volt; + struct nv_pmu_clk_clk_vf_point_volt_35_sec_boardobj_set v35_volt_sec; +}; + 
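+/*
+ * The GRP_SET "MAKE" helpers below expand to the aggregate set-command
+ * payload for each group: a boardobjgrp set header plus a per-index array
+ * of the set union defined above. The *_instget handlers (see clk_vin.c)
+ * address individual entries through that layout, e.g.
+ * pgrp_set->hdr.data.super.obj_mask and pgrp_set->objects[idx].data.obj.
+ */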
+NV_PMU_BOARDOBJ_GRP_SET_MAKE_E255(clk, clk_vf_point); +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E255(clk, clk_vf_point_sec); + +struct nv_pmu_clk_clk_vf_point_boardobjgrp_get_status_header { + struct nv_pmu_boardobjgrp_e255 super; + u32 vf_points_cahce_counter; +}; + +struct nv_pmu_clk_clk_vf_point_35_freq_boardobj_get_status { + struct nv_pmu_boardobj super; + struct ctrl_clk_vf_point_base_vf_tuple base_vf_tuple; + struct ctrl_clk_vf_point_vf_tuple + offseted_vf_tuple[CTRL_CLK_CLK_VF_POINT_FREQ_TUPLE_MAX_SIZE]; +}; + +struct nv_pmu_clk_clk_vf_point_35_volt_pri_boardobj_get_status { + struct nv_pmu_boardobj super; + struct ctrl_clk_vf_point_base_vf_tuple base_vf_tuple; + struct ctrl_clk_vf_point_vf_tuple + offseted_vf_tuple[CTRL_CLK_CLK_VF_POINT_FREQ_TUPLE_MAX_SIZE]; +}; + +struct nv_pmu_clk_clk_vf_point_35_volt_sec_boardobj_get_status { + struct nv_pmu_boardobj super; + struct ctrl_clk_vf_point_base_vf_tuple_sec base_vf_tuple; + struct ctrl_clk_vf_point_vf_tuple + offseted_vf_tuple[CTRL_CLK_CLK_VF_POINT_FREQ_TUPLE_MAX_SIZE]; +}; + +struct ctrl_clk_vf_pair { + u16 freq_mhz; + u32 voltage_uv; +}; + +struct nv_pmu_clk_clk_vf_point_boardobj_get_status { + struct nv_pmu_boardobj super; + struct ctrl_clk_vf_pair pair; + u8 dummy[38]; +}; + +struct nv_pmu_clk_clk_vf_point_volt_boardobj_get_status { + struct nv_pmu_clk_clk_vf_point_boardobj_get_status super; + u16 vf_gain_value; +}; + +union nv_pmu_clk_clk_vf_point_boardobj_get_status_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_clk_clk_vf_point_boardobj_get_status super; + struct nv_pmu_clk_clk_vf_point_volt_boardobj_get_status volt; + struct nv_pmu_clk_clk_vf_point_35_freq_boardobj_get_status v35_freq; + struct nv_pmu_clk_clk_vf_point_35_volt_pri_boardobj_get_status + v35_volt_pri; + struct nv_pmu_clk_clk_vf_point_35_volt_sec_boardobj_get_status + v35_volt_sec; +}; + +NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E255(clk, clk_vf_point); + +struct nv_pmu_clk_load { + u8 feature; + u32 action_mask; +}; + +struct nv_pmu_clk_freq_effective_avg { + u32 clkDomainMask; + u32 freqkHz[CTRL_BOARDOBJ_MAX_BOARD_OBJECTS]; +}; + +struct nv_pmu_rpc_struct_clk_load { + struct nv_pmu_rpc_header hdr; + struct nv_pmu_clk_load clk_load; + u32 scratch[1]; +}; + +struct nv_pmu_clk_cmd_rpc { + u8 cmd_type; + u8 pad[3]; + struct nv_pmu_allocation request; +}; + +struct nv_pmu_clk_cmd_generic { + u8 cmd_type; + bool b_perf_daemon_cmd; + u8 pad[2]; +}; + +struct nv_pmu_clk_cmd { + union { + u8 cmd_type; + struct nv_pmu_boardobj_cmd_grp grp_set; + struct nv_pmu_clk_cmd_generic generic; + struct nv_pmu_clk_cmd_rpc rpc; + struct nv_pmu_boardobj_cmd_grp grp_get_status; + }; +}; + +struct nv_pmu_clk_msg_rpc { + u8 msg_type; + u8 rsvd[3]; + struct nv_pmu_allocation response; +}; + +struct nv_pmu_clk_msg { + union { + u8 msg_type; + struct nv_pmu_boardobj_msg_grp grp_set; + struct nv_pmu_clk_msg_rpc rpc; + struct nv_pmu_boardobj_msg_grp grp_get_status; + }; +}; + +struct nv_pmu_clk_clk_vin_device_boardobjgrp_get_status_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_clk_clk_vin_device_boardobj_get_status { + struct nv_pmu_boardobj_query super; + u32 actual_voltage_uv; + u32 corrected_voltage_uv; + u8 sampled_code; + u8 override_code; +}; + +union nv_pmu_clk_clk_vin_device_boardobj_get_status_union { + struct nv_pmu_boardobj_query obj; + struct nv_pmu_clk_clk_vin_device_boardobj_get_status super; +}; + +NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E32(clk, clk_vin_device); + +struct nv_pmu_clk_lut_vf_entry { + u32 entry; +}; + +struct 
nv_pmu_clk_clk_fll_device_boardobjgrp_get_status_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_clk_clk_fll_device_boardobj_get_status { + struct nv_pmu_boardobj_query super; + u8 current_regime_id; + bool b_dvco_min_reached; + u16 min_freq_mhz; + struct nv_pmu_clk_lut_vf_entry + lut_vf_curve[NV_UNSIGNED_ROUNDED_DIV( + CTRL_CLK_LUT_NUM_ENTRIES_MAX, 2)]; +}; + +union nv_pmu_clk_clk_fll_device_boardobj_get_status_union { + struct nv_pmu_boardobj_query obj; + struct nv_pmu_clk_clk_fll_device_boardobj_get_status super; +}; + +NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E32(clk, clk_fll_device); + +#endif /* NVGPU_PMUIF_CLK_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw.c b/drivers/gpu/nvgpu/common/pmu/fw/fw.c new file mode 100644 index 000000000..caeb5e2b6 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/fw/fw.c @@ -0,0 +1,339 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* PMU UCODE IMG */ +#define NVGPU_PMU_UCODE_IMAGE "gpmu_ucode_image.bin" +#define NVGPU_PMU_UCODE_DESC "gpmu_ucode_desc.bin" +#define NVGPU_PMU_UCODE_SIG "pmu_sig.bin" +#define NVGPU_PMU_UCODE_NEXT_IMAGE "gpmu_ucode_next_image.bin" +#define NVGPU_PMU_UCODE_NEXT_DESC "gpmu_ucode_next_desc.bin" + +void nvgpu_pmu_fw_get_cmd_line_args_offset(struct gk20a *g, + u32 *args_offset) +{ + struct nvgpu_pmu *pmu = g->pmu; + u32 dmem_size = 0; + int err = 0; + + err = nvgpu_falcon_get_mem_size(pmu->flcn, MEM_DMEM, &dmem_size); + if (err != 0) { + nvgpu_err(g, "dmem size request failed"); + *args_offset = 0; + return; + } + + *args_offset = dmem_size - pmu->fw->ops.get_cmd_line_args_size(pmu); +} + +void nvgpu_pmu_fw_state_change(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 pmu_state, bool post_change_event) +{ + nvgpu_pmu_dbg(g, "pmu_state - %d", pmu_state); + + nvgpu_smp_wmb(); + pmu->fw->state = pmu_state; + + /* Set a sticky flag to indicate PMU state exit */ + if (pmu_state == PMU_FW_STATE_EXIT) { + pmu->pg->pg_init.state_destroy = true; + } + if (post_change_event) { + if (g->can_elpg) { + pmu->pg->pg_init.state_change = true; + nvgpu_cond_signal_interruptible(&pmu->pg->pg_init.wq); + } + } +} + +u32 nvgpu_pmu_get_fw_state(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + u32 state = pmu->fw->state; + nvgpu_smp_rmb(); + + return state; +} + +void nvgpu_pmu_set_fw_ready(struct gk20a *g, struct nvgpu_pmu *pmu, + bool status) +{ + nvgpu_smp_wmb(); + pmu->fw->ready = status; +} + +bool nvgpu_pmu_get_fw_ready(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + bool state = pmu->fw->ready; + nvgpu_smp_rmb(); + + return state; +} + +int nvgpu_pmu_wait_fw_ack_status(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 timeout_ms, void *var, u8 val) +{ + struct nvgpu_timeout timeout; + int err; + unsigned int delay = POLL_DELAY_MIN_US; + + err = nvgpu_timeout_init(g, &timeout, timeout_ms, + NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + nvgpu_err(g, "PMU wait timeout init failed."); + return err; + } + + do { + nvgpu_rmb(); + + if (nvgpu_can_busy(g) == 0) { + /* + * Since the system is shutting down so we don't + * wait for the ACK from PMU. 
+ * Set ACK received so that state machine is maintained + * properly and falcon stats are not dumped due to + * PMU command failure + */ + + *(volatile u8 *)var = val; + return 0; + } + + if (g->ops.pmu.pmu_is_interrupted(pmu)) { + g->ops.pmu.pmu_isr(g); + } + + nvgpu_usleep_range(delay, delay * 2U); + delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US); + + /* Confirm ACK from PMU before timeout check */ + if (*(volatile u8 *)var == val) { + return 0; + } + + } while (nvgpu_timeout_expired(&timeout) == 0); + + return -ETIMEDOUT; +} + +int nvgpu_pmu_wait_fw_ready(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + int status = 0; + + status = nvgpu_pmu_wait_fw_ack_status(g, pmu, + nvgpu_get_poll_timeout(g), + &pmu->fw->ready, (u8)true); + if (status != 0) { + nvgpu_err(g, "PMU is not ready yet"); + } + + return status; +} + +static void pmu_fw_release(struct gk20a *g, struct pmu_rtos_fw *rtos_fw) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + + nvgpu_log_fn(g, " "); + + if (rtos_fw->fw_sig != NULL) { + nvgpu_release_firmware(g, rtos_fw->fw_sig); + } + + if (rtos_fw->fw_desc != NULL) { + nvgpu_release_firmware(g, rtos_fw->fw_desc); + } + + if (rtos_fw->fw_image != NULL) { + nvgpu_release_firmware(g, rtos_fw->fw_image); + } + + if (nvgpu_mem_is_valid(&rtos_fw->ucode)) { + nvgpu_dma_unmap_free(vm, &rtos_fw->ucode); + } + + if (nvgpu_mem_is_valid(&rtos_fw->ucode_boot_args)) { + nvgpu_dma_unmap_free(vm, &rtos_fw->ucode_boot_args); + } + + if (nvgpu_mem_is_valid(&rtos_fw->ucode_core_dump)) { + nvgpu_dma_unmap_free(vm, &rtos_fw->ucode_core_dump); + } +} + +struct nvgpu_firmware *nvgpu_pmu_fw_sig_desc(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + return pmu->fw->fw_sig; +} + +struct nvgpu_firmware *nvgpu_pmu_fw_desc_desc(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + return pmu->fw->fw_desc; +} + +struct nvgpu_firmware *nvgpu_pmu_fw_image_desc(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + return pmu->fw->fw_image; +} + +static int pmu_fw_read(struct gk20a *g, const char *ucode, + const char *desc, const char *sig) +{ + struct pmu_rtos_fw *rtos_fw = g->pmu->fw; + int err = 0; + + nvgpu_log_fn(g, " "); + + /* secure boot ucodes's */ + nvgpu_pmu_dbg(g, "requesting PMU ucode image"); + rtos_fw->fw_image = nvgpu_request_firmware(g, ucode, 0); + if (rtos_fw->fw_image == NULL) { + nvgpu_err(g, "failed to load pmu ucode!!"); + err = -ENOENT; + goto exit; + } + + nvgpu_pmu_dbg(g, "requesting PMU ucode desc"); + rtos_fw->fw_desc = nvgpu_request_firmware(g, desc, 0); + if (rtos_fw->fw_desc == NULL) { + nvgpu_err(g, "failed to load pmu ucode desc!!"); + err = -ENOENT; + goto exit; + } + + if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + nvgpu_pmu_dbg(g, "requesting PMU ucode sign"); + rtos_fw->fw_sig = + nvgpu_request_firmware(g, sig, 0); + if (rtos_fw->fw_sig == NULL) { + nvgpu_err(g, "failed to load pmu sig!!"); + err = -ENOENT; + goto exit; + } + } + +exit: + if (err) { + pmu_fw_release(g, rtos_fw); + } + + return err; +} + +static int pmu_fw_init_ops(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + struct pmu_rtos_fw *rtos_fw = g->pmu->fw; + struct falcon_next_core_ucode_desc *ncore_desc; + struct pmu_ucode_desc *desc; + u32 app_version = 0; + int err; + + + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + ncore_desc = (struct falcon_next_core_ucode_desc *)(void *) + rtos_fw->fw_desc->data; + app_version = ncore_desc->version; + } else { + desc = (struct pmu_ucode_desc *)(void *)rtos_fw->fw_desc->data; + app_version = desc->app_version; + } + + err = 
nvgpu_pmu_init_fw_ver_ops(g, pmu, app_version); + if (err != 0) { + nvgpu_err(g, "failed to set function pointers"); + } + + return err; +} + +int nvgpu_pmu_init_pmu_fw(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_rtos_fw **rtos_fw_p) +{ + struct pmu_rtos_fw *rtos_fw = NULL; + int err; + + if (*rtos_fw_p != NULL) { + /* skip alloc/reinit for unrailgate sequence */ + nvgpu_pmu_dbg(g, "skip fw init for unrailgate sequence"); + return 0; + } + + rtos_fw = (struct pmu_rtos_fw *) + nvgpu_kzalloc(g, sizeof(struct pmu_rtos_fw)); + if (rtos_fw == NULL) { + err = -ENOMEM; + goto exit; + } + + *rtos_fw_p = rtos_fw; + + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + err = pmu_fw_read(g, NVGPU_PMU_UCODE_NEXT_IMAGE, + NVGPU_PMU_UCODE_NEXT_DESC, NVGPU_PMU_UCODE_SIG); + + } else { + err = pmu_fw_read(g, NVGPU_PMU_UCODE_IMAGE, + NVGPU_PMU_UCODE_DESC, NVGPU_PMU_UCODE_SIG); + } + + if (err) { + goto exit; + } + + err = pmu_fw_init_ops(g, pmu); + +exit: + if (err) { + pmu_fw_release(g, rtos_fw); + } + return err; +} + +void nvgpu_pmu_fw_deinit(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_rtos_fw *rtos_fw) +{ + nvgpu_log_fn(g, " "); + + if (rtos_fw == NULL) { + return; + } + + pmu_fw_release(g, rtos_fw); + + nvgpu_kfree(g, rtos_fw); +} diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c new file mode 100644 index 000000000..4946eae55 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static void pmu_free_ns_ucode_blob(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + struct pmu_rtos_fw *rtos_fw = pmu->fw; + + nvgpu_log_fn(g, " "); + + if (nvgpu_mem_is_valid(&rtos_fw->ucode)) { + nvgpu_dma_unmap_free(vm, &rtos_fw->ucode); + } +} + +int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + int err; + u32 args_offset = 0; + + /* prepare blob for non-secure PMU boot */ + err = pmu->fw->ops.prepare_ns_ucode_blob(g); + if (err != 0) { + nvgpu_err(g, "non secure ucode blop consrtuct failed"); + return err; + } + + /* Do non-secure PMU boot */ + err = nvgpu_falcon_reset(pmu->flcn); + if (err != 0) { + nvgpu_err(g, "falcon reset failed"); + /* free the ns ucode blob */ + pmu_free_ns_ucode_blob(g); + return err; + } + + nvgpu_pmu_enable_irq(g, true); + + nvgpu_mutex_acquire(&pmu->isr_mutex); + pmu->isr_enabled = true; + nvgpu_mutex_release(&pmu->isr_mutex); + + g->ops.pmu.setup_apertures(g); + +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + nvgpu_pmu_next_core_rtos_args_setup(g, pmu); + } else +#endif + { + nvgpu_pmu_rtos_cmdline_args_init(g, pmu); + nvgpu_pmu_fw_get_cmd_line_args_offset(g, &args_offset); + + err = nvgpu_falcon_copy_to_dmem(pmu->flcn, args_offset, + (u8 *)(pmu->fw->ops.get_cmd_line_args_ptr(pmu)), + pmu->fw->ops.get_cmd_line_args_size(pmu), 0); + if (err != 0) { + nvgpu_err(g, "NS PMU ucode setup failed"); + return err; + } + } + + return g->ops.pmu.pmu_ns_bootstrap(g, pmu, args_offset); +} diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c b/drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c new file mode 100644 index 000000000..80bb227bc --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c @@ -0,0 +1,1571 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* PMU F/W version */ +#define APP_VERSION_NVGPU_NEXT_CORE 3U +#define APP_VERSION_NVGPU_NEXT 29323513U +#define APP_VERSION_TU10X 28084434U +#define APP_VERSION_GV11B 25005711U +#define APP_VERSION_GV10X 25633490U +#define APP_VERSION_GP10X 24076634U +#define APP_VERSION_GP10B 29888552U +#define APP_VERSION_GM20B 20490253U + +/* PMU version specific functions */ +static u32 pmu_perfmon_cntr_sz_v2(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_perfmon_counter_v2); +} + +static void *pmu_get_perfmon_cntr_ptr_v2(struct nvgpu_pmu *pmu) +{ + return (void *)(&pmu->pmu_perfmon->perfmon_counter_v2); +} + +static void pmu_set_perfmon_cntr_ut_v2(struct nvgpu_pmu *pmu, u16 ut) +{ + pmu->pmu_perfmon->perfmon_counter_v2.upper_threshold = ut; +} + +static void pmu_set_perfmon_cntr_lt_v2(struct nvgpu_pmu *pmu, u16 lt) +{ + pmu->pmu_perfmon->perfmon_counter_v2.lower_threshold = lt; +} + +static void pmu_set_perfmon_cntr_valid_v2(struct nvgpu_pmu *pmu, u8 valid) +{ + pmu->pmu_perfmon->perfmon_counter_v2.valid = valid; +} + +static void pmu_set_perfmon_cntr_index_v2(struct nvgpu_pmu *pmu, u8 index) +{ + pmu->pmu_perfmon->perfmon_counter_v2.index = index; +} + +static void pmu_set_perfmon_cntr_group_id_v2(struct nvgpu_pmu *pmu, u8 gid) +{ + pmu->pmu_perfmon->perfmon_counter_v2.group_id = gid; +} + +static void pmu_set_cmd_line_args_trace_dma_base_v4(struct nvgpu_pmu *pmu) +{ + pmu->fw->args_v4.dma_addr.dma_base = + ((u32)pmu->trace_buf.gpu_va)/0x100U; + pmu->fw->args_v4.dma_addr.dma_base1 = 0; + pmu->fw->args_v4.dma_addr.dma_offset = 0; +} + +static u32 pmu_cmd_line_size_v4(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_cmdline_args_v4); +} + +static void pmu_set_cmd_line_args_cpu_freq_v4(struct nvgpu_pmu *pmu, u32 freq) +{ + pmu->fw->args_v4.cpu_freq_hz = freq; +} +static void pmu_set_cmd_line_args_secure_mode_v4(struct nvgpu_pmu *pmu, u8 val) +{ + pmu->fw->args_v4.secure_mode = val; +} + +static void pmu_set_cmd_line_args_trace_size_v4( + struct nvgpu_pmu *pmu, u32 size) +{ + pmu->fw->args_v4.falc_trace_size = size; +} +static void pmu_set_cmd_line_args_trace_dma_idx_v4( + struct nvgpu_pmu *pmu, u32 idx) +{ + pmu->fw->args_v4.falc_trace_dma_idx = idx; +} + +static u32 pmu_cmd_line_size_v6(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_cmdline_args_v6); +} + +static u32 pmu_cmd_line_size_v7(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_cmdline_args_v7); +} + +static void pmu_set_cmd_line_args_cpu_freq_v5(struct nvgpu_pmu *pmu, u32 freq) +{ + pmu->fw->args_v5.cpu_freq_hz = 204000000; +} +static void pmu_set_cmd_line_args_secure_mode_v5(struct nvgpu_pmu *pmu, u8 val) +{ + pmu->fw->args_v5.secure_mode = val; +} + +static void pmu_set_cmd_line_args_trace_size_v5( + struct nvgpu_pmu *pmu, u32 size) +{ + /* set by surface describe */ +} + +static void pmu_set_cmd_line_args_trace_dma_base_v5(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + + nvgpu_pmu_allocator_surface_describe(g, &pmu->trace_buf, + &pmu->fw->args_v5.trace_buf); +} + +static void config_cmd_line_args_super_surface_v6(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE)) { + nvgpu_pmu_allocator_surface_describe(g, + nvgpu_pmu_super_surface_mem(g, pmu, pmu->super_surface), + &pmu->fw->args_v6.super_surface); + } +} + +static void 
config_cmd_line_args_super_surface_v7(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE)) { + nvgpu_pmu_allocator_surface_describe(g, + nvgpu_pmu_super_surface_mem(g, pmu, pmu->super_surface), + &pmu->fw->args_v7.super_surface); + } +} + +static void pmu_set_cmd_line_args_trace_dma_idx_v5( + struct nvgpu_pmu *pmu, u32 idx) +{ + /* set by surface describe */ +} + +static u32 pmu_cmd_line_size_v3(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_cmdline_args_v3); +} + +static void pmu_set_cmd_line_args_cpu_freq_v3(struct nvgpu_pmu *pmu, u32 freq) +{ + pmu->fw->args_v3.cpu_freq_hz = freq; +} +static void pmu_set_cmd_line_args_secure_mode_v3(struct nvgpu_pmu *pmu, u8 val) +{ + pmu->fw->args_v3.secure_mode = val; +} + +static void pmu_set_cmd_line_args_trace_size_v3( + struct nvgpu_pmu *pmu, u32 size) +{ + pmu->fw->args_v3.falc_trace_size = size; +} + +static void pmu_set_cmd_line_args_trace_dma_base_v3(struct nvgpu_pmu *pmu) +{ + pmu->fw->args_v3.falc_trace_dma_base = + ((u32)pmu->trace_buf.gpu_va)/0x100U; +} + +static void pmu_set_cmd_line_args_trace_dma_idx_v3( + struct nvgpu_pmu *pmu, u32 idx) +{ + pmu->fw->args_v3.falc_trace_dma_idx = idx; +} + +static void *pmu_get_cmd_line_args_ptr_v4(struct nvgpu_pmu *pmu) +{ + return (void *)(&pmu->fw->args_v4); +} + +static void *pmu_get_cmd_line_args_ptr_v3(struct nvgpu_pmu *pmu) +{ + return (void *)(&pmu->fw->args_v3); +} + +static void *pmu_get_cmd_line_args_ptr_v5(struct nvgpu_pmu *pmu) +{ + return (void *)(&pmu->fw->args_v5); +} + +static u32 pmu_get_allocation_size_v3(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_allocation_v3); +} + +static u32 pmu_get_allocation_size_v2(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_allocation_v2); +} + +static u32 pmu_get_allocation_size_v1(struct nvgpu_pmu *pmu) +{ + return (u32)sizeof(struct pmu_allocation_v1); +} + +static void pmu_set_allocation_ptr_v3(struct nvgpu_pmu *pmu, + void **pmu_alloc_ptr, void *assign_ptr) +{ + struct pmu_allocation_v3 **pmu_a_ptr = + (struct pmu_allocation_v3 **)pmu_alloc_ptr; + + *pmu_a_ptr = (struct pmu_allocation_v3 *)assign_ptr; +} + +static void pmu_set_allocation_ptr_v2(struct nvgpu_pmu *pmu, + void **pmu_alloc_ptr, void *assign_ptr) +{ + struct pmu_allocation_v2 **pmu_a_ptr = + (struct pmu_allocation_v2 **)pmu_alloc_ptr; + + *pmu_a_ptr = (struct pmu_allocation_v2 *)assign_ptr; +} + +static void pmu_set_allocation_ptr_v1(struct nvgpu_pmu *pmu, + void **pmu_alloc_ptr, void *assign_ptr) +{ + struct pmu_allocation_v1 **pmu_a_ptr = + (struct pmu_allocation_v1 **)pmu_alloc_ptr; + + *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr; +} + +static void pmu_allocation_set_dmem_size_v3(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr, u16 size) +{ + struct pmu_allocation_v3 *pmu_a_ptr = + (struct pmu_allocation_v3 *)pmu_alloc_ptr; + + pmu_a_ptr->alloc.dmem.size = size; +} + +static void pmu_allocation_set_dmem_size_v2(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr, u16 size) +{ + struct pmu_allocation_v2 *pmu_a_ptr = + (struct pmu_allocation_v2 *)pmu_alloc_ptr; + + pmu_a_ptr->alloc.dmem.size = size; +} + +static void pmu_allocation_set_dmem_size_v1(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr, u16 size) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + + pmu_a_ptr->alloc.dmem.size = size; +} + +static u16 pmu_allocation_get_dmem_size_v3(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v3 *pmu_a_ptr = + (struct pmu_allocation_v3 
*)pmu_alloc_ptr; + + return pmu_a_ptr->alloc.dmem.size; +} + +static u16 pmu_allocation_get_dmem_size_v2(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v2 *pmu_a_ptr = + (struct pmu_allocation_v2 *)pmu_alloc_ptr; + + return pmu_a_ptr->alloc.dmem.size; +} + +static u16 pmu_allocation_get_dmem_size_v1(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + + return pmu_a_ptr->alloc.dmem.size; +} + +static u32 pmu_allocation_get_dmem_offset_v3(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v3 *pmu_a_ptr = + (struct pmu_allocation_v3 *)pmu_alloc_ptr; + + return pmu_a_ptr->alloc.dmem.offset; +} + +static u32 pmu_allocation_get_dmem_offset_v2(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v2 *pmu_a_ptr = + (struct pmu_allocation_v2 *)pmu_alloc_ptr; + + return pmu_a_ptr->alloc.dmem.offset; +} + +static u32 pmu_allocation_get_dmem_offset_v1(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + + return pmu_a_ptr->alloc.dmem.offset; +} + +static u32 *pmu_allocation_get_dmem_offset_addr_v3(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v3 *pmu_a_ptr = + (struct pmu_allocation_v3 *)pmu_alloc_ptr; + + return &pmu_a_ptr->alloc.dmem.offset; +} + +static void *pmu_allocation_get_fb_addr_v3( + struct nvgpu_pmu *pmu, void *pmu_alloc_ptr) +{ + struct pmu_allocation_v3 *pmu_a_ptr = + (struct pmu_allocation_v3 *)pmu_alloc_ptr; + + return (void *)&pmu_a_ptr->alloc.fb; +} + +static u32 pmu_allocation_get_fb_size_v3( + struct nvgpu_pmu *pmu, void *pmu_alloc_ptr) +{ + struct pmu_allocation_v3 *pmu_a_ptr = + (struct pmu_allocation_v3 *)pmu_alloc_ptr; + + return (u32)sizeof(pmu_a_ptr->alloc.fb); +} + +static u32 *pmu_allocation_get_dmem_offset_addr_v2(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v2 *pmu_a_ptr = + (struct pmu_allocation_v2 *)pmu_alloc_ptr; + + return &pmu_a_ptr->alloc.dmem.offset; +} + +static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + + return &pmu_a_ptr->alloc.dmem.offset; +} + +static void pmu_allocation_set_dmem_offset_v3(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr, u32 offset) +{ + struct pmu_allocation_v3 *pmu_a_ptr = + (struct pmu_allocation_v3 *)pmu_alloc_ptr; + + pmu_a_ptr->alloc.dmem.offset = offset; +} + +static void pmu_allocation_set_dmem_offset_v2(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr, u32 offset) +{ + struct pmu_allocation_v2 *pmu_a_ptr = + (struct pmu_allocation_v2 *)pmu_alloc_ptr; + + pmu_a_ptr->alloc.dmem.offset = offset; +} + +static void pmu_allocation_set_dmem_offset_v1(struct nvgpu_pmu *pmu, + void *pmu_alloc_ptr, u32 offset) +{ + struct pmu_allocation_v1 *pmu_a_ptr = + (struct pmu_allocation_v1 *)pmu_alloc_ptr; + + pmu_a_ptr->alloc.dmem.offset = offset; +} + +static void *pmu_get_init_msg_ptr_v5(struct pmu_init_msg *init) +{ + return (void *)(&(init->pmu_init_v5)); +} + +static void *pmu_get_init_msg_ptr_v4(struct pmu_init_msg *init) +{ + return (void *)(&(init->pmu_init_v4)); +} + +static u16 pmu_get_init_msg_sw_mngd_area_off_v5( + union pmu_init_msg_pmu *init_msg) +{ + struct pmu_nvgpu_rpc_struct_cmdmgmt_init *init = + (struct pmu_nvgpu_rpc_struct_cmdmgmt_init *)(&init_msg->v5); + + return init->sw_managed_area_offset; +} + +static u16 
pmu_get_init_msg_sw_mngd_area_off_v4( + union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v4 *init = + (struct pmu_init_msg_pmu_v4 *)(&init_msg->v4); + + return init->sw_managed_area_offset; +} + +static u16 pmu_get_init_msg_sw_mngd_area_size_v5( + union pmu_init_msg_pmu *init_msg) +{ + struct pmu_nvgpu_rpc_struct_cmdmgmt_init *init = + (struct pmu_nvgpu_rpc_struct_cmdmgmt_init *)(&init_msg->v5); + + return init->sw_managed_area_size; +} + +static u16 pmu_get_init_msg_sw_mngd_area_size_v4( + union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v4 *init = + (struct pmu_init_msg_pmu_v4 *)(&init_msg->v4); + + return init->sw_managed_area_size; +} + +static void *pmu_get_init_msg_ptr_v1(struct pmu_init_msg *init) +{ + return (void *)(&(init->pmu_init_v1)); +} + +static u16 pmu_get_init_msg_sw_mngd_area_off_v1( + union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v1 *init = + (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1); + + return init->sw_managed_area_offset; +} + +static u16 pmu_get_init_msg_sw_mngd_area_size_v1( + union pmu_init_msg_pmu *init_msg) +{ + struct pmu_init_msg_pmu_v1 *init = + (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1); + + return init->sw_managed_area_size; +} + +static u32 pmu_get_perfmon_cmd_start_size_v3(void) +{ + return (u32)sizeof(struct pmu_perfmon_cmd_start_v3); +} + +static u32 pmu_get_perfmon_cmd_start_size_v2(void) +{ + return (u32)sizeof(struct pmu_perfmon_cmd_start_v2); +} + +static u32 pmu_get_perfmon_cmd_start_size_v1(void) +{ + return (u32)sizeof(struct pmu_perfmon_cmd_start_v1); +} + +static int pmu_get_perfmon_cmd_start_offset_of_var_v3( + enum pmu_perfmon_cmd_start_fields field, u32 *offset) +{ + int status = 0; + + switch (field) { + case COUNTER_ALLOC: + *offset = (u32)offsetof(struct pmu_perfmon_cmd_start_v3, + counter_alloc); + break; + + default: + status = -EINVAL; + break; + } + + return status; +} + +static int pmu_get_perfmon_cmd_start_offset_of_var_v2( + enum pmu_perfmon_cmd_start_fields field, u32 *offset) +{ + int status = 0; + + switch (field) { + case COUNTER_ALLOC: + *offset = (u32)offsetof(struct pmu_perfmon_cmd_start_v2, + counter_alloc); + break; + + default: + status = -EINVAL; + break; + } + + return status; +} + +static int pmu_get_perfmon_cmd_start_offset_of_var_v1( + enum pmu_perfmon_cmd_start_fields field, u32 *offset) +{ + int status = 0; + + switch (field) { + case COUNTER_ALLOC: + *offset = (u32)offsetof(struct pmu_perfmon_cmd_start_v1, + counter_alloc); + break; + + default: + status = -EINVAL; + break; + } + + return status; +} + +static u32 pmu_get_perfmon_cmd_init_size_v3(void) +{ + return (u32)sizeof(struct pmu_perfmon_cmd_init_v3); +} + +static u32 pmu_get_perfmon_cmd_init_size_v2(void) +{ + return (u32)sizeof(struct pmu_perfmon_cmd_init_v2); +} + +static u32 pmu_get_perfmon_cmd_init_size_v1(void) +{ + return (u32)sizeof(struct pmu_perfmon_cmd_init_v1); +} + +static int pmu_get_perfmon_cmd_init_offset_of_var_v3( + enum pmu_perfmon_cmd_start_fields field, u32 *offset) +{ + int status = 0; + + switch (field) { + case COUNTER_ALLOC: + *offset = (u32)offsetof(struct pmu_perfmon_cmd_init_v3, + counter_alloc); + break; + + default: + status = -EINVAL; + break; + } + + return status; +} + +static int pmu_get_perfmon_cmd_init_offset_of_var_v2( + enum pmu_perfmon_cmd_start_fields field, u32 *offset) +{ + int status = 0; + + switch (field) { + case COUNTER_ALLOC: + *offset = (u32)offsetof(struct pmu_perfmon_cmd_init_v2, + counter_alloc); + break; + + default: + status = -EINVAL; + break; + } + + 
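+ /* Only COUNTER_ALLOC has a queryable offset in the v2 init command; any other field is rejected above with -EINVAL. */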
return status; +} + +static int pmu_get_perfmon_cmd_init_offset_of_var_v1( + enum pmu_perfmon_cmd_start_fields field, u32 *offset) +{ + int status = 0; + + switch (field) { + case COUNTER_ALLOC: + *offset = (u32)offsetof(struct pmu_perfmon_cmd_init_v1, + counter_alloc); + break; + + default: + status = -EINVAL; + break; + } + + return status; +} + +static void pmu_perfmon_start_set_cmd_type_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3; + + start->cmd_type = value; +} + +static void pmu_perfmon_start_set_cmd_type_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2; + + start->cmd_type = value; +} + +static void pmu_perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + + start->cmd_type = value; +} + +static void pmu_perfmon_start_set_group_id_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3; + + start->group_id = value; +} + +static void pmu_perfmon_start_set_group_id_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2; + + start->group_id = value; +} + +static void pmu_perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + + start->group_id = value; +} + +static void pmu_perfmon_start_set_state_id_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3; + + start->state_id = value; +} + +static void pmu_perfmon_start_set_state_id_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2; + + start->state_id = value; +} + +static void pmu_perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + + start->state_id = value; +} + +static void pmu_perfmon_start_set_flags_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3; + + start->flags = value; +} + +static void pmu_perfmon_start_set_flags_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2; + + start->flags = value; +} + +static void pmu_perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + + start->flags = value; +} + +static u8 pmu_perfmon_start_get_flags_v3(struct pmu_perfmon_cmd *pc) +{ + struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3; + + return start->flags; +} + +static u8 pmu_perfmon_start_get_flags_v2(struct pmu_perfmon_cmd *pc) +{ + struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2; + + return start->flags; +} + +static u8 pmu_perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc) +{ + struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1; + + return start->flags; +} + +static void pmu_perfmon_cmd_init_set_sample_buffer_v3( + struct pmu_perfmon_cmd *pc, u16 value) +{ + struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3; + + init->sample_buffer = value; +} + +static void pmu_perfmon_cmd_init_set_sample_buffer_v2( + struct pmu_perfmon_cmd *pc, u16 value) +{ + struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2; + + init->sample_buffer = value; +} + + +static void pmu_perfmon_cmd_init_set_sample_buffer_v1( + struct pmu_perfmon_cmd *pc, u16 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + + 
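+ /* Versioned setter: callers reach this through pmu->fw->ops, so they need not know which per-version command layout is in use. */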
init->sample_buffer = value; +} + +static void pmu_perfmon_cmd_init_set_dec_cnt_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3; + + init->to_decrease_count = value; +} + +static void pmu_perfmon_cmd_init_set_dec_cnt_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2; + + init->to_decrease_count = value; +} + +static void pmu_perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + + init->to_decrease_count = value; +} + +static void pmu_perfmon_cmd_init_set_base_cnt_id_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3; + + init->base_counter_id = value; +} + +static void pmu_perfmon_cmd_init_set_base_cnt_id_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2; + + init->base_counter_id = value; +} + +static void pmu_perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + + init->base_counter_id = value; +} + +static void pmu_perfmon_cmd_init_set_samp_period_us_v3( + struct pmu_perfmon_cmd *pc, u32 value) +{ + struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3; + + init->sample_period_us = value; +} + +static void pmu_perfmon_cmd_init_set_samp_period_us_v2( + struct pmu_perfmon_cmd *pc, u32 value) +{ + struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2; + + init->sample_period_us = value; +} + +static void pmu_perfmon_cmd_init_set_samp_period_us_v1( + struct pmu_perfmon_cmd *pc, u32 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + + init->sample_period_us = value; +} + +static void pmu_perfmon_cmd_init_set_num_cnt_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3; + + init->num_counters = value; +} + +static void pmu_perfmon_cmd_init_set_num_cnt_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2; + + init->num_counters = value; +} + +static void pmu_perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + + init->num_counters = value; +} + +static void pmu_perfmon_cmd_init_set_mov_avg_v3(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3; + + init->samples_in_moving_avg = value; +} + +static void pmu_perfmon_cmd_init_set_mov_avg_v2(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2; + + init->samples_in_moving_avg = value; +} + +static void pmu_perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc, + u8 value) +{ + struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1; + + init->samples_in_moving_avg = value; +} + +static void pmu_get_init_msg_queue_params_v1( + u32 id, void *init_msg, u32 *index, u32 *offset, u32 *size) +{ + struct pmu_init_msg_pmu_v1 *init = + (struct pmu_init_msg_pmu_v1 *)init_msg; + + *index = init->queue_info[id].index; + *offset = init->queue_info[id].offset; + *size = init->queue_info[id].size; +} + +static void pmu_get_init_msg_queue_params_v4( + u32 id, void *init_msg, u32 *index, u32 *offset, u32 *size) +{ + struct pmu_init_msg_pmu_v4 *init = init_msg; + u32 current_ptr = 0; + u32 i; + + if (id == PMU_COMMAND_QUEUE_HPQ) { + id = PMU_QUEUE_HPQ_IDX_FOR_V3; + } else if (id == PMU_COMMAND_QUEUE_LPQ) { + id = PMU_QUEUE_LPQ_IDX_FOR_V3; + } 
else if (id == PMU_MESSAGE_QUEUE) { + id = PMU_QUEUE_MSG_IDX_FOR_V3; + } else { + return; + } + + *index = init->queue_index[id]; + *size = init->queue_size[id]; + if (id != 0U) { + for (i = 0 ; i < id; i++) { + current_ptr += init->queue_size[i]; + } + } + *offset = init->queue_offset + current_ptr; +} + +static void *pmu_get_sequence_in_alloc_ptr_v3(struct pmu_sequence *seq) +{ + return (void *)(&seq->in_v3); +} + +static void *pmu_get_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq) +{ + return (void *)(&seq->in_v1); +} + +static void *pmu_get_sequence_out_alloc_ptr_v3(struct pmu_sequence *seq) +{ + return (void *)(&seq->out_v3); +} + +static void *pmu_get_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq) +{ + return (void *)(&seq->out_v1); +} + +static u8 pmu_pg_cmd_eng_buf_load_size_v0(struct pmu_pg_cmd *pg) +{ + size_t tmp_size = sizeof(pg->eng_buf_load_v0); + + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + return U8(tmp_size); +} + +static u8 pmu_pg_cmd_eng_buf_load_size_v1(struct pmu_pg_cmd *pg) +{ + size_t tmp_size = sizeof(pg->eng_buf_load_v1); + + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + return U8(tmp_size); +} + +static u8 pmu_pg_cmd_eng_buf_load_size_v2(struct pmu_pg_cmd *pg) +{ + size_t tmp_size = sizeof(pg->eng_buf_load_v2); + + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + return U8(tmp_size); +} + +static void pmu_pg_cmd_eng_buf_load_set_cmd_type_v0(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v0.cmd_type = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_cmd_type_v1(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v1.cmd_type = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_cmd_type_v2(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v2.cmd_type = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_engine_id_v0(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v0.engine_id = value; +} +static void pmu_pg_cmd_eng_buf_load_set_engine_id_v1(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v1.engine_id = value; +} +static void pmu_pg_cmd_eng_buf_load_set_engine_id_v2(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v2.engine_id = value; +} +static void pmu_pg_cmd_eng_buf_load_set_buf_idx_v0(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v0.buf_idx = value; +} +static void pmu_pg_cmd_eng_buf_load_set_buf_idx_v1(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v1.buf_idx = value; +} +static void pmu_pg_cmd_eng_buf_load_set_buf_idx_v2(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v2.buf_idx = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_pad_v0(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v0.pad = value; +} +static void pmu_pg_cmd_eng_buf_load_set_pad_v1(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v1.pad = value; +} +static void pmu_pg_cmd_eng_buf_load_set_pad_v2(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v2.pad = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_buf_size_v0(struct pmu_pg_cmd *pg, + u16 value) +{ + pg->eng_buf_load_v0.buf_size = value; +} +static void pmu_pg_cmd_eng_buf_load_set_buf_size_v1(struct pmu_pg_cmd *pg, + u16 value) +{ + pg->eng_buf_load_v1.dma_desc.dma_size = value; +} +static void pmu_pg_cmd_eng_buf_load_set_buf_size_v2(struct pmu_pg_cmd *pg, + u16 value) +{ + pg->eng_buf_load_v2.dma_desc.params = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_dma_base_v0(struct pmu_pg_cmd *pg, + u32 value) +{ + pg->eng_buf_load_v0.dma_base = (value >> 8); +} +static void pmu_pg_cmd_eng_buf_load_set_dma_base_v1(struct 
pmu_pg_cmd *pg, + u32 value) +{ + pg->eng_buf_load_v1.dma_desc.dma_addr.lo |= u64_lo32(value); + pg->eng_buf_load_v1.dma_desc.dma_addr.hi |= u64_hi32(value); +} +static void pmu_pg_cmd_eng_buf_load_set_dma_base_v2(struct pmu_pg_cmd *pg, + u32 value) +{ + pg->eng_buf_load_v2.dma_desc.address.lo = u64_lo32(value); + pg->eng_buf_load_v2.dma_desc.address.hi = u64_lo32(value); +} + +static void pmu_pg_cmd_eng_buf_load_set_dma_offset_v0(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v0.dma_offset = value; +} +static void pmu_pg_cmd_eng_buf_load_set_dma_offset_v1(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v1.dma_desc.dma_addr.lo |= value; +} +static void pmu_pg_cmd_eng_buf_load_set_dma_offset_v2(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v2.dma_desc.address.lo |= u64_lo32(value); + pg->eng_buf_load_v2.dma_desc.address.hi |= u64_lo32(value); +} + +static void pmu_pg_cmd_eng_buf_load_set_dma_idx_v0(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v0.dma_idx = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_dma_idx_v1(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v1.dma_desc.dma_idx = value; +} + +static void pmu_pg_cmd_eng_buf_load_set_dma_idx_v2(struct pmu_pg_cmd *pg, + u8 value) +{ + pg->eng_buf_load_v2.dma_desc.params |= (U32(value) << U32(24)); +} + +static int pmu_prepare_ns_ucode_blob(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + struct pmu_ucode_desc *desc; + struct pmu_rtos_fw *rtos_fw = pmu->fw; + u32 *ucode_image = NULL; + int err = 0; + + nvgpu_log_fn(g, " "); + + desc = (struct pmu_ucode_desc *)(void *)rtos_fw->fw_desc->data; + ucode_image = (u32 *)(void *)rtos_fw->fw_image->data; + + if (!nvgpu_mem_is_valid(&rtos_fw->ucode)) { + err = nvgpu_dma_alloc_map_sys(vm, PMU_RTOS_UCODE_SIZE_MAX, + &rtos_fw->ucode); + if (err != 0) { + goto exit; + } + } + + nvgpu_mem_wr_n(g, &pmu->fw->ucode, 0, ucode_image, + (desc->app_start_offset + desc->app_size)); + +exit: + return err; +} + +static int pmu_prepare_ns_ucode_blob_v1(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + struct pmu_ucode_desc_v1 *desc; + struct pmu_rtos_fw *rtos_fw = pmu->fw; + u32 *ucode_image = NULL; + int err = 0; + + nvgpu_log_fn(g, " "); + + ucode_image = (u32 *)(void *)rtos_fw->fw_image->data; + + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + if (!nvgpu_mem_is_valid(&rtos_fw->ucode)) { + err = nvgpu_dma_alloc_flags_sys(g, + NVGPU_DMA_PHYSICALLY_ADDRESSED, + PMU_RTOS_UCODE_SIZE_MAX, + &rtos_fw->ucode); + if (err != 0) { + goto exit; + } + } + + nvgpu_mem_wr_n(g, &pmu->fw->ucode, 0, ucode_image, + rtos_fw->fw_image->size); + +#if defined(CONFIG_NVGPU_NEXT) + /* alloc boot args */ + err = nvgpu_pmu_next_core_rtos_args_allocate(g, pmu); + if (err != 0) { + goto exit; + } +#endif + } else { + desc = (struct pmu_ucode_desc_v1 *)(void *) + rtos_fw->fw_desc->data; + + if (!nvgpu_mem_is_valid(&rtos_fw->ucode)) { + err = nvgpu_dma_alloc_map_sys(vm, + PMU_RTOS_UCODE_SIZE_MAX, + &rtos_fw->ucode); + if (err != 0) { + goto exit; + } + } + + nvgpu_mem_wr_n(g, &pmu->fw->ucode, 0, ucode_image, + (desc->app_start_offset + desc->app_size)); + } + +exit: + return err; +} + +int nvgpu_pmu_init_fw_ver_ops(struct gk20a *g, + struct nvgpu_pmu *pmu, u32 app_version) +{ + struct pmu_fw_ver_ops *fw_ops = &pmu->fw->ops; + int err = 0; + + nvgpu_log_fn(g, " "); + + switch (app_version) { + case APP_VERSION_GP10B: + 
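+ /*
+ * GP10B firmware: the v1 PG engine-buffer-load commands, v4 command-line
+ * arguments, v2 allocation and perfmon command structures, and the v1
+ * init-message layout are wired up below.
+ */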
fw_ops->pg_cmd_eng_buf_load_size = + pmu_pg_cmd_eng_buf_load_size_v1; + fw_ops->pg_cmd_eng_buf_load_set_cmd_type = + pmu_pg_cmd_eng_buf_load_set_cmd_type_v1; + fw_ops->pg_cmd_eng_buf_load_set_engine_id = + pmu_pg_cmd_eng_buf_load_set_engine_id_v1; + fw_ops->pg_cmd_eng_buf_load_set_buf_idx = + pmu_pg_cmd_eng_buf_load_set_buf_idx_v1; + fw_ops->pg_cmd_eng_buf_load_set_pad = + pmu_pg_cmd_eng_buf_load_set_pad_v1; + fw_ops->pg_cmd_eng_buf_load_set_buf_size = + pmu_pg_cmd_eng_buf_load_set_buf_size_v1; + fw_ops->pg_cmd_eng_buf_load_set_dma_base = + pmu_pg_cmd_eng_buf_load_set_dma_base_v1; + fw_ops->pg_cmd_eng_buf_load_set_dma_offset = + pmu_pg_cmd_eng_buf_load_set_dma_offset_v1; + fw_ops->pg_cmd_eng_buf_load_set_dma_idx = + pmu_pg_cmd_eng_buf_load_set_dma_idx_v1; + fw_ops->get_perfmon_cntr_ptr = pmu_get_perfmon_cntr_ptr_v2; + fw_ops->set_perfmon_cntr_ut = pmu_set_perfmon_cntr_ut_v2; + fw_ops->set_perfmon_cntr_lt = pmu_set_perfmon_cntr_lt_v2; + fw_ops->set_perfmon_cntr_valid = + pmu_set_perfmon_cntr_valid_v2; + fw_ops->set_perfmon_cntr_index = + pmu_set_perfmon_cntr_index_v2; + fw_ops->set_perfmon_cntr_group_id = + pmu_set_perfmon_cntr_group_id_v2; + fw_ops->get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2; + g->pmu_ver_cmd_id_zbc_table_update = 16; + nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, false); + fw_ops->get_cmd_line_args_size = + pmu_cmd_line_size_v4; + fw_ops->set_cmd_line_args_cpu_freq = + pmu_set_cmd_line_args_cpu_freq_v4; + fw_ops->set_cmd_line_args_secure_mode = + pmu_set_cmd_line_args_secure_mode_v4; + fw_ops->set_cmd_line_args_trace_size = + pmu_set_cmd_line_args_trace_size_v4; + fw_ops->set_cmd_line_args_trace_dma_base = + pmu_set_cmd_line_args_trace_dma_base_v4; + fw_ops->set_cmd_line_args_trace_dma_idx = + pmu_set_cmd_line_args_trace_dma_idx_v4; + fw_ops->get_cmd_line_args_ptr = + pmu_get_cmd_line_args_ptr_v4; + fw_ops->get_allocation_struct_size = + pmu_get_allocation_size_v2; + fw_ops->set_allocation_ptr = + pmu_set_allocation_ptr_v2; + fw_ops->allocation_set_dmem_size = + pmu_allocation_set_dmem_size_v2; + fw_ops->allocation_get_dmem_size = + pmu_allocation_get_dmem_size_v2; + fw_ops->allocation_get_dmem_offset = + pmu_allocation_get_dmem_offset_v2; + fw_ops->allocation_get_dmem_offset_addr = + pmu_allocation_get_dmem_offset_addr_v2; + fw_ops->allocation_set_dmem_offset = + pmu_allocation_set_dmem_offset_v2; + fw_ops->get_init_msg_queue_params = + pmu_get_init_msg_queue_params_v1; + fw_ops->get_init_msg_ptr = + pmu_get_init_msg_ptr_v1; + fw_ops->get_init_msg_sw_mngd_area_off = + pmu_get_init_msg_sw_mngd_area_off_v1; + fw_ops->get_init_msg_sw_mngd_area_size = + pmu_get_init_msg_sw_mngd_area_size_v1; + fw_ops->get_perfmon_cmd_start_size = + pmu_get_perfmon_cmd_start_size_v2; + fw_ops->get_perfmon_cmd_start_offset_of_var = + pmu_get_perfmon_cmd_start_offset_of_var_v2; + fw_ops->perfmon_start_set_cmd_type = + pmu_perfmon_start_set_cmd_type_v2; + fw_ops->perfmon_start_set_group_id = + pmu_perfmon_start_set_group_id_v2; + fw_ops->perfmon_start_set_state_id = + pmu_perfmon_start_set_state_id_v2; + fw_ops->perfmon_start_set_flags = + pmu_perfmon_start_set_flags_v2; + fw_ops->perfmon_start_get_flags = + pmu_perfmon_start_get_flags_v2; + fw_ops->get_perfmon_cmd_init_size = + pmu_get_perfmon_cmd_init_size_v2; + fw_ops->get_perfmon_cmd_init_offset_of_var = + pmu_get_perfmon_cmd_init_offset_of_var_v2; + fw_ops->perfmon_cmd_init_set_sample_buffer = + pmu_perfmon_cmd_init_set_sample_buffer_v2; + fw_ops->perfmon_cmd_init_set_dec_cnt = + pmu_perfmon_cmd_init_set_dec_cnt_v2; + 
fw_ops->perfmon_cmd_init_set_base_cnt_id = + pmu_perfmon_cmd_init_set_base_cnt_id_v2; + fw_ops->perfmon_cmd_init_set_samp_period_us = + pmu_perfmon_cmd_init_set_samp_period_us_v2; + fw_ops->perfmon_cmd_init_set_num_cnt = + pmu_perfmon_cmd_init_set_num_cnt_v2; + fw_ops->perfmon_cmd_init_set_mov_avg = + pmu_perfmon_cmd_init_set_mov_avg_v2; + fw_ops->get_seq_in_alloc_ptr = + pmu_get_sequence_in_alloc_ptr_v1; + fw_ops->get_seq_out_alloc_ptr = + pmu_get_sequence_out_alloc_ptr_v1; + fw_ops->prepare_ns_ucode_blob = + pmu_prepare_ns_ucode_blob; + break; + case APP_VERSION_GV11B: + case APP_VERSION_GV10X: + case APP_VERSION_TU10X: + case APP_VERSION_NVGPU_NEXT: + case APP_VERSION_NVGPU_NEXT_CORE: + fw_ops->pg_cmd_eng_buf_load_size = + pmu_pg_cmd_eng_buf_load_size_v2; + fw_ops->pg_cmd_eng_buf_load_set_cmd_type = + pmu_pg_cmd_eng_buf_load_set_cmd_type_v2; + fw_ops->pg_cmd_eng_buf_load_set_engine_id = + pmu_pg_cmd_eng_buf_load_set_engine_id_v2; + fw_ops->pg_cmd_eng_buf_load_set_buf_idx = + pmu_pg_cmd_eng_buf_load_set_buf_idx_v2; + fw_ops->pg_cmd_eng_buf_load_set_pad = + pmu_pg_cmd_eng_buf_load_set_pad_v2; + fw_ops->pg_cmd_eng_buf_load_set_buf_size = + pmu_pg_cmd_eng_buf_load_set_buf_size_v2; + fw_ops->pg_cmd_eng_buf_load_set_dma_base = + pmu_pg_cmd_eng_buf_load_set_dma_base_v2; + fw_ops->pg_cmd_eng_buf_load_set_dma_offset = + pmu_pg_cmd_eng_buf_load_set_dma_offset_v2; + fw_ops->pg_cmd_eng_buf_load_set_dma_idx = + pmu_pg_cmd_eng_buf_load_set_dma_idx_v2; + fw_ops->get_perfmon_cntr_ptr = pmu_get_perfmon_cntr_ptr_v2; + fw_ops->set_perfmon_cntr_ut = pmu_set_perfmon_cntr_ut_v2; + fw_ops->set_perfmon_cntr_lt = pmu_set_perfmon_cntr_lt_v2; + fw_ops->set_perfmon_cntr_valid = + pmu_set_perfmon_cntr_valid_v2; + fw_ops->set_perfmon_cntr_index = + pmu_set_perfmon_cntr_index_v2; + fw_ops->set_perfmon_cntr_group_id = + pmu_set_perfmon_cntr_group_id_v2; + fw_ops->get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2; + g->pmu_ver_cmd_id_zbc_table_update = 16; + nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, false); + fw_ops->get_cmd_line_args_size = + pmu_cmd_line_size_v6; + fw_ops->set_cmd_line_args_cpu_freq = + pmu_set_cmd_line_args_cpu_freq_v5; + fw_ops->set_cmd_line_args_secure_mode = + pmu_set_cmd_line_args_secure_mode_v5; + fw_ops->set_cmd_line_args_trace_size = + pmu_set_cmd_line_args_trace_size_v5; + fw_ops->set_cmd_line_args_trace_dma_base = + pmu_set_cmd_line_args_trace_dma_base_v5; + fw_ops->set_cmd_line_args_trace_dma_idx = + pmu_set_cmd_line_args_trace_dma_idx_v5; + fw_ops->config_cmd_line_args_super_surface = + config_cmd_line_args_super_surface_v6; + fw_ops->get_cmd_line_args_ptr = + pmu_get_cmd_line_args_ptr_v5; + fw_ops->get_allocation_struct_size = + pmu_get_allocation_size_v3; + fw_ops->set_allocation_ptr = + pmu_set_allocation_ptr_v3; + fw_ops->allocation_set_dmem_size = + pmu_allocation_set_dmem_size_v3; + fw_ops->allocation_get_dmem_size = + pmu_allocation_get_dmem_size_v3; + fw_ops->allocation_get_dmem_offset = + pmu_allocation_get_dmem_offset_v3; + fw_ops->allocation_get_dmem_offset_addr = + pmu_allocation_get_dmem_offset_addr_v3; + fw_ops->allocation_set_dmem_offset = + pmu_allocation_set_dmem_offset_v3; + fw_ops->allocation_get_fb_addr = + pmu_allocation_get_fb_addr_v3; + fw_ops->allocation_get_fb_size = + pmu_allocation_get_fb_size_v3; + if (app_version == APP_VERSION_GV10X || + app_version == APP_VERSION_TU10X || + app_version == APP_VERSION_NVGPU_NEXT || + app_version == APP_VERSION_NVGPU_NEXT_CORE) { + fw_ops->get_init_msg_ptr = + pmu_get_init_msg_ptr_v5; + fw_ops->get_init_msg_sw_mngd_area_off = + 
pmu_get_init_msg_sw_mngd_area_off_v5; + fw_ops->get_init_msg_sw_mngd_area_size = + pmu_get_init_msg_sw_mngd_area_size_v5; + if (app_version == APP_VERSION_GV10X) { + fw_ops->clk.clk_set_boot_clk = NULL; + } else { + fw_ops->clk.clk_set_boot_clk = NULL; + } + } else { + fw_ops->get_init_msg_queue_params = + pmu_get_init_msg_queue_params_v4; + fw_ops->get_init_msg_ptr = + pmu_get_init_msg_ptr_v4; + fw_ops->get_init_msg_sw_mngd_area_off = + pmu_get_init_msg_sw_mngd_area_off_v4; + fw_ops->get_init_msg_sw_mngd_area_size = + pmu_get_init_msg_sw_mngd_area_size_v4; + } + fw_ops->get_perfmon_cmd_start_size = + pmu_get_perfmon_cmd_start_size_v3; + fw_ops->get_perfmon_cmd_start_offset_of_var = + pmu_get_perfmon_cmd_start_offset_of_var_v3; + fw_ops->perfmon_start_set_cmd_type = + pmu_perfmon_start_set_cmd_type_v3; + fw_ops->perfmon_start_set_group_id = + pmu_perfmon_start_set_group_id_v3; + fw_ops->perfmon_start_set_state_id = + pmu_perfmon_start_set_state_id_v3; + fw_ops->perfmon_start_set_flags = + pmu_perfmon_start_set_flags_v3; + fw_ops->perfmon_start_get_flags = + pmu_perfmon_start_get_flags_v3; + fw_ops->get_perfmon_cmd_init_size = + pmu_get_perfmon_cmd_init_size_v3; + fw_ops->get_perfmon_cmd_init_offset_of_var = + pmu_get_perfmon_cmd_init_offset_of_var_v3; + fw_ops->perfmon_cmd_init_set_sample_buffer = + pmu_perfmon_cmd_init_set_sample_buffer_v3; + fw_ops->perfmon_cmd_init_set_dec_cnt = + pmu_perfmon_cmd_init_set_dec_cnt_v3; + fw_ops->perfmon_cmd_init_set_base_cnt_id = + pmu_perfmon_cmd_init_set_base_cnt_id_v3; + fw_ops->perfmon_cmd_init_set_samp_period_us = + pmu_perfmon_cmd_init_set_samp_period_us_v3; + fw_ops->perfmon_cmd_init_set_num_cnt = + pmu_perfmon_cmd_init_set_num_cnt_v3; + fw_ops->perfmon_cmd_init_set_mov_avg = + pmu_perfmon_cmd_init_set_mov_avg_v3; + fw_ops->get_seq_in_alloc_ptr = + pmu_get_sequence_in_alloc_ptr_v3; + fw_ops->get_seq_out_alloc_ptr = + pmu_get_sequence_out_alloc_ptr_v3; + if (app_version == APP_VERSION_NVGPU_NEXT || + app_version == APP_VERSION_NVGPU_NEXT_CORE) { + fw_ops->prepare_ns_ucode_blob = + pmu_prepare_ns_ucode_blob_v1; + fw_ops->get_cmd_line_args_size = + pmu_cmd_line_size_v7; + fw_ops->config_cmd_line_args_super_surface = + config_cmd_line_args_super_surface_v7; + } else { + fw_ops->prepare_ns_ucode_blob = + pmu_prepare_ns_ucode_blob; + } + break; + case APP_VERSION_GM20B: + fw_ops->pg_cmd_eng_buf_load_size = + pmu_pg_cmd_eng_buf_load_size_v0; + fw_ops->pg_cmd_eng_buf_load_set_cmd_type = + pmu_pg_cmd_eng_buf_load_set_cmd_type_v0; + fw_ops->pg_cmd_eng_buf_load_set_engine_id = + pmu_pg_cmd_eng_buf_load_set_engine_id_v0; + fw_ops->pg_cmd_eng_buf_load_set_buf_idx = + pmu_pg_cmd_eng_buf_load_set_buf_idx_v0; + fw_ops->pg_cmd_eng_buf_load_set_pad = + pmu_pg_cmd_eng_buf_load_set_pad_v0; + fw_ops->pg_cmd_eng_buf_load_set_buf_size = + pmu_pg_cmd_eng_buf_load_set_buf_size_v0; + fw_ops->pg_cmd_eng_buf_load_set_dma_base = + pmu_pg_cmd_eng_buf_load_set_dma_base_v0; + fw_ops->pg_cmd_eng_buf_load_set_dma_offset = + pmu_pg_cmd_eng_buf_load_set_dma_offset_v0; + fw_ops->pg_cmd_eng_buf_load_set_dma_idx = + pmu_pg_cmd_eng_buf_load_set_dma_idx_v0; + fw_ops->get_perfmon_cntr_ptr = pmu_get_perfmon_cntr_ptr_v2; + fw_ops->set_perfmon_cntr_ut = pmu_set_perfmon_cntr_ut_v2; + fw_ops->set_perfmon_cntr_lt = pmu_set_perfmon_cntr_lt_v2; + fw_ops->set_perfmon_cntr_valid = + pmu_set_perfmon_cntr_valid_v2; + fw_ops->set_perfmon_cntr_index = + pmu_set_perfmon_cntr_index_v2; + fw_ops->set_perfmon_cntr_group_id = + pmu_set_perfmon_cntr_group_id_v2; + fw_ops->get_perfmon_cntr_sz = 
pmu_perfmon_cntr_sz_v2; + g->pmu_ver_cmd_id_zbc_table_update = 16; + nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true); + fw_ops->get_cmd_line_args_size = + pmu_cmd_line_size_v3; + fw_ops->set_cmd_line_args_cpu_freq = + pmu_set_cmd_line_args_cpu_freq_v3; + fw_ops->set_cmd_line_args_secure_mode = + pmu_set_cmd_line_args_secure_mode_v3; + fw_ops->set_cmd_line_args_trace_size = + pmu_set_cmd_line_args_trace_size_v3; + fw_ops->set_cmd_line_args_trace_dma_base = + pmu_set_cmd_line_args_trace_dma_base_v3; + fw_ops->set_cmd_line_args_trace_dma_idx = + pmu_set_cmd_line_args_trace_dma_idx_v3; + fw_ops->get_cmd_line_args_ptr = + pmu_get_cmd_line_args_ptr_v3; + fw_ops->get_allocation_struct_size = + pmu_get_allocation_size_v1; + fw_ops->set_allocation_ptr = + pmu_set_allocation_ptr_v1; + fw_ops->allocation_set_dmem_size = + pmu_allocation_set_dmem_size_v1; + fw_ops->allocation_get_dmem_size = + pmu_allocation_get_dmem_size_v1; + fw_ops->allocation_get_dmem_offset = + pmu_allocation_get_dmem_offset_v1; + fw_ops->allocation_get_dmem_offset_addr = + pmu_allocation_get_dmem_offset_addr_v1; + fw_ops->allocation_set_dmem_offset = + pmu_allocation_set_dmem_offset_v1; + fw_ops->get_init_msg_queue_params = + pmu_get_init_msg_queue_params_v1; + fw_ops->get_init_msg_ptr = + pmu_get_init_msg_ptr_v1; + fw_ops->get_init_msg_sw_mngd_area_off = + pmu_get_init_msg_sw_mngd_area_off_v1; + fw_ops->get_init_msg_sw_mngd_area_size = + pmu_get_init_msg_sw_mngd_area_size_v1; + fw_ops->get_perfmon_cmd_start_size = + pmu_get_perfmon_cmd_start_size_v1; + fw_ops->get_perfmon_cmd_start_offset_of_var = + pmu_get_perfmon_cmd_start_offset_of_var_v1; + fw_ops->perfmon_start_set_cmd_type = + pmu_perfmon_start_set_cmd_type_v1; + fw_ops->perfmon_start_set_group_id = + pmu_perfmon_start_set_group_id_v1; + fw_ops->perfmon_start_set_state_id = + pmu_perfmon_start_set_state_id_v1; + fw_ops->perfmon_start_set_flags = + pmu_perfmon_start_set_flags_v1; + fw_ops->perfmon_start_get_flags = + pmu_perfmon_start_get_flags_v1; + fw_ops->get_perfmon_cmd_init_size = + pmu_get_perfmon_cmd_init_size_v1; + fw_ops->get_perfmon_cmd_init_offset_of_var = + pmu_get_perfmon_cmd_init_offset_of_var_v1; + fw_ops->perfmon_cmd_init_set_sample_buffer = + pmu_perfmon_cmd_init_set_sample_buffer_v1; + fw_ops->perfmon_cmd_init_set_dec_cnt = + pmu_perfmon_cmd_init_set_dec_cnt_v1; + fw_ops->perfmon_cmd_init_set_base_cnt_id = + pmu_perfmon_cmd_init_set_base_cnt_id_v1; + fw_ops->perfmon_cmd_init_set_samp_period_us = + pmu_perfmon_cmd_init_set_samp_period_us_v1; + fw_ops->perfmon_cmd_init_set_num_cnt = + pmu_perfmon_cmd_init_set_num_cnt_v1; + fw_ops->perfmon_cmd_init_set_mov_avg = + pmu_perfmon_cmd_init_set_mov_avg_v1; + fw_ops->get_seq_in_alloc_ptr = + pmu_get_sequence_in_alloc_ptr_v1; + fw_ops->get_seq_out_alloc_ptr = + pmu_get_sequence_out_alloc_ptr_v1; + fw_ops->prepare_ns_ucode_blob = + pmu_prepare_ns_ucode_blob; + break; + default: + nvgpu_err(g, "PMU code version not supported version: %d\n", + app_version); + err = -EINVAL; + break; + } + + fw_ops->set_perfmon_cntr_index(pmu, 3); /* GR & CE2 */ + fw_ops->set_perfmon_cntr_group_id(pmu, PMU_DOMAIN_GROUP_PSTATE); + + return err; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c new file mode 100644 index 000000000..b5b96026e --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c @@ -0,0 +1,748 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool pmu_validate_in_out_payload(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd, + struct pmu_in_out_payload_desc *payload) +{ + u32 size; + + if (payload->offset != 0U && payload->buf == NULL) { + return false; + } + + if (payload->buf == NULL) { + return true; + } + + if (payload->size == 0U) { + return false; + } + + size = PMU_CMD_HDR_SIZE; + size += payload->offset; + size += pmu->fw->ops.get_allocation_struct_size(pmu); + + if (size > cmd->hdr.size) { + return false; + } + + return true; +} + +static bool pmu_validate_rpc_payload(struct pmu_payload *payload) +{ + if (payload->rpc.prpc == NULL) { + return true; + } + + if (payload->rpc.size_rpc == 0U) { + goto invalid_cmd; + } + + return true; + +invalid_cmd: + + return false; +} + +static bool pmu_validate_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd, + struct pmu_payload *payload, u32 queue_id) +{ + struct gk20a *g = pmu->g; + u32 queue_size; + + if (cmd == NULL) { + nvgpu_err(g, "PMU cmd buffer is NULL"); + return false; + } + + if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) { + goto invalid_cmd; + } + + if (cmd->hdr.size < PMU_CMD_HDR_SIZE) { + goto invalid_cmd; + } + + queue_size = nvgpu_pmu_queue_get_size(&pmu->queues, queue_id); + + if (cmd->hdr.size > (queue_size >> 1)) { + goto invalid_cmd; + } + + if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) { + goto invalid_cmd; + } + + if (payload == NULL) { + return true; + } + + if (payload->in.buf == NULL && payload->out.buf == NULL && + payload->rpc.prpc == NULL) { + goto invalid_cmd; + } + + if (!pmu_validate_in_out_payload(pmu, cmd, &payload->in)) { + goto invalid_cmd; + } + + if (!pmu_validate_in_out_payload(pmu, cmd, &payload->out)) { + goto invalid_cmd; + } + + if (!pmu_validate_rpc_payload(payload)) { + goto invalid_cmd; + } + + return true; + +invalid_cmd: + nvgpu_err(g, "invalid pmu cmd :\n" + "queue_id=%d,\n" + "cmd_size=%d, cmd_unit_id=%d,\n" + "payload in=%p, in_size=%d, in_offset=%d,\n" + "payload out=%p, out_size=%d, out_offset=%d", + queue_id, cmd->hdr.size, cmd->hdr.unit_id, + &payload->in, payload->in.size, payload->in.offset, + &payload->out, payload->out.size, payload->out.offset); + + return false; +} + +static int pmu_write_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd, + u32 queue_id) +{ + struct gk20a *g = 
pmu->g; + struct nvgpu_timeout timeout; + int err; + + nvgpu_log_fn(g, " "); + + err = nvgpu_timeout_init(g, &timeout, U32_MAX, NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + nvgpu_err(g, "failed to init timer"); + return err; + } + + do { + err = nvgpu_pmu_queue_push(&pmu->queues, pmu->flcn, + queue_id, cmd); + if (nvgpu_timeout_expired(&timeout) == 0 && err == -EAGAIN) { + nvgpu_usleep_range(1000, 2000); + } else { + break; + } + } while (true); + + if (err != 0) { + nvgpu_err(g, "fail to write cmd to queue %d", queue_id); + } else { + nvgpu_log_fn(g, "done"); + } + + return err; +} + +static void pmu_payload_deallocate(struct gk20a *g, + struct falcon_payload_alloc *alloc) +{ + struct nvgpu_pmu *pmu = g->pmu; + + if (alloc->dmem_offset != 0U) { + nvgpu_free(&pmu->dmem, alloc->dmem_offset); + } +} + +static int pmu_payload_allocate(struct gk20a *g, struct pmu_sequence *seq, + struct falcon_payload_alloc *alloc) +{ + struct nvgpu_pmu *pmu = g->pmu; + u16 buffer_size; + int err = 0; + u64 tmp; + + if (nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + buffer_size = nvgpu_pmu_seq_get_buffer_size(seq); + nvgpu_pmu_seq_set_fbq_out_offset(seq, buffer_size); + /* Save target address in FBQ work buffer. */ + alloc->dmem_offset = buffer_size; + buffer_size += alloc->dmem_size; + nvgpu_pmu_seq_set_buffer_size(seq, buffer_size); + } else { + tmp = nvgpu_alloc(&pmu->dmem, alloc->dmem_size); + nvgpu_assert(tmp <= U32_MAX); + alloc->dmem_offset = (u32)tmp; + if (alloc->dmem_offset == 0U) { + err = -ENOMEM; + goto clean_up; + } + } + +clean_up: + if (err != 0) { + pmu_payload_deallocate(g, alloc); + } + + return err; +} + +static int pmu_cmd_payload_setup_rpc(struct gk20a *g, struct pmu_cmd *cmd, + struct pmu_payload *payload, struct pmu_sequence *seq) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_fw_ver_ops *fw_ops = &g->pmu->fw->ops; + struct nvgpu_engine_fb_queue *queue = nvgpu_pmu_seq_get_cmd_queue(seq); + struct falcon_payload_alloc alloc; + int err = 0; + + nvgpu_log_fn(g, " "); + + (void) memset(&alloc, 0, sizeof(struct falcon_payload_alloc)); + + alloc.dmem_size = payload->rpc.size_rpc + + payload->rpc.size_scratch; + + err = pmu_payload_allocate(g, seq, &alloc); + if (err != 0) { + goto clean_up; + } + + alloc.dmem_size = payload->rpc.size_rpc; + + if (nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + /* copy payload to FBQ work buffer */ + nvgpu_memcpy((u8 *) + nvgpu_engine_fb_queue_get_work_buffer(queue) + + alloc.dmem_offset, + (u8 *)payload->rpc.prpc, alloc.dmem_size); + + alloc.dmem_offset += nvgpu_pmu_seq_get_fbq_heap_offset(seq); + + nvgpu_pmu_seq_set_in_payload_fb_queue(seq, true); + nvgpu_pmu_seq_set_out_payload_fb_queue(seq, true); + } else { + err = nvgpu_falcon_copy_to_dmem(pmu->flcn, alloc.dmem_offset, + payload->rpc.prpc, payload->rpc.size_rpc, 0); + if (err != 0) { + pmu_payload_deallocate(g, &alloc); + goto clean_up; + } + } + + cmd->cmd.rpc.rpc_dmem_size = payload->rpc.size_rpc; + cmd->cmd.rpc.rpc_dmem_ptr = alloc.dmem_offset; + + nvgpu_pmu_seq_set_out_payload(seq, payload->rpc.prpc); + g->pmu->fw->ops.allocation_set_dmem_size(pmu, + fw_ops->get_seq_out_alloc_ptr(seq), + payload->rpc.size_rpc); + g->pmu->fw->ops.allocation_set_dmem_offset(pmu, + fw_ops->get_seq_out_alloc_ptr(seq), + alloc.dmem_offset); + +clean_up: + if (err != 0) { + nvgpu_log_fn(g, "fail"); + } else { + nvgpu_log_fn(g, "done"); + } + + return err; +} + +static int pmu_cmd_in_payload_setup(struct gk20a *g, struct pmu_cmd *cmd, + struct pmu_payload *payload, struct pmu_sequence *seq) +{ + struct nvgpu_engine_fb_queue 
*fb_queue = + nvgpu_pmu_seq_get_cmd_queue(seq); + struct pmu_fw_ver_ops *fw_ops = &g->pmu->fw->ops; + struct falcon_payload_alloc alloc; + struct nvgpu_pmu *pmu = g->pmu; + void *in = NULL; + int err = 0; + u32 offset; + + (void) memset(&alloc, 0, sizeof(struct falcon_payload_alloc)); + + if (payload != NULL && payload->in.offset != 0U) { + fw_ops->set_allocation_ptr(pmu, &in, + ((u8 *)&cmd->cmd + payload->in.offset)); + + if (payload->in.buf != payload->out.buf) { + fw_ops->allocation_set_dmem_size(pmu, in, + (u16)payload->in.size); + } else { + fw_ops->allocation_set_dmem_size(pmu, in, + (u16)max(payload->in.size, payload->out.size)); + } + + alloc.dmem_size = fw_ops->allocation_get_dmem_size(pmu, in); + + err = pmu_payload_allocate(g, seq, &alloc); + if (err != 0) { + return err; + } + + *(fw_ops->allocation_get_dmem_offset_addr(pmu, in)) = + alloc.dmem_offset; + + if (nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + /* copy payload to FBQ work buffer */ + nvgpu_memcpy((u8 *) + nvgpu_engine_fb_queue_get_work_buffer( + fb_queue) + + alloc.dmem_offset, + (u8 *)payload->in.buf, + payload->in.size); + + alloc.dmem_offset += + nvgpu_pmu_seq_get_fbq_heap_offset(seq); + *(fw_ops->allocation_get_dmem_offset_addr(pmu, + in)) = alloc.dmem_offset; + + nvgpu_pmu_seq_set_in_payload_fb_queue(seq, true); + } else { + offset = + fw_ops->allocation_get_dmem_offset(pmu, + in); + err = nvgpu_falcon_copy_to_dmem(pmu->flcn, + offset, payload->in.buf, + payload->in.size, 0); + if (err != 0) { + pmu_payload_deallocate(g, &alloc); + return err; + } + } + + fw_ops->allocation_set_dmem_size(pmu, + fw_ops->get_seq_in_alloc_ptr(seq), + fw_ops->allocation_get_dmem_size(pmu, in)); + fw_ops->allocation_set_dmem_offset(pmu, + fw_ops->get_seq_in_alloc_ptr(seq), + fw_ops->allocation_get_dmem_offset(pmu, in)); + } + + return 0; +} + +static int pmu_cmd_out_payload_setup(struct gk20a *g, struct pmu_cmd *cmd, + struct pmu_payload *payload, struct pmu_sequence *seq) +{ + struct pmu_fw_ver_ops *fw_ops = &g->pmu->fw->ops; + struct falcon_payload_alloc alloc; + struct nvgpu_pmu *pmu = g->pmu; + void *in = NULL, *out = NULL; + int err = 0; + + (void) memset(&alloc, 0, sizeof(struct falcon_payload_alloc)); + + if (payload != NULL && payload->out.offset != 0U) { + fw_ops->set_allocation_ptr(pmu, &out, + ((u8 *)&cmd->cmd + payload->out.offset)); + fw_ops->allocation_set_dmem_size(pmu, out, + (u16)payload->out.size); + + if (payload->in.buf != payload->out.buf) { + alloc.dmem_size = + fw_ops->allocation_get_dmem_size(pmu, out); + + err = pmu_payload_allocate(g, seq, &alloc); + if (err != 0) { + return err; + } + + *(fw_ops->allocation_get_dmem_offset_addr(pmu, + out)) = alloc.dmem_offset; + } else { + WARN_ON(payload->in.offset == 0U); + + fw_ops->set_allocation_ptr(pmu, &in, + ((u8 *)&cmd->cmd + payload->in.offset)); + + fw_ops->allocation_set_dmem_offset(pmu, out, + fw_ops->allocation_get_dmem_offset(pmu, + in)); + } + + if (nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + if (payload->in.buf != payload->out.buf) { + *(fw_ops->allocation_get_dmem_offset_addr(pmu, + out)) += + nvgpu_pmu_seq_get_fbq_heap_offset(seq); + } + + nvgpu_pmu_seq_set_out_payload_fb_queue(seq, true); + } + + fw_ops->allocation_set_dmem_size(pmu, + fw_ops->get_seq_out_alloc_ptr(seq), + fw_ops->allocation_get_dmem_size(pmu, out)); + fw_ops->allocation_set_dmem_offset(pmu, + fw_ops->get_seq_out_alloc_ptr(seq), + fw_ops->allocation_get_dmem_offset(pmu, out)); + } + + return 0; +} + +static int pmu_cmd_payload_setup(struct gk20a *g, struct pmu_cmd *cmd, + struct 
pmu_payload *payload, struct pmu_sequence *seq) +{ + struct pmu_fw_ver_ops *fw_ops = &g->pmu->fw->ops; + struct nvgpu_pmu *pmu = g->pmu; + void *in = NULL; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (payload != NULL) { + nvgpu_pmu_seq_set_out_payload(seq, payload->out.buf); + } + + err = pmu_cmd_in_payload_setup(g, cmd, payload, seq); + if (err != 0) { + goto exit; + } + + err = pmu_cmd_out_payload_setup(g, cmd, payload, seq); + if (err != 0) { + goto clean_up; + } + + goto exit; + +clean_up: + if (payload->in.offset != 0U) { + fw_ops->set_allocation_ptr(pmu, &in, + ((u8 *)&cmd->cmd + payload->in.offset)); + + nvgpu_free(&pmu->dmem, + fw_ops->allocation_get_dmem_offset(pmu, + in)); + } + +exit: + if (err != 0) { + nvgpu_log_fn(g, "fail"); + } else { + nvgpu_log_fn(g, "done"); + } + + return err; +} + +static int pmu_fbq_cmd_setup(struct gk20a *g, struct pmu_cmd *cmd, + struct nvgpu_engine_fb_queue *queue, struct pmu_payload *payload, + struct pmu_sequence *seq) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_falcon_fbq_hdr *fbq_hdr = NULL; + struct pmu_cmd *flcn_cmd = NULL; + u32 fbq_size_needed = 0; + u16 heap_offset = 0; + u64 tmp; + int err = 0; + + fbq_hdr = (struct nv_falcon_fbq_hdr *) + nvgpu_engine_fb_queue_get_work_buffer(queue); + + flcn_cmd = (struct pmu_cmd *) + (nvgpu_engine_fb_queue_get_work_buffer(queue) + + sizeof(struct nv_falcon_fbq_hdr)); + + if (cmd->cmd.rpc.cmd_type == NV_PMU_RPC_CMD_ID) { + if (payload != NULL) { + fbq_size_needed = (u32)payload->rpc.size_rpc + + (u32)payload->rpc.size_scratch; + } + } else { + err = -EINVAL; + goto exit; + } + + tmp = fbq_size_needed + + sizeof(struct nv_falcon_fbq_hdr) + + cmd->hdr.size; + nvgpu_assert(tmp <= (size_t)U32_MAX); + fbq_size_needed = (u32)tmp; + + fbq_size_needed = ALIGN_UP(fbq_size_needed, 4U); + + /* Check for allocator pointer and proceed */ + if (pmu->dmem.priv != NULL) { + tmp = nvgpu_alloc(&pmu->dmem, fbq_size_needed); + } + nvgpu_assert(tmp <= U32_MAX); + heap_offset = (u16) tmp; + if (heap_offset == 0U) { + err = -ENOMEM; + goto exit; + } + + /* clear work queue buffer */ + (void) memset(nvgpu_engine_fb_queue_get_work_buffer(queue), 0, + nvgpu_engine_fb_queue_get_element_size(queue)); + + /* Need to save room for both FBQ hdr, and the CMD */ + tmp = sizeof(struct nv_falcon_fbq_hdr) + + cmd->hdr.size; + nvgpu_assert(tmp <= (size_t)U16_MAX); + nvgpu_pmu_seq_set_buffer_size(seq, (u16)tmp); + + /* copy cmd into the work buffer */ + nvgpu_memcpy((u8 *)flcn_cmd, (u8 *)cmd, cmd->hdr.size); + + /* Fill in FBQ hdr, and offset in seq structure */ + nvgpu_assert(fbq_size_needed < U16_MAX); + fbq_hdr->heap_size = (u16)fbq_size_needed; + fbq_hdr->heap_offset = heap_offset; + nvgpu_pmu_seq_set_fbq_heap_offset(seq, heap_offset); + + /* + * save queue index in seq structure + * so can free queue element when response is received + */ + nvgpu_pmu_seq_set_fbq_element_index(seq, + nvgpu_engine_fb_queue_get_position(queue)); + +exit: + return err; +} + +int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, + struct pmu_payload *payload, + u32 queue_id, pmu_callback callback, void *cb_param) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_sequence *seq = NULL; + struct nvgpu_engine_fb_queue *fb_queue = NULL; + int err; + + nvgpu_log_fn(g, " "); + + if (!nvgpu_pmu_get_fw_ready(g, pmu)) { + nvgpu_warn(g, "PMU is not ready"); + return -EINVAL; + } + + if (!pmu_validate_cmd(pmu, cmd, payload, queue_id)) { + return -EINVAL; + } + + err = nvgpu_pmu_seq_acquire(g, pmu->sequences, &seq, callback, + cb_param); + if (err != 0) { 
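+ /* Could not reserve a sequence to track this command's completion. */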
+ return err; + } + + cmd->hdr.seq_id = nvgpu_pmu_seq_get_id(seq); + + cmd->hdr.ctrl_flags = 0; + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; + + if (nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + fb_queue = nvgpu_pmu_fb_queue(&pmu->queues, queue_id); + /* Save the queue in the seq structure. */ + nvgpu_pmu_seq_set_cmd_queue(seq, fb_queue); + + /* Lock the FBQ work buffer */ + nvgpu_engine_fb_queue_lock_work_buffer(fb_queue); + + /* Create FBQ work buffer & copy cmd to FBQ work buffer */ + err = pmu_fbq_cmd_setup(g, cmd, fb_queue, payload, seq); + if (err != 0) { + nvgpu_err(g, "FBQ cmd setup failed"); + nvgpu_pmu_seq_release(g, pmu->sequences, seq); + goto exit; + } + + /* + * change cmd pointer to point to FBQ work + * buffer as cmd copied to FBQ work buffer + * in call pmu_fbq_cmd_setup() + */ + cmd = (struct pmu_cmd *) + (nvgpu_engine_fb_queue_get_work_buffer(fb_queue) + + sizeof(struct nv_falcon_fbq_hdr)); + } + + if (cmd->cmd.rpc.cmd_type == NV_PMU_RPC_CMD_ID) { + err = pmu_cmd_payload_setup_rpc(g, cmd, payload, seq); + } else { + err = pmu_cmd_payload_setup(g, cmd, payload, seq); + } + + if (err != 0) { + nvgpu_err(g, "payload setup failed"); + pmu->fw->ops.allocation_set_dmem_size(pmu, + pmu->fw->ops.get_seq_in_alloc_ptr(seq), 0); + pmu->fw->ops.allocation_set_dmem_size(pmu, + pmu->fw->ops.get_seq_out_alloc_ptr(seq), 0); + + nvgpu_pmu_seq_release(g, pmu->sequences, seq); + goto exit; + } + + nvgpu_pmu_seq_set_state(seq, PMU_SEQ_STATE_USED); + + err = pmu_write_cmd(pmu, cmd, queue_id); + if (err != 0) { + nvgpu_pmu_seq_set_state(seq, PMU_SEQ_STATE_PENDING); + } + +exit: + if (nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + /* Unlock the FBQ work buffer */ + nvgpu_engine_fb_queue_unlock_work_buffer(fb_queue); + } + + nvgpu_log_fn(g, "Done, err %x", err); + return err; +} + +int nvgpu_pmu_rpc_execute(struct nvgpu_pmu *pmu, struct nv_pmu_rpc_header *rpc, + u16 size_rpc, u16 size_scratch, pmu_callback caller_cb, + void *caller_cb_param, bool is_copy_back) +{ + struct gk20a *g = pmu->g; + struct pmu_cmd cmd; + struct pmu_payload payload; + struct rpc_handler_payload *rpc_payload = NULL; + pmu_callback callback = NULL; + void *rpc_buff = NULL; + int status = 0; + + if (nvgpu_can_busy(g) == 0) { + return 0; + } + + if (!nvgpu_pmu_get_fw_ready(g, pmu)) { + nvgpu_warn(g, "PMU is not ready to process RPC"); + status = EINVAL; + goto exit; + } + + if (caller_cb == NULL) { + rpc_payload = nvgpu_kzalloc(g, + sizeof(struct rpc_handler_payload) + size_rpc); + if (rpc_payload == NULL) { + status = ENOMEM; + goto exit; + } + + rpc_payload->rpc_buff = (u8 *)rpc_payload + + sizeof(struct rpc_handler_payload); + rpc_payload->is_mem_free_set = + is_copy_back ? 
false : true; + + /* assign default RPC handler*/ + callback = nvgpu_pmu_rpc_handler; + } else { + if (caller_cb_param == NULL) { + nvgpu_err(g, "Invalid cb param addr"); + status = EINVAL; + goto exit; + } + rpc_payload = nvgpu_kzalloc(g, + sizeof(struct rpc_handler_payload)); + if (rpc_payload == NULL) { + status = ENOMEM; + goto exit; + } + rpc_payload->rpc_buff = caller_cb_param; + rpc_payload->is_mem_free_set = true; + callback = caller_cb; + WARN_ON(is_copy_back); + } + + rpc_buff = rpc_payload->rpc_buff; + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + (void) memset(&payload, 0, sizeof(struct pmu_payload)); + + cmd.hdr.unit_id = rpc->unit_id; + cmd.hdr.size = (u8)(PMU_CMD_HDR_SIZE + sizeof(struct nv_pmu_rpc_cmd)); + cmd.cmd.rpc.cmd_type = NV_PMU_RPC_CMD_ID; + cmd.cmd.rpc.flags = rpc->flags; + + nvgpu_memcpy((u8 *)rpc_buff, (u8 *)rpc, size_rpc); + payload.rpc.prpc = rpc_buff; + payload.rpc.size_rpc = size_rpc; + payload.rpc.size_scratch = size_scratch; + + status = nvgpu_pmu_cmd_post(g, &cmd, &payload, + PMU_COMMAND_QUEUE_LPQ, callback, + rpc_payload); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC status=0x%x, func=0x%x", + status, rpc->function); + goto cleanup; + } + + /* + * Option act like blocking call, which waits till RPC request + * executes on PMU & copy back processed data to rpc_buff + * to read data back in nvgpu + */ + if (is_copy_back) { + /* wait till RPC execute in PMU & ACK */ + if (nvgpu_pmu_wait_fw_ack_status(g, pmu, + nvgpu_get_poll_timeout(g), + &rpc_payload->complete, 1U) != 0) { + nvgpu_err(g, "PMU wait timeout expired."); + status = -ETIMEDOUT; + goto cleanup; + } + /* copy back data to caller */ + nvgpu_memcpy((u8 *)rpc, (u8 *)rpc_buff, size_rpc); + /* free allocated memory */ + nvgpu_kfree(g, rpc_payload); + } + + return 0; + +cleanup: + nvgpu_kfree(g, rpc_payload); +exit: + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c new file mode 100644 index 000000000..7bed652f8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c @@ -0,0 +1,639 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int pmu_payload_extract(struct nvgpu_pmu *pmu, struct pmu_sequence *seq) +{ + struct nvgpu_engine_fb_queue *fb_queue = + nvgpu_pmu_seq_get_cmd_queue(seq); + struct gk20a *g = pmu->g; + struct pmu_fw_ver_ops *fw_ops = &g->pmu->fw->ops; + u32 fbq_payload_offset = 0U; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (nvgpu_pmu_seq_get_out_payload_fb_queue(seq)) { + fbq_payload_offset = + nvgpu_engine_fb_queue_get_offset(fb_queue) + + nvgpu_pmu_seq_get_fbq_out_offset(seq) + + (nvgpu_pmu_seq_get_fbq_element_index(seq) * + nvgpu_engine_fb_queue_get_element_size(fb_queue)); + + nvgpu_mem_rd_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), fbq_payload_offset, + nvgpu_pmu_seq_get_out_payload(seq), + fw_ops->allocation_get_dmem_size(pmu, + fw_ops->get_seq_out_alloc_ptr(seq))); + + } else { + if (fw_ops->allocation_get_dmem_size(pmu, + fw_ops->get_seq_out_alloc_ptr(seq)) != 0U) { + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, + fw_ops->allocation_get_dmem_offset(pmu, + fw_ops->get_seq_out_alloc_ptr(seq)), + nvgpu_pmu_seq_get_out_payload(seq), + fw_ops->allocation_get_dmem_size(pmu, + fw_ops->get_seq_out_alloc_ptr(seq)), + 0); + if (err != 0) { + nvgpu_err(g, "PMU falcon DMEM copy failed"); + return err; + } + } + } + + return err; +} + +static void pmu_payload_free(struct nvgpu_pmu *pmu, struct pmu_sequence *seq) +{ + struct nvgpu_engine_fb_queue *fb_queue = + nvgpu_pmu_seq_get_cmd_queue(seq); + struct gk20a *g = pmu->g; + struct pmu_fw_ver_ops *fw_ops = &g->pmu->fw->ops; + void *seq_in_ptr = fw_ops->get_seq_in_alloc_ptr(seq); + void *seq_out_ptr = fw_ops->get_seq_out_alloc_ptr(seq); + int err; + + nvgpu_log_fn(g, " "); + + if (nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + /* Check for allocator pointer and proceed */ + if (pmu->dmem.priv != NULL) { + nvgpu_free(&pmu->dmem, + nvgpu_pmu_seq_get_fbq_heap_offset(seq)); + } + + /* + * free FBQ allocated work buffer + * set FBQ element work buffer to NULL + * Clear the in use bit for the queue entry this CMD used. 
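+ * (all of the above is handled by the nvgpu_engine_fb_queue_free_element() call below)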
+ */ + err = nvgpu_engine_fb_queue_free_element(fb_queue, + nvgpu_pmu_seq_get_fbq_element_index(seq)); + if (err != 0) { + nvgpu_err(g, "fb queue element free failed %d", err); + } + } else { + /* free DMEM space payload*/ + if (fw_ops->allocation_get_dmem_size(pmu, + seq_in_ptr) != 0U) { + nvgpu_free(&pmu->dmem, + fw_ops->allocation_get_dmem_offset(pmu, + seq_in_ptr)); + + fw_ops->allocation_set_dmem_size(pmu, + seq_in_ptr, 0); + } + + if (fw_ops->allocation_get_dmem_size(pmu, + seq_out_ptr) != 0U) { + nvgpu_free(&pmu->dmem, + fw_ops->allocation_get_dmem_offset(pmu, + seq_out_ptr)); + + fw_ops->allocation_set_dmem_size(pmu, + seq_out_ptr, 0); + } + } + + nvgpu_pmu_seq_payload_free(g, seq); +} + +static int pmu_response_handle(struct nvgpu_pmu *pmu, + struct pmu_msg *msg) +{ + struct gk20a *g = pmu->g; + enum pmu_seq_state state; + struct pmu_sequence *seq; + int err = 0; + u8 id; + + nvgpu_log_fn(g, " "); + + seq = nvgpu_pmu_sequences_get_seq(pmu->sequences, msg->hdr.seq_id); + state = nvgpu_pmu_seq_get_state(seq); + id = nvgpu_pmu_seq_get_id(seq); + + if (state != PMU_SEQ_STATE_USED) { + nvgpu_err(g, "msg for an unknown sequence %u", (u32) id); + err = -EINVAL; + goto exit; + } + + if (msg->hdr.unit_id == PMU_UNIT_RC && + msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { + nvgpu_err(g, "unhandled cmd: seq %u", (u32) id); + err = -EINVAL; + } else { + err = pmu_payload_extract(pmu, seq); + } + +exit: + /* + * free allocated space for payload in + * DMEM/FB-surface/FB_QUEUE as data is + * copied to buffer pointed by + * seq->out_payload + */ + pmu_payload_free(pmu, seq); + + nvgpu_pmu_seq_callback(g, seq, msg, err); + + nvgpu_pmu_seq_release(g, pmu->sequences, seq); + + /* TBD: notify client waiting for available dmem */ + + nvgpu_log_fn(g, "done err %d", err); + + return err; +} + +static int pmu_handle_event(struct nvgpu_pmu *pmu, struct pmu_msg *msg) +{ + int err = 0; + struct gk20a *g = pmu->g; + + nvgpu_log_fn(g, " "); + switch (msg->hdr.unit_id) { + case PMU_UNIT_PERFMON: + case PMU_UNIT_PERFMON_T18X: + err = nvgpu_pmu_perfmon_event_handler(g, pmu, msg); + break; + case PMU_UNIT_PERF: + if (g->ops.pmu_perf.handle_pmu_perf_event != NULL) { + err = g->ops.pmu_perf.handle_pmu_perf_event(g, + (void *)&msg->hdr); + } else { + WARN_ON(true); + } + break; + default: + nvgpu_log_info(g, "Received invalid PMU unit event"); + break; + } + + return err; +} + +static bool pmu_engine_mem_queue_read(struct nvgpu_pmu *pmu, + u32 queue_id, void *data, + u32 bytes_to_read, int *status) +{ + struct gk20a *g = pmu->g; + u32 bytes_read; + int err; + + err = nvgpu_pmu_queue_pop(&pmu->queues, pmu->flcn, queue_id, data, + bytes_to_read, &bytes_read); + if (err != 0) { + nvgpu_err(g, "fail to read msg: err %d", err); + *status = err; + return false; + } + if (bytes_read != bytes_to_read) { + nvgpu_err(g, "fail to read requested bytes: 0x%x != 0x%x", + bytes_to_read, bytes_read); + *status = -EINVAL; + return false; + } + + return true; +} + +static bool pmu_read_message(struct nvgpu_pmu *pmu, u32 queue_id, + struct pmu_msg *msg, int *status) +{ + struct gk20a *g = pmu->g; + u32 read_size; + int err; + + *status = 0; + + if (nvgpu_pmu_queue_is_empty(&pmu->queues, queue_id)) { + return false; + } + + if (!pmu_engine_mem_queue_read(pmu, queue_id, &msg->hdr, + PMU_MSG_HDR_SIZE, status)) { + nvgpu_err(g, "fail to read msg from queue %d", queue_id); + goto clean_up; + } + + if (msg->hdr.unit_id == PMU_UNIT_REWIND) { + if (!nvgpu_pmu_fb_queue_enabled(&pmu->queues)) { + err = 
nvgpu_pmu_queue_rewind(&pmu->queues, queue_id, + pmu->flcn); + if (err != 0) { + nvgpu_err(g, "fail to rewind queue %d", + queue_id); + *status = err; + goto clean_up; + } + } + + /* read again after rewind */ + if (!pmu_engine_mem_queue_read(pmu, queue_id, &msg->hdr, + PMU_MSG_HDR_SIZE, status)) { + nvgpu_err(g, "fail to read msg from queue %d", + queue_id); + goto clean_up; + } + } + + if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { + nvgpu_err(g, "read invalid unit_id %d from queue %d", + msg->hdr.unit_id, queue_id); + *status = -EINVAL; + goto clean_up; + } + + if (msg->hdr.size > PMU_MSG_HDR_SIZE) { + read_size = U32(msg->hdr.size) - PMU_MSG_HDR_SIZE; + if (!pmu_engine_mem_queue_read(pmu, queue_id, &msg->msg, + read_size, status)) { + nvgpu_err(g, "fail to read msg from queue %d", + queue_id); + goto clean_up; + } + } + + return true; + +clean_up: + return false; +} + +static void pmu_read_init_msg_fb(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 element_index, u32 size, void *buffer) +{ + u32 fbq_msg_queue_ss_offset = 0U; + + fbq_msg_queue_ss_offset = + nvgpu_pmu_get_ss_msg_fbq_element_offset(g, pmu, + pmu->super_surface, element_index); + + fbq_msg_queue_ss_offset = nvgpu_safe_add_u32(fbq_msg_queue_ss_offset, + (u32)sizeof(struct nv_falcon_fbq_msgq_hdr)); + nvgpu_mem_rd_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), fbq_msg_queue_ss_offset, + buffer, size); +} + +static int pmu_process_init_msg_fb(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_msg *msg) +{ + u32 tail = 0U; + int err = 0; + + nvgpu_log_fn(g, " "); + + g->ops.pmu.pmu_msgq_tail(pmu, &tail, QUEUE_GET); + + pmu_read_init_msg_fb(g, pmu, tail, PMU_MSG_HDR_SIZE, + (void *)&msg->hdr); + + if (msg->hdr.unit_id != PMU_UNIT_INIT_DGPU && + msg->hdr.unit_id != PMU_UNIT_CMDMGMT) { + nvgpu_err(g, "FB MSG Q: expecting init msg"); + err = -EINVAL; + goto exit; + } + + pmu_read_init_msg_fb(g, pmu, tail, msg->hdr.size, + (void *)&msg->hdr); + if (msg->event_rpc.cmdmgmt_init.hdr.function != + PMU_INIT_MSG_TYPE_PMU_INIT) { + nvgpu_err(g, "FB MSG Q: expecting pmu init msg"); + err = -EINVAL; + goto exit; + } + + /* Queue is not yet constructed, so inline next element code here.*/ + tail++; + if (tail >= NV_PMU_FBQ_MSG_NUM_ELEMENTS) { + tail = 0U; + } + + g->ops.pmu.pmu_msgq_tail(pmu, &tail, QUEUE_SET); + +exit: + return err; +} + +static int pmu_process_init_msg_dmem(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_msg *msg) +{ + u32 tail = 0U; + int err = 0; + + nvgpu_log_fn(g, " "); + + g->ops.pmu.pmu_msgq_tail(pmu, &tail, QUEUE_GET); + + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, tail, + (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); + if (err != 0) { + nvgpu_err(g, "PMU falcon DMEM copy failed"); + goto exit; + } + if (msg->hdr.unit_id != PMU_UNIT_INIT) { + nvgpu_err(g, "expecting init msg"); + err = -EINVAL; + goto exit; + } + + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, tail + PMU_MSG_HDR_SIZE, + (u8 *)&msg->msg, (u32)msg->hdr.size - PMU_MSG_HDR_SIZE, 0); + if (err != 0) { + nvgpu_err(g, "PMU falcon DMEM copy failed"); + goto exit; + } + + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { + nvgpu_err(g, "expecting pmu init msg"); + err = -EINVAL; + goto exit; + } + + tail += NVGPU_ALIGN(U32(msg->hdr.size), PMU_DMEM_ALIGNMENT); + g->ops.pmu.pmu_msgq_tail(pmu, &tail, QUEUE_SET); + +exit: + return err; +} + +static int pmu_gid_info_dmem_read(struct nvgpu_pmu *pmu, + union pmu_init_msg_pmu *init) +{ + struct pmu_fw_ver_ops *fw_ops = &pmu->fw->ops; + struct pmu_sha1_gid *gid_info = &pmu->gid_info; + struct 
pmu_sha1_gid_data gid_data; + int err = 0; + + if (!gid_info->valid) { + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, + fw_ops->get_init_msg_sw_mngd_area_off(init), + (u8 *)&gid_data, + (u32)sizeof(struct pmu_sha1_gid_data), 0); + if (err != 0) { + nvgpu_err(pmu->g, "PMU falcon DMEM copy failed"); + goto exit; + } + + gid_info->valid = + (gid_data.signature == PMU_SHA1_GID_SIGNATURE); + + if (gid_info->valid) { + if (sizeof(gid_info->gid) != + sizeof(gid_data.gid)) { + WARN_ON(1); + } + + nvgpu_memcpy((u8 *)gid_info->gid, (u8 *)gid_data.gid, + sizeof(gid_info->gid)); + } + } + +exit: + return err; +} + +static int pmu_process_init_msg(struct nvgpu_pmu *pmu, + struct pmu_msg *msg) +{ + struct gk20a *g = pmu->g; + struct pmu_fw_ver_ops *fw_ops = &g->pmu->fw->ops; + union pmu_init_msg_pmu *init; + int err = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_pmu_dbg(g, "init received\n"); + + (void)memset((void *)msg, 0x0, sizeof(struct pmu_msg)); + + init = fw_ops->get_init_msg_ptr(&(msg->msg.init)); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ)) { + err = pmu_process_init_msg_fb(g, pmu, msg); + if (err != 0) { + goto exit; + } + } else { + err = pmu_process_init_msg_dmem(g, pmu, msg); + if (err != 0) { + goto exit; + } + + err = pmu_gid_info_dmem_read(pmu, init); + if (err != 0) { + goto exit; + } + } + + err = nvgpu_pmu_queues_init(g, init, &pmu->queues, + nvgpu_pmu_super_surface_mem(g, pmu, + pmu->super_surface)); + if (err != 0) { + goto exit; + } + + nvgpu_pmu_allocator_dmem_init(g, pmu, &pmu->dmem, init); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE)) { + err = nvgpu_pmu_ss_create_ssmd_lookup_table(g, + pmu, pmu->super_surface); + if (err != 0) { + goto exit; + } + } + + nvgpu_pmu_set_fw_ready(g, pmu, true); + + nvgpu_pmu_fw_state_change(g, pmu, PMU_FW_STATE_INIT_RECEIVED, true); +exit: + nvgpu_pmu_dbg(g, "init received end, err %x", err); + return err; +} + +int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu) +{ + struct pmu_msg msg; + int status; + struct gk20a *g = pmu->g; + int err; + + if (nvgpu_can_busy(g) == 0) { + return 0; + } + + if (unlikely(!nvgpu_pmu_get_fw_ready(g, pmu))) { + err = pmu_process_init_msg(pmu, &msg); + if (err != 0) { + return err; + } + + err = nvgpu_pmu_lsfm_int_wpr_region(g, pmu, pmu->lsfm); + if (err != 0) { + return err; + } + + if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + err = nvgpu_pmu_perfmon_initialization(g, pmu, + pmu->pmu_perfmon); + if (err != 0) { + return err; + } + } + + return 0; + } + + while (pmu_read_message(pmu, PMU_MESSAGE_QUEUE, &msg, &status)) { + + if (nvgpu_can_busy(g) == 0) { + return 0; + } + + nvgpu_pmu_dbg(g, "read msg hdr: "); + nvgpu_pmu_dbg(g, "unit_id = 0x%08x, size = 0x%08x", + msg.hdr.unit_id, msg.hdr.size); + nvgpu_pmu_dbg(g, "ctrl_flags = 0x%08x, seq_id = 0x%08x", + msg.hdr.ctrl_flags, msg.hdr.seq_id); + + msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK; + + if ((msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) || + (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_RPC_EVENT)) { + err = pmu_handle_event(pmu, &msg); + } else { + err = pmu_response_handle(pmu, &msg); + } + + if (err != 0) { + return err; + } + } + + return 0; +} + +static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg, + struct nv_pmu_rpc_header rpc, + struct rpc_handler_payload *rpc_payload) +{ + struct nvgpu_pmu *pmu = g->pmu; + + switch (msg->hdr.unit_id) { + case PMU_UNIT_ACR: + nvgpu_pmu_lsfm_rpc_handler(g, rpc_payload); + break; + case PMU_UNIT_PERFMON_T18X: + case PMU_UNIT_PERFMON: + nvgpu_pmu_perfmon_rpc_handler(g, pmu, &rpc, rpc_payload); + 
break; + case PMU_UNIT_VOLT: + if (pmu->volt->volt_rpc_handler != NULL) { + pmu->volt->volt_rpc_handler(g, &rpc); + } + break; + case PMU_UNIT_CLK: + nvgpu_pmu_dbg(g, "reply PMU_UNIT_CLK"); + break; + case PMU_UNIT_PERF: + nvgpu_pmu_dbg(g, "reply PMU_UNIT_PERF"); + break; + case PMU_UNIT_THERM: + if (pmu->therm_rpc_handler != NULL) { + pmu->therm_rpc_handler(g, pmu, &rpc); + } + break; + case PMU_UNIT_PG_LOADING: + case PMU_UNIT_PG: + if (pmu->pg->rpc_handler != NULL) { + pmu->pg->rpc_handler(g, pmu, &rpc); + } + break; + default: + nvgpu_err(g, " Invalid RPC response, stats 0x%x", + rpc.flcn_status); + break; + } +} + +void nvgpu_pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + struct nv_pmu_rpc_header rpc; + struct rpc_handler_payload *rpc_payload = + (struct rpc_handler_payload *)param; + + if (nvgpu_can_busy(g) == 0) { + return; + } + + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_header)); + nvgpu_memcpy((u8 *)&rpc, (u8 *)rpc_payload->rpc_buff, + sizeof(struct nv_pmu_rpc_header)); + + if (rpc.flcn_status != 0U) { + nvgpu_err(g, + "failed RPC response, unit-id=0x%x, func=0x%x, status=0x%x", + rpc.unit_id, rpc.function, rpc.flcn_status); + goto exit; + } + + pmu_rpc_handler(g, msg, rpc, rpc_payload); + +exit: + rpc_payload->complete = true; + + /* free allocated memory */ + if (rpc_payload->is_mem_free_set) { + nvgpu_kfree(g, rpc_payload); + } +} + +void pmu_wait_message_cond(struct nvgpu_pmu *pmu, u32 timeout_ms, + void *var, u8 val) +{ + struct gk20a *g = pmu->g; + + if (nvgpu_pmu_wait_fw_ack_status(g, pmu, timeout_ms, var, val) != 0) { + nvgpu_err(g, "PMU wait timeout expired."); + } +} diff --git a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_queue.c b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_queue.c new file mode 100644 index 000000000..4660e4f9b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_queue.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +/* FB queue init */ +static int pmu_fb_queue_init(struct gk20a *g, struct pmu_queues *queues, + u32 id, union pmu_init_msg_pmu *init, + struct nvgpu_mem *super_surface_buf) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nvgpu_engine_fb_queue_params params = {0}; + u32 oflag = 0; + int err = 0; + u32 tmp_id = id; + + /* init queue parameters */ + if (PMU_IS_COMMAND_QUEUE(id)) { + + /* currently PMU FBQ support SW command queue only */ + if (!PMU_IS_SW_COMMAND_QUEUE(id)) { + queues->queue[id] = NULL; + err = 0; + goto exit; + } + + /* + * set OFLAG_WRITE for command queue + * i.e, push from nvgpu & + * pop form falcon ucode + */ + oflag = OFLAG_WRITE; + + params.super_surface_mem = super_surface_buf; + params.fbq_offset = + nvgpu_pmu_get_ss_cmd_fbq_offset(g, pmu, + pmu->super_surface, id); + params.size = NV_PMU_FBQ_CMD_NUM_ELEMENTS; + params.fbq_element_size = NV_PMU_FBQ_CMD_ELEMENT_SIZE; + } else if (PMU_IS_MESSAGE_QUEUE(id)) { + /* + * set OFLAG_READ for message queue + * i.e, push from falcon ucode & + * pop form nvgpu + */ + oflag = OFLAG_READ; + + params.super_surface_mem = super_surface_buf; + params.fbq_offset = + nvgpu_pmu_get_ss_msg_fbq_offset(g, pmu, + pmu->super_surface); + params.size = NV_PMU_FBQ_MSG_NUM_ELEMENTS; + params.fbq_element_size = NV_PMU_FBQ_MSG_ELEMENT_SIZE; + } else { + nvgpu_err(g, "invalid queue-id %d", id); + err = -EINVAL; + goto exit; + } + + params.g = g; + params.flcn_id = FALCON_ID_PMU; + params.id = id; + params.oflag = oflag; + params.queue_head = g->ops.pmu.pmu_queue_head; + params.queue_tail = g->ops.pmu.pmu_queue_tail; + + if (tmp_id == PMU_COMMAND_QUEUE_HPQ) { + tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3; + } else if (tmp_id == PMU_COMMAND_QUEUE_LPQ) { + tmp_id = PMU_QUEUE_LPQ_IDX_FOR_V3; + } else { + tmp_id = PMU_QUEUE_MSG_IDX_FOR_V5; + } + + params.index = init->v5.queue_phy_id[tmp_id]; + + err = nvgpu_engine_fb_queue_init(&queues->fb_queue[id], params); + if (err != 0) { + nvgpu_err(g, "queue-%d init failed", id); + } + +exit: + return err; +} + +/* DMEM queue init */ +static int pmu_dmem_queue_init(struct gk20a *g, struct pmu_queues *queues, + u32 id, union pmu_init_msg_pmu *init) +{ + struct nvgpu_engine_mem_queue_params params = {0}; + u32 oflag = 0; + int err = 0; + + if (PMU_IS_COMMAND_QUEUE(id)) { + /* + * set OFLAG_WRITE for command queue + * i.e, push from nvgpu & + * pop form falcon ucode + */ + oflag = OFLAG_WRITE; + } else if (PMU_IS_MESSAGE_QUEUE(id)) { + /* + * set OFLAG_READ for message queue + * i.e, push from falcon ucode & + * pop form nvgpu + */ + oflag = OFLAG_READ; + } else { + nvgpu_err(g, "invalid queue-id %d", id); + err = -EINVAL; + goto exit; + } + + /* init queue parameters */ + params.g = g; + params.flcn_id = FALCON_ID_PMU; + params.id = id; + params.oflag = oflag; + params.queue_head = g->ops.pmu.pmu_queue_head; + params.queue_tail = g->ops.pmu.pmu_queue_tail; + params.queue_type = QUEUE_TYPE_DMEM; + g->pmu->fw->ops.get_init_msg_queue_params(id, init, + ¶ms.index, + ¶ms.offset, + ¶ms.size); + err = nvgpu_engine_mem_queue_init(&queues->queue[id], params); + if (err != 0) { + nvgpu_err(g, "queue-%d init failed", id); + } + +exit: + return err; +} + +static void pmu_queue_free(struct gk20a *g, struct pmu_queues *queues, u32 id) +{ + if (!PMU_IS_COMMAND_QUEUE(id) && !PMU_IS_MESSAGE_QUEUE(id)) { + nvgpu_err(g, "invalid queue-id %d", id); + goto exit; + } + + if (queues->queue_type == QUEUE_TYPE_FB) { + if (queues->fb_queue[id] == NULL) { + goto exit; + } + 
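+ /* release the engine FB queue object created by pmu_fb_queue_init() */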
+ nvgpu_engine_fb_queue_free(&queues->fb_queue[id]); + } else { + if (queues->queue[id] == NULL) { + goto exit; + } + + nvgpu_engine_mem_queue_free(&queues->queue[id]); + } + +exit: + return; +} + +int nvgpu_pmu_queues_init(struct gk20a *g, + union pmu_init_msg_pmu *init, + struct pmu_queues *queues, + struct nvgpu_mem *super_surface_buf) +{ + u32 i = 0U; + u32 j = 0U; + int err; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ)) { + queues->queue_type = QUEUE_TYPE_FB; + for (i = 0; i < PMU_QUEUE_COUNT; i++) { + err = pmu_fb_queue_init(g, queues, i, init, + super_surface_buf); + if (err != 0) { + for (j = 0; j < i; j++) { + pmu_queue_free(g, queues, j); + } + nvgpu_err(g, "PMU queue init failed"); + return err; + } + } + } else { + queues->queue_type = QUEUE_TYPE_DMEM; + for (i = 0; i < PMU_QUEUE_COUNT; i++) { + err = pmu_dmem_queue_init(g, queues, i, init); + if (err != 0) { + for (j = 0; j < i; j++) { + pmu_queue_free(g, queues, j); + } + nvgpu_err(g, "PMU queue init failed"); + return err; + } + } + } + + return 0; +} + +void nvgpu_pmu_queues_free(struct gk20a *g, struct pmu_queues *queues) +{ + u32 i = 0U; + + for (i = 0U; i < PMU_QUEUE_COUNT; i++) { + pmu_queue_free(g, queues, i); + } +} + +u32 nvgpu_pmu_queue_get_size(struct pmu_queues *queues, u32 queue_id) +{ + struct nvgpu_engine_fb_queue *fb_queue = NULL; + struct nvgpu_engine_mem_queue *queue = NULL; + u32 queue_size; + + if (queues->queue_type == QUEUE_TYPE_FB) { + fb_queue = queues->fb_queue[queue_id]; + queue_size = nvgpu_engine_fb_queue_get_element_size(fb_queue); + } else { + queue = queues->queue[queue_id]; + queue_size = nvgpu_engine_mem_queue_get_size(queue); + } + + return queue_size; +} + +int nvgpu_pmu_queue_push(struct pmu_queues *queues, struct nvgpu_falcon *flcn, + u32 queue_id, struct pmu_cmd *cmd) +{ + struct nvgpu_engine_fb_queue *fb_queue = NULL; + struct nvgpu_engine_mem_queue *queue = NULL; + int err; + + if (queues->queue_type == QUEUE_TYPE_FB) { + fb_queue = queues->fb_queue[queue_id]; + err = nvgpu_engine_fb_queue_push(fb_queue, + cmd, cmd->hdr.size); + } else { + queue = queues->queue[queue_id]; + err = nvgpu_engine_mem_queue_push(flcn, queue, + cmd, cmd->hdr.size); + } + + return err; +} + +int nvgpu_pmu_queue_pop(struct pmu_queues *queues, struct nvgpu_falcon *flcn, + u32 queue_id, void *data, u32 bytes_to_read, + u32 *bytes_read) +{ + struct nvgpu_engine_fb_queue *fb_queue = NULL; + struct nvgpu_engine_mem_queue *queue = NULL; + int err; + + if (queues->queue_type == QUEUE_TYPE_FB) { + fb_queue = queues->fb_queue[queue_id]; + err = nvgpu_engine_fb_queue_pop(fb_queue, data, + bytes_to_read, bytes_read); + } else { + queue = queues->queue[queue_id]; + err = nvgpu_engine_mem_queue_pop(flcn, queue, data, + bytes_to_read, bytes_read); + } + + return err; +} + +bool nvgpu_pmu_queue_is_empty(struct pmu_queues *queues, u32 queue_id) +{ + struct nvgpu_engine_mem_queue *queue = NULL; + struct nvgpu_engine_fb_queue *fb_queue = NULL; + bool empty; + + if (queues->queue_type == QUEUE_TYPE_FB) { + fb_queue = queues->fb_queue[queue_id]; + empty = nvgpu_engine_fb_queue_is_empty(fb_queue); + } else { + queue = queues->queue[queue_id]; + empty = nvgpu_engine_mem_queue_is_empty(queue); + } + + return empty; +} + +bool nvgpu_pmu_fb_queue_enabled(struct pmu_queues *queues) +{ + return queues->queue_type == QUEUE_TYPE_FB; +} + +struct nvgpu_engine_fb_queue *nvgpu_pmu_fb_queue(struct pmu_queues *queues, + u32 queue_id) +{ + return queues->fb_queue[queue_id]; +} + +int nvgpu_pmu_queue_rewind(struct pmu_queues *queues, 
u32 queue_id, + struct nvgpu_falcon *flcn) +{ + struct nvgpu_engine_mem_queue *queue = queues->queue[queue_id]; + + if (queues->queue_type == QUEUE_TYPE_FB) { + return -EINVAL; + } + + return nvgpu_engine_mem_queue_rewind(flcn, queue); +} diff --git a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_seq.c b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_seq.c new file mode 100644 index 000000000..06cb5d910 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_seq.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +struct nvgpu_pmu; + +void nvgpu_pmu_sequences_sw_setup(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_sequences *sequences) +{ + u32 i; + + nvgpu_log_fn(g, " "); + + (void) memset(sequences->seq, 0, + sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES); + (void) memset(sequences->pmu_seq_tbl, 0, + sizeof(sequences->pmu_seq_tbl)); + + for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++) { + sequences->seq[i].id = (u8)i; + } +} + +int nvgpu_pmu_sequences_init(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_sequences **sequences_p) +{ + int err = 0; + struct pmu_sequences *sequences; + + nvgpu_log_fn(g, " "); + + if (*sequences_p != NULL) { + /* skip alloc/reinit for unrailgate sequence */ + nvgpu_pmu_dbg(g, "skip sequences init for unrailgate sequence"); + goto exit; + } + + sequences = (struct pmu_sequences *) + nvgpu_kzalloc(g, sizeof(struct pmu_sequences)); + if (sequences == NULL) { + err = -ENOMEM; + goto exit; + } + + sequences->seq = (struct pmu_sequence *) + nvgpu_kzalloc(g, PMU_MAX_NUM_SEQUENCES * + sizeof(struct pmu_sequence)); + if (sequences->seq == NULL) { + nvgpu_kfree(g, sequences); + return -ENOMEM; + } + + nvgpu_mutex_init(&sequences->pmu_seq_lock); + + *sequences_p = sequences; +exit: + return err; +} + +void nvgpu_pmu_sequences_deinit(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_sequences *sequences) +{ + nvgpu_log_fn(g, " "); + + if (sequences == NULL) { + return; + } + + nvgpu_mutex_destroy(&sequences->pmu_seq_lock); + if (sequences->seq != NULL) { + nvgpu_kfree(g, sequences->seq); + } + nvgpu_kfree(g, sequences); +} + +void nvgpu_pmu_seq_payload_free(struct gk20a *g, struct pmu_sequence *seq) +{ + nvgpu_log_fn(g, " "); + + seq->out_payload_fb_queue = false; + seq->in_payload_fb_queue = false; + seq->fbq_heap_offset = 0; + seq->in_mem = NULL; + seq->out_mem = NULL; +} + +int 
nvgpu_pmu_seq_acquire(struct gk20a *g, + struct pmu_sequences *sequences, + struct pmu_sequence **pseq, + pmu_callback callback, void *cb_params) +{ + struct pmu_sequence *seq; + unsigned long index; + + nvgpu_mutex_acquire(&sequences->pmu_seq_lock); + index = find_first_zero_bit(sequences->pmu_seq_tbl, + sizeof(sequences->pmu_seq_tbl)); + if (index >= sizeof(sequences->pmu_seq_tbl)) { + nvgpu_err(g, "no free sequence available"); + nvgpu_mutex_release(&sequences->pmu_seq_lock); + return -EAGAIN; + } + nvgpu_assert(index <= U32_MAX); + nvgpu_set_bit((u32)index, sequences->pmu_seq_tbl); + nvgpu_mutex_release(&sequences->pmu_seq_lock); + + seq = &sequences->seq[index]; + seq->state = PMU_SEQ_STATE_PENDING; + seq->callback = callback; + seq->cb_params = cb_params; + seq->out_payload = NULL; + seq->in_payload_fb_queue = false; + seq->out_payload_fb_queue = false; + + *pseq = seq; + return 0; +} + +void nvgpu_pmu_seq_release(struct gk20a *g, + struct pmu_sequences *sequences, + struct pmu_sequence *seq) +{ + seq->state = PMU_SEQ_STATE_FREE; + seq->callback = NULL; + seq->cb_params = NULL; + seq->out_payload = NULL; + + nvgpu_mutex_acquire(&sequences->pmu_seq_lock); + nvgpu_clear_bit(seq->id, sequences->pmu_seq_tbl); + nvgpu_mutex_release(&sequences->pmu_seq_lock); +} + +u16 nvgpu_pmu_seq_get_fbq_out_offset(struct pmu_sequence *seq) +{ + return seq->fbq_out_offset_in_queue_element; +} + +void nvgpu_pmu_seq_set_fbq_out_offset(struct pmu_sequence *seq, u16 size) +{ + seq->fbq_out_offset_in_queue_element = size; +} + +u16 nvgpu_pmu_seq_get_buffer_size(struct pmu_sequence *seq) +{ + return seq->buffer_size_used; +} + +void nvgpu_pmu_seq_set_buffer_size(struct pmu_sequence *seq, u16 size) +{ + seq->buffer_size_used = size; +} + +struct nvgpu_engine_fb_queue *nvgpu_pmu_seq_get_cmd_queue( + struct pmu_sequence *seq) +{ + return seq->cmd_queue; +} + +void nvgpu_pmu_seq_set_cmd_queue(struct pmu_sequence *seq, + struct nvgpu_engine_fb_queue *fb_queue) +{ + seq->cmd_queue = fb_queue; +} + +u16 nvgpu_pmu_seq_get_fbq_heap_offset(struct pmu_sequence *seq) +{ + return seq->fbq_heap_offset; +} + +void nvgpu_pmu_seq_set_fbq_heap_offset(struct pmu_sequence *seq, u16 size) +{ + seq->fbq_heap_offset = size; +} + +u8 *nvgpu_pmu_seq_get_out_payload(struct pmu_sequence *seq) +{ + return seq->out_payload; +} + +void nvgpu_pmu_seq_set_out_payload(struct pmu_sequence *seq, u8 *payload) +{ + seq->out_payload = payload; +} + +void nvgpu_pmu_seq_set_in_payload_fb_queue(struct pmu_sequence *seq, bool state) +{ + seq->in_payload_fb_queue = state; +} + +bool nvgpu_pmu_seq_get_out_payload_fb_queue(struct pmu_sequence *seq) +{ + return seq->out_payload_fb_queue; +} + +void nvgpu_pmu_seq_set_out_payload_fb_queue(struct pmu_sequence *seq, + bool state) +{ + seq->out_payload_fb_queue = state; +} + +u32 nvgpu_pmu_seq_get_fbq_element_index(struct pmu_sequence *seq) +{ + return seq->fbq_element_index; +} + +void nvgpu_pmu_seq_set_fbq_element_index(struct pmu_sequence *seq, u32 index) +{ + seq->fbq_element_index = index; +} + +u8 nvgpu_pmu_seq_get_id(struct pmu_sequence *seq) +{ + return seq->id; +} + +enum pmu_seq_state nvgpu_pmu_seq_get_state(struct pmu_sequence *seq) +{ + return seq->state; +} + +void nvgpu_pmu_seq_set_state(struct pmu_sequence *seq, enum pmu_seq_state state) +{ + seq->state = state; +} + +struct pmu_sequence *nvgpu_pmu_sequences_get_seq(struct pmu_sequences *seqs, + u8 id) +{ + return &seqs->seq[id]; +} + +void nvgpu_pmu_seq_callback(struct gk20a *g, struct pmu_sequence *seq, + struct pmu_msg *msg, int err) +{ + if 
(seq->callback != NULL) { + seq->callback(g, msg, seq->cb_params, err); + } +} diff --git a/drivers/gpu/nvgpu/common/pmu/lpwr/lpwr.c b/drivers/gpu/nvgpu/common/pmu/lpwr/lpwr.c new file mode 100644 index 000000000..6610bf2cd --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lpwr/lpwr.c @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lpwr.h" + +static int get_lpwr_idx_table(struct gk20a *g) +{ + u8 *lpwr_idx_table_ptr; + u8 *entry_addr; + u32 idx; + struct nvgpu_lpwr_bios_idx_data *pidx_data = + &g->perf_pmu->lpwr.lwpr_bios_data.idx; + struct nvgpu_bios_lpwr_idx_table_1x_header header = { 0 }; + struct nvgpu_bios_lpwr_idx_table_1x_entry entry = { 0 }; + + lpwr_idx_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + LOWPOWER_TABLE); + if (lpwr_idx_table_ptr == NULL) { + return -EINVAL; + } + + nvgpu_memcpy((u8 *)&header, lpwr_idx_table_ptr, + sizeof(struct nvgpu_bios_lpwr_idx_table_1x_header)); + + if (header.entry_count >= LPWR_VBIOS_IDX_ENTRY_COUNT_MAX) { + return -EINVAL; + } + + pidx_data->base_sampling_period = (u16)header.base_sampling_period; + + /* Parse the LPWR Index Table entries.*/ + for (idx = 0; idx < header.entry_count; idx++) { + entry_addr = lpwr_idx_table_ptr + header.header_size + + (idx * header.entry_size); + + nvgpu_memcpy((u8 *)&entry, entry_addr, + sizeof(struct nvgpu_bios_lpwr_idx_table_1x_entry)); + + pidx_data->entry[idx].pcie_idx = entry.pcie_idx; + pidx_data->entry[idx].gr_idx = entry.gr_idx; + pidx_data->entry[idx].ms_idx = entry.ms_idx; + pidx_data->entry[idx].di_idx = entry.di_idx; + pidx_data->entry[idx].gc6_idx = entry.gc6_idx; + + } + + return 0; +} + +static int get_lpwr_gr_table(struct gk20a *g) +{ + u8 *lpwr_gr_table_ptr; + u8 *entry_addr; + u32 idx; + struct nvgpu_lpwr_bios_gr_data *pgr_data = + &g->perf_pmu->lpwr.lwpr_bios_data.gr; + struct nvgpu_bios_lpwr_gr_table_1x_header header = { 0 }; + struct nvgpu_bios_lpwr_gr_table_1x_entry entry = { 0 }; + + lpwr_gr_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + LOWPOWER_GR_TABLE); + if (lpwr_gr_table_ptr == NULL) { + return -EINVAL; + } + + nvgpu_memcpy((u8 *)&header, lpwr_gr_table_ptr, + sizeof(struct nvgpu_bios_lpwr_gr_table_1x_header)); + + /* Parse the LPWR Index Table 
entries.*/ + for (idx = 0; idx < header.entry_count; idx++) { + entry_addr = lpwr_gr_table_ptr + header.header_size + + (idx * header.entry_size); + + nvgpu_memcpy((u8 *)&entry, entry_addr, + sizeof(struct nvgpu_bios_lpwr_gr_table_1x_entry)); + + if (BIOS_GET_FIELD(bool, entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS)) { + pgr_data->entry[idx].gr_enabled = true; + + pgr_data->entry[idx].feature_mask = + NVGPU_PMU_GR_FEATURE_MASK_ALL; + + if (!BIOS_GET_FIELD(bool, entry.feautre_mask, + NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG)) { + pgr_data->entry[idx].feature_mask &= + ~NVGPU_PMU_GR_FEATURE_MASK_RPPG; + } + } + + } + + return 0; +} + +static int get_lpwr_ms_table(struct gk20a *g) +{ + u8 *lpwr_ms_table_ptr; + u8 *entry_addr; + u32 idx; + struct nvgpu_lpwr_bios_ms_data *pms_data = + &g->perf_pmu->lpwr.lwpr_bios_data.ms; + struct nvgpu_bios_lpwr_ms_table_1x_header header = { 0 }; + struct nvgpu_bios_lpwr_ms_table_1x_entry entry = { 0 }; + + lpwr_ms_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + LOWPOWER_MS_TABLE); + if (lpwr_ms_table_ptr == NULL) { + return -EINVAL; + } + + nvgpu_memcpy((u8 *)&header, lpwr_ms_table_ptr, + sizeof(struct nvgpu_bios_lpwr_ms_table_1x_header)); + + if (header.entry_count >= LPWR_VBIOS_MS_ENTRY_COUNT_MAX) { + return -EINVAL; + } + + pms_data->default_entry_idx = (u8)header.default_entry_idx; + + pms_data->idle_threshold_us = U32(header.idle_threshold_us) * U32(10); + + /* Parse the LPWR MS Table entries.*/ + for (idx = 0; idx < header.entry_count; idx++) { + entry_addr = lpwr_ms_table_ptr + header.header_size + + (idx * header.entry_size); + + nvgpu_memcpy((u8 *)&entry, entry_addr, + sizeof(struct nvgpu_bios_lpwr_ms_table_1x_entry)); + + if (BIOS_GET_FIELD(bool, entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS)) { + pms_data->entry[idx].ms_enabled = true; + + pms_data->entry[idx].feature_mask = + NVGPU_PMU_MS_FEATURE_MASK_ALL; + + if (!BIOS_GET_FIELD(bool, entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING)) { + pms_data->entry[idx].feature_mask &= + ~NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING; + } + + if (!BIOS_GET_FIELD(bool, entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR)) { + pms_data->entry[idx].feature_mask &= + ~NVGPU_PMU_MS_FEATURE_MASK_SW_ASR; + } + + if (!BIOS_GET_FIELD(bool, entry.feautre_mask, + NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG)) { + pms_data->entry[idx].feature_mask &= + ~NVGPU_PMU_MS_FEATURE_MASK_RPPG; + } + } + + pms_data->entry[idx].dynamic_current_logic = + entry.dynamic_current_logic; + + pms_data->entry[idx].dynamic_current_sram = + entry.dynamic_current_sram; + } + + return 0; +} + +int nvgpu_lpwr_pg_setup(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + err = get_lpwr_gr_table(g); + if (err != 0) { + return err; + } + + err = get_lpwr_ms_table(g); + if (err != 0) { + return err; + } + + err = get_lpwr_idx_table(g); + + return err; +} + +static void nvgpu_pmu_handle_param_lpwr_msg(struct gk20a *g, + struct pmu_msg *msg, void *param, + u32 status) +{ + u32 *ack_status = param; + + nvgpu_log_fn(g, " "); + + if (status != 0U) { + nvgpu_err(g, "LWPR PARAM cmd aborted"); + return; + } + + *ack_status = 1; + + nvgpu_pmu_dbg(g, "lpwr-param is acknowledged from PMU %x", + msg->msg.pg.msg_type); +} + +int nvgpu_lwpr_mclk_change(struct gk20a *g, u32 pstate) +{ + struct pmu_cmd cmd; + int status = 0; + u32 payload = NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED; + struct clk_set_info *pstate_info; + u32 ack_status = 0; + + nvgpu_log_fn(g, 
" "); + + pstate_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g, pstate, + CLKWHICH_MCLK); + if (pstate_info == NULL) { + return -EINVAL; + } + + if (pstate_info->max_mhz > + MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ) { + payload |= + NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED; + } + + if (payload != g->perf_pmu->lpwr.mclk_change_cache) { + size_t tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_mclk_change); + g->perf_pmu->lpwr.mclk_change_cache = payload; + + cmd.hdr.unit_id = PMU_UNIT_PG; + nvgpu_assert(tmp_size <= U8_MAX); + cmd.hdr.size = (u8)(tmp_size); + cmd.cmd.pg.mclk_change.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.mclk_change.cmd_id = + PMU_PG_PARAM_CMD_MCLK_CHANGE; + cmd.cmd.pg.mclk_change.data = payload; + + nvgpu_pmu_dbg(g, "cmd post MS PMU_PG_PARAM_CMD_MCLK_CHANGE"); + status = nvgpu_pmu_cmd_post(g, &cmd, NULL, + PMU_COMMAND_QUEUE_HPQ, + nvgpu_pmu_handle_param_lpwr_msg, &ack_status); + + pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g), + &ack_status, 1); + if (ack_status == 0U) { + status = -EINVAL; + nvgpu_err(g, "MCLK-CHANGE ACK failed"); + } + } + + return status; +} + +int nvgpu_lpwr_post_init(struct gk20a *g) +{ + struct pmu_cmd cmd; + int status = 0; + u32 ack_status = 0; + size_t tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_post_init_param); + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + + cmd.hdr.unit_id = PMU_UNIT_PG; + nvgpu_assert(tmp_size <= U8_MAX); + cmd.hdr.size = (u8)tmp_size; + + cmd.cmd.pg.post_init.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.post_init.cmd_id = + PMU_PG_PARAM_CMD_POST_INIT; + + nvgpu_pmu_dbg(g, "cmd post post-init PMU_PG_PARAM_CMD_POST_INIT"); + status = nvgpu_pmu_cmd_post(g, &cmd, NULL, + PMU_COMMAND_QUEUE_LPQ, + nvgpu_pmu_handle_param_lpwr_msg, &ack_status); + + pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g), + &ack_status, 1); + if (ack_status == 0U) { + status = -EINVAL; + nvgpu_err(g, "post-init ack failed"); + } + + return status; +} + +bool nvgpu_lpwr_is_mscg_supported(struct gk20a *g, u32 pstate_num) +{ + struct nvgpu_lpwr_bios_ms_data *pms_data = + &g->perf_pmu->lpwr.lwpr_bios_data.ms; + struct nvgpu_lpwr_bios_idx_data *pidx_data = + &g->perf_pmu->lpwr.lwpr_bios_data.idx; + u32 ms_idx; + u8 lpwr_idx = 0; + int status; + + nvgpu_log_fn(g, " "); + status = nvgpu_perf_pstate_get_lpwr_index(g, pstate_num, &lpwr_idx); + if (status != 0) { + return false; + } + + ms_idx = pidx_data->entry[lpwr_idx].ms_idx; + if (pms_data->entry[ms_idx].ms_enabled) { + return true; + } else { + return false; + } +} + +bool nvgpu_lpwr_is_rppg_supported(struct gk20a *g, u32 pstate_num) +{ + struct nvgpu_lpwr_bios_gr_data *pgr_data = + &g->perf_pmu->lpwr.lwpr_bios_data.gr; + struct nvgpu_lpwr_bios_idx_data *pidx_data = + &g->perf_pmu->lpwr.lwpr_bios_data.idx; + u32 idx; + u8 lpwr_idx = 0; + int status; + + nvgpu_log_fn(g, " "); + status = nvgpu_perf_pstate_get_lpwr_index(g, pstate_num, &lpwr_idx); + if (status != 0) { + return false; + } + + idx = pidx_data->entry[lpwr_idx].gr_idx; + if (pgr_data->entry[idx].gr_enabled) { + return true; + } else { + return false; + } +} + + +int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status = 0; + bool is_mscg_supported = false; + bool is_rppg_supported = false; + u32 present_pstate = 0; + + nvgpu_log_fn(g, " "); + + if (pstate_lock) { + nvgpu_clk_arb_pstate_change_lock(g, true); + } + nvgpu_mutex_acquire(&pmu->pg->pg_mutex); + + present_pstate = nvgpu_clk_arb_get_current_pstate(g); + + 
is_mscg_supported = nvgpu_lpwr_is_mscg_supported(g, + present_pstate); + if (is_mscg_supported && g->mscg_enabled) { + if (pmu->pg->mscg_stat == 0U) { + pmu->pg->mscg_stat = PMU_MSCG_ENABLED; + } + } + + is_rppg_supported = nvgpu_lpwr_is_rppg_supported(g, + present_pstate); + if (is_rppg_supported) { + if (g->can_elpg) { + status = nvgpu_pmu_enable_elpg(g); + } + } + + nvgpu_mutex_release(&pmu->pg->pg_mutex); + if (pstate_lock) { + nvgpu_clk_arb_pstate_change_lock(g, false); + } + + return status; +} + +int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status = 0; + bool is_mscg_supported = false; + bool is_rppg_supported = false; + u32 present_pstate = 0; + + nvgpu_log_fn(g, " "); + + if (pstate_lock) { + nvgpu_clk_arb_pstate_change_lock(g, true); + } + nvgpu_mutex_acquire(&pmu->pg->pg_mutex); + + present_pstate = nvgpu_clk_arb_get_current_pstate(g); + + is_rppg_supported = nvgpu_lpwr_is_rppg_supported(g, + present_pstate); + if (is_rppg_supported) { + if (g->elpg_enabled) { + status = nvgpu_pmu_disable_elpg(g); + if (status != 0) { + goto exit_unlock; + } + } + } + + is_mscg_supported = nvgpu_lpwr_is_mscg_supported(g, + present_pstate); + if (is_mscg_supported && g->mscg_enabled) { + if (pmu->pg->mscg_stat != 0U) { + pmu->pg->mscg_stat = PMU_MSCG_DISABLED; + } + } + +exit_unlock: + nvgpu_mutex_release(&pmu->pg->pg_mutex); + if (pstate_lock) { + nvgpu_clk_arb_pstate_change_lock(g, false); + } + + nvgpu_log_fn(g, "done"); + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/lpwr/lpwr.h b/drivers/gpu/nvgpu/common/pmu/lpwr/lpwr.h new file mode 100644 index 000000000..4431d9262 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lpwr/lpwr.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_LPWR_LPWR_H +#define NVGPU_LPWR_LPWR_H + +#include + +#define MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ 540U + +#define NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED BIT32(0x1) +#define NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED BIT32(0x3) + +#endif /* NVGPU_LPWR_LPWR_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/lpwr/rppg.c b/drivers/gpu/nvgpu/common/pmu/lpwr/rppg.c new file mode 100644 index 000000000..0caadd6ac --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lpwr/rppg.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + + +static void pmu_handle_rppg_init_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + u32 *success = param; + + if (status == 0U) { + switch (msg->msg.pg.rppg_msg.cmn.msg_id) { + case NV_PMU_RPPG_MSG_ID_INIT_CTRL_ACK: + *success = 1; + nvgpu_pmu_dbg(g, "RPPG is acknowledged from PMU %x", + msg->msg.pg.msg_type); + break; + default: + *success = 0; + nvgpu_err(g, "Invalid message ID:%u", + msg->msg.pg.rppg_msg.cmn.msg_id); + break; + } + } +} + +static int rppg_send_cmd(struct gk20a *g, struct nv_pmu_rppg_cmd *prppg_cmd) +{ + struct pmu_cmd cmd; + int status = 0; + u32 success = 0; + size_t tmp_size = PMU_CMD_HDR_SIZE + sizeof(struct nv_pmu_rppg_cmd); + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + nvgpu_assert(tmp_size <= U8_MAX); + cmd.hdr.size = (u8)tmp_size; + + cmd.cmd.pg.rppg_cmd.cmn.cmd_type = PMU_PMU_PG_CMD_ID_RPPG; + cmd.cmd.pg.rppg_cmd.cmn.cmd_id = prppg_cmd->cmn.cmd_id; + + switch (prppg_cmd->cmn.cmd_id) { + case NV_PMU_RPPG_CMD_ID_INIT: + break; + case NV_PMU_RPPG_CMD_ID_INIT_CTRL: + cmd.cmd.pg.rppg_cmd.init_ctrl.ctrl_id = + prppg_cmd->init_ctrl.ctrl_id; + cmd.cmd.pg.rppg_cmd.init_ctrl.domain_id = + prppg_cmd->init_ctrl.domain_id; + break; + case NV_PMU_RPPG_CMD_ID_STATS_RESET: + cmd.cmd.pg.rppg_cmd.stats_reset.ctrl_id = + prppg_cmd->stats_reset.ctrl_id; + break; + default: + nvgpu_err(g, "Invalid RPPG command %d", + prppg_cmd->cmn.cmd_id); + status = -1; + break; + } + + if (status != 0) { + goto exit; + } + + status = nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_rppg_init_msg, &success); + if (status != 0) { + nvgpu_err(g, "Unable to submit parameter command %d", + prppg_cmd->cmn.cmd_id); + goto exit; + } + + if (prppg_cmd->cmn.cmd_id == NV_PMU_RPPG_CMD_ID_INIT_CTRL) { + pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g), + &success, 1); + if (success == 0U) { + status = -EINVAL; + nvgpu_err(g, "Ack for the parameter command %x", + prppg_cmd->cmn.cmd_id); + } + } + +exit: + return status; +} + +static int rppg_init(struct gk20a *g) +{ + struct nv_pmu_rppg_cmd rppg_cmd; + + rppg_cmd.init.cmd_id = NV_PMU_RPPG_CMD_ID_INIT; + + return rppg_send_cmd(g, &rppg_cmd); +} + +static int rppg_ctrl_init(struct gk20a *g, u8 ctrl_id) +{ + struct nv_pmu_rppg_cmd rppg_cmd; + + rppg_cmd.init_ctrl.cmd_id = NV_PMU_RPPG_CMD_ID_INIT_CTRL; + rppg_cmd.init_ctrl.ctrl_id 
= ctrl_id; + + switch (ctrl_id) { + case NV_PMU_RPPG_CTRL_ID_GR: + rppg_cmd.init_ctrl.domain_id = NV_PMU_RPPG_DOMAIN_ID_GFX; + break; + case NV_PMU_RPPG_CTRL_ID_MS: + rppg_cmd.init_ctrl.domain_id = NV_PMU_RPPG_DOMAIN_ID_GFX; + break; + default: + nvgpu_err(g, "Invalid ctrl_id %u for %s", ctrl_id, __func__); + break; + } + + return rppg_send_cmd(g, &rppg_cmd); +} + +int init_rppg(struct gk20a *g) +{ + int status; + + status = rppg_init(g); + if (status != 0) { + nvgpu_err(g, + "Failed to initialize RPPG in PMU: 0x%08x", status); + return status; + } + + + status = rppg_ctrl_init(g, NV_PMU_RPPG_CTRL_ID_GR); + if (status != 0) { + nvgpu_err(g, + "Failed to initialize RPPG_CTRL: GR in PMU: 0x%08x", + status); + return status; + } + + status = rppg_ctrl_init(g, NV_PMU_RPPG_CTRL_ID_MS); + if (status != 0) { + nvgpu_err(g, + "Failed to initialize RPPG_CTRL: MS in PMU: 0x%08x", + status); + return status; + } + + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c new file mode 100644 index 000000000..93307873a --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "lsfm_sw_gm20b.h" +#include "lsfm_sw_gp10b.h" +#ifdef CONFIG_NVGPU_DGPU +#include "lsfm_sw_gv100.h" +#include "lsfm_sw_tu104.h" +#endif + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) +#include "nvgpu_next_gpuid.h" +#endif + +static bool is_lsfm_supported(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm) +{ + /* + * Low secure falcon manager is a secure iGPU functionality to support + * Lazy bootstrap feature. Enabling lsfm will allow nvgpu to send cmds + * to lspmu to bootstrap LS falcons. 
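+ * LSFM cmds are sent only when PRIV security is enabled and an lsfm instance has been allocated.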
+ */ + if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY) && + (lsfm != NULL)) { + return true; + } + + return false; +} + +int nvgpu_pmu_lsfm_int_wpr_region(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm) +{ + if (is_lsfm_supported(g, pmu, lsfm)) { + if (lsfm->init_wpr_region != NULL) { + return lsfm->init_wpr_region(g, pmu); + } + } + + return 0; +} + +int nvgpu_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm, u32 falcon_id_mask) +{ + if (is_lsfm_supported(g, pmu, lsfm)) { + if (lsfm->bootstrap_ls_falcon != NULL) { + return lsfm->bootstrap_ls_falcon(g, pmu, lsfm, + falcon_id_mask); + } + } + + return 0; +} + +int nvgpu_pmu_lsfm_ls_pmu_cmdline_args_copy(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm) +{ + if (is_lsfm_supported(g, pmu, lsfm)) { + if (lsfm->ls_pmu_cmdline_args_copy != NULL) { + return lsfm->ls_pmu_cmdline_args_copy(g, pmu); + } + } + + return 0; +} + +void nvgpu_pmu_lsfm_rpc_handler(struct gk20a *g, + struct rpc_handler_payload *rpc_payload) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_acr_bootstrap_gr_falcons acr_rpc; + + (void) memset(&acr_rpc, 0, sizeof(struct nv_pmu_rpc_header)); + nvgpu_memcpy((u8 *)&acr_rpc, (u8 *)rpc_payload->rpc_buff, + sizeof(struct nv_pmu_rpc_struct_acr_bootstrap_gr_falcons)); + + switch (acr_rpc.hdr.function) { + case NV_PMU_RPC_ID_ACR_INIT_WPR_REGION: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_ACR_INIT_WPR_REGION"); + pmu->lsfm->is_wpr_init_done = true; + break; + case NV_PMU_RPC_ID_ACR_BOOTSTRAP_GR_FALCONS: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_ACR_BOOTSTRAP_GR_FALCONS"); + pmu->lsfm->loaded_falcon_id = 1U; + break; + default: + nvgpu_pmu_dbg(g, "unsupported ACR function"); + break; + } +} + +void nvgpu_pmu_lsfm_clean(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_lsfm *lsfm) +{ + nvgpu_log_fn(g, " "); + + if (is_lsfm_supported(g, pmu, lsfm)) { + lsfm->is_wpr_init_done = false; + lsfm->loaded_falcon_id = 0U; + } +} + +int nvgpu_pmu_lsfm_init(struct gk20a *g, struct nvgpu_pmu_lsfm **lsfm) +{ + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + int err = 0; + + if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + return 0; + } + + if (*lsfm != NULL) { + /* skip alloc/reinit for unrailgate sequence */ + nvgpu_pmu_dbg(g, "skip lsfm init for unrailgate sequence"); + goto done; + } + + *lsfm = (struct nvgpu_pmu_lsfm *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_pmu_lsfm)); + if (*lsfm == NULL) { + err = -ENOMEM; + goto done; + } + + switch (ver) { + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + nvgpu_gm20b_lsfm_sw_init(g, *lsfm); + break; + case NVGPU_GPUID_GP10B: + case NVGPU_GPUID_GV11B: + nvgpu_gp10b_lsfm_sw_init(g, *lsfm); + break; +#ifdef CONFIG_NVGPU_DGPU + case NVGPU_GPUID_GV100: + nvgpu_gv100_lsfm_sw_init(g, *lsfm); + break; + case NVGPU_GPUID_TU104: + nvgpu_tu104_lsfm_sw_init(g, *lsfm); + break; +#endif +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_GPUID: + nvgpu_gv100_lsfm_sw_init(g, *lsfm); + break; +#endif + default: + nvgpu_kfree(g, *lsfm); + err = -EINVAL; + nvgpu_err(g, "no support for GPUID %x", ver); + break; + } + +done: + return err; +} + +void nvgpu_pmu_lsfm_deinit(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_lsfm *lsfm) +{ + if (is_lsfm_supported(g, pmu, lsfm)) { + nvgpu_kfree(g, lsfm); + } + pmu->lsfm = NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c new file mode 100644 index 000000000..5794cfe1e --- 
/dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lsfm_sw_gm20b.h" + +static void lsfm_handle_acr_init_wpr_region_msg(struct gk20a *g, + struct pmu_msg *msg, void *param, u32 status) +{ + struct nvgpu_pmu *pmu = g->pmu; + + nvgpu_log_fn(g, " "); + + nvgpu_pmu_dbg(g, "reply PMU_ACR_CMD_ID_INIT_WPR_REGION"); + + if (msg->msg.acr.acrmsg.errorcode == PMU_ACR_SUCCESS) { + pmu->lsfm->is_wpr_init_done = true; + } +} + +int gm20b_pmu_lsfm_init_acr_wpr_region(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + struct pmu_cmd cmd; + size_t tmp_size; + + nvgpu_log_fn(g, " "); + + /* init ACR */ + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_ACR; + + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_acr_cmd_init_wpr_details); + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + cmd.hdr.size = (u8)tmp_size; + + cmd.cmd.acr.init_wpr.cmd_type = PMU_ACR_CMD_ID_INIT_WPR_REGION; + cmd.cmd.acr.init_wpr.regionid = 0x01U; + cmd.cmd.acr.init_wpr.wproffset = 0x00U; + + nvgpu_pmu_dbg(g, "cmd post PMU_ACR_CMD_ID_INIT_WPR_REGION"); + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + lsfm_handle_acr_init_wpr_region_msg, pmu); +} + +void gm20b_pmu_lsfm_handle_bootstrap_falcon_msg(struct gk20a *g, + struct pmu_msg *msg, void *param, u32 status) +{ + struct nvgpu_pmu *pmu = g->pmu; + + nvgpu_log_fn(g, " "); + + nvgpu_pmu_dbg(g, "reply PMU_ACR_CMD_ID_BOOTSTRAP_FALCON"); + nvgpu_pmu_dbg(g, "response code = %x", msg->msg.acr.acrmsg.falconid); + + pmu->lsfm->loaded_falcon_id = msg->msg.acr.acrmsg.falconid; +} + +static int gm20b_pmu_lsfm_bootstrap_falcon(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm, + u32 falcon_id, u32 flags) +{ + struct pmu_cmd cmd; + size_t tmp_size; + + nvgpu_log_fn(g, " "); + + lsfm->loaded_falcon_id = 0U; + + if (!lsfm->is_wpr_init_done) { + return -EINVAL; + } + + /* send message to load FECS falcon */ + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_ACR; + + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_acr_cmd_bootstrap_falcon); + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + cmd.hdr.size = (u8)tmp_size; + + cmd.cmd.acr.bootstrap_falcon.cmd_type = + PMU_ACR_CMD_ID_BOOTSTRAP_FALCON; + cmd.cmd.acr.bootstrap_falcon.flags = flags; + 
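+ /* falcon_id selects the LS falcon to be bootstrapped; GM20B supports FECS only */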
cmd.cmd.acr.bootstrap_falcon.falconid = falcon_id; + nvgpu_pmu_dbg(g, "cmd post PMU_ACR_CMD_ID_BOOTSTRAP_FALCON: %x", + falcon_id); + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + gm20b_pmu_lsfm_handle_bootstrap_falcon_msg, pmu); +} + +static int gm20b_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm, u32 falcon_id_mask) +{ + int err = 0; + u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; + + /* GM20B PMU supports loading FECS only */ + if (!(falcon_id_mask == BIT32(FALCON_ID_FECS))) { + return -EINVAL; + } + + /* check whether pmu is ready to bootstrap lsf if not wait for it */ + if (!lsfm->is_wpr_init_done) { + pmu_wait_message_cond(g->pmu, + nvgpu_get_poll_timeout(g), + &lsfm->is_wpr_init_done, 1U); + /* check again if it still not ready indicate an error */ + if (!lsfm->is_wpr_init_done) { + nvgpu_err(g, "PMU not ready to load LSF"); + return -ETIMEDOUT; + } + } + + /* load FECS */ + nvgpu_falcon_mailbox_write(&g->fecs_flcn, FALCON_MAILBOX_0, ~U32(0x0U)); + + err = gm20b_pmu_lsfm_bootstrap_falcon(g, pmu, lsfm, + FALCON_ID_FECS, flags); + if (err != 0) { + return err; + } + + nvgpu_assert(falcon_id_mask <= U8_MAX); + pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g), + &lsfm->loaded_falcon_id, (u8)FALCON_ID_FECS); + if (lsfm->loaded_falcon_id != FALCON_ID_FECS) { + err = -ETIMEDOUT; + } + + return err; +} + +int gm20b_pmu_lsfm_pmu_cmd_line_args_copy(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + u32 cmd_line_args_offset = 0U; + u32 dmem_size = 0U; + int err = 0; + + err = nvgpu_falcon_get_mem_size(pmu->flcn, MEM_DMEM, &dmem_size); + if (err != 0) { + nvgpu_err(g, "dmem size request failed"); + return -EINVAL; + } + + cmd_line_args_offset = dmem_size - + pmu->fw->ops.get_cmd_line_args_size(pmu); + + /* Copying pmu cmdline args */ + pmu->fw->ops.set_cmd_line_args_cpu_freq(pmu, + g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); + pmu->fw->ops.set_cmd_line_args_secure_mode(pmu, 1U); + pmu->fw->ops.set_cmd_line_args_trace_size( + pmu, PMU_RTOS_TRACE_BUFSIZE); + pmu->fw->ops.set_cmd_line_args_trace_dma_base(pmu); + pmu->fw->ops.set_cmd_line_args_trace_dma_idx( + pmu, GK20A_PMU_DMAIDX_VIRT); + + return nvgpu_falcon_copy_to_dmem(pmu->flcn, cmd_line_args_offset, + (u8 *)(pmu->fw->ops.get_cmd_line_args_ptr(pmu)), + pmu->fw->ops.get_cmd_line_args_size(pmu), 0U); +} + +void nvgpu_gm20b_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm) +{ + nvgpu_log_fn(g, " "); + + lsfm->is_wpr_init_done = false; + lsfm->loaded_falcon_id = 0U; + + lsfm->init_wpr_region = gm20b_pmu_lsfm_init_acr_wpr_region; + lsfm->bootstrap_ls_falcon = gm20b_pmu_lsfm_bootstrap_ls_falcon; + lsfm->ls_pmu_cmdline_args_copy = gm20b_pmu_lsfm_pmu_cmd_line_args_copy; +} diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.h b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.h new file mode 100644 index 000000000..011bf83bc --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_LSFM_SW_GM20B_H +#define NVGPU_LSFM_SW_GM20B_H + +int gm20b_pmu_lsfm_init_acr_wpr_region(struct gk20a *g, struct nvgpu_pmu *pmu); +void gm20b_pmu_lsfm_handle_bootstrap_falcon_msg(struct gk20a *g, + struct pmu_msg *msg, void *param, u32 status); +int gm20b_pmu_lsfm_pmu_cmd_line_args_copy(struct gk20a *g, + struct nvgpu_pmu *pmu); + +void nvgpu_gm20b_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm); + +#endif /*NVGPU_LSFM_SW_GM20B_H*/ diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c new file mode 100644 index 000000000..749386f03 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "lsfm_sw_gm20b.h" +#include "lsfm_sw_gp10b.h" + +static int gp10b_pmu_lsfm_bootstrap_falcon(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm, + u32 falconidmask, u32 flags) +{ + struct pmu_cmd cmd; + size_t tmp_size; + + nvgpu_log_fn(g, " "); + + lsfm->loaded_falcon_id = 0U; + + nvgpu_pmu_dbg(g, "wprinit status = %x", lsfm->is_wpr_init_done); + if (!lsfm->is_wpr_init_done) { + return -EINVAL; + } + + /* send message to load FECS falcon */ + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_ACR; + + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_acr_cmd_bootstrap_multiple_falcons); + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + cmd.hdr.size = (u8)tmp_size; + + cmd.cmd.acr.boot_falcons.cmd_type = + PMU_ACR_CMD_ID_BOOTSTRAP_MULTIPLE_FALCONS; + cmd.cmd.acr.boot_falcons.flags = flags; + cmd.cmd.acr.boot_falcons.falconidmask = falconidmask; + cmd.cmd.acr.boot_falcons.usevamask = 0; + cmd.cmd.acr.boot_falcons.wprvirtualbase.lo = 0x0U; + cmd.cmd.acr.boot_falcons.wprvirtualbase.hi = 0x0U; + + nvgpu_pmu_dbg(g, "PMU_ACR_CMD_ID_BOOTSTRAP_MULTIPLE_FALCONS:%x", + falconidmask); + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + gm20b_pmu_lsfm_handle_bootstrap_falcon_msg, pmu); +} + +static int gp10b_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm, u32 falcon_id_mask) +{ + u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; + int err = 0; + + /* GP10B PMU supports loading FECS and GPCCS only */ + if (falcon_id_mask == 0U) { + err = -EINVAL; + goto done; + } + + if ((falcon_id_mask & + ~(BIT32(FALCON_ID_FECS) | BIT32(FALCON_ID_GPCCS))) != 0U) { + err = -EINVAL; + goto done; + } + + lsfm->loaded_falcon_id = 0U; + /* check whether pmu is ready to bootstrap lsf if not wait for it */ + if (!lsfm->is_wpr_init_done) { + pmu_wait_message_cond(g->pmu, + nvgpu_get_poll_timeout(g), + &lsfm->is_wpr_init_done, 1U); + /* check again if it still not ready indicate an error */ + if (!lsfm->is_wpr_init_done) { + nvgpu_err(g, "PMU not ready to load LSF"); + err = -ETIMEDOUT; + goto done; + } + } + + /* bootstrap falcon(s) */ + err = gp10b_pmu_lsfm_bootstrap_falcon(g, pmu, lsfm, + falcon_id_mask, flags); + if (err != 0) { + err = -EINVAL; + goto done; + } + + nvgpu_assert(falcon_id_mask <= U8_MAX); + pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g), + &lsfm->loaded_falcon_id, (u8)falcon_id_mask); + if (lsfm->loaded_falcon_id != falcon_id_mask) { + err = -ETIMEDOUT; + } + +done: + return err; +} + +void nvgpu_gp10b_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm) +{ + nvgpu_log_fn(g, " "); + + lsfm->is_wpr_init_done = false; + lsfm->loaded_falcon_id = 0U; + + lsfm->init_wpr_region = gm20b_pmu_lsfm_init_acr_wpr_region; + lsfm->bootstrap_ls_falcon = gp10b_pmu_lsfm_bootstrap_ls_falcon; + lsfm->ls_pmu_cmdline_args_copy = gm20b_pmu_lsfm_pmu_cmd_line_args_copy; +} diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.h b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.h new file mode 100644 index 000000000..a005708f7 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_LSFM_SW_GP10B_H +#define NVGPU_LSFM_SW_GP10B_H + +void nvgpu_gp10b_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm); + +#endif /*NVGPU_LSFM_SW_GP10B_H*/ diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c new file mode 100644 index 000000000..73c2367c3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lsfm_sw_gv100.h" + +static int gv100_pmu_lsfm_init_acr_wpr_region(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + struct nv_pmu_rpc_struct_acr_init_wpr_region rpc; + int status = 0; + + (void) memset(&rpc, 0, + sizeof(struct nv_pmu_rpc_struct_acr_init_wpr_region)); + rpc.wpr_regionId = 0x1U; + rpc.wpr_offset = 0x0U; + nvgpu_pmu_dbg(g, "Post NV_PMU_RPC_ID_ACR_INIT_WPR_REGION"); + PMU_RPC_EXECUTE(status, pmu, ACR, INIT_WPR_REGION, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC status=0x%x", + status); + } + + return status; +} + +static int gv100_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm, u32 falcon_id_mask) +{ + struct nv_pmu_rpc_struct_acr_bootstrap_gr_falcons rpc; + u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; + int status = 0; + + if (falcon_id_mask == 0U) { + return -EINVAL; + } + + if ((falcon_id_mask & + ~(BIT32(FALCON_ID_FECS) | + BIT32(FALCON_ID_GPCCS))) != 0U) { + return -EINVAL; + } + + lsfm->loaded_falcon_id = 0U; + /* check whether pmu is ready to bootstrap lsf if not wait for it */ + if (!lsfm->is_wpr_init_done) { + pmu_wait_message_cond(g->pmu, + nvgpu_get_poll_timeout(g), + &lsfm->is_wpr_init_done, 1U); + /* check again if it still not ready indicate an error */ + if (!lsfm->is_wpr_init_done) { + nvgpu_err(g, "PMU not ready to load LSF"); + status = -ETIMEDOUT; + goto exit; + } + } + + (void) memset(&rpc, 0, + sizeof(struct nv_pmu_rpc_struct_acr_bootstrap_gr_falcons)); + rpc.falcon_id_mask = falcon_id_mask; + rpc.flags = flags; + nvgpu_pmu_dbg(g, "Post NV_PMU_RPC_ID_ACR_BOOTSTRAP_GR_FALCONS"); + PMU_RPC_EXECUTE(status, pmu, ACR, BOOTSTRAP_GR_FALCONS, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + goto exit; + } + + pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g), + &lsfm->loaded_falcon_id, 1U); + + if (lsfm->loaded_falcon_id != 1U) { + status = -ETIMEDOUT; + } + +exit: + return status; +} + +int gv100_update_lspmu_cmdline_args_copy(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + u32 cmd_line_args_offset = 0U; + u32 dmem_size = 0U; + int err = 0; + + err = nvgpu_falcon_get_mem_size(pmu->flcn, MEM_DMEM, &dmem_size); + if (err != 0) { + nvgpu_err(g, "dmem size request failed"); + return -EINVAL; + } + + cmd_line_args_offset = dmem_size - + pmu->fw->ops.get_cmd_line_args_size(pmu); + + /*Copying pmu cmdline args*/ + pmu->fw->ops.set_cmd_line_args_cpu_freq(pmu, 0U); + pmu->fw->ops.set_cmd_line_args_secure_mode(pmu, 0U); + pmu->fw->ops.set_cmd_line_args_trace_size( + pmu, PMU_RTOS_TRACE_BUFSIZE); + pmu->fw->ops.set_cmd_line_args_trace_dma_base(pmu); + pmu->fw->ops.set_cmd_line_args_trace_dma_idx( + pmu, GK20A_PMU_DMAIDX_VIRT); + if (pmu->fw->ops.config_cmd_line_args_super_surface != NULL) { + pmu->fw->ops.config_cmd_line_args_super_surface(pmu); + } + + return nvgpu_falcon_copy_to_dmem(pmu->flcn, cmd_line_args_offset, + (u8 *)(pmu->fw->ops.get_cmd_line_args_ptr(pmu)), + pmu->fw->ops.get_cmd_line_args_size(pmu), 0U); +} + +void nvgpu_gv100_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm) +{ + nvgpu_log_fn(g, " "); + + lsfm->is_wpr_init_done = false; + lsfm->loaded_falcon_id = 0U; + + lsfm->init_wpr_region = gv100_pmu_lsfm_init_acr_wpr_region; + lsfm->bootstrap_ls_falcon = gv100_pmu_lsfm_bootstrap_ls_falcon; + lsfm->ls_pmu_cmdline_args_copy = gv100_update_lspmu_cmdline_args_copy; +} diff --git 
a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.h b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.h new file mode 100644 index 000000000..b284f6bd1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_LSFM_SW_GV100_H +#define NVGPU_LSFM_SW_GV100_H + +int gv100_update_lspmu_cmdline_args_copy(struct gk20a *g, + struct nvgpu_pmu *pmu); +void nvgpu_gv100_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm); + +#endif /* NVGPU_LSFM_SW_GV100_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_tu104.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_tu104.c new file mode 100644 index 000000000..ef6830d1b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_tu104.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "lsfm_sw_gv100.h" +#include "lsfm_sw_tu104.h" + +void nvgpu_tu104_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm) +{ + nvgpu_log_fn(g, " "); + + lsfm->is_wpr_init_done = false; + + /* LSF is not handled by PMU on this chip */ + lsfm->init_wpr_region = NULL; + lsfm->bootstrap_ls_falcon = NULL; + + lsfm->ls_pmu_cmdline_args_copy = gv100_update_lspmu_cmdline_args_copy; +} diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_tu104.h b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_tu104.h new file mode 100644 index 000000000..a6148b6cd --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_tu104.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_LSFM_SW_TU104_H +#define NVGPU_LSFM_SW_TU104_H + +void nvgpu_tu104_lsfm_sw_init(struct gk20a *g, struct nvgpu_pmu_lsfm *lsfm); + +#endif /* NVGPU_LSFM_SW_TU104_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c new file mode 100644 index 000000000..fd752f12b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "change_seq.h" +#include "perf.h" + +static int perf_change_seq_sw_setup_super(struct gk20a *g, + struct change_seq *p_change_seq) +{ + int status = 0; + + nvgpu_log_fn(g, " "); + + /* Initialize parameters */ + p_change_seq->client_lock_mask = 0; + + p_change_seq->version = CTRL_PERF_CHANGE_SEQ_VERSION_35; + + status = nvgpu_boardobjgrpmask_init( + &p_change_seq->clk_domains_exclusion_mask.super, + 32U, ((void*)0)); + if (status != 0) { + nvgpu_err(g, "clk_domains_exclusion_mask failed to init %d", + status); + goto perf_change_seq_sw_setup_super_exit; + } + + status = nvgpu_boardobjgrpmask_init( + &p_change_seq->clk_domains_inclusion_mask.super, + 32U, ((void*)0)); + if (status != 0) { + nvgpu_err(g, "clk_domains_inclusion_mask failed to init %d", + status); + goto perf_change_seq_sw_setup_super_exit; + } + +perf_change_seq_sw_setup_super_exit: + return status; +} + +int perf_change_seq_sw_setup(struct gk20a *g) +{ + struct change_seq_pmu *perf_change_seq_pmu = + &(g->pmu->perf_pmu->changeseq_pmu); + int status = 0; + + nvgpu_log_fn(g, " "); + + (void) memset(perf_change_seq_pmu, 0, + sizeof(struct change_seq_pmu)); + + status = perf_change_seq_sw_setup_super(g, &perf_change_seq_pmu->super); + if (status != 0) { + goto exit; + } + + perf_change_seq_pmu->super.b_enabled_pmu_support = true; + /*exclude MCLK, may not be needed as MCLK is already fixed */ + perf_change_seq_pmu->super.clk_domains_exclusion_mask.super.data[0] + = 0x04U; + perf_change_seq_pmu->b_vf_point_check_ignore = false; + perf_change_seq_pmu->b_lock = false; + perf_change_seq_pmu->cpu_step_id_mask = 0; + perf_change_seq_pmu->cpu_adverised_step_id_mask = 0; + perf_change_seq_pmu->change_state = 0U; + +exit: + return status; +} + +static void build_change_seq_boot (struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct change_seq_pmu *perf_change_seq_pmu = + &(g->pmu->perf_pmu->changeseq_pmu); + struct change_seq_pmu_script *script_last = + &perf_change_seq_pmu->script_last; + u8 num_domains = 0U; + + nvgpu_log_fn(g, " "); + + script_last->super_surface_offset = + nvgpu_pmu_get_ss_member_set_offset(g, pmu, + NV_PMU_SUPER_SURFACE_MEMBER_CHANGE_SEQ_GRP) + + (u32)(sizeof(struct perf_change_seq_pmu_script) * + SEQ_SCRIPT_LAST); + + nvgpu_mem_rd_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + script_last->super_surface_offset, + &script_last->buf, + (u32) sizeof(struct perf_change_seq_pmu_script)); + + script_last->buf.change.data.flags = CTRL_PERF_CHANGE_SEQ_CHANGE_NONE; + + num_domains = nvgpu_pmu_clk_domain_update_clk_info(g, + &script_last->buf.change.data.clk_list); + script_last->buf.change.data.clk_list.num_domains = num_domains; + + nvgpu_pmu_dbg(g,"Total domains = %d\n", + script_last->buf.change.data.clk_list.num_domains); + + /* Assume everything is P0 - Need to find the index for P0 */ + script_last->buf.change.data.pstate_index = + perf_pstate_get_table_entry_idx(g, CTRL_PERF_PSTATE_P0); + + nvgpu_mem_wr_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + script_last->super_surface_offset, + &script_last->buf, + (u32) sizeof(struct perf_change_seq_pmu_script)); + + return; +} + +int perf_change_seq_pmu_setup(struct gk20a *g) +{ + struct nv_pmu_rpc_perf_change_seq_info_get info_get; + struct nv_pmu_rpc_perf_change_seq_info_set info_set; + struct nvgpu_pmu *pmu = g->pmu; + struct change_seq_pmu *perf_change_seq_pmu = + 
&(g->pmu->perf_pmu->changeseq_pmu); + int status; + + /* Do this till we enable performance table */ + build_change_seq_boot(g); + + (void) memset(&info_get, 0, + sizeof(struct nv_pmu_rpc_perf_change_seq_info_get)); + (void) memset(&info_set, 0, + sizeof(struct nv_pmu_rpc_perf_change_seq_info_set)); + + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, CHANGE_SEQ_INFO_GET, &info_get, 0); + if (status != 0) { + nvgpu_err(g, + "Failed to execute Change Seq GET RPC status=0x%x", + status); + goto perf_change_seq_pmu_setup_exit; + } + + info_set.info_set.super.version = perf_change_seq_pmu->super.version; + + status = nvgpu_boardobjgrpmask_export( + &perf_change_seq_pmu->super.clk_domains_exclusion_mask.super, + perf_change_seq_pmu-> + super.clk_domains_exclusion_mask.super.bitcount, + &info_set.info_set.super.clk_domains_exclusion_mask.super); + if ( status != 0 ) { + nvgpu_err(g, "Could not export clkdomains exclusion mask"); + goto perf_change_seq_pmu_setup_exit; + } + + status = nvgpu_boardobjgrpmask_export( + &perf_change_seq_pmu->super.clk_domains_inclusion_mask.super, + perf_change_seq_pmu-> + super.clk_domains_inclusion_mask.super.bitcount, + &info_set.info_set.super.clk_domains_inclusion_mask.super); + if ( status != 0 ) { + nvgpu_err(g, "Could not export clkdomains inclusion mask"); + goto perf_change_seq_pmu_setup_exit; + } + + info_set.info_set.b_vf_point_check_ignore = + perf_change_seq_pmu->b_vf_point_check_ignore; + info_set.info_set.cpu_step_id_mask = + perf_change_seq_pmu->cpu_step_id_mask; + info_set.info_set.b_lock = + perf_change_seq_pmu->b_lock; + + perf_change_seq_pmu->script_last.super_surface_offset = + nvgpu_pmu_get_ss_member_set_offset(g, pmu, + NV_PMU_SUPER_SURFACE_MEMBER_CHANGE_SEQ_GRP) + + (u32)(sizeof(struct perf_change_seq_pmu_script) * + SEQ_SCRIPT_LAST); + + nvgpu_mem_rd_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + perf_change_seq_pmu->script_last.super_surface_offset, + &perf_change_seq_pmu->script_last.buf, + (u32) sizeof(struct perf_change_seq_pmu_script)); + + /* Assume everything is P0 - Need to find the index for P0 */ + perf_change_seq_pmu->script_last.buf.change.data.pstate_index = + perf_pstate_get_table_entry_idx(g, CTRL_PERF_PSTATE_P0);; + + nvgpu_mem_wr_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + perf_change_seq_pmu->script_last.super_surface_offset, + &perf_change_seq_pmu->script_last.buf, + (u32) sizeof(struct perf_change_seq_pmu_script)); + + /* Continue with PMU setup, assume FB map is done */ + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, CHANGE_SEQ_INFO_SET, &info_set, 0); + if (status != 0) { + nvgpu_err(g, + "Failed to execute Change Seq SET RPC status=0x%x", + status); + goto perf_change_seq_pmu_setup_exit; + } + +perf_change_seq_pmu_setup_exit: + return status; +} + +int nvgpu_pmu_perf_changeseq_set_clks(struct gk20a *g, + struct nvgpu_clk_slave_freq *vf_point) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_perf_change_seq_queue_change rpc; + struct ctrl_perf_change_seq_change_input change_input; + struct change_seq_pmu *change_seq_pmu = &g->pmu->perf_pmu->changeseq_pmu; + int status = 0; + u8 gpcclk_domain = 0U; + u32 gpcclk_voltuv = 0U, gpcclk_clkmhz = 0U; + u32 vmin_uv = 0U, vmax_uv = 0U; + u32 vmargin_uv = 0U, fmargin_mhz = 0U; + + (void) memset(&change_input, 0, + sizeof(struct ctrl_perf_change_seq_change_input)); + + clk_set_p0_clk_per_domain(g, &gpcclk_domain, &gpcclk_clkmhz, + vf_point, &change_input.clk); + + change_input.pstate_index = + perf_pstate_get_table_entry_idx(g, CTRL_PERF_PSTATE_P0); 
+ change_input.flags = (u32)CTRL_PERF_CHANGE_SEQ_CHANGE_FORCE; + change_input.vf_points_cache_counter = 0xFFFFFFFFU; + + status = nvgpu_pmu_perf_vfe_get_freq_margin(g, &fmargin_mhz); + if (status != 0) { + nvgpu_err(g, "Failed to fetch Fmargin status=0x%x", status); + return status; + } + + gpcclk_clkmhz += fmargin_mhz; + status = nvgpu_pmu_clk_domain_freq_to_volt(g, gpcclk_domain, + &gpcclk_clkmhz, &gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); + + status = nvgpu_pmu_perf_vfe_get_volt_margin(g, &vmargin_uv); + if (status != 0) { + nvgpu_err(g, "Failed to fetch Vmargin status=0x%x", status); + return status; + } + + gpcclk_voltuv += vmargin_uv; + status = nvgpu_pmu_volt_get_vmin_vmax_ps35(g, &vmin_uv, &vmax_uv); + if (status != 0) { + nvgpu_pmu_dbg(g, "Get vmin,vmax failed, proceeding with " + "freq_to_volt value"); + } + if ((status == 0) && (vmin_uv > gpcclk_voltuv)) { + gpcclk_voltuv = vmin_uv; + nvgpu_log_fn(g, "Vmin is higher than evaluated Volt"); + } + + if (gpcclk_voltuv > vmax_uv) { + nvgpu_err(g, "Error: Requested voltage is more than chip max"); + return -EINVAL; + } + + change_input.volt[0].voltage_uv = gpcclk_voltuv; + change_input.volt[0].voltage_min_noise_unaware_uv = gpcclk_voltuv; + change_input.volt_rails_mask.super.data[0] = 1U; + + /* RPC to PMU to queue to execute change sequence request*/ + (void) memset(&rpc, 0, + sizeof(struct nv_pmu_rpc_perf_change_seq_queue_change)); + rpc.change = change_input; + rpc.change.pstate_index = + perf_pstate_get_table_entry_idx(g, CTRL_PERF_PSTATE_P0); + change_seq_pmu->change_state = 0U; + change_seq_pmu->start_time = nvgpu_current_time_us(); + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, + CHANGE_SEQ_QUEUE_CHANGE, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute Change Seq RPC status=0x%x", + status); + } + + /* Wait for sync change to complete. */ + if ((rpc.change.flags & CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC) == 0U) { + /* wait till RPC execute in PMU & ACK */ + if (nvgpu_pmu_wait_fw_ack_status(g, pmu, + nvgpu_get_poll_timeout(g), + &change_seq_pmu->change_state, 1U) != 0) { + nvgpu_err(g, "PMU wait timeout expired."); + status = -ETIMEDOUT; + } + } + change_seq_pmu->stop_time = nvgpu_current_time_us(); + return status; +} + +void nvgpu_perf_change_seq_execute_time(struct gk20a *g, s64 *change_time) +{ + struct change_seq_pmu *change_seq_pmu = + &g->pmu->perf_pmu->changeseq_pmu; + s64 diff = change_seq_pmu->stop_time - change_seq_pmu->start_time; + + *change_time = diff; +} diff --git a/drivers/gpu/nvgpu/common/pmu/perf/change_seq.h b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.h new file mode 100644 index 000000000..399331677 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/change_seq.h @@ -0,0 +1,70 @@ +/* + * general clock structures & definitions + * + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CHANGE_SEQ_H +#define NVGPU_CHANGE_SEQ_H + +#include "ucode_perf_change_seq_inf.h" + +#define SEQ_SCRIPT_CURR 0x0U +#define SEQ_SCRIPT_LAST 0x1U +#define SEQ_SCRIPT_QUERY 0x2U + +struct change_seq_pmu_script { + struct perf_change_seq_pmu_script buf; + u32 super_surface_offset; +}; + +struct change_seq { + u8 version; + bool b_enabled_pmu_support; + u32 thread_seq_id_last; + u64 thread_carry_over_timens; + struct ctrl_perf_change_seq_change last_pstate_values; + struct boardobjgrpmask_e32 clk_domains_exclusion_mask; + struct boardobjgrpmask_e32 clk_domains_inclusion_mask; + u32 client_lock_mask; +}; + +struct change_seq_pmu { + struct change_seq super; + bool b_lock; + bool b_vf_point_check_ignore; + u32 cpu_adverised_step_id_mask; + u32 cpu_step_id_mask; + u32 event_mask_pending; + u32 event_mask_received; + u32 last_completed_change_Seq_id; + struct change_seq_pmu_script script_curr; + struct change_seq_pmu_script script_last; + struct change_seq_pmu_script script_query; + u32 change_state; + s64 start_time; + s64 stop_time; +}; + +int perf_change_seq_sw_setup(struct gk20a *g); +int perf_change_seq_pmu_setup(struct gk20a *g); + +#endif /* NVGPU_CHANGE_SEQ_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/perf.c b/drivers/gpu/nvgpu/common/pmu/perf/perf.c new file mode 100644 index 000000000..b8fd5330a --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/perf.c @@ -0,0 +1,226 @@ +/* + * + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_perf_change_seq_inf.h" +#include "ucode_perf_pstate_inf.h" +#include "ucode_perf_vfe_inf.h" +#include "perf.h" + +static int pmu_set_boot_clk_runcb_fn(void *arg) +{ + struct gk20a *g = (struct gk20a *)arg; + struct nvgpu_pmu_perf *perf_pmu = g->pmu->perf_pmu; + struct perf_vfe_invalidate *vfe_init = &perf_pmu->vfe_init; + + nvgpu_log_fn(g, "thread start"); + + while (true) { + NVGPU_COND_WAIT_INTERRUPTIBLE(&vfe_init->wq, + (vfe_init->state_change || + nvgpu_thread_should_stop(&vfe_init->state_task)), 0U); + if (nvgpu_thread_should_stop(&vfe_init->state_task)) { + break; + } + vfe_init->state_change = false; + +#ifdef CONFIG_NVGPU_CLK_ARB + nvgpu_clk_arb_schedule_vf_table_update(g); +#endif + } + + return 0; +} + +static int tu104_pmu_handle_perf_event(struct gk20a *g, void *pmumsg) +{ + struct pmu_nvgpu_rpc_perf_event *msg = + (struct pmu_nvgpu_rpc_perf_event *)pmumsg; + struct nvgpu_pmu_perf *perf_pmu = g->pmu->perf_pmu; + struct change_seq_pmu *change_pmu = &g->pmu->perf_pmu->changeseq_pmu; + + nvgpu_log_fn(g, " "); + switch (msg->rpc_hdr.function) { + case NV_PMU_RPC_ID_PERF_VFE_CALLBACK: + perf_pmu->vfe_init.state_change = true; + (void) nvgpu_cond_signal_interruptible(&perf_pmu->vfe_init.wq); + break; + case NV_PMU_RPC_ID_PERF_SEQ_COMPLETION: + change_pmu->change_state = 1U; + nvgpu_log_info(g, "Change Seq Completed"); + break; + case NV_PMU_RPC_ID_PERF_PSTATES_INVALIDATE: + nvgpu_log_info(g, "Pstate Invalidated"); + break; + default: + WARN_ON(true); + break; + } + return 0; +} + +static int perf_pmu_init_vfe_perf_event(struct gk20a *g) +{ + struct nvgpu_pmu_perf *perf_pmu = g->pmu->perf_pmu; + char thread_name[64]; + int err = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_cond_init(&perf_pmu->vfe_init.wq); + + (void) snprintf(thread_name, sizeof(thread_name), + "nvgpu_vfe_invalidate_init_%s", g->name); + + err = nvgpu_thread_create(&perf_pmu->vfe_init.state_task, g, + pmu_set_boot_clk_runcb_fn, thread_name); + if (err != 0) { + nvgpu_err(g, "failed to start nvgpu_vfe_invalidate_init thread"); + } + + return err; + +} + +int nvgpu_pmu_perf_load(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_perf_load rpc; + int status = 0; + + status = perf_pmu_init_vfe_perf_event(g); + if (status != 0) { + return status; + } + + /*register call back for future VFE updates*/ + g->ops.pmu_perf.handle_pmu_perf_event = tu104_pmu_handle_perf_event; + + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perf_load)); + rpc.b_load = true; + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, LOAD, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC status=0x%x", + status); + nvgpu_thread_stop(&g->pmu->perf_pmu->vfe_init.state_task); + } + + return status; +} + +int nvgpu_pmu_perf_init(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmu->perf_pmu != NULL) { + return 0; + } + + g->pmu->perf_pmu = nvgpu_kzalloc(g, sizeof(*g->pmu->perf_pmu)); + if (g->pmu->perf_pmu == NULL) { + return -ENOMEM; + } + + return 0; +} + +static void vfe_thread_stop_cb(void *data) +{ + struct nvgpu_cond *cond = (struct nvgpu_cond *)data; + + nvgpu_cond_signal(cond); +} + +void nvgpu_pmu_perf_deinit(struct gk20a *g) +{ + if (nvgpu_thread_is_running(&g->pmu->perf_pmu->vfe_init.state_task)) { + nvgpu_thread_stop_graceful(&g->pmu->perf_pmu->vfe_init.state_task, + vfe_thread_stop_cb, &g->pmu->perf_pmu->vfe_init.wq); + } + nvgpu_cond_destroy(&g->pmu->perf_pmu->vfe_init.wq); + 
nvgpu_kfree(g, g->pmu->perf_pmu); + g->pmu->perf_pmu = NULL; +} + +int nvgpu_pmu_perf_sw_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = perf_vfe_var_sw_setup(g); + if (err != 0) { + return err; + } + + err = perf_vfe_equ_sw_setup(g); + if (err != 0) { + return err; + } + + err = perf_pstate_sw_setup(g); + if (err != 0) { + return err; + } + + err = perf_change_seq_sw_setup(g); + if (err != 0) { + return err; + } + + return 0; +} + +int nvgpu_pmu_perf_pmu_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = perf_vfe_var_pmu_setup(g); + if (err != 0) { + return err; + } + + err = perf_vfe_equ_pmu_setup(g); + if (err != 0) { + return err; + } + + err = perf_pstate_pmu_setup(g); + if (err != 0) { + return err; + } + + err = perf_change_seq_pmu_setup(g); + if (err != 0) { + return err; + } + + return 0; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/perf/perf.h b/drivers/gpu/nvgpu/common/pmu/perf/perf.h new file mode 100644 index 000000000..971885a51 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/perf.h @@ -0,0 +1,74 @@ +/* + * general perf structures & definitions + * + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef PMU_PERF_H_ +#define PMU_PERF_H_ + +#include "vfe_equ.h" +#include "vfe_var.h" +#include "change_seq.h" +#include "pstate.h" + +/* PERF RPC ID Definitions */ +#define NV_PMU_RPC_ID_PERF_VFE_CALLBACK 0x01U +#define NV_PMU_RPC_ID_PERF_SEQ_COMPLETION 0x02U +#define NV_PMU_RPC_ID_PERF_PSTATES_INVALIDATE 0x03U + +/* + * Defines the structure that holds data + * used to execute LOAD RPC. + */ +struct nv_pmu_rpc_struct_perf_load { + /* [IN/OUT] Must be first field in RPC structure */ + struct nv_pmu_rpc_header hdr; + bool b_load; + u32 scratch[1]; +}; + +/* + * Simply a union of all specific PERF messages. Forms the general packet + * exchanged between the Kernel and PMU when sending and receiving PERF messages + * (respectively). 
+ */ + +struct pmu_nvgpu_rpc_perf_event { + struct pmu_hdr msg_hdr; + struct pmu_nvgpu_rpc_header rpc_hdr; +}; + +struct perf_vfe_invalidate { + bool state_change; + struct nvgpu_cond wq; + struct nvgpu_thread state_task; +}; + +struct nvgpu_pmu_perf { + struct vfe_vars vfe_varobjs; + struct vfe_equs vfe_equobjs; + struct pstates pstatesobjs; + struct perf_vfe_invalidate vfe_init; + struct change_seq_pmu changeseq_pmu; +}; + +#endif /* PMU_PERF_H_ */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/pstate.c b/drivers/gpu/nvgpu/common/pmu/perf/pstate.c new file mode 100644 index 000000000..96c84c0be --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/pstate.c @@ -0,0 +1,467 @@ +/* + * general p state infrastructure + * + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_perf_pstate_inf.h" +#include "pstate.h" +#include "perf.h" + +int perf_pstate_get_table_entry_idx(struct gk20a *g, u32 num) +{ + struct pstates *pstates = &(g->pmu->perf_pmu->pstatesobjs); + struct pstate *pstate; + u8 i; + + nvgpu_log_info(g, "pstates = %p", pstates); + + BOARDOBJGRP_FOR_EACH(&pstates->super.super, + struct pstate *, pstate, i) { + if (pstate->num == num) { + return i; + } + } + return 0; +} + +static int pstate_init_pmudata_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + return pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); +} + +static int pstate_init_pmudata(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + u32 clkidx; + struct pstate *pstate; + struct nv_pmu_perf_pstate_35 *pstate_pmu_data; + + status = pstate_init_pmudata_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pstate = (struct pstate *)(void *)obj; + pstate_pmu_data = (struct nv_pmu_perf_pstate_35 *)(void *)pmu_obj; + + pstate_pmu_data->super.super.lpwrEntryIdx = pstate->lpwr_entry_idx; + pstate_pmu_data->super.super.flags = pstate->flags; + pstate_pmu_data->nvlinkIdx = pstate->nvlink_idx; + pstate_pmu_data->pcieIdx = pstate->pcie_idx; + + for (clkidx = 0; clkidx < pstate->clklist.num_info; clkidx++) { + pstate_pmu_data->clkEntries[clkidx].max.baseFreqKhz = + pstate->clklist.clksetinfo[clkidx].max_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].max.freqKz = + pstate->clklist.clksetinfo[clkidx].max_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].max.origFreqKhz = + pstate->clklist.clksetinfo[clkidx].max_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].max.porFreqKhz = + pstate->clklist.clksetinfo[clkidx].max_mhz*1000; + + pstate_pmu_data->clkEntries[clkidx].min.baseFreqKhz = + pstate->clklist.clksetinfo[clkidx].min_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].min.freqKz = + pstate->clklist.clksetinfo[clkidx].min_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].min.origFreqKhz = + pstate->clklist.clksetinfo[clkidx].min_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].min.porFreqKhz = + pstate->clklist.clksetinfo[clkidx].min_mhz*1000; + + pstate_pmu_data->clkEntries[clkidx].nom.baseFreqKhz = + pstate->clklist.clksetinfo[clkidx].nominal_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].nom.freqKz = + pstate->clklist.clksetinfo[clkidx].nominal_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].nom.origFreqKhz = + pstate->clklist.clksetinfo[clkidx].nominal_mhz*1000; + pstate_pmu_data->clkEntries[clkidx].nom.porFreqKhz = + pstate->clklist.clksetinfo[clkidx].nominal_mhz*1000; + } + + return status; +} + +static int pstate_construct_super(struct gk20a *g, struct pmu_board_obj *obj, + void *args) +{ + int status; + + status = pmu_board_obj_construct_super(g, obj, args); + if (status != 0) { + return -EINVAL; + } + + return 0; +} + +static int pstate_construct_35(struct gk20a *g, struct pmu_board_obj *obj, + void *args) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)args; + + obj_tmp->type_mask |= BIT32(CTRL_PERF_PSTATE_TYPE_35); + return pstate_construct_super(g, obj, args); +} + +static struct pstate *pstate_construct(struct gk20a *g, void *args) +{ + struct pstate *pstate = NULL; + struct pstate *ptmppstate = (struct pstate *)args; + int status; + u32 clkidx; + + pstate = nvgpu_kzalloc(g, sizeof(struct pstate)); + if (pstate == NULL) { + return NULL; + } + + 
status = pstate_construct_35(g, (struct pmu_board_obj *) + (void *)pstate, args); + if (status != 0) { + nvgpu_err(g, + "error constructing pstate num=%u", ptmppstate->num); + return NULL; + } + + pstate->super.pmudatainit = pstate_init_pmudata; + pstate->num = ptmppstate->num; + pstate->flags = ptmppstate->flags; + pstate->lpwr_entry_idx = ptmppstate->lpwr_entry_idx; + pstate->pcie_idx = ptmppstate->pcie_idx; + pstate->nvlink_idx = ptmppstate->nvlink_idx; + pstate->clklist.num_info = ptmppstate->clklist.num_info; + + for (clkidx = 0; clkidx < ptmppstate->clklist.num_info; clkidx++) { + pstate->clklist.clksetinfo[clkidx].clkwhich = + ptmppstate->clklist.clksetinfo[clkidx].clkwhich; + pstate->clklist.clksetinfo[clkidx].max_mhz = + ptmppstate->clklist.clksetinfo[clkidx].max_mhz; + pstate->clklist.clksetinfo[clkidx].min_mhz = + ptmppstate->clklist.clksetinfo[clkidx].min_mhz; + pstate->clklist.clksetinfo[clkidx].nominal_mhz = + ptmppstate->clklist.clksetinfo[clkidx].nominal_mhz; + } + + return pstate; +} + +static int pstate_insert(struct gk20a *g, struct pstate *pstate, u8 index) +{ + struct pstates *pstates = &(g->pmu->perf_pmu->pstatesobjs); + int err; + + err = boardobjgrp_objinsert(&pstates->super.super, + (struct pmu_board_obj *)pstate, index); + if (err != 0) { + nvgpu_err(g, + "error adding pstate boardobj %d", index); + return err; + } + + pstates->num_clk_domains++; + + return err; +} + +static int parse_pstate_entry_6x(struct gk20a *g, + struct vbios_pstate_header_6x *hdr, + struct vbios_pstate_entry_6x *entry, + struct pstate *pstate) +{ + u8 *p = (u8 *)entry; + u32 clkidx, domain; + int status; + + p += hdr->base_entry_size; + (void) memset(pstate, 0, sizeof(struct pstate)); + pstate->super.type = CTRL_PERF_PSTATE_TYPE_35; + pstate->num = 0x0FU - U32(entry->pstate_level); + pstate->clklist.num_info = hdr->clock_entry_count; + pstate->lpwr_entry_idx = entry->lpwr_entry_idx; + pstate->flags = entry->flags0; + pstate->nvlink_idx = entry->nvlink_idx; + pstate->pcie_idx = entry->pcie_idx; + + for (clkidx = 0; clkidx < hdr->clock_entry_count; clkidx++) { + struct nvgpu_pmu_perf_pstate_clk_info *pclksetinfo; + struct vbios_pstate_entry_clock_6x *clk_entry; + domain = 0; + + pclksetinfo = &pstate->clklist.clksetinfo[clkidx]; + clk_entry = (struct vbios_pstate_entry_clock_6x *)p; + + status = nvgpu_pmu_clk_domain_get_from_index(g, &domain, + clkidx); + if (status != 0) { + nvgpu_err(g, "Invalid clk_domain index"); + return -EINVAL; + } + + pclksetinfo->clkwhich = domain; + pclksetinfo->nominal_mhz = + BIOS_GET_FIELD(u32, clk_entry->param0, + VBIOS_PSTATE_6X_CLOCK_PROG_PARAM0_NOM_FREQ_MHZ); + pclksetinfo->min_mhz = + BIOS_GET_FIELD(u16, clk_entry->param1, + VBIOS_PSTATE_6X_CLOCK_PROG_PARAM1_MIN_FREQ_MHZ); + pclksetinfo->max_mhz = + BIOS_GET_FIELD(u16, clk_entry->param1, + VBIOS_PSTATE_6X_CLOCK_PROG_PARAM1_MAX_FREQ_MHZ); + + p += hdr->clock_entry_size; + } + + return 0; +} + +static int parse_pstate_table_6x(struct gk20a *g, + struct vbios_pstate_header_6x *hdr) +{ + struct pstate _pstate, *pstate; + struct vbios_pstate_entry_6x *entry; + u32 entry_size; + u8 i; + u8 *p = (u8 *)hdr; + int err = 0; + + if ((hdr->header_size != VBIOS_PSTATE_HEADER_6X_SIZE_10) || + (hdr->base_entry_count == 0U) || + (hdr->clock_entry_size != VBIOS_PSTATE_CLOCK_ENTRY_6X_SIZE_6) || + (hdr->clock_entry_count > CLK_SET_INFO_MAX_SIZE)) { + return -EINVAL; + } + + p += hdr->header_size; + + entry_size = U32(hdr->base_entry_size) + + U32(hdr->clock_entry_count) * + U32(hdr->clock_entry_size); + + for (i = 0; i < 
hdr->base_entry_count; i++) { + entry = (struct vbios_pstate_entry_6x *)p; + + if (entry->pstate_level == VBIOS_PERFLEVEL_SKIP_ENTRY) { + p += entry_size; + continue; + } + + err = parse_pstate_entry_6x(g, hdr, entry, &_pstate); + if (err != 0) { + goto done; + } + + pstate = pstate_construct(g, &_pstate); + if (pstate == NULL) { + goto done; + } + + err = pstate_insert(g, pstate, i); + if (err != 0) { + goto done; + } + p += entry_size; + } + +done: + return err; +} + +static int devinit_get_pstate_table(struct gk20a *g) +{ + struct vbios_pstate_header_6x *hdr = NULL; + int err = 0; + + hdr = (struct vbios_pstate_header_6x *) + nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + PERFORMANCE_TABLE); + + if (hdr == NULL) { + nvgpu_err(g, "performance table not found"); + err = -EINVAL; + goto done; + } + + if (hdr->version != VBIOS_PSTATE_TABLE_VERSION_6X) { + nvgpu_err(g, "unknown/unsupported clocks table version=0x%02x", + hdr->version); + err = -EINVAL; + goto done; + } + + err = parse_pstate_table_6x(g, hdr); +done: + return err; +} + +static int perf_pstate_pmudatainit(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + int status = 0; + struct nv_pmu_perf_pstate_boardobjgrp_set_header *pset = + (struct nv_pmu_perf_pstate_boardobjgrp_set_header *) + (void *)pboardobjgrppmu; + struct pstates *pprogs = (struct pstates *)(void *)pboardobjgrp; + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, "error updating pmu boardobjgrp for vfe equ 0x%x", + status); + goto done; + } + + pset->numClkDomains = pprogs->num_clk_domains; + pset->boot_pstate_idx = + perf_pstate_get_table_entry_idx(g, CTRL_PERF_PSTATE_P0); + +done: + return status; +} + +static int perf_pstate_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + struct nv_pmu_perf_pstate_boardobj_grp_set *pgrp_set = + (struct nv_pmu_perf_pstate_boardobj_grp_set *) + (void *)pmuboardobjgrp; + + /* check whether pmuboardobjgrp has a valid boardobj in index */ + if (idx >= CTRL_BOARDOBJGRP_E32_MAX_OBJECTS) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + + return 0; +} + +int perf_pstate_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->perf_pmu->pstatesobjs.super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pstate, status - 0x%x", + status); + goto done; + } + + pboardobjgrp = &g->pmu->perf_pmu->pstatesobjs.super.super; + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, PERF, PSTATE); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + perf, PERF, pstate, PSTATE); + if (status != 0) { + nvgpu_err(g, + "error constructing PSTATE_SET interface - 0x%x", + status); + goto done; + } + + g->pmu->perf_pmu->pstatesobjs.num_clk_domains = + VBIOS_PSTATE_CLOCK_ENTRY_6X_COUNT; + + pboardobjgrp->pmudatainit = perf_pstate_pmudatainit; + pboardobjgrp->pmudatainstget = perf_pstate_pmudata_instget; + + status = devinit_get_pstate_table(g); + if (status != 0) { + nvgpu_err(g, "Error parsing the performance Vbios tables"); + goto done; + } + +done: + return status; +} + +int perf_pstate_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + pboardobjgrp = &g->pmu->perf_pmu->pstatesobjs.super.super; + if 
(!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + return status; +} + + +static struct pstate *perf_pstate_find(struct gk20a *g, u32 num) +{ + struct pstates *pstates = &(g->pmu->perf_pmu->pstatesobjs); + struct pstate *pstate; + u8 i; + + BOARDOBJGRP_FOR_EACH(&pstates->super.super, + struct pstate *, pstate, i) { + if (pstate->num == num) { + return pstate; + } + } + return NULL; +} + +struct nvgpu_pmu_perf_pstate_clk_info *nvgpu_pmu_perf_pstate_get_clk_set_info( + struct gk20a *g, u32 pstate_num, u32 clkwhich) +{ + struct pstate *pstate = perf_pstate_find(g, pstate_num); + struct nvgpu_pmu_perf_pstate_clk_info *info; + u32 clkidx; + + if (pstate == NULL) { + return NULL; + } + + for (clkidx = 0; clkidx < pstate->clklist.num_info; clkidx++) { + info = &pstate->clklist.clksetinfo[clkidx]; + if (info->clkwhich == clkwhich) { + return info; + } + } + return NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/perf/pstate.h b/drivers/gpu/nvgpu/common/pmu/perf/pstate.h new file mode 100644 index 000000000..ba13cf4c8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/pstate.h @@ -0,0 +1,54 @@ +/* + * general p state infrastructure + * + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PERF_PSTATE_H +#define NVGPU_PERF_PSTATE_H + +#define CTRL_PERF_PSTATE_TYPE_35 0x04U + +struct pstate_clk_info_list { + u32 num_info; + struct nvgpu_pmu_perf_pstate_clk_info clksetinfo[CLK_SET_INFO_MAX_SIZE]; +}; + +struct pstates { + struct boardobjgrp_e32 super; + u8 num_clk_domains; +}; + +struct pstate { + struct pmu_board_obj super; + u32 num; + u8 lpwr_entry_idx; + u32 flags; + u8 pcie_idx; + u8 nvlink_idx; + struct pstate_clk_info_list clklist; +}; + +int perf_pstate_sw_setup(struct gk20a *g); +int perf_pstate_pmu_setup(struct gk20a *g); +int perf_pstate_get_table_entry_idx(struct gk20a *g, u32 num); + +#endif /* NVGPU_PERF_PSTATE_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_change_seq_inf.h b/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_change_seq_inf.h new file mode 100644 index 000000000..e728fa5e2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_change_seq_inf.h @@ -0,0 +1,282 @@ +/* + * general p state infrastructure + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PMUIF_CTRLPERF_H +#define NVGPU_PMUIF_CTRLPERF_H + + +#define CTRL_PERF_CHANGE_SEQ_VERSION_35 0x04U + +/*! + * Flags to provide information about the input perf change request. + * This flags will be used to understand the type of perf change req. + */ +#define CTRL_PERF_CHANGE_SEQ_CHANGE_NONE 0x00U +#define CTRL_PERF_CHANGE_SEQ_CHANGE_FORCE BIT(0) +#define CTRL_PERF_CHANGE_SEQ_CHANGE_FORCE_CLOCKS BIT(1) +#define CTRL_PERF_CHANGE_SEQ_CHANGE_ASYNC BIT(2) +#define CTRL_PERF_CHANGE_SEQ_CHANGE_SKIP_VBLANK_WAIT BIT(3) +#define CTRL_PERF_CHANGE_SEQ_SYNC_CHANGE_QUEUE_SIZE 0x04U +#define CTRL_PERF_CHANGE_SEQ_SCRIPT_MAX_PROFILING_THREADS 8 +#define CTRL_PERF_CHANGE_SEQ_SCRIPT_VF_SWITCH_MAX_STEPS 13U + +struct ctrl_clk_domain_clk_mon_item { + u32 clk_api_domain; + u32 clk_freq_Mhz; + u32 low_threshold_percentage; + u32 high_threshold_percentage; +}; + +struct ctrl_clk_domain_clk_mon_list { + u8 num_domain; + struct ctrl_clk_domain_clk_mon_item + clk_domain[CTRL_CLK_CLK_DOMAIN_CLIENT_MAX_DOMAINS]; +}; + +struct ctrl_volt_volt_rail_list_item { + u8 rail_idx; + u32 voltage_uv; + u32 voltage_min_noise_unaware_uv; + u32 voltage_offset_uV[2]; +}; + +struct ctrl_volt_volt_rail_list { + u8 num_rails; + struct ctrl_volt_volt_rail_list_item + rails[CTRL_VOLT_VOLT_RAIL_CLIENT_MAX_RAILS]; +}; + +struct ctrl_perf_chage_seq_change_pmu { + u32 seq_id; +}; + +struct ctrl_perf_change_seq_change { + struct ctrl_clk_clk_domain_list clk_list; + struct ctrl_volt_volt_rail_list volt_list; + u32 pstate_index; + u32 flags; + u32 vf_points_cache_counter; + u8 version; + struct ctrl_perf_chage_seq_change_pmu data; +}; + +struct ctrl_perf_chage_seq_input_clk { + u32 clk_freq_khz; +}; + +struct ctrl_perf_chage_seq_input_volt { + u32 voltage_uv; + u32 voltage_min_noise_unaware_uv; +}; + +struct ctrl_perf_change_seq_change_input { + u32 pstate_index; + u32 flags; + u32 vf_points_cache_counter; + struct nvgpu_pmu_perf_change_input_clk_info clk; + struct ctrl_boardobjgrp_mask_e32 volt_rails_mask; + struct ctrl_perf_chage_seq_input_volt + volt[CTRL_VOLT_VOLT_RAIL_CLIENT_MAX_RAILS]; +}; + +struct u64_align32 { + u32 lo; + u32 hi; +}; +struct ctrl_perf_change_seq_script_profiling_thread { + u32 step_mask; + struct u64_align32 timens; +}; + +struct ctrl_perf_change_seq_script_profiling { + struct u64_align32 total_timens; /*align 32 */ + struct u64_align32 total_build_timens; + struct u64_align32 total_execution_timens; + u8 num_threads; 
/*number of threads required to process this script*/ + struct ctrl_perf_change_seq_script_profiling_thread + nvgpu_threads[CTRL_PERF_CHANGE_SEQ_SCRIPT_MAX_PROFILING_THREADS]; +}; + +struct ctrl_perf_change_seq_pmu_script_header { + bool b_increase; + u8 num_steps; + u8 cur_step_index; + struct ctrl_perf_change_seq_script_profiling profiling; +}; + +enum ctrl_perf_change_seq_pmu_step_id { + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_NONE, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_CHANGE_RM, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_CHANGE_PMU, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_CHANGE_RM, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_CHANGE_PMU, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_PSTATE_RM, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_PRE_PSTATE_PMU, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_PSTATE_RM, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_POST_PSTATE_PMU, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_VOLT, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_LPWR, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_BIF, + CTRL_PERF_CHANGE_SEQ_31_STEP_ID_NOISE_UNAWARE_CLKS, + CTRL_PERF_CHANGE_SEQ_31_STEP_ID_NOISE_AWARE_CLKS, + CTRL_PERF_CHANGE_SEQ_35_STEP_ID_PRE_VOLT_CLKS, + CTRL_PERF_CHANGE_SEQ_35_STEP_ID_POST_VOLT_CLKS, + CTRL_PERF_CHANGE_SEQ_PMU_STEP_ID_MAX_STEPS = 26, +}; + +struct ctrl_perf_change_seq_step_profiling { + /*all aligned to 32 */ + u64 total_timens; + u64 nv_thread_timens; + u64 pmu_thread_timens; +}; + +struct ctrl_perf_change_seq_pmu_script_step_super { + enum ctrl_perf_change_seq_pmu_step_id step_id; + struct ctrl_perf_change_seq_step_profiling profiling; +}; + +struct ctrl_perf_change_seq_pmu_script_step_change { + struct ctrl_perf_change_seq_pmu_script_step_super super; + u32 pstate_index; +}; + +struct ctrl_perf_change_seq_pmu_script_step_pstate { + struct ctrl_perf_change_seq_pmu_script_step_super super; + u32 pstate_index; +}; + +struct ctrl_perf_change_seq_pmu_script_step_lpwr { + struct ctrl_perf_change_seq_pmu_script_step_super super; + u32 pstate_index; +}; + +struct ctrl_perf_change_seq_pmu_script_step_bif { + struct ctrl_perf_change_seq_pmu_script_step_super super; + u32 pstate_index; + u8 pcie_idx; + u8 nvlink_idx; +}; + +struct ctrl_clk_vin_sw_override_list_item { + u8 override_mode; + u32 voltage_uV; +}; + +struct ctrl_clk_vin_sw_override_list { + struct ctrl_boardobjgrp_mask_e32 volt_rails_mask; + struct ctrl_clk_vin_sw_override_list_item + volt[4]; +}; + +struct ctrl_perf_change_seq_pmu_script_step_clks { + struct ctrl_perf_change_seq_pmu_script_step_super super; + struct ctrl_clk_clk_domain_list clk_list; + struct ctrl_clk_vin_sw_override_list vin_sw_override_list; +}; + +struct ctrl_perf_change_seq_pmu_script_step_volt { + struct ctrl_perf_change_seq_pmu_script_step_super super; + struct ctrl_volt_volt_rail_list volt_list; + struct ctrl_clk_vin_sw_override_list vin_sw_override_list; +}; + +struct ctrl_perf_change_seq_pmu_script_step_clk_mon { + struct ctrl_perf_change_seq_pmu_script_step_super super; + struct ctrl_clk_domain_clk_mon_list clk_mon_list; +}; + +union ctrl_perf_change_seq_pmu_script_step_data { + struct ctrl_perf_change_seq_pmu_script_step_super super; + struct ctrl_perf_change_seq_pmu_script_step_change change; + struct ctrl_perf_change_seq_pmu_script_step_pstate ctrlperf_pstate; + struct ctrl_perf_change_seq_pmu_script_step_lpwr lpwr; + struct ctrl_perf_change_seq_pmu_script_step_bif bif; + struct ctrl_perf_change_seq_pmu_script_step_clks clk; + struct ctrl_perf_change_seq_pmu_script_step_volt volt; + struct ctrl_perf_change_seq_pmu_script_step_clk_mon clk_mon; +}; + +struct 
nv_pmu_rpc_perf_change_seq_queue_change { + /*[IN/OUT] Must be first field in RPC structure */ + struct nv_pmu_rpc_header hdr; + struct ctrl_perf_change_seq_change_input change; + u32 seq_id; + u32 scratch[1]; +}; + +struct nv_pmu_perf_change_seq_super_info_get { + u8 version; +}; + +struct nv_pmu_perf_change_seq_pmu_info_get { + struct nv_pmu_perf_change_seq_super_info_get super; + u32 cpu_advertised_step_id_mask; +}; + +struct nv_pmu_perf_change_seq_super_info_set { + u8 version; + struct ctrl_boardobjgrp_mask_e32 clk_domains_exclusion_mask; + struct ctrl_boardobjgrp_mask_e32 clk_domains_inclusion_mask; + u32 strp_id_exclusive_mask; +}; + +struct nv_pmu_perf_change_seq_pmu_info_set { + struct nv_pmu_perf_change_seq_super_info_set super; + bool b_lock; + bool b_vf_point_check_ignore; + u32 cpu_step_id_mask; +}; + +struct nv_pmu_rpc_perf_change_seq_info_get { + /*[IN/OUT] Must be first field in RPC structure */ + struct nv_pmu_rpc_header hdr; + struct nv_pmu_perf_change_seq_pmu_info_get info_get; + u32 scratch[1]; +}; + +struct nv_pmu_rpc_perf_change_seq_info_set { + /*[IN/OUT] Must be first field in RPC structure */ + struct nv_pmu_rpc_header hdr; + struct nv_pmu_perf_change_seq_pmu_info_set info_set; + u32 scratch[1]; +}; + +NV_PMU_MAKE_ALIGNED_STRUCT(ctrl_perf_change_seq_change, + sizeof(struct ctrl_perf_change_seq_change)); + +NV_PMU_MAKE_ALIGNED_STRUCT(ctrl_perf_change_seq_pmu_script_header, + sizeof(struct ctrl_perf_change_seq_pmu_script_header)); + +NV_PMU_MAKE_ALIGNED_UNION(ctrl_perf_change_seq_pmu_script_step_data, + sizeof(union ctrl_perf_change_seq_pmu_script_step_data)); + +struct perf_change_seq_pmu_script { + union ctrl_perf_change_seq_pmu_script_header_aligned hdr; + union ctrl_perf_change_seq_change_aligned change; + /* below should be an aligned structure */ + union ctrl_perf_change_seq_pmu_script_step_data_aligned + steps[CTRL_PERF_CHANGE_SEQ_SCRIPT_VF_SWITCH_MAX_STEPS]; +}; + +#endif /* NVGPU_PMUIF_CTRLPERF_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_pstate_inf.h b/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_pstate_inf.h new file mode 100644 index 000000000..6fb0f158d --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_pstate_inf.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_PMUIF_PERFPSTATE_H_ +#define NVGPU_PMUIF_PERFPSTATE_H_ + +#define PMU_PERF_CLK_DOMAINS_IDX_MAX (16U) + +struct nv_pmu_perf_pstate_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; + u8 numClkDomains; + u8 boot_pstate_idx; +}; + +struct nv_pmu_perf_pstate { + struct nv_pmu_boardobj super; + u8 lpwrEntryIdx; + u32 flags; +}; + +struct nv_pmu_perf_pstate_3x { + struct nv_pmu_perf_pstate super; +}; + +struct nv_ctrl_perf_pstate_clk_freq_35 { + u32 freqKz; + u32 freqVfMaxKhz; + u32 baseFreqKhz; + u32 origFreqKhz; + u32 porFreqKhz; +}; + +struct ctrl_perf_pstate_clk_entry_35 { + struct nv_ctrl_perf_pstate_clk_freq_35 min; + struct nv_ctrl_perf_pstate_clk_freq_35 max; + struct nv_ctrl_perf_pstate_clk_freq_35 nom; +}; + +struct ctrl_perf_pstate_clk_entry_30 { + u32 targetFreqKhz; + u32 freqRangeMinKhz; + u32 freqRangeMaxKhz; +}; + +struct nv_pmu_perf_pstate_30 { + struct nv_pmu_perf_pstate_3x super; + struct ctrl_perf_pstate_clk_entry_30 + clkEntries[PMU_PERF_CLK_DOMAINS_IDX_MAX]; +}; + +struct nv_pmu_perf_pstate_35 { + struct nv_pmu_perf_pstate_3x super; + u8 pcieIdx; + u8 nvlinkIdx; + struct ctrl_perf_pstate_clk_entry_35 + clkEntries[PMU_PERF_CLK_DOMAINS_IDX_MAX]; +}; + +union nv_pmu_perf_pstate_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_perf_pstate super; + struct nv_pmu_perf_pstate_3x v3x; + struct nv_pmu_perf_pstate_30 v30; + struct nv_pmu_perf_pstate_35 v35; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(perf, pstate); + +#endif /* NVGPU_PMUIF_PERFPSTATE_H_ */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_vfe_inf.h b/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_vfe_inf.h new file mode 100644 index 000000000..ed79958a7 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/ucode_perf_vfe_inf.h @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_PMUIF_PERFVFE_H +#define NVGPU_PMUIF_PERFVFE_H + +#define NV_PMU_PERF_RPC_VFE_EQU_EVAL_VAR_COUNT_MAX 2U +#define NV_PMU_VFE_VAR_SINGLE_SENSED_FUSE_SEGMENTS_MAX 1U + +#define CTRL_PERF_VFE_VAR_TYPE_INVALID 0x00U +#define CTRL_PERF_VFE_VAR_TYPE_DERIVED 0x01U +#define CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT 0x02U +#define CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM 0x03U +#define CTRL_PERF_VFE_VAR_TYPE_SINGLE 0x04U +#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY 0x05U +#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED 0x06U +#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE 0x07U +#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP 0x08U +#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE 0x09U +#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_CALLER_SPECIFIED 0x0AU + +#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_NONE 0x00U +#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_VALUE 0x01U +#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_OFFSET 0x02U +#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_SCALE 0x03U + +#define CTRL_PERF_VFE_EQU_TYPE_INVALID 0x00U +#define CTRL_PERF_VFE_EQU_TYPE_COMPARE 0x01U +#define CTRL_PERF_VFE_EQU_TYPE_MINMAX 0x02U +#define CTRL_PERF_VFE_EQU_TYPE_QUADRATIC 0x03U +#define CTRL_PERF_VFE_EQU_TYPE_SCALAR 0x04U + +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UNITLESS 0x00U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_FREQ_MHZ 0x01U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_UV 0x02U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VF_GAIN 0x03U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_DELTA_UV 0x04U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_WORK_TYPE 0x06U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UTIL_RATIO 0x07U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_WORK_FB_NORM 0x08U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_POWER_MW 0x09U +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_PWR_OVER_UTIL_SLOPE 0x0AU +#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VIN_CODE 0x0BU + +#define CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT 0x03U + +#define CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_EQUAL 0x00U +#define CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER_EQ 0x01U +#define CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER 0x02U + +union nv_pmu_perf_vfe_var_type_data { + u8 uid; + u8 clk_domain_idx; +}; + +struct nv_pmu_perf_vfe_var_value { + u8 var_type; + union nv_pmu_perf_vfe_var_type_data var_type_data; + u8 reserved[2]; + u32 var_value; +}; + +union nv_pmu_perf_vfe_equ_result { + u32 freq_m_hz; + u32 voltu_v; + u32 vf_gain; + int volt_deltau_v; + u32 work_type; + u32 util_ratio; + u32 work_fb_norm; + u32 power_mw; + u32 pwr_over_util_slope; + int vin_code; +}; + +struct nv_pmu_perf_rpc_vfe_equ_eval { + u8 equ_idx; + u8 var_count; + u8 output_type; + struct nv_pmu_perf_vfe_var_value var_values[ + NV_PMU_PERF_RPC_VFE_EQU_EVAL_VAR_COUNT_MAX]; + union nv_pmu_perf_vfe_equ_result result; +}; + +struct nv_pmu_rpc_struct_perf_vfe_eval { + /*[IN/OUT] Must be first field in RPC structure */ + struct nv_pmu_rpc_header hdr; + struct nv_pmu_perf_rpc_vfe_equ_eval data; + u32 scratch[1]; +}; + +struct nv_pmu_perf_rpc_vfe_load { + bool b_load; +}; + +struct nv_pmu_perf_vfe_var_boardobjgrp_get_status_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_perf_vfe_var_get_status_super { + struct nv_pmu_boardobj_query obj; +}; + +union ctrl_perf_vfe_var_single_sensed_fuse_value_data { + int signed_value; + u32 unsigned_value; +}; + +struct ctrl_perf_vfe_var_single_sensed_fuse_value { + bool b_signed; + union ctrl_perf_vfe_var_single_sensed_fuse_value_data data; +}; + +struct nv_pmu_perf_vfe_var_single_sensed_fuse_get_status { + struct 
nv_pmu_perf_vfe_var_get_status_super super; + struct ctrl_perf_vfe_var_single_sensed_fuse_value fuse_value_integer; + struct ctrl_perf_vfe_var_single_sensed_fuse_value fuse_value_hw_integer; + u8 fuse_version; + bool b_version_check_failed; +}; + +union nv_pmu_perf_vfe_var_boardobj_get_status_union { + struct nv_pmu_boardobj_query obj; + struct nv_pmu_perf_vfe_var_get_status_super super; + struct nv_pmu_perf_vfe_var_single_sensed_fuse_get_status fuse_status; +}; + +NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E32(perf, vfe_var); + +struct nv_pmu_perf_vfe_var_boardobj_grp_get_status_pack { + struct nv_pmu_perf_vfe_var_boardobj_grp_get_status pri; + struct nv_pmu_perf_vfe_var_boardobj_grp_get_status rppm; +}; + +struct nv_pmu_vfe_var { + struct nv_pmu_boardobj super; + u32 out_range_min; + u32 out_range_max; + struct ctrl_boardobjgrp_mask_e32 mask_dependent_vars; + struct ctrl_boardobjgrp_mask_e255 mask_dependent_equs; +}; + +struct nv_pmu_vfe_var_derived { + struct nv_pmu_vfe_var super; +}; + +struct nv_pmu_vfe_var_derived_product { + struct nv_pmu_vfe_var_derived super; + u8 var_idx0; + u8 var_idx1; +}; + +struct nv_pmu_vfe_var_derived_sum { + struct nv_pmu_vfe_var_derived super; + u8 var_idx0; + u8 var_idx1; +}; + +struct nv_pmu_vfe_var_single { + struct nv_pmu_vfe_var super; + u8 override_type; + u32 override_value; +}; + +struct nv_pmu_vfe_var_single_frequency { + struct nv_pmu_vfe_var_single super; + u8 clk_domain_idx; +}; + +struct nv_pmu_vfe_var_single_caller_specified { + struct nv_pmu_vfe_var_single super; + u8 uid; +}; + +struct nv_pmu_vfe_var_single_sensed { + struct nv_pmu_vfe_var_single super; +}; + +struct ctrl_bios_vfield_register_segment_super { + u8 low_bit; + u8 high_bit; +}; + +struct ctrl_bios_vfield_register_segment_reg { + struct ctrl_bios_vfield_register_segment_super super; + u32 addr; +}; + +struct ctrl_bios_vfield_register_segment_index_reg { + struct ctrl_bios_vfield_register_segment_super super; + u32 addr; + u32 reg_index; + u32 index; +}; + +union ctrl_bios_vfield_register_segment_data { + struct ctrl_bios_vfield_register_segment_reg reg; + struct ctrl_bios_vfield_register_segment_index_reg index_reg; +}; + +struct ctrl_bios_vfield_register_segment { + u8 type; + union ctrl_bios_vfield_register_segment_data data; +}; + +struct ctrl_perf_vfe_var_single_sensed_fuse_info { + u8 segment_count; + struct ctrl_bios_vfield_register_segment + segments[NV_PMU_VFE_VAR_SINGLE_SENSED_FUSE_SEGMENTS_MAX]; +}; + +struct ctrl_perf_vfe_var_single_sensed_fuse_override_info { + u32 fuse_val_override; + u8 b_fuse_regkey_override; +}; + +struct ctrl_perf_vfe_var_single_sensed_fuse_vfield_info { + struct ctrl_perf_vfe_var_single_sensed_fuse_info fuse; + u32 fuse_val_default; + u32 hw_correction_scale; + int hw_correction_offset; + u8 v_field_id; +}; + +struct ctrl_perf_vfe_var_single_sensed_fuse_ver_vfield_info { + struct ctrl_perf_vfe_var_single_sensed_fuse_info fuse; + u8 ver_expected; + bool b_ver_expected_is_mask; + bool b_ver_check; + bool b_ver_check_ignore; + bool b_use_default_on_ver_check_fail; + u8 v_field_id_ver; +}; + +struct nv_pmu_vfe_var_single_sensed_fuse { + struct nv_pmu_vfe_var_single_sensed super; + struct ctrl_perf_vfe_var_single_sensed_fuse_override_info override_info; + struct ctrl_perf_vfe_var_single_sensed_fuse_vfield_info vfield_info; + struct ctrl_perf_vfe_var_single_sensed_fuse_ver_vfield_info + vfield_ver_info; + struct ctrl_perf_vfe_var_single_sensed_fuse_value fuse_val_default; + bool b_fuse_value_signed; +}; + +struct nv_pmu_vfe_var_single_sensed_temp { + 
struct nv_pmu_vfe_var_single_sensed super; + u8 therm_channel_index; + int temp_hysteresis_positive; + int temp_hysteresis_negative; + int temp_default; +}; + +struct nv_pmu_vfe_var_single_voltage { + struct nv_pmu_vfe_var_single super; +}; + +struct nv_pmu_perf_vfe_var_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; + u8 polling_periodms; +}; + +union nv_pmu_perf_vfe_var_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_vfe_var var; + struct nv_pmu_vfe_var_derived var_derived; + struct nv_pmu_vfe_var_derived_product var_derived_product; + struct nv_pmu_vfe_var_derived_sum var_derived_sum; + struct nv_pmu_vfe_var_single var_single; + struct nv_pmu_vfe_var_single_frequency var_single_frequiency; + struct nv_pmu_vfe_var_single_sensed var_single_sensed; + struct nv_pmu_vfe_var_single_sensed_fuse var_single_sensed_fuse; + struct nv_pmu_vfe_var_single_sensed_temp var_single_sensed_temp; + struct nv_pmu_vfe_var_single_voltage var_single_voltage; + struct nv_pmu_vfe_var_single_caller_specified + var_single_caller_specified; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(perf, vfe_var); + +struct nv_pmu_perf_vfe_var_boardobj_grp_set_pack { + struct nv_pmu_perf_vfe_var_boardobj_grp_set pri; + struct nv_pmu_perf_vfe_var_boardobj_grp_set rppm; +}; + +struct nv_pmu_vfe_equ { + struct nv_pmu_boardobj super; + u8 var_idx; + u8 equ_idx_next; + u8 output_type; + u32 out_range_min; + u32 out_range_max; +}; + +struct nv_pmu_vfe_equ_compare { + struct nv_pmu_vfe_equ super; + u8 func_id; + u8 equ_idx_true; + u8 equ_idx_false; + u32 criteria; +}; + +struct nv_pmu_vfe_equ_minmax { + struct nv_pmu_vfe_equ super; + bool b_max; + u8 equ_idx0; + u8 equ_idx1; +}; + +struct nv_pmu_vfe_equ_quadratic { + struct nv_pmu_vfe_equ super; + u32 coeffs[CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT]; +}; + +struct nv_pmu_vfe_equ_scalar { + struct nv_pmu_vfe_equ super; + u8 equ_idx_to_scale; +}; + +struct nv_pmu_perf_vfe_equ_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e255 super; +}; + +union nv_pmu_perf_vfe_equ_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_vfe_equ equ; + struct nv_pmu_vfe_equ_compare equ_comapre; + struct nv_pmu_vfe_equ_minmax equ_minmax; + struct nv_pmu_vfe_equ_quadratic equ_quadratic; + struct nv_pmu_vfe_equ_scalar equ_scalar; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E255(perf, vfe_equ); + +struct nv_pmu_perf_vfe_equ_boardobj_grp_set_pack { + struct nv_pmu_perf_vfe_equ_boardobj_grp_set pri; + struct nv_pmu_perf_vfe_var_boardobj_grp_set rppm; +}; + +#endif /* NVGPU_PMUIF_PERFVFE_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/vfe_equ.c b/drivers/gpu/nvgpu/common/pmu/perf/vfe_equ.c new file mode 100644 index 000000000..596a384d3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/vfe_equ.c @@ -0,0 +1,972 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_perf_vfe_inf.h" +#include "vfe_equ.h" +#include "vfe_var.h" +#include "perf.h" + +static int vfe_equ_node_depending_mask_combine(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, u8 equ_idx, + struct boardobjgrpmask *pmask_dst) +{ + int status; + struct vfe_equ *tmp_vfe_equ; + + while (equ_idx != CTRL_BOARDOBJ_IDX_INVALID) { + tmp_vfe_equ = (struct vfe_equ *)(void *) + BOARDOBJGRP_OBJ_GET_BY_IDX( + pboardobjgrp, equ_idx); + status = tmp_vfe_equ->mask_depending_build(g, pboardobjgrp , + tmp_vfe_equ); + if (status != 0) { + nvgpu_err(g, " Failed calling vfeequ[%d].mskdpningbld", + equ_idx); + return status; + } + + status = nvgpu_boardobjmask_or(pmask_dst, pmask_dst, + &(tmp_vfe_equ->mask_depending_vars.super)); + if (status != 0) { + nvgpu_err(g, " Failed calling vfeequ boardobjmask_or"); + return status; + } + + equ_idx = tmp_vfe_equ->equ_idx_next; + } + return status; +} + +static int vfe_equ_build_depending_mask_minmax(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_equ *pvfe_equ) +{ + struct vfe_equ_minmax *pequ_mm = + (struct vfe_equ_minmax *)(void *)pvfe_equ; + int status; + + status = vfe_equ_node_depending_mask_combine(g, pboardobjgrp, + pequ_mm->equ_idx0, &pvfe_equ->mask_depending_vars.super); + if (status != 0) { + nvgpu_err(g, " Failed calling depending_mask_combine for idx0"); + return status; + } + + status = vfe_equ_node_depending_mask_combine(g, pboardobjgrp, + pequ_mm->equ_idx1, &pvfe_equ->mask_depending_vars.super); + if (status != 0) { + nvgpu_err(g, " Failed calling depending_mask_combine for idx1"); + return status; + } + + return status; +} + +static int vfe_equ_build_depending_mask_super(struct gk20a *g, + struct vfe_equ *pvfe_equ) +{ + struct vfe_var *tmp_vfe_var; + struct boardobjgrp *pboardobjgrp = + &g->pmu->perf_pmu->vfe_varobjs.super.super; + + tmp_vfe_var = (struct vfe_var *)(void *)BOARDOBJGRP_OBJ_GET_BY_IDX( + pboardobjgrp, pvfe_equ->var_idx); + + pvfe_equ->mask_depending_vars = tmp_vfe_var->mask_depending_vars; + + return 0; +} + +static int vfe_equ_build_depending_mask_compare(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_equ *pvfe_equ) +{ + struct vfe_equ_compare *pequ_cmp = + (struct vfe_equ_compare *)(void *)pvfe_equ; + int status; + + status = vfe_equ_build_depending_mask_super(g, pvfe_equ); + if (status != 0) { + nvgpu_err(g, " Failed calling depending_mask_super"); + return status; + } + + status = vfe_equ_node_depending_mask_combine(g, pboardobjgrp, + pequ_cmp->equ_idx_true, + &pvfe_equ->mask_depending_vars.super); + if (status != 0) { + nvgpu_err(g, " Failed calling depending_mask_combine for idx1"); + return status; + } + + status = vfe_equ_node_depending_mask_combine(g, pboardobjgrp, + pequ_cmp->equ_idx_false, + &pvfe_equ->mask_depending_vars.super); + if (status != 0) { + nvgpu_err(g, " Failed calling depending_mask_combine for idx1"); + return status; + } + + return status; +} + +static 
int vfe_equ_build_depending_mask_quad(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_equ *pvfe_equ) +{ + return vfe_equ_build_depending_mask_super(g, pvfe_equ); +} + +static int vfe_equ_build_depending_mask_equ_scalar(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_equ *pvfe_equ) +{ + struct vfe_equ_scalar *pequ_escalar = + (struct vfe_equ_scalar *)(void *)pvfe_equ; + int status; + + status = vfe_equ_build_depending_mask_super(g, pvfe_equ); + if (status != 0) { + nvgpu_err(g, " Failed calling depending_mask_super"); + return status; + } + + status = vfe_equ_node_depending_mask_combine(g, pboardobjgrp, + pequ_escalar->equ_idx_to_scale, + &pvfe_equ->mask_depending_vars.super); + if (status != 0) { + nvgpu_err(g, " Failed calling depending_mask_combine for idx1"); + return status; + } + + return status; +} + +static int vfe_equ_dependency_mask_build(struct gk20a *g, + struct vfe_equs *pvfe_equs, struct vfe_vars *pvfe_vars) +{ + int status; + struct vfe_equ *tmp_vfe_equ; + struct vfe_var *tmp_vfe_var; + u8 index_1, index_2; + struct pmu_board_obj *obj_tmp_1 = NULL, *obj_tmp_2 = NULL; + struct boardobjgrp *pboardobjgrp_equ = &(pvfe_equs->super.super); + struct boardobjgrp *pboardobjgrp_var = &(pvfe_vars->super.super); + + /* Initialize mask_depending_vars */ + BOARDOBJGRP_FOR_EACH(pboardobjgrp_equ, struct pmu_board_obj*, + obj_tmp_1, index_1) { + tmp_vfe_equ = (struct vfe_equ *)(void *)obj_tmp_1; + status = tmp_vfe_equ->mask_depending_build(g, pboardobjgrp_equ, + tmp_vfe_equ); + if (status != 0) { + nvgpu_err(g, "failure in calling vfeequ[%d].depmskbld", + index_1); + return status; + } + } + /* Initialize mask_dependent_vars */ + BOARDOBJGRP_FOR_EACH(pboardobjgrp_equ, struct pmu_board_obj*, + obj_tmp_1, index_1) { + tmp_vfe_equ = (struct vfe_equ *)(void *)obj_tmp_1; + BOARDOBJGRP_ITERATOR(pboardobjgrp_var, struct pmu_board_obj*, + obj_tmp_2, index_2, + &tmp_vfe_equ->mask_depending_vars.super) { + tmp_vfe_var = (struct vfe_var *)(void *)obj_tmp_2; + status = nvgpu_boardobjgrpmask_bit_set( + &tmp_vfe_var->mask_dependent_equs.super, + index_1); + if (status != 0) { + nvgpu_err(g, "failing boardobjgrpmask_bit_set"); + return status; + } + } + } + return status; + +} + +static int vfe_equs_pmudatainit(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + int status = 0; + + status = boardobjgrp_pmu_data_init_e255(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, "error updating pmu boardobjgrp for vfe equ 0x%x", + status); + goto done; + } + +done: + return status; +} + +static int vfe_equs_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_perf_vfe_equ_boardobj_grp_set *pgrp_set = + (struct nv_pmu_perf_vfe_equ_boardobj_grp_set *)(void *)pmuboardobjgrp; + + /* check whether pmuboardobjgrp has a valid boardobj in index */ + if (idx >= CTRL_BOARDOBJGRP_E255_MAX_OBJECTS) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, " Done"); + return 0; +} + + + +static int vfe_equ_pmudatainit_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_equ *pvfe_equ; + struct nv_pmu_vfe_equ *pset; + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_equ = (struct vfe_equ *)(void *)obj; + + pset = (struct nv_pmu_vfe_equ 
*)(void *) + pmu_obj; + + pset->var_idx = pvfe_equ->var_idx; + pset->equ_idx_next = pvfe_equ->equ_idx_next; + pset->output_type = pvfe_equ->output_type; + pset->out_range_min = pvfe_equ->out_range_min; + pset->out_range_max = pvfe_equ->out_range_max; + + return status; +} + +static int vfe_equ_construct_super(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct vfe_equ *pvfeequ; + struct vfe_equ *ptmpequ = (struct vfe_equ *)pargs; + int status = 0; + + pvfeequ = nvgpu_kzalloc(g, size); + if (pvfeequ == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pvfeequ, pargs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pvfeequ; + + status = boardobjgrpmask_e32_init(&pvfeequ->mask_depending_vars, NULL); + pvfeequ->super.pmudatainit = + vfe_equ_pmudatainit_super; + + pvfeequ->var_idx = ptmpequ->var_idx; + pvfeequ->equ_idx_next = ptmpequ->equ_idx_next; + pvfeequ->output_type = ptmpequ->output_type; + pvfeequ->out_range_min = ptmpequ->out_range_min; + pvfeequ->out_range_max = ptmpequ->out_range_max; + + return status; +} + +static int vfe_equ_pmudatainit_compare(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_equ_compare *pvfe_equ_compare; + struct nv_pmu_vfe_equ_compare *pset; + + status = vfe_equ_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_equ_compare = (struct vfe_equ_compare *)(void *)obj; + + pset = (struct nv_pmu_vfe_equ_compare *)(void *)pmu_obj; + + pset->func_id = pvfe_equ_compare->func_id; + pset->equ_idx_true = pvfe_equ_compare->equ_idx_true; + pset->equ_idx_false = pvfe_equ_compare->equ_idx_false; + pset->criteria = pvfe_equ_compare->criteria; + + return status; +} + + +static int vfe_equ_construct_compare(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_equ_compare *pvfeequ; + struct vfe_equ_compare *ptmpequ = + (struct vfe_equ_compare *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_EQU_TYPE_COMPARE) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_EQU_TYPE_COMPARE); + status = vfe_equ_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfeequ = (struct vfe_equ_compare *)(void *)*obj; + pvfeequ->super.mask_depending_build = + vfe_equ_build_depending_mask_compare; + pvfeequ->super.super.pmudatainit = + vfe_equ_pmudatainit_compare; + + pvfeequ->func_id = ptmpequ->func_id; + pvfeequ->equ_idx_true = ptmpequ->equ_idx_true; + pvfeequ->equ_idx_false = ptmpequ->equ_idx_false; + pvfeequ->criteria = ptmpequ->criteria; + + + return status; +} + +static int vfe_equ_pmudatainit_minmax(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_equ_minmax *pvfe_equ_minmax; + struct nv_pmu_vfe_equ_minmax *pset; + + status = vfe_equ_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_equ_minmax = (struct vfe_equ_minmax *)(void *)obj; + + pset = (struct nv_pmu_vfe_equ_minmax *)(void *) + pmu_obj; + + pset->b_max = pvfe_equ_minmax->b_max; + pset->equ_idx0 = pvfe_equ_minmax->equ_idx0; + pset->equ_idx1 = pvfe_equ_minmax->equ_idx1; + + return status; +} + +static int vfe_equ_construct_minmax(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct 
pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_equ_minmax *pvfeequ; + struct vfe_equ_minmax *ptmpequ = + (struct vfe_equ_minmax *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_EQU_TYPE_MINMAX) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_EQU_TYPE_MINMAX); + status = vfe_equ_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfeequ = (struct vfe_equ_minmax *)(void *)*obj; + pvfeequ->super.mask_depending_build = + vfe_equ_build_depending_mask_minmax; + pvfeequ->super.super.pmudatainit = + vfe_equ_pmudatainit_minmax; + pvfeequ->b_max = ptmpequ->b_max; + pvfeequ->equ_idx0 = ptmpequ->equ_idx0; + pvfeequ->equ_idx1 = ptmpequ->equ_idx1; + + return status; +} + +static int vfe_equ_pmudatainit_quadratic(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_equ_quadratic *pvfe_equ_quadratic; + struct nv_pmu_vfe_equ_quadratic *pset; + u32 i; + + status = vfe_equ_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_equ_quadratic = (struct vfe_equ_quadratic *)(void *)obj; + + pset = (struct nv_pmu_vfe_equ_quadratic *)(void *)pmu_obj; + + for (i = 0; i < CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT; i++) { + pset->coeffs[i] = pvfe_equ_quadratic->coeffs[i]; + } + + return status; +} + +static int vfe_equ_construct_quadratic(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_equ_quadratic *pvfeequ; + struct vfe_equ_quadratic *ptmpequ = + (struct vfe_equ_quadratic *)pargs; + int status = 0; + u32 i; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_EQU_TYPE_QUADRATIC) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_EQU_TYPE_QUADRATIC); + status = vfe_equ_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfeequ = (struct vfe_equ_quadratic *)(void *)*obj; + pvfeequ->super.mask_depending_build = + vfe_equ_build_depending_mask_quad; + + pvfeequ->super.super.pmudatainit = + vfe_equ_pmudatainit_quadratic; + + for (i = 0; i < CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT; i++) { + pvfeequ->coeffs[i] = ptmpequ->coeffs[i]; + } + + return status; +} + +static int vfe_equ_pmudatainit_scalar(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_equ_scalar *pvfe_equ_scalar; + struct nv_pmu_vfe_equ_scalar *pset; + + status = vfe_equ_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_equ_scalar = (struct vfe_equ_scalar *)(void *)obj; + + pset = (struct nv_pmu_vfe_equ_scalar *)(void *) + pmu_obj; + + pset->equ_idx_to_scale = pvfe_equ_scalar->equ_idx_to_scale; + + return status; +} + +static int vfe_equ_construct_scalar(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_equ_scalar *pvfeequ; + struct vfe_equ_scalar *ptmpequ = + (struct vfe_equ_scalar *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_EQU_TYPE_SCALAR) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_EQU_TYPE_SCALAR); + status = vfe_equ_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfeequ = (struct vfe_equ_scalar *)(void *)*obj; + pvfeequ->super.mask_depending_build = + 
vfe_equ_build_depending_mask_equ_scalar; + + pvfeequ->super.super.pmudatainit = + vfe_equ_pmudatainit_scalar; + + pvfeequ->equ_idx_to_scale = ptmpequ->equ_idx_to_scale; + + return status; +} + +static struct vfe_equ *construct_vfe_equ(struct gk20a *g, void *pargs) +{ + struct pmu_board_obj *obj = NULL; + int status; + + switch (pmu_board_obj_get_type(pargs)) { + case CTRL_PERF_VFE_EQU_TYPE_COMPARE: + status = vfe_equ_construct_compare(g, &obj, + sizeof(struct vfe_equ_compare), pargs); + break; + + case CTRL_PERF_VFE_EQU_TYPE_MINMAX: + status = vfe_equ_construct_minmax(g, &obj, + sizeof(struct vfe_equ_minmax), pargs); + break; + + case CTRL_PERF_VFE_EQU_TYPE_QUADRATIC: + status = vfe_equ_construct_quadratic(g, &obj, + sizeof(struct vfe_equ_quadratic), pargs); + break; + + case CTRL_PERF_VFE_EQU_TYPE_SCALAR: + status = vfe_equ_construct_scalar(g, &obj, + sizeof(struct vfe_equ_scalar), pargs); + break; + + default: + status = -EINVAL; + break; + } + + if (status != 0) { + return NULL; + } + + nvgpu_log_info(g, " Done"); + + return (struct vfe_equ *)(void *)obj; +} + +static int devinit_get_vfe_equ_table(struct gk20a *g, + struct vfe_equs *pvfeequobjs) +{ + int status = 0; + u8 *vfeequs_tbl_ptr = NULL; + struct vbios_vfe_3x_header_struct vfeequs_tbl_header = { 0 }; + struct vbios_vfe_3x_equ_entry_struct equ = { 0 }; + u8 *vfeequs_tbl_entry_ptr = NULL; + u8 *rd_offset_ptr = NULL; + u32 index = 0; + struct vfe_equ *pequ; + u8 equ_type = 0; + u32 szfmt; + bool done = false; + u32 hdrszfmt = 0; + union { + struct pmu_board_obj obj; + struct vfe_equ super; + struct vfe_equ_compare compare; + struct vfe_equ_minmax minmax; + struct vfe_equ_quadratic quadratic; + struct vfe_equ_scalar scalar; + } equ_data; + + vfeequs_tbl_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + CONTINUOUS_VIRTUAL_BINNING_TABLE); + + if (vfeequs_tbl_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&vfeequs_tbl_header, vfeequs_tbl_ptr, + VBIOS_VFE_3X_HEADER_SIZE_09); + if (vfeequs_tbl_header.header_size == VBIOS_VFE_3X_HEADER_SIZE_09) { + hdrszfmt = VBIOS_VFE_3X_HEADER_SIZE_09; + nvgpu_memcpy((u8 *)&vfeequs_tbl_header, vfeequs_tbl_ptr, hdrszfmt); + } else { + nvgpu_err(g, "Invalid VFE Table Header size\n"); + status = -EINVAL; + goto done; + } + + if (vfeequs_tbl_header.vfe_equ_entry_size == + VBIOS_VFE_3X_EQU_ENTRY_SIZE_18) { + szfmt = VBIOS_VFE_3X_EQU_ENTRY_SIZE_18; + } else { + nvgpu_err(g, "Invalid VFE EQU entry size\n"); + status = -EINVAL; + goto done; + } + + vfeequs_tbl_entry_ptr = vfeequs_tbl_ptr + hdrszfmt + + (vfeequs_tbl_header.vfe_var_entry_count * + vfeequs_tbl_header.vfe_var_entry_size); + + for (index = 0; + index < vfeequs_tbl_header.vfe_equ_entry_count; + index++) { + (void) memset(&equ, 0, + sizeof(struct vbios_vfe_3x_equ_entry_struct)); + + rd_offset_ptr = vfeequs_tbl_entry_ptr + + (index * vfeequs_tbl_header.vfe_equ_entry_size); + + nvgpu_memcpy((u8 *)&equ, rd_offset_ptr, szfmt); + + equ_data.super.var_idx = (u8)equ.var_idx; + equ_data.super.equ_idx_next = + (equ.equ_idx_next == VBIOS_VFE_3X_EQU_ENTRY_IDX_INVALID) ? 
+ CTRL_BOARDOBJ_IDX_INVALID : (u8)equ.equ_idx_next; + equ_data.super.out_range_min = equ.out_range_min; + equ_data.super.out_range_max = equ.out_range_max; + + switch (BIOS_GET_FIELD(u32, equ.param3, + VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE)) { + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_UNITLESS: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UNITLESS; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_FREQ_MHZ: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_FREQ_MHZ; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VOLT_UV: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_UV; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VF_GAIN: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VF_GAIN; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VOLT_DELTA_UV: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_DELTA_UV; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_WORK_TYPE: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_WORK_TYPE; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_UTIL_RATIO: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UTIL_RATIO; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_WORK_FB_NORM: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_WORK_FB_NORM; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_POWER_MW: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_POWER_MW; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_PWR_OVER_UTIL_SLOPE: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_PWR_OVER_UTIL_SLOPE; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VIN_CODE: + equ_data.super.output_type = + (u8)CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VIN_CODE; + break; + case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_THRESHOLD: + equ_data.super.output_type = + (u8)VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_THRESHOLD; + break; + + default: + nvgpu_err(g, "unrecognized output id @vfeequ index %d", + index); + done = true; + break; + } + /* + * Previously we were doing "goto done" from the default case of + * the switch-case block above. MISRA however, gets upset about + * this because it wants a break statement in the default case. + * That's why we had to move the goto statement outside of the + * switch-case block. 
+ */ + if (done) { + goto done; + } + + switch ((u8)equ.type) { + case VBIOS_VFE_3X_EQU_ENTRY_TYPE_DISABLED: + case VBIOS_VFE_3X_EQU_ENTRY_TYPE_QUADRATIC_FXP: + case VBIOS_VFE_3X_EQU_ENTRY_TYPE_MINMAX_FXP: + continue; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_TYPE_QUADRATIC: + equ_type = (u8)CTRL_PERF_VFE_EQU_TYPE_QUADRATIC; + equ_data.quadratic.coeffs[0] = equ.param0; + equ_data.quadratic.coeffs[1] = equ.param1; + equ_data.quadratic.coeffs[2] = equ.param2; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_TYPE_MINMAX: + equ_type = (u8)CTRL_PERF_VFE_EQU_TYPE_MINMAX; + equ_data.minmax.b_max = BIOS_GET_FIELD(bool, equ.param0, + VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT) && + (VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT_MAX != 0U); + equ_data.minmax.equ_idx0 = BIOS_GET_FIELD(u8, + equ.param0, + VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_0); + equ_data.minmax.equ_idx1 = BIOS_GET_FIELD(u8, + equ.param0, + VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_1); + break; + + case VBIOS_VFE_3X_EQU_ENTRY_TYPE_COMPARE: + { + u8 cmp_func = BIOS_GET_FIELD(u8, equ.param1, + VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION); + equ_type = (u8)CTRL_PERF_VFE_EQU_TYPE_COMPARE; + + switch (cmp_func) { + case VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_EQUAL: + equ_data.compare.func_id = + (u8)CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_EQUAL; + break; + + case VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_GREATER_EQ: + equ_data.compare.func_id = + (u8)CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER_EQ; + break; + case VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_GREATER: + equ_data.compare.func_id = + (u8)CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER; + break; + default: + nvgpu_err(g, + "invalid vfe compare index %x type %x ", + index, cmp_func); + status = -EINVAL; + break; + } + if (status != 0) { + goto done; + } + equ_data.compare.equ_idx_true = BIOS_GET_FIELD(u8, + equ.param1, + VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_TRUE); + equ_data.compare.equ_idx_false = BIOS_GET_FIELD(u8, + equ.param1, + VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_FALSE); + equ_data.compare.criteria = equ.param0; + break; + } + + case VBIOS_VFE_3X_EQU_ENTRY_TYPE_EQUATION_SCALAR: + { + equ_type = (u8)CTRL_PERF_VFE_EQU_TYPE_SCALAR; + equ_data.scalar.equ_idx_to_scale = + BIOS_GET_FIELD(u8, equ.param0, + VBIOS_VFE_3X_EQU_ENTRY_PAR0_EQUATION_SCALAR_IDX_TO_SCALE); + break; + } + + default: + status = -EINVAL; + nvgpu_err(g, "Invalid equ[%d].type = 0x%x.", + index, (u8)equ.type); + break; + } + if (status != 0) { + goto done; + } + + equ_data.obj.type = equ_type; + pequ = construct_vfe_equ(g, (void *)&equ_data); + + if (pequ == NULL) { + nvgpu_err(g, + "error constructing vfe_equ boardobj %d", index); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&pvfeequobjs->super.super, + (struct pmu_board_obj *)pequ, index); + if (status != 0) { + nvgpu_err(g, "error adding vfe_equ boardobj %d", index); + status = -EINVAL; + goto done; + } + } +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int perf_vfe_equ_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct vfe_equs *pvfeequobjs; + struct vfe_vars *pvfevarobjs; + + status = nvgpu_boardobjgrp_construct_e255(g, + &g->pmu->perf_pmu->vfe_equobjs.super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for clk domain, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &g->pmu->perf_pmu->vfe_equobjs.super.super; + pvfeequobjs = &(g->pmu->perf_pmu->vfe_equobjs); + pvfevarobjs = 
&(g->pmu->perf_pmu->vfe_varobjs); + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, PERF, VFE_EQU); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + perf, PERF, vfe_equ, VFE_EQU); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + pboardobjgrp->pmudatainit = vfe_equs_pmudatainit; + pboardobjgrp->pmudatainstget = vfe_equs_pmudata_instget; + + status = devinit_get_vfe_equ_table(g, pvfeequobjs); + if (status != 0) { + goto done; + } + + status = vfe_equ_dependency_mask_build(g, pvfeequobjs, pvfevarobjs); + if (status != 0) { + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int perf_vfe_equ_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + pboardobjgrp = &g->pmu->perf_pmu->vfe_equobjs.super.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +int nvgpu_pmu_perf_vfe_get_volt_margin(struct gk20a *g, u32 *vmargin_uv) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_perf_vfe_eval rpc; + int status = 0; + u8 vmargin_idx; + + vmargin_idx = nvgpu_pmu_volt_get_vmargin_ps35(g); + if (vmargin_idx == 0U) { + return 0; + } + + (void) memset(&rpc, 0, sizeof(rpc)); + rpc.data.equ_idx = vmargin_idx; + rpc.data.output_type = CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_DELTA_UV; + rpc.data.var_count = 0U; + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, VFE_EQU_EVAL, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC status=0x%x", + status); + return status; + } + + *vmargin_uv = rpc.data.result.voltu_v; + return status; +} + +int nvgpu_pmu_perf_vfe_get_freq_margin(struct gk20a *g, u32 *fmargin_mhz) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_perf_vfe_eval rpc; + int status = 0; + u8 fmargin_idx; + + fmargin_idx = nvgpu_pmu_clk_fll_get_fmargin_idx(g); + if (fmargin_idx == 0U) { + return 0; + } + + (void) memset(&rpc, 0, sizeof(rpc)); + rpc.data.equ_idx = fmargin_idx; + rpc.data.output_type = CTRL_PERF_VFE_EQU_OUTPUT_TYPE_FREQ_MHZ; + rpc.data.var_count = 0U; + PMU_RPC_EXECUTE_CPB(status, pmu, PERF, VFE_EQU_EVAL, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC status=0x%x", + status); + return status; + } + + *fmargin_mhz = rpc.data.result.voltu_v; + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/perf/vfe_equ.h b/drivers/gpu/nvgpu/common/pmu/perf/vfe_equ.h new file mode 100644 index 000000000..6e429de33 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/vfe_equ.h @@ -0,0 +1,75 @@ +/* + * general perf structures & definitions + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PERF_VFE_EQU_H +#define NVGPU_PERF_VFE_EQU_H + +#include "ucode_perf_vfe_inf.h" + +struct vfe_equs { + struct boardobjgrp_e255 super; +}; + +struct vfe_equ { + struct pmu_board_obj super; + u8 var_idx; + u8 equ_idx_next; + u8 output_type; + u32 out_range_min; + u32 out_range_max; + struct boardobjgrpmask_e32 mask_depending_vars; + int (*mask_depending_build)(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_equ *pvfe_equ); + bool b_is_dynamic_valid; + bool b_is_dynamic; +}; + +struct vfe_equ_compare { + struct vfe_equ super; + u8 func_id; + u8 equ_idx_true; + u8 equ_idx_false; + u32 criteria; +}; + +struct vfe_equ_minmax { + struct vfe_equ super; + bool b_max; + u8 equ_idx0; + u8 equ_idx1; +}; + +struct vfe_equ_quadratic { + struct vfe_equ super; + u32 coeffs[CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT]; +}; + +struct vfe_equ_scalar { + struct vfe_equ super; + u8 equ_idx_to_scale; +}; + +int perf_vfe_equ_sw_setup(struct gk20a *g); +int perf_vfe_equ_pmu_setup(struct gk20a *g); +#endif /* NVGPU_PERF_VFE_EQU_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perf/vfe_var.c b/drivers/gpu/nvgpu/common/pmu/perf/vfe_var.c new file mode 100644 index 000000000..2c45dd71f --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/vfe_var.c @@ -0,0 +1,1433 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ucode_perf_vfe_inf.h" +#include "vfe_var.h" +#include "perf.h" + +static int vfe_vars_pmudatainit(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + struct nv_pmu_perf_vfe_var_boardobjgrp_set_header *pset = + (struct nv_pmu_perf_vfe_var_boardobjgrp_set_header *)(void *) + pboardobjgrppmu; + struct vfe_vars *pvars = (struct vfe_vars *)(void *)pboardobjgrp; + int status = 0; + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for vfe var 0x%x", + status); + goto done; + } + pset->polling_periodms = pvars->polling_periodms; + +done: + return status; +} + +static int vfe_vars_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_perf_vfe_var_boardobj_grp_set *pgrp_set = + (struct nv_pmu_perf_vfe_var_boardobj_grp_set *)(void *) + pmuboardobjgrp; + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (idx >= CTRL_BOARDOBJGRP_E32_MAX_OBJECTS) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + + nvgpu_log_info(g, " Done"); + return 0; +} + +static int vfe_vars_pmustatus_instget(struct gk20a *g, void *pboardobjgrppmu, + struct nv_pmu_boardobj_query **obj_pmu_status, u8 idx) +{ + struct nv_pmu_perf_vfe_var_boardobj_grp_get_status *pgrp_get_status = + (struct nv_pmu_perf_vfe_var_boardobj_grp_get_status *) + pboardobjgrppmu; + + if (((u32)BIT(idx) & + pgrp_get_status->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *obj_pmu_status = (struct nv_pmu_boardobj_query *) + &pgrp_get_status->objects[idx].data.obj; + return 0; +} + +static int vfe_var_get_s_param_value(struct gk20a *g, + struct vfe_var_single_sensed_fuse *fuse_value, + struct nv_pmu_boardobj *pmu_obj) +{ + struct nv_pmu_perf_vfe_var_single_sensed_fuse_get_status *pstatus; + pstatus = (struct nv_pmu_perf_vfe_var_single_sensed_fuse_get_status *) + (void *)pmu_obj; + + if (pstatus->super.obj.type != + fuse_value->super.super.super.super.type) { + nvgpu_err(g, "pmu data and boardobj type not matching"); + return -EINVAL; + } + + if (pstatus->fuse_value_integer.b_signed) { + fuse_value->b_fuse_value_signed = + pstatus->fuse_value_integer.b_signed; + fuse_value->fuse_value_integer = + (u32)pstatus->fuse_value_integer.data.signed_value; + fuse_value->fuse_value_hw_integer = + (u32)pstatus->fuse_value_hw_integer.data.signed_value; + } else { + fuse_value->b_fuse_value_signed = + pstatus->fuse_value_integer.b_signed; + fuse_value->fuse_value_integer = + pstatus->fuse_value_integer.data.unsigned_value; + fuse_value->fuse_value_hw_integer = + pstatus->fuse_value_hw_integer.data.unsigned_value; + } + return 0; +} + +static int vfe_var_dependency_mask_build(struct gk20a *g, + struct vfe_vars *pvfe_vars) +{ + int status; + u8 index_1 = 0, index_2 = 0; + struct vfe_var *tmp_vfe_var_1 = NULL, *tmp_vfe_var_2 = NULL; + struct pmu_board_obj *obj_tmp_1 = NULL, *obj_tmp_2 = NULL; + struct boardobjgrp *pboardobjgrp = &(pvfe_vars->super.super); + + /* Initialize mask_depending_vars */ + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, + obj_tmp_1, index_1) { + tmp_vfe_var_1 = (struct vfe_var *)(void *)obj_tmp_1; + status = tmp_vfe_var_1->mask_depending_build(g, pboardobjgrp, + tmp_vfe_var_1); + if (status != 0) { + 
nvgpu_err(g, "failure in calling vfevar[%d].depmskbld", + index_1); + return status; + } + } + /* Initialize mask_dependent_vars */ + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, + obj_tmp_1, index_1) { + tmp_vfe_var_1 = (struct vfe_var *)(void *)obj_tmp_1; + BOARDOBJGRP_ITERATOR(pboardobjgrp, struct pmu_board_obj*, + obj_tmp_2, index_2, + &tmp_vfe_var_1->mask_depending_vars.super) { + tmp_vfe_var_2 = (struct vfe_var *)(void *)obj_tmp_2; + status = nvgpu_boardobjgrpmask_bit_set( + &tmp_vfe_var_2->mask_dependent_vars.super, + index_1); + if (status != 0) { + nvgpu_err(g, "failing boardobjgrpmask_bit_set"); + return status; + } + } + } + return status; +} + +static int dev_init_get_vfield_info(struct gk20a *g, + struct vfe_var_single_sensed_fuse *pvfevar) +{ + u8 *vfieldtableptr = NULL; + u32 vfieldheadersize = VFIELD_HEADER_SIZE; + u8 *vfieldregtableptr = NULL; + u32 vfieldregheadersize = VFIELD_REG_HEADER_SIZE; + u32 i; + u32 oldindex = 0xFFFFFFFFU; + u32 currindex; + struct vfield_reg_header vregheader; + struct vfield_reg_entry vregentry; + struct vfield_header vheader; + struct vfield_entry ventry; + struct ctrl_bios_vfield_register_segment *psegment = NULL; + u8 *psegmentcount = NULL; + int status = 0; + + vfieldregtableptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_VIRT_TOKEN), + VP_FIELD_REGISTER); + if (vfieldregtableptr == NULL) { + status = -EINVAL; + goto done; + } + + vfieldtableptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_VIRT_TOKEN), + VP_FIELD_TABLE); + if (vfieldtableptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&vregheader, vfieldregtableptr, + VFIELD_REG_HEADER_SIZE); + + if (vregheader.version != VBIOS_VFIELD_REG_TABLE_VERSION_1_0) { + nvgpu_err(g, "invalid vreg header version"); + goto done; + } + + nvgpu_memcpy((u8 *)&vheader, vfieldtableptr, VFIELD_HEADER_SIZE); + + if (vregheader.version != VBIOS_VFIELD_TABLE_VERSION_1_0) { + nvgpu_err(g, "invalid vfield header version"); + goto done; + } + + pvfevar->vfield_info.fuse.segment_count = 0; + pvfevar->vfield_ver_info.fuse.segment_count = 0; + for (i = 0; i < (u32)vheader.count; i++) { + nvgpu_memcpy((u8 *)&ventry, vfieldtableptr + vfieldheadersize + + (i * vheader.entry_size), + vheader.entry_size); + + currindex = U32(VFIELD_BIT_REG(ventry)); + if (currindex != oldindex) { + + nvgpu_memcpy((u8 *)&vregentry, vfieldregtableptr + + vfieldregheadersize + + (currindex * vregheader.entry_size), + vregheader.entry_size); + oldindex = currindex; + } + + if (pvfevar->vfield_info.v_field_id == ventry.strap_id) { + psegmentcount = + &(pvfevar->vfield_info.fuse.segment_count); + psegment = + &(pvfevar->vfield_info.fuse.segments[*psegmentcount]); + if (*psegmentcount > NV_PMU_VFE_VAR_SINGLE_SENSED_FUSE_SEGMENTS_MAX) { + status = -EINVAL; + goto done; + } + } else if (pvfevar->vfield_ver_info.v_field_id_ver == ventry.strap_id) { + psegmentcount = + &(pvfevar->vfield_ver_info.fuse.segment_count); + psegment = + &(pvfevar->vfield_ver_info.fuse.segments[*psegmentcount]); + if (*psegmentcount > NV_PMU_VFE_VAR_SINGLE_SENSED_FUSE_SEGMENTS_MAX) { + status = -EINVAL; + goto done; + } + } else { + continue; + } + + switch (VFIELD_CODE((&vregentry))) { + case NV_VFIELD_DESC_CODE_REG: + psegment->type = + NV_PMU_BIOS_VFIELD_DESC_CODE_REG; + psegment->data.reg.addr = vregentry.reg; + psegment->data.reg.super.high_bit = (u8)(VFIELD_BIT_STOP(ventry)); + psegment->data.reg.super.low_bit = (u8)(VFIELD_BIT_START(ventry)); + 
break; + + case NV_VFIELD_DESC_CODE_INDEX_REG: + psegment->type = + NV_PMU_BIOS_VFIELD_DESC_CODE_INDEX_REG; + psegment->data.index_reg.addr = vregentry.reg; + psegment->data.index_reg.index = vregentry.index; + psegment->data.index_reg.reg_index = vregentry.reg_index; + psegment->data.index_reg.super.high_bit = (u8)(VFIELD_BIT_STOP(ventry)); + psegment->data.index_reg.super.low_bit = (u8)(VFIELD_BIT_START(ventry)); + break; + + default: + psegment->type = + NV_PMU_BIOS_VFIELD_DESC_CODE_INVALID; + status = -EINVAL; + break; + } + if (status != 0) { + goto done; + } + + if (VFIELD_SIZE((&vregentry)) != NV_VFIELD_DESC_SIZE_DWORD) { + psegment->type = + NV_PMU_BIOS_VFIELD_DESC_CODE_INVALID; + return -EINVAL; + } + (*psegmentcount)++; + } + +done: + return status; +} + +static int vfe_var_pmudatainit_super(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_var *pvfe_var; + struct nv_pmu_vfe_var *pset; + + nvgpu_log_info(g, " "); + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_var = (struct vfe_var *)(void *)obj; + pset = (struct nv_pmu_vfe_var *)(void *)pmu_obj; + + pset->out_range_min = pvfe_var->out_range_min; + pset->out_range_max = pvfe_var->out_range_max; + status = nvgpu_boardobjgrpmask_export(&pvfe_var-> + mask_dependent_vars.super, + pvfe_var->mask_dependent_vars.super.bitcount, + &pset->mask_dependent_vars.super); + status = nvgpu_boardobjgrpmask_export(&pvfe_var-> + mask_dependent_equs.super, + pvfe_var->mask_dependent_equs.super.bitcount, + &pset->mask_dependent_equs.super); + return status; +} + +static int vfe_var_build_depending_mask_null(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_var *pvfe_var) +{ + /* Individual vfe_var members should over_ride this with their */ + /* respective function types */ + return -EINVAL; +} + +static int vfe_var_construct_super(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct vfe_var *pvfevar; + struct vfe_var *ptmpvar = (struct vfe_var *)pargs; + int status; + + pvfevar = nvgpu_kzalloc(g, size); + if (pvfevar == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pvfevar, pargs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pvfevar; + + pvfevar->super.pmudatainit = + vfe_var_pmudatainit_super; + + pvfevar->out_range_min = ptmpvar->out_range_min; + pvfevar->out_range_max = ptmpvar->out_range_max; + pvfevar->b_is_dynamic_valid = false; + pvfevar->mask_depending_build = vfe_var_build_depending_mask_null; + + status = boardobjgrpmask_e32_init(&pvfevar->mask_depending_vars, NULL); + if (status != 0) { + return -EINVAL; + } + status = boardobjgrpmask_e32_init(&pvfevar->mask_dependent_vars, NULL); + if (status != 0) { + return -EINVAL; + } + status = boardobjgrpmask_e255_init(&pvfevar->mask_dependent_equs, NULL); + if (status != 0) { + return -EINVAL; + } + nvgpu_log_info(g, " "); + + return status; +} + +static int vfe_var_pmudatainit_derived(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + return vfe_var_pmudatainit_super(g, obj, pmu_obj); +} + +static int vfe_var_construct_derived(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + int status = 0; + struct vfe_var_derived *pvfevar; + + obj_tmp->type_mask |= 
(u32)BIT(CTRL_PERF_VFE_VAR_TYPE_DERIVED); + status = vfe_var_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfevar = (struct vfe_var_derived *)(void *)*obj; + + pvfevar->super.super.pmudatainit = + vfe_var_pmudatainit_derived; + + return status; +} + +static int vfe_var_pmudatainit_derived_product(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_var_derived_product *pvfe_var_derived_product; + struct nv_pmu_vfe_var_derived_product *pset; + + nvgpu_log_info(g, " "); + + status = vfe_var_pmudatainit_derived(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_var_derived_product = + (struct vfe_var_derived_product *)(void *)obj; + pset = (struct nv_pmu_vfe_var_derived_product *)(void *)pmu_obj; + + pset->var_idx0 = pvfe_var_derived_product->var_idx0; + pset->var_idx1 = pvfe_var_derived_product->var_idx1; + + return status; +} + +static int vfe_var_build_depending_mask_derived_product(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_var *pvfe_var) +{ + struct vfe_var_derived_product *pvar_dp = + (struct vfe_var_derived_product *)(void *)pvfe_var; + int status; + struct vfe_var *var0, *var1; + + var0 = (struct vfe_var *)(void *)BOARDOBJGRP_OBJ_GET_BY_IDX( + pboardobjgrp, pvar_dp->var_idx0); + status = var0->mask_depending_build(g, pboardobjgrp, var0); + if (status != 0) { + nvgpu_err(g, " Failed calling vfevar[%d].mask_depending_build", + pvar_dp->var_idx0); + return status; + } + + var1 = (struct vfe_var *)BOARDOBJGRP_OBJ_GET_BY_IDX( + pboardobjgrp, + pvar_dp->var_idx1); + status = var1->mask_depending_build(g, pboardobjgrp, var1); + if (status != 0) { + nvgpu_err(g, " Failed calling vfevar[%d].mask_depending_build", + pvar_dp->var_idx1); + return status; + } + + status = nvgpu_boardobjmask_or(&(pvfe_var->mask_depending_vars.super), + &(var0->mask_depending_vars.super), + &(var1->mask_depending_vars.super)); + + return status; +} + +static int vfe_var_construct_derived_product(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_var_derived_product *pvfevar; + struct vfe_var_derived_product *ptmpvar = + (struct vfe_var_derived_product *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT); + status = vfe_var_construct_derived(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfevar = (struct vfe_var_derived_product *)(void *)*obj; + pvfevar->super.super.mask_depending_build = + vfe_var_build_depending_mask_derived_product; + pvfevar->super.super.super.pmudatainit = + vfe_var_pmudatainit_derived_product; + + pvfevar->var_idx0 = ptmpvar->var_idx0; + pvfevar->var_idx1 = ptmpvar->var_idx1; + + + return status; +} + +static int vfe_var_pmudatainit_derived_sum(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_var_derived_sum *pvfe_var_derived_sum; + struct nv_pmu_vfe_var_derived_sum *pset; + + status = vfe_var_pmudatainit_derived(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_var_derived_sum = (struct vfe_var_derived_sum *) + (void *)obj; + pset = (struct nv_pmu_vfe_var_derived_sum *)(void *)pmu_obj; + + pset->var_idx0 = pvfe_var_derived_sum->var_idx0; + pset->var_idx1 = pvfe_var_derived_sum->var_idx1; + + 
return status; +} + +static int vfe_var_build_depending_mask_derived_sum(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_var *pvfe_var) +{ + struct vfe_var_derived_sum *pvar_dsum = + (struct vfe_var_derived_sum *)(void *)pvfe_var; + int status; + struct vfe_var *var0, *var1; + + var0 = (struct vfe_var *)BOARDOBJGRP_OBJ_GET_BY_IDX( + pboardobjgrp, pvar_dsum->var_idx0); + status = var0->mask_depending_build(g, pboardobjgrp, var0); + if (status != 0) { + nvgpu_err(g, " Failed calling vfevar[%d].mask_depending_build", + pvar_dsum->var_idx0); + return status; + } + + var1 = (struct vfe_var *)(void *)BOARDOBJGRP_OBJ_GET_BY_IDX( + pboardobjgrp, + pvar_dsum->var_idx1); + status = var1->mask_depending_build(g, pboardobjgrp, var1); + if (status != 0) { + nvgpu_err(g, " Failed calling vfevar[%d].mask_depending_build", + pvar_dsum->var_idx1); + return status; + } + + status = nvgpu_boardobjmask_or(&(pvfe_var->mask_depending_vars.super), + &(var0->mask_depending_vars.super), + &(var1)->mask_depending_vars.super); + + return status; +} + +static int vfe_var_construct_derived_sum(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_var_derived_sum *pvfevar; + struct vfe_var_derived_sum *ptmpvar = + (struct vfe_var_derived_sum *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM); + status = vfe_var_construct_derived(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfevar = (struct vfe_var_derived_sum *)(void *)*obj; + pvfevar->super.super.mask_depending_build = + vfe_var_build_depending_mask_derived_sum; + pvfevar->super.super.super.pmudatainit = + vfe_var_pmudatainit_derived_sum; + + pvfevar->var_idx0 = ptmpvar->var_idx0; + pvfevar->var_idx1 = ptmpvar->var_idx1; + + return status; +} + +static int vfe_var_pmudatainit_single(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_var_single *pvfe_var_single; + struct nv_pmu_vfe_var_single *pset; + + status = vfe_var_pmudatainit_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_var_single = (struct vfe_var_single *)(void *)obj; + pset = (struct nv_pmu_vfe_var_single *)(void *) + pmu_obj; + + pset->override_type = pvfe_var_single->override_type; + pset->override_value = pvfe_var_single->override_value; + + return status; +} + +static int vfe_var_pmudatainit_single_frequency(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_var_single_frequency *pvfe_var_single_frequency; + struct nv_pmu_vfe_var_single_frequency *pset; + + status = vfe_var_pmudatainit_single(g, obj, pmu_obj); + + pvfe_var_single_frequency = (struct vfe_var_single_frequency *) + (void *)obj; + pset = (struct nv_pmu_vfe_var_single_frequency *)(void *)pmu_obj; + + pset->clk_domain_idx = pvfe_var_single_frequency->clk_domain_idx; + + return status; +} + +static int vfe_var_build_depending_mask_single(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_var *pvfe_var) +{ + return nvgpu_boardobjgrpmask_bit_set( + &pvfe_var->mask_depending_vars.super, + pvfe_var->super.idx); +} + +static int vfe_var_construct_single(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + 
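+	/*
+	 * Common constructor for every SINGLE-class VFE variable: run the
+	 * super constructor, install the single-variable dependency-mask
+	 * builder and start with no client override applied.
+	 */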
struct vfe_var_single *pvfevar;
+	int status = 0;
+
+	obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE);
+	status = vfe_var_construct_super(g, obj, size, pargs);
+	if (status != 0) {
+		return -EINVAL;
+	}
+
+	pvfevar = (struct vfe_var_single *)(void *)*obj;
+	pvfevar->super.mask_depending_build =
+		vfe_var_build_depending_mask_single;
+	pvfevar->super.super.pmudatainit =
+		vfe_var_pmudatainit_single;
+
+	pvfevar->override_type =
+		(u8)CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_NONE;
+	pvfevar->override_value = 0;
+
+	nvgpu_log_info(g, "Done");
+	return status;
+}
+
+static int vfe_var_construct_single_frequency(struct gk20a *g,
+	struct pmu_board_obj **obj,
+	size_t size, void *pargs)
+{
+	struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs;
+	struct vfe_var_single_frequency *pvfevar;
+	struct vfe_var_single_frequency *ptmpvar =
+		(struct vfe_var_single_frequency *)pargs;
+	int status = 0;
+
+	if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY) {
+		return -EINVAL;
+	}
+
+	obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY);
+	status = vfe_var_construct_single(g, obj, size, pargs);
+	if (status != 0) {
+		return -EINVAL;
+	}
+
+	pvfevar = (struct vfe_var_single_frequency *)(void *)*obj;
+	pvfevar->super.super.mask_depending_build =
+		vfe_var_build_depending_mask_single;
+	pvfevar->super.super.super.pmudatainit =
+		vfe_var_pmudatainit_single_frequency;
+
+	pvfevar->super.super.b_is_dynamic = false;
+	pvfevar->super.super.b_is_dynamic_valid = true;
+	pvfevar->clk_domain_idx = ptmpvar->clk_domain_idx;
+
+	nvgpu_log_info(g, "Done");
+	return status;
+}
+
+static int vfe_var_pmudatainit_single_caller_specified(struct gk20a *g,
+	struct pmu_board_obj *obj,
+	struct nv_pmu_boardobj *pmu_obj)
+{
+	int status = 0;
+	struct vfe_var_single_caller_specified
+		*pvfe_var_single_caller_specified;
+	struct nv_pmu_vfe_var_single_caller_specified *pset;
+
+	status = vfe_var_pmudatainit_single(g, obj, pmu_obj);
+
+	pvfe_var_single_caller_specified =
+		(struct vfe_var_single_caller_specified *)(void *)obj;
+	pset = (struct nv_pmu_vfe_var_single_caller_specified *)
+		(void *)pmu_obj;
+
+	pset->uid = pvfe_var_single_caller_specified->uid;
+
+	return status;
+}
+
+static int vfe_var_construct_single_caller_specified(struct gk20a *g,
+	struct pmu_board_obj **obj,
+	size_t size, void *pargs)
+{
+	struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs;
+	struct vfe_var_single_caller_specified *pvfevar;
+	struct vfe_var_single_caller_specified *ptmpvar =
+		(struct vfe_var_single_caller_specified *)pargs;
+	int status = 0;
+
+	if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_CALLER_SPECIFIED) {
+		return -EINVAL;
+	}
+
+	obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_CALLER_SPECIFIED);
+	status = vfe_var_construct_single(g, obj, size, pargs);
+	if (status != 0) {
+		return -EINVAL;
+	}
+
+	pvfevar = (struct vfe_var_single_caller_specified *)(void *)*obj;
+
+	pvfevar->super.super.super.pmudatainit =
+		vfe_var_pmudatainit_single_caller_specified;
+
+	pvfevar->super.super.b_is_dynamic = false;
+	pvfevar->super.super.b_is_dynamic_valid = true;
+	pvfevar->uid = ptmpvar->uid;
+
+	nvgpu_log_info(g, "Done");
+	return status;
+}
+
+static int vfe_var_pmudatainit_single_sensed(struct gk20a *g,
+	struct pmu_board_obj *obj,
+	struct nv_pmu_boardobj *pmu_obj)
+{
+	return vfe_var_pmudatainit_single(g, obj, pmu_obj);
+}
+
+static int vfe_var_pmudatainit_single_sensed_fuse(struct gk20a *g,
+	struct pmu_board_obj *obj,
+	struct nv_pmu_boardobj
*pmu_obj) +{ + int status = 0; + struct vfe_var_single_sensed_fuse *pvfe_var_single_sensed_fuse; + struct nv_pmu_vfe_var_single_sensed_fuse *pset; + + status = vfe_var_pmudatainit_single_sensed(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_var_single_sensed_fuse = + (struct vfe_var_single_sensed_fuse *)(void *)obj; + + pset = (struct nv_pmu_vfe_var_single_sensed_fuse *)(void *) + pmu_obj; + + nvgpu_memcpy((u8 *)&pset->vfield_info, + (u8 *)&pvfe_var_single_sensed_fuse->vfield_info, + sizeof(struct ctrl_perf_vfe_var_single_sensed_fuse_vfield_info)); + + nvgpu_memcpy((u8 *)&pset->vfield_ver_info, + (u8 *)&pvfe_var_single_sensed_fuse->vfield_ver_info, + sizeof(struct ctrl_perf_vfe_var_single_sensed_fuse_ver_vfield_info)); + + nvgpu_memcpy((u8 *)&pset->override_info, + (u8 *)&pvfe_var_single_sensed_fuse->override_info, + sizeof(struct ctrl_perf_vfe_var_single_sensed_fuse_override_info)); + + pset->b_fuse_value_signed = pvfe_var_single_sensed_fuse->b_fuse_value_signed; + return status; +} + +static int vfe_var_construct_single_sensed(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_var_single_sensed *pvfevar; + int status = 0; + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED); + status = vfe_var_construct_single(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfevar = (struct vfe_var_single_sensed *)(void *)*obj; + + pvfevar->super.super.super.pmudatainit = + vfe_var_pmudatainit_single_sensed; + + nvgpu_log_info(g, "Done"); + + return status; +} + +static int vfe_var_construct_single_sensed_fuse(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_var_single_sensed_fuse *pvfevar; + struct vfe_var_single_sensed_fuse *ptmpvar = + (struct vfe_var_single_sensed_fuse *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE); + status = vfe_var_construct_single_sensed(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfevar = (struct vfe_var_single_sensed_fuse *)(void *)*obj; + + pvfevar->super.super.super.super.pmudatainit = + vfe_var_pmudatainit_single_sensed_fuse; + + pvfevar->vfield_info.v_field_id = ptmpvar->vfield_info.v_field_id; + pvfevar->vfield_info.fuse_val_default = + ptmpvar->vfield_info.fuse_val_default; + pvfevar->vfield_info.hw_correction_scale = + ptmpvar->vfield_info.hw_correction_scale; + pvfevar->vfield_info.hw_correction_offset = + ptmpvar->vfield_info.hw_correction_offset; + pvfevar->vfield_ver_info.v_field_id_ver = + ptmpvar->vfield_ver_info.v_field_id_ver; + pvfevar->vfield_ver_info.ver_expected = + ptmpvar->vfield_ver_info.ver_expected; + pvfevar->vfield_ver_info.b_ver_expected_is_mask = + ptmpvar->vfield_ver_info.b_ver_expected_is_mask; + pvfevar->vfield_ver_info.b_use_default_on_ver_check_fail = + ptmpvar->vfield_ver_info.b_use_default_on_ver_check_fail; + pvfevar->b_version_check_done = false; + pvfevar->b_fuse_value_signed = + ptmpvar->b_fuse_value_signed; + pvfevar->super.super.super.b_is_dynamic = false; + pvfevar->super.super.super.b_is_dynamic_valid = true; + + status = dev_init_get_vfield_info(g, pvfevar); + if (status != 0) { + nvgpu_err(g, "Get vfield table failed"); + goto exit; + } + /*check whether fuse segment got 
initialized*/ + if (pvfevar->vfield_info.fuse.segment_count == 0U) { + nvgpu_err(g, "unable to get fuse reg info %x", + pvfevar->vfield_info.v_field_id); + status = -EINVAL; + goto exit; + } + if (pvfevar->vfield_ver_info.fuse.segment_count == 0U) { + nvgpu_err(g, "unable to get fuse reg info %x", + pvfevar->vfield_ver_info.v_field_id_ver); + status = -EINVAL; + goto exit; + } +exit: + if (status != 0) { + (*obj)->destruct(*obj); + } + + return status; +} + +static int vfe_var_pmudatainit_single_sensed_temp(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct vfe_var_single_sensed_temp *pvfe_var_single_sensed_temp; + struct nv_pmu_vfe_var_single_sensed_temp *pset; + + status = vfe_var_pmudatainit_single_sensed(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pvfe_var_single_sensed_temp = + (struct vfe_var_single_sensed_temp *)(void *)obj; + + pset = (struct nv_pmu_vfe_var_single_sensed_temp *)(void *) + pmu_obj; + pset->therm_channel_index = + pvfe_var_single_sensed_temp->therm_channel_index; + pset->temp_hysteresis_positive = + pvfe_var_single_sensed_temp->temp_hysteresis_positive; + pset->temp_hysteresis_negative = + pvfe_var_single_sensed_temp->temp_hysteresis_negative; + pset->temp_default = + pvfe_var_single_sensed_temp->temp_default; + return status; +} + +static int vfe_var_construct_single_sensed_temp(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_var_single_sensed_temp *pvfevar; + struct vfe_var_single_sensed_temp *ptmpvar = + (struct vfe_var_single_sensed_temp *)pargs; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP); + status = vfe_var_construct_single_sensed(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfevar = (struct vfe_var_single_sensed_temp *)(void *)*obj; + + pvfevar->super.super.super.super.pmudatainit = + vfe_var_pmudatainit_single_sensed_temp; + + pvfevar->therm_channel_index = + ptmpvar->therm_channel_index; + pvfevar->temp_hysteresis_positive = + ptmpvar->temp_hysteresis_positive; + pvfevar->temp_hysteresis_negative = + ptmpvar->temp_hysteresis_negative; + pvfevar->temp_default = + ptmpvar->temp_default; + pvfevar->super.super.super.b_is_dynamic = false; + pvfevar->super.super.super.b_is_dynamic_valid = true; + + return status; +} + +static int vfe_var_pmudatainit_single_voltage(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + return vfe_var_pmudatainit_single(g, obj, pmu_obj); +} + +static int vfe_var_construct_single_voltage(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = (struct pmu_board_obj *)pargs; + struct vfe_var_single_voltage *pvfevar; + int status = 0; + + if (pmu_board_obj_get_type(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE) { + return -EINVAL; + } + + obj_tmp->type_mask |= (u32)BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE); + status = vfe_var_construct_super(g, obj, size, pargs); + if (status != 0) { + return -EINVAL; + } + + pvfevar = (struct vfe_var_single_voltage *)(void *)*obj; + pvfevar->super.super.mask_depending_build = + vfe_var_build_depending_mask_single; + pvfevar->super.super.super.pmudatainit = + vfe_var_pmudatainit_single_voltage; + + pvfevar->super.super.b_is_dynamic = false; + 
pvfevar->super.super.b_is_dynamic_valid = true; + + return status; +} + +static struct vfe_var *construct_vfe_var(struct gk20a *g, void *pargs) +{ + struct pmu_board_obj *obj = NULL; + int status; + + switch (pmu_board_obj_get_type(pargs)) { + case CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT: + status = vfe_var_construct_derived_product(g, &obj, + sizeof(struct vfe_var_derived_product), pargs); + break; + + case CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM: + status = vfe_var_construct_derived_sum(g, &obj, + sizeof(struct vfe_var_derived_sum), pargs); + break; + + case CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY: + status = vfe_var_construct_single_frequency(g, &obj, + sizeof(struct vfe_var_single_frequency), pargs); + break; + + case CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE: + status = vfe_var_construct_single_sensed_fuse(g, &obj, + sizeof(struct vfe_var_single_sensed_fuse), pargs); + break; + + case CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP: + status = vfe_var_construct_single_sensed_temp(g, &obj, + sizeof(struct vfe_var_single_sensed_temp), pargs); + break; + + case CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE: + status = vfe_var_construct_single_voltage(g, &obj, + sizeof(struct vfe_var_single_voltage), pargs); + break; + + case CTRL_PERF_VFE_VAR_TYPE_SINGLE_CALLER_SPECIFIED: + status = vfe_var_construct_single_caller_specified(g, &obj, + sizeof(struct vfe_var_single_caller_specified), pargs); + break; + + case CTRL_PERF_VFE_VAR_TYPE_DERIVED: + case CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED: + case CTRL_PERF_VFE_VAR_TYPE_SINGLE: + default: + status = -EINVAL; + break; + } + + if (status != 0) { + return NULL; + } + + nvgpu_log_info(g, "done"); + + return (struct vfe_var *)(void *)obj; +} + +static int devinit_get_vfe_var_table(struct gk20a *g, + struct vfe_vars *pvfevarobjs) +{ + int status = 0; + u8 *vfevars_tbl_ptr = NULL; + struct vbios_vfe_3x_header_struct vfevars_tbl_header = { 0 }; + struct vbios_vfe_3x_var_entry_struct var = { 0 }; + u8 *vfevars_tbl_entry_ptr = NULL; + u8 *rd_offset_ptr = NULL; + u32 index = 0; + struct vfe_var *pvar; + u8 var_type; + u32 szfmt, val; + bool done = false; + u32 hdrszfmt = 0; + u8 clk_domain_idx_available = + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SFREQ_CLK_DOMAIN_IS_AVAILABLE_NO; + + union { + struct pmu_board_obj obj; + struct vfe_var super; + struct vfe_var_derived_product derived_product; + struct vfe_var_derived_sum derived_sum; + struct vfe_var_single_sensed_fuse single_sensed_fuse; + struct vfe_var_single_sensed_temp single_sensed_temp; + struct vfe_var_single_frequency single_freq; + struct vfe_var_single_caller_specified single_caller_specified; + } var_data; + + vfevars_tbl_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + CONTINUOUS_VIRTUAL_BINNING_TABLE); + if (vfevars_tbl_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&vfevars_tbl_header, vfevars_tbl_ptr, + VBIOS_VFE_3X_HEADER_SIZE_09); + if (vfevars_tbl_header.header_size == VBIOS_VFE_3X_HEADER_SIZE_09) { + hdrszfmt = VBIOS_VFE_3X_HEADER_SIZE_09; + nvgpu_memcpy((u8 *)&vfevars_tbl_header, vfevars_tbl_ptr, hdrszfmt); + } else { + nvgpu_err(g, "Invalid VFE Table Header size\n"); + status = -EINVAL; + goto done; + } + + if (vfevars_tbl_header.vfe_var_entry_size == + VBIOS_VFE_3X_VAR_ENTRY_SIZE_19) { + szfmt = VBIOS_VFE_3X_VAR_ENTRY_SIZE_19; + } else { + nvgpu_err(g, "Invalid VFE VAR Entry size\n"); + status = -EINVAL; + goto done; + } + + /* Read table entries*/ + vfevars_tbl_entry_ptr = vfevars_tbl_ptr + hdrszfmt; + for (index = 0; + index < 
vfevars_tbl_header.vfe_var_entry_count; + index++) { + rd_offset_ptr = vfevars_tbl_entry_ptr + + (index * vfevars_tbl_header.vfe_var_entry_size); + nvgpu_memcpy((u8 *)&var, rd_offset_ptr, szfmt); + + var_data.super.out_range_min = var.out_range_min; + var_data.super.out_range_max = var.out_range_max; + + switch ((u8)var.type) { + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_DISABLED: + continue; + break; + + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_FREQUENCY: + var_type = (u8)CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY; + clk_domain_idx_available = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SFREQ_CLK_DOMAIN_IS_AVAILABLE); + if (clk_domain_idx_available == + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SFREQ_CLK_DOMAIN_IS_AVAILABLE_YES) { + var_data.single_freq.clk_domain_idx = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SFREQ_CLK_DOMAIN_IDX); + }; + break; + + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_VOLTAGE: + var_type = (u8)CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE; + break; + + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_CALLER_SPECIFIED: + var_type = (u8)CTRL_PERF_VFE_VAR_TYPE_SINGLE_CALLER_SPECIFIED; + var_data.single_caller_specified.uid = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SINGLE_CALLER_SPECIFIED_UID); + break; + + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_SENSED_TEMP: + var_type = (u8)CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP; + var_data.single_sensed_temp.temp_default = 0x9600; + var_data.single_sensed_temp.therm_channel_index = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_TH_CH_IDX); + val = BIOS_GET_FIELD(u32, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_POS) << 5U; + var_data.single_sensed_temp.temp_hysteresis_positive = + (int)val; + val = BIOS_GET_FIELD(u32, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_NEG) << 5U; + var_data.single_sensed_temp.temp_hysteresis_negative = + (int)val; + break; + + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_SENSED_FUSE: + var_type = (u8)CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE; + var_data.single_sensed_fuse.vfield_info.v_field_id = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID); + var_data.single_sensed_fuse.vfield_ver_info.v_field_id_ver = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID_VER); + var_data.single_sensed_fuse.vfield_ver_info.ver_expected = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_EXPECTED_VER); + var_data.single_sensed_fuse.vfield_ver_info.b_ver_expected_is_mask = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_EXPECTED_VER_MODE); + var_data.single_sensed_fuse.vfield_ver_info.b_use_default_on_ver_check_fail = + (BIOS_GET_FIELD(bool, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL) && + (VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL_YES != 0U)); + var_data.single_sensed_fuse.b_fuse_value_signed = + BIOS_GET_FIELD(bool, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VALUE_SIGNED_INTEGER); + var_data.single_sensed_fuse.vfield_info.fuse_val_default = + var.param1; + if (szfmt >= VBIOS_VFE_3X_VAR_ENTRY_SIZE_19) { + var_data.single_sensed_fuse.vfield_info.hw_correction_scale = + var.param2; + var_data.single_sensed_fuse.vfield_info.hw_correction_offset = + (int)var.param3; + } else { + var_data.single_sensed_fuse.vfield_info.hw_correction_scale = + BIT32(12); + var_data.single_sensed_fuse.vfield_info.hw_correction_offset = + 0; + if ((var_data.single_sensed_fuse.vfield_info.v_field_id == + VFIELD_ID_STRAP_IDDQ) 
|| + (var_data.single_sensed_fuse.vfield_info.v_field_id == + VFIELD_ID_STRAP_IDDQ_1)) { + var_data.single_sensed_fuse.vfield_info.hw_correction_scale = + 50U << 12U; + } + } + break; + + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_DERIVED_PRODUCT: + var_type = (u8)CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT; + var_data.derived_product.var_idx0 = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_0); + var_data.derived_product.var_idx1 = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_1); + break; + + case VBIOS_VFE_3X_VAR_ENTRY_TYPE_DERIVED_SUM: + var_type = (u8)CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM; + var_data.derived_sum.var_idx0 = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_0); + var_data.derived_sum.var_idx1 = + BIOS_GET_FIELD(u8, var.param0, + VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_1); + break; + default: + status = -EINVAL; + done = true; + break; + } + /* + * Previously we were doing "goto done" from the default case of + * the switch-case block above. MISRA however, gets upset about + * this because it wants a break statement in the default case. + * That's why we had to move the goto statement outside of the + * switch-case block. + */ + if(done) { + goto done; + } + + var_data.obj.type = var_type; + var_data.obj.type_mask = 0; + + pvar = construct_vfe_var(g, &var_data); + if (pvar == NULL) { + nvgpu_err(g, + "error constructing vfe_var boardobj %d", + index); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&pvfevarobjs->super.super, + (struct pmu_board_obj *)pvar, index); + if (status != 0) { + nvgpu_err(g, "error adding vfe_var boardobj %d", index); + status = -EINVAL; + goto done; + } + } + pvfevarobjs->polling_periodms = vfevars_tbl_header.polling_periodms; +done: + nvgpu_log_info(g, "done status %x", status); + return status; +} + +static int vfe_var_boardobj_grp_get_status(struct gk20a *g) +{ + + struct boardobjgrp *pboardobjgrp; + struct boardobjgrpmask *pboardobjgrpmask; + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu; + struct pmu_board_obj *obj = NULL; + struct nv_pmu_boardobj_query *pboardobjpmustatus = NULL; + struct vfe_var_single_sensed_fuse *single_sensed_fuse = NULL; + int status; + u8 index; + + pboardobjgrp = &g->pmu->perf_pmu->vfe_varobjs.super.super; + pboardobjgrpmask = &g->pmu->perf_pmu->vfe_varobjs.super.mask.super; + + status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); + if (status != 0) { + nvgpu_err(g, "err getting boardobjs from pmu"); + return status; + } + + pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + single_sensed_fuse = (struct vfe_var_single_sensed_fuse *) + (void *)obj; + status = pboardobjgrp->pmustatusinstget(g, + (struct nv_pmu_boardobjgrp *)(void *)pboardobjgrppmu, + &pboardobjpmustatus, index); + if (status != 0) { + nvgpu_err(g, "could not get status object instance"); + return status; + } + /* At present we are updating only s_param, + * in future we can add other fields if required */ + if (single_sensed_fuse->vfield_info.v_field_id == + VFIELD_ID_S_PARAM) { + status = vfe_var_get_s_param_value(g, + single_sensed_fuse, + (struct nv_pmu_boardobj *) + (void *)pboardobjpmustatus); + if (status != 0) { + nvgpu_err(g, + "could not get single sensed fuse value"); + return status; + } + break; + } + } + return 0; +} + +int perf_vfe_var_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct 
vfe_vars *pvfevarobjs; + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->perf_pmu->vfe_varobjs.super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for clk domain, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &g->pmu->perf_pmu->vfe_varobjs.super.super; + pvfevarobjs = &g->pmu->perf_pmu->vfe_varobjs; + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, PERF, VFE_VAR); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + perf, PERF, vfe_var, VFE_VAR); + if (status != 0) { + nvgpu_err(g, "error constructing GRP_SET interface - 0x%x", + status); + goto done; + } + + pboardobjgrp->pmudatainit = vfe_vars_pmudatainit; + pboardobjgrp->pmudatainstget = vfe_vars_pmudata_instget; + pboardobjgrp->pmustatusinstget = vfe_vars_pmustatus_instget; + + status = devinit_get_vfe_var_table(g, pvfevarobjs); + if (status != 0) { + goto done; + } + + status = vfe_var_dependency_mask_build(g, pvfevarobjs); + if (status != 0) { + goto done; + } + + status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, + &g->pmu->perf_pmu->vfe_varobjs.super.super, + perf, PERF, vfe_var, VFE_VAR); + if (status != 0) { + nvgpu_err(g, + "error constructing GRP_GET_STATUS interface - 0x%x", + status); + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int perf_vfe_var_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + pboardobjgrp = &g->pmu->perf_pmu->vfe_varobjs.super.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +int nvgpu_pmu_perf_vfe_get_s_param(struct gk20a *g, u64 *s_param) +{ + struct boardobjgrp *pboardobjgrp; + struct pmu_board_obj *obj = NULL; + struct vfe_var_single_sensed_fuse *single_sensed_fuse = NULL; + u8 index; + int status; + + status = vfe_var_boardobj_grp_get_status(g); + if (status != 0) { + nvgpu_err(g, "Vfe_var get status failed"); + return status; + } + + pboardobjgrp = &g->pmu->perf_pmu->vfe_varobjs.super.super; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + single_sensed_fuse = (struct vfe_var_single_sensed_fuse *) + (void *)obj; + if (single_sensed_fuse->vfield_info.v_field_id == + VFIELD_ID_S_PARAM) { + *s_param = single_sensed_fuse->fuse_value_hw_integer; + } + } + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/perf/vfe_var.h b/drivers/gpu/nvgpu/common/pmu/perf/vfe_var.h new file mode 100644 index 000000000..692b1bbb1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perf/vfe_var.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PERF_VFE_VAR_H +#define NVGPU_PERF_VFE_VAR_H + +struct vfe_vars { + struct boardobjgrp_e32 super; + u8 polling_periodms; +}; + +struct vfe_var { + struct pmu_board_obj super; + u32 out_range_min; + u32 out_range_max; + struct boardobjgrpmask_e32 mask_depending_vars; + struct boardobjgrpmask_e32 mask_dependent_vars; + struct boardobjgrpmask_e255 mask_dependent_equs; + int (*mask_depending_build)(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct vfe_var *pvfe_var); + bool b_is_dynamic_valid; + bool b_is_dynamic; +}; + +struct vfe_var_derived { + struct vfe_var super; +}; + +struct vfe_var_derived_product { + struct vfe_var_derived super; + u8 var_idx0; + u8 var_idx1; +}; + +struct vfe_var_derived_sum { + struct vfe_var_derived super; + u8 var_idx0; + u8 var_idx1; +}; + +struct vfe_var_single { + struct vfe_var super; + u8 override_type; + u32 override_value; +}; + +struct vfe_var_single_frequency { + struct vfe_var_single super; + u8 clk_domain_idx; +}; + +struct vfe_var_single_voltage { + struct vfe_var_single super; +}; + +struct vfe_var_single_caller_specified { + struct vfe_var_single super; + u8 uid; +}; + +struct vfe_var_single_sensed { + struct vfe_var_single super; +}; + +struct vfe_var_single_sensed_fuse { + struct vfe_var_single_sensed super; + struct ctrl_perf_vfe_var_single_sensed_fuse_override_info override_info; + struct ctrl_perf_vfe_var_single_sensed_fuse_vfield_info vfield_info; + struct ctrl_perf_vfe_var_single_sensed_fuse_ver_vfield_info vfield_ver_info; + struct ctrl_perf_vfe_var_single_sensed_fuse_value fuse_val_default; + bool b_fuse_value_signed; + u32 fuse_value_integer; + u32 fuse_value_hw_integer; + u8 fuse_version; + bool b_version_check_done; +}; + +struct vfe_var_single_sensed_temp { + struct vfe_var_single_sensed super; + u8 therm_channel_index; + int temp_hysteresis_positive; + int temp_hysteresis_negative; + int temp_default; +}; + +int perf_vfe_var_sw_setup(struct gk20a *g); +int perf_vfe_var_pmu_setup(struct gk20a *g); + +#endif /* NVGPU_PERF_VFE_VAR_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon.c new file mode 100644 index 000000000..d976eca68 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon.c @@ -0,0 +1,721 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) +#include "nvgpu_next_gpuid.h" +#endif + +#include "pmu_perfmon_sw_gm20b.h" +#include "pmu_perfmon_sw_gv11b.h" + +static u8 get_perfmon_id(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + u8 unit_id; + + switch (ver) { + case GK20A_GPUID_GK20A: + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + unit_id = PMU_UNIT_PERFMON; + break; + case NVGPU_GPUID_GP10B: + case NVGPU_GPUID_GV11B: +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_GPUID: +#endif + unit_id = PMU_UNIT_PERFMON_T18X; + break; + default: + unit_id = PMU_UNIT_INVALID; + nvgpu_err(g, "no support for %x", ver); + WARN_ON(true); + break; + } + + return unit_id; +} + +void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nv_pmu_rpc_header *rpc, + struct rpc_handler_payload *rpc_payload) +{ + struct nv_pmu_rpc_struct_perfmon_query *rpc_param; + + switch (rpc->function) { + case NV_PMU_RPC_ID_PERFMON_T18X_INIT: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_PERFMON_INIT"); + pmu->pmu_perfmon->perfmon_ready = true; + break; + case NV_PMU_RPC_ID_PERFMON_T18X_START: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_PERFMON_START"); + break; + case NV_PMU_RPC_ID_PERFMON_T18X_STOP: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_PERFMON_STOP"); + break; + case NV_PMU_RPC_ID_PERFMON_T18X_QUERY: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_PERFMON_QUERY"); + rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *) + rpc_payload->rpc_buff; + pmu->pmu_perfmon->load = rpc_param->sample_buffer[0]; + pmu->pmu_perfmon->perfmon_query = 1; + /* set perfmon_query to 1 after load is copied */ + break; + default: + nvgpu_pmu_dbg(g, "invalid reply"); + break; + } +} + +int nvgpu_pmu_initialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_perfmon **perfmon_ptr) +{ + struct nvgpu_pmu_perfmon *perfmon; + int err = 0; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + + if (*perfmon_ptr != NULL) { + /* Not to allocate a new buffer after railgating + is done. Use the same memory for pmu_perfmon + after railgating. 
+ */ + nvgpu_pmu_dbg(g, "skip perfmon init for unrailgate sequence"); + goto exit; + + } + /* One-time memory allocation for pmu_perfmon */ + perfmon = (struct nvgpu_pmu_perfmon *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_pmu_perfmon)); + + if (perfmon == NULL) { + nvgpu_err(g, "failed to initialize perfmon"); + return -ENOMEM; + } + + *perfmon_ptr = perfmon; + + switch (ver) { + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + case NVGPU_GPUID_GP10B: + case NVGPU_GPUID_GV100: + case NVGPU_GPUID_TU104: + nvgpu_gm20b_perfmon_sw_init(g, *perfmon_ptr); + break; + + case NVGPU_GPUID_GV11B: + nvgpu_gv11b_perfmon_sw_init(g, *perfmon_ptr); + break; +#if defined(CONFIG_NVGPU_NEXT) + case NVGPU_NEXT_GPUID: + nvgpu_next_perfmon_sw_init(g, *perfmon_ptr); + break; +#endif + default: + nvgpu_kfree(g, *perfmon_ptr); + err = -EINVAL; + nvgpu_err(g, "no support for GPUID %x", ver); + break; + } + +exit: + return err; + +} + +void nvgpu_pmu_deinitialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + if (pmu->pmu_perfmon == NULL) { + return; + } else { + nvgpu_kfree(g, pmu->pmu_perfmon); + } +} + +int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_fw_ver_ops *fw_ops = &pmu->fw->ops; + + struct pmu_cmd cmd; + struct pmu_payload payload; + int status; + u64 tmp_addr, tmp_size; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + return 0; + } + + nvgpu_log_fn(g, " "); + + pmu->pmu_perfmon->perfmon_ready = false; + + g->ops.pmu.pmu_init_perfmon_counter(g); + + if (pmu->pmu_perfmon->sample_buffer == 0U) { + tmp_addr = nvgpu_alloc(&pmu->dmem, 2U * sizeof(u16)); + nvgpu_assert(tmp_addr <= U32_MAX); + pmu->pmu_perfmon->sample_buffer = (u32)tmp_addr; + } + if (pmu->pmu_perfmon->sample_buffer == 0U) { + nvgpu_err(g, "failed to allocate perfmon sample buffer"); + return -ENOMEM; + } + + /* init PERFMON */ + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + + cmd.hdr.unit_id = get_perfmon_id(pmu); + if (cmd.hdr.unit_id == PMU_UNIT_INVALID) { + nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped"); + return -EINVAL; + } + + tmp_size = PMU_CMD_HDR_SIZE + + (u64)fw_ops->get_perfmon_cmd_init_size(); + nvgpu_assert(tmp_size <= U8_MAX); + cmd.hdr.size = (u8)tmp_size; + cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT; + /* buffer to save counter values for pmu perfmon */ + fw_ops->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon, + (u16)pmu->pmu_perfmon->sample_buffer); + /* number of sample periods below lower threshold + * before pmu triggers perfmon decrease event + */ + fw_ops->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15); + /* index of base counter, aka. 
always ticking counter */ + fw_ops->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6); + /* microseconds interval between pmu polls perf counters */ + fw_ops->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700); + /* number of perfmon counters + * counter #3 (GR and CE2) for gk20a + */ + fw_ops->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1); + /* moving average window for sample periods + * TBD: = 3000000 / sample_period_us = 17 + */ + fw_ops->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17); + + (void) memset(&payload, 0, sizeof(struct pmu_payload)); + payload.in.buf = fw_ops->get_perfmon_cntr_ptr(pmu); + payload.in.size = fw_ops->get_perfmon_cntr_sz(pmu); + status = fw_ops->get_perfmon_cmd_init_offset_of_var(COUNTER_ALLOC, + &payload.in.offset); + if (status != 0) { + nvgpu_err(g, "failed to get payload offset, command skipped"); + return status; + } + + nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_INIT"); + status = nvgpu_pmu_cmd_post(g, &cmd, &payload, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL); + if (status != 0) { + nvgpu_err(g, "failed cmd post PMU_PERFMON_CMD_ID_INIT"); + return status; + } + + return 0; +} + + +int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_fw_ver_ops *fw_ops = &pmu->fw->ops; + struct pmu_cmd cmd; + struct pmu_payload payload; + int status; + u64 tmp_size; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + return 0; + } + + /* PERFMON Start */ + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = get_perfmon_id(pmu); + if (cmd.hdr.unit_id == PMU_UNIT_INVALID) { + nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped"); + return -EINVAL; + } + tmp_size = PMU_CMD_HDR_SIZE + + (u64)fw_ops->get_perfmon_cmd_start_size(); + nvgpu_assert(tmp_size <= U8_MAX); + cmd.hdr.size = (u8)tmp_size; + fw_ops->perfmon_start_set_cmd_type(&cmd.cmd.perfmon, + PMU_PERFMON_CMD_ID_START); + fw_ops->perfmon_start_set_group_id(&cmd.cmd.perfmon, + PMU_DOMAIN_GROUP_PSTATE); + + fw_ops->perfmon_start_set_state_id(&cmd.cmd.perfmon, + pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); + + fw_ops->perfmon_start_set_flags(&cmd.cmd.perfmon, + PMU_PERFMON_FLAG_ENABLE_INCREASE | + PMU_PERFMON_FLAG_ENABLE_DECREASE | + PMU_PERFMON_FLAG_CLEAR_PREV); + + (void) memset(&payload, 0, sizeof(struct pmu_payload)); + + /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */ + fw_ops->set_perfmon_cntr_ut(pmu, 3000); /* 30% */ + /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */ + fw_ops->set_perfmon_cntr_lt(pmu, 1000); /* 10% */ + fw_ops->set_perfmon_cntr_valid(pmu, true); + + payload.in.buf = fw_ops->get_perfmon_cntr_ptr(pmu); + payload.in.size = fw_ops->get_perfmon_cntr_sz(pmu); + status = fw_ops->get_perfmon_cmd_start_offset_of_var(COUNTER_ALLOC, + &payload.in.offset); + if (status != 0) { + nvgpu_err(g, "failed to get payload offset, command skipped"); + return status; + } + + nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_START"); + status = nvgpu_pmu_cmd_post(g, &cmd, &payload, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL); + if (status != 0) { + nvgpu_err(g, "failed cmd post PMU_PERFMON_CMD_ID_START"); + return status; + } + + return 0; +} + +int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_cmd cmd; + u64 tmp_size; + int status; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + return 0; + } + + /* PERFMON Stop */ + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = get_perfmon_id(pmu); + if (cmd.hdr.unit_id == PMU_UNIT_INVALID) { + nvgpu_err(g, "failed to get 
perfmon UNIT ID, command skipped"); + return -EINVAL; + } + tmp_size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop); + nvgpu_assert(tmp_size <= U8_MAX); + cmd.hdr.size = (u8)tmp_size; + cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP; + + nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_STOP"); + status = nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_LPQ, + NULL, NULL); + if (status != 0) { + nvgpu_err(g, "failed cmd post PMU_PERFMON_CMD_ID_STOP"); + return status; + } + return 0; +} + +int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load) +{ + *load = g->pmu->pmu_perfmon->load_shadow; + return 0; +} + +int nvgpu_pmu_load_update(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + u32 load = 0; + int err = 0; + if (!pmu->pmu_perfmon->perfmon_ready) { + pmu->pmu_perfmon->load_shadow = 0; + pmu->pmu_perfmon->load = 0; + return 0; + } + + if (pmu->pmu_perfmon->get_samples_rpc != NULL) { + nvgpu_pmu_perfmon_get_sample(g, pmu, pmu->pmu_perfmon); + load = pmu->pmu_perfmon->load; + } else { + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, + pmu->pmu_perfmon->sample_buffer, (u8 *)&load, 2 * 1, 0); + if (err != 0) { + nvgpu_err(g, "PMU falcon DMEM copy failed"); + return err; + } + } + + pmu->pmu_perfmon->load_shadow = load / 10U; + pmu->pmu_perfmon->load_avg = (((9U*pmu->pmu_perfmon->load_avg) + + pmu->pmu_perfmon->load_shadow) / 10U); + + return err; +} + +int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm) +{ + u64 busy_cycles, total_cycles; + u32 intr_status; + + gk20a_busy_noresume(g); + if (nvgpu_is_powered_off(g)) { + *norm = 0; + goto exit; + } + + if (g->ops.pmu.pmu_read_idle_counter == NULL || + g->ops.pmu.pmu_reset_idle_counter == NULL || + g->ops.pmu.pmu_read_idle_intr_status == NULL || + g->ops.pmu.pmu_clear_idle_intr_status == NULL) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + goto exit; + } + + busy_cycles = g->ops.pmu.pmu_read_idle_counter(g, 4); + total_cycles = g->ops.pmu.pmu_read_idle_counter(g, 0); + intr_status = g->ops.pmu.pmu_read_idle_intr_status(g); + + g->ops.pmu.pmu_reset_idle_counter(g, 4); + g->ops.pmu.pmu_reset_idle_counter(g, 0); + + if (intr_status != 0UL) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + g->ops.pmu.pmu_clear_idle_intr_status(g); + } else if (total_cycles == 0ULL || busy_cycles > total_cycles) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + } else { + *norm = (u32)(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX + / total_cycles); + } + +exit: + gk20a_idle_nosuspend(g); + + return 0; +} + +void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, + u32 *total_cycles) +{ + if (nvgpu_is_powered_off(g) || gk20a_busy(g) != 0) { + *busy_cycles = 0; + *total_cycles = 0; + return; + } + + *busy_cycles = g->ops.pmu.pmu_read_idle_counter(g, 1); + *total_cycles = g->ops.pmu.pmu_read_idle_counter(g, 2); + + gk20a_idle(g); +} + +void nvgpu_pmu_reset_load_counters(struct gk20a *g) +{ + if (nvgpu_is_powered_off(g) || gk20a_busy(g) != 0) { + return; + } + + g->ops.pmu.pmu_reset_idle_counter(g, 2); + g->ops.pmu.pmu_reset_idle_counter(g, 1); + + gk20a_idle(g); +} + +int nvgpu_pmu_handle_perfmon_event(struct gk20a *g, + struct nvgpu_pmu *pmu, struct pmu_msg *msg) +{ + struct pmu_perfmon_msg *perfmon_msg = &msg->msg.perfmon; + nvgpu_log_fn(g, " "); + + switch (perfmon_msg->msg_type) { + case PMU_PERFMON_MSG_ID_INCREASE_EVENT: + nvgpu_pmu_dbg(g, "perfmon increase event: "); + nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", + perfmon_msg->gen.state_id, perfmon_msg->gen.group_id, + perfmon_msg->gen.data); + (pmu->pmu_perfmon->perfmon_events_cnt)++; + break; + 
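+	/*
+	 * A decrease event means the sampled load stayed below the lower
+	 * threshold for the number of consecutive periods programmed at init
+	 * time (dec_cnt / to_decrease_count).
+	 */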
case PMU_PERFMON_MSG_ID_DECREASE_EVENT: + nvgpu_pmu_dbg(g, "perfmon decrease event: "); + nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", + perfmon_msg->gen.state_id, perfmon_msg->gen.group_id, + perfmon_msg->gen.data); + (pmu->pmu_perfmon->perfmon_events_cnt)++; + break; + case PMU_PERFMON_MSG_ID_INIT_EVENT: + pmu->pmu_perfmon->perfmon_ready = true; + nvgpu_pmu_dbg(g, "perfmon init event"); + break; + default: + nvgpu_pmu_dbg(g, "Invalid msgtype:%u for %s", + perfmon_msg->msg_type, __func__); + break; + } + + /* restart sampling */ + if (pmu->pmu_perfmon->perfmon_sampling_enabled) { + return nvgpu_pmu_perfmon_start_sample(g, pmu, + pmu->pmu_perfmon); + } + + return 0; +} + +int nvgpu_pmu_handle_perfmon_event_rpc(struct gk20a *g, + struct nvgpu_pmu *pmu, struct pmu_msg *msg) +{ + struct pmu_nvgpu_rpc_perfmon_init *perfmon_rpc = + &msg->event_rpc.perfmon_init; + + + nvgpu_log_fn(g, " "); + + switch (perfmon_rpc->rpc_hdr.function) { + case PMU_RPC_ID_PERFMON_CHANGE_EVENT: + if (((struct pmu_nvgpu_rpc_perfmon_change *) + (void *)perfmon_rpc)->b_increase) { + nvgpu_pmu_dbg(g, "perfmon increase event"); + } else { + nvgpu_pmu_dbg(g, "perfmon decrease event"); + } + (pmu->pmu_perfmon->perfmon_events_cnt)++; + break; + case PMU_RPC_ID_PERFMON_INIT_EVENT: + nvgpu_pmu_dbg(g, "perfmon init event"); + pmu->pmu_perfmon->perfmon_ready = true; + break; + default: + nvgpu_pmu_dbg(g, "invalid perfmon event %d", + perfmon_rpc->rpc_hdr.function); + break; + } + + /* restart sampling */ + if (pmu->pmu_perfmon->perfmon_sampling_enabled) { + return nvgpu_pmu_perfmon_start_sample(g, pmu, + pmu->pmu_perfmon); + } + + return 0; +} + +/* Perfmon RPC */ +int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct nv_pmu_rpc_struct_perfmon_init rpc; + int status = 0; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + return 0; + } + + nvgpu_log_fn(g, " "); + + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init)); + pmu->pmu_perfmon->perfmon_ready = false; + + g->ops.pmu.pmu_init_perfmon_counter(g); + + /* microseconds interval between pmu polls perf counters */ + rpc.sample_periodus = 16700; + /* number of sample periods below lower threshold + * before pmu triggers perfmon decrease event + */ + rpc.to_decrease_count = 15; + /* index of base counter, aka. 
always ticking counter */ + rpc.base_counter_id = 6; + /* moving average window for sample periods */ + rpc.samples_in_moving_avg = 17; + /* number of perfmon counters + * counter #3 (GR and CE2) for gk20a + */ + rpc.num_counters = 1; + + (void) memset(rpc.counter, 0, sizeof(struct pmu_perfmon_counter_v3) * + NV_PMU_PERFMON_MAX_COUNTERS); + /* Counter used to count GR busy cycles */ + rpc.counter[0].index = 3; + + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_INIT"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, INIT, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + goto exit; + } + +exit: + return 0; +} + +int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct nv_pmu_rpc_struct_perfmon_start rpc; + int status = 0; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + return 0; + } + + nvgpu_log_fn(g, " "); + + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start)); + rpc.group_id = PMU_DOMAIN_GROUP_PSTATE; + rpc.state_id = pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]; + rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE | + PMU_PERFMON_FLAG_ENABLE_DECREASE | + PMU_PERFMON_FLAG_CLEAR_PREV; + + rpc.counter[0].upper_threshold = 3000; + rpc.counter[0].lower_threshold = 1000; + + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_START\n"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, START, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + } + + return status; +} + +int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct nv_pmu_rpc_struct_perfmon_stop rpc; + int status = 0; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + return 0; + } + + nvgpu_log_fn(g, " "); + + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_stop)); + /* PERFMON Stop */ + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_STOP\n"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, STOP, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + } + + return status; +} + +int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct nv_pmu_rpc_struct_perfmon_query rpc; + int status = 0; + + if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + return 0; + } + + nvgpu_log_fn(g, " "); + pmu->pmu_perfmon->perfmon_query = 0; + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query)); + /* PERFMON QUERY */ + nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n"); + PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, QUERY, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC, status=0x%x", status); + } + + pmu_wait_message_cond(pmu, nvgpu_get_poll_timeout(g), + &pmu->pmu_perfmon->perfmon_query, 1); + + return status; +} + +int nvgpu_pmu_perfmon_get_sampling_enable_status(struct nvgpu_pmu *pmu) +{ + return pmu->pmu_perfmon->perfmon_sampling_enabled; +} + +void nvgpu_pmu_perfmon_set_sampling_enable_status(struct nvgpu_pmu *pmu, + bool status) +{ + pmu->pmu_perfmon->perfmon_sampling_enabled = status; +} + +u64 nvgpu_pmu_perfmon_get_events_count(struct nvgpu_pmu *pmu) +{ + return pmu->pmu_perfmon->perfmon_events_cnt; +} + +u32 nvgpu_pmu_perfmon_get_load_avg(struct nvgpu_pmu *pmu) +{ + return pmu->pmu_perfmon->load_avg; +} + +int nvgpu_pmu_perfmon_initialization(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_perfmon *perfmon) +{ + return perfmon->init_perfmon(pmu); +} + +int 
nvgpu_pmu_perfmon_start_sample(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_perfmon *perfmon) +{ + + return perfmon->start_sampling(pmu); +} + +int nvgpu_pmu_perfmon_stop_sample(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_perfmon *perfmon) +{ + + return perfmon->stop_sampling(pmu); +} + +int nvgpu_pmu_perfmon_get_sample(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_perfmon *perfmon) +{ + + return perfmon->get_samples_rpc(pmu); +} + +int nvgpu_pmu_perfmon_event_handler(struct gk20a *g, + struct nvgpu_pmu *pmu, struct pmu_msg *msg) +{ + return pmu->pmu_perfmon->perfmon_event_handler(g, pmu, msg); +} diff --git a/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gm20b.c b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gm20b.c new file mode 100644 index 000000000..2a15d2ad2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gm20b.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include +#include +#include "pmu_perfmon_sw_gm20b.h" + +void nvgpu_gm20b_perfmon_sw_init(struct gk20a *g, + struct nvgpu_pmu_perfmon *perfmon) +{ + nvgpu_log_fn(g, " "); + + perfmon->init_perfmon = nvgpu_pmu_init_perfmon; + perfmon->start_sampling = + nvgpu_pmu_perfmon_start_sampling; + perfmon->stop_sampling = + nvgpu_pmu_perfmon_stop_sampling; + perfmon->get_samples_rpc = NULL; + perfmon->perfmon_event_handler = + nvgpu_pmu_handle_perfmon_event; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gm20b.h b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gm20b.h new file mode 100644 index 000000000..ccd4bda0c --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gm20b.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PMU_PERFMON_SW_GM20B_H +#define NVGPU_PMU_PERFMON_SW_GM20B_H + +void nvgpu_gm20b_perfmon_sw_init(struct gk20a *g, + struct nvgpu_pmu_perfmon *perfmon); + +#endif /* NVGPU_PMU_PERFMON_SW_GM20B_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gv11b.c b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gv11b.c new file mode 100644 index 000000000..cdf4f9263 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gv11b.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include "pmu_perfmon_sw_gv11b.h" + +void nvgpu_gv11b_perfmon_sw_init(struct gk20a *g, + struct nvgpu_pmu_perfmon *perfmon) +{ + nvgpu_log_fn(g, " "); + + perfmon->init_perfmon = nvgpu_pmu_init_perfmon_rpc; + perfmon->start_sampling = + nvgpu_pmu_perfmon_start_sampling_rpc; + perfmon->stop_sampling = + nvgpu_pmu_perfmon_stop_sampling_rpc; + perfmon->get_samples_rpc = + nvgpu_pmu_perfmon_get_samples_rpc; + perfmon->perfmon_event_handler = + nvgpu_pmu_handle_perfmon_event; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gv11b.h b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gv11b.h new file mode 100644 index 000000000..26bf02fab --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon_sw_gv11b.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PERFMON_SW_GV11B_H +#define NVGPU_PERFMON_SW_GV11B_H + +void nvgpu_gv11b_perfmon_sw_init(struct gk20a *g, + struct nvgpu_pmu_perfmon *perfmon); + +#endif /* NVGPU_PERFMON_SW_GV11B_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gm20b.c b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gm20b.c new file mode 100644 index 000000000..26a25c398 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gm20b.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pg_sw_gm20b.h" +#include "pmu_pg.h" + +u32 gm20b_pmu_pg_engines_list(struct gk20a *g) +{ + return BIT32(PMU_PG_ELPG_ENGINE_ID_GRAPHICS); +} + +u32 gm20b_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id) +{ + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + return NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING; + } + + return 0; +} + +static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + struct nvgpu_pmu *pmu = param; + nvgpu_pmu_dbg(g, "reply ZBC_TABLE_UPDATE"); + pmu->pg->zbc_save_done = true; +} + +void gm20b_pmu_save_zbc(struct gk20a *g, u32 entries) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_cmd cmd; + size_t tmp_size; + int err = 0; + + if (!nvgpu_pmu_get_fw_ready(g, pmu) || + (entries == 0U) || !pmu->pg->zbc_ready) { + return; + } + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp_size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_zbc_cmd); + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + cmd.hdr.size = (u8)tmp_size; + cmd.cmd.zbc.cmd_type = g->pmu_ver_cmd_id_zbc_table_update; + cmd.cmd.zbc.entry_mask = ZBC_MASK(entries); + + pmu->pg->zbc_save_done = false; + + nvgpu_pmu_dbg(g, "cmd post ZBC_TABLE_UPDATE"); + err = nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_zbc_msg, pmu); + if (err != 0) { + nvgpu_err(g, "ZBC_TABLE_UPDATE cmd post failed"); + return; + } + pmu_wait_message_cond(pmu, nvgpu_get_poll_timeout(g), + &pmu->pg->zbc_save_done, 1); + if (!pmu->pg->zbc_save_done) { + nvgpu_err(g, "ZBC save timeout"); + } +} + +int gm20b_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_pg_stats stats; + int err; + + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, + pmu->pg->stat_dmem_offset[pg_engine_id], + (u8 *)&stats, (u32)sizeof(struct pmu_pg_stats), 0); + if (err != 0) { + nvgpu_err(g, "PMU falcon DMEM copy failed"); + return err; + } + + pg_stat_data->ingating_time = stats.pg_ingating_time_us; + pg_stat_data->ungating_time = stats.pg_ungating_time_us; + pg_stat_data->gating_cnt = stats.pg_gating_cnt; + pg_stat_data->avg_entry_latency_us = stats.pg_avg_entry_time_us; + pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us; + + return err; +} + +int gm20b_pmu_pg_elpg_init(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id) +{ + struct pmu_cmd cmd; + u64 tmp; + + /* init ELPG */ + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp = nvgpu_safe_add_u64(PMU_CMD_HDR_SIZE, + sizeof(struct pmu_pg_cmd_elpg_cmd)); + nvgpu_assert(tmp <= U8_MAX); + cmd.hdr.size = nvgpu_safe_cast_u64_to_u8(tmp); + cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; + cmd.cmd.pg.elpg_cmd.engine_id = pg_engine_id; + cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT; + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_elpg_msg, pmu); +} + +int gm20b_pmu_pg_elpg_allow(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id) +{ + struct pmu_cmd cmd; + u64 tmp; + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp = nvgpu_safe_add_u64(PMU_CMD_HDR_SIZE, + sizeof(struct pmu_pg_cmd_elpg_cmd)); + nvgpu_assert(tmp <= U8_MAX); + cmd.hdr.size = nvgpu_safe_cast_u64_to_u8(tmp); + cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; + cmd.cmd.pg.elpg_cmd.engine_id = pg_engine_id; + cmd.cmd.pg.elpg_cmd.cmd = 
PMU_PG_ELPG_CMD_ALLOW; + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, + PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg, + pmu); +} + +int gm20b_pmu_pg_elpg_disallow(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id) +{ + struct pmu_cmd cmd; + u64 tmp; + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp = nvgpu_safe_add_u64(PMU_CMD_HDR_SIZE, + sizeof(struct pmu_pg_cmd_elpg_cmd)); + nvgpu_assert(tmp <= U8_MAX); + cmd.hdr.size = nvgpu_safe_cast_u64_to_u8(tmp); + cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; + cmd.cmd.pg.elpg_cmd.engine_id = pg_engine_id; + cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_elpg_msg, pmu); +} + +int gm20b_pmu_pg_elpg_alloc_dmem(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id) +{ + struct pmu_cmd cmd; + u64 tmp; + + pmu->pg->stat_dmem_offset[pg_engine_id] = 0; + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp = nvgpu_safe_add_u64(PMU_CMD_HDR_SIZE, + sizeof(struct pmu_pg_cmd_elpg_cmd)); + nvgpu_assert(tmp <= U8_MAX); + cmd.hdr.size = nvgpu_safe_cast_u64_to_u8(tmp); + cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT; + cmd.cmd.pg.stat.engine_id = pg_engine_id; + cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM; + cmd.cmd.pg.stat.data = 0; + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_LPQ, + pmu_handle_pg_stat_msg, pmu); +} + +int gm20b_pmu_pg_elpg_load_buff(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + struct pmu_cmd cmd; + u64 tmp; + u32 gr_engine_id; + + gr_engine_id = nvgpu_engine_get_gr_id(g); + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + tmp = nvgpu_safe_add_u64(PMU_CMD_HDR_SIZE, + pmu->fw->ops.pg_cmd_eng_buf_load_size(&cmd.cmd.pg)); + cmd.hdr.unit_id = PMU_UNIT_PG; + nvgpu_assert(PMU_CMD_HDR_SIZE < U32(U8_MAX)); + cmd.hdr.size = nvgpu_safe_cast_u64_to_u8(tmp); + pmu->fw->ops.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, + PMU_PG_CMD_ID_ENG_BUF_LOAD); + pmu->fw->ops.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, + nvgpu_safe_cast_u32_to_u8(gr_engine_id)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, + PMU_PGENG_GR_BUFFER_IDX_FECS); + pmu->fw->ops.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, + nvgpu_safe_cast_u64_to_u16(pmu->pg->pg_buf.size)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_dma_base(&cmd.cmd.pg, + u64_lo32(pmu->pg->pg_buf.gpu_va)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_dma_offset(&cmd.cmd.pg, + nvgpu_safe_cast_u64_to_u8(pmu->pg->pg_buf.gpu_va & 0xFFU)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_dma_idx(&cmd.cmd.pg, + PMU_DMAIDX_VIRT); + + pmu->pg->buf_loaded = false; + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_LPQ, + pmu_handle_pg_buf_config_msg, pmu); +} + +int gm20b_pmu_pg_elpg_hw_load_zbc(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + struct pmu_cmd cmd; + u64 tmp; + u32 gr_engine_id; + + gr_engine_id = nvgpu_engine_get_gr_id(g); + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + tmp = nvgpu_safe_add_u64(PMU_CMD_HDR_SIZE, + pmu->fw->ops.pg_cmd_eng_buf_load_size(&cmd.cmd.pg)); + cmd.hdr.unit_id = PMU_UNIT_PG; + nvgpu_assert(PMU_CMD_HDR_SIZE < U32(U8_MAX)); + cmd.hdr.size = nvgpu_safe_cast_u64_to_u8(tmp); + pmu->fw->ops.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, + PMU_PG_CMD_ID_ENG_BUF_LOAD); + pmu->fw->ops.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, + nvgpu_safe_cast_u32_to_u8(gr_engine_id)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, + PMU_PGENG_GR_BUFFER_IDX_ZBC); + 
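+	/* The calls below describe the buffer to the PMU: its size, the low 32 bits of its GPU VA as the DMA base, the byte offset within the lowest 256 bytes, and the virtual DMA index used for the transfer. */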
pmu->fw->ops.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, + nvgpu_safe_cast_u64_to_u16(pmu->pg->seq_buf.size)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_dma_base(&cmd.cmd.pg, + u64_lo32(pmu->pg->seq_buf.gpu_va)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_dma_offset(&cmd.cmd.pg, + nvgpu_safe_cast_u64_to_u8(pmu->pg->seq_buf.gpu_va & 0xFFU)); + pmu->fw->ops.pg_cmd_eng_buf_load_set_dma_idx(&cmd.cmd.pg, + PMU_DMAIDX_VIRT); + + pmu->pg->buf_loaded = false; + + return nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_LPQ, + pmu_handle_pg_buf_config_msg, pmu); +} + +int gm20b_pmu_pg_init_send(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + g->ops.pmu.pmu_pg_idle_counter_config(g, pg_engine_id); + + if (pmu->pg->init_param != NULL) { + err = pmu->pg->init_param(g, pg_engine_id); + if (err != 0) { + nvgpu_err(g, "init_param failed err=%d", err); + return err; + } + } + + nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_INIT"); + if (pmu->pg->init == NULL) { + nvgpu_err(g, "PG init function not assigned"); + return -EINVAL; + } + err = pmu->pg->init(g, pmu, pg_engine_id); + if (err != 0) { + nvgpu_err(g, "PMU_PG_ELPG_CMD_INIT cmd failed\n"); + return err; + } + + /* alloc dmem for powergating state log */ + nvgpu_pmu_dbg(g, "cmd post PMU_PG_STAT_CMD_ALLOC_DMEM"); + if (pmu->pg->alloc_dmem == NULL) { + nvgpu_err(g, "PG alloc dmem function not assigned"); + return -EINVAL; + } + err = pmu->pg->alloc_dmem(g, pmu, pg_engine_id); + if (err != 0) { + nvgpu_err(g, "PMU_PG_STAT_CMD_ALLOC_DMEM cmd failed\n"); + return err; + } + + /* disallow ELPG initially + * PMU ucode requires a disallow cmd before allow cmd + * set for wait_event PMU_ELPG_STAT_OFF */ + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + pmu->pg->elpg_stat = PMU_ELPG_STAT_OFF; + } else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + pmu->pg->mscg_transition_state = PMU_ELPG_STAT_OFF; + } + + nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_DISALLOW"); + if (pmu->pg->disallow == NULL) { + nvgpu_err(g, "PG disallow function not assigned"); + return -EINVAL; + } + err = pmu->pg->disallow(g, pmu, pg_engine_id); + if (err != 0) { + nvgpu_err(g, "PMU_PG_ELPG_CMD_DISALLOW cmd failed\n"); + return err; + } + + if (pmu->pg->set_sub_feature_mask != NULL) { + err = pmu->pg->set_sub_feature_mask(g, pg_engine_id); + if (err != 0) { + nvgpu_err(g, "set_sub_feature_mask failed err=%d", + err); + return err; + } + } + + return err; +} + +void nvgpu_gm20b_pg_sw_init(struct gk20a *g, + struct nvgpu_pmu_pg *pg) +{ + pg->elpg_statistics = gm20b_pmu_elpg_statistics; + pg->init_param = NULL; + pg->supported_engines_list = gm20b_pmu_pg_engines_list; + pg->engines_feature_list = gm20b_pmu_pg_feature_list; + pg->is_lpwr_feature_supported = NULL; + pg->lpwr_enable_pg = NULL; + pg->lpwr_disable_pg = NULL; + pg->param_post_init = NULL; + pg->save_zbc = gm20b_pmu_save_zbc; + pg->allow = gm20b_pmu_pg_elpg_allow; + pg->disallow = gm20b_pmu_pg_elpg_disallow; + pg->init = gm20b_pmu_pg_elpg_init; + pg->alloc_dmem = gm20b_pmu_pg_elpg_alloc_dmem; + pg->load_buff = gm20b_pmu_pg_elpg_load_buff; + pg->hw_load_zbc = gm20b_pmu_pg_elpg_hw_load_zbc; + pg->rpc_handler = NULL; + pg->init_send = gm20b_pmu_pg_init_send; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gm20b.h b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gm20b.h new file mode 100644 index 000000000..51f0f959b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gm20b.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PMU_PG_SW_GM20B_H +#define NVGPU_PMU_PG_SW_GM20B_H + +#include + +struct gk20a; +struct pmu_pg_stats_data; + +#define ZBC_MASK(i) U16(~(~(0U) << ((i)+1U)) & 0xfffeU) + +u32 gm20b_pmu_pg_engines_list(struct gk20a *g); +u32 gm20b_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id); +void gm20b_pmu_save_zbc(struct gk20a *g, u32 entries); +int gm20b_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data); +void nvgpu_gm20b_pg_sw_init(struct gk20a *g, + struct nvgpu_pmu_pg *pg); +int gm20b_pmu_pg_elpg_allow(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id); +int gm20b_pmu_pg_elpg_disallow(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id); +int gm20b_pmu_pg_elpg_init(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id); +int gm20b_pmu_pg_elpg_alloc_dmem(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id); +int gm20b_pmu_pg_elpg_load_buff(struct gk20a *g, struct nvgpu_pmu *pmu); +int gm20b_pmu_pg_elpg_hw_load_zbc(struct gk20a *g, struct nvgpu_pmu *pmu); +int gm20b_pmu_pg_init_send(struct gk20a *g, struct nvgpu_pmu *pmu, + u8 pg_engine_id); + +#endif /* NVGPU_PMU_PG_SW_GM20B_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp106.c b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp106.c new file mode 100644 index 000000000..bfa3943ff --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp106.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pg_sw_gp106.h" + +static void pmu_handle_param_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + nvgpu_log_fn(g, " "); + + if (status != 0U) { + nvgpu_err(g, "PG PARAM cmd aborted"); + return; + } + + nvgpu_pmu_dbg(g, "PG PARAM is acknowledged from PMU %x", + msg->msg.pg.msg_type); +} + +int gp106_pg_param_init(struct gk20a *g, u32 pg_engine_id) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_cmd cmd; + int status; + u64 tmp_size; + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + + status = init_rppg(g); + if (status != 0) { + nvgpu_err(g, "RPPG init Failed"); + return -1; + } + + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_gr_init_param); + nvgpu_assert(tmp_size <= U64(U8_MAX)); + cmd.hdr.size = U8(tmp_size); + cmd.cmd.pg.gr_init_param.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.gr_init_param.sub_cmd_id = + PMU_PG_PARAM_CMD_GR_INIT_PARAM; + cmd.cmd.pg.gr_init_param.featuremask = + NVGPU_PMU_GR_FEATURE_MASK_RPPG; + + nvgpu_pmu_dbg(g, "cmd post GR PMU_PG_CMD_ID_PG_PARAM"); + nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_param_msg, pmu); + } else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_ms_init_param); + nvgpu_assert(tmp_size <= U64(U8_MAX)); + cmd.hdr.size = U8(tmp_size); + cmd.cmd.pg.ms_init_param.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.ms_init_param.cmd_id = + PMU_PG_PARAM_CMD_MS_INIT_PARAM; + cmd.cmd.pg.ms_init_param.support_mask = + NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING | + NVGPU_PMU_MS_FEATURE_MASK_SW_ASR | + NVGPU_PMU_MS_FEATURE_MASK_RPPG | + NVGPU_PMU_MS_FEATURE_MASK_FB_TRAINING; + + nvgpu_pmu_dbg(g, "cmd post MS PMU_PG_CMD_ID_PG_PARAM"); + nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_param_msg, pmu); + } + + return 0; +} + +int gp106_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_pg_stats_v2 stats; + int err; + + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, + pmu->pg->stat_dmem_offset[pg_engine_id], + (u8 *)&stats, (u32)sizeof(struct pmu_pg_stats_v2), 0); + if (err != 0) { + nvgpu_err(g, "PMU falcon DMEM copy failed"); + return err; + } + + pg_stat_data->ingating_time = stats.total_sleep_time_us; + pg_stat_data->ungating_time = stats.total_non_sleep_time_us; + pg_stat_data->gating_cnt = stats.entry_count; + pg_stat_data->avg_entry_latency_us = stats.entry_latency_avg_us; + pg_stat_data->avg_exit_latency_us = stats.exit_latency_avg_us; + + return err; +} + +u32 gp106_pmu_pg_engines_list(struct gk20a *g) +{ + return BIT32(PMU_PG_ELPG_ENGINE_ID_GRAPHICS) | + BIT32(PMU_PG_ELPG_ENGINE_ID_MS); +} + +u32 gp106_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id) +{ + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + return NVGPU_PMU_GR_FEATURE_MASK_RPPG; + } + + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + return NVGPU_PMU_MS_FEATURE_MASK_ALL; + } + + return 0; +} + +bool gp106_pmu_is_lpwr_feature_supported(struct gk20a *g, u32 
feature_id) +{ + return false; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp106.h b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp106.h new file mode 100644 index 000000000..78a4109b4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp106.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PG_SW_GP106_H +#define NVGPU_PG_SW_GP106_H + +#include + +struct gk20a; +struct pmu_pg_stats_data; + +int gp106_pg_param_init(struct gk20a *g, u32 pg_engine_id); +int gp106_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data); +u32 gp106_pmu_pg_engines_list(struct gk20a *g); +u32 gp106_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id); +bool gp106_pmu_is_lpwr_feature_supported(struct gk20a *g, u32 feature_id); + +#endif /* NVGPU_PG_SW_GP106_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp10b.c b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp10b.c new file mode 100644 index 000000000..f609dbffd --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp10b.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "pg_sw_gp10b.h" +#include "pg_sw_gm20b.h" + +static void pmu_handle_gr_param_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + nvgpu_log_fn(g, " "); + + if (status != 0U) { + nvgpu_err(g, "GR PARAM cmd aborted"); + return; + } + + nvgpu_pmu_dbg(g, "GR PARAM is acknowledged from PMU %x", + msg->msg.pg.msg_type); + + return; +} + +int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_cmd cmd; + size_t tmp_size; + + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_gr_init_param_v2); + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + cmd.hdr.size = (u8)tmp_size; + cmd.cmd.pg.gr_init_param_v2.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.gr_init_param_v2.sub_cmd_id = + PMU_PG_PARAM_CMD_GR_INIT_PARAM; + cmd.cmd.pg.gr_init_param_v2.featuremask = + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING; + cmd.cmd.pg.gr_init_param_v2.ldiv_slowdown_factor = + g->ldiv_slowdown_factor; + + nvgpu_pmu_dbg(g, "cmd post PMU_PG_CMD_ID_PG_PARAM "); + nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_gr_param_msg, pmu); + + } else { + return -EINVAL; + } + + return 0; +} + +int gp10b_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_pg_stats_v1 stats; + int err; + + err = nvgpu_falcon_copy_from_dmem(pmu->flcn, + pmu->pg->stat_dmem_offset[pg_engine_id], + (u8 *)&stats, (u32)sizeof(struct pmu_pg_stats_v1), 0); + if (err != 0) { + nvgpu_err(g, "PMU falcon DMEM copy failed"); + return err; + } + + pg_stat_data->ingating_time = stats.total_sleep_timeus; + pg_stat_data->ungating_time = stats.total_nonsleep_timeus; + pg_stat_data->gating_cnt = stats.entry_count; + pg_stat_data->avg_entry_latency_us = stats.entrylatency_avgus; + pg_stat_data->avg_exit_latency_us = stats.exitlatency_avgus; + + return err; +} + +void nvgpu_gp10b_pg_sw_init(struct gk20a *g, + struct nvgpu_pmu_pg *pg) +{ + pg->elpg_statistics = gp10b_pmu_elpg_statistics; + pg->init_param = gp10b_pg_gr_init; + pg->supported_engines_list = gm20b_pmu_pg_engines_list; + pg->engines_feature_list = gm20b_pmu_pg_feature_list; + pg->save_zbc = gm20b_pmu_save_zbc; + pg->allow = gm20b_pmu_pg_elpg_allow; + pg->disallow = gm20b_pmu_pg_elpg_disallow; + pg->init = gm20b_pmu_pg_elpg_init; + pg->alloc_dmem = gm20b_pmu_pg_elpg_alloc_dmem; + pg->load_buff = gm20b_pmu_pg_elpg_load_buff; + pg->hw_load_zbc = gm20b_pmu_pg_elpg_hw_load_zbc; + pg->rpc_handler = NULL; + pg->init_send = gm20b_pmu_pg_init_send; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp10b.h b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp10b.h new file mode 100644 index 000000000..446e635ec --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gp10b.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PMU_PG_SW_GP10B_H +#define NVGPU_PMU_PG_SW_GP10B_H + +#include + +struct gk20a; +struct pmu_pg_stats_data; + +int gp10b_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data); +int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id); +void nvgpu_gp10b_pg_sw_init(struct gk20a *g, + struct nvgpu_pmu_pg *pg); + +#endif /* NVGPU_PMU_PG_SW_GP10B_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gv11b.c b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gv11b.c new file mode 100644 index 000000000..e953f8341 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gv11b.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "pg_sw_gv11b.h" +#include "pg_sw_gp106.h" +#include "pg_sw_gm20b.h" + +static void pmu_handle_pg_sub_feature_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + nvgpu_log_fn(g, " "); + + if (status != 0U) { + nvgpu_err(g, "Sub-feature mask update cmd aborted"); + return; + } + + nvgpu_pmu_dbg(g, "sub-feature mask update is acknowledged from PMU %x", + msg->msg.pg.msg_type); +} + +static void pmu_handle_pg_param_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + nvgpu_log_fn(g, " "); + + if (status != 0U) { + nvgpu_err(g, "GR PARAM cmd aborted"); + return; + } + + nvgpu_pmu_dbg(g, "GR PARAM is acknowledged from PMU %x", + msg->msg.pg.msg_type); +} + +int gv11b_pg_gr_init(struct gk20a *g, u32 pg_engine_id) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_cmd cmd; + size_t tmp_size; + + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_gr_init_param_v1); + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + cmd.hdr.size = (u8)tmp_size; + cmd.cmd.pg.gr_init_param_v1.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.gr_init_param_v1.sub_cmd_id = + PMU_PG_PARAM_CMD_GR_INIT_PARAM; + cmd.cmd.pg.gr_init_param_v1.featuremask = + NVGPU_PMU_GR_FEATURE_MASK_ALL; + + nvgpu_pmu_dbg(g, "cmd post PMU_PG_CMD_ID_PG_PARAM_INIT"); + nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_param_msg, pmu); + + } else { + return -EINVAL; + } + + return 0; +} + +int gv11b_pg_set_subfeature_mask(struct gk20a *g, u32 pg_engine_id) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct pmu_cmd cmd; + size_t tmp_size; + + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct pmu_pg_cmd_sub_feature_mask_update); + nvgpu_assert(tmp_size <= (size_t)U8_MAX); + cmd.hdr.size = (u8)tmp_size; + cmd.cmd.pg.sf_mask_update.cmd_type = + PMU_PG_CMD_ID_PG_PARAM; + cmd.cmd.pg.sf_mask_update.sub_cmd_id = + PMU_PG_PARAM_CMD_SUB_FEATURE_MASK_UPDATE; + cmd.cmd.pg.sf_mask_update.ctrl_id = + PMU_PG_ELPG_ENGINE_ID_GRAPHICS; + cmd.cmd.pg.sf_mask_update.enabled_mask = + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING | + NVGPU_PMU_GR_FEATURE_MASK_PRIV_RING | + NVGPU_PMU_GR_FEATURE_MASK_UNBIND | + NVGPU_PMU_GR_FEATURE_MASK_SAVE_GLOBAL_STATE | + NVGPU_PMU_GR_FEATURE_MASK_RESET_ENTRY | + NVGPU_PMU_GR_FEATURE_MASK_HW_SEQUENCE | + NVGPU_PMU_GR_FEATURE_MASK_ELPG_SRAM | + NVGPU_PMU_GR_FEATURE_MASK_ELPG_LOGIC | + NVGPU_PMU_GR_FEATURE_MASK_ELPG_L2RPPG; + + nvgpu_pmu_dbg(g, "cmd post PMU_PG_CMD_SUB_FEATURE_MASK_UPDATE"); + nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + pmu_handle_pg_sub_feature_msg, pmu); + } else { + return -EINVAL; + } + + return 0; +} + +void nvgpu_gv11b_pg_sw_init(struct gk20a *g, + struct nvgpu_pmu_pg *pg) +{ + pg->elpg_statistics = gp106_pmu_elpg_statistics; + pg->init_param = gv11b_pg_gr_init; + pg->supported_engines_list = gm20b_pmu_pg_engines_list; + pg->engines_feature_list = gm20b_pmu_pg_feature_list; + pg->set_sub_feature_mask = gv11b_pg_set_subfeature_mask; + pg->save_zbc = gm20b_pmu_save_zbc; + pg->allow = gm20b_pmu_pg_elpg_allow; + pg->disallow = gm20b_pmu_pg_elpg_disallow; + pg->init = gm20b_pmu_pg_elpg_init; + pg->alloc_dmem = gm20b_pmu_pg_elpg_alloc_dmem; + pg->load_buff = gm20b_pmu_pg_elpg_load_buff; + pg->hw_load_zbc = 
gm20b_pmu_pg_elpg_hw_load_zbc; + pg->rpc_handler = NULL; + pg->init_send = gm20b_pmu_pg_init_send; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gv11b.h b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gv11b.h new file mode 100644 index 000000000..639c4323d --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pg_sw_gv11b.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PMU_PG_SW_GV11B_H +#define NVGPU_PMU_PG_SW_GV11B_H + +#include + +struct gk20a; + +int gv11b_pg_gr_init(struct gk20a *g, u32 pg_engine_id); +int gv11b_pg_set_subfeature_mask(struct gk20a *g, u32 pg_engine_id); +void nvgpu_gv11b_pg_sw_init(struct gk20a *g, struct nvgpu_pmu_pg *pg); + +#endif /* NVGPU_PMU_PG_SW_GV11B_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pmu_aelpg.c b/drivers/gpu/nvgpu/common/pmu/pg/pmu_aelpg.c new file mode 100644 index 000000000..1bfbe8db5 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pmu_aelpg.c @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +int nvgpu_aelpg_init(struct gk20a *g) +{ + int status = 0; + + /* Remove reliance on app_ctrl field. 
*/ + union pmu_ap_cmd ap_cmd; + + ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT; + ap_cmd.init.pg_sampling_period_us = g->pmu->pg->aelpg_param[0]; + + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); + return status; +} + +int nvgpu_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status = 0; + union pmu_ap_cmd ap_cmd; + + ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL; + ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id; + ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us = + pmu->pg->aelpg_param[1]; + ap_cmd.init_and_enable_ctrl.params.min_target_saving_us = + pmu->pg->aelpg_param[2]; + ap_cmd.init_and_enable_ctrl.params.power_break_even_us = + pmu->pg->aelpg_param[3]; + ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max = + pmu->pg->aelpg_param[4]; + + switch (ctrl_id) { + case PMU_AP_CTRL_ID_GRAPHICS: + break; + default: + nvgpu_err(g, "Invalid ctrl_id:%u for %s", ctrl_id, __func__); + break; + } + + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, true); + return status; +} + +/* AELPG */ +static void ap_callback_init_and_enable_ctrl( + struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + WARN_ON(msg == NULL); + + if (status == 0U) { + switch (msg->msg.pg.ap_msg.cmn.msg_id) { + case PMU_AP_MSG_ID_INIT_ACK: + nvgpu_pmu_dbg(g, "reply PMU_AP_CMD_ID_INIT"); + break; + + default: + nvgpu_pmu_dbg(g, "%s: Invalid Adaptive Power Message: %x", + __func__, msg->msg.pg.ap_msg.cmn.msg_id); + break; + } + } +} + +/* Send an Adaptive Power (AP) related command to PMU */ +int nvgpu_pmu_ap_send_command(struct gk20a *g, + union pmu_ap_cmd *p_ap_cmd, bool b_block) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status = 0; + struct pmu_cmd cmd; + pmu_callback p_callback = NULL; + u64 tmp; + + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + + /* Copy common members */ + cmd.hdr.unit_id = PMU_UNIT_PG; + tmp = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd); + nvgpu_assert(tmp <= U8_MAX); + cmd.hdr.size = (u8)tmp; + + cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP; + cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id; + + /* Copy other members of command */ + switch (p_ap_cmd->cmn.cmd_id) { + case PMU_AP_CMD_ID_INIT: + nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_INIT"); + cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us = + p_ap_cmd->init.pg_sampling_period_us; + break; + + case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL: + nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL"); + cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id = + p_ap_cmd->init_and_enable_ctrl.ctrl_id; + nvgpu_memcpy( + (u8 *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params), + (u8 *)&(p_ap_cmd->init_and_enable_ctrl.params), + sizeof(struct pmu_ap_ctrl_init_params)); + + p_callback = ap_callback_init_and_enable_ctrl; + break; + + case PMU_AP_CMD_ID_ENABLE_CTRL: + nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_ENABLE_CTRL"); + cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id = + p_ap_cmd->enable_ctrl.ctrl_id; + break; + + case PMU_AP_CMD_ID_DISABLE_CTRL: + nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_DISABLE_CTRL"); + cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id = + p_ap_cmd->disable_ctrl.ctrl_id; + break; + + case PMU_AP_CMD_ID_KICK_CTRL: + nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_KICK_CTRL"); + cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id = + p_ap_cmd->kick_ctrl.ctrl_id; + cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count = + p_ap_cmd->kick_ctrl.skip_count; + break; + + default: + nvgpu_pmu_dbg(g, "%s: Invalid Adaptive Power command %d\n", + __func__, p_ap_cmd->cmn.cmd_id); + status = 0x2f; + break; + } 
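+	/* An unrecognized AP command leaves status at the non-zero value set above, so the command is never posted to the PMU. */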
+ + if (status != 0) { + goto err_return; + } + + status = nvgpu_pmu_cmd_post(g, &cmd, NULL, PMU_COMMAND_QUEUE_HPQ, + p_callback, pmu); + + if (status != 0) { + nvgpu_pmu_dbg(g, + "%s: Unable to submit Adaptive Power Command %d\n", + __func__, p_ap_cmd->cmn.cmd_id); + goto err_return; + } + +err_return: + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c new file mode 100644 index 000000000..754796e0b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c @@ -0,0 +1,991 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pg_sw_gm20b.h" +#include "pg_sw_gv11b.h" +#include "pg_sw_gp10b.h" +#include "pmu_pg.h" + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) +#include "nvgpu_next_gpuid.h" +#endif + +static bool is_pg_supported(struct gk20a *g, struct nvgpu_pmu_pg *pg) +{ + if (!g->support_ls_pmu || !g->can_elpg || pg == NULL) { + return false; + } + + return true; +} + +static int pmu_pg_setup_hw_enable_elpg(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + int err = 0; + nvgpu_log_fn(g, " "); + + pg->initialized = true; + + nvgpu_pmu_fw_state_change(g, pmu, PMU_FW_STATE_STARTED, false); + + if (nvgpu_is_enabled(g, NVGPU_PMU_ZBC_SAVE)) { + /* Save zbc table after PMU is initialized. 
*/ + pg->zbc_ready = true; + nvgpu_pmu_save_zbc(g, 0xf); + } + + if (g->elpg_enabled) { + /* Init reg with prod values*/ + if (g->ops.pmu.pmu_setup_elpg != NULL) { + g->ops.pmu.pmu_setup_elpg(g); + } + err = nvgpu_pmu_enable_elpg(g); + if (err != 0) { + nvgpu_err(g, "nvgpu_pmu_enable_elpg failed err=%d", + err); + return err; + } + } + + nvgpu_udelay(50); + + /* Enable AELPG */ + if (g->aelpg_enabled) { + err = nvgpu_aelpg_init(g); + if (err != 0) { + nvgpu_err(g, "nvgpu_aelpg_init failed err=%d", err); + return err; + } + + err = nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); + if (err != 0) { + nvgpu_err(g, "aelpg_init_and_enable failed err=%d", + err); + return err; + } + } + + return err; +} + +void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + struct nvgpu_pmu *pmu = param; + struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg; + + nvgpu_log_fn(g, " "); + + if (status != 0U) { + nvgpu_err(g, "ELPG cmd aborted"); + return; + } + + switch (elpg_msg->msg) { + case PMU_PG_ELPG_MSG_INIT_ACK: + nvgpu_pmu_dbg(g, "INIT_PG is ack from PMU, eng - %d", + elpg_msg->engine_id); + break; + case PMU_PG_ELPG_MSG_ALLOW_ACK: + nvgpu_pmu_dbg(g, "ALLOW is ack from PMU, eng - %d", + elpg_msg->engine_id); + if (elpg_msg->engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + pmu->pg->mscg_transition_state = PMU_ELPG_STAT_ON; + } else { + pmu->pg->elpg_stat = PMU_ELPG_STAT_ON; + } + break; + case PMU_PG_ELPG_MSG_DISALLOW_ACK: + nvgpu_pmu_dbg(g, "DISALLOW is ack from PMU, eng - %d", + elpg_msg->engine_id); + + if (elpg_msg->engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + pmu->pg->mscg_transition_state = PMU_ELPG_STAT_OFF; + } else { + pmu->pg->elpg_stat = PMU_ELPG_STAT_OFF; + } + + if (nvgpu_pmu_get_fw_state(g, pmu) == + PMU_FW_STATE_ELPG_BOOTING) { + if (pmu->pg->engines_feature_list != NULL && + pmu->pg->engines_feature_list(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { + pmu->pg->initialized = true; + nvgpu_pmu_fw_state_change(g, pmu, PMU_FW_STATE_STARTED, + true); + NV_WRITE_ONCE(pmu->pg->mscg_stat, + PMU_MSCG_DISABLED); + /* make status visible */ + nvgpu_smp_mb(); + } else { + nvgpu_pmu_fw_state_change(g, pmu, + PMU_FW_STATE_ELPG_BOOTED, true); + } + } + break; + default: + nvgpu_err(g, + "unsupported ELPG message : 0x%04x", elpg_msg->msg); + break; + } +} + +/* PG enable/disable */ +int nvgpu_pmu_pg_global_enable(struct gk20a *g, bool enable_pg) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status = 0; + + if (!is_pg_supported(g, pmu->pg)) { + return status; + } + + if (enable_pg) { + if (pmu->pg->engines_feature_list != NULL && + pmu->pg->engines_feature_list(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { + if (pmu->pg->lpwr_enable_pg != NULL) { + status = pmu->pg->lpwr_enable_pg(g, + true); + } + } else if (g->can_elpg) { + status = nvgpu_pmu_enable_elpg(g); + } + } else { + if (pmu->pg->engines_feature_list != NULL && + pmu->pg->engines_feature_list(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { + if (pmu->pg->lpwr_disable_pg != NULL) { + status = pmu->pg->lpwr_disable_pg(g, + true); + } + } else if (g->can_elpg) { + status = nvgpu_pmu_disable_elpg(g); + } + } + + return status; +} + +static int pmu_enable_elpg_locked(struct gk20a *g, u8 pg_engine_id) +{ + struct nvgpu_pmu *pmu = g->pmu; + int status; + + nvgpu_log_fn(g, " "); + + /* no need to wait ack for ELPG enable but set + * pending to sync with follow up ELPG disable + */ + if 
(pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + pmu->pg->elpg_stat = PMU_ELPG_STAT_ON_PENDING; + } else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + pmu->pg->mscg_transition_state = PMU_ELPG_STAT_ON_PENDING; + } + + nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_ALLOW"); + if (pmu->pg->allow == NULL) { + nvgpu_err(g, "PG allow function not assigned"); + return -EINVAL; + } + status = pmu->pg->allow(g, pmu, pg_engine_id); + + if (status != 0) { + nvgpu_log_fn(g, "pmu_enable_elpg_locked FAILED err=%d", + status); + } else { + nvgpu_log_fn(g, "done"); + } + + return status; +} + +int nvgpu_pmu_enable_elpg(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + u8 pg_engine_id; + u32 pg_engine_id_list = 0; + + int ret = 0; + + nvgpu_log_fn(g, " "); + + if (!is_pg_supported(g, g->pmu->pg)) { + return ret; + } + + nvgpu_mutex_acquire(&pmu->pg->elpg_mutex); + + pmu->pg->elpg_refcnt++; + if (pmu->pg->elpg_refcnt <= 0) { + goto exit_unlock; + } + + /* something is not right if we end up in following code path */ + if (unlikely(pmu->pg->elpg_refcnt > 1)) { + nvgpu_warn(g, + "%s(): possible elpg refcnt mismatch. elpg refcnt=%d", + __func__, pmu->pg->elpg_refcnt); + WARN_ON(true); + } + + /* do NOT enable elpg until golden ctx is created, + * which is related with the ctx that ELPG save and restore. + */ + if (unlikely(!pmu->pg->golden_image_initialized)) { + goto exit_unlock; + } + + /* return if ELPG is already on or on_pending or off_on_pending */ + if (pmu->pg->elpg_stat != PMU_ELPG_STAT_OFF) { + goto exit_unlock; + } + + if (pmu->pg->supported_engines_list != NULL) { + pg_engine_id_list = pmu->pg->supported_engines_list(g); + } + + for (pg_engine_id = PMU_PG_ELPG_ENGINE_ID_GRAPHICS; + pg_engine_id < PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE; + pg_engine_id++) { + + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS && + pmu->pg->mscg_stat == PMU_MSCG_DISABLED) { + continue; + } + + if ((BIT32(pg_engine_id) & pg_engine_id_list) != 0U) { + ret = pmu_enable_elpg_locked(g, pg_engine_id); + } + } + +exit_unlock: + nvgpu_mutex_release(&pmu->pg->elpg_mutex); + nvgpu_log_fn(g, "done"); + return ret; +} + +static void pmu_dump_elpg_stats(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + + if (!is_pg_supported(g, pmu->pg)) { + return; + } + + /* Print PG stats */ + nvgpu_err(g, "Print PG stats"); +#ifdef CONFIG_NVGPU_FALCON_NON_FUSA + nvgpu_falcon_print_dmem(pmu->flcn, + pmu->pg->stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_GRAPHICS], + (u32)sizeof(struct pmu_pg_stats_v2)); +#endif + + /* Print ELPG stats */ + g->ops.pmu.pmu_dump_elpg_stats(pmu); +} + +int nvgpu_pmu_disable_elpg(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + int ret = 0; + u8 pg_engine_id; + u32 pg_engine_id_list = 0; + u32 *ptr = NULL; + + nvgpu_log_fn(g, " "); + + if (!is_pg_supported(g, pmu->pg)) { + return ret; + } + + if (pmu->pg->supported_engines_list != NULL) { + pg_engine_id_list = pmu->pg->supported_engines_list(g); + } + + nvgpu_mutex_acquire(&pmu->pg->elpg_mutex); + + pmu->pg->elpg_refcnt--; + if (pmu->pg->elpg_refcnt > 0) { + nvgpu_warn(g, + "%s(): possible elpg refcnt mismatch. 
elpg refcnt=%d", + __func__, pmu->pg->elpg_refcnt); + WARN_ON(true); + ret = 0; + goto exit_unlock; + } + + /* cancel off_on_pending and return */ + if (pmu->pg->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) { + pmu->pg->elpg_stat = PMU_ELPG_STAT_OFF; + ret = 0; + goto exit_reschedule; + } + /* wait if on_pending */ + else if (pmu->pg->elpg_stat == PMU_ELPG_STAT_ON_PENDING) { + + pmu_wait_message_cond(pmu, nvgpu_get_poll_timeout(g), + &pmu->pg->elpg_stat, PMU_ELPG_STAT_ON); + + if (pmu->pg->elpg_stat != PMU_ELPG_STAT_ON) { + nvgpu_err(g, "ELPG_ALLOW_ACK failed, elpg_stat=%d", + pmu->pg->elpg_stat); + pmu_dump_elpg_stats(pmu); + nvgpu_pmu_dump_falcon_stats(pmu); + ret = -EBUSY; + goto exit_unlock; + } + } + /* return if ELPG is already off */ + else if (pmu->pg->elpg_stat != PMU_ELPG_STAT_ON) { + ret = 0; + goto exit_reschedule; + } + + for (pg_engine_id = PMU_PG_ELPG_ENGINE_ID_GRAPHICS; + pg_engine_id < PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE; + pg_engine_id++) { + + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS && + pmu->pg->mscg_stat == PMU_MSCG_DISABLED) { + continue; + } + + if ((BIT32(pg_engine_id) & pg_engine_id_list) != 0U) { + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + pmu->pg->elpg_stat = PMU_ELPG_STAT_OFF_PENDING; + } else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + pmu->pg->mscg_transition_state = + PMU_ELPG_STAT_OFF_PENDING; + } + if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) { + ptr = &pmu->pg->elpg_stat; + } else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) { + ptr = &pmu->pg->mscg_transition_state; + } + + nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_DISALLOW"); + if (pmu->pg->disallow == NULL) { + nvgpu_err(g, + "PG disallow function not assigned"); + return -EINVAL; + } + ret = pmu->pg->disallow(g, pmu, pg_engine_id); + if (ret != 0) { + nvgpu_err(g, "PMU_PG_ELPG_CMD_DISALLOW \ + cmd post failed"); + goto exit_unlock; + } + + pmu_wait_message_cond(pmu, + nvgpu_get_poll_timeout(g), + ptr, PMU_ELPG_STAT_OFF); + if (*ptr != PMU_ELPG_STAT_OFF) { + nvgpu_err(g, "ELPG_DISALLOW_ACK failed"); + pmu_dump_elpg_stats(pmu); + nvgpu_pmu_dump_falcon_stats(pmu); + ret = -EBUSY; + goto exit_unlock; + } + } + } + +exit_reschedule: +exit_unlock: + nvgpu_mutex_release(&pmu->pg->elpg_mutex); + nvgpu_log_fn(g, "done"); + return ret; +} + +int nvgpu_pmu_reenable_elpg(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + int ret = 0; + + nvgpu_log_fn(g, " "); + + if (!is_pg_supported(g, pmu->pg)) { + return ret; + } + + /* If pmu enabled, re-enable by first disabling, then + * enabling. 
+ */ + if (pmu->pg->elpg_refcnt != 0) { + ret = nvgpu_pmu_disable_elpg(g); + if (ret != 0) { + nvgpu_err(g, "failed disabling elpg"); + goto exit; + } + ret = nvgpu_pmu_enable_elpg(g); + if (ret != 0) { + nvgpu_err(g, "failed enabling elpg"); + goto exit; + } + } +exit: + return ret; +} + +/* PG init */ +void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + struct nvgpu_pmu *pmu = param; + + nvgpu_log_fn(g, " "); + + if (status != 0U) { + nvgpu_err(g, "ELPG cmd aborted"); + return; + } + + switch (msg->msg.pg.stat.sub_msg_id) { + case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET: + nvgpu_pmu_dbg(g, "ALLOC_DMEM_OFFSET is acknowledged from PMU"); + pmu->pg->stat_dmem_offset[msg->msg.pg.stat.engine_id] = + msg->msg.pg.stat.data; + break; + default: + nvgpu_err(g, "Invalid msg id:%u", + msg->msg.pg.stat.sub_msg_id); + break; + } +} + +static int pmu_pg_init_powergating(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + u8 pg_engine_id; + u32 pg_engine_id_list = 0; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (pmu->pg->supported_engines_list != NULL) { + pg_engine_id_list = pmu->pg->supported_engines_list(g); + } + + g->ops.gr.init.wait_initialized(g); + + for (pg_engine_id = PMU_PG_ELPG_ENGINE_ID_GRAPHICS; + pg_engine_id < PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE; + pg_engine_id++) { + + if ((BIT32(pg_engine_id) & pg_engine_id_list) != 0U) { + if (pmu != NULL && + nvgpu_pmu_get_fw_state(g, pmu) == + PMU_FW_STATE_INIT_RECEIVED) { + nvgpu_pmu_fw_state_change(g, pmu, + PMU_FW_STATE_ELPG_BOOTING, false); + } + /* Error print handled by pmu->pg->init_send */ + err = pmu->pg->init_send(g, pmu, pg_engine_id); + if (err != 0) { + return err; + } + } + } + + if (pmu->pg->param_post_init != NULL) { + /* Error print handled by param_post_init */ + err = pmu->pg->param_post_init(g); + } + + return err; +} + +void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + struct nvgpu_pmu *pmu = param; + struct pmu_pg_msg_eng_buf_stat *eng_buf_stat = + &msg->msg.pg.eng_buf_stat; + + nvgpu_log_fn(g, " "); + + nvgpu_pmu_dbg(g, + "reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS"); + if (status != 0U) { + nvgpu_err(g, "PGENG cmd aborted"); + return; + } + + pmu->pg->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED); + if ((!pmu->pg->buf_loaded) && + (nvgpu_pmu_get_fw_state(g, pmu) == + PMU_FW_STATE_LOADING_PG_BUF)) { + nvgpu_err(g, "failed to load PGENG buffer"); + } else { + nvgpu_pmu_fw_state_change(g, pmu, + nvgpu_pmu_get_fw_state(g, pmu), true); + } +} + +static int pmu_pg_init_bind_fecs(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + int err = 0; + nvgpu_log_fn(g, " "); + + nvgpu_pmu_dbg(g, + "cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS"); + nvgpu_pmu_fw_state_change(g, pmu, PMU_FW_STATE_LOADING_PG_BUF, false); + if (pmu->pg->load_buff == NULL) { + nvgpu_err(g, "PG load buffer function not assigned"); + return -EINVAL; + } + err = pmu->pg->load_buff(g, pmu); + if (err != 0) { + nvgpu_err(g, "cmd LOAD PMU_PGENG_GR_BUFFER_IDX_FECS failed\n"); + } + + return err; +} + +static int pmu_pg_setup_hw_load_zbc(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + int err = 0; + + nvgpu_pmu_dbg(g, + "cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC"); + nvgpu_pmu_fw_state_change(g, pmu, PMU_FW_STATE_LOADING_ZBC, false); + if (pmu->pg->hw_load_zbc == NULL) { + nvgpu_err(g, "PG load zbc function not assigned"); + return 
-EINVAL; + } + err = pmu->pg->hw_load_zbc(g, pmu); + if (err != 0) { + nvgpu_err(g, "CMD LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC failed\n"); + } + + return err; +} + +/* stats */ +int nvgpu_pmu_get_pg_stats(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data) +{ + struct nvgpu_pmu *pmu = g->pmu; + u32 pg_engine_id_list = 0; + int err = 0; + + if (!is_pg_supported(g, pmu->pg) || !pmu->pg->initialized) { + pg_stat_data->ingating_time = 0; + pg_stat_data->ungating_time = 0; + pg_stat_data->gating_cnt = 0; + return 0; + } + + if (pmu->pg->supported_engines_list != NULL) { + pg_engine_id_list = pmu->pg->supported_engines_list(g); + } + + if ((BIT32(pg_engine_id) & pg_engine_id_list) != 0U) { + err = nvgpu_pmu_elpg_statistics(g, pg_engine_id, pg_stat_data); + } + + return err; +} + +/* PG state machine */ +static void pmu_pg_kill_task(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + struct nvgpu_timeout timeout; + int err = 0; + + /* make sure the pending operations are finished before we continue */ + if (nvgpu_thread_is_running(&pg->pg_init.state_task)) { + + /* post PMU_FW_STATE_EXIT to exit PMU state machine loop */ + nvgpu_pmu_fw_state_change(g, pmu, PMU_FW_STATE_EXIT, true); + + /* Make thread stop*/ + nvgpu_thread_stop(&pg->pg_init.state_task); + + /* wait to confirm thread stopped */ + err = nvgpu_timeout_init(g, &timeout, 1000, + NVGPU_TIMER_RETRY_TIMER); + if (err != 0) { + nvgpu_err(g, "timeout_init failed err=%d", err); + return; + } + do { + if (!nvgpu_thread_is_running(&pg->pg_init.state_task)) { + break; + } + nvgpu_udelay(2); + } while (nvgpu_timeout_expired_msg(&timeout, + "timeout - waiting PMU state machine thread stop") == 0); + /* Reset the flag for next time */ + pmu->pg->pg_init.state_destroy = false; + } else { + nvgpu_thread_join(&pg->pg_init.state_task); + } +} + +static int pmu_pg_task(void *arg) +{ + struct gk20a *g = (struct gk20a *)arg; + struct nvgpu_pmu *pmu = g->pmu; + struct nvgpu_pg_init *pg_init = &pmu->pg->pg_init; + u32 pmu_state = 0; + int err = 0; + + nvgpu_log_fn(g, "thread start"); + + while (true) { + + NVGPU_COND_WAIT_INTERRUPTIBLE(&pg_init->wq, + (pg_init->state_change == true), 0U); + + pmu->pg->pg_init.state_change = false; + pmu_state = nvgpu_pmu_get_fw_state(g, pmu); + + if (pmu->pg->pg_init.state_destroy) { + nvgpu_pmu_dbg(g, "pmu state exit"); + break; + } + + switch (pmu_state) { + case PMU_FW_STATE_INIT_RECEIVED: + nvgpu_pmu_dbg(g, "pmu starting"); + if (g->can_elpg) { + err = pmu_pg_init_powergating(g, pmu, pmu->pg); + } + break; + case PMU_FW_STATE_ELPG_BOOTED: + nvgpu_pmu_dbg(g, "elpg booted"); + err = pmu_pg_init_bind_fecs(g, pmu, pmu->pg); + break; + case PMU_FW_STATE_LOADING_PG_BUF: + nvgpu_pmu_dbg(g, "loaded pg buf"); + err = pmu_pg_setup_hw_load_zbc(g, pmu, pmu->pg); + break; + case PMU_FW_STATE_LOADING_ZBC: + nvgpu_pmu_dbg(g, "loaded zbc"); + err = pmu_pg_setup_hw_enable_elpg(g, pmu, pmu->pg); + nvgpu_pmu_dbg(g, "PMU booted"); + break; + default: + nvgpu_pmu_dbg(g, "invalid state"); + err = -EINVAL; + break; + } + + } + /* + * If an operation above failed, the error was already logged by the + * operation itself and this thread will end just like in the normal case + */ + if (err != 0) { + nvgpu_err(g, "pg_init_task failed err=%d", err); + } + + while (!nvgpu_thread_should_stop(&pg_init->state_task)) { + nvgpu_usleep_range(5000, 5100); + } + + nvgpu_log_fn(g, "thread exit"); + + return err; +} + +static int pmu_pg_task_init(struct gk20a *g, struct nvgpu_pmu_pg *pg) +{ + char thread_name[64]; + 
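/* thread name carries g->name so per-GPU PG init threads are easy to identify */ + 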
int err = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_cond_init(&pg->pg_init.wq); + + (void) snprintf(thread_name, sizeof(thread_name), + "nvgpu_pg_init_%s", g->name); + + err = nvgpu_thread_create(&pg->pg_init.state_task, g, + pmu_pg_task, thread_name); + if (err != 0) { + nvgpu_err(g, "failed to start nvgpu_pg_init thread (%d)", err); + } + + return err; +} + +static int pmu_pg_init_seq_buf(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + int err; + u8 *ptr; + + err = nvgpu_dma_alloc_map_sys(vm, PMU_PG_SEQ_BUF_SIZE, + &pg->seq_buf); + if (err != 0) { + return err; + } + + ptr = (u8 *)pg->seq_buf.cpu_va; + + ptr[0] = 0x16; /* opcode EXIT */ + ptr[1] = 0; ptr[2] = 1; ptr[3] = 0; + ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0; + + pg->seq_buf.size = PMU_PG_SEQ_BUF_SIZE; + + return err; +} + +int nvgpu_pmu_pg_sw_setup(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + int err; + + if (!is_pg_supported(g, pg)) { + return 0; + } + + /* start with elpg disabled until first enable call */ + pg->elpg_refcnt = 0; + + /* skip seq_buf alloc during unrailgate path */ + if (!nvgpu_mem_is_valid(&pg->seq_buf)) { + err = pmu_pg_init_seq_buf(g, pmu, pg); + if (err != 0) { + nvgpu_err(g, "failed to allocate memory"); + return err; + } + } + + if (nvgpu_thread_is_running(&pg->pg_init.state_task)) { + return 0; + } + + /* Create thread to handle PMU state machine */ + return pmu_pg_task_init(g, pg); +} + +void nvgpu_pmu_pg_destroy(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + struct pmu_pg_stats_data pg_stat_data = { 0 }; + + if (!is_pg_supported(g, pg)) { + return; + } + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); + + if (nvgpu_pmu_disable_elpg(g) != 0) { + nvgpu_err(g, "failed to set disable elpg"); + } + + pg->initialized = false; + + /* update the s/w ELPG residency counters */ + g->pg_ingating_time_us += (u64)pg_stat_data.ingating_time; + g->pg_ungating_time_us += (u64)pg_stat_data.ungating_time; + g->pg_gating_cnt += pg_stat_data.gating_cnt; + + pg->zbc_ready = false; +} + +int nvgpu_pmu_pg_init(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg **pg_p) +{ + struct nvgpu_pmu_pg *pg; + int err = 0; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + + if (!g->support_ls_pmu || !g->can_elpg) { + return 0; + } + + if (*pg_p != NULL) { + /* skip alloc/reinit for unrailgate sequence */ + nvgpu_pmu_dbg(g, "skip lsfm init for unrailgate sequence"); + goto exit; + } + + pg = (struct nvgpu_pmu_pg *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_pmu_pg)); + if (pg == NULL) { + err = -ENOMEM; + goto exit; + } + + /* set default values to aelpg parameters */ + pg->aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; + pg->aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; + pg->aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; + pg->aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US; + pg->aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; + + nvgpu_mutex_init(&pg->elpg_mutex); + nvgpu_mutex_init(&pg->pg_mutex); + + *pg_p = pg; + + switch (ver) { + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + nvgpu_gm20b_pg_sw_init(g, *pg_p); + break; + + case NVGPU_GPUID_GP10B: + nvgpu_gp10b_pg_sw_init(g, *pg_p); + break; + + case NVGPU_GPUID_GV11B: + nvgpu_gv11b_pg_sw_init(g, *pg_p); + break; + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) + case NVGPU_NEXT_GPUID: + nvgpu_next_pg_sw_init(g, *pg_p); + break; 
+#endif + + default: + nvgpu_kfree(g, *pg_p); + err = -EINVAL; + nvgpu_err(g, "no support for GPUID %x", ver); + break; + } +exit: + return err; +} + +void nvgpu_pmu_pg_deinit(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_pg *pg) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + + if (!is_pg_supported(g, pg)) { + return; + } + + pmu_pg_kill_task(g, pmu, pg); + + if (nvgpu_mem_is_valid(&pg->seq_buf)) { + nvgpu_dma_unmap_free(vm, &pg->seq_buf); + } + nvgpu_mutex_destroy(&pg->elpg_mutex); + nvgpu_mutex_destroy(&pg->pg_mutex); + nvgpu_kfree(g, pg); +} + +void nvgpu_pmu_set_golden_image_initialized(struct gk20a *g, bool initialized) +{ + struct nvgpu_pmu *pmu = g->pmu; + + if (!is_pg_supported(g, pmu->pg)) { + return; + } + + pmu->pg->golden_image_initialized = initialized; +} + +int nvgpu_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, + struct pmu_pg_stats_data *pg_stat_data) +{ + struct nvgpu_pmu *pmu = g->pmu; + + if (!is_pg_supported(g, pmu->pg)) { + return 0; + } + + return pmu->pg->elpg_statistics(g, pg_engine_id, pg_stat_data); +} + +void nvgpu_pmu_save_zbc(struct gk20a *g, u32 entries) +{ + struct nvgpu_pmu *pmu = g->pmu; + + if (!is_pg_supported(g, pmu->pg)) { + return; + } + + return pmu->pg->save_zbc(g, entries); +} + +bool nvgpu_pmu_is_lpwr_feature_supported(struct gk20a *g, u32 feature_id) +{ + struct nvgpu_pmu *pmu = g->pmu; + + if (!is_pg_supported(g, pmu->pg)) { + return false; + } + + return pmu->pg->is_lpwr_feature_supported(g, feature_id); +} + +u64 nvgpu_pmu_pg_buf_get_gpu_va(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + if (!is_pg_supported(g, pmu->pg)) { + return 0; + } + + return pmu->pg->pg_buf.gpu_va; +} + +struct nvgpu_mem *nvgpu_pmu_pg_buf(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + if (!is_pg_supported(g, pmu->pg)) { + return NULL; + } + + return &pmu->pg->pg_buf; +} + +void *nvgpu_pmu_pg_buf_get_cpu_va(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + if (!is_pg_supported(g, pmu->pg)) { + return NULL; + } + + return pmu->pg->pg_buf.cpu_va; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.h b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.h new file mode 100644 index 000000000..8b5a7dcea --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef PMU_PG_PMU_PG_H +#define PMU_PG_PMU_PG_H + +/* state transition : + * OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF + * ON => OFF is always synchronized + */ +/* elpg is off */ +#define PMU_ELPG_STAT_OFF 0U +/* elpg is on */ +#define PMU_ELPG_STAT_ON 1U +/* elpg is off, ALLOW cmd has been sent, wait for ack */ +#define PMU_ELPG_STAT_ON_PENDING 2U +/* elpg is on, DISALLOW cmd has been sent, wait for ack */ +#define PMU_ELPG_STAT_OFF_PENDING 3U +/* elpg is off, caller has requested on, but ALLOW + * cmd hasn't been sent due to ENABLE_ALLOW delay + */ +#define PMU_ELPG_STAT_OFF_ON_PENDING 4U + +#define PMU_PGENG_GR_BUFFER_IDX_INIT 0U +#define PMU_PGENG_GR_BUFFER_IDX_ZBC 1U +#define PMU_PGENG_GR_BUFFER_IDX_FECS 2U + +void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status); +void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status); +void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status); + +#endif /* PMU_PG_PMU_PG_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pmgr.c b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgr.c new file mode 100644 index 000000000..78748513e --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgr.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "pmgr.h" +#include "pwrdev.h" +#include "pmgrpmu.h" + +int pmgr_pwr_devices_get_power(struct gk20a *g, u32 *val) +{ + struct nv_pmu_pmgr_pwr_devices_query_payload payload; + int status; + + status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload); + if (status != 0) { + nvgpu_err(g, "pmgr_pwr_devices_get_current_power failed %x", + status); + } + + *val = payload.devices[0].powerm_w; + + return status; +} + +int pmgr_pwr_devices_get_current(struct gk20a *g, u32 *val) +{ + struct nv_pmu_pmgr_pwr_devices_query_payload payload; + int status; + + status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload); + if (status != 0) { + nvgpu_err(g, "pmgr_pwr_devices_get_current failed %x", + status); + } + + *val = payload.devices[0].currentm_a; + + return status; +} + +int pmgr_pwr_devices_get_voltage(struct gk20a *g, u32 *val) +{ + struct nv_pmu_pmgr_pwr_devices_query_payload payload; + int status; + + status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload); + if (status != 0) { + nvgpu_err(g, "pmgr_pwr_devices_get_current_voltage failed %x", + status); + } + + *val = payload.devices[0].voltageu_v; + + return status; +} + +int pmgr_domain_sw_setup(struct gk20a *g) +{ + int status; + + status = pmgr_device_sw_setup(g); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr devices, status - 0x%x", + status); + goto exit; + } + + status = pmgr_monitor_sw_setup(g); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr monitor, status - 0x%x", + status); + goto exit; + } + + status = pmgr_policy_sw_setup(g); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr policy, status - 0x%x", + status); + goto exit; + } + +exit: + return status; +} + +int pmgr_domain_pmu_setup(struct gk20a *g) +{ + return pmgr_send_pmgr_tables_to_pmu(g); +} + +int pmgr_pmu_init_pmupstate(struct gk20a *g) +{ + /* If already allocated, do not re-allocate */ + if (g->pmgr_pmu != NULL) { + return 0; + } + + g->pmgr_pmu = nvgpu_kzalloc(g, sizeof(*g->pmgr_pmu)); + if (g->pmgr_pmu == NULL) { + return -ENOMEM; + } + + return 0; +} + +void pmgr_pmu_free_pmupstate(struct gk20a *g) +{ + nvgpu_kfree(g, g->pmgr_pmu); + g->pmgr_pmu = NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pmgr.h b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgr.h new file mode 100644 index 000000000..2d62ffc40 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgr.h @@ -0,0 +1,37 @@ +/* + * general power device structures & definitions + * + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PMGR_PMGR_H +#define NVGPU_PMGR_PMGR_H + +#include "pwrdev.h" +#include "pwrmonitor.h" +#include "pwrpolicy.h" + +struct pmgr_pmupstate { + struct pwr_devices pmgr_deviceobjs; + struct pmgr_pwr_monitor pmgr_monitorobjs; + struct pmgr_pwr_policy pmgr_policyobjs; +}; + +#endif /* NVGPU_PMGR_PMGR_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pmgrpmu.c b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgrpmu.c new file mode 100644 index 000000000..f4586ec64 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgrpmu.c @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pwrdev.h" +#include "pmgr.h" +#include "pmgrpmu.h" + +struct pmgr_pmucmdhandler_params { + u32 success; +}; + +static void pmgr_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg, + void *param, u32 status) +{ + struct pmgr_pmucmdhandler_params *phandlerparams = + (struct pmgr_pmucmdhandler_params *)param; + + if ((msg->msg.pmgr.msg_type != NV_PMU_PMGR_MSG_ID_SET_OBJECT) && + (msg->msg.pmgr.msg_type != NV_PMU_PMGR_MSG_ID_QUERY) && + (msg->msg.pmgr.msg_type != NV_PMU_PMGR_MSG_ID_LOAD)) { + nvgpu_err(g, "unknown msg %x", msg->msg.pmgr.msg_type); + return; + } + + if (msg->msg.pmgr.msg_type == NV_PMU_PMGR_MSG_ID_SET_OBJECT) { + if (!msg->msg.pmgr.set_object.b_success || + (msg->msg.pmgr.set_object.flcnstatus != 0U)) { + nvgpu_err(g, "pmgr msg failed %x %x %x %x", + msg->msg.pmgr.set_object.msg_type, + msg->msg.pmgr.set_object.b_success, + msg->msg.pmgr.set_object.flcnstatus, + msg->msg.pmgr.set_object.object_type); + return; + } + } else if (msg->msg.pmgr.msg_type == NV_PMU_PMGR_MSG_ID_QUERY) { + if (!msg->msg.pmgr.query.b_success || + (msg->msg.pmgr.query.flcnstatus != 0U)) { + nvgpu_err(g, "pmgr msg failed %x %x %x %x", + msg->msg.pmgr.query.msg_type, + msg->msg.pmgr.query.b_success, + msg->msg.pmgr.query.flcnstatus, + msg->msg.pmgr.query.cmd_type); + return; + } + } else if (msg->msg.pmgr.msg_type == NV_PMU_PMGR_MSG_ID_LOAD) { + if (!msg->msg.pmgr.query.b_success || + (msg->msg.pmgr.query.flcnstatus != 0U)) { + nvgpu_err(g, "pmgr msg failed %x %x %x", + msg->msg.pmgr.load.msg_type, + msg->msg.pmgr.load.b_success, + msg->msg.pmgr.load.flcnstatus); + return; + } + } else { + /*Will not hit this case due to check in the beginning itself*/ + nvgpu_err(g, "unknown msg %x", msg->msg.pmgr.msg_type); + } + + phandlerparams->success = 1; +} + +static int pmgr_pmu_set_object(struct gk20a *g, + u8 type, + u16 dmem_size, + u16 fb_size, + void *pobj) +{ + struct pmu_cmd cmd; + struct pmu_payload payload; + struct nv_pmu_pmgr_cmd_set_object *pcmd; + int status; + struct pmgr_pmucmdhandler_params handlerparams; + + (void) memset(&payload, 0, sizeof(struct pmu_payload)); + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + (void) memset(&handlerparams, 0, + sizeof(struct pmgr_pmucmdhandler_params)); + + cmd.hdr.unit_id = PMU_UNIT_PMGR; + cmd.hdr.size = (u32)sizeof(struct nv_pmu_pmgr_cmd_set_object) + + (u32)sizeof(struct pmu_hdr);; + + pcmd = &cmd.cmd.pmgr.set_object; + pcmd->cmd_type = NV_PMU_PMGR_CMD_ID_SET_OBJECT; + pcmd->object_type = type; + + payload.in.buf = pobj; + payload.in.size = dmem_size; + payload.in.fb_size = fb_size; + payload.in.offset = NV_PMU_PMGR_SET_OBJECT_ALLOC_OFFSET; + + /* Setup the handler params to communicate back results.*/ + handlerparams.success = 0; + + status = nvgpu_pmu_cmd_post(g, &cmd, &payload, + PMU_COMMAND_QUEUE_LPQ, + pmgr_pmucmdhandler, + (void *)&handlerparams); + if (status != 0) { + nvgpu_err(g, + "unable to post pmgr cmd for unit %x cmd id %x obj type %x", + cmd.hdr.unit_id, pcmd->cmd_type, pcmd->object_type); + goto exit; + } + + pmu_wait_message_cond(g->pmu, + nvgpu_get_poll_timeout(g), + &handlerparams.success, 1); + + if (handlerparams.success == 0U) { + nvgpu_err(g, "could not process cmd"); + status = -ETIMEDOUT; + goto exit; + } + +exit: + return status; +} + +static int pmgr_send_i2c_device_topology_to_pmu(struct gk20a *g) +{ + struct nv_pmu_pmgr_i2c_device_desc_table i2c_desc_table; + u32 idx = g->ina3221_dcb_index; + int status = 0; + + /* 
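Build a one-entry I2C device descriptor table for the board's power monitor and hand it to the PMU as a PMGR object: 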
INA3221 I2C device info */ + i2c_desc_table.dev_mask = BIT32(idx); + + /* INA3221 */ + i2c_desc_table.devices[idx].super.type = 0x4E; + + nvgpu_assert(idx < NV_PMU_PMGR_I2C_DEVICE_DESC_TABLE_MAX_DEVICES); + i2c_desc_table.devices[idx].dcb_index = (u8)idx; + nvgpu_assert(g->ina3221_i2c_address < (u32)U16_MAX); + i2c_desc_table.devices[idx].i2c_address = (u16)g->ina3221_i2c_address; + i2c_desc_table.devices[idx].i2c_flags = 0xC2F; + nvgpu_assert(g->ina3221_i2c_port <= (u32)U8_MAX); + i2c_desc_table.devices[idx].i2c_port = (u8)g->ina3221_i2c_port; + + /* Pass the table down the PMU as an object */ + status = pmgr_pmu_set_object( + g, + NV_PMU_PMGR_OBJECT_I2C_DEVICE_DESC_TABLE, + (u16)sizeof(struct nv_pmu_pmgr_i2c_device_desc_table), + PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED, + &i2c_desc_table); + + if (status != 0) { + nvgpu_err(g, "pmgr_pmu_set_object failed %x", + status); + } + + return status; +} + +static int pmgr_send_pwr_device_topology_to_pmu(struct gk20a *g) +{ + struct nv_pmu_pmgr_pwr_device_desc_table *pwr_desc_table; + struct nv_pmu_pmgr_pwr_device_desc_table_header *ppwr_desc_header; + int status = 0; + + /* Set the BA-device-independent HW information */ + pwr_desc_table = nvgpu_kzalloc(g, sizeof(*pwr_desc_table)); + if (pwr_desc_table == NULL) { + return -ENOMEM; + } + + ppwr_desc_header = &(pwr_desc_table->hdr.data); + ppwr_desc_header->ba_info.b_initialized_and_used = false; + + /* populate the table */ + nvgpu_boardobjgrp_e32_hdr_set((struct nv_pmu_boardobjgrp *) + &ppwr_desc_header->super, + g->pmgr_pmu->pmgr_deviceobjs.super.super.objmask); + + status = nvgpu_boardobjgrp_pmu_data_init_legacy(g, + &g->pmgr_pmu->pmgr_deviceobjs.super.super, + (struct nv_pmu_boardobjgrp_super *)pwr_desc_table); + + if (status != 0) { + nvgpu_err(g, "boardobjgrp_pmudatainit_legacy failed %x", + status); + goto exit; + } + + /* Pass the table down the PMU as an object */ + status = pmgr_pmu_set_object( + g, + NV_PMU_PMGR_OBJECT_PWR_DEVICE_DESC_TABLE, + (u16)sizeof( + union nv_pmu_pmgr_pwr_device_dmem_size), + (u16)sizeof(struct nv_pmu_pmgr_pwr_device_desc_table), + pwr_desc_table); + + if (status != 0) { + nvgpu_err(g, "pmgr_pmu_set_object failed %x", + status); + } + +exit: + nvgpu_kfree(g, pwr_desc_table); + return status; +} + +static int pmgr_send_pwr_mointer_to_pmu(struct gk20a *g) +{ + struct nv_pmu_pmgr_pwr_monitor_pack *pwr_monitor_pack = NULL; + struct nv_pmu_pmgr_pwr_channel_header *pwr_channel_hdr; + struct nv_pmu_pmgr_pwr_chrelationship_header *pwr_chrelationship_header; + u32 max_dmem_size; + int status = 0; + + pwr_monitor_pack = nvgpu_kzalloc(g, sizeof(*pwr_monitor_pack)); + if (pwr_monitor_pack == NULL) { + return -ENOMEM; + } + + /* Copy all the global settings from the RM copy */ + pwr_channel_hdr = &(pwr_monitor_pack->channels.hdr.data); + *pwr_monitor_pack = g->pmgr_pmu->pmgr_monitorobjs.pmu_data; + + nvgpu_boardobjgrp_e32_hdr_set( + (struct nv_pmu_boardobjgrp *)&pwr_channel_hdr->super, + g->pmgr_pmu->pmgr_monitorobjs. 
+ pwr_channels.super.objmask); + + /* Copy in each channel */ + status = nvgpu_boardobjgrp_pmu_data_init_legacy(g,&g->pmgr_pmu-> + pmgr_monitorobjs.pwr_channels.super, + (struct nv_pmu_boardobjgrp_super *)& + (pwr_monitor_pack->channels)); + + if (status != 0) { + nvgpu_err(g, "boardobjgrp_pmudatainit_legacy failed %x", + status); + goto exit; + } + + /* Copy in each channel relationship */ + pwr_chrelationship_header = &(pwr_monitor_pack->ch_rels.hdr.data); + + nvgpu_boardobjgrp_e32_hdr_set((struct nv_pmu_boardobjgrp *) + &pwr_chrelationship_header->super, + g->pmgr_pmu->pmgr_monitorobjs. + pwr_ch_rels.super.objmask); + + pwr_channel_hdr->physical_channel_mask = + g->pmgr_pmu->pmgr_monitorobjs.physical_channel_mask; + pwr_channel_hdr->type = NV_PMU_PMGR_PWR_MONITOR_TYPE_NO_POLLING; + + status = nvgpu_boardobjgrp_pmu_data_init_legacy(g, + &g->pmgr_pmu->pmgr_monitorobjs.pwr_ch_rels.super, + (struct nv_pmu_boardobjgrp_super *)& + (pwr_monitor_pack->ch_rels)); + + if (status != 0) { + nvgpu_err(g, "boardobjgrp_pmudatainit_legacy failed %x", + status); + goto exit; + } + + /* Calculate the max Dmem buffer size */ + max_dmem_size = (u32)sizeof(union nv_pmu_pmgr_pwr_monitor_dmem_size); + + /* Pass the table down the PMU as an object */ + status = pmgr_pmu_set_object( + g, + NV_PMU_PMGR_OBJECT_PWR_MONITOR, + (u16)max_dmem_size, + (u16)sizeof(struct nv_pmu_pmgr_pwr_monitor_pack), + pwr_monitor_pack); + + if (status != 0) { + nvgpu_err(g, "pmgr_pmu_set_object failed %x", + status); + } + +exit: + nvgpu_kfree(g, pwr_monitor_pack); + return status; +} + +static int pmgr_send_pwr_policy_to_pmu(struct gk20a *g) +{ + struct nv_pmu_pmgr_pwr_policy_pack *ppwrpack = NULL; + struct pwr_policy *ppolicy = NULL; + int status = 0; + u8 indx; + u32 max_dmem_size; + + ppwrpack = nvgpu_kzalloc(g, sizeof(struct nv_pmu_pmgr_pwr_policy_pack)); + if (ppwrpack == NULL) { + nvgpu_err(g, "pwr policy alloc failed %x", + status); + status = -ENOMEM; + goto exit; + } + + ppwrpack->policies.hdr.data.version = g->pmgr_pmu->pmgr_policyobjs.version; + ppwrpack->policies.hdr.data.b_enabled = g->pmgr_pmu->pmgr_policyobjs.b_enabled; + + nvgpu_boardobjgrp_e32_hdr_set((struct nv_pmu_boardobjgrp *) + &ppwrpack->policies.hdr.data.super, g->pmgr_pmu-> + pmgr_policyobjs.pwr_policies.super.objmask); + + (void) memset(&ppwrpack->policies.hdr.data.reserved_pmu_policy_mask, + 0, + sizeof(ppwrpack->policies.hdr.data.reserved_pmu_policy_mask)); + + ppwrpack->policies.hdr.data.base_sample_period = + g->pmgr_pmu->pmgr_policyobjs.base_sample_period; + ppwrpack->policies.hdr.data.min_client_sample_period = + g->pmgr_pmu->pmgr_policyobjs.min_client_sample_period; + ppwrpack->policies.hdr.data.low_sampling_mult = + g->pmgr_pmu->pmgr_policyobjs.low_sampling_mult; + + nvgpu_memcpy((u8 *)&ppwrpack->policies.hdr.data.global_ceiling, + (u8 *)&g->pmgr_pmu->pmgr_policyobjs.global_ceiling, + sizeof(struct nv_pmu_perf_domain_group_limits)); + + nvgpu_memcpy((u8 *)&ppwrpack->policies.hdr.data.semantic_policy_tbl, + (u8 *)&g->pmgr_pmu->pmgr_policyobjs.policy_idxs, + sizeof(g->pmgr_pmu->pmgr_policyobjs.policy_idxs)); + + BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK(32, indx, + ppwrpack->policies.hdr.data.super.obj_mask.super.data[0]) { + ppolicy = PMGR_GET_PWR_POLICY(g, indx); + + status = ((struct pmu_board_obj *)(void *)ppolicy)->pmudatainit( + g, (struct pmu_board_obj *)ppolicy, + (struct nv_pmu_boardobj *)&(ppwrpack->policies.policies[indx].data)); + if (status != 0) { + nvgpu_err(g, "pmudatainit failed %x indx %x", + status, indx); + status = -ENOMEM; + goto exit; 
+ } + } + BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK_END; + + nvgpu_boardobjgrp_e32_hdr_set((struct nv_pmu_boardobjgrp *) + &ppwrpack->policy_rels.hdr.data.super, + g->pmgr_pmu->pmgr_policyobjs. + pwr_policy_rels.super.objmask); + + nvgpu_boardobjgrp_e32_hdr_set((struct nv_pmu_boardobjgrp *) + &ppwrpack->violations.hdr.data.super, + g->pmgr_pmu->pmgr_policyobjs. + pwr_violations.super.objmask); + + max_dmem_size = (u32)sizeof(union nv_pmu_pmgr_pwr_policy_dmem_size); + + /* Pass the table down the PMU as an object */ + status = pmgr_pmu_set_object( + g, + NV_PMU_PMGR_OBJECT_PWR_POLICY, + (u16)max_dmem_size, + (u16)sizeof(struct nv_pmu_pmgr_pwr_policy_pack), + ppwrpack); + + if (status != 0) { + nvgpu_err(g, "pmgr_pmu_set_object failed %x", + status); + } + +exit: + if (ppwrpack != NULL) { + nvgpu_kfree(g, ppwrpack); + } + + return status; +} + +int pmgr_pmu_pwr_devices_query_blocking( + struct gk20a *g, + u32 pwr_dev_mask, + struct nv_pmu_pmgr_pwr_devices_query_payload *ppayload) +{ + struct pmu_cmd cmd; + struct pmu_payload payload; + struct nv_pmu_pmgr_cmd_pwr_devices_query *pcmd; + int status; + struct pmgr_pmucmdhandler_params handlerparams; + + (void) memset(&payload, 0, sizeof(struct pmu_payload)); + (void) memset(&cmd, 0, sizeof(struct pmu_cmd)); + (void) memset(&handlerparams, 0, + sizeof(struct pmgr_pmucmdhandler_params)); + + cmd.hdr.unit_id = PMU_UNIT_PMGR; + cmd.hdr.size = (u32)sizeof(struct nv_pmu_pmgr_cmd_pwr_devices_query) + + (u32)sizeof(struct pmu_hdr); + + pcmd = &cmd.cmd.pmgr.pwr_dev_query; + pcmd->cmd_type = NV_PMU_PMGR_CMD_ID_PWR_DEVICES_QUERY; + pcmd->dev_mask = pwr_dev_mask; + + payload.out.buf = ppayload; + payload.out.size = (u32)sizeof(struct nv_pmu_pmgr_pwr_devices_query_payload); + payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED; + payload.out.offset = NV_PMU_PMGR_PWR_DEVICES_QUERY_ALLOC_OFFSET; + + /* Setup the handler params to communicate back results.*/ + handlerparams.success = 0; + + status = nvgpu_pmu_cmd_post(g, &cmd, &payload, + PMU_COMMAND_QUEUE_LPQ, + pmgr_pmucmdhandler, + (void *)&handlerparams); + if (status != 0) { + nvgpu_err(g, + "unable to post pmgr query cmd for unit %x cmd id %x dev mask %x", + cmd.hdr.unit_id, pcmd->cmd_type, pcmd->dev_mask); + goto exit; + } + + pmu_wait_message_cond(g->pmu, + nvgpu_get_poll_timeout(g), + &handlerparams.success, 1); + + if (handlerparams.success == 0U) { + nvgpu_err(g, "could not process cmd"); + status = -ETIMEDOUT; + goto exit; + } + +exit: + return status; +} + +static int pmgr_pmu_load_blocking(struct gk20a *g) +{ + struct pmu_cmd cmd = { {0} }; + struct nv_pmu_pmgr_cmd_load *pcmd; + int status; + struct pmgr_pmucmdhandler_params handlerparams = {0}; + + cmd.hdr.unit_id = PMU_UNIT_PMGR; + cmd.hdr.size = (u32)sizeof(struct nv_pmu_pmgr_cmd_load) + + (u32)sizeof(struct pmu_hdr); + + pcmd = &cmd.cmd.pmgr.load; + pcmd->cmd_type = NV_PMU_PMGR_CMD_ID_LOAD; + + /* Setup the handler params to communicate back results.*/ + handlerparams.success = 0; + + status = nvgpu_pmu_cmd_post(g, &cmd, NULL, + PMU_COMMAND_QUEUE_LPQ, + pmgr_pmucmdhandler, + (void *)&handlerparams); + if (status != 0) { + nvgpu_err(g, + "unable to post pmgr load cmd for unit %x cmd id %x", + cmd.hdr.unit_id, pcmd->cmd_type); + goto exit; + } + + pmu_wait_message_cond(g->pmu, + nvgpu_get_poll_timeout(g), + &handlerparams.success, 1); + + if (handlerparams.success == 0U) { + nvgpu_err(g, "could not process cmd"); + status = -ETIMEDOUT; + goto exit; + } + +exit: + return status; +} + +int pmgr_send_pmgr_tables_to_pmu(struct gk20a *g) +{ + 
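/* Send the pmgr tables to the PMU in order: I2C device descriptors first, + * then the power device, power monitor and power policy tables (each only + * when its boardobj group is non-empty), and finally a blocking LOAD command + * so the PMU starts using them. + */ + 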
int status = 0; + + status = pmgr_send_i2c_device_topology_to_pmu(g); + + if (status != 0) { + nvgpu_err(g, + "pmgr_send_i2c_device_topology_to_pmu failed %x", + status); + goto exit; + } + + if (!BOARDOBJGRP_IS_EMPTY(&g->pmgr_pmu->pmgr_deviceobjs.super.super)) { + status = pmgr_send_pwr_device_topology_to_pmu(g); + if (status != 0) { + nvgpu_err(g, + "pmgr_send_pwr_device_topology_to_pmu failed %x", + status); + goto exit; + } + } + + if (!(BOARDOBJGRP_IS_EMPTY( + &g->pmgr_pmu->pmgr_monitorobjs.pwr_channels.super)) || + !(BOARDOBJGRP_IS_EMPTY( + &g->pmgr_pmu->pmgr_monitorobjs.pwr_ch_rels.super))) { + status = pmgr_send_pwr_mointer_to_pmu(g); + if (status != 0) { + nvgpu_err(g, + "pmgr_send_pwr_mointer_to_pmu failed %x", status); + goto exit; + } + } + + if (!(BOARDOBJGRP_IS_EMPTY( + &g->pmgr_pmu->pmgr_policyobjs.pwr_policies.super)) || + !(BOARDOBJGRP_IS_EMPTY( + &g->pmgr_pmu->pmgr_policyobjs.pwr_policy_rels.super)) || + !(BOARDOBJGRP_IS_EMPTY( + &g->pmgr_pmu->pmgr_policyobjs.pwr_violations.super))) { + status = pmgr_send_pwr_policy_to_pmu(g); + if (status != 0) { + nvgpu_err(g, + "pmgr_send_pwr_policy_to_pmu failed %x", status); + goto exit; + } + } + + status = pmgr_pmu_load_blocking(g); + if (status != 0) { + nvgpu_err(g, + "pmgr_pmu_load_blocking failed %x", status); + goto exit; + } + +exit: + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pmgrpmu.h b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgrpmu.h new file mode 100644 index 000000000..5c1618bd4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pmgrpmu.h @@ -0,0 +1,39 @@ +/* + * general power device control structures & definitions + * + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PMGRPMU_H +#define NVGPU_PMGRPMU_H + +#include + +#include "pwrdev.h" +#include "pwrmonitor.h" + +int pmgr_send_pmgr_tables_to_pmu(struct gk20a *g); + +int pmgr_pmu_pwr_devices_query_blocking( + struct gk20a *g, + u32 pwr_dev_mask, + struct nv_pmu_pmgr_pwr_devices_query_payload *ppayload); + +#endif /* NVGPU_PMGRPMU_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pwrdev.c b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrdev.c new file mode 100644 index 000000000..a770e3c43 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrdev.c @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "pwrdev.h" +#include "pmgr.h" + +static int _pwr_device_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_pmgr_pwr_device_desc_table *ppmgrdevice = + (struct nv_pmu_pmgr_pwr_device_desc_table *)pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + ppmgrdevice->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &ppmgrdevice->devices[idx].data.obj; + + nvgpu_log_info(g, " Done"); + + return 0; +} + +static int _pwr_domains_pmudatainit_ina3221(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + struct nv_pmu_pmgr_pwr_device_desc_ina3221 *ina3221_desc; + struct pwr_device_ina3221 *ina3221; + int status = 0; + u32 indx; + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for pwr domain 0x%x", + status); + goto done; + } + + ina3221 = (struct pwr_device_ina3221 *)(void *)obj; + ina3221_desc = (struct nv_pmu_pmgr_pwr_device_desc_ina3221 *) + (void *) pmu_obj; + + ina3221_desc->super.power_corr_factor = ina3221->super.power_corr_factor; + ina3221_desc->i2c_dev_idx = ina3221->super.i2c_dev_idx; + ina3221_desc->configuration = ina3221->configuration; + ina3221_desc->mask_enable = ina3221->mask_enable; + /* configure NV_PMU_THERM_EVENT_EXT_OVERT */ + ina3221_desc->event_mask = BIT32(0); + ina3221_desc->curr_correct_m = ina3221->curr_correct_m; + ina3221_desc->curr_correct_b = ina3221->curr_correct_b; + + for (indx = 0; indx < NV_PMU_PMGR_PWR_DEVICE_INA3221_CH_NUM; indx++) { + ina3221_desc->r_shuntm_ohm[indx] = ina3221->r_shuntm_ohm[indx]; + } + +done: + return status; +} + +static struct pmu_board_obj *construct_pwr_device(struct gk20a *g, + void *pargs, size_t pargs_size, u8 type) +{ + struct pmu_board_obj *obj = NULL; + int status; + u32 indx; + struct pwr_device_ina3221 *pwrdev; + struct pwr_device_ina3221 *ina3221 = (struct pwr_device_ina3221*)pargs; + + pwrdev = nvgpu_kzalloc(g, pargs_size); + if (pwrdev == NULL) { + return NULL; + } + obj = (struct pmu_board_obj *)(void *)pwrdev; + + status = pmu_board_obj_construct_super(g, obj, pargs); + if (status != 0) { + return NULL; + } + + obj = (struct 
pmu_board_obj *)(void *)pwrdev; + /* Set Super class interfaces */ + obj->pmudatainit = _pwr_domains_pmudatainit_ina3221; + + pwrdev = (struct pwr_device_ina3221 *)(void *)obj; + + pwrdev->super.power_rail = ina3221->super.power_rail; + pwrdev->super.i2c_dev_idx = ina3221->super.i2c_dev_idx; + pwrdev->super.power_corr_factor = BIT32(12); + pwrdev->super.bIs_inforom_config = false; + + /* Set INA3221-specific information */ + pwrdev->configuration = ina3221->configuration; + pwrdev->mask_enable = ina3221->mask_enable; + pwrdev->gpio_function = ina3221->gpio_function; + pwrdev->curr_correct_m = ina3221->curr_correct_m; + pwrdev->curr_correct_b = ina3221->curr_correct_b; + + for (indx = 0; indx < NV_PMU_PMGR_PWR_DEVICE_INA3221_CH_NUM; indx++) { + pwrdev->r_shuntm_ohm[indx] = ina3221->r_shuntm_ohm[indx]; + } + + nvgpu_log_info(g, " Done"); + + return obj; +} + +static int devinit_get_pwr_device_table(struct gk20a *g, + struct pwr_devices *ppwrdeviceobjs) +{ + int status = 0; + u8 *pwr_device_table_ptr = NULL; + u8 *curr_pwr_device_table_ptr = NULL; + struct pmu_board_obj *obj_tmp; + struct pwr_sensors_2x_header pwr_sensor_table_header = { 0 }; + struct pwr_sensors_2x_entry pwr_sensor_table_entry = { 0 }; + u32 index; + u32 obj_index = 0; + size_t pwr_device_size; + union { + struct pmu_board_obj obj; + struct pwr_device pwrdev; + struct pwr_device_ina3221 ina3221; + } pwr_device_data; + + nvgpu_log_info(g, " "); + + pwr_device_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + POWER_SENSORS_TABLE); + if (pwr_device_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&pwr_sensor_table_header, pwr_device_table_ptr, + VBIOS_POWER_SENSORS_2X_HEADER_SIZE_08); + + if (pwr_sensor_table_header.version != + VBIOS_POWER_SENSORS_VERSION_2X) { + status = -EINVAL; + goto done; + } + + if (pwr_sensor_table_header.header_size < + VBIOS_POWER_SENSORS_2X_HEADER_SIZE_08) { + status = -EINVAL; + goto done; + } + + if (pwr_sensor_table_header.table_entry_size != + VBIOS_POWER_SENSORS_2X_ENTRY_SIZE_15) { + status = -EINVAL; + goto done; + } + + curr_pwr_device_table_ptr = (pwr_device_table_ptr + + VBIOS_POWER_SENSORS_2X_HEADER_SIZE_08); + + for (index = 0; index < pwr_sensor_table_header.num_table_entries; index++) { + bool use_fxp8_8 = false; + u8 i2c_dev_idx; + u8 device_type; + + curr_pwr_device_table_ptr += (pwr_sensor_table_header.table_entry_size * index); + + pwr_sensor_table_entry.flags0 = *curr_pwr_device_table_ptr; + + nvgpu_memcpy((u8 *)&pwr_sensor_table_entry.class_param0, + (curr_pwr_device_table_ptr + 1), + (VBIOS_POWER_SENSORS_2X_ENTRY_SIZE_15 - 1U)); + + device_type = BIOS_GET_FIELD(u8, pwr_sensor_table_entry.flags0, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_FLAGS0_CLASS); + + if (device_type == NV_VBIOS_POWER_SENSORS_2X_ENTRY_FLAGS0_CLASS_I2C) { + i2c_dev_idx = BIOS_GET_FIELD(u8, + pwr_sensor_table_entry.class_param0, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_INDEX); + use_fxp8_8 = BIOS_GET_FIELD(bool, + pwr_sensor_table_entry.class_param0, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_USE_FXP8_8); + + pwr_device_data.ina3221.super.i2c_dev_idx = i2c_dev_idx; + pwr_device_data.ina3221.r_shuntm_ohm[0].use_fxp8_8 = use_fxp8_8; + pwr_device_data.ina3221.r_shuntm_ohm[1].use_fxp8_8 = use_fxp8_8; + pwr_device_data.ina3221.r_shuntm_ohm[2].use_fxp8_8 = use_fxp8_8; + pwr_device_data.ina3221.r_shuntm_ohm[0].rshunt_value = + BIOS_GET_FIELD(u16, + pwr_sensor_table_entry.sensor_param0, + 
NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT0_MOHM); + + pwr_device_data.ina3221.r_shuntm_ohm[1].rshunt_value = + BIOS_GET_FIELD(u16, + pwr_sensor_table_entry.sensor_param0, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT1_MOHM); + + pwr_device_data.ina3221.r_shuntm_ohm[2].rshunt_value = + BIOS_GET_FIELD(u16, + pwr_sensor_table_entry.sensor_param1, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_RSHUNT2_MOHM); + + pwr_device_data.ina3221.configuration = + BIOS_GET_FIELD(u16, + pwr_sensor_table_entry.sensor_param1, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_CONFIGURATION); + + pwr_device_data.ina3221.mask_enable = + BIOS_GET_FIELD(u16, + pwr_sensor_table_entry.sensor_param2, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_MASKENABLE); + + pwr_device_data.ina3221.gpio_function = + BIOS_GET_FIELD(u8, + pwr_sensor_table_entry.sensor_param2, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_GPIOFUNCTION); + + pwr_device_data.ina3221.curr_correct_m = + BIOS_GET_FIELD(u16, + pwr_sensor_table_entry.sensor_param3, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_M); + + pwr_device_data.ina3221.curr_correct_b = + BIOS_GET_FIELD(s16, + pwr_sensor_table_entry.sensor_param3, + NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_B); + + if (pwr_device_data.ina3221.curr_correct_m == 0U) { + pwr_device_data.ina3221.curr_correct_m = BIT16(12); + } + pwr_device_size = sizeof(struct pwr_device_ina3221); + } else { + continue; + } + + pwr_device_data.obj.type = CTRL_PMGR_PWR_DEVICE_TYPE_INA3221; + pwr_device_data.pwrdev.power_rail = (u8)0; + + obj_tmp = construct_pwr_device(g, &pwr_device_data, + pwr_device_size, pwr_device_data.obj.type); + + if (obj_tmp == NULL) { + nvgpu_err(g, + "unable to create pwr device for %d type %d", index, + pwr_device_data.obj.type); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&ppwrdeviceobjs->super.super, + obj_tmp, obj_index); + + if (status != 0) { + nvgpu_err(g, + "unable to insert pwr device boardobj for %d", index); + status = -EINVAL; + goto done; + } + + ++obj_index; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int pmgr_device_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct pwr_devices *ppwrdeviceobjs; + + /* Construct the Super Class and override the Interfaces */ + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmgr_pmu->pmgr_deviceobjs.super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr devices, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &g->pmgr_pmu->pmgr_deviceobjs.super.super; + ppwrdeviceobjs = &(g->pmgr_pmu->pmgr_deviceobjs); + + /* Override the Interfaces */ + pboardobjgrp->pmudatainstget = _pwr_device_pmudata_instget; + + status = devinit_get_pwr_device_table(g, ppwrdeviceobjs); + if (status != 0) { + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pwrdev.h b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrdev.h new file mode 100644 index 000000000..51293ec81 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrdev.h @@ -0,0 +1,61 @@ +/* + * general power device structures & definitions + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PMGR_PWRDEV_H +#define NVGPU_PMGR_PWRDEV_H + +#include +#include +#include +#include + +#define PWRDEV_I2CDEV_DEVICE_INDEX_NONE (0xFF) + +#define PWR_DEVICE_PROV_NUM_DEFAULT 1 + +struct pwr_device { + struct pmu_board_obj super; + u8 power_rail; + u8 i2c_dev_idx; + bool bIs_inforom_config; + u32 power_corr_factor; +}; + +struct pwr_devices { + struct boardobjgrp_e32 super; +}; + +struct pwr_device_ina3221 { + struct pwr_device super; + struct ctrl_pmgr_pwr_device_info_rshunt + r_shuntm_ohm[NV_PMU_PMGR_PWR_DEVICE_INA3221_CH_NUM]; + u16 configuration; + u16 mask_enable; + u8 gpio_function; + u16 curr_correct_m; + s16 curr_correct_b; +} ; + +int pmgr_device_sw_setup(struct gk20a *g); + +#endif /* NVGPU_PMGR_PWRDEV_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pwrmonitor.c b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrmonitor.c new file mode 100644 index 000000000..079c2f3f7 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrmonitor.c @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "pwrdev.h" +#include "pmgr.h" + +static int _pwr_channel_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_pmgr_pwr_channel_desc *ppmgrchannel = + (struct nv_pmu_pmgr_pwr_channel_desc *)pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + ppmgrchannel->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &ppmgrchannel->channels[idx].data.obj; + + /* handle Global/common data here as we need index */ + ppmgrchannel->channels[idx].data.channel.ch_idx = idx; + + nvgpu_log_info(g, " Done"); + + return 0; +} + +static int _pwr_channel_rels_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_pmgr_pwr_chrelationship_desc *ppmgrchrels = + (struct nv_pmu_pmgr_pwr_chrelationship_desc *)pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + ppmgrchrels->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &ppmgrchrels->ch_rels[idx].data.obj; + + nvgpu_log_info(g, " Done"); + + return 0; +} + +static int _pwr_channel_state_init(struct gk20a *g) +{ + u8 indx = 0; + struct pwr_channel *pchannel; + u32 objmask = + g->pmgr_pmu->pmgr_monitorobjs.pwr_channels.super.objmask; + + /* Initialize each PWR_CHANNEL's dependent channel mask */ + BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK(32, indx, objmask) { + pchannel = PMGR_PWR_MONITOR_GET_PWR_CHANNEL(g, indx); + if (pchannel == NULL) { + nvgpu_err(g, + "PMGR_PWR_MONITOR_GET_PWR_CHANNEL-failed %d", indx); + return -EINVAL; + } + pchannel->dependent_ch_mask =0; + } + BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK_END + + return 0; +} + +static bool _pwr_channel_implements(struct pwr_channel *pchannel, + u8 type) +{ + return (type == pmu_board_obj_get_type((struct pmu_board_obj *) + (void *)pchannel)); +} + +static int _pwr_domains_pmudatainit_sensor(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + struct nv_pmu_pmgr_pwr_channel_sensor *pmu_sensor_data; + struct pwr_channel_sensor *sensor; + int status = 0; + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for pwr sensor 0x%x", + status); + goto done; + } + + sensor = (struct pwr_channel_sensor *)(void *)obj; + pmu_sensor_data = (struct nv_pmu_pmgr_pwr_channel_sensor *) + (void *) pmu_obj; + + pmu_sensor_data->super.pwr_rail = sensor->super.pwr_rail; + pmu_sensor_data->super.volt_fixedu_v = sensor->super.volt_fixed_uv; + pmu_sensor_data->super.pwr_corr_slope = sensor->super.pwr_corr_slope; + pmu_sensor_data->super.pwr_corr_offsetm_w = sensor->super.pwr_corr_offset_mw; + pmu_sensor_data->super.curr_corr_slope = sensor->super.curr_corr_slope; + pmu_sensor_data->super.curr_corr_offsetm_a = sensor->super.curr_corr_offset_ma; + pmu_sensor_data->super.dependent_ch_mask = sensor->super.dependent_ch_mask; + pmu_sensor_data->super.ch_idx = 0; + + pmu_sensor_data->pwr_dev_idx = sensor->pwr_dev_idx; + pmu_sensor_data->pwr_dev_prov_idx = sensor->pwr_dev_prov_idx; + +done: + return status; +} + +static struct pmu_board_obj *construct_pwr_topology(struct gk20a *g, + void *pargs, size_t pargs_size, u8 
type) +{ + struct pmu_board_obj *obj = NULL; + int status; + struct pwr_channel_sensor *pwrchannel; + struct pwr_channel_sensor *sensor = (struct pwr_channel_sensor*)pargs; + + pwrchannel = nvgpu_kzalloc(g, pargs_size); + if (pwrchannel == NULL) { + return NULL; + } + obj = (struct pmu_board_obj *)(void *)pwrchannel; + + status = pmu_board_obj_construct_super(g, obj, pargs); + if (status != 0) { + return NULL; + } + + pwrchannel = (struct pwr_channel_sensor *)(void *)obj; + + /* Set Super class interfaces */ + obj->pmudatainit = _pwr_domains_pmudatainit_sensor; + + pwrchannel->super.pwr_rail = sensor->super.pwr_rail; + pwrchannel->super.volt_fixed_uv = sensor->super.volt_fixed_uv; + pwrchannel->super.pwr_corr_slope = sensor->super.pwr_corr_slope; + pwrchannel->super.pwr_corr_offset_mw = sensor->super.pwr_corr_offset_mw; + pwrchannel->super.curr_corr_slope = sensor->super.curr_corr_slope; + pwrchannel->super.curr_corr_offset_ma = sensor->super.curr_corr_offset_ma; + pwrchannel->super.dependent_ch_mask = 0; + + pwrchannel->pwr_dev_idx = sensor->pwr_dev_idx; + pwrchannel->pwr_dev_prov_idx = sensor->pwr_dev_prov_idx; + + nvgpu_log_info(g, " Done"); + + return obj; +} + +static int devinit_get_pwr_topology_table(struct gk20a *g, + struct pmgr_pwr_monitor *ppwrmonitorobjs) +{ + int status = 0; + u8 *pwr_topology_table_ptr = NULL; + u8 *curr_pwr_topology_table_ptr = NULL; + struct pmu_board_obj *obj_tmp; + struct pwr_topology_2x_header pwr_topology_table_header; + struct pwr_topology_2x_entry pwr_topology_table_entry; + u32 index; + u32 obj_index = 0; + size_t pwr_topology_size; + union { + struct pmu_board_obj obj; + struct pwr_channel pwrchannel; + struct pwr_channel_sensor sensor; + } pwr_topology_data; + + (void) memset(&pwr_topology_table_header, 0U, + sizeof(struct pwr_topology_2x_header)); + (void) memset(&pwr_topology_table_entry, 0U, + sizeof(struct pwr_topology_2x_entry)); + + nvgpu_log_info(g, " "); + + pwr_topology_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + POWER_TOPOLOGY_TABLE); + if (pwr_topology_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&pwr_topology_table_header, pwr_topology_table_ptr, + VBIOS_POWER_TOPOLOGY_2X_HEADER_SIZE_06); + + if (pwr_topology_table_header.version != + VBIOS_POWER_TOPOLOGY_VERSION_2X) { + status = -EINVAL; + goto done; + } + + g->pmgr_pmu->pmgr_monitorobjs.b_is_topology_tbl_ver_1x = false; + + if (pwr_topology_table_header.header_size < + VBIOS_POWER_TOPOLOGY_2X_HEADER_SIZE_06) { + status = -EINVAL; + goto done; + } + + if (pwr_topology_table_header.table_entry_size != + VBIOS_POWER_TOPOLOGY_2X_ENTRY_SIZE_16) { + status = -EINVAL; + goto done; + } + + curr_pwr_topology_table_ptr = (pwr_topology_table_ptr + + VBIOS_POWER_TOPOLOGY_2X_HEADER_SIZE_06); + + for (index = 0; index < pwr_topology_table_header.num_table_entries; + index++) { + u8 class_type; + + curr_pwr_topology_table_ptr += (pwr_topology_table_header.table_entry_size * index); + + pwr_topology_table_entry.flags0 = *curr_pwr_topology_table_ptr; + pwr_topology_table_entry.pwr_rail = *(curr_pwr_topology_table_ptr + 1); + + nvgpu_memcpy((u8 *)&pwr_topology_table_entry.param0, + (curr_pwr_topology_table_ptr + 2), + (VBIOS_POWER_TOPOLOGY_2X_ENTRY_SIZE_16 - 2U)); + + class_type = BIOS_GET_FIELD(u8, pwr_topology_table_entry.flags0, + NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_FLAGS0_CLASS); + + if (class_type == NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_FLAGS0_CLASS_SENSOR) { + pwr_topology_data.sensor.pwr_dev_idx = + 
BIOS_GET_FIELD(u8, + pwr_topology_table_entry.param1, + NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_INDEX); + pwr_topology_data.sensor.pwr_dev_prov_idx = + BIOS_GET_FIELD(u8, + pwr_topology_table_entry.param1, + NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_PROVIDER_INDEX); + + pwr_topology_size = sizeof(struct pwr_channel_sensor); + } else { + continue; + } + + /* Initialize data for the parent class */ + pwr_topology_data.obj.type = CTRL_PMGR_PWR_CHANNEL_TYPE_SENSOR; + pwr_topology_data.pwrchannel.pwr_rail = (u8)pwr_topology_table_entry.pwr_rail; + pwr_topology_data.pwrchannel.volt_fixed_uv = pwr_topology_table_entry.param0; + pwr_topology_data.pwrchannel.pwr_corr_slope = BIT32(12); + pwr_topology_data.pwrchannel.pwr_corr_offset_mw = 0; + pwr_topology_data.pwrchannel.curr_corr_slope = + (u32)pwr_topology_table_entry.curr_corr_slope; + pwr_topology_data.pwrchannel.curr_corr_offset_ma = + (s32)pwr_topology_table_entry.curr_corr_offset; + + obj_tmp = construct_pwr_topology(g, &pwr_topology_data, + pwr_topology_size, pwr_topology_data.obj.type); + + if (obj_tmp == NULL) { + nvgpu_err(g, + "unable to create pwr topology for %d type %d", + index, pwr_topology_data.obj.type); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&ppwrmonitorobjs->pwr_channels.super, + obj_tmp, obj_index); + + if (status != 0) { + nvgpu_err(g, + "unable to insert pwr topology boardobj for %d", index); + status = -EINVAL; + goto done; + } + + ++obj_index; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int pmgr_monitor_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct pwr_channel *pchannel; + struct pmgr_pwr_monitor *ppwrmonitorobjs; + u8 indx = 0; + + /* Construct the Super Class and override the Interfaces */ + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmgr_pmu->pmgr_monitorobjs.pwr_channels); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr channel, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &(g->pmgr_pmu->pmgr_monitorobjs.pwr_channels.super); + + /* Override the Interfaces */ + pboardobjgrp->pmudatainstget = _pwr_channel_pmudata_instget; + + /* Construct the Super Class and override the Interfaces */ + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmgr_pmu->pmgr_monitorobjs.pwr_ch_rels); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr channel " + "relationship, status - 0x%x", status); + goto done; + } + + pboardobjgrp = &(g->pmgr_pmu->pmgr_monitorobjs.pwr_ch_rels.super); + + /* Override the Interfaces */ + pboardobjgrp->pmudatainstget = _pwr_channel_rels_pmudata_instget; + + /* Initialize the Total GPU Power Channel Mask to 0 */ + g->pmgr_pmu->pmgr_monitorobjs.pmu_data.channels.hdr.data.total_gpu_power_channel_mask = 0; + g->pmgr_pmu->pmgr_monitorobjs.total_gpu_channel_idx = + CTRL_PMGR_PWR_CHANNEL_INDEX_INVALID; + + /* Supported topology table version 1.0 */ + g->pmgr_pmu->pmgr_monitorobjs.b_is_topology_tbl_ver_1x = true; + + ppwrmonitorobjs = &(g->pmgr_pmu->pmgr_monitorobjs); + + status = devinit_get_pwr_topology_table(g, ppwrmonitorobjs); + if (status != 0) { + goto done; + } + + status = _pwr_channel_state_init(g); + if (status != 0) { + goto done; + } + + /* Initialise physicalChannelMask */ + g->pmgr_pmu->pmgr_monitorobjs.physical_channel_mask = 0; + + pboardobjgrp = &g->pmgr_pmu->pmgr_monitorobjs.pwr_channels.super; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pwr_channel *, pchannel, indx) { + if 
(_pwr_channel_implements(pchannel, + CTRL_PMGR_PWR_CHANNEL_TYPE_SENSOR)) { + g->pmgr_pmu->pmgr_monitorobjs.physical_channel_mask |= + BIT32(indx); + } + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pwrmonitor.h b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrmonitor.h new file mode 100644 index 000000000..0c23dd6bc --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrmonitor.h @@ -0,0 +1,69 @@ +/* + * general power channel structures & definitions + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PMGR_PWRMONITOR_H +#define NVGPU_PMGR_PWRMONITOR_H + +#include +#include +#include +#include + +struct pwr_channel { + struct pmu_board_obj super; + u8 pwr_rail; + u32 volt_fixed_uv; + u32 pwr_corr_slope; + s32 pwr_corr_offset_mw; + u32 curr_corr_slope; + s32 curr_corr_offset_ma; + u32 dependent_ch_mask; +}; + +struct pwr_chrelationship { + struct pmu_board_obj super; + u8 chIdx; +}; + +struct pwr_channel_sensor { + struct pwr_channel super; + u8 pwr_dev_idx; + u8 pwr_dev_prov_idx; +}; + +struct pmgr_pwr_monitor { + bool b_is_topology_tbl_ver_1x; + struct boardobjgrp_e32 pwr_channels; + struct boardobjgrp_e32 pwr_ch_rels; + u8 total_gpu_channel_idx; + u32 physical_channel_mask; + struct nv_pmu_pmgr_pwr_monitor_pack pmu_data; +}; + +#define PMGR_PWR_MONITOR_GET_PWR_CHANNEL(g, channel_idx) \ + ((struct pwr_channel *)BOARDOBJGRP_OBJ_GET_BY_IDX( \ + &(g->pmgr_pmu->pmgr_monitorobjs.pwr_channels.super), (channel_idx))) + +int pmgr_monitor_sw_setup(struct gk20a *g); + +#endif /* NVGPU_PMGR_PWRMONITOR_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pwrpolicy.c b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrpolicy.c new file mode 100644 index 000000000..357c003ca --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrpolicy.c @@ -0,0 +1,795 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "pwrpolicy.h" +#include "pmgr.h" + +#define _pwr_policy_limitarboutputget_helper(p_limit_arb) (p_limit_arb)->output +#define _pwr_policy_limitdeltaapply(limit, delta) ((u32)max(((s32)limit) + (delta), 0)) + +static int _pwr_policy_limitarbinputset_helper(struct gk20a *g, + struct ctrl_pmgr_pwr_policy_limit_arbitration *p_limit_arb, + u8 client_idx, + u32 limit_value) +{ + u8 indx; + bool b_found = false; + int status = 0; + u32 output = limit_value; + + for (indx = 0; indx< p_limit_arb->num_inputs; indx++) { + if (p_limit_arb->inputs[indx].pwr_policy_idx == client_idx) { + p_limit_arb->inputs[indx].limit_value = limit_value; + b_found = true; + } else if (p_limit_arb->b_arb_max) { + output = max(output, p_limit_arb->inputs[indx].limit_value); + } else { + output = min(output, p_limit_arb->inputs[indx].limit_value); + } + } + + if (!b_found) { + if (p_limit_arb->num_inputs < + CTRL_PMGR_PWR_POLICY_MAX_LIMIT_INPUTS) { + p_limit_arb->inputs[ + p_limit_arb->num_inputs].pwr_policy_idx = client_idx; + p_limit_arb->inputs[ + p_limit_arb->num_inputs].limit_value = limit_value; + p_limit_arb->num_inputs++; + } else { + nvgpu_err(g, "No entries remaining for clientIdx=%d", + client_idx); + status = -EINVAL; + } + } + + if (status == 0) { + p_limit_arb->output = output; + } + + return status; +} + +static int _pwr_policy_limitid_translate(struct gk20a *g, + struct pwr_policy *ppolicy, + enum pwr_policy_limit_id limit_id, + struct ctrl_pmgr_pwr_policy_limit_arbitration **p_limit_arb, + struct ctrl_pmgr_pwr_policy_limit_arbitration **p_limit_arb_sec) +{ + int status = 0; + + switch (limit_id) { + case PWR_POLICY_LIMIT_ID_MIN: + *p_limit_arb = &ppolicy->limit_arb_min; + break; + + case PWR_POLICY_LIMIT_ID_RATED: + *p_limit_arb = &ppolicy->limit_arb_rated; + + if (p_limit_arb_sec != NULL) { + *p_limit_arb_sec = &ppolicy->limit_arb_curr; + } + break; + + case PWR_POLICY_LIMIT_ID_MAX: + *p_limit_arb = &ppolicy->limit_arb_max; + break; + + case PWR_POLICY_LIMIT_ID_CURR: + *p_limit_arb = &ppolicy->limit_arb_curr; + break; + + case PWR_POLICY_LIMIT_ID_BATT: + *p_limit_arb = &ppolicy->limit_arb_batt; + break; + + default: + nvgpu_err(g, "Unsupported limitId=%d", + limit_id); + status = -EINVAL; + break; + } + + return status; +} + +static int _pwr_policy_limitarbinputset(struct gk20a *g, + struct pwr_policy *ppolicy, + enum pwr_policy_limit_id limit_id, + u8 
client_idx, + u32 limit) +{ + int status = 0; + struct ctrl_pmgr_pwr_policy_limit_arbitration *p_limit_arb = NULL; + struct ctrl_pmgr_pwr_policy_limit_arbitration *p_limit_arb_sec = NULL; + + status = _pwr_policy_limitid_translate(g, + ppolicy, + limit_id, + &p_limit_arb, + &p_limit_arb_sec); + if (status != 0) { + goto exit; + } + + status = _pwr_policy_limitarbinputset_helper(g, p_limit_arb, client_idx, limit); + if (status != 0) { + nvgpu_err(g, + "Error setting client limit value: status=0x%08x, limitId=0x%x, clientIdx=0x%x, limit=%d", + status, limit_id, client_idx, limit); + goto exit; + } + + if (NULL != p_limit_arb_sec) { + status = _pwr_policy_limitarbinputset_helper(g, p_limit_arb_sec, + CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM, + _pwr_policy_limitarboutputget_helper(p_limit_arb)); + } + +exit: + return status; +} + +static inline void _pwr_policy_limitarbconstruct( + struct ctrl_pmgr_pwr_policy_limit_arbitration *p_limit_arb, + bool b_arb_max) +{ + p_limit_arb->num_inputs = 0; + p_limit_arb->b_arb_max = b_arb_max; +} + +static u32 _pwr_policy_limitarboutputget(struct gk20a *g, + struct pwr_policy *ppolicy, + enum pwr_policy_limit_id limit_id) +{ + int status = 0; + struct ctrl_pmgr_pwr_policy_limit_arbitration *p_limit_arb = NULL; + + status = _pwr_policy_limitid_translate(g, + ppolicy, + limit_id, + &p_limit_arb, + NULL); + if (status != 0) { + return 0; + } + + return _pwr_policy_limitarboutputget_helper(p_limit_arb); +} + +static int _pwr_domains_pmudatainit_hw_threshold(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + struct nv_pmu_pmgr_pwr_policy_hw_threshold *pmu_hw_threshold_data; + struct pwr_policy_hw_threshold *p_hw_threshold; + struct pwr_policy *p_pwr_policy; + struct nv_pmu_pmgr_pwr_policy *pmu_pwr_policy; + int status = 0; + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for pwr sensor 0x%x", + status); + status = -ENOMEM; + goto done; + } + + p_hw_threshold = (struct pwr_policy_hw_threshold *)(void *)obj; + pmu_hw_threshold_data = (struct nv_pmu_pmgr_pwr_policy_hw_threshold *) pmu_obj; + pmu_pwr_policy = (struct nv_pmu_pmgr_pwr_policy *) pmu_obj; + p_pwr_policy = (struct pwr_policy *)&(p_hw_threshold->super.super); + + pmu_pwr_policy->ch_idx = 0; + pmu_pwr_policy->limit_unit = p_pwr_policy->limit_unit; + pmu_pwr_policy->num_limit_inputs = p_pwr_policy->num_limit_inputs; + + pmu_pwr_policy->limit_min = _pwr_policy_limitdeltaapply( + _pwr_policy_limitarboutputget(g, p_pwr_policy, + PWR_POLICY_LIMIT_ID_MIN), + p_pwr_policy->limit_delta); + + pmu_pwr_policy->limit_max = _pwr_policy_limitdeltaapply( + _pwr_policy_limitarboutputget(g, p_pwr_policy, + PWR_POLICY_LIMIT_ID_MAX), + p_pwr_policy->limit_delta); + + pmu_pwr_policy->limit_curr = _pwr_policy_limitdeltaapply( + _pwr_policy_limitarboutputget(g, p_pwr_policy, + PWR_POLICY_LIMIT_ID_CURR), + p_pwr_policy->limit_delta); + + nvgpu_memcpy((u8 *)&pmu_pwr_policy->integral, + (u8 *)&p_pwr_policy->integral, + sizeof(struct ctrl_pmgr_pwr_policy_info_integral)); + + pmu_pwr_policy->sample_mult = p_pwr_policy->sample_mult; + pmu_pwr_policy->filter_type = p_pwr_policy->filter_type; + pmu_pwr_policy->filter_param = p_pwr_policy->filter_param; + + pmu_hw_threshold_data->threshold_idx = p_hw_threshold->threshold_idx; + pmu_hw_threshold_data->low_threshold_idx = p_hw_threshold->low_threshold_idx; + pmu_hw_threshold_data->b_use_low_threshold = p_hw_threshold->b_use_low_threshold; + 
pmu_hw_threshold_data->low_threshold_value = p_hw_threshold->low_threshold_value;
+
+	if (pmu_board_obj_get_type(obj) ==
+		CTRL_PMGR_PWR_POLICY_TYPE_SW_THRESHOLD) {
+		struct nv_pmu_pmgr_pwr_policy_sw_threshold *pmu_sw_threshold_data;
+		struct pwr_policy_sw_threshold *p_sw_threshold;
+
+		p_sw_threshold = (struct pwr_policy_sw_threshold *)(void *)obj;
+		pmu_sw_threshold_data =
+			(struct nv_pmu_pmgr_pwr_policy_sw_threshold *)(void *)pmu_obj;
+		pmu_sw_threshold_data->event_id =
+			p_sw_threshold->event_id;
+	}
+done:
+	return status;
+}
+
+static struct pmu_board_obj *construct_pwr_policy(struct gk20a *g,
+				void *pargs, size_t pargs_size, u8 type)
+{
+	struct pmu_board_obj *obj = NULL;
+	int status;
+	struct pwr_policy_hw_threshold *pwrpolicyhwthreshold;
+	struct pwr_policy *pwrpolicy;
+	struct pwr_policy *pwrpolicyparams = (struct pwr_policy*)pargs;
+	struct pwr_policy_hw_threshold *hwthreshold = (struct pwr_policy_hw_threshold*)pargs;
+
+	pwrpolicy = nvgpu_kzalloc(g, pargs_size);
+	if (pwrpolicy == NULL) {
+		return NULL;
+	}
+
+	obj = (struct pmu_board_obj *)(void *)pwrpolicy;
+
+	status = pmu_board_obj_construct_super(g, obj, pargs);
+	if (status != 0) {
+		return NULL;
+	}
+
+	pwrpolicyhwthreshold = (struct pwr_policy_hw_threshold *)(void *)obj;
+	pwrpolicy = (struct pwr_policy *)(void *)obj;
+
+	nvgpu_log_fn(g, "min=%u rated=%u max=%u",
+		pwrpolicyparams->limit_min,
+		pwrpolicyparams->limit_rated,
+		pwrpolicyparams->limit_max);
+
+	/* Set Super class interfaces */
+	obj->pmudatainit = _pwr_domains_pmudatainit_hw_threshold;
+
+	pwrpolicy->ch_idx = pwrpolicyparams->ch_idx;
+	pwrpolicy->num_limit_inputs = 0;
+	pwrpolicy->limit_unit = pwrpolicyparams->limit_unit;
+	pwrpolicy->filter_type = (enum ctrl_pmgr_pwr_policy_filter_type)(pwrpolicyparams->filter_type);
+	pwrpolicy->sample_mult = pwrpolicyparams->sample_mult;
+	switch (pwrpolicy->filter_type)
+	{
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_NONE:
+		break;
+
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_BLOCK:
+		pwrpolicy->filter_param.block.block_size =
+			pwrpolicyparams->filter_param.block.block_size;
+		break;
+
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_MOVING_AVERAGE:
+		pwrpolicy->filter_param.moving_avg.window_size =
+			pwrpolicyparams->filter_param.moving_avg.window_size;
+		break;
+
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_IIR:
+		pwrpolicy->filter_param.iir.divisor = pwrpolicyparams->filter_param.iir.divisor;
+		break;
+
+	default:
+		nvgpu_err(g, "Error: unrecognized Power Policy filter type: %d",
+			pwrpolicy->filter_type);
+		break;
+	}
+
+	_pwr_policy_limitarbconstruct(&pwrpolicy->limit_arb_curr, false);
+
+	pwrpolicy->limit_delta = 0;
+
+	_pwr_policy_limitarbconstruct(&pwrpolicy->limit_arb_min, true);
+	status = _pwr_policy_limitarbinputset(g,
+		pwrpolicy,
+		PWR_POLICY_LIMIT_ID_MIN,
+		CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+		pwrpolicyparams->limit_min);
+
+	_pwr_policy_limitarbconstruct(&pwrpolicy->limit_arb_max, false);
+	status = _pwr_policy_limitarbinputset(g,
+		pwrpolicy,
+		PWR_POLICY_LIMIT_ID_MAX,
+		CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+		pwrpolicyparams->limit_max);
+
+	_pwr_policy_limitarbconstruct(&pwrpolicy->limit_arb_rated, false);
+	status = _pwr_policy_limitarbinputset(g,
+		pwrpolicy,
+		PWR_POLICY_LIMIT_ID_RATED,
+		CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+		pwrpolicyparams->limit_rated);
+
+	_pwr_policy_limitarbconstruct(&pwrpolicy->limit_arb_batt, false);
+	status = _pwr_policy_limitarbinputset(g,
+		pwrpolicy,
+		PWR_POLICY_LIMIT_ID_BATT,
+		CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+		((pwrpolicyparams->limit_batt !=
0U) ? + pwrpolicyparams->limit_batt: + CTRL_PMGR_PWR_POLICY_LIMIT_MAX)); + + nvgpu_memcpy((u8 *)&pwrpolicy->integral, + (u8 *)&pwrpolicyparams->integral, + sizeof(struct ctrl_pmgr_pwr_policy_info_integral)); + + pwrpolicyhwthreshold->threshold_idx = hwthreshold->threshold_idx; + pwrpolicyhwthreshold->b_use_low_threshold = hwthreshold->b_use_low_threshold; + pwrpolicyhwthreshold->low_threshold_idx = hwthreshold->low_threshold_idx; + pwrpolicyhwthreshold->low_threshold_value = hwthreshold->low_threshold_value; + + if (type == CTRL_PMGR_PWR_POLICY_TYPE_SW_THRESHOLD) { + struct pwr_policy_sw_threshold *pwrpolicyswthreshold; + struct pwr_policy_sw_threshold *swthreshold = + (struct pwr_policy_sw_threshold*)pargs; + + pwrpolicyswthreshold = + (struct pwr_policy_sw_threshold *)(void *)obj; + pwrpolicyswthreshold->event_id = swthreshold->event_id; + } + + nvgpu_log_info(g, " Done"); + + return obj; +} + +static int _pwr_policy_construct_WAR_SW_Threshold_policy(struct gk20a *g, + struct pmgr_pwr_policy *ppwrpolicyobjs, + union pwr_policy_data_union *ppwrpolicydata, + size_t pwr_policy_size, + u32 obj_index) +{ + int status = 0; + struct pmu_board_obj *obj_tmp; + + /* WARN policy */ + ppwrpolicydata->pwrpolicy.limit_unit = 0; + ppwrpolicydata->pwrpolicy.limit_min = 10000; + ppwrpolicydata->pwrpolicy.limit_rated = 100000; + ppwrpolicydata->pwrpolicy.limit_max = 100000; + ppwrpolicydata->sw_threshold.threshold_idx = 1; + ppwrpolicydata->pwrpolicy.filter_type = + CTRL_PMGR_PWR_POLICY_FILTER_TYPE_MOVING_AVERAGE; + ppwrpolicydata->pwrpolicy.sample_mult = 5; + + /* Filled the entry.filterParam value in the filterParam */ + ppwrpolicydata->pwrpolicy.filter_param.moving_avg.window_size = 10; + + ppwrpolicydata->sw_threshold.event_id = 0x01; + + ppwrpolicydata->obj.type = CTRL_PMGR_PWR_POLICY_TYPE_SW_THRESHOLD; + + obj_tmp = construct_pwr_policy(g, ppwrpolicydata, + pwr_policy_size, ppwrpolicydata->obj.type); + + if (obj_tmp == NULL) { + nvgpu_err(g, + "unable to create pwr policy for type %d", ppwrpolicydata->obj.type); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&ppwrpolicyobjs->pwr_policies.super, + obj_tmp, obj_index); + + if (status != 0) { + nvgpu_err(g, + "unable to insert pwr policy boardobj for %d", obj_index); + status = -EINVAL; + goto done; + } +done: + return status; +} + +struct pwr_policy_3x_header_unpacked { + u8 version; + u8 header_size; + u8 table_entry_size; + u8 num_table_entries; + u16 base_sample_period; + u16 min_client_sample_period; + u8 table_rel_entry_size; + u8 num_table_rel_entries; + u8 tgp_policy_idx; + u8 rtp_policy_idx; + u8 mxm_policy_idx; + u8 dnotifier_policy_idx; + u32 d2_limit; + u32 d3_limit; + u32 d4_limit; + u32 d5_limit; + u8 low_sampling_mult; + u8 pwr_tgt_policy_idx; + u8 pwr_tgt_floor_policy_idx; + u8 sm_bus_policy_idx; + u8 table_viol_entry_size; + u8 num_table_viol_entries; +}; + +#define __UNPACK_FIELD(unpacked, packed, field) \ + ((void) memcpy(&(unpacked)->field, &(packed)->field, \ + sizeof((unpacked)->field))) + +static inline void devinit_unpack_pwr_policy_header( + struct pwr_policy_3x_header_unpacked *unpacked, + struct pwr_policy_3x_header_struct *packed) +{ + __UNPACK_FIELD(unpacked, packed, version); + __UNPACK_FIELD(unpacked, packed, header_size); + __UNPACK_FIELD(unpacked, packed, table_entry_size); + __UNPACK_FIELD(unpacked, packed, num_table_entries); + __UNPACK_FIELD(unpacked, packed, base_sample_period); + __UNPACK_FIELD(unpacked, packed, min_client_sample_period); + __UNPACK_FIELD(unpacked, packed, 
table_rel_entry_size); + __UNPACK_FIELD(unpacked, packed, num_table_rel_entries); + __UNPACK_FIELD(unpacked, packed, tgp_policy_idx); + __UNPACK_FIELD(unpacked, packed, rtp_policy_idx); + __UNPACK_FIELD(unpacked, packed, mxm_policy_idx); + __UNPACK_FIELD(unpacked, packed, dnotifier_policy_idx); + __UNPACK_FIELD(unpacked, packed, d2_limit); + __UNPACK_FIELD(unpacked, packed, d3_limit); + __UNPACK_FIELD(unpacked, packed, d4_limit); + __UNPACK_FIELD(unpacked, packed, d5_limit); + __UNPACK_FIELD(unpacked, packed, low_sampling_mult); + __UNPACK_FIELD(unpacked, packed, pwr_tgt_policy_idx); + __UNPACK_FIELD(unpacked, packed, pwr_tgt_floor_policy_idx); + __UNPACK_FIELD(unpacked, packed, sm_bus_policy_idx); + __UNPACK_FIELD(unpacked, packed, table_viol_entry_size); + __UNPACK_FIELD(unpacked, packed, num_table_viol_entries); +} + +struct pwr_policy_3x_entry_unpacked { + u8 flags0; + u8 ch_idx; + u32 limit_min; + u32 limit_rated; + u32 limit_max; + u32 param0; + u32 param1; + u32 param2; + u32 param3; + u32 limit_batt; + u8 flags1; + u8 past_length; + u8 next_length; + u16 ratio_min; + u16 ratio_max; + u8 sample_mult; + u32 filter_param; +}; + +static inline void devinit_unpack_pwr_policy_entry( + struct pwr_policy_3x_entry_unpacked *unpacked, + struct pwr_policy_3x_entry_struct *packed) +{ + __UNPACK_FIELD(unpacked, packed, flags0); + __UNPACK_FIELD(unpacked, packed, ch_idx); + __UNPACK_FIELD(unpacked, packed, limit_min); + __UNPACK_FIELD(unpacked, packed, limit_rated); + __UNPACK_FIELD(unpacked, packed, limit_max); + __UNPACK_FIELD(unpacked, packed, param0); + __UNPACK_FIELD(unpacked, packed, param1); + __UNPACK_FIELD(unpacked, packed, param2); + __UNPACK_FIELD(unpacked, packed, param3); + __UNPACK_FIELD(unpacked, packed, limit_batt); + __UNPACK_FIELD(unpacked, packed, flags1); + __UNPACK_FIELD(unpacked, packed, past_length); + __UNPACK_FIELD(unpacked, packed, next_length); + __UNPACK_FIELD(unpacked, packed, ratio_min); + __UNPACK_FIELD(unpacked, packed, ratio_max); + __UNPACK_FIELD(unpacked, packed, sample_mult); + __UNPACK_FIELD(unpacked, packed, filter_param); +} + +static int devinit_get_pwr_policy_table(struct gk20a *g, + struct pmgr_pwr_policy *ppwrpolicyobjs) +{ + int status = 0; + u8 *ptr = NULL; + struct pmu_board_obj *obj_tmp; + struct pwr_policy_3x_header_struct *packed_hdr; + struct pwr_policy_3x_header_unpacked hdr; + u32 index; + u32 obj_index = 0; + size_t pwr_policy_size; + bool integral_control = false; + u32 hw_threshold_policy_index = 0; + union pwr_policy_data_union pwr_policy_data; + + nvgpu_log_info(g, " "); + + ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + POWER_CAPPING_TABLE); + if (ptr == NULL) { + status = -EINVAL; + goto done; + } + + packed_hdr = (struct pwr_policy_3x_header_struct *)ptr; + + if (packed_hdr->version != + VBIOS_POWER_POLICY_VERSION_3X) { + status = -EINVAL; + goto done; + } + + if (packed_hdr->header_size < + VBIOS_POWER_POLICY_3X_HEADER_SIZE_25) { + status = -EINVAL; + goto done; + } + + if (packed_hdr->table_entry_size < + VBIOS_POWER_POLICY_3X_ENTRY_SIZE_2E) { + status = -EINVAL; + goto done; + } + + /* unpack power policy table header */ + devinit_unpack_pwr_policy_header(&hdr, packed_hdr); + + ptr += (u32)hdr.header_size; + + for (index = 0; index < hdr.num_table_entries; index++) { + + struct pwr_policy_3x_entry_struct *packed_entry; + struct pwr_policy_3x_entry_unpacked entry; + + u8 class_type; + + packed_entry = (struct pwr_policy_3x_entry_struct *)ptr; + + class_type = BIOS_GET_FIELD(u8, 
packed_entry->flags0, + NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_CLASS); + + if (class_type != NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_CLASS_HW_THRESHOLD) { + ptr += (u32)hdr.table_entry_size; + continue; + } + + /* unpack power policy table entry */ + devinit_unpack_pwr_policy_entry(&entry, packed_entry); + + ppwrpolicyobjs->version = + CTRL_PMGR_PWR_POLICY_TABLE_VERSION_3X; + ppwrpolicyobjs->base_sample_period = hdr.base_sample_period; + ppwrpolicyobjs->min_client_sample_period = + hdr.min_client_sample_period; + ppwrpolicyobjs->low_sampling_mult = hdr.low_sampling_mult; + + ppwrpolicyobjs->policy_idxs[1] = hdr.tgp_policy_idx; + ppwrpolicyobjs->policy_idxs[0] = hdr.rtp_policy_idx; + ppwrpolicyobjs->policy_idxs[2] = hdr.mxm_policy_idx; + ppwrpolicyobjs->policy_idxs[3] = hdr.dnotifier_policy_idx; + ppwrpolicyobjs->ext_limits[0].limit = hdr.d2_limit; + ppwrpolicyobjs->ext_limits[1].limit = hdr.d3_limit; + ppwrpolicyobjs->ext_limits[2].limit = hdr.d4_limit; + ppwrpolicyobjs->ext_limits[3].limit = hdr.d5_limit; + ppwrpolicyobjs->policy_idxs[4] = hdr.pwr_tgt_policy_idx; + ppwrpolicyobjs->policy_idxs[5] = hdr.pwr_tgt_floor_policy_idx; + ppwrpolicyobjs->policy_idxs[6] = hdr.sm_bus_policy_idx; + + integral_control = BIOS_GET_FIELD(bool, entry.flags1, + NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_INTEGRAL_CONTROL); + + if (integral_control) { + pwr_policy_data.pwrpolicy.integral.past_sample_count = + entry.past_length; + pwr_policy_data.pwrpolicy.integral.next_sample_count = + entry.next_length; + pwr_policy_data.pwrpolicy.integral.ratio_limit_max = + entry.ratio_max; + pwr_policy_data.pwrpolicy.integral.ratio_limit_min = + entry.ratio_min; + } else { + (void) memset(&(pwr_policy_data.pwrpolicy.integral), + 0x0, sizeof( + struct ctrl_pmgr_pwr_policy_info_integral)); + } + pwr_policy_data.hw_threshold.threshold_idx = + BIOS_GET_FIELD(u8, entry.param0, + NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_THRES_IDX); + + pwr_policy_data.hw_threshold.b_use_low_threshold = + BIOS_GET_FIELD(bool, entry.param0, + NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_USE); + + if (pwr_policy_data.hw_threshold.b_use_low_threshold) { + pwr_policy_data.hw_threshold.low_threshold_idx = + BIOS_GET_FIELD(u8, entry.param0, + NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_IDX); + + pwr_policy_data.hw_threshold.low_threshold_value = + BIOS_GET_FIELD(u16, entry.param1, + NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM1_HW_THRESHOLD_LOW_THRESHOLD_VAL); + } + + pwr_policy_size = sizeof(struct pwr_policy_hw_threshold); + + /* Initialize data for the parent class */ + pwr_policy_data.obj.type = + CTRL_PMGR_PWR_POLICY_TYPE_HW_THRESHOLD; + pwr_policy_data.pwrpolicy.ch_idx = entry.ch_idx; + pwr_policy_data.pwrpolicy.limit_unit = + BIOS_GET_FIELD(u8, entry.flags0, + NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_LIMIT_UNIT); + pwr_policy_data.pwrpolicy.filter_type = + BIOS_GET_FIELD(enum ctrl_pmgr_pwr_policy_filter_type, + entry.flags1, + NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_FILTER_TYPE); + + pwr_policy_data.pwrpolicy.limit_min = entry.limit_min; + pwr_policy_data.pwrpolicy.limit_rated = entry.limit_rated; + pwr_policy_data.pwrpolicy.limit_max = entry.limit_max; + pwr_policy_data.pwrpolicy.limit_batt = entry.limit_batt; + + pwr_policy_data.pwrpolicy.sample_mult = (u8)entry.sample_mult; + + /* Filled the entry.filterParam value in the filterParam */ + pwr_policy_data.pwrpolicy.filter_param.block.block_size = 0; + pwr_policy_data.pwrpolicy.filter_param.moving_avg.window_size = 0; + 
pwr_policy_data.pwrpolicy.filter_param.iir.divisor = 0; + + hw_threshold_policy_index |= + BIT32(pwr_policy_data.hw_threshold.threshold_idx); + + obj_tmp = construct_pwr_policy(g, &pwr_policy_data, + pwr_policy_size, pwr_policy_data.obj.type); + + if (obj_tmp == NULL) { + nvgpu_err(g, + "unable to create pwr policy for %d type %d", + index, pwr_policy_data.obj.type); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&ppwrpolicyobjs->pwr_policies.super, + obj_tmp, obj_index); + + if (status != 0) { + nvgpu_err(g, + "unable to insert pwr policy boardobj for %d", + index); + status = -EINVAL; + goto done; + } + ++obj_index; + + ptr += (u32)hdr.table_entry_size; + } + + if (g->hardcode_sw_threshold) { + status = _pwr_policy_construct_WAR_SW_Threshold_policy(g, + ppwrpolicyobjs, + &pwr_policy_data, + sizeof(struct pwr_policy_sw_threshold), + obj_index); + if (status != 0) { + nvgpu_err(g, "unable to construct_WAR_policy"); + status = -EINVAL; + goto done; + } + ++obj_index; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int pmgr_policy_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct pwr_policy *ppolicy; + struct pmgr_pwr_policy *ppwrpolicyobjs; + u8 indx = 0; + + /* Construct the Super Class and override the Interfaces */ + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmgr_pmu->pmgr_policyobjs.pwr_policies); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr policy, " + "status - 0x%x", status); + goto done; + } + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmgr_pmu->pmgr_policyobjs.pwr_policy_rels); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr policy rels, " + "status - 0x%x", status); + goto done; + } + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmgr_pmu->pmgr_policyobjs.pwr_violations); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for pmgr violations, " + "status - 0x%x", status); + goto done; + } + + (void) memset(g->pmgr_pmu->pmgr_policyobjs.policy_idxs, + (int)CTRL_PMGR_PWR_POLICY_INDEX_INVALID, + sizeof(u8) * CTRL_PMGR_PWR_POLICY_IDX_NUM_INDEXES); + + /* Initialize external power limit policy indexes to _INVALID/0xFF */ + for (indx = 0; indx < PWR_POLICY_EXT_POWER_STATE_ID_COUNT; indx++) { + g->pmgr_pmu->pmgr_policyobjs.ext_limits[indx].policy_table_idx = + CTRL_PMGR_PWR_POLICY_INDEX_INVALID; + } + + /* Initialize external power state to _D1 */ + g->pmgr_pmu->pmgr_policyobjs.ext_power_state = -1; + + ppwrpolicyobjs = &(g->pmgr_pmu->pmgr_policyobjs); + pboardobjgrp = &(g->pmgr_pmu->pmgr_policyobjs.pwr_policies.super); + + status = devinit_get_pwr_policy_table(g, ppwrpolicyobjs); + if (status != 0) { + goto done; + } + + g->pmgr_pmu->pmgr_policyobjs.b_enabled = true; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pwr_policy *, ppolicy, indx) { + PMGR_PWR_POLICY_INCREMENT_LIMIT_INPUT_COUNT(ppolicy); + } + + g->pmgr_pmu->pmgr_policyobjs.global_ceiling.values[0] = + 0xFF; + + g->pmgr_pmu->pmgr_policyobjs.client_work_item.b_pending = false; + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmgr/pwrpolicy.h b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrpolicy.h new file mode 100644 index 000000000..53bb7755e --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmgr/pwrpolicy.h @@ -0,0 +1,136 @@ +/* + * general power channel structures & definitions + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PMGR_PWRPOLICY_H +#define NVGPU_PMGR_PWRPOLICY_H + +#include +#include +#include +#include + +#define PWR_POLICY_EXT_POWER_STATE_ID_COUNT 0x4U + +enum pwr_policy_limit_id { + PWR_POLICY_LIMIT_ID_MIN = 0x00000000, + PWR_POLICY_LIMIT_ID_RATED, + PWR_POLICY_LIMIT_ID_MAX, + PWR_POLICY_LIMIT_ID_CURR, + PWR_POLICY_LIMIT_ID_BATT, +}; + +struct pwr_policy { + struct pmu_board_obj super; + u8 ch_idx; + u8 num_limit_inputs; + u8 limit_unit; + s32 limit_delta; + u32 limit_min; + u32 limit_rated; + u32 limit_max; + u32 limit_batt; + struct ctrl_pmgr_pwr_policy_info_integral integral; + struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_min; + struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_rated; + struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_max; + struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_batt; + struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_curr; + u8 sample_mult; + enum ctrl_pmgr_pwr_policy_filter_type filter_type; + union ctrl_pmgr_pwr_policy_filter_param filter_param; +}; + +struct pwr_policy_ext_limit { + u8 policy_table_idx; + u32 limit; +}; + +struct pwr_policy_batt_workitem { + u32 power_state; + bool b_full_deflection; +}; + +struct pwr_policy_client_workitem { + u32 limit; + bool b_pending; +}; + +struct pwr_policy_relationship { + struct pmu_board_obj super; + u8 policy_idx; +}; + +struct pmgr_pwr_policy { + u8 version; + bool b_enabled; + struct nv_pmu_perf_domain_group_limits global_ceiling; + u8 policy_idxs[CTRL_PMGR_PWR_POLICY_IDX_NUM_INDEXES]; + struct pwr_policy_ext_limit ext_limits[PWR_POLICY_EXT_POWER_STATE_ID_COUNT]; + s32 ext_power_state; + u16 base_sample_period; + u16 min_client_sample_period; + u8 low_sampling_mult; + struct boardobjgrp_e32 pwr_policies; + struct boardobjgrp_e32 pwr_policy_rels; + struct boardobjgrp_e32 pwr_violations; + struct pwr_policy_client_workitem client_work_item; +}; + +struct pwr_policy_limit { + struct pwr_policy super; +}; + +struct pwr_policy_hw_threshold { + struct pwr_policy_limit super; + u8 threshold_idx; + u8 low_threshold_idx; + bool b_use_low_threshold; + u16 low_threshold_value; +}; + +struct pwr_policy_sw_threshold { + struct pwr_policy_limit super; + u8 threshold_idx; + u8 low_threshold_idx; + bool b_use_low_threshold; + u16 low_threshold_value; + u8 event_id; +}; + +union pwr_policy_data_union { + struct pmu_board_obj obj; + struct pwr_policy pwrpolicy; + struct pwr_policy_hw_threshold 
hw_threshold; + struct pwr_policy_sw_threshold sw_threshold; +} ; + +#define PMGR_GET_PWR_POLICY(g, policy_idx) \ + ((struct pwr_policy *)BOARDOBJGRP_OBJ_GET_BY_IDX( \ + &(g->pmgr_pmu->pmgr_policyobjs.pwr_policies.super), (policy_idx))) + +#define PMGR_PWR_POLICY_INCREMENT_LIMIT_INPUT_COUNT(ppolicy) \ + ((ppolicy)->num_limit_inputs++) + +int pmgr_policy_sw_setup(struct gk20a *g); + +#endif /* NVGPU_PMGR_PWRPOLICY_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c new file mode 100644 index 000000000..88b9fdc4c --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmu.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* PMU H/W error functions */ +void nvgpu_pmu_report_bar0_pri_err_status(struct gk20a *g, u32 bar0_status, + u32 error_type) +{ + nvgpu_report_pmu_err(g, NVGPU_ERR_MODULE_PMU, + GPU_PMU_BAR0_ERROR_TIMEOUT, error_type, bar0_status); +} + +/* PMU engine reset functions */ +static int pmu_enable_hw(struct nvgpu_pmu *pmu, bool enable) +{ + struct gk20a *g = pmu->g; + int err = 0; + + nvgpu_log_fn(g, " %s ", g->name); + + if (enable) { + /* bring PMU falcon/engine out of reset */ + g->ops.pmu.reset_engine(g, true); + + nvgpu_cg_slcg_pmu_load_enable(g); + + nvgpu_cg_blcg_pmu_load_enable(g); + + if (nvgpu_falcon_mem_scrub_wait(pmu->flcn) != 0) { + /* keep PMU falcon/engine in reset + * if IMEM/DMEM scrubbing fails + */ + g->ops.pmu.reset_engine(g, false); + nvgpu_err(g, "Falcon mem scrubbing timeout"); + err = -ETIMEDOUT; + } + } else { + /* keep PMU falcon/engine in reset */ + g->ops.pmu.reset_engine(g, false); + } + + nvgpu_log_fn(g, "%s Done, status - %d ", g->name, err); + return err; +} + +void nvgpu_pmu_enable_irq(struct gk20a *g, bool enable) +{ + if ((g->pmu != NULL) && (g->ops.pmu.pmu_enable_irq != NULL)) { + nvgpu_mutex_acquire(&g->pmu->isr_mutex); + g->ops.pmu.pmu_enable_irq(g->pmu, enable); + g->pmu->isr_enabled = enable; + nvgpu_mutex_release(&g->pmu->isr_mutex); + } +} + +static int pmu_enable(struct nvgpu_pmu *pmu, bool enable) +{ + struct gk20a *g = pmu->g; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (!enable) { + if (!g->ops.pmu.is_engine_in_reset(g)) { + nvgpu_pmu_enable_irq(g, false); + err = pmu_enable_hw(pmu, false); + if (err != 0) { + goto exit; + } + } + } else { + err = pmu_enable_hw(pmu, true); + if (err != 0) { + goto exit; + } + + err = nvgpu_falcon_wait_idle(pmu->flcn); + if (err != 0) { + goto exit; + } + +#ifndef CONFIG_NVGPU_LS_PMU + /* Enable PMU ECC interrupts for safety. 
*/ + nvgpu_pmu_enable_irq(g, true); +#endif + } + +exit: + nvgpu_log_fn(g, "Done, status - %d ", err); + return err; +} + +int nvgpu_pmu_reset(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + int err = 0; + + nvgpu_log_fn(g, " %s ", g->name); + + err = pmu_enable(pmu, false); + if (err != 0) { + goto exit; + } + + err = pmu_enable(pmu, true); + if (err != 0) { + goto exit; + } + +exit: + nvgpu_log_fn(g, " %s Done, status - %d ", g->name, err); + return err; +} + +/* PMU unit deinit */ +void nvgpu_pmu_remove_support(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + if(pmu != NULL) { +#ifdef CONFIG_NVGPU_LS_PMU + if (pmu->remove_support != NULL) { + pmu->remove_support(g->pmu); + } +#endif + nvgpu_mutex_destroy(&pmu->isr_mutex); + if (g->ops.pmu.ecc_free != NULL) { + g->ops.pmu.ecc_free(g); + } + nvgpu_kfree(g, g->pmu); + g->pmu = NULL; + } +} + +/* PMU unit init */ +int nvgpu_pmu_early_init(struct gk20a *g) +{ + int err = 0; + struct nvgpu_pmu *pmu; + + nvgpu_log_fn(g, " "); + + if (g->pmu != NULL) { + /* skip alloc/reinit for unrailgate sequence */ + nvgpu_pmu_dbg(g, "skip pmu init for unrailgate sequence"); + goto exit; + } + + pmu = (struct nvgpu_pmu *) nvgpu_kzalloc(g, sizeof(struct nvgpu_pmu)); + if (pmu == NULL) { + err = -ENOMEM; + goto exit; + } + + g->pmu = pmu; + pmu->g = g; + pmu->flcn = &g->pmu_flcn; + +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_falcon_is_falcon2_enabled(&g->pmu_flcn)) { + nvgpu_set_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED, true); + } +#endif + + if ((g->ops.pmu.ecc_init != NULL) && !g->ecc.initialized) { + err = g->ops.pmu.ecc_init(g); + if (err != 0) { + nvgpu_kfree(g, pmu); + g->pmu = NULL; + goto exit; + } + } + + nvgpu_mutex_init(&pmu->isr_mutex); + + if (!g->support_ls_pmu) { + goto exit; + } + + if (!g->ops.pmu.is_pmu_supported(g)) { + g->support_ls_pmu = false; + + /* Disable LS PMU global checkers */ + g->can_elpg = false; + g->elpg_enabled = false; + g->aelpg_enabled = false; + nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, false); + nvgpu_set_enabled(g, NVGPU_ELPG_MS_ENABLED, false); +#ifdef CONFIG_NVGPU_DGPU + nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); +#endif + goto exit; + } + +#ifdef CONFIG_NVGPU_LS_PMU + err = nvgpu_pmu_rtos_early_init(g, pmu); + if (err != 0) { + nvgpu_mutex_destroy(&pmu->isr_mutex); + if (g->ops.pmu.ecc_free != NULL) { + g->ops.pmu.ecc_free(g); + } + nvgpu_kfree(g, pmu); + g->pmu = NULL; + goto exit; + } +#endif + +exit: + return err; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_debug.c b/drivers/gpu/nvgpu/common/pmu/pmu_debug.c new file mode 100644 index 000000000..eae173055 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmu_debug.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos) +{ + u32 i = 0, j = (u32)strlen(strings); + + for (; i < j; i++) { + if (strings[i] == '%') { + if (strings[i + 1U] == 'x' || strings[i + 1U] == 'X') { + *hex_pos = i; + return true; + } + } + } + *hex_pos = U32_MAX; + return false; +} + +static void print_pmu_trace(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + u32 i = 0, j = 0, k, l, m; + char part_str[40], hex_str[10], buf[0x40] = {0}; + void *tracebuffer; + char *trace; + u32 *trace1; + u32 buf_size = nvgpu_safe_cast_u64_to_u32(sizeof(buf)); + + /* allocate system memory to copy pmu trace buffer */ + tracebuffer = nvgpu_kzalloc(g, PMU_RTOS_TRACE_BUFSIZE); + if (tracebuffer == NULL) { + return; + } + + /* read pmu traces into system memory buffer */ + nvgpu_mem_rd_n(g, &pmu->trace_buf, 0, tracebuffer, + PMU_RTOS_TRACE_BUFSIZE); + + trace = (char *)tracebuffer; + trace1 = (u32 *)tracebuffer; + + nvgpu_err(g, "dump PMU trace buffer"); + for (i = 0U; i < PMU_RTOS_TRACE_BUFSIZE; i += 0x40U) { + for (j = 0U; j < 0x40U; j++) { + if (trace1[(i / 4U) + j] != 0U) { + break; + } + } + if (j == 0x40U) { + break; + } + (void)nvgpu_strnadd_u32(hex_str, trace1[(i / 4U)], + sizeof(hex_str), 16U); + (void)strncat(buf, "Index", nvgpu_safe_sub_u32(buf_size, + nvgpu_safe_cast_u64_to_u32(strlen(buf)))); + (void)strncat(buf, hex_str, nvgpu_safe_sub_u32(buf_size, + nvgpu_safe_cast_u64_to_u32(strlen(buf)))); + (void)strncat(buf, ": ", nvgpu_safe_sub_u32(buf_size, + nvgpu_safe_cast_u64_to_u32(strlen(buf)))); + l = 0; + m = 0; + while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { + if (k >= 40U) { + break; + } + (void)strncpy(part_str, (trace+i+20+m), k); + part_str[k] = '\0'; + (void)nvgpu_strnadd_u32(hex_str, + trace1[(i / 4U) + 1U + l], + sizeof(hex_str), 16U); + (void)strncat(buf, part_str, nvgpu_safe_sub_u32( + buf_size, nvgpu_safe_cast_u64_to_u32( + strlen(buf)))); + (void)strncat(buf, "0x", nvgpu_safe_sub_u32(buf_size, + nvgpu_safe_cast_u64_to_u32(strlen(buf)))); + (void)strncat(buf, hex_str, nvgpu_safe_sub_u32(buf_size, + nvgpu_safe_cast_u64_to_u32(strlen(buf)))); + l++; + m += k + 2U; + } + + (void)strncat(buf, (trace+i+20+m), nvgpu_safe_sub_u32(buf_size, + nvgpu_safe_cast_u64_to_u32(strlen(buf)))); + nvgpu_err(g, "%s", buf); + } + + nvgpu_kfree(g, tracebuffer); +} + +void nvgpu_pmu_dump_falcon_stats(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + +#ifdef CONFIG_NVGPU_FALCON_DEBUG + nvgpu_falcon_dump_stats(pmu->flcn); +#endif + g->ops.pmu.pmu_dump_falcon_stats(pmu); + + /* Print PMU F/W debug prints */ + print_pmu_trace(pmu); + + nvgpu_err(g, "pmu state: %d", nvgpu_pmu_get_fw_state(g, pmu)); + + if (g->can_elpg) { + nvgpu_err(g, "elpg state: %d", pmu->pg->elpg_stat); + } + + /* PMU may crash due to FECS crash. 
Dump FECS status */ + g->ops.gr.falcon.dump_stats(g); +} + +int nvgpu_pmu_debug_init(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + int err = 0; + + err = nvgpu_dma_alloc_map(vm, PMU_RTOS_TRACE_BUFSIZE, + &pmu->trace_buf); + if (err != 0) { + nvgpu_err(g, "failed to allocate pmu trace buffer\n"); + } + + return err; +} + +void nvgpu_pmu_debug_deinit(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->pmu.vm; + + if (nvgpu_mem_is_valid(&pmu->trace_buf)) { + nvgpu_dma_unmap_free(vm, &pmu->trace_buf); + } +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_mutex.c b/drivers/gpu/nvgpu/common/pmu/pmu_mutex.c new file mode 100644 index 000000000..9d1eae140 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmu_mutex.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +int nvgpu_pmu_mutex_acquire(struct gk20a *g, struct pmu_mutexes *mutexes, + u32 id, u32 *token) +{ + struct pmu_mutex *mutex; + u32 owner; + int err; + + WARN_ON(token == NULL); + WARN_ON(!PMU_MUTEX_ID_IS_VALID(id)); + WARN_ON(id > mutexes->cnt); + + mutex = &mutexes->mutex[id]; + + owner = g->ops.pmu.pmu_mutex_owner(g, mutexes, id); + + if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { + WARN_ON(mutex->ref_cnt == 0U); + nvgpu_err(g, "already acquired by owner : 0x%08x", *token); + mutex->ref_cnt++; + return 0; + } + + err = g->ops.pmu.pmu_mutex_acquire(g, mutexes, id, token); + + if (err == 0) { + mutex->ref_cnt = 1; + } + + return err; +} + +int nvgpu_pmu_mutex_release(struct gk20a *g, struct pmu_mutexes *mutexes, + u32 id, u32 *token) +{ + struct pmu_mutex *mutex; + u32 owner; + + WARN_ON(token == NULL); + WARN_ON(!PMU_MUTEX_ID_IS_VALID(id)); + WARN_ON(id > mutexes->cnt); + + mutex = &mutexes->mutex[id]; + + owner = g->ops.pmu.pmu_mutex_owner(g, mutexes, id); + + if (*token != owner) { + nvgpu_err(g, "requester 0x%08x NOT match owner 0x%08x", + *token, owner); + return -EINVAL; + } + + if (--mutex->ref_cnt > 0U) { + return -EBUSY; + } + + g->ops.pmu.pmu_mutex_release(g, mutexes, id, token); + + return 0; +} + +void nvgpu_pmu_mutex_sw_setup(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_mutexes *mutexes) +{ + u32 i; + + nvgpu_log_fn(g, " "); + + for (i = 0; i < mutexes->cnt; i++) { + mutexes->mutex[i].id = i; + mutexes->mutex[i].index = i; + } +} + +int nvgpu_pmu_init_mutexe(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_mutexes **mutexes_p) +{ + struct pmu_mutexes *mutexes; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (*mutexes_p != NULL) { + /* skip alloc/reinit for unrailgate sequence */ + nvgpu_pmu_dbg(g, "skip mutex init for unrailgate sequence"); + goto exit; + } + + mutexes = (struct pmu_mutexes *) + nvgpu_kzalloc(g, sizeof(struct pmu_mutexes)); + if (mutexes == NULL) { + err = -ENOMEM; + goto exit; + } + + mutexes->cnt = g->ops.pmu.pmu_mutex_size(); + + mutexes->mutex = nvgpu_kzalloc(g, mutexes->cnt * + sizeof(struct pmu_mutex)); + if (mutexes->mutex == NULL) { + nvgpu_kfree(g, mutexes); + err = -ENOMEM; + goto exit; + } + + *mutexes_p = mutexes; + +exit: + return err; +} + +void nvgpu_pmu_mutexe_deinit(struct gk20a *g, struct nvgpu_pmu *pmu, + struct pmu_mutexes *mutexes) +{ + nvgpu_log_fn(g, " "); + + if (mutexes == NULL) { + return; + } + + if (mutexes->mutex != NULL) { + nvgpu_kfree(g, mutexes->mutex); + } + nvgpu_kfree(g, mutexes); +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_pstate.c b/drivers/gpu/nvgpu/common/pmu/pmu_pstate.c new file mode 100644 index 000000000..3fb993743 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmu_pstate.c @@ -0,0 +1,204 @@ +/* + * general p state infrastructure + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "boardobj/boardobj.h" + +void nvgpu_pmu_pstate_deinit(struct gk20a *g) +{ + pmgr_pmu_free_pmupstate(g); + nvgpu_pmu_therm_deinit(g, g->pmu); + + if (g->pmu->perf_pmu != NULL) { + nvgpu_pmu_perf_deinit(g); + } + + if (g->pmu->volt != NULL) { + nvgpu_pmu_volt_deinit(g); + } + + nvgpu_pmu_clk_deinit(g); + + if (g->ops.clk.mclk_deinit != NULL) { + g->ops.clk.mclk_deinit(g); + } +} + +static int pmu_pstate_init(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = nvgpu_pmu_therm_init(g, g->pmu); + if (err != 0) { + nvgpu_pmu_therm_deinit(g, g->pmu); + return err; + } + + err = nvgpu_pmu_clk_init(g); + if (err != 0) { + return err; + } + + err = nvgpu_pmu_perf_init(g); + if (err != 0) { + nvgpu_pmu_perf_deinit(g); + return err; + } + + err = nvgpu_pmu_volt_init(g); + if (err != 0) { + return err; + } + + err = pmgr_pmu_init_pmupstate(g); + if (err != 0) { + pmgr_pmu_free_pmupstate(g); + return err; + } + + return 0; +} + +/*sw setup for pstate components*/ +int nvgpu_pmu_pstate_sw_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = nvgpu_pmu_wait_fw_ready(g, g->pmu); + if (err != 0) { + nvgpu_err(g, "PMU not ready to process pstate requests"); + return err; + } + + err = pmu_pstate_init(g); + if (err != 0) { + nvgpu_err(g, "Pstate init failed"); + return err; + } + + err = nvgpu_pmu_volt_sw_setup(g); + if (err != 0) { + nvgpu_err(g, "Volt sw setup failed"); + return err; + } + + err = nvgpu_pmu_therm_sw_setup(g, g->pmu); + if (err != 0) { + goto err_therm_pmu_init_pmupstate; + } + + err = nvgpu_pmu_clk_sw_setup(g); + if (err != 0) { + nvgpu_err(g, "Clk sw setup failed"); + return err; + } + + err = nvgpu_pmu_perf_sw_setup(g); + if (err != 0) { + nvgpu_err(g, "Perf sw setup failed"); + goto err_perf_pmu_init_pmupstate; + } + + if (g->ops.clk.support_pmgr_domain) { + err = pmgr_domain_sw_setup(g); + if (err != 0) { + goto err_pmgr_pmu_init_pmupstate; + } + } + + return 0; + +err_pmgr_pmu_init_pmupstate: + pmgr_pmu_free_pmupstate(g); +err_therm_pmu_init_pmupstate: + nvgpu_pmu_therm_deinit(g, g->pmu); +err_perf_pmu_init_pmupstate: + nvgpu_pmu_perf_deinit(g); + + return err; +} + +/*sw setup for pstate components*/ +int nvgpu_pmu_pstate_pmu_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + if (g->ops.clk.mclk_init != NULL) { + err = g->ops.clk.mclk_init(g); + if (err != 0) { + nvgpu_err(g, "failed to set mclk"); + /* Indicate error and continue */ + } + } + + err = nvgpu_pmu_volt_pmu_setup(g); + if (err != 0) { + nvgpu_err(g, "Failed to send VOLT pmu setup"); + return err; + } + + err = nvgpu_pmu_therm_pmu_setup(g, g->pmu); + if (err != 0) { + return err; + } + + err = nvgpu_pmu_clk_pmu_setup(g); + if (err != 0) { + nvgpu_err(g, "Failed to send CLK pmu setup"); + return err; + } + + err = nvgpu_pmu_perf_pmu_setup(g); + if (err != 0) { + nvgpu_err(g, "Failed to send Perf pmu setup"); + return err; + } + + if 
(g->ops.clk.support_pmgr_domain) { + err = pmgr_domain_pmu_setup(g); + } + + err = g->ops.clk.perf_pmu_vfe_load(g); + if (err != 0) { + return err; + } + + return err; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c new file mode 100644 index 000000000..3f577869b --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c @@ -0,0 +1,514 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "boardobj/boardobj.h" + +#ifdef CONFIG_NVGPU_POWER_PG +#include +#endif + +#ifdef CONFIG_NVGPU_DGPU +#include +#endif + +#if defined(CONFIG_NVGPU_NEXT) +#define PMU_PRIV_LOCKDOWN_RELEASE_POLLING_US (1U) +#endif + +/* PMU locks used to sync with PMU-RTOS */ +int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 id, u32 *token) +{ + if (!g->support_ls_pmu) { + return 0; + } + + if (!g->can_elpg) { + return 0; + } + +#ifdef CONFIG_NVGPU_POWER_PG + if (!pmu->pg->initialized) { + return -EINVAL; + } +#endif + + return nvgpu_pmu_mutex_acquire(g, pmu->mutexes, id, token); +} + +int nvgpu_pmu_lock_release(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 id, u32 *token) +{ + if (!g->support_ls_pmu) { + return 0; + } + + if (!g->can_elpg) { + return 0; + } + +#ifdef CONFIG_NVGPU_POWER_PG + if (!pmu->pg->initialized) { + return -EINVAL; + } +#endif + + return nvgpu_pmu_mutex_release(g, pmu->mutexes, id, token); +} + +/* PMU RTOS init/setup functions */ +int nvgpu_pmu_destroy(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + nvgpu_log_fn(g, " "); + +#ifdef CONFIG_NVGPU_POWER_PG + if (g->can_elpg) { + nvgpu_pmu_pg_destroy(g, pmu, pmu->pg); + } +#endif + + nvgpu_pmu_queues_free(g, &pmu->queues); + + nvgpu_pmu_fw_state_change(g, pmu, PMU_FW_STATE_OFF, false); + nvgpu_pmu_set_fw_ready(g, pmu, false); + nvgpu_pmu_lsfm_clean(g, pmu, pmu->lsfm); + pmu->pmu_perfmon->perfmon_ready = false; + + + nvgpu_log_fn(g, "done"); + return 0; +} + +static void remove_pmu_support(struct nvgpu_pmu *pmu) +{ + struct gk20a *g = pmu->g; + struct pmu_board_obj *obj, *obj_tmp; + struct boardobjgrp *pboardobjgrp, *pboardobjgrp_tmp; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (nvgpu_alloc_initialized(&pmu->dmem)) { + 
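+	/*
+	 * Free the PMU DMEM allocator first; the board object groups and
+	 * the remaining PMU sub-units are torn down below.
+	 */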
nvgpu_alloc_destroy(&pmu->dmem); + } + + if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { + nvgpu_list_for_each_entry_safe(pboardobjgrp, + pboardobjgrp_tmp, &g->boardobjgrp_head, + boardobjgrp, node) { + err = pboardobjgrp->destruct(pboardobjgrp); + if (err != 0) { + nvgpu_err(g, "pboardobjgrp destruct failed"); + } + } + + nvgpu_list_for_each_entry_safe(obj, obj_tmp, + &g->boardobj_head, boardobj, node) { + obj->destruct(obj); + } + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE)) { + nvgpu_pmu_super_surface_deinit(g, pmu, pmu->super_surface); + } + + if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { + nvgpu_pmu_pstate_deinit(g); + } + + nvgpu_pmu_debug_deinit(g, pmu); + nvgpu_pmu_lsfm_deinit(g, pmu, pmu->lsfm); +#ifdef CONFIG_NVGPU_POWER_PG + nvgpu_pmu_pg_deinit(g, pmu, pmu->pg); +#endif + nvgpu_pmu_sequences_deinit(g, pmu, pmu->sequences); + nvgpu_pmu_mutexe_deinit(g, pmu, pmu->mutexes); + nvgpu_pmu_fw_deinit(g, pmu, pmu->fw); + nvgpu_pmu_deinitialize_perfmon(g, pmu); +} + +static int pmu_sw_setup(struct gk20a *g, struct nvgpu_pmu *pmu ) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + /* set default value to mutexes */ + nvgpu_pmu_mutex_sw_setup(g, pmu, pmu->mutexes); + + /* set default value to sequences */ + nvgpu_pmu_sequences_sw_setup(g, pmu, pmu->sequences); + +#ifdef CONFIG_NVGPU_POWER_PG + if (g->can_elpg) { + err = nvgpu_pmu_pg_sw_setup(g, pmu, pmu->pg); + if (err != 0){ + goto exit; + } + } +#endif + + if (pmu->sw_ready) { + nvgpu_log_fn(g, "skip PMU-RTOS shared buffer realloc"); + goto exit; + } + + /* alloc shared buffer to read PMU-RTOS debug message */ + err = nvgpu_pmu_debug_init(g, pmu); + if (err != 0) { + goto exit; + } + + /* alloc shared buffer super buffer to communicate with PMU-RTOS */ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE)) { + err = nvgpu_pmu_super_surface_buf_alloc(g, + pmu, pmu->super_surface); + if (err != 0) { + goto exit; + } + } + + pmu->sw_ready = true; +exit: + if (err != 0) { + nvgpu_pmu_remove_support(g, pmu); + } + + return err; +} + +void nvgpu_pmu_rtos_cmdline_args_init(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + nvgpu_log_fn(g, " "); + + pmu->fw->ops.set_cmd_line_args_trace_size( + pmu, PMU_RTOS_TRACE_BUFSIZE); + pmu->fw->ops.set_cmd_line_args_trace_dma_base(pmu); + pmu->fw->ops.set_cmd_line_args_trace_dma_idx( + pmu, GK20A_PMU_DMAIDX_VIRT); + + pmu->fw->ops.set_cmd_line_args_cpu_freq(pmu, + g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); + + if (pmu->fw->ops.config_cmd_line_args_super_surface != NULL) { + pmu->fw->ops.config_cmd_line_args_super_surface(pmu); + } +} + +#if defined(CONFIG_NVGPU_NEXT) +void nvgpu_pmu_next_core_rtos_args_setup(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + struct nv_pmu_boot_params boot_params; + struct nv_next_core_bootldr_params *btldr_params; + struct nv_next_core_rtos_params *rtos_params; + struct pmu_cmdline_args_v7 *cmd_line_args; + u64 phyadr = 0; + + nvgpu_pmu_rtos_cmdline_args_init(g, pmu); + + btldr_params = &boot_params.boot_params.bl; + rtos_params = &boot_params.boot_params.rtos; + cmd_line_args = &boot_params.cmd_line_args; + + /* setup core dump */ + rtos_params->core_dump_size = NV_REG_STR_NEXT_CORE_DUMP_SIZE_DEFAULT; + rtos_params->core_dump_phys = nvgpu_mem_get_addr(g, + &pmu->fw->ucode_core_dump); + + /* copy cmd line args to pmu->boot_params.cmd_line_args */ + nvgpu_memcpy((u8 *)cmd_line_args, + (u8 *) (pmu->fw->ops.get_cmd_line_args_ptr(pmu)), + pmu->fw->ops.get_cmd_line_args_size(pmu)); + + cmd_line_args->ctx_bind_addr = g->ops.pmu.get_inst_block_config(g); + + /* setup 
boot loader args */ + btldr_params->boot_type = NV_NEXT_CORE_BOOTLDR_BOOT_TYPE_RM; + btldr_params->size = U16(sizeof(struct nv_pmu_boot_params)); + btldr_params->version = NV_NEXT_CORE_BOOTLDR_VERSION; + + /* copy to boot_args phyadr */ + nvgpu_mem_wr_n(g, &pmu->fw->ucode_boot_args, 0, + &boot_params.boot_params.bl, + sizeof(struct nv_pmu_boot_params)); + + /* copy boot args phyadr to mailbox 0/1 */ + phyadr = nvgpu_safe_add_u64(NV_NEXT_CORE_AMAP_EXTMEM2_START, + nvgpu_mem_get_addr(g, &pmu->fw->ucode_boot_args)); + + nvgpu_falcon_mailbox_write(g->pmu->flcn, FALCON_MAILBOX_0, + u64_lo32(phyadr)); + nvgpu_falcon_mailbox_write(g->pmu->flcn, FALCON_MAILBOX_1, + u64_hi32(phyadr)); +} + +s32 nvgpu_pmu_next_core_rtos_args_allocate(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + struct pmu_rtos_fw *rtos_fw = pmu->fw; + s32 err =0; + + nvgpu_log_fn(g, " "); + + /* alloc boot args */ + if (!nvgpu_mem_is_valid(&rtos_fw->ucode_boot_args)) { + err = nvgpu_dma_alloc_flags_sys(g, + NVGPU_DMA_PHYSICALLY_ADDRESSED, + sizeof(struct nv_pmu_boot_params), + &rtos_fw->ucode_boot_args); + if (err != 0) { + goto exit; + } + } + + /* alloc core dump */ + if (!nvgpu_mem_is_valid(&rtos_fw->ucode_core_dump)) { + err = nvgpu_dma_alloc_flags_sys(g, + NVGPU_DMA_PHYSICALLY_ADDRESSED, + NV_REG_STR_NEXT_CORE_DUMP_SIZE_DEFAULT, + &rtos_fw->ucode_core_dump); + if (err != 0) { + goto exit; + } + } + +exit: + return err; +} + +static int nvgpu_pmu_wait_for_priv_lockdown_release(struct gk20a *g, + struct nvgpu_falcon *flcn, unsigned int timeout) +{ + struct nvgpu_timeout to; + int status; + + nvgpu_log_fn(g, " "); + + status = nvgpu_timeout_init(g, &to, timeout, NVGPU_TIMER_CPU_TIMER); + if (status != 0) { + return status; + } + + /* poll for priv lockdown release */ + do { + if (!g->ops.falcon.is_priv_lockdown(flcn)) { + break; + } + + nvgpu_udelay(PMU_PRIV_LOCKDOWN_RELEASE_POLLING_US); + } while (nvgpu_timeout_expired(&to) == 0); + + if (nvgpu_timeout_peek_expired(&to)) { + status = -ETIMEDOUT; + } + + return status; +} +#endif + +int nvgpu_pmu_rtos_init(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + if (!g->support_ls_pmu || (g->pmu == NULL)) { + goto exit; + } + + err = pmu_sw_setup(g, g->pmu); + if (err != 0) { + goto exit; + } + + if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { +#ifdef CONFIG_NVGPU_DGPU + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { + /* Reset PMU engine */ + err = nvgpu_falcon_reset(g->pmu->flcn); + + /* Bootstrap PMU from SEC2 RTOS*/ + err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2, + FALCON_ID_PMU); + if (err != 0) { + goto exit; + } + } +#endif + /* + * clear halt interrupt to avoid PMU-RTOS ucode + * hitting breakpoint due to PMU halt + */ + err = nvgpu_falcon_clear_halt_intr_status(g->pmu->flcn, + nvgpu_get_poll_timeout(g)); + if (err != 0) { + goto exit; + } + + if (g->ops.pmu.setup_apertures != NULL) { + g->ops.pmu.setup_apertures(g); + } + +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + err = nvgpu_pmu_next_core_rtos_args_allocate(g, g->pmu); + if (err != 0) { + goto exit; + } + + nvgpu_pmu_next_core_rtos_args_setup(g, g->pmu); + } else +#endif + { + err = nvgpu_pmu_lsfm_ls_pmu_cmdline_args_copy(g, g->pmu, + g->pmu->lsfm); + if (err != 0) { + goto exit; + } + } + + nvgpu_pmu_enable_irq(g, true); + +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + g->ops.falcon.bootstrap(g->pmu->flcn, 0U); + err = nvgpu_pmu_wait_for_priv_lockdown_release(g, + g->pmu->flcn, U32_MAX); + if(err != 0) { 
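+			/*
+			 * The next-core PMU did not release PRIV lockdown
+			 * within the timeout; abort RTOS init.
+			 */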
+ nvgpu_err(g, "PRIV lockdown polling failed"); + return err; + } + } else +#endif + { + /*Once in LS mode, cpuctl_alias is only accessible*/ + if (g->ops.pmu.secured_pmu_start != NULL) { + g->ops.pmu.secured_pmu_start(g); + } + } + } else { + /* non-secure boot */ + err = nvgpu_pmu_ns_fw_bootstrap(g, g->pmu); + if (err != 0) { + goto exit; + } +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + err = nvgpu_pmu_wait_for_priv_lockdown_release(g, + g->pmu->flcn, U32_MAX); + if(err != 0) { + nvgpu_err(g, "PRIV lockdown polling failed"); + return err; + } + } +#endif + } + + nvgpu_pmu_fw_state_change(g, g->pmu, PMU_FW_STATE_STARTING, false); + +exit: + return err; +} + +int nvgpu_pmu_rtos_early_init(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + /* Allocate memory for pmu_perfmon */ + err = nvgpu_pmu_initialize_perfmon(g, pmu, &pmu->pmu_perfmon); + if (err != 0) { + goto exit; + } + + err = nvgpu_pmu_init_pmu_fw(g, pmu, &pmu->fw); + if (err != 0) { + goto init_failed; + } + + err = nvgpu_pmu_init_mutexe(g, pmu, &pmu->mutexes); + if (err != 0) { + goto init_failed; + } + + err = nvgpu_pmu_sequences_init(g, pmu, &pmu->sequences); + if (err != 0) { + goto init_failed; + } + +#ifdef CONFIG_NVGPU_POWER_PG + if (g->can_elpg) { + err = nvgpu_pmu_pg_init(g, pmu, &pmu->pg); + if (err != 0) { + goto init_failed; + } + } +#endif + + err = nvgpu_pmu_lsfm_init(g, &pmu->lsfm); + if (err != 0) { + goto init_failed; + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE)) { + err = nvgpu_pmu_super_surface_init(g, pmu, + &pmu->super_surface); + if (err != 0) { + goto init_failed; + } + } + + pmu->remove_support = remove_pmu_support; + goto exit; + +init_failed: + remove_pmu_support(pmu); + +exit: + return err; +} diff --git a/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface.c b/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface.c new file mode 100644 index 000000000..876adb6cc --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#include "super_surface_priv.h" + +int nvgpu_pmu_super_surface_buf_alloc(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_super_surface *ss) +{ + struct vm_gk20a *vm = g->mm.pmu.vm; + int err = 0; + u32 tmp = 0; + + nvgpu_log_fn(g, " "); + + if (ss == NULL) { + return 0; + } + + err = nvgpu_dma_alloc_map(vm, sizeof(struct super_surface), + &ss->super_surface_buf); + if (err != 0) { + nvgpu_err(g, "failed to allocate pmu suffer surface\n"); + return err; + } + + /* store the gpu_va in super-surface header for PMU ucode to access */ + tmp = u64_lo32(ss->super_surface_buf.gpu_va); + nvgpu_mem_wr_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + (u64)offsetof(struct super_surface, hdr.data.address.lo), + &tmp, sizeof(u32)); + + tmp = u64_hi32(ss->super_surface_buf.gpu_va); + nvgpu_mem_wr_n(g, nvgpu_pmu_super_surface_mem(g, + pmu, pmu->super_surface), + (u64)offsetof(struct super_surface, hdr.data.address.hi), + &tmp, sizeof(u32)); + + return err; +} + +struct nvgpu_mem *nvgpu_pmu_super_surface_mem(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_super_surface *ss) +{ + return &ss->super_surface_buf; +} + +/* + * Lookup table to hold info about super surface member, + * here member ID from nv_pmu_super_surface_member_descriptor + * used as a index to store the member info in two different + * table, i.e one table is for SET ID TYPE & second table for + * GET_STATUS ID_TYPE. + */ +int nvgpu_pmu_ss_create_ssmd_lookup_table(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_super_surface *ss) +{ + struct super_surface_member_descriptor ssmd; + u32 ssmd_size = (u32) + sizeof(struct super_surface_member_descriptor); + u32 idx = 0U; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (ss == NULL) { + nvgpu_err(g, "SS not allocated"); + return -ENOMEM; + } + + for (idx = 0U; idx < NV_PMU_SUPER_SURFACE_MEMBER_DESCRIPTOR_COUNT; + idx++) { + (void) memset(&ssmd, 0x0, ssmd_size); + + nvgpu_mem_rd_n(g, &ss->super_surface_buf, idx * ssmd_size, + &ssmd, ssmd_size); + + nvgpu_pmu_dbg(g, "ssmd: id-0x%x offset-0x%x size-%x rsvd-0x%x", + ssmd.id, ssmd.offset, ssmd.size, ssmd.rsvd); + + /* Check member type from ID member & update respective table*/ + if ((ssmd.id & + NV_RM_PMU_SUPER_SURFACE_MEMBER_ID_TYPE_SET) != 0U) { + /* + * clear member type from member ID as we create + * different table for each type & use ID as index + * during member info fetch. + */ + ssmd.id &= 0xFFFFU; + if (ssmd.id >= NV_PMU_SUPER_SURFACE_MEMBER_COUNT) { + nvgpu_err(g, "incorrect ssmd id %d", ssmd.id); + nvgpu_err(g, "Failed to create SSMD table"); + err = -EINVAL; + break; + } + /*use member ID as index for lookup table too*/ + (void) memcpy(&ss->ssmd_set[ssmd.id], &ssmd, + ssmd_size); + } else if ((ssmd.id & + NV_RM_PMU_SUPER_SURFACE_MEMBER_ID_TYPE_GET_STATUS) + != 0U) { + /* + * clear member type from member ID as we create + * different table for each type & use ID as index + * during member info fetch. 
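+			 * GET_STATUS type members are stored in the
+			 * ssmd_get_status table, mirroring the ssmd_set
+			 * table filled in above.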
+ */ + ssmd.id &= 0xFFFFU; + if (ssmd.id >= NV_PMU_SUPER_SURFACE_MEMBER_COUNT) { + nvgpu_err(g, "incorrect ssmd id %d", ssmd.id); + nvgpu_err(g, "failed to create SSMD table"); + err = -EINVAL; + break; + } + /*use member ID as index for lookup table too*/ + (void) memcpy(&ss->ssmd_get_status[ssmd.id], &ssmd, + ssmd_size); + } else { + continue; + } + } + + return err; +} + +u32 nvgpu_pmu_get_ss_member_set_offset(struct gk20a *g, + struct nvgpu_pmu *pmu, u32 member_id) +{ + return pmu->super_surface->ssmd_set[member_id].offset; +} + +u32 nvgpu_pmu_get_ss_member_set_size(struct gk20a *g, + struct nvgpu_pmu *pmu, u32 member_id) +{ + return pmu->super_surface->ssmd_set[member_id].size; +} + +u32 nvgpu_pmu_get_ss_member_get_status_offset(struct gk20a *g, + struct nvgpu_pmu *pmu, u32 member_id) +{ + return pmu->super_surface->ssmd_get_status[member_id].offset; +} + +u32 nvgpu_pmu_get_ss_member_get_status_size(struct gk20a *g, + struct nvgpu_pmu *pmu, u32 member_id) +{ + return pmu->super_surface->ssmd_get_status[member_id].size; +} + +u32 nvgpu_pmu_get_ss_cmd_fbq_offset(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_super_surface *ss, u32 id) +{ + return (u32)offsetof(struct super_surface, + fbq.cmd_queues.queue[id]); +} + +u32 nvgpu_pmu_get_ss_msg_fbq_offset(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_super_surface *ss) +{ + return (u32)offsetof(struct super_surface, + fbq.msg_queue); +} + +u32 nvgpu_pmu_get_ss_msg_fbq_element_offset(struct gk20a *g, + struct nvgpu_pmu *pmu, struct nvgpu_pmu_super_surface *ss, u32 idx) +{ + return (u32)offsetof(struct super_surface, + fbq.msg_queue.element[idx]); +} + +void nvgpu_pmu_super_surface_deinit(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_super_surface *ss) +{ + nvgpu_log_fn(g, " "); + + if (ss == NULL) { + return; + } + + if (nvgpu_mem_is_valid(&ss->super_surface_buf)) { + nvgpu_dma_free(g, &ss->super_surface_buf); + } + + nvgpu_kfree(g, ss); +} + +int nvgpu_pmu_super_surface_init(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nvgpu_pmu_super_surface **super_surface) +{ + *super_surface = (struct nvgpu_pmu_super_surface *) nvgpu_kzalloc(g, + sizeof(struct nvgpu_pmu_super_surface)); + if (*super_surface == NULL) { + return -ENOMEM; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface_priv.h b/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface_priv.h new file mode 100644 index 000000000..9662b2f38 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface_priv.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef SUPER_SURFACE_PRIV_H +#define SUPER_SURFACE_PRIV_H + +#include +#include +#include +#include +#include + +struct nvgpu_mem; + +/* PMU super surface */ +/* 1MB Bytes for SUPER_SURFACE_SIZE */ +#define SUPER_SURFACE_SIZE (1024U * 1024U) +/* 64K Bytes for command queues */ +#define FBQ_CMD_QUEUES_SIZE (64U * 1024U) +/* 1K Bytes for message queue */ +#define FBQ_MSG_QUEUE_SIZE (1024U) +/* 512 Bytes for SUPER_SURFACE_MEMBER_DESCRIPTOR */ +#define SSMD_SIZE (512U) +/* 16 bytes for SUPER_SURFACE_HDR */ +#define SS_HDR_SIZE (16U) +#define SS_UNMAPPED_MEMBERS_SIZE (SUPER_SURFACE_SIZE - \ + (FBQ_CMD_QUEUES_SIZE + FBQ_MSG_QUEUE_SIZE + SSMD_SIZE + SS_HDR_SIZE)) + +/* SSMD */ +#define NV_PMU_SUPER_SURFACE_MEMBER_DESCRIPTOR_COUNT 32U + +/* + * Defines the structure of the @ nv_pmu_super_surface_member_descriptor::id + */ +#define NV_RM_PMU_SUPER_SURFACE_MEMBER_ID_GROUP 0x0000U +#define NV_RM_PMU_SUPER_SURFACE_MEMBER_ID_GROUP_INVALID 0xFFFFU +#define NV_RM_PMU_SUPER_SURFACE_MEMBER_ID_TYPE_SET BIT(16) +#define NV_RM_PMU_SUPER_SURFACE_MEMBER_ID_TYPE_GET_STATUS BIT(17) +#define NV_RM_PMU_SUPER_SURFACE_MEMBER_ID_RSVD (0x00UL << 20U) + +struct super_surface_member_descriptor { + /* The member ID (@see NV_PMU_SUPER_SURFACE_MEMBER_ID_). */ + u32 id; + + /* The sub-structure's byte offset within the super-surface. */ + u32 offset; + + /* The sub-structure's byte size (must always be properly aligned). */ + u32 size; + + /* Reserved (and preserving required size/alignment). */ + u32 rsvd; +}; + +/* PMU super surface */ +struct super_surface_hdr { + struct falc_u64 address; + u32 member_mask; + u16 dmem_buffer_size_max; +}; + +NV_PMU_MAKE_ALIGNED_STRUCT(super_surface_hdr, sizeof(struct super_surface_hdr)); + +/* + * Global Super Surface structure for combined INIT data required by PMU. + * NOTE: Any new substructures or entries must be aligned. + */ +struct super_surface { + struct super_surface_member_descriptor + ssmd[NV_PMU_SUPER_SURFACE_MEMBER_DESCRIPTOR_COUNT]; + + struct { + struct nv_pmu_fbq_cmd_queues cmd_queues; + struct nv_pmu_fbq_msg_queue msg_queue; + } fbq; + + union super_surface_hdr_aligned hdr; + + u8 ss_unmapped_members_rsvd[SS_UNMAPPED_MEMBERS_SIZE]; +}; + +/* nvgpu super surface */ +struct nvgpu_pmu_super_surface { + /* super surface members */ + struct nvgpu_mem super_surface_buf; + + struct super_surface_member_descriptor + ssmd_set[NV_PMU_SUPER_SURFACE_MEMBER_COUNT]; + + struct super_surface_member_descriptor + ssmd_get_status[NV_PMU_SUPER_SURFACE_MEMBER_COUNT]; +}; + +#endif /* SUPER_SURFACE_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/therm/therm_channel.c b/drivers/gpu/nvgpu/common/pmu/therm/therm_channel.c new file mode 100644 index 000000000..d959a0015 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/therm/therm_channel.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "therm_dev.h" +#include "therm_channel.h" +#include "ucode_therm_inf.h" +#include "thrm.h" + +static int _therm_channel_pmudatainit_device(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct therm_channel *pchannel; + struct therm_channel_device *ptherm_channel; + struct nv_pmu_therm_therm_channel_device_boardobj_set *pset; + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for therm channel 0x%x", + status); + status = -ENOMEM; + goto done; + } + + pchannel = (struct therm_channel *)(void *)obj; + pset = (struct nv_pmu_therm_therm_channel_device_boardobj_set *) + (void *)pmu_obj; + ptherm_channel = (struct therm_channel_device *)(void *)obj; + + pset->super.scaling = pchannel->scaling; + pset->super.offset = pchannel->offset; + pset->super.temp_min = pchannel->temp_min; + pset->super.temp_max = pchannel->temp_max; + + pset->therm_dev_idx = ptherm_channel->therm_dev_idx; + pset->therm_dev_prov_idx = ptherm_channel->therm_dev_prov_idx; + +done: + return status; +} +static struct pmu_board_obj *construct_channel_device(struct gk20a *g, + void *pargs, size_t pargs_size, u8 type) +{ + struct pmu_board_obj *obj = NULL; + struct therm_channel *pchannel; + struct therm_channel_device *pchannel_device; + int status; + u16 scale_shift = BIT16(8); + struct therm_channel_device *therm_device = (struct therm_channel_device*)pargs; + + pchannel_device = nvgpu_kzalloc(g, pargs_size); + if (pchannel_device == NULL) { + return NULL; + } + obj = (struct pmu_board_obj *)(void *)pchannel_device; + + status = pmu_board_obj_construct_super(g, obj, pargs); + if (status != 0) { + return NULL; + } + + /* Set Super class interfaces */ + obj->pmudatainit = _therm_channel_pmudatainit_device; + + pchannel = (struct therm_channel *)(void *)obj; + pchannel_device = (struct therm_channel_device *)(void *)obj; + + g->ops.therm.get_internal_sensor_limits(&pchannel->temp_max, + &pchannel->temp_min); + pchannel->scaling = S16(scale_shift); + pchannel->offset = 0; + + pchannel_device->therm_dev_idx = therm_device->therm_dev_idx; + pchannel_device->therm_dev_prov_idx = therm_device->therm_dev_prov_idx; + + nvgpu_log_info(g, " Done"); + + return obj; +} + +static int _therm_channel_pmudata_instget(struct gk20a *g, + 
struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_therm_therm_channel_boardobj_grp_set *pgrp_set = + (struct nv_pmu_therm_therm_channel_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + + nvgpu_log_info(g, " Done"); + + return 0; +} + +static int therm_channel_pmustatus_instget(struct gk20a *g, + void *pboardobjgrppmu, struct nv_pmu_boardobj_query + **obj_pmu_status, u8 idx) +{ + struct nv_pmu_therm_therm_channel_boardobj_grp_get_status *pmu_status = + (struct nv_pmu_therm_therm_channel_boardobj_grp_get_status *) + (void *)pboardobjgrppmu; + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pmu_status->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *obj_pmu_status = (struct nv_pmu_boardobj_query *) + &pmu_status->objects[idx].data.obj; + return 0; +} + +static int devinit_get_therm_channel_table(struct gk20a *g, + struct therm_channels *pthermchannelobjs) +{ + int status = 0; + u8 *therm_channel_table_ptr = NULL; + u8 *curr_therm_channel_table_ptr = NULL; + struct pmu_board_obj *obj_tmp; + struct therm_channel_1x_header therm_channel_table_header = { 0 }; + struct therm_channel_1x_entry *therm_channel_table_entry = NULL; + u32 index; + u32 obj_index = 0; + size_t therm_channel_size = 0; + union { + struct pmu_board_obj obj; + struct therm_channel therm_channel; + struct therm_channel_device device; + } therm_channel_data; + + nvgpu_log_info(g, " "); + + therm_channel_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + THERMAL_CHANNEL_TABLE); + if (therm_channel_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&therm_channel_table_header, therm_channel_table_ptr, + VBIOS_THERM_CHANNEL_1X_HEADER_SIZE_09); + + if (therm_channel_table_header.version != + VBIOS_THERM_CHANNEL_VERSION_1X) { + status = -EINVAL; + goto done; + } + + if (therm_channel_table_header.header_size < + VBIOS_THERM_CHANNEL_1X_HEADER_SIZE_09) { + status = -EINVAL; + goto done; + } + + curr_therm_channel_table_ptr = (therm_channel_table_ptr + + VBIOS_THERM_CHANNEL_1X_HEADER_SIZE_09); + + for (index = 0; index < therm_channel_table_header.num_table_entries; + index++) { + therm_channel_table_entry = (struct therm_channel_1x_entry *) + (curr_therm_channel_table_ptr + + (therm_channel_table_header.table_entry_size * index)); + + if (therm_channel_table_entry->class_id != + NV_VBIOS_THERM_CHANNEL_1X_ENTRY_CLASS_DEVICE) { + continue; + } + + therm_channel_data.device.therm_dev_idx = therm_channel_table_entry->param0; + /* Check for valid therm device index */ + if (!therm_device_idx_is_valid(g->pmu->therm_pmu, + therm_channel_data.device.therm_dev_idx)) { + continue; + } + therm_channel_data.device.therm_dev_prov_idx = therm_channel_table_entry->param1; + + therm_channel_size = sizeof(struct therm_channel_device); + therm_channel_data.obj.type = CTRL_THERMAL_THERM_CHANNEL_CLASS_DEVICE; + + obj_tmp = construct_channel_device(g, &therm_channel_data, + therm_channel_size, therm_channel_data.obj.type); + + if (obj_tmp == NULL) { + nvgpu_err(g, + "unable to create thermal device for %d type %d", + index, therm_channel_data.obj.type); + status = -EINVAL; + goto 
done; + } + + status = boardobjgrp_objinsert(&pthermchannelobjs->super.super, + obj_tmp, obj_index); + + if (status != 0) { + nvgpu_err(g, + "unable to insert thermal device boardobj for %d", index); + status = -EINVAL; + goto done; + } + + ++obj_index; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int therm_channel_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct therm_channels *pthermchannelobjs; + + /* Construct the Super Class and override the Interfaces */ + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->therm_pmu->therm_channelobjs.super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for therm devices, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &g->pmu->therm_pmu->therm_channelobjs.super.super; + pthermchannelobjs = &(g->pmu->therm_pmu->therm_channelobjs); + + /* Override the Interfaces */ + pboardobjgrp->pmudatainstget = _therm_channel_pmudata_instget; + pboardobjgrp->pmustatusinstget = therm_channel_pmustatus_instget; + + status = devinit_get_therm_channel_table(g, pthermchannelobjs); + if (status != 0) { + goto done; + } + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, THERM, THERM_CHANNEL); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + therm, THERM, therm_channel, THERM_CHANNEL); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, pboardobjgrp, + therm, THERM, therm_channel, THERM_CHANNEL); + if (status != 0) { + nvgpu_err(g, + "error constructing THERM_GET_STATUS interface - 0x%x", + status); + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +static int therm_channel_currtemp_update(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + struct therm_channel_get_status *therm_channel_obj; + struct nv_pmu_therm_therm_channel_boardobj_get_status *pstatus; + + nvgpu_log_info(g, " "); + + therm_channel_obj = (struct therm_channel_get_status *) + (void *)obj; + pstatus = (struct nv_pmu_therm_therm_channel_boardobj_get_status *) + (void *)pmu_obj; + + if (pstatus->super.type != therm_channel_obj->super.type) { + nvgpu_err(g, "pmu data and boardobj type not matching"); + return -EINVAL; + } + + therm_channel_obj->curr_temp = pstatus->current_temp; + return 0; +} + +static int therm_channel_boardobj_grp_get_status(struct gk20a *g) +{ + struct boardobjgrp *pboardobjgrp = NULL; + struct boardobjgrpmask *pboardobjgrpmask; + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu; + struct pmu_board_obj *obj = NULL; + struct nv_pmu_boardobj_query *pboardobjpmustatus = NULL; + int status; + u8 index; + + nvgpu_log_info(g, " "); + + if (g->pmu->therm_pmu == NULL) { + return -EINVAL; + } + + pboardobjgrp = &g->pmu->therm_pmu->therm_channelobjs.super.super; + pboardobjgrpmask = &g->pmu->therm_pmu->therm_channelobjs.super.mask.super; + status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); + if (status != 0) { + nvgpu_err(g, "err getting boardobjs from pmu"); + return status; + } + pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + status = pboardobjgrp->pmustatusinstget(g, + (struct nv_pmu_boardobjgrp *)(void *)pboardobjgrppmu, + &pboardobjpmustatus, index); + if (status != 0) { + nvgpu_err(g, "could not get status object instance"); + return 
status; + } + status = therm_channel_currtemp_update(g, obj, + (struct nv_pmu_boardobj *)(void *)pboardobjpmustatus); + if (status != 0) { + nvgpu_err(g, "could not update therm_channel status"); + return status; + } + } + return 0; + +} + +int nvgpu_pmu_therm_channel_get_curr_temp(struct gk20a *g, u32 *temp) +{ + struct boardobjgrp *pboardobjgrp; + struct pmu_board_obj *obj = NULL; + struct therm_channel_get_status *therm_channel_status = NULL; + int status; + u8 index; + + status = therm_channel_boardobj_grp_get_status(g); + if (status != 0) { + nvgpu_err(g, "therm_channel get status failed"); + return status; + } + + pboardobjgrp = &g->pmu->therm_pmu->therm_channelobjs.super.super; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + therm_channel_status = (struct therm_channel_get_status *) + (void *)obj; + if (therm_channel_status->curr_temp != 0U) { + *temp = therm_channel_status->curr_temp; + return status; + } + } + return status; +} + +int therm_channel_pmu_setup(struct gk20a *g) +{ + int status = 0; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + if (!BOARDOBJGRP_IS_EMPTY( + &g->pmu->therm_pmu->therm_channelobjs.super.super)) { + pboardobjgrp = + &g->pmu->therm_pmu->therm_channelobjs.super.super; + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + if (status != 0) { + goto exit; + } + } + +exit: + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/therm/therm_channel.h b/drivers/gpu/nvgpu/common/pmu/therm/therm_channel.h new file mode 100644 index 000000000..c770be675 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/therm/therm_channel.h @@ -0,0 +1,56 @@ +/* + * general thermal device structures & definitions + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_THERM_THRMCHANNEL_H +#define NVGPU_THERM_THRMCHANNEL_H + +#include +#include + +struct therm_channel { + struct pmu_board_obj super; + s16 scaling; + s16 offset; + s32 temp_min; + s32 temp_max; +}; + +struct therm_channels { + struct boardobjgrp_e32 super; +}; + +struct therm_channel_device { + struct therm_channel super; + u8 therm_dev_idx; + u8 therm_dev_prov_idx; +}; + +struct therm_channel_get_status { + struct pmu_board_obj super; + u32 curr_temp; +}; + +int therm_channel_sw_setup(struct gk20a *g); +int therm_channel_pmu_setup(struct gk20a *g); + +#endif /* NVGPU_THERM_THRMCHANNEL_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/therm/therm_dev.c b/drivers/gpu/nvgpu/common/pmu/therm/therm_dev.c new file mode 100644 index 000000000..ada084023 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/therm/therm_dev.c @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "therm_dev.h" +#include "ucode_therm_inf.h" +#include "thrm.h" + +bool therm_device_idx_is_valid(struct nvgpu_pmu_therm *therm_pmu, u8 idx) +{ + return boardobjgrp_idxisvalid( + &(therm_pmu->therm_deviceobjs.super.super), idx); +} + +static int _therm_device_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, + u8 idx) +{ + struct nv_pmu_therm_therm_device_boardobj_grp_set *pgrp_set = + (struct nv_pmu_therm_therm_device_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *)(void *) + &pgrp_set->objects[idx].data; + + nvgpu_log_info(g, " Done"); + + return 0; +} + +static int construct_therm_device(struct gk20a *g, + struct pmu_board_obj *obj, void *pargs) +{ + return pmu_board_obj_construct_super(g, obj, pargs); +} + +static int construct_therm_device_gpu(struct gk20a *g, + struct pmu_board_obj *obj, void *pargs) +{ + return construct_therm_device(g, obj, pargs); +} + +static struct pmu_board_obj *therm_device_construct(struct gk20a *g, + void *pargs) +{ + struct pmu_board_obj *obj = NULL; + struct therm_device *ptherm_device = NULL; + int status = 0; + + ptherm_device = nvgpu_kzalloc(g, sizeof(struct therm_device)); + if (ptherm_device == NULL) { + return NULL; + } + obj = (struct pmu_board_obj *)(void *)ptherm_device; + + if (pmu_board_obj_get_type(pargs) == + NV_VBIOS_THERM_DEVICE_1X_ENTRY_CLASS_GPU) { + status = construct_therm_device_gpu(g, obj, pargs); + } else { + nvgpu_err(g, "unsupported therm_device class - 0x%x", + pmu_board_obj_get_type(pargs)); + return NULL; + } + + if(status != 0) { + obj = NULL; + nvgpu_err(g, + "could not allocate memory for therm_device"); + if (obj != NULL) { + nvgpu_kfree(g, obj); + } + } + + + return obj; +} + +static int devinit_get_therm_device_table(struct gk20a *g, + struct therm_devices *pthermdeviceobjs) +{ + int status = 0; + u8 *therm_device_table_ptr = NULL; + u8 *curr_therm_device_table_ptr = NULL; + struct pmu_board_obj *obj_tmp; + struct therm_device_1x_header therm_device_table_header = { 0 }; + struct therm_device_1x_entry *therm_device_table_entry = NULL; + u32 index; + u32 obj_index = 0; + u8 class_id = 0; + bool error_status = false; + union { + struct pmu_board_obj obj; + struct therm_device therm_device; + } therm_device_data; + + nvgpu_log_info(g, " "); + + therm_device_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + THERMAL_DEVICE_TABLE); + if (therm_device_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&therm_device_table_header, therm_device_table_ptr, + VBIOS_THERM_DEVICE_1X_HEADER_SIZE_04); + + if (therm_device_table_header.version != + VBIOS_THERM_DEVICE_VERSION_1X) { + status = -EINVAL; + goto done; + } + + if (therm_device_table_header.header_size < + VBIOS_THERM_DEVICE_1X_HEADER_SIZE_04) { + status = -EINVAL; + goto done; + } + + curr_therm_device_table_ptr = (therm_device_table_ptr + + VBIOS_THERM_DEVICE_1X_HEADER_SIZE_04); + + for (index = 0; index < therm_device_table_header.num_table_entries; + index++) { + therm_device_table_entry = (struct therm_device_1x_entry *) + (curr_therm_device_table_ptr + + (therm_device_table_header.table_entry_size * index)); + + 
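+		/*
+		 * Only the GPU-internal device class is instantiated below;
+		 * SCI and TSOSC entries are skipped and an unknown class
+		 * aborts the table parse.
+		 */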
class_id = therm_device_table_entry->class_id; + + switch (class_id) { + case NV_VBIOS_THERM_DEVICE_1X_ENTRY_CLASS_INVALID: + continue; + break; + case NV_VBIOS_THERM_DEVICE_1X_ENTRY_CLASS_GPU: + break; + case NV_VBIOS_THERM_DEVICE_1X_ENTRY_CLASS_GPU_GPC_SCI: + continue; + break; + case NV_VBIOS_THERM_DEVICE_1X_ENTRY_CLASS_GPU_GPC_TSOSC: + continue; + break; + default: + nvgpu_err(g, + "Unknown thermal device class i - %x, class - %x", + index, class_id); + error_status = true; + break; + } + + if (error_status == true) { + goto done; + } + + therm_device_data.obj.type = class_id; + obj_tmp = therm_device_construct(g, &therm_device_data); + if (obj_tmp == NULL) { + nvgpu_err(g, + "unable to create thermal device for %d type %d", + index, therm_device_data.obj.type); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert(&pthermdeviceobjs->super.super, + obj_tmp, obj_index); + + if (status != 0) { + nvgpu_err(g, + "unable to insert thermal device boardobj for %d", index); + status = -EINVAL; + goto done; + } + + ++obj_index; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int therm_device_sw_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + struct therm_devices *pthermdeviceobjs; + + /* Construct the Super Class and override the Interfaces */ + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->therm_pmu->therm_deviceobjs.super); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for therm devices," + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &g->pmu->therm_pmu->therm_deviceobjs.super.super; + pthermdeviceobjs = &(g->pmu->therm_pmu->therm_deviceobjs); + + /* Override the Interfaces */ + pboardobjgrp->pmudatainstget = _therm_device_pmudata_instget; + + status = devinit_get_therm_device_table(g, pthermdeviceobjs); + if (status != 0) { + goto done; + } + + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, THERM, THERM_DEVICE); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + therm, THERM, therm_device, THERM_DEVICE); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int therm_device_pmu_setup(struct gk20a *g) +{ + int status = 0; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + if (!BOARDOBJGRP_IS_EMPTY( + &g->pmu->therm_pmu->therm_deviceobjs.super.super)) { + pboardobjgrp = &g->pmu->therm_pmu->therm_deviceobjs.super.super; + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + if (status != 0) { + goto exit; + } + } + +exit: + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/therm/therm_dev.h b/drivers/gpu/nvgpu/common/pmu/therm/therm_dev.h new file mode 100644 index 000000000..93e20f6a7 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/therm/therm_dev.h @@ -0,0 +1,44 @@ +/* + * general thermal device structures & definitions + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_THERM_THRMDEV_H +#define NVGPU_THERM_THRMDEV_H + +#include +#include + +struct nvgpu_pmu_therm; + +struct therm_devices { + struct boardobjgrp_e32 super; +}; + +struct therm_device { + struct pmu_board_obj super; +}; + +int therm_device_sw_setup(struct gk20a *g); +int therm_device_pmu_setup(struct gk20a *g); +bool therm_device_idx_is_valid(struct nvgpu_pmu_therm *therm_pmu, u8 idx); + +#endif /* NVGPU_THERM_THRMDEV_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/therm/thrm.c b/drivers/gpu/nvgpu/common/pmu/therm/thrm.c new file mode 100644 index 000000000..be19e2303 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/therm/thrm.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include "thrm.h" + +static void therm_unit_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nv_pmu_rpc_header *rpc) +{ + switch (rpc->function) { + case NV_PMU_RPC_ID_THERM_BOARD_OBJ_GRP_CMD: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_THERM_BOARD_OBJ_GRP_CMD"); + break; + default: + nvgpu_pmu_dbg(g, "reply PMU_UNIT_THERM"); + break; + } +} + +int nvgpu_pmu_therm_sw_setup(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + int status; + + status = therm_device_sw_setup(g); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for therm devices, status - 0x%x", + status); + goto exit; + } + + status = therm_channel_sw_setup(g); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for therm channel, status - 0x%x", + status); + goto exit; + } + + pmu->therm_rpc_handler = therm_unit_rpc_handler; + +exit: + return status; +} + +int nvgpu_pmu_therm_pmu_setup(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + int status; + + status = therm_device_pmu_setup(g); + if (status != 0) { + nvgpu_err(g, "Therm device pmu setup failed - 0x%x", status); + goto exit; + } + + status = therm_channel_pmu_setup(g); + if (status != 0) { + nvgpu_err(g,"Therm channel pmu setup failed - 0x%x", status); + goto exit; + } + +exit: + return status; +} + +int nvgpu_pmu_therm_init(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + /* If already allocated, do not re-allocate */ + if (pmu->therm_pmu != NULL) { + return 0; + } + + pmu->therm_pmu = nvgpu_kzalloc(g, sizeof(*(pmu->therm_pmu))); + if (pmu->therm_pmu == NULL) { + return -ENOMEM; + } + + return 0; +} + +void nvgpu_pmu_therm_deinit(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + nvgpu_kfree(g, pmu->therm_pmu); + pmu->therm_pmu = NULL; +} diff --git a/drivers/gpu/nvgpu/common/pmu/therm/thrm.h b/drivers/gpu/nvgpu/common/pmu/therm/thrm.h new file mode 100644 index 000000000..ebbd28074 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/therm/thrm.h @@ -0,0 +1,35 @@ +/* + * general thermal table structures & definitions + * + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_THERM_THRM_H +#define NVGPU_THERM_THRM_H + +#include "therm_channel.h" +#include "therm_dev.h" + +struct nvgpu_pmu_therm { + struct therm_devices therm_deviceobjs; + struct therm_channels therm_channelobjs; +}; + +#endif /* NVGPU_THERM_THRM_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/therm/ucode_therm_inf.h b/drivers/gpu/nvgpu/common/pmu/therm/ucode_therm_inf.h new file mode 100644 index 000000000..b8609075d --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/therm/ucode_therm_inf.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PMU_THERM_INF_H +#define NVGPU_PMU_THERM_INF_H + +#define CTRL_THERMAL_THERM_DEVICE_CLASS_GPU 0x01 +#define CTRL_THERMAL_THERM_CHANNEL_CLASS_DEVICE 0x01 + +#define NV_PMU_THERM_CMD_ID_BOARDOBJ_GRP_SET 0x0000000B +#define NV_PMU_THERM_MSG_ID_BOARDOBJ_GRP_SET 0x00000008 +#define NV_PMU_THERM_CMD_ID_BOARDOBJ_GRP_GET_STATUS 0x00000001U +#define NV_PMU_THERM_MSG_ID_BOARDOBJ_GRP_GET_STATUS 0x00000001U + +struct nv_pmu_therm_therm_device_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_therm_therm_device_boardobj_set { + struct nv_pmu_boardobj super; +}; + +struct nv_pmu_therm_therm_device_gpu_gpc_tsosc_boardobj_set { + struct nv_pmu_therm_therm_device_boardobj_set super; + u8 gpc_tsosc_idx; +}; + +struct nv_pmu_therm_therm_device_gpu_sci_boardobj_set { + struct nv_pmu_therm_therm_device_boardobj_set super; +}; + +struct nv_pmu_therm_therm_device_i2c_boardobj_set { + struct nv_pmu_therm_therm_device_boardobj_set super; + u8 i2c_dev_idx; +}; + +struct nv_pmu_therm_therm_device_hbm2_site_boardobj_set { + struct nv_pmu_therm_therm_device_boardobj_set super; + u8 site_idx; +}; + +struct nv_pmu_therm_therm_device_hbm2_combined_boardobj_set { + struct nv_pmu_therm_therm_device_boardobj_set super; +}; + +union nv_pmu_therm_therm_device_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_therm_therm_device_boardobj_set device; + struct nv_pmu_therm_therm_device_gpu_gpc_tsosc_boardobj_set + gpu_gpc_tsosc; + struct nv_pmu_therm_therm_device_gpu_sci_boardobj_set gpu_sci; + struct nv_pmu_therm_therm_device_i2c_boardobj_set i2c; + struct nv_pmu_therm_therm_device_hbm2_site_boardobj_set hbm2_site; + struct nv_pmu_therm_therm_device_hbm2_combined_boardobj_set + hbm2_combined; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(therm, therm_device); + +struct 
nv_pmu_therm_therm_channel_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_therm_therm_channel_boardobj_set { + struct nv_pmu_boardobj super; + s16 scaling; + s16 offset; + s32 temp_min; + s32 temp_max; +}; + +struct nv_pmu_therm_therm_channel_device_boardobj_set { + struct nv_pmu_therm_therm_channel_boardobj_set super; + u8 therm_dev_idx; + u8 therm_dev_prov_idx; +}; + +union nv_pmu_therm_therm_channel_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_therm_therm_channel_boardobj_set channel; + struct nv_pmu_therm_therm_channel_device_boardobj_set device; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(therm, therm_channel); + +struct nv_pmu_therm_therm_channel_boardobjgrp_get_status_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_therm_therm_channel_boardobj_get_status +{ + struct nv_pmu_boardobj_query super; + u32 current_temp; +}; + +union nv_pmu_therm_therm_channel_boardobj_get_status_union +{ + struct nv_pmu_boardobj_query obj; + struct nv_pmu_therm_therm_channel_boardobj_get_status therm_channel; +}; + +NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E32(therm, therm_channel); + +#endif /* NVGPU_PMU_THERM_INF_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/volt/ucode_volt_inf.h b/drivers/gpu/nvgpu/common/pmu/volt/ucode_volt_inf.h new file mode 100644 index 000000000..d5a6c2191 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/ucode_volt_inf.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_PMUIF_VOLT_H +#define NVGPU_PMUIF_VOLT_H + +#include + +struct nv_pmu_volt_volt_rail_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_volt_volt_rail_boardobj_set { + + struct nv_pmu_boardobj super; + u8 rel_limit_vfe_equ_idx; + u8 alt_rel_limit_vfe_equ_idx; + u8 ov_limit_vfe_equ_idx; + u8 vmin_limit_vfe_equ_idx; + u8 volt_margin_limit_vfe_equ_idx; + u8 pwr_equ_idx; + u8 volt_dev_idx_default; + u8 volt_dev_idx_ipc_vmin; + u8 volt_scale_exp_pwr_equ_idx; + struct ctrl_boardobjgrp_mask_e32 vin_dev_mask; + struct ctrl_boardobjgrp_mask_e32 volt_dev_mask; + s32 volt_delta_uv[CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES]; +}; + +union nv_pmu_volt_volt_rail_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_volt_volt_rail_boardobj_set super; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(volt, volt_rail); + +/* ------------ VOLT_DEVICE's GRP_SET defines and structures ------------ */ + +struct nv_pmu_volt_volt_device_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_volt_volt_device_boardobj_set { + struct nv_pmu_boardobj super; + u32 switch_delay_us; + u32 voltage_min_uv; + u32 voltage_max_uv; + u32 volt_step_uv; +}; + +struct nv_pmu_volt_volt_device_vid_boardobj_set { + struct nv_pmu_volt_volt_device_boardobj_set super; + s32 voltage_base_uv; + s32 voltage_offset_scale_uv; + u8 gpio_pin[CTRL_VOLT_VOLT_DEV_VID_VSEL_MAX_ENTRIES]; + u8 vsel_mask; +}; + +struct nv_pmu_volt_volt_device_pwm_boardobj_set { + struct nv_pmu_volt_volt_device_boardobj_set super; + u32 raw_period; + s32 voltage_base_uv; + s32 voltage_offset_scale_uv; + enum nv_pmu_pmgr_pwm_source pwm_source; +}; + +union nv_pmu_volt_volt_device_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_volt_volt_device_boardobj_set super; + struct nv_pmu_volt_volt_device_vid_boardobj_set vid; + struct nv_pmu_volt_volt_device_pwm_boardobj_set pwm; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(volt, volt_device); + +/* ------------ VOLT_POLICY's GRP_SET defines and structures ------------ */ +struct nv_pmu_volt_volt_policy_boardobjgrp_set_header { + struct nv_pmu_boardobjgrp_e32 super; + u8 perf_core_vf_seq_policy_idx; +}; + +struct nv_pmu_volt_volt_policy_boardobj_set { + struct nv_pmu_boardobj super; +}; +struct nv_pmu_volt_volt_policy_sr_boardobj_set { + struct nv_pmu_volt_volt_policy_boardobj_set super; + u8 rail_idx; +}; + +struct nv_pmu_volt_volt_policy_sr_multi_step_boardobj_set { + struct nv_pmu_volt_volt_policy_sr_boardobj_set super; + u16 inter_switch_delay_us; + u32 ramp_up_step_size_uv; + u32 ramp_down_step_size_uv; +}; + +struct nv_pmu_volt_volt_policy_splt_r_boardobj_set { + struct nv_pmu_volt_volt_policy_boardobj_set super; + u8 rail_idx_master; + u8 rail_idx_slave; + u8 delta_min_vfe_equ_idx; + u8 delta_max_vfe_equ_idx; + s32 offset_delta_min_uv; + s32 offset_delta_max_uv; +}; + +struct nv_pmu_volt_volt_policy_srms_boardobj_set { + struct nv_pmu_volt_volt_policy_splt_r_boardobj_set super; + u16 inter_switch_delayus; +}; + +/* sr - > single_rail */ +struct nv_pmu_volt_volt_policy_srss_boardobj_set { + struct nv_pmu_volt_volt_policy_splt_r_boardobj_set super; +}; + +union nv_pmu_volt_volt_policy_boardobj_set_union { + struct nv_pmu_boardobj obj; + struct nv_pmu_volt_volt_policy_boardobj_set super; + struct nv_pmu_volt_volt_policy_sr_boardobj_set single_rail; + struct nv_pmu_volt_volt_policy_sr_multi_step_boardobj_set + single_rail_ms; + struct nv_pmu_volt_volt_policy_splt_r_boardobj_set split_rail; + struct 
nv_pmu_volt_volt_policy_srms_boardobj_set + split_rail_m_s; + struct nv_pmu_volt_volt_policy_srss_boardobj_set + split_rail_s_s; +}; + +NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(volt, volt_policy); + +/* ----------- VOLT_RAIL's GRP_GET_STATUS defines and structures ----------- */ +struct nv_pmu_volt_volt_rail_boardobjgrp_get_status_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_volt_volt_rail_boardobj_get_status { + struct nv_pmu_boardobj_query super; + u32 curr_volt_defaultu_v; + u32 rel_limitu_v; + u32 alt_rel_limitu_v; + u32 ov_limitu_v; + u32 max_limitu_v; + u32 vmin_limitu_v; + s32 volt_margin_limitu_v; + u32 rsvd; +}; + +union nv_pmu_volt_volt_rail_boardobj_get_status_union { + struct nv_pmu_boardobj_query obj; + struct nv_pmu_volt_volt_rail_boardobj_get_status super; +}; + +NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E32(volt, volt_rail); + +#define NV_PMU_VOLT_CMD_ID_BOARDOBJ_GRP_SET (0x00000000U) +#define NV_PMU_VOLT_CMD_ID_RPC (0x00000001U) +#define NV_PMU_VOLT_CMD_ID_BOARDOBJ_GRP_GET_STATUS (0x00000002U) + +/* + * VOLT MSG ID definitions + */ +#define NV_PMU_VOLT_MSG_ID_BOARDOBJ_GRP_SET (0x00000000U) +#define NV_PMU_VOLT_MSG_ID_RPC (0x00000001U) +#define NV_PMU_VOLT_MSG_ID_BOARDOBJ_GRP_GET_STATUS (0x00000002U) + +/* VOLT RPC */ +#define NV_PMU_RPC_ID_VOLT_BOARD_OBJ_GRP_CMD 0x00U +#define NV_PMU_RPC_ID_VOLT_VOLT_SET_VOLTAGE 0x01U +#define NV_PMU_RPC_ID_VOLT_LOAD 0x02U +#define NV_PMU_RPC_ID_VOLT_VOLT_RAIL_GET_VOLTAGE 0x03U +#define NV_PMU_RPC_ID_VOLT_VOLT_POLICY_SANITY_CHECK 0x04U +#define NV_PMU_RPC_ID_VOLT_TEST_EXECUTE 0x05U +#define NV_PMU_RPC_ID_VOLT__COUNT 0x06U + +/* + * Defines the structure that holds data + * used to execute LOAD RPC. + */ +struct nv_pmu_rpc_struct_volt_load { + /*[IN/OUT] Must be first field in RPC structure */ + struct nv_pmu_rpc_header hdr; + u32 scratch[1]; +}; + +#endif /* NVGPU_PMUIF_VOLT_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt.c b/drivers/gpu/nvgpu/common/pmu/volt/volt.c new file mode 100644 index 000000000..61af6f8ff --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include "volt.h" +#include "volt_rail.h" +#include "volt_dev.h" +#include "volt_policy.h" + + +static int volt_send_load_cmd_to_pmu(struct gk20a *g) +{ + struct nvgpu_pmu *pmu = g->pmu; + struct nv_pmu_rpc_struct_volt_load rpc; + int status = 0; + + (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_volt_load)); + PMU_RPC_EXECUTE(status, pmu, VOLT, LOAD, &rpc, 0); + if (status != 0) { + nvgpu_err(g, "Failed to execute RPC status=0x%x", + status); + } + + return status; +} + +void nvgpu_pmu_volt_rpc_handler(struct gk20a *g, struct nv_pmu_rpc_header *rpc) +{ + switch (rpc->function) { + case NV_PMU_RPC_ID_VOLT_BOARD_OBJ_GRP_CMD: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_VOLT_BOARD_OBJ_GRP_CMD"); + break; + case NV_PMU_RPC_ID_VOLT_LOAD: + nvgpu_pmu_dbg(g, + "reply NV_PMU_RPC_ID_VOLT_LOAD"); + break; + default: + nvgpu_pmu_dbg(g, "invalid reply"); + break; + } +} + +int nvgpu_pmu_volt_sw_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = volt_rail_sw_setup(g); + if (err != 0) { + return err; + } + + err = volt_dev_sw_setup(g); + if (err != 0) { + return err; + } + + err = volt_policy_sw_setup(g); + if (err != 0) { + return err; + } + + g->pmu->volt->volt_rpc_handler = nvgpu_pmu_volt_rpc_handler; + + return 0; +} + +int nvgpu_pmu_volt_init(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + /* If already allocated, do not re-allocate */ + if (g->pmu->volt != NULL) { + return 0; + } + + g->pmu->volt = (struct nvgpu_pmu_volt *) nvgpu_kzalloc(g, + sizeof(struct nvgpu_pmu_volt)); + if (g->pmu->volt == NULL) { + err = -ENOMEM; + return err; + } + + g->pmu->volt->volt_metadata = (struct nvgpu_pmu_volt_metadata *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_pmu_volt_metadata)); + if (g->pmu->volt->volt_metadata == NULL) { + err = -ENOMEM; + return err; + } + + return err; +} + +void nvgpu_pmu_volt_deinit(struct gk20a *g) +{ + if (g->pmu == NULL) { + return; + } + if ((g->pmu->volt != NULL) && (g->pmu->volt->volt_metadata != NULL)) { + nvgpu_kfree(g, g->pmu->volt->volt_metadata); + nvgpu_kfree(g, g->pmu->volt); + g->pmu->volt = NULL; + } +} + +int nvgpu_pmu_volt_pmu_setup(struct gk20a *g) +{ + int err; + nvgpu_log_fn(g, " "); + + err = volt_rail_pmu_setup(g); + if (err != 0) { + return err; + } + + err = volt_dev_pmu_setup(g); + if (err != 0) { + return err; + } + + err = volt_policy_pmu_setup(g); + if (err != 0) { + return err; + } + + err = volt_send_load_cmd_to_pmu(g); + if (err != 0) { + nvgpu_err(g, + "Failed to send VOLT LOAD CMD to PMU: status = 0x%08x.", + err); + return err; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt.h b/drivers/gpu/nvgpu/common/pmu/volt/volt.h new file mode 100644 index 000000000..d1f4a726c --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_VOLT_H +#define NVGPU_VOLT_H + +#include +#include + +#define CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES 0x04U + + +#define CTRL_VOLT_VOLT_RAIL_MAX_RAILS \ + CTRL_BOARDOBJGRP_E32_MAX_OBJECTS + +#define CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES 0x04U +#define CTRL_VOLT_VOLT_DEV_VID_VSEL_MAX_ENTRIES 0x8U +#define CTRL_VOLT_DOMAIN_INVALID 0x00U +#define CLK_PROG_VFE_ENTRY_SRAM 0x01U +#define NV_PMU_VOLT_VALUE_0V_IN_UV 0U + +/* + * Macros for Voltage Domain HAL. + */ +#define CTRL_VOLT_DOMAIN_HAL_GP10X_SINGLE_RAIL 0x00U + +/*! + * Special value corresponding to an invalid Voltage Rail Index. + */ +#define CTRL_VOLT_RAIL_INDEX_INVALID \ + CTRL_BOARDOBJ_IDX_INVALID + +/*! + * Special value corresponding to an invalid Voltage Device Index. + */ +#define CTRL_VOLT_DEVICE_INDEX_INVALID \ + CTRL_BOARDOBJ_IDX_INVALID + +/*! + * Special value corresponding to an invalid Voltage Policy Index. + */ +#define CTRL_VOLT_POLICY_INDEX_INVALID \ + CTRL_BOARDOBJ_IDX_INVALID + +enum nv_pmu_pmgr_pwm_source { + NV_PMU_PMGR_PWM_SOURCE_INVALID = 0, + NV_PMU_PMGR_PWM_SOURCE_THERM_VID_PWM_0 = 4, + NV_PMU_PMGR_PWM_SOURCE_THERM_IPC_VMIN_VID_PWM_0 = 13U, +}; + +/*! + * Macros for Voltage Device Types. + */ +#define CTRL_VOLT_DEVICE_TYPE_INVALID 0x00U +#define CTRL_VOLT_DEVICE_TYPE_PWM 0x03U + +/* + * Macros for Volt Device Operation types. + */ +#define CTRL_VOLT_DEVICE_OPERATION_TYPE_INVALID 0x00U +#define CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT 0x01U +#define CTRL_VOLT_VOLT_DEVICE_OPERATION_TYPE_IPC_VMIN 0x04U + +/*! + * Macros for Voltage Domains. + */ +#define CTRL_VOLT_DOMAIN_INVALID 0x00U +#define CTRL_VOLT_DOMAIN_LOGIC 0x01U +#define CTRL_VOLT_DOMAIN_SRAM 0x02U + +/*! + * Macros for Volt Policy types. + * + * Virtual VOLT_POLICY types are indexed starting from 0xFF. + */ +#define CTRL_VOLT_POLICY_TYPE_INVALID 0x00U +#define CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL 0x01U +#define CTRL_VOLT_POLICY_TYPE_SR_MULTI_STEP 0x02U +#define CTRL_VOLT_POLICY_TYPE_SR_SINGLE_STEP 0x03U +#define CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL_MULTI_STEP 0x04U +#define CTRL_VOLT_POLICY_TYPE_SPLIT_RAIL 0xFEU +#define CTRL_VOLT_POLICY_TYPE_UNKNOWN 0xFFU + +/*! + * Macros for Volt Policy Client types. + */ +#define CTRL_VOLT_POLICY_CLIENT_INVALID 0x00U +#define CTRL_VOLT_POLICY_CLIENT_PERF_CORE_VF_SEQ 0x01U + +/*! + * metadata of voltage rail functionality. 
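+ * Holds the VOLT_RAIL board object group plus rail-wide bookkeeping:
+ * the voltage domain HAL taken from the VBIOS rail table header, the
+ * ext_rel_delta_uv offsets (in uV) that are folded into each rail's
+ * volt_delta_uv when the group is exported to the PMU, and cached
+ * indices for the logic and SRAM rails.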
+ */ +struct voltage_rail_metadata { + u8 volt_domain_hal; + u8 pct_delta; + u32 ext_rel_delta_uv[CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES]; + u8 logic_rail_idx; + u8 sram_rail_idx; + struct boardobjgrp_e32 volt_rails; +}; + +struct voltage_device_metadata { + struct boardobjgrp_e32 volt_devices; +}; + +struct voltage_policy_metadata { + struct boardobjgrp_e32 volt_policies; + u8 perf_core_vf_seq_policy_idx; +}; + +struct nvgpu_pmu_volt_metadata { + struct voltage_rail_metadata volt_rail_metadata; + struct voltage_device_metadata volt_dev_metadata; + struct voltage_policy_metadata volt_policy_metadata; +}; + +u8 volt_rail_vbios_volt_domain_convert_to_internal + (struct gk20a *g, u8 vbios_volt_domain); + +#endif /* NVGPU_VOLT_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt_dev.c b/drivers/gpu/nvgpu/common/pmu/volt/volt_dev.c new file mode 100644 index 000000000..4e4ff75e2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt_dev.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "volt.h" +#include "ucode_volt_inf.h" +#include "volt_dev.h" +#include "volt_rail.h" + +static int volt_device_pmu_data_init_super(struct gk20a *g, + struct pmu_board_obj *obj, struct nv_pmu_boardobj *pmu_obj) +{ + int status; + struct voltage_device *pdev; + struct nv_pmu_volt_volt_device_boardobj_set *pset; + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pdev = (struct voltage_device *)(void *)obj; + pset = (struct nv_pmu_volt_volt_device_boardobj_set *)(void *)pmu_obj; + + pset->switch_delay_us = pdev->switch_delay_us; + pset->voltage_min_uv = pdev->voltage_min_uv; + pset->voltage_max_uv = pdev->voltage_max_uv; + pset->volt_step_uv = pdev->volt_step_uv; + + return status; +} + +static int volt_device_pmu_data_init_pwm(struct gk20a *g, + struct pmu_board_obj *obj, struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct voltage_device_pwm *pdev; + struct nv_pmu_volt_volt_device_pwm_boardobj_set *pset; + + status = volt_device_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + pdev = (struct voltage_device_pwm *)(void *)obj; + pset = (struct nv_pmu_volt_volt_device_pwm_boardobj_set *)(void *)pmu_obj; + + pset->raw_period = pdev->raw_period; + pset->voltage_base_uv = pdev->voltage_base_uv; + pset->voltage_offset_scale_uv = pdev->voltage_offset_scale_uv; + pset->pwm_source = pdev->source; + + return status; +} + +static int volt_construct_volt_device(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct voltage_device *ptmp_dev = (struct voltage_device *)pargs; + struct voltage_device *pvolt_dev = NULL; + int status = 0; + + pvolt_dev = nvgpu_kzalloc(g, size); + if (pvolt_dev == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pvolt_dev, pargs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pvolt_dev; + + pvolt_dev->volt_domain = ptmp_dev->volt_domain; + pvolt_dev->i2c_dev_idx = ptmp_dev->i2c_dev_idx; + pvolt_dev->switch_delay_us = ptmp_dev->switch_delay_us; + pvolt_dev->rsvd_0 = VOLTAGE_DESCRIPTOR_TABLE_ENTRY_INVALID; + pvolt_dev->rsvd_1 = + VOLTAGE_DESCRIPTOR_TABLE_ENTRY_INVALID; + pvolt_dev->operation_type = ptmp_dev->operation_type; + pvolt_dev->voltage_min_uv = ptmp_dev->voltage_min_uv; + pvolt_dev->voltage_max_uv = ptmp_dev->voltage_max_uv; + + pvolt_dev->super.pmudatainit = volt_device_pmu_data_init_super; + + return status; +} + +static int volt_construct_pwm_volt_device(struct gk20a *g, + struct pmu_board_obj **obj, + size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = NULL; + struct voltage_device_pwm *ptmp_dev = + (struct voltage_device_pwm *)pargs; + struct voltage_device_pwm *pdev = NULL; + int status = 0; + + status = volt_construct_volt_device(g, obj, size, pargs); + if (status != 0) { + return status; + } + + obj_tmp = (*obj); + pdev = (struct voltage_device_pwm *)(void *)*obj; + + obj_tmp->pmudatainit = volt_device_pmu_data_init_pwm; + + /* Set VOLTAGE_DEVICE_PWM-specific parameters */ + pdev->voltage_base_uv = ptmp_dev->voltage_base_uv; + pdev->voltage_offset_scale_uv = ptmp_dev->voltage_offset_scale_uv; + pdev->source = ptmp_dev->source; + pdev->raw_period = ptmp_dev->raw_period; + + return status; +} + + +static struct voltage_device_entry *volt_dev_construct_dev_entry_pwm( + struct gk20a *g, + 
u32 voltage_uv, void *pargs) +{ + struct voltage_device_pwm_entry *pentry = NULL; + struct voltage_device_pwm_entry *ptmp_entry = + (struct voltage_device_pwm_entry *)pargs; + + pentry = nvgpu_kzalloc(g, sizeof(struct voltage_device_pwm_entry)); + if (pentry == NULL) { + return NULL; + } + + (void) memset(pentry, 0, sizeof(struct voltage_device_pwm_entry)); + + pentry->super.voltage_uv = voltage_uv; + pentry->duty_cycle = ptmp_entry->duty_cycle; + + return (struct voltage_device_entry *)pentry; +} + +static u8 volt_dev_operation_type_convert(u8 vbios_type) +{ + switch (vbios_type) { + case NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_DEFAULT: + return CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT; + case NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_IPC_VMIN: + return CTRL_VOLT_VOLT_DEVICE_OPERATION_TYPE_IPC_VMIN; + } + + return CTRL_VOLT_DEVICE_OPERATION_TYPE_INVALID; +} + +static struct voltage_device *volt_volt_device_construct(struct gk20a *g, + void *pargs) +{ + struct pmu_board_obj *obj = NULL; + + if (pmu_board_obj_get_type(pargs) == CTRL_VOLT_DEVICE_TYPE_PWM) { + int status = volt_construct_pwm_volt_device(g, &obj, + sizeof(struct voltage_device_pwm), pargs); + if (status != 0) { + nvgpu_err(g, + " Could not allocate memory for VOLTAGE_DEVICE type (%x).", + pmu_board_obj_get_type(pargs)); + obj = NULL; + } + } + + return (struct voltage_device *)(void *)obj; +} + +static int volt_get_voltage_device_table_1x_psv(struct gk20a *g, + struct vbios_voltage_device_table_1x_entry *p_bios_entry, + struct voltage_device_metadata *p_Volt_Device_Meta_Data, + u8 entry_Idx) +{ + int status = 0; + u32 entry_cnt = 0; + struct voltage_device *pvolt_dev = NULL; + struct voltage_device_pwm *pvolt_dev_pwm = NULL; + struct voltage_device_pwm *ptmp_dev = NULL; + u32 duty_cycle; + u32 frequency_hz; + u32 voltage_uv; + u8 ext_dev_idx; + u8 steps; + u8 volt_domain = 0; + struct voltage_device_pwm_entry pwm_entry = { { 0 } }; + + ptmp_dev = nvgpu_kzalloc(g, sizeof(struct voltage_device_pwm)); + if (ptmp_dev == NULL) { + return -ENOMEM; + } + + frequency_hz = BIOS_GET_FIELD(u32, p_bios_entry->param0, + NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_INPUT_FREQUENCY); + + ext_dev_idx = BIOS_GET_FIELD(u8, p_bios_entry->param0, + NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_EXT_DEVICE_INDEX); + + ptmp_dev->super.operation_type = volt_dev_operation_type_convert( + BIOS_GET_FIELD(u8, p_bios_entry->param1, + NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE)); + + if (ptmp_dev->super.operation_type == + CTRL_VOLT_DEVICE_OPERATION_TYPE_INVALID) { + nvgpu_err(g, " Invalid Voltage Device Operation Type."); + + status = -EINVAL; + goto done; + } + + /* Skip and return success as ucode doesn't support IPC VMIN type */ + if (ptmp_dev->super.operation_type == + CTRL_VOLT_VOLT_DEVICE_OPERATION_TYPE_IPC_VMIN) { + status = 0; + goto done; + } + + ptmp_dev->super.voltage_min_uv = BIOS_GET_FIELD(u32, + p_bios_entry->param1, + NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_VOLTAGE_MINIMUM); + + ptmp_dev->super.voltage_max_uv = BIOS_GET_FIELD(u32, + p_bios_entry->param2, + NV_VBIOS_VDT_1X_ENTRY_PARAM2_PSV_VOLTAGE_MAXIMUM); + + ptmp_dev->voltage_base_uv = BIOS_GET_FIELD(s32, p_bios_entry->param3, + NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_BASE); + + steps = BIOS_GET_FIELD(u8, p_bios_entry->param3, + NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_STEPS); + if (steps == VOLT_DEV_PWM_VOLTAGE_STEPS_INVALID) { + steps = VOLT_DEV_PWM_VOLTAGE_STEPS_DEFAULT; + } + + ptmp_dev->voltage_offset_scale_uv = + BIOS_GET_FIELD(s32, p_bios_entry->param4, + 
NV_VBIOS_VDT_1X_ENTRY_PARAM4_PSV_OFFSET_SCALE); + + volt_domain = volt_rail_vbios_volt_domain_convert_to_internal(g, + (u8)p_bios_entry->volt_domain); + if (volt_domain == CTRL_VOLT_DOMAIN_INVALID) { + nvgpu_err(g, "invalid voltage domain = %d", + (u8)p_bios_entry->volt_domain); + status = -EINVAL; + goto done; + } + + if (ptmp_dev->super.operation_type == + CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT || + ptmp_dev->super.operation_type == + CTRL_VOLT_VOLT_DEVICE_OPERATION_TYPE_IPC_VMIN) { + if (volt_domain == CTRL_VOLT_DOMAIN_LOGIC) { + ptmp_dev->source = + NV_PMU_PMGR_PWM_SOURCE_THERM_VID_PWM_0; + } + + if (ptmp_dev->super.operation_type == + CTRL_VOLT_VOLT_DEVICE_OPERATION_TYPE_IPC_VMIN) { + if (ptmp_dev->source == + NV_PMU_PMGR_PWM_SOURCE_THERM_VID_PWM_0) { + ptmp_dev->source = + NV_PMU_PMGR_PWM_SOURCE_THERM_IPC_VMIN_VID_PWM_0; + } + } + ptmp_dev->raw_period = + g->ops.clk.get_crystal_clk_hz(g) / frequency_hz; + } + + /* Initialize data for parent class. */ + ptmp_dev->super.super.type = CTRL_VOLT_DEVICE_TYPE_PWM; + ptmp_dev->super.volt_domain = volt_domain; + ptmp_dev->super.i2c_dev_idx = ext_dev_idx; + ptmp_dev->super.switch_delay_us = (u16)p_bios_entry->settle_time_us; + + pvolt_dev = volt_volt_device_construct(g, ptmp_dev); + if (pvolt_dev == NULL) { + nvgpu_err(g, " Failure to construct VOLTAGE_DEVICE object."); + + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert( + &p_Volt_Device_Meta_Data->volt_devices.super, + (struct pmu_board_obj *)pvolt_dev, entry_Idx); + if (status != 0) { + nvgpu_err(g, + "could not add VOLTAGE_DEVICE for entry %d into boardobjgrp ", + entry_Idx); + goto done; + } + + pvolt_dev_pwm = (struct voltage_device_pwm *)pvolt_dev; + + duty_cycle = 0; + do { + voltage_uv = (u32)(pvolt_dev_pwm->voltage_base_uv + + (s32)((((s64)((s32)duty_cycle)) * + pvolt_dev_pwm->voltage_offset_scale_uv) + / ((s64)((s32) pvolt_dev_pwm->raw_period)))); + + /* Skip creating entry for invalid voltage. */ + if ((voltage_uv >= pvolt_dev_pwm->super.voltage_min_uv) && + (voltage_uv <= pvolt_dev_pwm->super.voltage_max_uv)) { + if (pvolt_dev_pwm->voltage_offset_scale_uv < 0) { + pwm_entry.duty_cycle = + pvolt_dev_pwm->raw_period - duty_cycle; + } else { + pwm_entry.duty_cycle = duty_cycle; + } + + /* Check if there is room left in the voltage table. */ + if (entry_cnt == VOLTAGE_TABLE_MAX_ENTRIES) { + nvgpu_err(g, "Voltage table is full"); + status = -EINVAL; + goto done; + } + + pvolt_dev->pentry[entry_cnt] = + volt_dev_construct_dev_entry_pwm(g, + voltage_uv, &pwm_entry); + if (pvolt_dev->pentry[entry_cnt] == NULL) { + nvgpu_err(g, + " Error creating voltage_device_pwm_entry!"); + status = -EINVAL; + goto done; + } + + entry_cnt++; + } + + /* Obtain next value after the specified steps. */ + duty_cycle = duty_cycle + (u32)steps; + + /* Cap duty cycle to PWM period. 
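+	 * Each step of the sweep corresponds to
+	 *   voltage_uv = voltage_base_uv +
+	 *       (duty_cycle * voltage_offset_scale_uv) / raw_period;
+	 * e.g. with hypothetical values voltage_base_uv = 400000,
+	 * voltage_offset_scale_uv = 500000 and raw_period = 500, a duty
+	 * cycle of 250 gives 650000 uV. Capping at raw_period keeps the
+	 * sweep at or below 100% duty, and the do/while condition below
+	 * then ends the table build.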
*/ + if (duty_cycle > pvolt_dev_pwm->raw_period) { + duty_cycle = pvolt_dev_pwm->raw_period; + } + + } while (duty_cycle < pvolt_dev_pwm->raw_period); + +done: + if (pvolt_dev != NULL) { + pvolt_dev->num_entries = entry_cnt; + } + + nvgpu_kfree(g, ptmp_dev); + return status; +} + +static int volt_get_volt_devices_table(struct gk20a *g, + struct voltage_device_metadata *pvolt_device_metadata) +{ + int status = 0; + u8 *volt_device_table_ptr = NULL; + struct vbios_voltage_device_table_1x_header header = { 0 }; + struct vbios_voltage_device_table_1x_entry entry = { 0 }; + u8 entry_idx; + u8 *entry_offset; + + volt_device_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + VOLTAGE_DEVICE_TABLE); + if (volt_device_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&header, volt_device_table_ptr, + sizeof(struct vbios_voltage_device_table_1x_header)); + + /* Read in the entries. */ + for (entry_idx = 0; entry_idx < header.num_table_entries; entry_idx++) { + entry_offset = (volt_device_table_ptr + header.header_size + + (entry_idx * header.table_entry_size)); + + nvgpu_memcpy((u8 *)&entry, entry_offset, + sizeof(struct vbios_voltage_device_table_1x_entry)); + + if (entry.type == NV_VBIOS_VOLTAGE_DEVICE_1X_ENTRY_TYPE_PSV) { + status = volt_get_voltage_device_table_1x_psv(g, + &entry, pvolt_device_metadata, + entry_idx); + } + } + +done: + return status; +} + +static int volt_device_devgrp_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + struct nv_pmu_volt_volt_device_boardobj_grp_set *pgrp_set = + (struct nv_pmu_volt_volt_device_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, "Done"); + return 0; +} + +static int volt_device_state_init(struct gk20a *g, + struct voltage_device *pvolt_dev) +{ + int status = 0; + struct voltage_rail *pRail = NULL; + u8 rail_idx = 0; + + /* Initialize VOLT_DEVICE step size. */ + if (pvolt_dev->num_entries <= VOLTAGE_TABLE_MAX_ENTRIES_ONE) { + pvolt_dev->volt_step_uv = NV_PMU_VOLT_VALUE_0V_IN_UV; + } else { + pvolt_dev->volt_step_uv = (pvolt_dev->pentry[1]->voltage_uv - + pvolt_dev->pentry[0]->voltage_uv); + } + + /* Build VOLT_RAIL SW state from VOLT_DEVICE SW state. */ + /* If VOLT_RAIL isn't supported, exit. 
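+	 * Otherwise translate the device's voltage domain into a rail
+	 * index, look up that VOLT_RAIL object and register this device
+	 * against it.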
*/ + if (!BOARDOBJGRP_IS_EMPTY(&g->pmu->volt->volt_metadata-> + volt_rail_metadata.volt_rails.super)) { + rail_idx = nvgpu_pmu_volt_rail_volt_domain_convert_to_idx(g, + pvolt_dev->volt_domain); + if (rail_idx == CTRL_BOARDOBJ_IDX_INVALID) { + nvgpu_err(g, + " could not convert voltage domain to rail index."); + status = -EINVAL; + goto done; + } + + pRail = (struct voltage_rail *)BOARDOBJGRP_OBJ_GET_BY_IDX( + &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super, + rail_idx); + if (pRail == NULL) { + nvgpu_err(g, + "could not obtain ptr to rail object from rail index"); + status = -EINVAL; + goto done; + } + + status = volt_rail_volt_dev_register(g, pRail, + pmu_board_obj_get_idx(pvolt_dev), pvolt_dev->operation_type); + if (status != 0) { + nvgpu_err(g, + "Failed to register the device with rail obj"); + goto done; + } + } + +done: + if (status != 0) { + nvgpu_err(g, "Error in building rail sw state device sw"); + } + + return status; +} + +int volt_dev_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_dev_metadata.volt_devices.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +int volt_dev_sw_setup(struct gk20a *g) +{ + int status = 0; + struct boardobjgrp *pboardobjgrp = NULL; + struct voltage_device *pvolt_device; + u8 i; + + nvgpu_log_info(g, " "); + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->volt->volt_metadata->volt_dev_metadata.volt_devices); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for volt rail, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_dev_metadata.volt_devices.super; + + pboardobjgrp->pmudatainstget = volt_device_devgrp_pmudata_instget; + + /* Obtain Voltage Rail Table from VBIOS */ + status = volt_get_volt_devices_table(g, &g->pmu->volt->volt_metadata-> + volt_dev_metadata); + if (status != 0) { + goto done; + } + + /* Populate data for the VOLT_RAIL PMU interface */ + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, VOLT, VOLT_DEVICE); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + volt, VOLT, volt_device, VOLT_DEVICE); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + /* update calibration to fuse */ + BOARDOBJGRP_FOR_EACH(&(g->pmu->volt->volt_metadata->volt_dev_metadata.volt_devices. + super), + struct voltage_device *, pvolt_device, i) { + status = volt_device_state_init(g, pvolt_device); + if (status != 0) { + nvgpu_err(g, + "failure while executing devices's state init interface"); + nvgpu_err(g, + " railIdx = %d, status = 0x%x", i, status); + goto done; + } + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt_dev.h b/drivers/gpu/nvgpu/common/pmu/volt/volt_dev.h new file mode 100644 index 000000000..c45b3930e --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt_dev.h @@ -0,0 +1,78 @@ +/* +* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. +*/ + +#ifndef NVGPU_VOLT_DEV_H +#define NVGPU_VOLT_DEV_H + +#include +#include + +#include "ucode_volt_inf.h" + +#define VOLTAGE_TABLE_MAX_ENTRIES_ONE 1U +#define VOLTAGE_TABLE_MAX_ENTRIES 256U +#define VOLTAGE_DESCRIPTOR_TABLE_ENTRY_INVALID 0xFFU +#define VOLT_DEV_PWM_VOLTAGE_STEPS_INVALID 0U +#define VOLT_DEV_PWM_VOLTAGE_STEPS_DEFAULT 1U + +struct voltage_device { + struct pmu_board_obj super; + u8 volt_domain; + u8 i2c_dev_idx; + u32 switch_delay_us; + u32 num_entries; + struct voltage_device_entry *pentry[VOLTAGE_TABLE_MAX_ENTRIES]; + struct voltage_device_entry *pcurr_entry; + u8 rsvd_0; + u8 rsvd_1; + u8 operation_type; + u32 voltage_min_uv; + u32 voltage_max_uv; + u32 volt_step_uv; +}; + +struct voltage_device_entry { + u32 voltage_uv; +}; + +/*! + * Extends VOLTAGE_DEVICE providing attributes specific to PWM controllers. + */ +struct voltage_device_pwm { + struct voltage_device super; + s32 voltage_base_uv; + s32 voltage_offset_scale_uv; + enum nv_pmu_pmgr_pwm_source source; + u32 raw_period; +}; + +struct voltage_device_pwm_entry { + struct voltage_device_entry super; + u32 duty_cycle; +}; +/* PWM end */ + +int volt_dev_sw_setup(struct gk20a *g); +int volt_dev_pmu_setup(struct gk20a *g); +void nvgpu_pmu_volt_rpc_handler(struct gk20a *g, struct nv_pmu_rpc_header *rpc); + +#endif /* NVGPU_VOLT_DEV_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt_policy.c b/drivers/gpu/nvgpu/common/pmu/volt/volt_policy.c new file mode 100644 index 000000000..340f45feb --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt_policy.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "volt.h" +#include "ucode_volt_inf.h" +#include "volt_policy.h" + +static int volt_policy_pmu_data_init_super(struct gk20a *g, + struct pmu_board_obj *obj, struct nv_pmu_boardobj *pmu_obj) +{ + return pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); +} + +static int volt_construct_volt_policy(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pArgs) +{ + struct voltage_policy *pvolt_policy = NULL; + int status = 0; + + pvolt_policy = nvgpu_kzalloc(g, size); + if (pvolt_policy == NULL) { + return -ENOMEM; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)pvolt_policy, pArgs); + if (status != 0) { + return -EINVAL; + } + + *obj = (struct pmu_board_obj *)(void *)pvolt_policy; + + pvolt_policy->super.pmudatainit = volt_policy_pmu_data_init_super; + + return status; +} + +static int volt_construct_volt_policy_single_rail(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pArgs) +{ + struct voltage_policy_single_rail *ptmp_policy = + (struct voltage_policy_single_rail *)pArgs; + struct voltage_policy_single_rail *pvolt_policy = NULL; + int status = 0; + + status = volt_construct_volt_policy(g, obj, size, pArgs); + if (status != 0) { + return status; + } + + pvolt_policy = (struct voltage_policy_single_rail *)(void *)*obj; + + pvolt_policy->rail_idx = ptmp_policy->rail_idx; + + return status; +} + +static int volt_policy_pmu_data_init_single_rail(struct gk20a *g, + struct pmu_board_obj *obj, struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct voltage_policy_single_rail *ppolicy; + struct nv_pmu_volt_volt_policy_sr_boardobj_set *pset; + + status = volt_policy_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + goto done; + } + + ppolicy = (struct voltage_policy_single_rail *)(void *)obj; + pset = (struct nv_pmu_volt_volt_policy_sr_boardobj_set *)(void *) + pmu_obj; + pset->rail_idx = ppolicy->rail_idx; + +done: + return status; +} + +static int volt_construct_volt_policy_single(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pArgs) +{ + struct pmu_board_obj *obj_tmp = NULL; + int status = 0; + + status = volt_construct_volt_policy_single_rail(g, obj, size, pArgs); + if (status != 0x0) { + return status; + } + + obj_tmp = *obj; + obj_tmp->pmudatainit = volt_policy_pmu_data_init_single_rail; + + return status; +} + +static int volt_policy_pmu_data_init_sr_multi_step(struct gk20a *g, + struct pmu_board_obj *obj, struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct voltage_policy_single_rail_multi_step *ppolicy; + struct nv_pmu_volt_volt_policy_sr_multi_step_boardobj_set *pset; + + status = volt_policy_pmu_data_init_single_rail(g, obj, pmu_obj); + if (status != 0) { + goto done; + } + + ppolicy = (struct voltage_policy_single_rail_multi_step *)(void *)obj; + pset = (struct nv_pmu_volt_volt_policy_sr_multi_step_boardobj_set *) + (void *)pmu_obj; + + pset->ramp_up_step_size_uv = ppolicy->ramp_up_step_size_uv; 
+ pset->ramp_down_step_size_uv = ppolicy->ramp_down_step_size_uv; + pset->inter_switch_delay_us = ppolicy->inter_switch_delay_us; + +done: + return status; +} + +static int volt_construct_volt_policy_single_rail_multi_step(struct gk20a *g, + struct pmu_board_obj **obj, size_t size, void *pargs) +{ + struct pmu_board_obj *obj_tmp = NULL; + struct voltage_policy_single_rail_multi_step *p_volt_policy = NULL; + struct voltage_policy_single_rail_multi_step *tmp_policy = + (struct voltage_policy_single_rail_multi_step *)pargs; + int status = 0; + + status = volt_construct_volt_policy_single_rail(g, obj, size, pargs); + if (status != 0) { + return status; + } + + obj_tmp = (*obj); + p_volt_policy = (struct voltage_policy_single_rail_multi_step *) + *obj; + + obj_tmp->pmudatainit = volt_policy_pmu_data_init_sr_multi_step; + + p_volt_policy->ramp_up_step_size_uv = + tmp_policy->ramp_up_step_size_uv; + p_volt_policy->ramp_down_step_size_uv = + tmp_policy->ramp_down_step_size_uv; + p_volt_policy->inter_switch_delay_us = + tmp_policy->inter_switch_delay_us; + + return status; +} + +static struct voltage_policy *volt_volt_policy_construct(struct gk20a *g, void *pargs) +{ + struct pmu_board_obj *obj = NULL; + int status = 0; + + switch (pmu_board_obj_get_type(pargs)) { + case CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL_MULTI_STEP: + status = volt_construct_volt_policy_single_rail_multi_step(g, + &obj, sizeof(struct voltage_policy_single_rail_multi_step), + pargs); + if (status != 0) { + nvgpu_err(g, + "Could not allocate memory for voltage_policy"); + obj = NULL; + } + break; + case CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL: + status = volt_construct_volt_policy_single(g, + &obj, sizeof(struct voltage_policy_single_rail), pargs); + if (status != 0) { + nvgpu_err(g, + "Could not allocate memory for voltage_policy"); + obj = NULL; + } + break; + } + + return (struct voltage_policy *)(void *)obj; +} + +static u8 volt_policy_type_convert(u8 vbios_type) +{ + switch (vbios_type) { + case NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SINGLE_RAIL: + return CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL; + case NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SINGLE_RAIL_MULTI_STEP: + return CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL_MULTI_STEP; + } + + return CTRL_VOLT_POLICY_TYPE_INVALID; +} + +static int volt_get_volt_policy_table(struct gk20a *g, + struct voltage_policy_metadata *pvolt_policy_metadata) +{ + int status = 0; + u8 *voltage_policy_table_ptr = NULL; + struct voltage_policy *ppolicy = NULL; + struct vbios_voltage_policy_table_1x_header header = { 0 }; + struct vbios_voltage_policy_table_1x_entry entry = { 0 }; + u8 i; + u8 policy_type = 0; + u8 *entry_offset; + union policy_type { + struct pmu_board_obj obj; + struct voltage_policy volt_policy; + struct voltage_policy_single_rail_multi_step single_rail_ms; + struct voltage_policy_single_rail single_rail; + } policy_type_data; + + voltage_policy_table_ptr = + (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + VOLTAGE_POLICY_TABLE); + if (voltage_policy_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&header, voltage_policy_table_ptr, + sizeof(struct vbios_voltage_policy_table_1x_header)); + + /* Set Voltage Policy Table Index for Perf Core VF Sequence client. */ + pvolt_policy_metadata->perf_core_vf_seq_policy_idx = + (u8)header.perf_core_vf_seq_policy_idx; + + /* Read in the entries. 
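+	 * Each entry is converted into a VOLT_POLICY board object; only
+	 * the SINGLE_RAIL and SINGLE_RAIL_MULTI_STEP types are recognized
+	 * here, and any other entry type fails object construction below.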
*/ + for (i = 0; i < header.num_table_entries; i++) { + entry_offset = (voltage_policy_table_ptr + header.header_size + + i * header.table_entry_size); + + nvgpu_memcpy((u8 *)&entry, entry_offset, + sizeof(struct vbios_voltage_policy_table_1x_entry)); + + (void) memset(&policy_type_data, 0x0, + sizeof(policy_type_data)); + + policy_type = volt_policy_type_convert((u8)entry.type); + + switch (policy_type) { + case CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL_MULTI_STEP: + policy_type_data.single_rail_ms.inter_switch_delay_us = + BIOS_GET_FIELD(u16, entry.param1, + NV_VBIOS_VPT_ENTRY_PARAM1_SR_SETTLE_TIME_INTERMEDIATE); + policy_type_data.single_rail_ms.ramp_up_step_size_uv = + BIOS_GET_FIELD(u32, entry.param2, + NV_VBIOS_VPT_ENTRY_PARAM2_SR_RAMP_UP_STEP_SIZE_UV); + policy_type_data.single_rail_ms.ramp_down_step_size_uv = + BIOS_GET_FIELD(u32, entry.param3, + NV_VBIOS_VPT_ENTRY_PARAM3_SR_RAMP_DOWN_STEP_SIZE_UV); + break; + case CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL: + policy_type_data.single_rail.rail_idx = + BIOS_GET_FIELD(u8, entry.param0, + NV_VBIOS_VPT_ENTRY_PARAM0_SINGLE_RAIL_VOLT_DOMAIN); + break; + } + + policy_type_data.obj.type = policy_type; + + ppolicy = volt_volt_policy_construct(g, + (void *)&policy_type_data); + if (ppolicy == NULL) { + nvgpu_err(g, + "Failure to construct VOLT_POLICY object."); + status = -EINVAL; + goto done; + } + + status = boardobjgrp_objinsert( + &pvolt_policy_metadata->volt_policies.super, + (struct pmu_board_obj *)ppolicy, i); + if (status != 0) { + nvgpu_err(g, + "could not add volt_policy for entry %d into boardobjgrp ", + i); + goto done; + } + } + +done: + return status; +} +static int volt_policy_devgrp_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, + struct nv_pmu_boardobj **pmu_obj, u8 idx) +{ + struct nv_pmu_volt_volt_policy_boardobj_grp_set *pgrp_set = + (struct nv_pmu_volt_volt_policy_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, " Done"); + return 0; +} + +static int volt_policy_grp_pmudatainit_super(struct gk20a *g, + struct boardobjgrp *pboardobjgrp, + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu) +{ + struct nv_pmu_volt_volt_policy_boardobjgrp_set_header *pset = + (struct nv_pmu_volt_volt_policy_boardobjgrp_set_header *) + pboardobjgrppmu; + struct voltage_policy_metadata *volt = + (struct voltage_policy_metadata *)pboardobjgrp; + int status = 0; + + status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu); + if (status != 0) { + nvgpu_err(g, + "error updating pmu boardobjgrp for volt policy 0x%x", + status); + goto done; + } + pset->perf_core_vf_seq_policy_idx = + volt->perf_core_vf_seq_policy_idx; + +done: + return status; +} + +int volt_policy_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = + &g->pmu->volt->volt_metadata->volt_policy_metadata.volt_policies.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +int volt_policy_sw_setup(struct gk20a *g) +{ + int status = 0; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + status = nvgpu_boardobjgrp_construct_e32(g, + 
&g->pmu->volt->volt_metadata->volt_policy_metadata.volt_policies); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for volt rail, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = + &g->pmu->volt->volt_metadata->volt_policy_metadata.volt_policies.super; + + pboardobjgrp->pmudatainstget = volt_policy_devgrp_pmudata_instget; + pboardobjgrp->pmudatainit = volt_policy_grp_pmudatainit_super; + + /* Obtain Voltage Rail Table from VBIOS */ + status = volt_get_volt_policy_table(g, &g->pmu->volt->volt_metadata-> + volt_policy_metadata); + if (status != 0) { + goto done; + } + + /* Populate data for the VOLT_RAIL PMU interface */ + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, VOLT, VOLT_POLICY); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + volt, VOLT, volt_policy, VOLT_POLICY); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt_policy.h b/drivers/gpu/nvgpu/common/pmu/volt/volt_policy.h new file mode 100644 index 000000000..9bcf79028 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt_policy.h @@ -0,0 +1,53 @@ +/* +* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. +* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. +*/ + +#ifndef NVGPU_VOLT_POLICY_H +#define NVGPU_VOLT_POLICY_H + +#define VOLT_POLICY_INDEX_IS_VALID(pvolt, policy_idx) \ + (boardobjgrp_idxisvalid( \ + &((pvolt)->volt_policy_metadata.volt_policies.super), \ + (policy_idx))) + +/*! + * extends boardobj providing attributes common to all voltage_policies. + */ +struct voltage_policy { + struct pmu_board_obj super; +}; + +struct voltage_policy_single_rail { + struct voltage_policy super; + u8 rail_idx; +}; + +struct voltage_policy_single_rail_multi_step { + struct voltage_policy_single_rail super; + u16 inter_switch_delay_us; + u32 ramp_up_step_size_uv; + u32 ramp_down_step_size_uv; +}; + +int volt_policy_sw_setup(struct gk20a *g); +int volt_policy_pmu_setup(struct gk20a *g); + +#endif /* NVGPU_VOLT_POLICY_H */ diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt_rail.c b/drivers/gpu/nvgpu/common/pmu/volt/volt_rail.c new file mode 100644 index 000000000..f77fcf404 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt_rail.c @@ -0,0 +1,619 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "volt.h" +#include "ucode_volt_inf.h" +#include "volt_rail.h" + +#define NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX 16U + +static int volt_rail_state_init(struct gk20a *g, + struct voltage_rail *pvolt_rail) +{ + int status = 0; + u32 i; + + pvolt_rail->volt_dev_idx_default = CTRL_BOARDOBJ_IDX_INVALID; + pvolt_rail->volt_dev_idx_ipc_vmin = CTRL_BOARDOBJ_IDX_INVALID; + + for (i = 0; i < CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES; i++) { + pvolt_rail->volt_delta_uv[i] = (int)NV_PMU_VOLT_VALUE_0V_IN_UV; + g->pmu->volt->volt_metadata->volt_rail_metadata.ext_rel_delta_uv[i] = + NV_PMU_VOLT_VALUE_0V_IN_UV; + } + + pvolt_rail->volt_margin_limit_vfe_equ_mon_handle = + NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX; + pvolt_rail->rel_limit_vfe_equ_mon_handle = + NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX; + pvolt_rail->alt_rel_limit_vfe_equ_mon_handle = + NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX; + pvolt_rail->ov_limit_vfe_equ_mon_handle = + NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX; + + status = boardobjgrpmask_e32_init(&pvolt_rail->volt_dev_mask, NULL); + if (status != 0) { + nvgpu_err(g, + "Failed to initialize BOARDOBJGRPMASK of VOLTAGE_DEVICEs"); + } + + return status; +} + +static int volt_rail_init_pmudata_super(struct gk20a *g, + struct pmu_board_obj *obj, struct nv_pmu_boardobj *pmu_obj) +{ + int status = 0; + struct voltage_rail *prail; + struct nv_pmu_volt_volt_rail_boardobj_set *rail_pmu_data; + u32 i; + + nvgpu_log_info(g, " "); + + status = pmu_board_obj_pmu_data_init_super(g, obj, pmu_obj); + if (status != 0) { + return status; + } + + prail = (struct voltage_rail *)(void *)obj; + rail_pmu_data = (struct nv_pmu_volt_volt_rail_boardobj_set *)(void *) + pmu_obj; + + rail_pmu_data->rel_limit_vfe_equ_idx = prail->rel_limit_vfe_equ_idx; + rail_pmu_data->alt_rel_limit_vfe_equ_idx = + prail->alt_rel_limit_vfe_equ_idx; + rail_pmu_data->ov_limit_vfe_equ_idx = prail->ov_limit_vfe_equ_idx; + rail_pmu_data->vmin_limit_vfe_equ_idx = prail->vmin_limit_vfe_equ_idx; + rail_pmu_data->volt_margin_limit_vfe_equ_idx = + prail->volt_margin_limit_vfe_equ_idx; + rail_pmu_data->pwr_equ_idx = prail->pwr_equ_idx; + rail_pmu_data->volt_dev_idx_default = prail->volt_dev_idx_default; + rail_pmu_data->volt_scale_exp_pwr_equ_idx = + prail->volt_scale_exp_pwr_equ_idx; + rail_pmu_data->volt_dev_idx_ipc_vmin = prail->volt_dev_idx_ipc_vmin; + + for (i 
= 0; i < CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES; i++) { + rail_pmu_data->volt_delta_uv[i] = prail->volt_delta_uv[i] + + (int)g->pmu->volt->volt_metadata->volt_rail_metadata.ext_rel_delta_uv[i]; + } + + status = nvgpu_boardobjgrpmask_export(&prail->volt_dev_mask.super, + prail->volt_dev_mask.super.bitcount, + &rail_pmu_data->volt_dev_mask.super); + if (status != 0) { + nvgpu_err(g, + "Failed to export BOARDOBJGRPMASK of VOLTAGE_DEVICEs"); + } + + nvgpu_log_info(g, "Done"); + + return status; +} + +static struct voltage_rail *volt_construct_volt_rail(struct gk20a *g, void *pargs) +{ + struct pmu_board_obj *obj = NULL; + struct voltage_rail *ptemp_rail = (struct voltage_rail *)pargs; + struct voltage_rail *board_obj_volt_rail_ptr = NULL; + int status; + + nvgpu_log_info(g, " "); + + board_obj_volt_rail_ptr = nvgpu_kzalloc(g, sizeof(struct voltage_rail)); + if (board_obj_volt_rail_ptr == NULL) { + return NULL; + } + + status = pmu_board_obj_construct_super(g, + (struct pmu_board_obj *)(void *)board_obj_volt_rail_ptr, + pargs); + if (status != 0) { + return NULL; + } + + obj = (struct pmu_board_obj *)(void *)board_obj_volt_rail_ptr; + /* override super class interface */ + obj->pmudatainit = volt_rail_init_pmudata_super; + + board_obj_volt_rail_ptr->boot_voltage_uv = + ptemp_rail->boot_voltage_uv; + board_obj_volt_rail_ptr->rel_limit_vfe_equ_idx = + ptemp_rail->rel_limit_vfe_equ_idx; + board_obj_volt_rail_ptr->alt_rel_limit_vfe_equ_idx = + ptemp_rail->alt_rel_limit_vfe_equ_idx; + board_obj_volt_rail_ptr->ov_limit_vfe_equ_idx = + ptemp_rail->ov_limit_vfe_equ_idx; + board_obj_volt_rail_ptr->pwr_equ_idx = + ptemp_rail->pwr_equ_idx; + board_obj_volt_rail_ptr->boot_volt_vfe_equ_idx = + ptemp_rail->boot_volt_vfe_equ_idx; + board_obj_volt_rail_ptr->vmin_limit_vfe_equ_idx = + ptemp_rail->vmin_limit_vfe_equ_idx; + board_obj_volt_rail_ptr->volt_margin_limit_vfe_equ_idx = + ptemp_rail->volt_margin_limit_vfe_equ_idx; + board_obj_volt_rail_ptr->volt_scale_exp_pwr_equ_idx = + ptemp_rail->volt_scale_exp_pwr_equ_idx; + + nvgpu_log_info(g, "Done"); + + return (struct voltage_rail *)(void *)obj; +} + +static int volt_get_volt_rail_table(struct gk20a *g, + struct voltage_rail_metadata *pvolt_rail_metadata) +{ + int status = 0; + u8 *volt_rail_table_ptr = NULL; + struct voltage_rail *prail = NULL; + struct vbios_voltage_rail_table_1x_header header = { 0 }; + struct vbios_voltage_rail_table_1x_entry entry = { 0 }; + u8 i; + u8 volt_domain; + u8 *entry_ptr; + union rail_type { + struct pmu_board_obj obj; + struct voltage_rail volt_rail; + } rail_type_data; + + volt_rail_table_ptr = (u8 *)nvgpu_bios_get_perf_table_ptrs(g, + nvgpu_bios_get_bit_token(g, NVGPU_BIOS_PERF_TOKEN), + VOLTAGE_RAIL_TABLE); + if (volt_rail_table_ptr == NULL) { + status = -EINVAL; + goto done; + } + + nvgpu_memcpy((u8 *)&header, volt_rail_table_ptr, + sizeof(struct vbios_voltage_rail_table_1x_header)); + + pvolt_rail_metadata->volt_domain_hal = (u8)header.volt_domain_hal; + + for (i = 0; i < header.num_table_entries; i++) { + entry_ptr = (volt_rail_table_ptr + header.header_size + + (i * header.table_entry_size)); + + (void) memset(&rail_type_data, 0x0, sizeof(rail_type_data)); + + nvgpu_memcpy((u8 *)&entry, entry_ptr, + sizeof(struct vbios_voltage_rail_table_1x_entry)); + + volt_domain = volt_rail_vbios_volt_domain_convert_to_internal(g, + i); + if (volt_domain == CTRL_VOLT_DOMAIN_INVALID) { + continue; + } + + rail_type_data.obj.type = volt_domain; + rail_type_data.volt_rail.boot_voltage_uv = + (u32)entry.boot_voltage_uv; + 
rail_type_data.volt_rail.rel_limit_vfe_equ_idx = + (u8)entry.rel_limit_vfe_equ_idx; + rail_type_data.volt_rail.alt_rel_limit_vfe_equ_idx = + (u8)entry.alt_rel_limit_vfe_equidx; + rail_type_data.volt_rail.ov_limit_vfe_equ_idx = + (u8)entry.ov_limit_vfe_equ_idx; + + if (header.table_entry_size >= + NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_0C) { + rail_type_data.volt_rail.volt_scale_exp_pwr_equ_idx = + (u8)entry.volt_scale_exp_pwr_equ_idx; + } else { + rail_type_data.volt_rail.volt_scale_exp_pwr_equ_idx = + CTRL_BOARDOBJ_IDX_INVALID; + } + + if (header.table_entry_size >= + NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_0B) { + rail_type_data.volt_rail.volt_margin_limit_vfe_equ_idx = + (u8)entry.volt_margin_limit_vfe_equ_idx; + } else { + rail_type_data.volt_rail.volt_margin_limit_vfe_equ_idx = + CTRL_BOARDOBJ_IDX_INVALID; + } + + if (header.table_entry_size >= + NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_0A) { + rail_type_data.volt_rail.vmin_limit_vfe_equ_idx = + (u8)entry.vmin_limit_vfe_equ_idx; + } else { + rail_type_data.volt_rail.vmin_limit_vfe_equ_idx = + CTRL_BOARDOBJ_IDX_INVALID; + } + + if (header.table_entry_size >= + NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_09) { + rail_type_data.volt_rail.boot_volt_vfe_equ_idx = + (u8)entry.boot_volt_vfe_equ_idx; + } else { + rail_type_data.volt_rail.boot_volt_vfe_equ_idx = + CTRL_BOARDOBJ_IDX_INVALID; + } + + if (header.table_entry_size >= + NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_08) { + rail_type_data.volt_rail.pwr_equ_idx = + (u8)entry.pwr_equ_idx; + } else { + rail_type_data.volt_rail.pwr_equ_idx = + CTRL_PMGR_PWR_EQUATION_INDEX_INVALID; + } + + prail = volt_construct_volt_rail(g, &rail_type_data); + + status = boardobjgrp_objinsert( + &pvolt_rail_metadata->volt_rails.super, + (void *)(struct pmu_board_obj *)prail, i); + } + +done: + return status; +} + +static int volt_rail_devgrp_pmudata_instget(struct gk20a *g, + struct nv_pmu_boardobjgrp *pmuboardobjgrp, struct nv_pmu_boardobj + **pmu_obj, u8 idx) +{ + struct nv_pmu_volt_volt_rail_boardobj_grp_set *pgrp_set = + (struct nv_pmu_volt_volt_rail_boardobj_grp_set *) + pmuboardobjgrp; + + nvgpu_log_info(g, " "); + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *pmu_obj = (struct nv_pmu_boardobj *) + &pgrp_set->objects[idx].data.obj; + nvgpu_log_info(g, " Done"); + return 0; +} + +static int volt_rail_devgrp_pmustatus_instget(struct gk20a *g, + void *pboardobjgrppmu, struct nv_pmu_boardobj_query + **obj_pmu_status, u8 idx) +{ + struct nv_pmu_volt_volt_rail_boardobj_grp_get_status *pgrp_get_status = + (struct nv_pmu_volt_volt_rail_boardobj_grp_get_status *) + pboardobjgrppmu; + + /*check whether pmuboardobjgrp has a valid boardobj in index*/ + if (((u32)BIT(idx) & + pgrp_get_status->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *obj_pmu_status = (struct nv_pmu_boardobj_query *) + &pgrp_get_status->objects[idx].data.obj; + return 0; +} + +static int volt_rail_obj_update(struct gk20a *g, + struct pmu_board_obj *obj, + struct nv_pmu_boardobj *pmu_obj) +{ + struct voltage_rail *volt_rail_obj; + struct nv_pmu_volt_volt_rail_boardobj_get_status *pstatus; + + nvgpu_log_info(g, " "); + + volt_rail_obj = (struct voltage_rail *)(void *)obj; + pstatus = (struct nv_pmu_volt_volt_rail_boardobj_get_status *) + (void *)pmu_obj; + + if (pstatus->super.type != volt_rail_obj->super.type) { + nvgpu_err(g, "pmu data and boardobj type not matching"); + return -EINVAL; + } + + /* Updating only vmin 
as per requirement, later other fields can be added */ + volt_rail_obj->vmin_limitu_v = pstatus->vmin_limitu_v; + volt_rail_obj->max_limitu_v = pstatus->max_limitu_v; + volt_rail_obj->current_volt_uv = pstatus->curr_volt_defaultu_v; + + return 0; +} + +static int volt_rail_boardobj_grp_get_status(struct gk20a *g) +{ + struct boardobjgrp *pboardobjgrp; + struct boardobjgrpmask *pboardobjgrpmask; + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu; + struct pmu_board_obj *obj = NULL; + struct nv_pmu_boardobj_query *pboardobjpmustatus = NULL; + int status; + u8 index; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super; + pboardobjgrpmask = &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.mask.super; + status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); + if (status != 0) { + nvgpu_err(g, "err getting boardobjs from pmu"); + return status; + } + pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + status = pboardobjgrp->pmustatusinstget(g, + (struct nv_pmu_boardobjgrp *)(void *)pboardobjgrppmu, + &pboardobjpmustatus, index); + if (status != 0) { + nvgpu_err(g, "could not get status object instance"); + return status; + } + status = volt_rail_obj_update(g, obj, + (struct nv_pmu_boardobj *)(void *)pboardobjpmustatus); + if (status != 0) { + nvgpu_err(g, "could not update volt rail status"); + return status; + } + } + return 0; +} + +int volt_rail_sw_setup(struct gk20a *g) +{ + int status = 0; + struct boardobjgrp *pboardobjgrp = NULL; + struct voltage_rail *pvolt_rail; + u8 i; + + nvgpu_log_info(g, " "); + + status = nvgpu_boardobjgrp_construct_e32(g, + &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails); + if (status != 0) { + nvgpu_err(g, + "error creating boardobjgrp for volt rail, " + "status - 0x%x", status); + goto done; + } + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super; + + pboardobjgrp->pmudatainstget = volt_rail_devgrp_pmudata_instget; + pboardobjgrp->pmustatusinstget = volt_rail_devgrp_pmustatus_instget; + + g->pmu->volt->volt_metadata->volt_rail_metadata.pct_delta = + NV_PMU_VOLT_VALUE_0V_IN_UV; + + /* Obtain Voltage Rail Table from VBIOS */ + status = volt_get_volt_rail_table(g, &g->pmu->volt->volt_metadata-> + volt_rail_metadata); + if (status != 0) { + goto done; + } + + /* Populate data for the VOLT_RAIL PMU interface */ + BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, VOLT, VOLT_RAIL); + + status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp, + volt, VOLT, volt_rail, VOLT_RAIL); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, + &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super, + volt, VOLT, volt_rail, VOLT_RAIL); + if (status != 0) { + nvgpu_err(g, + "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x", + status); + goto done; + } + + /* update calibration to fuse */ + BOARDOBJGRP_FOR_EACH(&(g->pmu->volt->volt_metadata->volt_rail_metadata. 
+ volt_rails.super), + struct voltage_rail *, pvolt_rail, i) { + status = volt_rail_state_init(g, pvolt_rail); + if (status != 0) { + nvgpu_err(g, + "Failure while executing RAIL's state init railIdx = %d", + i); + goto done; + } + } + +done: + nvgpu_log_info(g, " done status %x", status); + return status; +} + +int volt_rail_pmu_setup(struct gk20a *g) +{ + int status; + struct boardobjgrp *pboardobjgrp = NULL; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super; + + if (!pboardobjgrp->bconstructed) { + return -EINVAL; + } + + status = pboardobjgrp->pmuinithandle(g, pboardobjgrp); + + nvgpu_log_info(g, "Done"); + return status; +} + +u8 volt_rail_vbios_volt_domain_convert_to_internal(struct gk20a *g, + u8 vbios_volt_domain) +{ + if (g->pmu->volt->volt_metadata->volt_rail_metadata.volt_domain_hal == + CTRL_VOLT_DOMAIN_HAL_GP10X_SINGLE_RAIL) { + return CTRL_VOLT_DOMAIN_LOGIC; + } else { + nvgpu_err(g, "Unsupported volt domain hal"); + return CTRL_VOLT_DOMAIN_INVALID; + } +} + +int volt_rail_volt_dev_register(struct gk20a *g, struct voltage_rail + *pvolt_rail, u8 volt_dev_idx, u8 operation_type) +{ + int status = 0; + + if (operation_type == CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT) { + if (pvolt_rail->volt_dev_idx_default == + CTRL_BOARDOBJ_IDX_INVALID) { + pvolt_rail->volt_dev_idx_default = volt_dev_idx; + } else { + status = -EINVAL; + goto exit; + } + } else if (operation_type == + CTRL_VOLT_VOLT_DEVICE_OPERATION_TYPE_IPC_VMIN) { + if (pvolt_rail->volt_dev_idx_ipc_vmin == + CTRL_BOARDOBJ_IDX_INVALID) { + pvolt_rail->volt_dev_idx_ipc_vmin = volt_dev_idx; + /* + * Exit on purpose as we do not want to register + * IPC_VMIN device against the rail to avoid + * setting current voltage instead of + * IPC Vmin voltage. 
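+			 * Note that, as a result, only a device registered with the
+			 * DEFAULT operation type falls through to the
+			 * nvgpu_boardobjgrpmask_bit_set() call below and is tracked
+			 * in volt_dev_mask.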
+ */ + goto exit; + } else { + status = -EINVAL; + goto exit; + } + } else { + goto exit; + } + + status = nvgpu_boardobjgrpmask_bit_set(&pvolt_rail->volt_dev_mask.super, + volt_dev_idx); + +exit: + if (status != 0) { + nvgpu_err(g, "Failed to register VOLTAGE_DEVICE"); + } + + return status; +} + +u8 nvgpu_pmu_volt_rail_volt_domain_convert_to_idx(struct gk20a *g, u8 volt_domain) +{ + if (g->pmu->volt->volt_metadata->volt_rail_metadata.volt_domain_hal == + CTRL_VOLT_DOMAIN_HAL_GP10X_SINGLE_RAIL) { + return 0U; + } else { + nvgpu_err(g, "Unsupported volt domain hal"); + return CTRL_BOARDOBJ_IDX_INVALID; + } +} + +int nvgpu_pmu_volt_get_vmin_vmax_ps35(struct gk20a *g, u32 *vmin_uv, u32 *vmax_uv) +{ + struct boardobjgrp *pboardobjgrp; + struct pmu_board_obj *obj = NULL; + struct voltage_rail *volt_rail = NULL; + int status; + u8 index; + + status = volt_rail_boardobj_grp_get_status(g); + if (status != 0) { + nvgpu_err(g, "Vfe_var get status failed"); + return status; + } + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + volt_rail = (struct voltage_rail *)(void *)obj; + if ((volt_rail->vmin_limitu_v != 0U) && + (volt_rail->max_limitu_v != 0U)) { + *vmin_uv = volt_rail->vmin_limitu_v; + *vmax_uv = volt_rail->max_limitu_v; + + return status; + } + } + return status; +} + +int nvgpu_pmu_volt_get_curr_volt_ps35(struct gk20a *g, u32 *vcurr_uv) +{ + struct boardobjgrp *pboardobjgrp; + struct pmu_board_obj *obj = NULL; + struct voltage_rail *volt_rail = NULL; + int status; + u8 index; + + status = volt_rail_boardobj_grp_get_status(g); + if (status != 0) { + nvgpu_err(g, "volt rail get status failed"); + return status; + } + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj*, obj, index) { + volt_rail = (struct voltage_rail *)(void *)obj; + if (volt_rail->current_volt_uv != 0U) { + *vcurr_uv = volt_rail->current_volt_uv; + return status; + } + } + return status; +} + +u8 nvgpu_pmu_volt_get_vmargin_ps35(struct gk20a *g) +{ + struct boardobjgrp *pboardobjgrp; + struct pmu_board_obj *obj = NULL; + struct voltage_rail *volt_rail = NULL; + u8 index, vmargin_uv; + + pboardobjgrp = &g->pmu->volt->volt_metadata->volt_rail_metadata.volt_rails.super; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct pmu_board_obj *, obj, index) { + volt_rail = (struct voltage_rail *)(void *)obj; + if (volt_rail->volt_margin_limit_vfe_equ_idx != 255U) { + vmargin_uv = volt_rail->volt_margin_limit_vfe_equ_idx; + return vmargin_uv; + } + } + return 0U; +} + diff --git a/drivers/gpu/nvgpu/common/pmu/volt/volt_rail.h b/drivers/gpu/nvgpu/common/pmu/volt/volt_rail.h new file mode 100644 index 000000000..4bb831d77 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pmu/volt/volt_rail.h @@ -0,0 +1,61 @@ +/* +* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 
+* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef NVGPU_VOLT_RAIL_H +#define NVGPU_VOLT_RAIL_H + +#include +#include + +#define CTRL_PMGR_PWR_EQUATION_INDEX_INVALID 0xFFU + +struct voltage_rail { + struct pmu_board_obj super; + u32 boot_voltage_uv; + u8 rel_limit_vfe_equ_idx; + u8 alt_rel_limit_vfe_equ_idx; + u8 ov_limit_vfe_equ_idx; + u8 pwr_equ_idx; + u8 volt_scale_exp_pwr_equ_idx; + u8 volt_dev_idx_default; + u8 volt_dev_idx_ipc_vmin; + u8 boot_volt_vfe_equ_idx; + u8 vmin_limit_vfe_equ_idx; + u8 volt_margin_limit_vfe_equ_idx; + u32 volt_margin_limit_vfe_equ_mon_handle; + u32 rel_limit_vfe_equ_mon_handle; + u32 alt_rel_limit_vfe_equ_mon_handle; + u32 ov_limit_vfe_equ_mon_handle; + struct boardobjgrpmask_e32 volt_dev_mask; + s32 volt_delta_uv[CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES]; + u32 vmin_limitu_v; + u32 max_limitu_v; + u32 current_volt_uv; +}; + +int volt_rail_volt_dev_register(struct gk20a *g, struct voltage_rail + *pvolt_rail, u8 volt_dev_idx, u8 operation_type); +int volt_rail_sw_setup(struct gk20a *g); +int volt_rail_pmu_setup(struct gk20a *g); + +#endif /* NVGPU_VOLT_RAIL_H */ diff --git a/drivers/gpu/nvgpu/common/power_features/cg/cg.c b/drivers/gpu/nvgpu/common/power_features/cg/cg.c new file mode 100644 index 000000000..25a6210ac --- /dev/null +++ b/drivers/gpu/nvgpu/common/power_features/cg/cg.c @@ -0,0 +1,668 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +static void nvgpu_cg_set_mode(struct gk20a *g, u32 cgmode, u32 mode_config) +{ + u32 n; + u32 engine_id = 0; + const struct nvgpu_device *dev = NULL; + struct nvgpu_fifo *f = &g->fifo; + + nvgpu_log_fn(g, " "); + + for (n = 0; n < f->num_engines; n++) { + dev = f->active_engines[n]; + +#ifdef CONFIG_NVGPU_NON_FUSA + /* gr_engine supports both BLCG and ELCG */ + if ((cgmode == BLCG_MODE) && + (dev->type == NVGPU_DEVTYPE_GRAPHICS)) { + g->ops.therm.init_blcg_mode(g, (u32)mode_config, + engine_id); + break; + } else +#endif + if (cgmode == ELCG_MODE) { + g->ops.therm.init_elcg_mode(g, (u32)mode_config, + dev->engine_id); + } else { + nvgpu_err(g, "invalid cg mode %d, config %d for " + "engine_id %d", + cgmode, mode_config, engine_id); + } + } +} + +void nvgpu_cg_elcg_enable_no_wait(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->elcg_enabled) { + nvgpu_cg_set_mode(g, ELCG_MODE, ELCG_AUTO); + } + nvgpu_mutex_release(&g->cg_pg_lock); +} + + +void nvgpu_cg_elcg_disable_no_wait(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->elcg_enabled) { + nvgpu_cg_set_mode(g, ELCG_MODE, ELCG_RUN); + } + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_blcg_fb_ltc_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->blcg_enabled) { + goto done; + } + if (g->ops.cg.blcg_fb_load_gating_prod != NULL) { + g->ops.cg.blcg_fb_load_gating_prod(g, true); + } + if (g->ops.cg.blcg_ltc_load_gating_prod != NULL) { + g->ops.cg.blcg_ltc_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_blcg_fifo_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->blcg_enabled) { + goto done; + } + if (g->ops.cg.blcg_fifo_load_gating_prod != NULL) { + g->ops.cg.blcg_fifo_load_gating_prod(g, true); + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.cg.blcg_runlist_load_gating_prod != NULL) { + g->ops.cg.blcg_runlist_load_gating_prod(g, true); + } +#endif +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_blcg_pmu_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->blcg_enabled) { + goto done; + } + if (g->ops.cg.blcg_pmu_load_gating_prod != NULL) { + g->ops.cg.blcg_pmu_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_blcg_ce_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->blcg_enabled) { + goto done; + } + if (g->ops.cg.blcg_ce_load_gating_prod != NULL) { + g->ops.cg.blcg_ce_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_blcg_gr_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->blcg_enabled) { + goto done; + } + if (g->ops.cg.blcg_gr_load_gating_prod != NULL) { + g->ops.cg.blcg_gr_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_slcg_fb_ltc_load_enable(struct 
gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_fb_load_gating_prod != NULL) { + g->ops.cg.slcg_fb_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_ltc_load_gating_prod != NULL) { + g->ops.cg.slcg_ltc_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +static void nvgpu_cg_slcg_priring_load_prod(struct gk20a *g, bool enable) +{ + + if (g->ops.cg.slcg_priring_load_gating_prod != NULL) { + g->ops.cg.slcg_priring_load_gating_prod(g, enable); + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.cg.slcg_rs_ctrl_fbp_load_gating_prod != NULL) { + g->ops.cg.slcg_rs_ctrl_fbp_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_rs_ctrl_gpc_load_gating_prod != NULL) { + g->ops.cg.slcg_rs_ctrl_gpc_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_rs_ctrl_sys_load_gating_prod != NULL) { + g->ops.cg.slcg_rs_ctrl_sys_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_rs_fbp_load_gating_prod != NULL) { + g->ops.cg.slcg_rs_fbp_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_rs_gpc_load_gating_prod != NULL) { + g->ops.cg.slcg_rs_gpc_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_rs_sys_load_gating_prod != NULL) { + g->ops.cg.slcg_rs_sys_load_gating_prod(g, enable); + } +#endif + +} + +void nvgpu_cg_slcg_priring_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + + nvgpu_cg_slcg_priring_load_prod(g, true); +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_slcg_fifo_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_fifo_load_gating_prod != NULL) { + g->ops.cg.slcg_fifo_load_gating_prod(g, true); + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.cg.slcg_runlist_load_gating_prod != NULL) { + g->ops.cg.slcg_runlist_load_gating_prod(g, true); + } +#endif +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_slcg_pmu_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_pmu_load_gating_prod != NULL) { + g->ops.cg.slcg_pmu_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_slcg_therm_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_therm_load_gating_prod != NULL) { + g->ops.cg.slcg_therm_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_slcg_ce2_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_ce2_load_gating_prod != NULL) { + g->ops.cg.slcg_ce2_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +void nvgpu_cg_slcg_timer_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_timer_load_gating_prod != NULL) { + g->ops.cg.slcg_timer_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} +#endif + +#ifdef CONFIG_NVGPU_PROFILER +void 
nvgpu_cg_slcg_perf_load_enable(struct gk20a *g, bool enable) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + + if (g->ops.cg.slcg_perf_load_gating_prod != NULL) { + g->ops.cg.slcg_perf_load_gating_prod(g, enable); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} +#endif + +static void cg_init_gr_slcg_load_gating_prod(struct gk20a *g) +{ + if (g->ops.cg.slcg_bus_load_gating_prod != NULL) { + g->ops.cg.slcg_bus_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_chiplet_load_gating_prod != NULL) { + g->ops.cg.slcg_chiplet_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_gr_load_gating_prod != NULL) { + g->ops.cg.slcg_gr_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_perf_load_gating_prod != NULL) { + g->ops.cg.slcg_perf_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_xbar_load_gating_prod != NULL) { + g->ops.cg.slcg_xbar_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_hshub_load_gating_prod != NULL) { + g->ops.cg.slcg_hshub_load_gating_prod(g, true); + } +} + +static void cg_init_gr_blcg_load_gating_prod(struct gk20a *g) +{ + if (g->ops.cg.blcg_bus_load_gating_prod != NULL) { + g->ops.cg.blcg_bus_load_gating_prod(g, true); + } + if (g->ops.cg.blcg_gr_load_gating_prod != NULL) { + g->ops.cg.blcg_gr_load_gating_prod(g, true); + } + if (g->ops.cg.blcg_xbar_load_gating_prod != NULL) { + g->ops.cg.blcg_xbar_load_gating_prod(g, true); + } + if (g->ops.cg.blcg_hshub_load_gating_prod != NULL) { + g->ops.cg.blcg_hshub_load_gating_prod(g, true); + } +} + +void nvgpu_cg_init_gr_load_gating_prod(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + + if (!g->slcg_enabled) { + goto check_can_blcg; + } + + cg_init_gr_slcg_load_gating_prod(g); + +check_can_blcg: + if (!g->blcg_enabled) { + goto exit; + } + + cg_init_gr_blcg_load_gating_prod(g); + +exit: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +#ifdef CONFIG_NVGPU_NON_FUSA +void nvgpu_cg_elcg_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->elcg_enabled) { + nvgpu_cg_set_mode(g, ELCG_MODE, ELCG_AUTO); + } + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_elcg_disable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->elcg_enabled) { + nvgpu_cg_set_mode(g, ELCG_MODE, ELCG_RUN); + } + nvgpu_mutex_release(&g->cg_pg_lock); + +} + +void nvgpu_cg_blcg_mode_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->blcg_enabled) { + nvgpu_cg_set_mode(g, BLCG_MODE, BLCG_AUTO); + } + nvgpu_mutex_release(&g->cg_pg_lock); + +} + +void nvgpu_cg_blcg_mode_disable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->blcg_enabled) { + nvgpu_cg_set_mode(g, BLCG_MODE, BLCG_RUN); + } + nvgpu_mutex_release(&g->cg_pg_lock); + + +} + +void nvgpu_cg_slcg_gr_perf_ltc_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_ltc_load_gating_prod != NULL) { + g->ops.cg.slcg_ltc_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_perf_load_gating_prod != NULL) { + g->ops.cg.slcg_perf_load_gating_prod(g, true); + } + if (g->ops.cg.slcg_gr_load_gating_prod != 
NULL) { + g->ops.cg.slcg_gr_load_gating_prod(g, true); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_slcg_gr_perf_ltc_load_disable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->slcg_enabled) { + goto done; + } + if (g->ops.cg.slcg_gr_load_gating_prod != NULL) { + g->ops.cg.slcg_gr_load_gating_prod(g, false); + } + if (g->ops.cg.slcg_perf_load_gating_prod != NULL) { + g->ops.cg.slcg_perf_load_gating_prod(g, false); + } + if (g->ops.cg.slcg_ltc_load_gating_prod != NULL) { + g->ops.cg.slcg_ltc_load_gating_prod(g, false); + } +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_elcg_set_elcg_enabled(struct gk20a *g, bool enable) +{ + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (enable) { + if (!g->elcg_enabled) { + g->elcg_enabled = true; + nvgpu_cg_set_mode(g, ELCG_MODE, ELCG_AUTO); + } + } else { + if (g->elcg_enabled) { + g->elcg_enabled = false; + nvgpu_cg_set_mode(g, ELCG_MODE, ELCG_RUN); + } + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.cg.elcg_ce_load_gating_prod != NULL) { + g->ops.cg.elcg_ce_load_gating_prod(g, g->elcg_enabled); + } +#endif + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_blcg_set_blcg_enabled(struct gk20a *g, bool enable) +{ + bool load = false; + + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (enable) { + if (!g->blcg_enabled) { + load = true; + g->blcg_enabled = true; + } + } else { + if (g->blcg_enabled) { + load = true; + g->blcg_enabled = false; + } + } + if (!load ) { + goto done; + } + + if (g->ops.cg.blcg_bus_load_gating_prod != NULL) { + g->ops.cg.blcg_bus_load_gating_prod(g, enable); + } + if (g->ops.cg.blcg_ce_load_gating_prod != NULL) { + g->ops.cg.blcg_ce_load_gating_prod(g, enable); + } + if (g->ops.cg.blcg_fb_load_gating_prod != NULL) { + g->ops.cg.blcg_fb_load_gating_prod(g, enable); + } + if (g->ops.cg.blcg_fifo_load_gating_prod != NULL) { + g->ops.cg.blcg_fifo_load_gating_prod(g, enable); + } + if (g->ops.cg.blcg_gr_load_gating_prod != NULL) { + g->ops.cg.blcg_gr_load_gating_prod(g, enable); + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.cg.blcg_runlist_load_gating_prod != NULL) { + g->ops.cg.blcg_runlist_load_gating_prod(g, enable); + } +#endif + if (g->ops.cg.blcg_ltc_load_gating_prod != NULL) { + g->ops.cg.blcg_ltc_load_gating_prod(g, enable); + } + if (g->ops.cg.blcg_pmu_load_gating_prod != NULL) { + g->ops.cg.blcg_pmu_load_gating_prod(g, enable); + } + if (g->ops.cg.blcg_xbar_load_gating_prod != NULL) { + g->ops.cg.blcg_xbar_load_gating_prod(g, enable); + } + if (g->ops.cg.blcg_hshub_load_gating_prod != NULL) { + g->ops.cg.blcg_hshub_load_gating_prod(g, enable); + } + +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_slcg_set_slcg_enabled(struct gk20a *g, bool enable) +{ + bool load = false; + + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (enable) { + if (!g->slcg_enabled) { + load = true; + g->slcg_enabled = true; + } + } else { + if (g->slcg_enabled) { + load = true; + g->slcg_enabled = false; + } + } + if (!load ) { + goto done; + } + + if (g->ops.cg.slcg_bus_load_gating_prod != NULL) { + g->ops.cg.slcg_bus_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_ce2_load_gating_prod != NULL) { + 
g->ops.cg.slcg_ce2_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_chiplet_load_gating_prod != NULL) { + g->ops.cg.slcg_chiplet_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_fb_load_gating_prod != NULL) { + g->ops.cg.slcg_fb_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_fifo_load_gating_prod != NULL) { + g->ops.cg.slcg_fifo_load_gating_prod(g, enable); + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.cg.slcg_runlist_load_gating_prod != NULL) { + g->ops.cg.slcg_runlist_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_timer_load_gating_prod != NULL) { + g->ops.cg.slcg_timer_load_gating_prod(g, enable); + } +#endif + if (g->ops.cg.slcg_gr_load_gating_prod != NULL) { + g->ops.cg.slcg_gr_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_ltc_load_gating_prod != NULL) { + g->ops.cg.slcg_ltc_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_perf_load_gating_prod != NULL) { + g->ops.cg.slcg_perf_load_gating_prod(g, enable); + } + + nvgpu_cg_slcg_priring_load_prod(g, enable); + + if (g->ops.cg.slcg_pmu_load_gating_prod != NULL) { + g->ops.cg.slcg_pmu_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_xbar_load_gating_prod != NULL) { + g->ops.cg.slcg_xbar_load_gating_prod(g, enable); + } + if (g->ops.cg.slcg_hshub_load_gating_prod != NULL) { + g->ops.cg.slcg_hshub_load_gating_prod(g, enable); + } + +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} + +void nvgpu_cg_elcg_ce_load_enable(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (!g->elcg_enabled) { + goto done; + } +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + if (g->ops.cg.elcg_ce_load_gating_prod != NULL) { + g->ops.cg.elcg_ce_load_gating_prod(g, true); + } +#endif +done: + nvgpu_mutex_release(&g->cg_pg_lock); +} +#endif diff --git a/drivers/gpu/nvgpu/common/power_features/pg/pg.c b/drivers/gpu/nvgpu/common/power_features/pg/pg.c new file mode 100644 index 000000000..dd1f68fda --- /dev/null +++ b/drivers/gpu/nvgpu/common/power_features/pg/pg.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#ifdef CONFIG_NVGPU_LS_PMU +#include +#include +#endif +#include + +bool nvgpu_pg_elpg_is_enabled(struct gk20a *g) +{ + bool elpg_enabled; + + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + elpg_enabled = g->elpg_enabled; + nvgpu_mutex_release(&g->cg_pg_lock); + return elpg_enabled; +} + +int nvgpu_pg_elpg_enable(struct gk20a *g) +{ + int err = 0; +#ifdef CONFIG_NVGPU_LS_PMU + nvgpu_log_fn(g, " "); + + if (!g->can_elpg) { + return 0; + } + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->elpg_enabled) { + err = nvgpu_pmu_pg_global_enable(g, true); + } + nvgpu_mutex_release(&g->cg_pg_lock); +#endif + return err; +} + +int nvgpu_pg_elpg_disable(struct gk20a *g) +{ + int err = 0; +#ifdef CONFIG_NVGPU_LS_PMU + nvgpu_log_fn(g, " "); + + if (!g->can_elpg) { + return 0; + } + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (g->elpg_enabled) { + err = nvgpu_pmu_pg_global_enable(g, false); + } + nvgpu_mutex_release(&g->cg_pg_lock); +#endif + return err; +} + +int nvgpu_pg_elpg_set_elpg_enabled(struct gk20a *g, bool enable) +{ + int err = 0; + bool change_mode = false; + + nvgpu_log_fn(g, " "); + + if (!g->can_elpg) { + return 0; + } + + g->ops.gr.init.wait_initialized(g); + + nvgpu_mutex_acquire(&g->cg_pg_lock); + if (enable) { + if (!g->elpg_enabled) { + change_mode = true; + g->elpg_enabled = true; + } + } else { + if (g->elpg_enabled) { + change_mode = true; + g->elpg_enabled = false; + } + } + if (!change_mode) { + goto done; + } +#ifdef CONFIG_NVGPU_LS_PMU + err = nvgpu_pmu_pg_global_enable(g, enable); +#endif +done: + nvgpu_mutex_release(&g->cg_pg_lock); + return err; +} diff --git a/drivers/gpu/nvgpu/common/power_features/power_features.c b/drivers/gpu/nvgpu/common/power_features/power_features.c new file mode 100644 index 000000000..ec4d5c16e --- /dev/null +++ b/drivers/gpu/nvgpu/common/power_features/power_features.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +int nvgpu_cg_pg_disable(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + +#ifdef CONFIG_NVGPU_POWER_PG + /* disable elpg before clock gating */ + err = nvgpu_pg_elpg_disable(g); + if (err != 0) { + nvgpu_err(g, "failed to set disable elpg"); + } +#endif + nvgpu_cg_slcg_gr_perf_ltc_load_disable(g); + + nvgpu_cg_blcg_mode_disable(g); + + nvgpu_cg_elcg_disable(g); + + return err; +} + +int nvgpu_cg_pg_enable(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + g->ops.gr.init.wait_initialized(g); + + nvgpu_cg_elcg_enable(g); + + nvgpu_cg_blcg_mode_enable(g); + + nvgpu_cg_slcg_gr_perf_ltc_load_enable(g); + +#ifdef CONFIG_NVGPU_POWER_PG + err = nvgpu_pg_elpg_enable(g); + if (err != 0) { + nvgpu_err(g, "failed to set enable elpg"); + } +#endif + + return err; +} diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c new file mode 100644 index 000000000..12e40a735 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pramin.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * This typedef is for functions that get called during the access_batched() + * operation. + */ +typedef void (*pramin_access_batch_fn)(struct gk20a *g, u64 start, u64 words, + u32 **arg); + +/* + * The PRAMIN range is 1 MB, must change base addr if a buffer crosses that. + * This same loop is used for read/write/memset. Offset and size in bytes. + * One call to "loop" is done per range, with "arg" supplied. + */ +static void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, + u64 offset, u64 size, pramin_access_batch_fn loop, u32 **arg) +{ + struct nvgpu_page_alloc *alloc = NULL; + struct nvgpu_sgt *sgt; + void *sgl; + u64 byteoff, start_reg, until_end, n; + + /* + * TODO: Vidmem is not accesible through pramin on shutdown path. 
+ * driver should be refactored to prevent this from happening, but for + * now it is ok just to ignore the writes + */ + if (!gk20a_io_exists(g) && nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + return; + } + + alloc = mem->vidmem_alloc; + sgt = &alloc->sgt; + + nvgpu_sgt_for_each_sgl(sgl, sgt) { + if (offset >= nvgpu_sgt_get_length(sgt, sgl)) { + u64 tmp_offset = nvgpu_sgt_get_length(sgt, sgl); + + nvgpu_assert(tmp_offset <= offset); + offset -= tmp_offset; + } else { + break; + } + } + + while (size != 0U) { + u64 sgl_len; + + BUG_ON(sgl == NULL); + sgl_len = nvgpu_sgt_get_length(sgt, sgl); + + nvgpu_mutex_acquire(&g->mm.pramin_window_lock); + byteoff = g->ops.bus.set_bar0_window(g, mem, sgt, sgl, + offset / sizeof(u32)); + start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); + until_end = U64(SZ_1M) - (byteoff & (U64(SZ_1M) - 1U)); + + n = min3(size, until_end, (sgl_len - offset)); + + loop(g, start_reg, n / sizeof(u32), arg); + + /* read back to synchronize accesses */ + (void) gk20a_readl(g, start_reg); + + nvgpu_mutex_release(&g->mm.pramin_window_lock); + + size -= n; + + if (n == (sgl_len - offset)) { + sgl = nvgpu_sgt_get_next(sgt, sgl); + offset = 0; + } else { + offset += n; + } + } +} + +static void nvgpu_pramin_access_batch_rd_n(struct gk20a *g, + u64 start, u64 words, u32 **arg) +{ + u32 *dest_u32 = *arg; + u64 r = start; + + while (words != 0U) { + words--; + *dest_u32++ = nvgpu_readl(g, r); + r += U32(sizeof(u32)); + } + + *arg = dest_u32; +} + +void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem, + u64 start, u64 size, void *dest) +{ + u32 *dest_u32 = dest; + + return nvgpu_pramin_access_batched(g, mem, start, size, + nvgpu_pramin_access_batch_rd_n, &dest_u32); +} + +static void nvgpu_pramin_access_batch_wr_n(struct gk20a *g, + u64 start, u64 words, u32 **arg) +{ + u32 *src_u32 = *arg; + u64 r = start; + + while (words != 0U) { + words--; + nvgpu_writel_relaxed(g, r, *src_u32++); + r += U32(sizeof(u32)); + } + + *arg = src_u32; +} + +void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem, + u64 start, u64 size, void *src) +{ + u32 *src_u32 = src; + + return nvgpu_pramin_access_batched(g, mem, start, size, + nvgpu_pramin_access_batch_wr_n, &src_u32); +} + +static void nvgpu_pramin_access_batch_set(struct gk20a *g, + u64 start, u64 words, u32 **arg) +{ + u32 repeat = **arg; + u64 r = start; + + while (words != 0U) { + words--; + nvgpu_writel_relaxed(g, r, repeat); + r += U32(sizeof(u32)); + } +} + +void nvgpu_pramin_memset(struct gk20a *g, struct nvgpu_mem *mem, + u64 start, u64 size, u32 w) +{ + u32 *p = &w; + + return nvgpu_pramin_access_batched(g, mem, start, size, + nvgpu_pramin_access_batch_set, &p); +} +void nvgpu_init_pramin(struct mm_gk20a *mm) +{ + mm->pramin_window = 0; + nvgpu_mutex_init(&mm->pramin_window_lock); +} diff --git a/drivers/gpu/nvgpu/common/profiler/pm_reservation.c b/drivers/gpu/nvgpu/common/profiler/pm_reservation.c new file mode 100644 index 000000000..adf27d92d --- /dev/null +++ b/drivers/gpu/nvgpu/common/profiler/pm_reservation.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void prepare_resource_reservation(struct gk20a *g, + enum nvgpu_profiler_pm_resource_type pm_resource, bool acquire) +{ + int err; + + if ((pm_resource != NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY) && + (pm_resource != NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM)) { + return; + } + + if (acquire) { + nvgpu_atomic_inc(&g->hwpm_refcount); + nvgpu_log(g, gpu_dbg_prof, "HWPM refcount acquired %u, resource %u", + nvgpu_atomic_read(&g->hwpm_refcount), pm_resource); + + if (nvgpu_atomic_read(&g->hwpm_refcount) == 1) { + nvgpu_log(g, gpu_dbg_prof, + "Trigger HWPM system reset, disable perf SLCG"); + err = nvgpu_mc_reset_units(g, NVGPU_UNIT_PERFMON); + if (err != 0) { + nvgpu_err(g, "Failed to reset PERFMON unit"); + } + nvgpu_cg_slcg_perf_load_enable(g, false); +#ifdef CONFIG_NVGPU_NEXT + /* + * By default, disable the PMASYS legacy mode for + * NVGPU_NEXT. 
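+			 * The call below is made only when the per-chip HAL
+			 * actually provides the enable_pmasys_legacy_mode
+			 * operation.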
+ */ + if (g->ops.perf.enable_pmasys_legacy_mode != NULL) { + g->ops.perf.enable_pmasys_legacy_mode(g, false); + } +#endif + } + } else { + nvgpu_atomic_dec(&g->hwpm_refcount); + nvgpu_log(g, gpu_dbg_prof, "HWPM refcount released %u, resource %u", + nvgpu_atomic_read(&g->hwpm_refcount), pm_resource); + + if (nvgpu_atomic_read(&g->hwpm_refcount) == 0) { + nvgpu_log(g, gpu_dbg_prof, + "Trigger HWPM system reset, re-enable perf SLCG"); + err = nvgpu_mc_reset_units(g, NVGPU_UNIT_PERFMON); + if (err != 0) { + nvgpu_err(g, "Failed to reset PERFMON unit"); + } + nvgpu_cg_slcg_perf_load_enable(g, true); + } + } +} + +static bool check_pm_resource_existing_reservation_locked( + struct nvgpu_pm_resource_reservations *reservations, + u32 reservation_id, u32 vmid) +{ + struct nvgpu_pm_resource_reservation_entry *reservation_entry; + bool reserved = false; + + nvgpu_list_for_each_entry(reservation_entry, + &reservations->head, + nvgpu_pm_resource_reservation_entry, + entry) { + if ((reservation_entry->reservation_id == reservation_id) && + (reservation_entry->vmid == vmid )) { + reserved = true; + break; + } + } + + return reserved; +} + +static bool check_pm_resource_reservation_allowed_locked( + struct nvgpu_pm_resource_reservations *reservations, + enum nvgpu_profiler_pm_reservation_scope scope, + u32 reservation_id, u32 vmid) +{ + struct nvgpu_pm_resource_reservation_entry *reservation_entry; + bool allowed = false; + + switch (scope) { + case NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE: + /* + * Reservation of SCOPE_DEVICE is allowed only if there is + * no current reservation of any scope by any profiler object. + */ + if (reservations->count == 0U) { + allowed = true; + } + break; + + case NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT: + /* + * Reservation of SCOPE_CONTEXT is allowed only if - + * 1. There is no current SCOPE_DEVICE reservation by any other profiler + * object. + * 2. Requesting profiler object does not already have the reservation. 
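		 *
		 * For example, with an existing SCOPE_CONTEXT reservation held by
		 * (reservation_id=1, vmid=0):
		 * - (reservation_id=2, vmid=0) requesting SCOPE_CONTEXT is allowed,
		 * - (reservation_id=1, vmid=0) requesting SCOPE_CONTEXT again is denied,
		 * - any request for SCOPE_DEVICE is denied, since the list is not empty.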
+ */ + + if (!nvgpu_list_empty(&reservations->head)) { + reservation_entry = nvgpu_list_first_entry( + &reservations->head, + nvgpu_pm_resource_reservation_entry, + entry); + if (reservation_entry->scope == + NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) { + break; + } + } + + if (check_pm_resource_existing_reservation_locked(reservations, + reservation_id, vmid)) { + break; + } + + allowed = true; + break; + } + + return allowed; +} + +int nvgpu_pm_reservation_acquire(struct gk20a *g, u32 reservation_id, + enum nvgpu_profiler_pm_resource_type pm_resource, + enum nvgpu_profiler_pm_reservation_scope scope, + u32 vmid) +{ + struct nvgpu_pm_resource_reservations *reservations = + &g->pm_reservations[pm_resource]; + struct nvgpu_pm_resource_reservation_entry *reservation_entry; + int err = 0; + + nvgpu_mutex_acquire(&reservations->lock); + + if (!check_pm_resource_reservation_allowed_locked(reservations, scope, + reservation_id, vmid)) { + err = -EBUSY; + goto done; + } + + reservation_entry = nvgpu_kzalloc(g, sizeof(*reservation_entry)); + if (reservation_entry == NULL) { + err = -ENOMEM; + goto done; + } + + nvgpu_init_list_node(&reservation_entry->entry); + + reservation_entry->reservation_id = reservation_id; + reservation_entry->scope = scope; + reservation_entry->vmid = vmid; + + nvgpu_list_add(&reservation_entry->entry, &reservations->head); + reservations->count++; + + prepare_resource_reservation(g, pm_resource, true); + +done: + nvgpu_mutex_release(&reservations->lock); + + return err; +} + +int nvgpu_pm_reservation_release(struct gk20a *g, u32 reservation_id, + enum nvgpu_profiler_pm_resource_type pm_resource, + u32 vmid) +{ + struct nvgpu_pm_resource_reservations *reservations = + &g->pm_reservations[pm_resource]; + struct nvgpu_pm_resource_reservation_entry *reservation_entry, *n; + bool was_reserved = false; + int err = 0; + + nvgpu_mutex_acquire(&reservations->lock); + + nvgpu_list_for_each_entry_safe(reservation_entry, n, + &reservations->head, + nvgpu_pm_resource_reservation_entry, + entry) { + if ((reservation_entry->reservation_id == reservation_id) && + (reservation_entry->vmid == vmid)) { + was_reserved = true; + nvgpu_list_del(&reservation_entry->entry); + reservations->count--; + nvgpu_kfree(g, reservation_entry); + break; + } + } + + if (was_reserved) { + prepare_resource_reservation(g, pm_resource, false); + } else { + err = -EINVAL; + } + + nvgpu_mutex_release(&reservations->lock); + + return err; +} + +void nvgpu_pm_reservation_release_all_per_vmid(struct gk20a *g, u32 vmid) +{ + struct nvgpu_pm_resource_reservations *reservations; + struct nvgpu_pm_resource_reservation_entry *reservation_entry, *n; + int i; + + for (i = 0; i < NVGPU_PROFILER_PM_RESOURCE_TYPE_COUNT; i++) { + reservations = &g->pm_reservations[i]; + + nvgpu_mutex_acquire(&reservations->lock); + nvgpu_list_for_each_entry_safe(reservation_entry, n, + &reservations->head, + nvgpu_pm_resource_reservation_entry, + entry) { + if (reservation_entry->vmid == vmid) { + nvgpu_list_del(&reservation_entry->entry); + reservations->count--; + nvgpu_kfree(g, reservation_entry); + prepare_resource_reservation(g, i, false); + } + } + nvgpu_mutex_release(&reservations->lock); + } +} + +int nvgpu_pm_reservation_init(struct gk20a *g) +{ + struct nvgpu_pm_resource_reservations *reservations; + int i; + + nvgpu_log(g, gpu_dbg_prof, " "); + + if (g->pm_reservations) { + return 0; + } + + reservations = nvgpu_kzalloc(g, sizeof(*reservations) * + NVGPU_PROFILER_PM_RESOURCE_TYPE_COUNT); + if (reservations == NULL) { + return 
-ENOMEM; + } + + for (i = 0; i < NVGPU_PROFILER_PM_RESOURCE_TYPE_COUNT; i++) { + nvgpu_init_list_node(&reservations[i].head); + nvgpu_mutex_init(&reservations[i].lock); + } + + g->pm_reservations = reservations; + + nvgpu_atomic_set(&g->hwpm_refcount, 0); + + nvgpu_log(g, gpu_dbg_prof, "initialized"); + + return 0; +} + +void nvgpu_pm_reservation_deinit(struct gk20a *g) +{ + nvgpu_kfree(g, g->pm_reservations); +} diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c new file mode 100644 index 000000000..28e73a83c --- /dev/null +++ b/drivers/gpu/nvgpu/common/profiler/profiler.c @@ -0,0 +1,1159 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +#include "nvgpu_next_gpuid.h" +#endif + +static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof); +static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof); + +static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); +static int generate_unique_id(void) +{ + return nvgpu_atomic_add_return(1, &unique_id); +} + +int nvgpu_profiler_alloc(struct gk20a *g, + struct nvgpu_profiler_object **_prof, + enum nvgpu_profiler_pm_reservation_scope scope, + u32 gpu_instance_id) +{ + struct nvgpu_profiler_object *prof; + *_prof = NULL; + + nvgpu_log(g, gpu_dbg_prof, " "); + + prof = nvgpu_kzalloc(g, sizeof(*prof)); + if (prof == NULL) { + return -ENOMEM; + } + + prof->prof_handle = generate_unique_id(); + prof->scope = scope; + prof->gpu_instance_id = gpu_instance_id; + prof->g = g; + + nvgpu_mutex_init(&prof->ioctl_lock); + nvgpu_init_list_node(&prof->prof_obj_entry); + nvgpu_list_add(&prof->prof_obj_entry, &g->profiler_objects); + + nvgpu_log(g, gpu_dbg_prof, "Allocated profiler handle %u", + prof->prof_handle); + + *_prof = prof; + return 0; +} + +void nvgpu_profiler_free(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + + nvgpu_log(g, gpu_dbg_prof, "Free profiler handle %u", + prof->prof_handle); + + nvgpu_profiler_unbind_context(prof); + nvgpu_profiler_free_pma_stream(prof); + + nvgpu_list_del(&prof->prof_obj_entry); + prof->gpu_instance_id = 0U; + nvgpu_kfree(g, prof); +} + +int nvgpu_profiler_bind_context(struct nvgpu_profiler_object *prof, + struct nvgpu_tsg *tsg) +{ + struct gk20a *g = prof->g; + + nvgpu_log(g, gpu_dbg_prof, "Request to bind tsgid %u with profiler handle %u", + tsg->tsgid, prof->prof_handle); + + if (tsg->prof != NULL) { + nvgpu_err(g, "TSG %u is already bound", tsg->tsgid); + return -EINVAL; + } + + if (prof->tsg != NULL) { + nvgpu_err(g, "Profiler object %u already bound!", prof->prof_handle); + return -EINVAL; + } + + prof->tsg = tsg; + tsg->prof = prof; + + nvgpu_log(g, gpu_dbg_prof, "Bind tsgid %u with profiler handle %u successful", + tsg->tsgid, prof->prof_handle); + + prof->context_init = true; + return 0; +} + +int nvgpu_profiler_unbind_context(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + struct nvgpu_tsg *tsg = prof->tsg; + int i; + + if (prof->bound) { + nvgpu_warn(g, "Unbinding resources for handle %u", + prof->prof_handle); + nvgpu_profiler_unbind_pm_resources(prof); + } + + for (i = 0; i < NVGPU_PROFILER_PM_RESOURCE_TYPE_COUNT; i++) { + if (prof->reserved[i]) { + nvgpu_warn(g, "Releasing reserved resource %u for handle %u", + i, prof->prof_handle); + nvgpu_profiler_pm_resource_release(prof, i); + } + } + + if (!prof->context_init) { + return -EINVAL; + } + + if (tsg != NULL) { + tsg->prof = NULL; + prof->tsg = NULL; + + nvgpu_log(g, gpu_dbg_prof, "Unbind profiler handle %u and tsgid %u", + prof->prof_handle, tsg->tsgid); + } + + prof->context_init = false; + return 0; +} + +int nvgpu_profiler_pm_resource_reserve(struct nvgpu_profiler_object *prof, + enum nvgpu_profiler_pm_resource_type pm_resource) +{ + struct gk20a *g = prof->g; + enum nvgpu_profiler_pm_reservation_scope scope = prof->scope; + u32 reservation_id = prof->prof_handle; + int err; + + nvgpu_log(g, gpu_dbg_prof, + "Request reservation for profiler handle 
%u, resource %u, scope %u", + prof->prof_handle, pm_resource, prof->scope); + + if (prof->reserved[pm_resource]) { + nvgpu_err(g, "Profiler handle %u already has the reservation", + prof->prof_handle); + return -EEXIST; + } + + if (prof->bound) { + nvgpu_log(g, gpu_dbg_prof, + "PM resources alredy bound with profiler handle %u," + " unbinding for new reservation", + prof->prof_handle); + err = nvgpu_profiler_unbind_pm_resources(prof); + if (err != 0) { + nvgpu_err(g, "Profiler handle %u failed to unbound, err %d", + prof->prof_handle, err); + return err; + } + } + + err = g->ops.pm_reservation.acquire(g, reservation_id, pm_resource, + scope, 0); + if (err != 0) { + nvgpu_err(g, "Profiler handle %u denied the reservation, err %d", + prof->prof_handle, err); + return err; + } + + prof->reserved[pm_resource] = true; + + if (pm_resource == NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC) { + if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_SMPC] = + NVGPU_DBG_REG_OP_TYPE_GR_CTX; + } else { + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_SMPC] = + NVGPU_DBG_REG_OP_TYPE_GLOBAL; + } + } + + if (pm_resource == NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY) { + if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) { + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON] = + NVGPU_DBG_REG_OP_TYPE_GR_CTX; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER] = + NVGPU_DBG_REG_OP_TYPE_GR_CTX; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER] = + NVGPU_DBG_REG_OP_TYPE_GR_CTX; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX] = + NVGPU_DBG_REG_OP_TYPE_GR_CTX; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_CAU] = + NVGPU_DBG_REG_OP_TYPE_GR_CTX; + } else { + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON] = + NVGPU_DBG_REG_OP_TYPE_GLOBAL; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER] = + NVGPU_DBG_REG_OP_TYPE_GLOBAL; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER] = + NVGPU_DBG_REG_OP_TYPE_GLOBAL; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX] = + NVGPU_DBG_REG_OP_TYPE_GLOBAL; + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_CAU] = + NVGPU_DBG_REG_OP_TYPE_GLOBAL; + } + } + + if (pm_resource == NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM) { + prof->reg_op_type[NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL] = + NVGPU_DBG_REG_OP_TYPE_GLOBAL; + } + + nvgpu_log(g, gpu_dbg_prof, + "Granted reservation for profiler handle %u, resource %u, scope %u", + prof->prof_handle, pm_resource, prof->scope); + + return 0; +} + +int nvgpu_profiler_pm_resource_release(struct nvgpu_profiler_object *prof, + enum nvgpu_profiler_pm_resource_type pm_resource) +{ + struct gk20a *g = prof->g; + u32 reservation_id = prof->prof_handle; + int err; + + nvgpu_log(g, gpu_dbg_prof, + "Release reservation for profiler handle %u, resource %u, scope %u", + prof->prof_handle, pm_resource, prof->scope); + + if (!prof->reserved[pm_resource]) { + nvgpu_log(g, gpu_dbg_prof, + "Profiler handle %u resource is not reserved", + prof->prof_handle); + return -EINVAL; + } + + if (prof->bound) { + nvgpu_log(g, gpu_dbg_prof, + "PM resources alredy bound with profiler handle %u," + " unbinding for reservation release", + prof->prof_handle); + err = nvgpu_profiler_unbind_pm_resources(prof); + if (err != 0) { + nvgpu_err(g, "Profiler handle %u failed to unbound, err %d", + prof->prof_handle, err); + return err; + } + } + + err = g->ops.pm_reservation.release(g, reservation_id, pm_resource, 0); + if (err != 0) { + nvgpu_err(g, "Profiler handle %u 
does not have valid reservation, err %d", + prof->prof_handle, err); + prof->reserved[pm_resource] = false; + return err; + } + + prof->reserved[pm_resource] = false; + + nvgpu_log(g, gpu_dbg_prof, + "Released reservation for profiler handle %u, resource %u, scope %u", + prof->prof_handle, pm_resource, prof->scope); + + return 0; +} + +static bool nvgpu_profiler_is_context_resource( + struct nvgpu_profiler_object *prof, + enum nvgpu_profiler_pm_resource_type pm_resource) +{ + return (prof->scope != NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) || + prof->ctxsw[pm_resource]; +} + +int nvgpu_profiler_bind_smpc(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg) +{ + int err = 0; + + if (!is_ctxsw) { + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SMPC_GLOBAL_MODE)) { + err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id, + g->ops.gr.update_smpc_global_mode(g, true)); + } else { + err = -EINVAL; + } + } else { + err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id, + g->ops.gr.update_smpc_ctxsw_mode(g, tsg, true)); + if (err != 0) { + goto done; + } + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SMPC_GLOBAL_MODE)) { + err = g->ops.gr.update_smpc_global_mode(g, false); + } + } + +done: + if (err != 0) { + nvgpu_err(g, "nvgpu bind smpc failed, err=%d", err); + } + return err; +} + +int nvgpu_profiler_unbind_smpc(struct gk20a *g, bool is_ctxsw, + struct nvgpu_tsg *tsg) +{ + int err; + + if (!is_ctxsw) { + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SMPC_GLOBAL_MODE)) { + err = g->ops.gr.update_smpc_global_mode(g, false); + } else { + err = -EINVAL; + } + } else { + err = g->ops.gr.update_smpc_ctxsw_mode(g, tsg, false); + } + + if (err != 0) { + nvgpu_err(g, "nvgpu unbind smpc failed, err=%d", err); + } + return err; +} + +static int nvgpu_profiler_bind_hwpm_common(struct gk20a *g, u32 gr_instance_id, + bool is_ctxsw, struct nvgpu_tsg *tsg, bool streamout) +{ + int err = 0; + u32 mode = streamout ? 
NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW : + NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW; + + if (!is_ctxsw) { + if (g->ops.gr.init_cau != NULL) { + nvgpu_gr_exec_for_instance(g, gr_instance_id, + g->ops.gr.init_cau(g)); + } + if (g->ops.perf.reset_hwpm_pmm_registers != NULL) { + g->ops.perf.reset_hwpm_pmm_registers(g); + } + g->ops.perf.init_hwpm_pmm_register(g); + } else { + err = g->ops.gr.update_hwpm_ctxsw_mode( + g, gr_instance_id, tsg, 0, mode); + } + + return err; +} + +int nvgpu_profiler_bind_hwpm(struct gk20a *g, u32 gr_instance_id, + bool is_ctxsw, struct nvgpu_tsg *tsg) +{ + return nvgpu_profiler_bind_hwpm_common(g, gr_instance_id, is_ctxsw, + tsg, false); +} + +int nvgpu_profiler_unbind_hwpm(struct gk20a *g, u32 gr_instance_id, + bool is_ctxsw, struct nvgpu_tsg *tsg) +{ + int err = 0; + u32 mode = NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW; + + if (is_ctxsw) { + err = g->ops.gr.update_hwpm_ctxsw_mode( + g, gr_instance_id, tsg, 0, mode); + } + + return err; +} + +static void nvgpu_profiler_disable_cau_and_smpc(struct gk20a *g) +{ + /* Disable CAUs */ + if (g->ops.gr.disable_cau != NULL) { + g->ops.gr.disable_cau(g); + } + + /* Disable SMPC */ + if (g->ops.gr.disable_smpc != NULL) { + g->ops.gr.disable_smpc(g); + } +} + +static int nvgpu_profiler_quiesce_hwpm_streamout_resident(struct gk20a *g, + u32 gr_instance_id, + void *pma_bytes_available_buffer_cpuva, + bool smpc_reserved) +{ + u64 bytes_available; + int err = 0; + + nvgpu_log(g, gpu_dbg_prof, + "HWPM streamout quiesce in resident state started"); + + /* Enable streamout */ + g->ops.perf.pma_stream_enable(g, true); + + /* Disable all perfmons */ + g->ops.perf.disable_all_perfmons(g); + + if (smpc_reserved) { + nvgpu_gr_exec_for_instance(g, gr_instance_id, + nvgpu_profiler_disable_cau_and_smpc(g)); + } + + /* Wait for routers to idle/quiescent */ + err = g->ops.perf.wait_for_idle_pmm_routers(g); + if (err != 0) { + goto fail; + } + + /* Wait for PMA to idle/quiescent */ + err = g->ops.perf.wait_for_idle_pma(g); + if (err != 0) { + goto fail; + } + +#ifdef CONFIG_NVGPU_NEXT + NVGPU_NEXT_PROFILER_QUIESCE(g); +#endif + + /* Disable streamout */ + g->ops.perf.pma_stream_enable(g, false); + + /* wait for all the inflight records from fb-hub to stream out */ + err = nvgpu_perfbuf_update_get_put(g, 0U, &bytes_available, + pma_bytes_available_buffer_cpuva, true, + NULL, NULL); + +fail: + if (err != 0) { + nvgpu_err(g, "Failed to quiesce HWPM streamout in resident state"); + } else { + nvgpu_log(g, gpu_dbg_prof, + "HWPM streamout quiesce in resident state successfull"); + } + + return 0; +} + +static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g, + struct nvgpu_tsg *tsg) +{ + struct nvgpu_mem *pm_ctx_mem; + + nvgpu_log(g, gpu_dbg_prof, + "HWPM streamout quiesce in non-resident state started"); + + if (tsg == NULL || tsg->gr_ctx == NULL) { + return -EINVAL; + } + + pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(tsg->gr_ctx); + if (pm_ctx_mem == NULL) { + nvgpu_err(g, "No PM context"); + return -EINVAL; + } + + nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size); + nvgpu_log(g, gpu_dbg_prof, + "HWPM streamout quiesce in non-resident state successfull"); + + return 0; +} + +static int nvgpu_profiler_disable_ctxsw_and_check_is_tsg_ctx_resident( + struct nvgpu_tsg *tsg) +{ + struct gk20a *g = tsg->g; + int err; + + err = nvgpu_gr_disable_ctxsw(g); + if (err != 0) { + nvgpu_err(g, "unable to stop gr ctxsw"); + return err; + } + + return g->ops.gr.is_tsg_ctx_resident(tsg); +} + +static int 
nvgpu_profiler_quiesce_hwpm_streamout_ctx(struct gk20a *g, + u32 gr_instance_id, + struct nvgpu_tsg *tsg, + void *pma_bytes_available_buffer_cpuva, + bool smpc_reserved) +{ + bool ctx_resident; + int err, ctxsw_err; + + ctx_resident = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id, + nvgpu_profiler_disable_ctxsw_and_check_is_tsg_ctx_resident(tsg)); + + if (ctx_resident) { + err = nvgpu_profiler_quiesce_hwpm_streamout_resident(g, + gr_instance_id, + pma_bytes_available_buffer_cpuva, + smpc_reserved); + } else { + err = nvgpu_profiler_quiesce_hwpm_streamout_non_resident(g, tsg); + } + if (err != 0) { + nvgpu_err(g, "Failed to quiesce HWPM streamout"); + } + + ctxsw_err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id, + nvgpu_gr_enable_ctxsw(g)); + if (ctxsw_err != 0) { + nvgpu_err(g, "unable to restart ctxsw!"); + err = ctxsw_err; + } + + return err; +} + +static int nvgpu_profiler_quiesce_hwpm_streamout(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg, + void *pma_bytes_available_buffer_cpuva, + bool smpc_reserved) +{ + if (!is_ctxsw) { + return nvgpu_profiler_quiesce_hwpm_streamout_resident(g, + gr_instance_id, + pma_bytes_available_buffer_cpuva, + smpc_reserved); + } else { + return nvgpu_profiler_quiesce_hwpm_streamout_ctx(g, + gr_instance_id, + tsg, + pma_bytes_available_buffer_cpuva, + smpc_reserved); + } +} + +int nvgpu_profiler_bind_hwpm_streamout(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg, + u64 pma_buffer_va, + u32 pma_buffer_size, + u64 pma_bytes_available_buffer_va) +{ + int err; + + err = nvgpu_profiler_bind_hwpm_common(g, gr_instance_id, is_ctxsw, tsg, true); + if (err) { + return err; + } + + err = g->ops.perfbuf.perfbuf_enable(g, pma_buffer_va, pma_buffer_size); + if (err) { + nvgpu_profiler_unbind_hwpm(g, gr_instance_id, is_ctxsw, tsg); + return err; + } + + g->ops.perf.bind_mem_bytes_buffer_addr(g, pma_bytes_available_buffer_va); + return 0; +} + +int nvgpu_profiler_unbind_hwpm_streamout(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg, + void *pma_bytes_available_buffer_cpuva, + bool smpc_reserved) +{ + int err; + + err = nvgpu_profiler_quiesce_hwpm_streamout(g, + gr_instance_id, + is_ctxsw, tsg, + pma_bytes_available_buffer_cpuva, + smpc_reserved); + if (err) { + return err; + } + + g->ops.perf.bind_mem_bytes_buffer_addr(g, 0ULL); + + err = g->ops.perfbuf.perfbuf_disable(g); + if (err) { + return err; + } + + err = nvgpu_profiler_unbind_hwpm(g, gr_instance_id, is_ctxsw, tsg); + if (err) { + return err; + } + + return 0; +} + +int nvgpu_profiler_bind_pm_resources(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + bool is_ctxsw; + int err; + u32 gr_instance_id; + + nvgpu_log(g, gpu_dbg_prof, + "Request to bind PM resources with profiler handle %u", + prof->prof_handle); + + if (prof->bound) { + nvgpu_err(g, "PM resources are already bound with profiler handle %u", + prof->prof_handle); + return -EINVAL; + } + + if (!prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY] && + !prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + nvgpu_err(g, "No PM resources reserved for profiler handle %u", + prof->prof_handle); + return -EINVAL; + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + gr_instance_id = nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id); + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) { + is_ctxsw = nvgpu_profiler_is_context_resource(prof, + 
NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY); + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) { + err = g->ops.profiler.bind_hwpm_streamout(g, + gr_instance_id, + is_ctxsw, + prof->tsg, + prof->pma_buffer_va, + prof->pma_buffer_size, + prof->pma_bytes_available_buffer_va); + if (err != 0) { + nvgpu_err(g, + "failed to bind HWPM streamout with profiler handle %u", + prof->prof_handle); + goto fail; + } + + nvgpu_log(g, gpu_dbg_prof, + "HWPM streamout bound with profiler handle %u", + prof->prof_handle); + } else { + err = g->ops.profiler.bind_hwpm(prof->g, gr_instance_id, + is_ctxsw, prof->tsg); + if (err != 0) { + nvgpu_err(g, + "failed to bind HWPM with profiler handle %u", + prof->prof_handle); + goto fail; + } + + nvgpu_log(g, gpu_dbg_prof, + "HWPM bound with profiler handle %u", + prof->prof_handle); + } + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + is_ctxsw = nvgpu_profiler_is_context_resource(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC); + err = g->ops.profiler.bind_smpc(g, gr_instance_id, + is_ctxsw, prof->tsg); + if (err) { + nvgpu_err(g, "failed to bind SMPC with profiler handle %u", + prof->prof_handle); + goto fail; + } + + nvgpu_log(g, gpu_dbg_prof, + "SMPC bound with profiler handle %u", prof->prof_handle); + } + + err = nvgpu_profiler_build_regops_allowlist(prof); + if (err != 0) { + nvgpu_err(g, "failed to build allowlist"); + goto fail_unbind; + } + + prof->bound = true; + + gk20a_idle(g); + return 0; + +fail_unbind: + nvgpu_profiler_unbind_pm_resources(prof); +fail: + gk20a_idle(g); + return err; +} + +int nvgpu_profiler_unbind_pm_resources(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + bool is_ctxsw; + int err; + u32 gr_instance_id; + + if (!prof->bound) { + nvgpu_err(g, "No PM resources bound to profiler handle %u", + prof->prof_handle); + return -EINVAL; + } + + nvgpu_profiler_destroy_regops_allowlist(prof); + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + gr_instance_id = nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id); + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) { + is_ctxsw = nvgpu_profiler_is_context_resource(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY); + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) { + err = g->ops.profiler.unbind_hwpm_streamout(g, + gr_instance_id, + is_ctxsw, + prof->tsg, + prof->pma_bytes_available_buffer_cpuva, + prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]); + if (err) { + nvgpu_err(g, + "failed to unbind HWPM streamout from profiler handle %u", + prof->prof_handle); + goto fail; + } + + nvgpu_log(g, gpu_dbg_prof, + "HWPM streamout unbound from profiler handle %u", + prof->prof_handle); + } else { + err = g->ops.profiler.unbind_hwpm(g, gr_instance_id, + is_ctxsw, prof->tsg); + if (err) { + nvgpu_err(g, + "failed to unbind HWPM from profiler handle %u", + prof->prof_handle); + goto fail; + } + + nvgpu_log(g, gpu_dbg_prof, + "HWPM unbound from profiler handle %u", + prof->prof_handle); + } + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + is_ctxsw = nvgpu_profiler_is_context_resource(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC); + err = g->ops.profiler.unbind_smpc(g, is_ctxsw, prof->tsg); + if (err) { + nvgpu_err(g, + "failed to unbind SMPC from profiler handle %u", + prof->prof_handle); + goto fail; + } + + nvgpu_log(g, gpu_dbg_prof, + "SMPC unbound from profiler handle %u", prof->prof_handle); + } + + prof->bound = false; + +fail: + 
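+	/*
+	 * Balance the gk20a_busy() call made earlier in this function;
+	 * this label is reached on both the success and the error paths.
+	 */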
gk20a_idle(g); + return err; +} + +int nvgpu_profiler_alloc_pma_stream(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + int err; + + err = nvgpu_profiler_pm_resource_reserve(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + if (err) { + nvgpu_err(g, "failed to reserve PMA stream"); + return err; + } + + err = nvgpu_perfbuf_init_vm(g); + if (err) { + nvgpu_err(g, "failed to initialize perfbuf VM"); + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + return err; + } + + return 0; +} + +void nvgpu_profiler_free_pma_stream(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) { + nvgpu_perfbuf_deinit_vm(g); + nvgpu_profiler_pm_resource_release(prof, + NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM); + } +} + +static int map_cmp(const void *a, const void *b) +{ + const struct nvgpu_pm_resource_register_range_map *e1; + const struct nvgpu_pm_resource_register_range_map *e2; + + e1 = (const struct nvgpu_pm_resource_register_range_map *)a; + e2 = (const struct nvgpu_pm_resource_register_range_map *)b; + + if (e1->start < e2->start) { + return -1; + } + + if (e1->start > e2->start) { + return 1; + } + + return 0; +} + +static u32 get_pm_resource_register_range_map_entry_count(struct nvgpu_profiler_object *prof) +{ + struct gk20a *g = prof->g; + u32 count = 0U; + u32 range_count; + + /* Account for TYPE_TEST entries added in add_test_range_to_map() */ + count += 2U; + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + g->ops.regops.get_smpc_register_ranges(&range_count); + count += range_count; + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) { + g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count); + count += range_count; + + g->ops.regops.get_hwpm_router_register_ranges(&range_count); + count += range_count; + + g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count); + count += range_count; + + g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count); + count += range_count; + + if (g->ops.regops.get_cau_register_ranges != NULL) { + g->ops.regops.get_cau_register_ranges(&range_count); + count += range_count; + } + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) { + g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count); + count += range_count; + } + + return count; +} + +static void add_range_to_map(const struct nvgpu_pm_resource_register_range *range, + u32 range_count, struct nvgpu_pm_resource_register_range_map *map, + u32 *map_index, enum nvgpu_pm_resource_hwpm_register_type type) +{ + u32 index = *map_index; + u32 i; + + for (i = 0U; i < range_count; i++) { + map[index].start = range[i].start; + map[index].end = range[i].end; + map[index].type = type; + index++; + } + + *map_index = index; +} + +static void add_test_range_to_map(struct gk20a *g, + struct nvgpu_pm_resource_register_range_map *map, + u32 *map_index, enum nvgpu_pm_resource_hwpm_register_type type) +{ + u32 index = *map_index; + u32 timer0_offset, timer1_offset; + + g->ops.ptimer.get_timer_reg_offsets(&timer0_offset, &timer1_offset); + + map[index].start = timer0_offset; + map[index].end = timer0_offset; + map[index].type = type; + index++; + + map[index].start = timer1_offset; + map[index].end = timer1_offset; + map[index].type = type; + index++; + + *map_index = index; +} + +static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof) +{ + struct 
nvgpu_pm_resource_register_range_map *map; + const struct nvgpu_pm_resource_register_range *range; + u32 map_count, map_index = 0U; + u32 range_count; + struct gk20a *g = prof->g; + u32 i; + + map_count = get_pm_resource_register_range_map_entry_count(prof); + if (map_count == 0U) { + return -EINVAL; + } + + nvgpu_log(g, gpu_dbg_prof, "Allowlist map number of entries %u for handle %u", + map_count, prof->prof_handle); + + map = nvgpu_kzalloc(g, sizeof(*map) * map_count); + if (map == NULL) { + return -ENOMEM; + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) { + range = g->ops.regops.get_smpc_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_SMPC); + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) { + range = g->ops.regops.get_hwpm_perfmon_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON); + + range = g->ops.regops.get_hwpm_router_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER); + + range = g->ops.regops.get_hwpm_pma_trigger_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER); + + range = g->ops.regops.get_hwpm_perfmux_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX); + + if (g->ops.regops.get_cau_register_ranges != NULL) { + range = g->ops.regops.get_cau_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_CAU); + } + } + + if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) { + range = g->ops.regops.get_hwpm_pma_channel_register_ranges(&range_count); + add_range_to_map(range, range_count, map, &map_index, + NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL); + } + + add_test_range_to_map(g, map, &map_index, NVGPU_HWPM_REGISTER_TYPE_TEST); + + nvgpu_log(g, gpu_dbg_prof, "Allowlist map created successfully for handle %u", + prof->prof_handle); + + nvgpu_assert(map_count == map_index); + + sort(map, map_count, sizeof(*map), map_cmp, NULL); + + for (i = 0; i < map_count; i++) { + nvgpu_log(g, gpu_dbg_prof, "allowlist[%u]: 0x%x-0x%x : type %u", + i, map[i].start, map[i].end, map[i].type); + } + + prof->map = map; + prof->map_count = map_count; + return 0; +} + +static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof) +{ + nvgpu_log(prof->g, gpu_dbg_prof, "Allowlist map destroy for handle %u", + prof->prof_handle); + + nvgpu_kfree(prof->g, prof->map); +} + +static bool allowlist_range_search(struct gk20a *g, + struct nvgpu_pm_resource_register_range_map *map, + u32 map_count, u32 offset, + enum nvgpu_pm_resource_hwpm_register_type *type) +{ + u32 start = 0U; + u32 mid = 0U; + u32 end = map_count - 1U; + bool found = false; + + while (start <= end) { + mid = (start + end) / 2U; + + if (offset < map[mid].start) { + end = mid - 1U; + } else if (offset > map[mid].end) { + start = mid + 1U; + } else { + found = true; + break; + } + } + + if (found) { + *type = map[mid].type; + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in range 0x%x-0x%x, type: %u", + offset, map[mid].start, map[mid].end, map[mid].type); + } else { + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in range search", offset); + } + + return found; +} + +static bool allowlist_offset_search(struct gk20a *g, + const u32 
*offset_allowlist, u32 count, u32 offset) +{ + u32 start = 0U; + u32 mid = 0U; + u32 end = count - 1U; + bool found = false; + + while (start <= end) { + mid = (start + end) / 2U; + if (offset_allowlist[mid] == offset) { + found = true; + break; + } + + if (offset < offset_allowlist[mid]) { + end = mid - 1U; + } else { + start = mid + 1U; + } + } + + if (found) { + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x found in offset allowlist", + offset); + } else { + nvgpu_log(g, gpu_dbg_prof, "Offset 0x%x not found in offset allowlist", + offset); + } + + return found; +} + +bool nvgpu_profiler_validate_regops_allowlist(struct nvgpu_profiler_object *prof, + u32 offset, enum nvgpu_pm_resource_hwpm_register_type *type) +{ + enum nvgpu_pm_resource_hwpm_register_type reg_type; + struct gk20a *g = prof->g; + const u32 *offset_allowlist; + u32 count; + u32 stride; + bool found; + + found = allowlist_range_search(g, prof->map, prof->map_count, offset, ®_type); + if (!found) { + return found; + } + + if (type != NULL) { + *type = reg_type; + } + + if ((reg_type == NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMUX) || + (reg_type == NVGPU_HWPM_REGISTER_TYPE_TEST)) { + return found; + } + + switch ((u32)reg_type) { + case NVGPU_HWPM_REGISTER_TYPE_HWPM_PERFMON: + offset_allowlist = g->ops.regops.get_hwpm_perfmon_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_perfmon_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_HWPM_ROUTER: + offset_allowlist = g->ops.regops.get_hwpm_router_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_router_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_TRIGGER: + offset_allowlist = g->ops.regops.get_hwpm_pma_trigger_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_pma_trigger_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_SMPC: + offset_allowlist = g->ops.regops.get_smpc_register_offset_allowlist(&count); + stride = g->ops.regops.get_smpc_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_CAU: + offset_allowlist = g->ops.regops.get_cau_register_offset_allowlist(&count); + stride = g->ops.regops.get_cau_register_stride(); + break; + + case NVGPU_HWPM_REGISTER_TYPE_HWPM_PMA_CHANNEL: + offset_allowlist = g->ops.regops.get_hwpm_pma_channel_register_offset_allowlist(&count); + stride = g->ops.regops.get_hwpm_pma_channel_register_stride(); + break; + + default: + return false; + } + + offset = offset & (stride - 1U); + return allowlist_offset_search(g, offset_allowlist, count, offset); +} diff --git a/drivers/gpu/nvgpu/common/ptimer/ptimer.c b/drivers/gpu/nvgpu/common/ptimer/ptimer.c new file mode 100644 index 000000000..1d9bbf3a2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ptimer/ptimer.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA +#include +#include +#include +#include +#include + +int nvgpu_ptimer_init(struct gk20a *g) +{ +#if defined(CONFIG_NVGPU_NEXT) + nvgpu_cg_slcg_timer_load_enable(g); +#endif + + return 0; +} + +int nvgpu_get_timestamps_zipper(struct gk20a *g, + u32 source_id, u32 count, + struct nvgpu_cpu_time_correlation_sample *samples) +{ + int err = 0; + unsigned int i = 0; + + if (gk20a_busy(g) != 0) { + nvgpu_err(g, "GPU not powered on\n"); + err = -EINVAL; + goto end; + } + + for (i = 0; i < count; i++) { + err = g->ops.ptimer.read_ptimer(g, &samples[i].gpu_timestamp); + if (err != 0) { + return err; + } + + samples[i].cpu_timestamp = nvgpu_hr_timestamp(); + } + +end: + gk20a_idle(g); + return err; +} +#endif diff --git a/drivers/gpu/nvgpu/common/rc/rc.c b/drivers/gpu/nvgpu/common/rc/rc.c new file mode 100644 index 000000000..9dd868e23 --- /dev/null +++ b/drivers/gpu/nvgpu/common/rc/rc.c @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#ifdef CONFIG_NVGPU_RECOVERY +#include +#include +#include +#include +#include +#endif +#include +#include +#include +#include +#include + +void nvgpu_rc_fifo_recover(struct gk20a *g, u32 eng_bitmask, + u32 hw_id, bool id_is_tsg, + bool id_is_known, bool debug_dump, u32 rc_type) +{ +#ifdef CONFIG_NVGPU_RECOVERY + unsigned int id_type; + + if (debug_dump) { + gk20a_debug_dump(g); + } + + if (g->ops.ltc.flush != NULL) { + g->ops.ltc.flush(g); + } + + if (id_is_known) { + id_type = id_is_tsg ? 
ID_TYPE_TSG : ID_TYPE_CHANNEL;
+	} else {
+		id_type = ID_TYPE_UNKNOWN;
+	}
+
+	g->ops.fifo.recover(g, eng_bitmask, hw_id, id_type,
+			rc_type, NULL);
+#else
+	WARN_ON(!g->sw_quiesce_pending);
+#endif
+}
+
+void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask,
+	struct nvgpu_tsg *tsg, bool debug_dump)
+{
+	nvgpu_tsg_set_error_notifier(g, tsg,
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
+
+#ifdef CONFIG_NVGPU_RECOVERY
+	/*
+	 * Cancel all channels' wdt since a ctxsw timeout causes the runlist to
+	 * get stuck and might falsely trigger multiple watchdogs at a time. We
+	 * won't detect proper wdt timeouts that would have happened, but if
+	 * they're stuck, they will trigger the wdt soon enough again.
+	 */
+	nvgpu_channel_restart_all_wdts(g);
+
+	nvgpu_rc_fifo_recover(g, eng_bitmask, tsg->tsgid, true, true, debug_dump,
+			RC_TYPE_CTXSW_TIMEOUT);
+#else
+	WARN_ON(!g->sw_quiesce_pending);
+#endif
+}
+
+void nvgpu_rc_pbdma_fault(struct gk20a *g, u32 pbdma_id, u32 error_notifier,
+			struct nvgpu_pbdma_status_info *pbdma_status)
+{
+	u32 id;
+	u32 id_type = PBDMA_STATUS_ID_TYPE_INVALID;
+
+	nvgpu_log(g, gpu_dbg_info, "pbdma id %d error notifier %d",
+			pbdma_id, error_notifier);
+
+	if (nvgpu_pbdma_status_is_chsw_valid(pbdma_status) ||
+		nvgpu_pbdma_status_is_chsw_save(pbdma_status)) {
+		id = pbdma_status->id;
+		id_type = pbdma_status->id_type;
+	} else if (nvgpu_pbdma_status_is_chsw_load(pbdma_status) ||
+		nvgpu_pbdma_status_is_chsw_switch(pbdma_status)) {
+		id = pbdma_status->next_id;
+		id_type = pbdma_status->next_id_type;
+	} else {
+		/* Nothing to do here */
+		nvgpu_err(g, "Invalid pbdma_status.id");
+		return;
+	}
+
+	if (id_type == PBDMA_STATUS_ID_TYPE_TSGID) {
+		struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(g, id);
+
+		nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
+		nvgpu_rc_tsg_and_related_engines(g, tsg, true,
+				RC_TYPE_PBDMA_FAULT);
+	} else if (id_type == PBDMA_STATUS_ID_TYPE_CHID) {
+		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, id);
+		struct nvgpu_tsg *tsg;
+		if (ch == NULL) {
+			nvgpu_err(g, "channel is not referenceable");
+			return;
+		}
+
+		tsg = nvgpu_tsg_from_ch(ch);
+		if (tsg != NULL) {
+			nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
+			nvgpu_rc_tsg_and_related_engines(g, tsg, true,
+					RC_TYPE_PBDMA_FAULT);
+		} else {
+			nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
+		}
+
+		nvgpu_channel_put(ch);
+	} else {
+		nvgpu_err(g, "Invalid pbdma_status.id_type");
+	}
+}
+
+void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id)
+{
+#ifdef CONFIG_NVGPU_RECOVERY
+	u32 eng_bitmask = nvgpu_engine_get_runlist_busy_engines(g, runlist_id);
+
+	if (eng_bitmask != 0U) {
+		nvgpu_rc_fifo_recover(g, eng_bitmask, INVAL_ID, false, false, true,
+				RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
+	}
+#else
+	/*
+	 * Runlist update occurs in non-mission mode, when
+	 * adding/removing channel/TSGs. The pending bit
+	 * is a debug only feature. As a result logging a
+	 * warning is sufficient.
+	 * We expect other HW safety mechanisms such as
+	 * PBDMA timeout to detect issues that caused pending
+	 * to not clear. It's possible a bad base address could
+	 * cause some MMU faults too.
+	 * Worst case we rely on the application level task
+	 * monitor to detect the GPU tasks are not completing
+	 * on time.
+ */ + WARN_ON(!g->sw_quiesce_pending); +#endif +} + +void nvgpu_rc_preempt_timeout(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + nvgpu_tsg_set_error_notifier(g, tsg, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); + +#ifdef CONFIG_NVGPU_RECOVERY + nvgpu_rc_tsg_and_related_engines(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT); +#else + BUG_ON(!g->sw_quiesce_pending); +#endif +} + +void nvgpu_rc_gr_fault(struct gk20a *g, struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ +#ifdef CONFIG_NVGPU_RECOVERY + u32 gr_engine_id; + u32 gr_eng_bitmask = 0U; + u32 cur_gr_instance_id = nvgpu_gr_get_cur_instance_id(g); + u32 inst_id = nvgpu_gr_get_syspipe_id(g, cur_gr_instance_id); + + nvgpu_log(g, gpu_dbg_gr, "RC GR%u inst_id%u", + cur_gr_instance_id, inst_id); + + gr_engine_id = nvgpu_engine_get_gr_id_for_inst(g, inst_id); + if (gr_engine_id != NVGPU_INVALID_ENG_ID) { + gr_eng_bitmask = BIT32(gr_engine_id); + } else { + nvgpu_warn(g, "gr_engine_id is invalid"); + } + + if (tsg != NULL) { + nvgpu_rc_fifo_recover(g, gr_eng_bitmask, tsg->tsgid, + true, true, true, RC_TYPE_GR_FAULT); + } else { + if (ch != NULL) { + nvgpu_err(g, "chid: %d referenceable but not " + "bound to tsg", ch->chid); + } + nvgpu_rc_fifo_recover(g, gr_eng_bitmask, INVAL_ID, + false, false, true, RC_TYPE_GR_FAULT); + } +#else + WARN_ON(!g->sw_quiesce_pending); +#endif + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_RECOVERY + /* id is unknown, preempt all runlists and do recovery */ + nvgpu_rc_fifo_recover(g, 0, INVAL_ID, false, false, false, + RC_TYPE_SCHED_ERR); +#else + WARN_ON(!g->sw_quiesce_pending); +#endif +} + +void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct nvgpu_tsg *tsg, + bool debug_dump, u32 rc_type) +{ +#ifdef CONFIG_NVGPU_RECOVERY + u32 eng_bitmask = 0U; + int err = 0; + +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_mutex_acquire(&g->dbg_sessions_lock); +#endif + + /* disable tsg so that it does not get scheduled again */ + g->ops.tsg.disable(tsg); + + /* + * On hitting engine reset, h/w drops the ctxsw_status to INVALID in + * fifo_engine_status register. Also while the engine is held in reset + * h/w passes busy/idle straight through. fifo_engine_status registers + * are correct in that there is no context switch outstanding + * as the CTXSW is aborted when reset is asserted. + */ + nvgpu_log_info(g, "acquire engines_reset_mutex"); + nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); + + /* + * stop context switching to prevent engine assignments from + * changing until engine status is checked to make sure tsg + * being recovered is not loaded on the engines + */ + err = nvgpu_gr_disable_ctxsw(g); + if (err != 0) { + /* if failed to disable ctxsw, just abort tsg */ + nvgpu_err(g, "failed to disable ctxsw"); + } else { + /* recover engines if tsg is loaded on the engines */ + eng_bitmask = nvgpu_engine_get_mask_on_id(g, tsg->tsgid, true); + + /* + * it is ok to enable ctxsw before tsg is recovered. If engines + * is 0, no engine recovery is needed and if it is non zero, + * gk20a_fifo_recover will call get_mask_on_id again. + * By that time if tsg is not on the engine, engine need not + * be reset. 
+ */ + err = nvgpu_gr_enable_ctxsw(g); + if (err != 0) { + nvgpu_err(g, "failed to enable ctxsw"); + } + } + nvgpu_log_info(g, "release engines_reset_mutex"); + nvgpu_mutex_release(&g->fifo.engines_reset_mutex); + + if (eng_bitmask != 0U) { + nvgpu_rc_fifo_recover(g, eng_bitmask, tsg->tsgid, true, true, + debug_dump, rc_type); + } else { + if (nvgpu_tsg_mark_error(g, tsg) && debug_dump) { + gk20a_debug_dump(g); + } + + nvgpu_tsg_abort(g, tsg, false); + } + +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_mutex_release(&g->dbg_sessions_lock); +#endif +#else + WARN_ON(!g->sw_quiesce_pending); +#endif +} + +void nvgpu_rc_mmu_fault(struct gk20a *g, u32 act_eng_bitmask, + u32 id, unsigned int id_type, unsigned int rc_type, + struct mmu_fault_info *mmufault) +{ + nvgpu_err(g, "mmu fault id=%u id_type=%u act_eng_bitmask=%08x", + id, id_type, act_eng_bitmask); + +#ifdef CONFIG_NVGPU_RECOVERY + g->ops.fifo.recover(g, act_eng_bitmask, + id, id_type, rc_type, mmufault); +#else + if ((id != INVAL_ID) && (id_type == ID_TYPE_TSG)) { + struct nvgpu_tsg *tsg = &g->fifo.tsg[id]; + nvgpu_tsg_set_ctx_mmu_error(g, tsg); + nvgpu_tsg_mark_error(g, tsg); + } + + WARN_ON(!g->sw_quiesce_pending); +#endif +} diff --git a/drivers/gpu/nvgpu/common/regops/regops.c b/drivers/gpu/nvgpu/common/regops/regops.c new file mode 100644 index 000000000..5502a2d2a --- /dev/null +++ b/drivers/gpu/nvgpu/common/regops/regops.c @@ -0,0 +1,518 @@ +/* + * Tegra GK20A GPU Debugger Driver Register Ops + * + * Copyright (c) 2013-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Access ctx buffer offset functions in gr_gk20a.h */ +#include "hal/gr/gr/gr_gk20a.h" + +static int regop_bsearch_range_cmp(const void *pkey, const void *pelem) +{ + const u32 key = *(const u32 *)pkey; + const struct regop_offset_range *prange = + (const struct regop_offset_range *)pelem; + if (key < prange->base) { + return -1; + } else if (prange->base <= key && key < (U32(prange->base) + + (U32(prange->count) * U32(4)))) { + return 0; + } + return 1; +} + +static inline bool linear_search(u32 offset, const u32 *list, u64 size) +{ + u64 i; + for (i = 0; i < size; i++) { + if (list[i] == offset) { + return true; + } + } + return false; +} + +/* + * In order to perform a context relative op the context has + * to be created already... 
which would imply that the
+ * context switch mechanism has already been put in place.
+ * So by the time we perform such an operation it should always
+ * be possible to query for the appropriate context offsets, etc.
+ *
+ * But note: while the dbg_gpu bind requires a channel fd,
+ * it doesn't require an allocated gr/compute obj at that point...
+ */
+static bool gr_context_info_available(struct gk20a *g)
+{
+	struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image =
+			nvgpu_gr_get_golden_image_ptr(g);
+
+	return nvgpu_gr_obj_ctx_is_golden_image_ready(gr_golden_image);
+}
+
+static bool validate_reg_ops(struct gk20a *g,
+			struct nvgpu_profiler_object *prof,
+			u32 *ctx_rd_count, u32 *ctx_wr_count,
+			struct nvgpu_dbg_reg_op *ops,
+			u32 op_count,
+			bool valid_ctx,
+			u32 *flags);
+
+int exec_regops_gk20a(struct gk20a *g,
+		      struct nvgpu_tsg *tsg,
+		      struct nvgpu_dbg_reg_op *ops,
+		      u32 num_ops,
+		      u32 ctx_wr_count,
+		      u32 ctx_rd_count,
+		      u32 *flags)
+{
+	int err = 0;
+	unsigned int i;
+	u32 data32_lo = 0, data32_hi = 0;
+	bool skip_read_lo, skip_read_hi;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
+
+	/* be sure that ctx info is in place if there are ctx ops */
+	if ((ctx_wr_count | ctx_rd_count) != 0U) {
+		if (!gr_context_info_available(g)) {
+			nvgpu_err(g, "gr context data not available");
+			return -ENODEV;
+		}
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		/* if it isn't global then it is done in the ctx ops... */
+		if (ops[i].type != REGOP(TYPE_GLOBAL)) {
+			continue;
+		}
+
+		/*
+		 * Move to next op if current op is invalid.
+		 * Execution will reach here only if CONTINUE_ON_ERROR
+		 * mode is requested.
+		 */
+		if (ops[i].status != REGOP(STATUS_SUCCESS)) {
+			continue;
+		}
+
+		switch (ops[i].op) {
+
+		case REGOP(READ_32):
+			ops[i].value_hi = 0;
+			ops[i].value_lo = gk20a_readl(g, ops[i].offset);
+			nvgpu_log(g, gpu_dbg_gpu_dbg, "read_32 0x%08x from 0x%08x",
+				ops[i].value_lo, ops[i].offset);
+
+			break;
+
+		case REGOP(READ_64):
+			ops[i].value_lo = gk20a_readl(g, ops[i].offset);
+			ops[i].value_hi =
+				gk20a_readl(g, ops[i].offset + 4U);
+
+			nvgpu_log(g, gpu_dbg_gpu_dbg, "read_64 0x%08x:%08x from 0x%08x",
+				ops[i].value_hi, ops[i].value_lo,
+				ops[i].offset);
+			break;
+
+		case REGOP(WRITE_32):
+		case REGOP(WRITE_64):
+			/* some of this appears wonky/unnecessary but
+			   we've kept it for compat with existing
+			   debugger code. just in case...
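+			   (when and_n_mask_lo/hi is all ones the read below is
+			   skipped and value_lo/hi is written as-is; otherwise
+			   the register is read, masked and merged first.)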
*/ + skip_read_lo = skip_read_hi = false; + if (ops[i].and_n_mask_lo == ~(u32)0) { + data32_lo = ops[i].value_lo; + skip_read_lo = true; + } + + if ((ops[i].op == REGOP(WRITE_64)) && + (ops[i].and_n_mask_hi == ~(u32)0)) { + data32_hi = ops[i].value_hi; + skip_read_hi = true; + } + + /* read first 32bits */ + if (skip_read_lo == false) { + data32_lo = gk20a_readl(g, ops[i].offset); + data32_lo &= ~ops[i].and_n_mask_lo; + data32_lo |= ops[i].value_lo; + } + + /* if desired, read second 32bits */ + if ((ops[i].op == REGOP(WRITE_64)) && + !skip_read_hi) { + data32_hi = gk20a_readl(g, ops[i].offset + 4U); + data32_hi &= ~ops[i].and_n_mask_hi; + data32_hi |= ops[i].value_hi; + } + + /* now update first 32bits */ + gk20a_writel(g, ops[i].offset, data32_lo); + nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ", + data32_lo, ops[i].offset); + /* if desired, update second 32bits */ + if (ops[i].op == REGOP(WRITE_64)) { + gk20a_writel(g, ops[i].offset + 4U, data32_hi); + nvgpu_log(g, gpu_dbg_gpu_dbg, "Wrote 0x%08x to 0x%08x ", + data32_hi, ops[i].offset + 4U); + + } + + + break; + + /* shouldn't happen as we've already screened */ + default: + BUG(); + err = -EINVAL; + goto clean_up; + break; + } + } + + if ((ctx_wr_count | ctx_rd_count) != 0U) { + err = gr_gk20a_exec_ctx_ops(tsg, ops, num_ops, + ctx_wr_count, ctx_rd_count, + flags); + if (err != 0) { + nvgpu_warn(g, "failed to perform ctx ops\n"); + goto clean_up; + } + } + + clean_up: + nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); + return err; + +} + +int nvgpu_regops_exec(struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_profiler_object *prof, + struct nvgpu_dbg_reg_op *ops, + u32 num_ops, + u32 *flags) +{ + u32 ctx_rd_count = 0, ctx_wr_count = 0; + int err = 0; + bool ok; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + ok = validate_reg_ops(g, prof, &ctx_rd_count, &ctx_wr_count, + ops, num_ops, tsg != NULL, flags); + if (!ok) { + nvgpu_err(g, "invalid op(s)"); + return -EINVAL; + } + + err = g->ops.regops.exec_regops(g, tsg, ops, num_ops, ctx_wr_count, + ctx_rd_count, flags); + if (err != 0) { + nvgpu_warn(g, "failed to perform regops, err=%d", err); + } + + return err; +} + +static int validate_reg_op_info(struct nvgpu_dbg_reg_op *op) +{ + int err = 0; + + switch (op->op) { + case REGOP(READ_32): + case REGOP(READ_64): + case REGOP(WRITE_32): + case REGOP(WRITE_64): + break; + default: + op->status |= REGOP(STATUS_UNSUPPORTED_OP); + err = -EINVAL; + break; + } + + switch (op->type) { + case REGOP(TYPE_GLOBAL): + case REGOP(TYPE_GR_CTX): + case REGOP(TYPE_GR_CTX_TPC): + case REGOP(TYPE_GR_CTX_SM): + case REGOP(TYPE_GR_CTX_CROP): + case REGOP(TYPE_GR_CTX_ZROP): + case REGOP(TYPE_GR_CTX_QUAD): + break; + /* + case NVGPU_DBG_GPU_REG_OP_TYPE_FB: + */ + default: + op->status |= REGOP(STATUS_INVALID_TYPE); + err = -EINVAL; + break; + } + + return err; +} + +static bool check_whitelists(struct gk20a *g, + struct nvgpu_dbg_reg_op *op, + u32 offset, + bool valid_ctx) +{ + bool valid = false; + + if (op->type == REGOP(TYPE_GLOBAL)) { + /* search global list */ + valid = (g->ops.regops.get_global_whitelist_ranges != NULL) && + (nvgpu_bsearch(&offset, + g->ops.regops.get_global_whitelist_ranges(), + g->ops.regops.get_global_whitelist_ranges_count(), + sizeof(*g->ops.regops.get_global_whitelist_ranges()), + regop_bsearch_range_cmp) != NULL); + + /* if debug session, search context list */ + if ((!valid) && (valid_ctx)) { + /* binary search context list */ + valid = (g->ops.regops.get_context_whitelist_ranges != NULL) && + 
(nvgpu_bsearch(&offset, + g->ops.regops.get_context_whitelist_ranges(), + g->ops.regops.get_context_whitelist_ranges_count(), + sizeof(*g->ops.regops.get_context_whitelist_ranges()), + regop_bsearch_range_cmp) != NULL); + } + + /* if debug session, search runcontrol list */ + if ((!valid) && (valid_ctx)) { + valid = (g->ops.regops.get_runcontrol_whitelist != NULL) && + linear_search(offset, + g->ops.regops.get_runcontrol_whitelist(), + g->ops.regops.get_runcontrol_whitelist_count()); + } + } else if (op->type == REGOP(TYPE_GR_CTX)) { + /* binary search context list */ + valid = (g->ops.regops.get_context_whitelist_ranges != NULL) && + (nvgpu_bsearch(&offset, + g->ops.regops.get_context_whitelist_ranges(), + g->ops.regops.get_context_whitelist_ranges_count(), + sizeof(*g->ops.regops.get_context_whitelist_ranges()), + regop_bsearch_range_cmp) != NULL); + + /* if debug session, search runcontrol list */ + if ((!valid) && (valid_ctx)) { + valid = (g->ops.regops.get_runcontrol_whitelist != NULL) && + linear_search(offset, + g->ops.regops.get_runcontrol_whitelist(), + g->ops.regops.get_runcontrol_whitelist_count()); + } + } + + return valid; +} + +static int profiler_obj_validate_reg_op_offset(struct nvgpu_profiler_object *prof, + struct nvgpu_dbg_reg_op *op) +{ + struct gk20a *g = prof->g; + bool valid = false; + u32 offset; + enum nvgpu_pm_resource_hwpm_register_type type, type64; + + offset = op->offset; + + /* support only 24-bit 4-byte aligned offsets */ + if ((offset & 0xFF000003U) != 0U) { + nvgpu_err(g, "invalid regop offset: 0x%x", offset); + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + valid = nvgpu_profiler_validate_regops_allowlist(prof, offset, &type); + if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) { + valid = nvgpu_profiler_validate_regops_allowlist(prof, offset + 4U, &type64); + } + + if (!valid) { + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + if (op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) { + nvgpu_assert(type == type64); + } + + op->type = prof->reg_op_type[type]; + + return 0; +} + +/* note: the op here has already been through validate_reg_op_info */ +static int validate_reg_op_offset(struct gk20a *g, + struct nvgpu_dbg_reg_op *op, + bool valid_ctx) +{ + u32 offset; + bool valid = false; + + offset = op->offset; + + /* support only 24-bit 4-byte aligned offsets */ + if ((offset & 0xFF000003U) != 0U) { + nvgpu_err(g, "invalid regop offset: 0x%x", offset); + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + valid = check_whitelists(g, op, offset, valid_ctx); + if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) { + valid = check_whitelists(g, op, offset + 4U, valid_ctx); + } + + if (!valid) { + nvgpu_err(g, "invalid regop offset: 0x%x", offset); + op->status |= REGOP(STATUS_INVALID_OFFSET); + return -EINVAL; + } + + return 0; +} + +static bool validate_reg_ops(struct gk20a *g, + struct nvgpu_profiler_object *prof, + u32 *ctx_rd_count, u32 *ctx_wr_count, + struct nvgpu_dbg_reg_op *ops, + u32 op_count, + bool valid_ctx, + u32 *flags) +{ + bool all_or_none = (*flags) & NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE; + bool gr_ctx_ops = false; + bool op_failed = false; + u32 i; + + /* keep going until the end so every op can get + * a separate error code if needed */ + for (i = 0; i < op_count; i++) { + ops[i].status = 0U; + + /* if "allow_all" flag enabled, dont validate offset */ + if (!g->allow_all) { + if (prof != NULL) { + if 
(profiler_obj_validate_reg_op_offset(prof, &ops[i]) != 0) { + op_failed = true; + if (all_or_none) { + break; + } + } + } else { + if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) { + op_failed = true; + if (all_or_none) { + break; + } + } + } + } + + if (validate_reg_op_info(&ops[i]) != 0) { + op_failed = true; + if (all_or_none) { + break; + } + } + + if (reg_op_is_gr_ctx(ops[i].type)) { + if (reg_op_is_read(ops[i].op)) { + (*ctx_rd_count)++; + } else { + (*ctx_wr_count)++; + } + + gr_ctx_ops = true; + } + + /* context operations need valid context */ + if (gr_ctx_ops && !valid_ctx) { + op_failed = true; + if (all_or_none) { + break; + } + } + + if (ops[i].status == 0U) { + ops[i].status = REGOP(STATUS_SUCCESS); + } + } + + nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d", + *ctx_wr_count, *ctx_rd_count); + + if (all_or_none) { + if (op_failed) { + return false; + } else { + return true; + } + } + + /* Continue on error */ + if (!op_failed) { + *flags |= NVGPU_REG_OP_FLAG_ALL_PASSED; + } + + return true; +} + +/* exported for tools like cyclestats, etc */ +bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset) +{ + bool valid = nvgpu_bsearch(&offset, + g->ops.regops.get_global_whitelist_ranges(), + g->ops.regops.get_global_whitelist_ranges_count(), + sizeof(*g->ops.regops.get_global_whitelist_ranges()), + regop_bsearch_range_cmp) != NULL; + return valid; +} + +bool reg_op_is_gr_ctx(u8 type) +{ + return type == REGOP(TYPE_GR_CTX) || + type == REGOP(TYPE_GR_CTX_TPC) || + type == REGOP(TYPE_GR_CTX_SM) || + type == REGOP(TYPE_GR_CTX_CROP) || + type == REGOP(TYPE_GR_CTX_ZROP) || + type == REGOP(TYPE_GR_CTX_QUAD); +} + +bool reg_op_is_read(u8 op) +{ + return op == REGOP(READ_32) || + op == REGOP(READ_64); +} diff --git a/drivers/gpu/nvgpu/common/sbr/sbr.c b/drivers/gpu/nvgpu/common/sbr/sbr.c new file mode 100644 index 000000000..b43af6c93 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sbr/sbr.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "sbr.h" + +static void pub_ucode_patch_sig(struct gk20a *g, + unsigned int *p_img, unsigned int *p_prod_sig, + unsigned int *p_dbg_sig, unsigned int *p_patch_loc, + unsigned int *p_patch_ind, u32 sig_size) +{ + unsigned int i, j, *p_sig; + nvgpu_info(g, " "); + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + p_sig = p_prod_sig; + nvgpu_info(g, "PRODUCTION MODE\n"); + } else { + p_sig = p_dbg_sig; + nvgpu_info(g, "DEBUG MODE\n"); + } + + /* Patching logic:*/ + sig_size = sig_size / 4U; + for (i = 0U; i < (sizeof(*p_patch_loc)>>2U); i++) { + for (j = 0U; j < sig_size; j++) { + p_img[nvgpu_safe_add_u32((p_patch_loc[i]>>2U), j)] = + p_sig[nvgpu_safe_add_u32((p_patch_ind[i]<<2U), j)]; + } + } +} + +int nvgpu_sbr_pub_load_and_execute(struct gk20a *g) +{ + struct nvgpu_firmware *pub_fw = NULL; + struct pub_bin_hdr *hs_bin_hdr = NULL; + struct pub_fw_header *fw_hdr = NULL; + u32 *ucode_header = NULL; + u32 *ucode = NULL; + u32 data = 0; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (!g->ops.pmu.is_debug_mode_enabled(g)) { + pub_fw = nvgpu_request_firmware(g, PUB_PROD_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } else { + pub_fw = nvgpu_request_firmware(g, PUB_DBG_BIN, + NVGPU_REQUEST_FIRMWARE_NO_SOC); + } + + if (pub_fw == NULL) { + nvgpu_err(g, "pub ucode get fail"); + err = -ENOENT; + goto exit; + } + + hs_bin_hdr = (struct pub_bin_hdr *)(void *)pub_fw->data; + fw_hdr = (struct pub_fw_header *)(void *)(pub_fw->data + + hs_bin_hdr->header_offset); + ucode_header = (u32 *)(void *)(pub_fw->data + + fw_hdr->hdr_offset); + ucode = (u32 *)(void *)(pub_fw->data + hs_bin_hdr->data_offset); + + /* Patch Ucode signatures */ + pub_ucode_patch_sig(g, ucode, + (u32 *)(void *)(pub_fw->data + fw_hdr->sig_prod_offset), + (u32 *)(void *)(pub_fw->data + fw_hdr->sig_dbg_offset), + (u32 *)(void *)(pub_fw->data + fw_hdr->patch_loc), + (u32 *)(void *)(pub_fw->data + fw_hdr->patch_sig), + fw_hdr->sig_dbg_size); + + err = nvgpu_falcon_hs_ucode_load_bootstrap(&g->sec2.flcn, ucode, + ucode_header); + if (err != 0) { + nvgpu_err(g, "pub ucode load & bootstrap failed"); + goto exit; + } + + if (nvgpu_falcon_wait_for_halt(&g->sec2.flcn, PUB_TIMEOUT) != 0) { + nvgpu_err(g, "pub ucode boot timed out"); + err = -ETIMEDOUT; + goto exit; + } + + data = nvgpu_falcon_mailbox_read(&g->sec2.flcn, FALCON_MAILBOX_0); + if (data != 0U) { + nvgpu_err(g, "pub ucode boot failed, err %x", data); + err = -EAGAIN; + goto exit; + } + +exit: +#ifdef CONFIG_NVGPU_FALCON_DEBUG + if (err != 0) { + nvgpu_falcon_dump_stats(&g->sec2.flcn); + } +#endif + + if (pub_fw != NULL) { + nvgpu_release_firmware(g, pub_fw); + } + + nvgpu_log_fn(g, "pub loaded & executed with status %d", err); + return err; +} + diff --git a/drivers/gpu/nvgpu/common/sbr/sbr.h b/drivers/gpu/nvgpu/common/sbr/sbr.h new file mode 100644 index 000000000..a4913115b --- /dev/null +++ b/drivers/gpu/nvgpu/common/sbr/sbr.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_COMMON_SBR_H_ +#define NVGPU_COMMON_SBR_H_ + +#define PUB_PROD_BIN "pub.bin" +#define PUB_DBG_BIN "pub_dbg.bin" +#define PUB_TIMEOUT 100U /* msec */ + +struct pub_bin_hdr { + u32 bin_magic; + u32 bin_ver; + u32 bin_size; + u32 header_offset; + u32 data_offset; + u32 data_size; +}; + +struct pub_fw_header { + u32 sig_dbg_offset; + u32 sig_dbg_size; + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 hdr_offset; + u32 hdr_size; +}; + +#endif /* NVGPU_COMMON_SBR_H_ */ diff --git a/drivers/gpu/nvgpu/common/sec2/ipc/sec2_cmd.c b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_cmd.c new file mode 100644 index 000000000..032290cbd --- /dev/null +++ b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_cmd.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +/* command post operation functions */ +static bool sec2_validate_cmd(struct nvgpu_sec2 *sec2, + struct nv_flcn_cmd_sec2 *cmd, u32 queue_id) +{ + struct gk20a *g = sec2->g; + u32 queue_size; + + if (queue_id != SEC2_NV_CMDQ_LOG_ID) { + goto invalid_cmd; + } + + if (cmd->hdr.size < PMU_CMD_HDR_SIZE) { + goto invalid_cmd; + } + + queue_size = nvgpu_sec2_queue_get_size(sec2->queues, queue_id); + + if (cmd->hdr.size > (queue_size >> 1)) { + goto invalid_cmd; + } + + if (!NV_SEC2_UNITID_IS_VALID(cmd->hdr.unit_id)) { + goto invalid_cmd; + } + + return true; + +invalid_cmd: + nvgpu_err(g, "invalid sec2 cmd :"); + nvgpu_err(g, "queue_id=%d, cmd_size=%d, cmd_unit_id=%d\n", + queue_id, cmd->hdr.size, cmd->hdr.unit_id); + + return false; +} + +static int sec2_write_cmd(struct nvgpu_sec2 *sec2, + struct nv_flcn_cmd_sec2 *cmd, u32 queue_id, + u32 timeout_ms) +{ + struct nvgpu_timeout timeout; + struct gk20a *g = sec2->g; + int err; + + nvgpu_log_fn(g, " "); + + nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER); + + do { + err = nvgpu_sec2_queue_push(sec2->queues, queue_id, &sec2->flcn, + cmd, cmd->hdr.size); + if ((err == -EAGAIN) && + (nvgpu_timeout_expired(&timeout) == 0)) { + nvgpu_usleep_range(1000U, 2000U); + } else { + break; + } + } while (true); + + if (err != 0) { + nvgpu_err(g, "fail to write cmd to queue %d", queue_id); + } + + return err; +} + +int nvgpu_sec2_cmd_post(struct gk20a *g, struct nv_flcn_cmd_sec2 *cmd, + u32 queue_id, sec2_callback callback, + void *cb_param, u32 timeout) +{ + struct nvgpu_sec2 *sec2 = &g->sec2; + struct sec2_sequence *seq = NULL; + int err = 0; + + if ((cmd == NULL) || (!sec2->sec2_ready)) { + if (cmd == NULL) { + nvgpu_warn(g, + "%s(): SEC2 cmd buffer is NULL", __func__); + } else { + nvgpu_warn(g, "%s(): SEC2 is not ready", __func__); + } + + err = -EINVAL; + goto exit; + } + + /* Sanity check the command input. */ + if (!sec2_validate_cmd(sec2, cmd, queue_id)) { + err = -EINVAL; + goto exit; + } + + /* Attempt to reserve a sequence for this command. */ + err = nvgpu_sec2_seq_acquire(g, &sec2->sequences, &seq, + callback, cb_param); + if (err != 0) { + goto exit; + } + + /* Set the sequence number in the command header. */ + cmd->hdr.seq_id = nvgpu_sec2_seq_get_id(seq); + + cmd->hdr.ctrl_flags = 0U; + cmd->hdr.ctrl_flags = PMU_CMD_FLAGS_STATUS; + + nvgpu_sec2_seq_set_state(seq, SEC2_SEQ_STATE_USED); + + err = sec2_write_cmd(sec2, cmd, queue_id, timeout); + if (err != 0) { + nvgpu_sec2_seq_set_state(seq, SEC2_SEQ_STATE_PENDING); + } + +exit: + return err; +} diff --git a/drivers/gpu/nvgpu/common/sec2/ipc/sec2_msg.c b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_msg.c new file mode 100644 index 000000000..3aa1d013c --- /dev/null +++ b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_msg.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Message/Event request handlers */ +static int sec2_response_handle(struct nvgpu_sec2 *sec2, + struct nv_flcn_msg_sec2 *msg) +{ + struct gk20a *g = sec2->g; + + return nvgpu_sec2_seq_response_handle(g, &sec2->sequences, + msg, msg->hdr.seq_id); +} + +static int sec2_handle_event(struct nvgpu_sec2 *sec2, + struct nv_flcn_msg_sec2 *msg) +{ + int err = 0; + + switch (msg->hdr.unit_id) { + default: + break; + } + + return err; +} + +static bool sec2_read_message(struct nvgpu_sec2 *sec2, + u32 queue_id, struct nv_flcn_msg_sec2 *msg, int *status) +{ + struct gk20a *g = sec2->g; + u32 read_size; + int err; + + *status = 0U; + + if (nvgpu_sec2_queue_is_empty(sec2->queues, queue_id)) { + return false; + } + + if (!nvgpu_sec2_queue_read(g, sec2->queues, queue_id, + &sec2->flcn, &msg->hdr, + PMU_MSG_HDR_SIZE, status)) { + nvgpu_err(g, "fail to read msg from queue %d", queue_id); + goto clean_up; + } + + if (msg->hdr.unit_id == NV_SEC2_UNIT_REWIND) { + err = nvgpu_sec2_queue_rewind(&sec2->flcn, + sec2->queues, queue_id); + if (err != 0) { + nvgpu_err(g, "fail to rewind queue %d", queue_id); + *status = err; + goto clean_up; + } + + /* read again after rewind */ + if (!nvgpu_sec2_queue_read(g, sec2->queues, queue_id, + &sec2->flcn, &msg->hdr, + PMU_MSG_HDR_SIZE, status)) { + nvgpu_err(g, "fail to read msg from queue %d", + queue_id); + goto clean_up; + } + } + + if (!NV_SEC2_UNITID_IS_VALID(msg->hdr.unit_id)) { + nvgpu_err(g, "read invalid unit_id %d from queue %d", + msg->hdr.unit_id, queue_id); + *status = -EINVAL; + goto clean_up; + } + + if (msg->hdr.size > PMU_MSG_HDR_SIZE) { + read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; + if (!nvgpu_sec2_queue_read(g, sec2->queues, queue_id, + &sec2->flcn, &msg->msg, + read_size, status)) { + nvgpu_err(g, "fail to read msg from queue %d", + queue_id); + goto clean_up; + } + } + + return true; + +clean_up: + return false; +} + +static int sec2_process_init_msg(struct nvgpu_sec2 *sec2, + struct nv_flcn_msg_sec2 *msg) +{ + struct gk20a *g = sec2->g; + struct sec2_init_msg_sec2_init *sec2_init; + u32 tail = 0; + int err = 0; + + g->ops.sec2.msgq_tail(g, sec2, &tail, QUEUE_GET); + + err = nvgpu_falcon_copy_from_emem(&sec2->flcn, tail, + (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0U); + if (err != 0) { + goto exit; + } + + if (msg->hdr.unit_id != NV_SEC2_UNIT_INIT) { + nvgpu_err(g, "expecting init msg"); + err = -EINVAL; + goto exit; + } + + err = nvgpu_falcon_copy_from_emem(&sec2->flcn, tail + PMU_MSG_HDR_SIZE, + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0U); + if (err != 0) { + goto exit; + } + + if (msg->msg.init.msg_type != NV_SEC2_INIT_MSG_ID_SEC2_INIT) { + nvgpu_err(g, "expecting init msg"); + err = -EINVAL; + goto exit; + } + + tail += NVGPU_ALIGN(U32(msg->hdr.size), PMU_DMEM_ALIGNMENT); + g->ops.sec2.msgq_tail(g, sec2, &tail, QUEUE_SET); + + sec2_init = &msg->msg.init.sec2_init; + + err = nvgpu_sec2_queues_init(g, sec2->queues, sec2_init); + if (err != 0) { + return err; + } + + err = 
nvgpu_sec2_dmem_allocator_init(g, &sec2->dmem, sec2_init); + if (err != 0) { + nvgpu_sec2_queues_free(g, sec2->queues); + return err; + } + + sec2->sec2_ready = true; + +exit: + return err; +} + +int nvgpu_sec2_process_message(struct nvgpu_sec2 *sec2) +{ + struct gk20a *g = sec2->g; + struct nv_flcn_msg_sec2 msg; + int status = 0; + + if (unlikely(!sec2->sec2_ready)) { + status = sec2_process_init_msg(sec2, &msg); + goto exit; + } + + while (sec2_read_message(sec2, + SEC2_NV_MSGQ_LOG_ID, &msg, &status)) { + + nvgpu_sec2_dbg(g, "read msg hdr: "); + nvgpu_sec2_dbg(g, "unit_id = 0x%08x, size = 0x%08x", + msg.hdr.unit_id, msg.hdr.size); + nvgpu_sec2_dbg(g, "ctrl_flags = 0x%08x, seq_id = 0x%08x", + msg.hdr.ctrl_flags, msg.hdr.seq_id); + + msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK; + + if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) { + sec2_handle_event(sec2, &msg); + } else { + sec2_response_handle(sec2, &msg); + } + } + +exit: + return status; +} + +static void sec2_isr(struct gk20a *g, struct nvgpu_sec2 *sec2) +{ + bool recheck = false; + u32 intr; + + if (!g->ops.sec2.is_interrupted(sec2)) { + return; + } + + nvgpu_mutex_acquire(&sec2->isr_mutex); + if (!sec2->isr_enabled) { + goto exit; + } + + intr = g->ops.sec2.get_intr(g); + if (intr == 0U) { + goto exit; + } + + /* + * Handle swgen0 interrupt to process received messages from SEC2. + * If any other interrupt is to be handled with some software + * action expected, then it should be handled here. + * g->ops.sec2.isr call below will handle other hardware interrupts + * that are not expected to be handled in software. + */ + if (g->ops.sec2.msg_intr_received(g)) { + if (nvgpu_sec2_process_message(sec2) != 0) { + g->ops.sec2.clr_intr(g, intr); + goto exit; + } + recheck = true; + } + + g->ops.sec2.process_intr(g, sec2); + g->ops.sec2.clr_intr(g, intr); + + if (recheck) { + if (!nvgpu_sec2_queue_is_empty(sec2->queues, + SEC2_NV_MSGQ_LOG_ID)) { + g->ops.sec2.set_msg_intr(g); + } + } + +exit: + nvgpu_mutex_release(&sec2->isr_mutex); +} + +int nvgpu_sec2_wait_message_cond(struct nvgpu_sec2 *sec2, u32 timeout_ms, + void *var, u8 val) +{ + struct gk20a *g = sec2->g; + struct nvgpu_timeout timeout; + u32 delay = POLL_DELAY_MIN_US; + + nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER); + + do { + if (*(u8 *)var == val) { + return 0; + } + + sec2_isr(g, sec2); + + nvgpu_usleep_range(delay, delay * 2U); + delay = min_t(u32, delay << 1U, POLL_DELAY_MAX_US); + } while (nvgpu_timeout_expired(&timeout) == 0); + + return -ETIMEDOUT; +} diff --git a/drivers/gpu/nvgpu/common/sec2/ipc/sec2_queue.c b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_queue.c new file mode 100644 index 000000000..ca1f249dd --- /dev/null +++ b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_queue.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +/* sec2 falcon queue init */ +static int sec2_queue_init(struct gk20a *g, + struct nvgpu_engine_mem_queue **queues, u32 id, + struct sec2_init_msg_sec2_init *init) +{ + struct nvgpu_engine_mem_queue_params params = {0}; + u32 queue_log_id = 0; + u32 oflag = 0; + int err = 0; + + if (id == SEC2_NV_CMDQ_LOG_ID) { + /* + * set OFLAG_WRITE for command queue + * i.e, push from nvgpu & + * pop form falcon ucode + */ + oflag = OFLAG_WRITE; + } else if (id == SEC2_NV_MSGQ_LOG_ID) { + /* + * set OFLAG_READ for message queue + * i.e, push from falcon ucode & + * pop form nvgpu + */ + oflag = OFLAG_READ; + } else { + nvgpu_err(g, "invalid queue-id %d", id); + err = -EINVAL; + goto exit; + } + + /* init queue parameters */ + queue_log_id = init->q_info[id].queue_log_id; + + params.g = g; + params.flcn_id = FALCON_ID_SEC2; + params.id = queue_log_id; + params.index = init->q_info[id].queue_phy_id; + params.offset = init->q_info[id].queue_offset; + params.position = init->q_info[id].queue_offset; + params.size = init->q_info[id].queue_size; + params.oflag = oflag; + params.queue_head = g->ops.sec2.sec2_queue_head; + params.queue_tail = g->ops.sec2.sec2_queue_tail; + params.queue_type = QUEUE_TYPE_EMEM; + + err = nvgpu_engine_mem_queue_init(&queues[queue_log_id], + params); + if (err != 0) { + nvgpu_err(g, "queue-%d init failed", queue_log_id); + } + +exit: + return err; +} + +static void sec2_queue_free(struct gk20a *g, + struct nvgpu_engine_mem_queue **queues, u32 id) +{ + if (!(id == SEC2_NV_CMDQ_LOG_ID) && !(id == SEC2_NV_MSGQ_LOG_ID)) { + nvgpu_err(g, "invalid queue-id %d", id); + goto exit; + } + + if (queues[id] == NULL) { + goto exit; + } + + nvgpu_engine_mem_queue_free(&queues[id]); +exit: + return; +} + +int nvgpu_sec2_queues_init(struct gk20a *g, + struct nvgpu_engine_mem_queue **queues, + struct sec2_init_msg_sec2_init *init) +{ + u32 i, j; + int err; + + for (i = 0; i < SEC2_QUEUE_NUM; i++) { + err = sec2_queue_init(g, queues, i, init); + if (err != 0) { + for (j = 0; j < i; j++) { + sec2_queue_free(g, queues, j); + } + nvgpu_err(g, "SEC2 queue init failed"); + return err; + } + } + + return 0; +} + +void nvgpu_sec2_queues_free(struct gk20a *g, + struct nvgpu_engine_mem_queue **queues) +{ + u32 i; + + for (i = 0; i < SEC2_QUEUE_NUM; i++) { + sec2_queue_free(g, queues, i); + } +} + +u32 nvgpu_sec2_queue_get_size(struct nvgpu_engine_mem_queue **queues, + u32 queue_id) +{ + return nvgpu_engine_mem_queue_get_size(queues[queue_id]); +} + +int nvgpu_sec2_queue_push(struct nvgpu_engine_mem_queue **queues, + u32 queue_id, struct nvgpu_falcon *flcn, + struct nv_flcn_cmd_sec2 *cmd, u32 size) +{ + struct nvgpu_engine_mem_queue *queue; + + queue = queues[queue_id]; + return nvgpu_engine_mem_queue_push(flcn, queue, cmd, size); +} + +bool nvgpu_sec2_queue_is_empty(struct nvgpu_engine_mem_queue **queues, + u32 queue_id) +{ + struct nvgpu_engine_mem_queue *queue = queues[queue_id]; + + return nvgpu_engine_mem_queue_is_empty(queue); +} + +bool 
nvgpu_sec2_queue_read(struct gk20a *g, + struct nvgpu_engine_mem_queue **queues, + u32 queue_id, struct nvgpu_falcon *flcn, void *data, + u32 bytes_to_read, int *status) +{ + struct nvgpu_engine_mem_queue *queue = queues[queue_id]; + u32 bytes_read; + int err; + + err = nvgpu_engine_mem_queue_pop(flcn, queue, data, + bytes_to_read, &bytes_read); + if (err != 0) { + nvgpu_err(g, "fail to read msg: err %d", err); + *status = err; + return false; + } + if (bytes_read != bytes_to_read) { + nvgpu_err(g, "fail to read requested bytes: 0x%x != 0x%x", + bytes_to_read, bytes_read); + *status = -EINVAL; + return false; + } + + return true; +} + +int nvgpu_sec2_queue_rewind(struct nvgpu_falcon *flcn, + struct nvgpu_engine_mem_queue **queues, + u32 queue_id) +{ + struct nvgpu_engine_mem_queue *queue = queues[queue_id]; + + return nvgpu_engine_mem_queue_rewind(flcn, queue); +} diff --git a/drivers/gpu/nvgpu/common/sec2/ipc/sec2_seq.c b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_seq.c new file mode 100644 index 000000000..8b425b385 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sec2/ipc/sec2_seq.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +int nvgpu_sec2_sequences_alloc(struct gk20a *g, + struct sec2_sequences *sequences) +{ + sequences->seq = nvgpu_kzalloc(g, SEC2_MAX_NUM_SEQUENCES * + sizeof(struct sec2_sequence)); + if (sequences->seq == NULL) { + return -ENOMEM; + } + + nvgpu_mutex_init(&sequences->sec2_seq_lock); + + return 0; +} + +void nvgpu_sec2_sequences_init(struct gk20a *g, + struct sec2_sequences *sequences) +{ + u32 i = 0; + + nvgpu_log_fn(g, " "); + + (void) memset(sequences->seq, 0, + sizeof(struct sec2_sequence) * SEC2_MAX_NUM_SEQUENCES); + + (void) memset(sequences->sec2_seq_tbl, 0, + sizeof(sequences->sec2_seq_tbl)); + + for (i = 0; i < SEC2_MAX_NUM_SEQUENCES; i++) { + sequences->seq[i].id = (u8)i; + } +} + +void nvgpu_sec2_sequences_free(struct gk20a *g, + struct sec2_sequences *sequences) +{ + nvgpu_mutex_destroy(&sequences->sec2_seq_lock); + nvgpu_kfree(g, sequences->seq); +} + +int nvgpu_sec2_seq_acquire(struct gk20a *g, + struct sec2_sequences *sequences, + struct sec2_sequence **pseq, + sec2_callback callback, void *cb_params) +{ + struct sec2_sequence *seq; + u32 index = 0; + int err = 0; + + nvgpu_mutex_acquire(&sequences->sec2_seq_lock); + + index = find_first_zero_bit(sequences->sec2_seq_tbl, + sizeof(sequences->sec2_seq_tbl)); + + if (index >= sizeof(sequences->sec2_seq_tbl)) { + nvgpu_err(g, "no free sequence available"); + nvgpu_mutex_release(&sequences->sec2_seq_lock); + err = -EAGAIN; + goto exit; + } + + nvgpu_assert(index < U64(INT_MAX)); + nvgpu_set_bit(index, sequences->sec2_seq_tbl); + + nvgpu_mutex_release(&sequences->sec2_seq_lock); + + seq = &sequences->seq[index]; + + seq->state = SEC2_SEQ_STATE_PENDING; + seq->callback = callback; + seq->cb_params = cb_params; + seq->out_payload = NULL; + + *pseq = seq; + +exit: + return err; +} + +static void sec2_seq_release(struct sec2_sequences *sequences, + struct sec2_sequence *seq) +{ + seq->state = SEC2_SEQ_STATE_FREE; + seq->callback = NULL; + seq->cb_params = NULL; + seq->out_payload = NULL; + + nvgpu_mutex_acquire(&sequences->sec2_seq_lock); + nvgpu_clear_bit(seq->id, sequences->sec2_seq_tbl); + nvgpu_mutex_release(&sequences->sec2_seq_lock); +} + +int nvgpu_sec2_seq_response_handle(struct gk20a *g, + struct sec2_sequences *sequences, + struct nv_flcn_msg_sec2 *msg, u32 seq_id) +{ + struct sec2_sequence *seq; + + /* get the sequence info data associated with this message */ + seq = &sequences->seq[seq_id]; + + + if (seq->state != SEC2_SEQ_STATE_USED) { + nvgpu_err(g, "msg for an unknown sequence %d", seq->id); + return -EINVAL; + } + + if (seq->callback != NULL) { + seq->callback(g, msg, seq->cb_params, 0); + } + + /* release the sequence so that it may be used for other commands */ + sec2_seq_release(sequences, seq); + + return 0; +} + +u8 nvgpu_sec2_seq_get_id(struct sec2_sequence *seq) +{ + return seq->id; +} + +void nvgpu_sec2_seq_set_state(struct sec2_sequence *seq, + enum sec2_seq_state state) +{ + seq->state = state; +} diff --git a/drivers/gpu/nvgpu/common/sec2/sec2.c b/drivers/gpu/nvgpu/common/sec2/sec2.c new file mode 100644 index 000000000..78d5058fe --- /dev/null +++ b/drivers/gpu/nvgpu/common/sec2/sec2.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void nvgpu_remove_sec2_support(struct nvgpu_sec2 *sec2) +{ + struct gk20a *g = sec2->g; + + nvgpu_log_fn(g, " "); + + nvgpu_sec2_sequences_free(g, &sec2->sequences); + nvgpu_mutex_destroy(&sec2->isr_mutex); +} + +int nvgpu_init_sec2_setup_sw(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + g->sec2.g = g; + + err = nvgpu_sec2_sequences_alloc(g, &g->sec2.sequences); + if (err != 0) { + return err; + } + + nvgpu_sec2_sequences_init(g, &g->sec2.sequences); + + nvgpu_mutex_init(&g->sec2.isr_mutex); + + g->sec2.remove_support = nvgpu_remove_sec2_support; + + return err; +} + +int nvgpu_init_sec2_support(struct gk20a *g) +{ + struct nvgpu_sec2 *sec2 = &g->sec2; + int err = 0; + + nvgpu_log_fn(g, " "); + + /* Enable irq*/ + nvgpu_mutex_acquire(&sec2->isr_mutex); + g->ops.sec2.enable_irq(sec2, true); + sec2->isr_enabled = true; + nvgpu_mutex_release(&sec2->isr_mutex); + + /* execute SEC2 in secure mode to boot RTOS */ + g->ops.sec2.secured_sec2_start(g); + + return err; +} + +int nvgpu_sec2_destroy(struct gk20a *g) +{ + struct nvgpu_sec2 *sec2 = &g->sec2; + + nvgpu_log_fn(g, " "); + + nvgpu_sec2_dmem_allocator_destroy(&sec2->dmem); + + nvgpu_mutex_acquire(&sec2->isr_mutex); + sec2->isr_enabled = false; + nvgpu_mutex_release(&sec2->isr_mutex); + + nvgpu_sec2_queues_free(g, sec2->queues); + + sec2->sec2_ready = false; + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/sec2/sec2_allocator.c b/drivers/gpu/nvgpu/common/sec2/sec2_allocator.c new file mode 100644 index 000000000..92c5f93b2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sec2/sec2_allocator.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +int nvgpu_sec2_dmem_allocator_init(struct gk20a *g, + struct nvgpu_allocator *dmem, + struct sec2_init_msg_sec2_init *sec2_init) +{ + int err = 0; + if (!nvgpu_alloc_initialized(dmem)) { + /* Align start and end addresses */ + u32 start = NVGPU_ALIGN(sec2_init->nv_managed_area_offset, + PMU_DMEM_ALLOC_ALIGNMENT); + + u32 end = (sec2_init->nv_managed_area_offset + + sec2_init->nv_managed_area_size) & + ~(PMU_DMEM_ALLOC_ALIGNMENT - 1U); + u32 size = end - start; + + err = nvgpu_allocator_init(g, dmem, NULL, "sec2_dmem", start, + size, PMU_DMEM_ALLOC_ALIGNMENT, 0ULL, 0ULL, + BITMAP_ALLOCATOR); + if (err != 0) { + nvgpu_err(g, "Couldn't init sec2_dmem allocator\n"); + } + } + return err; +} + +void nvgpu_sec2_dmem_allocator_destroy(struct nvgpu_allocator *dmem) +{ + if (nvgpu_alloc_initialized(dmem)) { + nvgpu_alloc_destroy(dmem); + } +} diff --git a/drivers/gpu/nvgpu/common/sec2/sec2_lsfm.c b/drivers/gpu/nvgpu/common/sec2/sec2_lsfm.c new file mode 100644 index 000000000..5f1e693f0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sec2/sec2_lsfm.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Add code below to handle SEC2 RTOS commands */ +/* LSF's bootstrap command */ +static void sec2_handle_lsfm_boot_acr_msg(struct gk20a *g, + struct nv_flcn_msg_sec2 *msg, + void *param, u32 status) +{ + bool *command_ack = param; + + nvgpu_log_fn(g, " "); + + nvgpu_sec2_dbg(g, "reply NV_SEC2_ACR_CMD_ID_BOOTSTRAP_FALCON"); + + nvgpu_sec2_dbg(g, "flcn %d: error code = %x", + msg->msg.acr.msg_flcn.falcon_id, + msg->msg.acr.msg_flcn.error_code); + + *command_ack = true; +} + +static u32 get_gpc_falcon_idx_mask(struct gk20a *g) +{ + u32 gpc_falcon_idx_mask = 0U; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + gpc_falcon_idx_mask = nvgpu_grmgr_get_gr_logical_gpc_mask(g, + nvgpu_gr_get_cur_instance_id(g)); + } else { + u32 gpc_fs_mask; + struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g); + + gpc_fs_mask = nvgpu_gr_config_get_gpc_mask(gr_config); + gpc_falcon_idx_mask = + nvgpu_safe_sub_u32( + (1U << U32(hweight32(gpc_fs_mask))), 1U); + } + + return gpc_falcon_idx_mask; +} + +static void sec2_load_ls_falcons(struct gk20a *g, struct nvgpu_sec2 *sec2, + u32 falcon_id, u32 flags) +{ + struct nv_flcn_cmd_sec2 cmd; + bool command_ack; + int err = 0; + size_t tmp_size; + + nvgpu_log_fn(g, " "); + + /* send message to load falcon */ + (void) memset(&cmd, 0, sizeof(struct nv_flcn_cmd_sec2)); + cmd.hdr.unit_id = NV_SEC2_UNIT_ACR; + tmp_size = PMU_CMD_HDR_SIZE + + sizeof(struct nv_sec2_acr_cmd_bootstrap_falcon); + nvgpu_assert(tmp_size <= U64(U8_MAX)); + cmd.hdr.size = U8(tmp_size); + + cmd.cmd.acr.bootstrap_falcon.cmd_type = + NV_SEC2_ACR_CMD_ID_BOOTSTRAP_FALCON; + cmd.cmd.acr.bootstrap_falcon.flags = flags; + cmd.cmd.acr.bootstrap_falcon.falcon_id = falcon_id; + cmd.cmd.acr.bootstrap_falcon.falcon_instance = + nvgpu_grmgr_get_gr_syspipe_id(g, + nvgpu_gr_get_cur_instance_id(g)); + cmd.cmd.acr.bootstrap_falcon.falcon_index_mask = + LSF_FALCON_INDEX_MASK_DEFAULT; + + if (falcon_id == FALCON_ID_GPCCS) { + cmd.cmd.acr.bootstrap_falcon.falcon_index_mask = + get_gpc_falcon_idx_mask(g); + } + + nvgpu_sec2_dbg(g, "NV_SEC2_ACR_CMD_ID_BOOTSTRAP_FALCON : %d " + "falcon_instance : %u falcon_index_mask : %x", + falcon_id, + cmd.cmd.acr.bootstrap_falcon.falcon_instance, + cmd.cmd.acr.bootstrap_falcon.falcon_index_mask); + + command_ack = false; + err = nvgpu_sec2_cmd_post(g, &cmd, PMU_COMMAND_QUEUE_HPQ, + sec2_handle_lsfm_boot_acr_msg, &command_ack, U32_MAX); + if (err != 0) { + nvgpu_err(g, "command post failed"); + } + + err = nvgpu_sec2_wait_message_cond(sec2, nvgpu_get_poll_timeout(g), + &command_ack, U8(true)); + if (err != 0) { + nvgpu_err(g, "command ack receive failed"); + } +} + +int nvgpu_sec2_bootstrap_ls_falcons(struct gk20a *g, struct nvgpu_sec2 *sec2, + u32 falcon_id) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_sec2_dbg(g, "Check SEC2 RTOS is ready else wait"); + err = nvgpu_sec2_wait_message_cond(&g->sec2, nvgpu_get_poll_timeout(g), + &g->sec2.sec2_ready, U8(true)); + if (err != 0) { + nvgpu_err(g, "SEC2 RTOS not ready yet, failed to bootstrap flcn %d", + falcon_id); + goto exit; + } + + nvgpu_sec2_dbg(g, "LS flcn %d bootstrap, blocked call", falcon_id); + sec2_load_ls_falcons(g, sec2, falcon_id, + NV_SEC2_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES); + +exit: + nvgpu_sec2_dbg(g, "Done, err-%x", err); + return err; +} diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore.c b/drivers/gpu/nvgpu/common/semaphore/semaphore.c new file mode 100644 index 000000000..e0b349c5a --- /dev/null +++ 
b/drivers/gpu/nvgpu/common/semaphore/semaphore.c @@ -0,0 +1,175 @@ +/* + * Nvgpu Semaphores + * + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + + +/* + * Allocate a semaphore value object from an underlying hw counter. + * + * Since semaphores are ref-counted there's no explicit free for external code + * to use. When the ref-count hits 0 the internal free will happen. + */ +struct nvgpu_semaphore *nvgpu_semaphore_alloc( + struct nvgpu_hw_semaphore *hw_sema) +{ + struct nvgpu_semaphore_pool *pool = hw_sema->location.pool; + struct gk20a *g = pool->sema_sea->gk20a; + struct nvgpu_semaphore *s; + + s = nvgpu_kzalloc(g, sizeof(*s)); + if (s == NULL) { + return NULL; + } + + nvgpu_ref_init(&s->ref); + s->g = g; + s->location = hw_sema->location; + nvgpu_atomic_set(&s->value, 0); + + /* + * Take a ref on the pool so that we can keep this pool alive for + * as long as this semaphore is alive. + */ + nvgpu_semaphore_pool_get(pool); + + gpu_sema_dbg(g, "Allocated semaphore (c=%d)", hw_sema->chid); + + return s; +} + +static struct nvgpu_semaphore *nvgpu_semaphore_from_ref(struct nvgpu_ref *ref) +{ + return (struct nvgpu_semaphore *) + ((uintptr_t)ref - offsetof(struct nvgpu_semaphore, ref)); +} + +static void nvgpu_semaphore_free(struct nvgpu_ref *ref) +{ + struct nvgpu_semaphore *s = nvgpu_semaphore_from_ref(ref); + + nvgpu_semaphore_pool_put(s->location.pool); + + nvgpu_kfree(s->g, s); +} + +void nvgpu_semaphore_put(struct nvgpu_semaphore *s) +{ + nvgpu_ref_put(&s->ref, nvgpu_semaphore_free); +} + +void nvgpu_semaphore_get(struct nvgpu_semaphore *s) +{ + nvgpu_ref_get(&s->ref); +} + +/* + * Return the address of a specific semaphore. + * + * Don't call this on a semaphore you don't own - the VA returned will make no + * sense in your specific channel's VM. + */ +u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) +{ + return nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + + s->location.offset; +} + +/* + * Get the global RO address for the semaphore. Can be called on any semaphore + * regardless of whether you own it. + */ +u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) +{ + return nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + + s->location.offset; +} + +/* + * Read the underlying value from a semaphore. 
+ */ +u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) +{ + return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, + s->location.offset); +} + +u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) +{ + return (u32)nvgpu_atomic_read(&s->value); +} + +bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) +{ + u32 sema_val = nvgpu_semaphore_read(s); + u32 wait_payload = nvgpu_semaphore_get_value(s); + + return nvgpu_semaphore_value_released(wait_payload, sema_val); +} + +bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) +{ + return !nvgpu_semaphore_is_released(s); +} + +bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s) +{ + return s->ready_to_wait; +} + +/* + * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold + * value to "s" which represents the increment that the caller must write in a + * pushbuf. The same nvgpu_semaphore will also represent an output fence; when + * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment. + */ +void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, + struct nvgpu_hw_semaphore *hw_sema) +{ + /* One submission increments the next value by one. */ + int next = nvgpu_hw_semaphore_read_next(hw_sema) + 1; + + /* "s" should be an uninitialized sema. */ + WARN_ON(s->ready_to_wait); + + nvgpu_atomic_set(&s->value, next); + s->ready_to_wait = true; + + gpu_sema_verbose_dbg(s->g, "PREP sema for c=%d (%u)", + hw_sema->chid, next); +} + +u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s) +{ + return nvgpu_semaphore_pool_get_page_idx(s->location.pool); +} + diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c new file mode 100644 index 000000000..23eeb3adb --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + +int nvgpu_hw_semaphore_init(struct vm_gk20a *vm, u32 chid, + struct nvgpu_hw_semaphore **new_sema) +{ + struct nvgpu_semaphore_pool *p = vm->sema_pool; + struct nvgpu_hw_semaphore *hw_sema; + struct gk20a *g = vm->mm->g; + int current_value; + int hw_sema_idx; + int ret = 0; + + nvgpu_assert(p != NULL); + + nvgpu_mutex_acquire(&p->pool_lock); + + /* Find an available HW semaphore. 
*/ + hw_sema_idx = semaphore_bitmap_alloc(p->semas_alloced, + NVGPU_CPU_PAGE_SIZE / SEMAPHORE_SIZE); + if (hw_sema_idx < 0) { + ret = hw_sema_idx; + goto fail; + } + + hw_sema = nvgpu_kzalloc(g, sizeof(struct nvgpu_hw_semaphore)); + if (hw_sema == NULL) { + ret = -ENOMEM; + goto fail_free_idx; + } + + hw_sema->chid = chid; + hw_sema->location.pool = p; + hw_sema->location.offset = SEMAPHORE_SIZE * (u32)hw_sema_idx; + current_value = (int)nvgpu_mem_rd(g, &p->rw_mem, + hw_sema->location.offset); + nvgpu_atomic_set(&hw_sema->next_value, current_value); + + nvgpu_mutex_release(&p->pool_lock); + + *new_sema = hw_sema; + return 0; + +fail_free_idx: + nvgpu_clear_bit((u32)hw_sema_idx, p->semas_alloced); +fail: + nvgpu_mutex_release(&p->pool_lock); + return ret; +} + +/* + * Free the channel used semaphore index + */ +void nvgpu_hw_semaphore_free(struct nvgpu_hw_semaphore *hw_sema) +{ + struct nvgpu_semaphore_pool *p = hw_sema->location.pool; + int idx = (int)(hw_sema->location.offset / SEMAPHORE_SIZE); + struct gk20a *g = p->sema_sea->gk20a; + + nvgpu_assert(p != NULL); + + nvgpu_mutex_acquire(&p->pool_lock); + + nvgpu_clear_bit((u32)idx, p->semas_alloced); + + nvgpu_kfree(g, hw_sema); + + nvgpu_mutex_release(&p->pool_lock); +} + +u64 nvgpu_hw_semaphore_addr(struct nvgpu_hw_semaphore *hw_sema) +{ + return nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + + hw_sema->location.offset; +} + +u32 nvgpu_hw_semaphore_read(struct nvgpu_hw_semaphore *hw_sema) +{ + struct nvgpu_semaphore_pool *pool = hw_sema->location.pool; + struct gk20a *g = pool->sema_sea->gk20a; + + return nvgpu_mem_rd(g, &pool->rw_mem, hw_sema->location.offset); +} + +/* + * Fast-forward the hw sema to its tracked max value. + * + * Return true if the sema wasn't at the max value and needed updating, false + * otherwise. + */ +bool nvgpu_hw_semaphore_reset(struct nvgpu_hw_semaphore *hw_sema) +{ + struct nvgpu_semaphore_pool *pool = hw_sema->location.pool; + struct gk20a *g = pool->sema_sea->gk20a; + u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value); + u32 current_val = nvgpu_hw_semaphore_read(hw_sema); + + /* + * If the semaphore has already reached the value we would write then + * this is really just a NO-OP. However, the sema value shouldn't be + * more than what we expect to be the max. + */ + + bool is_released = nvgpu_semaphore_value_released(threshold + 1U, + current_val); + + nvgpu_assert(!is_released); + + if (is_released) { + return false; + } + + if (current_val == threshold) { + return false; + } + + nvgpu_mem_wr(g, &pool->rw_mem, hw_sema->location.offset, threshold); + + gpu_sema_verbose_dbg(g, "(c=%d) RESET %u -> %u", + hw_sema->chid, current_val, threshold); + + return true; +} + +int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema) +{ + return nvgpu_atomic_read(&hw_sema->next_value); +} + +int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema) +{ + int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); + struct nvgpu_semaphore_pool *p = hw_sema->location.pool; + struct gk20a *g = p->sema_sea->gk20a; + + gpu_sema_verbose_dbg(g, "INCR sema for c=%d (%u)", + hw_sema->chid, next); + return next; +} diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_pool.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_pool.c new file mode 100644 index 000000000..cd2a6a4f2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_pool.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + +#define pool_to_gk20a(p) ((p)->sema_sea->gk20a) + +/* + * Allocate a pool from the sea. + */ +int nvgpu_semaphore_pool_alloc(struct nvgpu_semaphore_sea *sea, + struct nvgpu_semaphore_pool **pool) +{ + struct nvgpu_semaphore_pool *p; + unsigned long page_idx; + int ret; + + p = nvgpu_kzalloc(sea->gk20a, sizeof(*p)); + if (p == NULL) { + return -ENOMEM; + } + + nvgpu_semaphore_sea_lock(sea); + + nvgpu_mutex_init(&p->pool_lock); + + ret = semaphore_bitmap_alloc(sea->pools_alloced, + SEMAPHORE_POOL_COUNT); + if (ret < 0) { + goto fail; + } + + page_idx = (unsigned long)ret; + + p->page_idx = page_idx; + p->sema_sea = sea; + nvgpu_init_list_node(&p->pool_list_entry); + nvgpu_ref_init(&p->ref); + + sea->page_count++; + nvgpu_list_add(&p->pool_list_entry, &sea->pool_list); + nvgpu_semaphore_sea_unlock(sea); + + gpu_sema_dbg(sea->gk20a, + "Allocated semaphore pool: page-idx=%llu", p->page_idx); + + *pool = p; + return 0; + +fail: + nvgpu_mutex_destroy(&p->pool_lock); + nvgpu_semaphore_sea_unlock(sea); + nvgpu_kfree(sea->gk20a, p); + gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!"); + return ret; +} + +/* + * Map a pool into the passed vm's address space. This handles both the fixed + * global RO mapping and the non-fixed private RW mapping. + */ +int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p, + struct vm_gk20a *vm) +{ + int err = 0; + u64 addr; + + if (p->mapped) { + return -EBUSY; + } + + gpu_sema_dbg(pool_to_gk20a(p), + "Mapping semaphore pool! (idx=%llu)", p->page_idx); + + /* + * Take the sea lock so that we don't race with a possible change to the + * nvgpu_mem in the sema sea. + */ + nvgpu_semaphore_sea_lock(p->sema_sea); + + addr = nvgpu_gmmu_map_fixed(vm, &p->sema_sea->sea_mem, + p->sema_sea->gpu_va, + p->sema_sea->map_size, + 0, gk20a_mem_flag_read_only, 0, + p->sema_sea->sea_mem.aperture); + if (addr == 0ULL) { + err = -ENOMEM; + goto fail_unlock; + } + + p->gpu_va_ro = addr; + p->mapped = true; + + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: GPU read-only VA = 0x%llx", + p->page_idx, p->gpu_va_ro); + + /* + * Now the RW mapping. This is a bit more complicated. We make a + * nvgpu_mem describing a page of the bigger RO space and then map + * that. Unlike above this does not need to be a fixed address. 
+ */ + err = nvgpu_mem_create_from_mem(vm->mm->g, + &p->rw_mem, &p->sema_sea->sea_mem, + p->page_idx, 1UL); + if (err != 0) { + goto fail_unmap; + } + + addr = nvgpu_gmmu_map(vm, &p->rw_mem, SZ_4K, 0, + gk20a_mem_flag_none, 0, + p->rw_mem.aperture); + + if (addr == 0ULL) { + err = -ENOMEM; + goto fail_free_submem; + } + + p->gpu_va = addr; + + nvgpu_semaphore_sea_unlock(p->sema_sea); + + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: GPU read-write VA = 0x%llx", + p->page_idx, p->gpu_va); + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: CPU VA = 0x%p", + p->page_idx, p->rw_mem.cpu_va); + + return 0; + +fail_free_submem: + nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); +fail_unmap: + nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro); + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: Failed to map semaphore pool!", p->page_idx); +fail_unlock: + nvgpu_semaphore_sea_unlock(p->sema_sea); + return err; +} + +/* + * Unmap a semaphore_pool. + */ +void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p, + struct vm_gk20a *vm) +{ + nvgpu_semaphore_sea_lock(p->sema_sea); + + nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro); + nvgpu_gmmu_unmap(vm, &p->rw_mem, p->gpu_va); + nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); + + p->gpu_va = 0; + p->gpu_va_ro = 0; + p->mapped = false; + + nvgpu_semaphore_sea_unlock(p->sema_sea); + + gpu_sema_dbg(pool_to_gk20a(p), + "Unmapped semaphore pool! (idx=%llu)", p->page_idx); +} + +static struct nvgpu_semaphore_pool * +nvgpu_semaphore_pool_from_ref(struct nvgpu_ref *ref) +{ + return (struct nvgpu_semaphore_pool *) + ((uintptr_t)ref - offsetof(struct nvgpu_semaphore_pool, ref)); +} + +/* + * Completely free a semaphore_pool. You should make sure this pool is not + * mapped otherwise there's going to be a memory leak. + */ +static void nvgpu_semaphore_pool_free(struct nvgpu_ref *ref) +{ + struct nvgpu_semaphore_pool *p = nvgpu_semaphore_pool_from_ref(ref); + struct nvgpu_semaphore_sea *s = p->sema_sea; + + /* Freeing a mapped pool is a bad idea. */ + WARN_ON((p->mapped) || + (p->gpu_va != 0ULL) || + (p->gpu_va_ro != 0ULL)); + + nvgpu_semaphore_sea_lock(s); + nvgpu_list_del(&p->pool_list_entry); + nvgpu_clear_bit((u32)p->page_idx, s->pools_alloced); + s->page_count--; + nvgpu_semaphore_sea_unlock(s); + + nvgpu_mutex_destroy(&p->pool_lock); + + gpu_sema_dbg(pool_to_gk20a(p), + "Freed semaphore pool! (idx=%llu)", p->page_idx); + nvgpu_kfree(p->sema_sea->gk20a, p); +} + +void nvgpu_semaphore_pool_get(struct nvgpu_semaphore_pool *p) +{ + nvgpu_ref_get(&p->ref); +} + +void nvgpu_semaphore_pool_put(struct nvgpu_semaphore_pool *p) +{ + nvgpu_ref_put(&p->ref, nvgpu_semaphore_pool_free); +} + +/* + * Get the address for a semaphore_pool - if global is true then return the + * global RO address instead of the RW address owned by the semaphore's VM. + */ +u64 nvgpu_semaphore_pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global) +{ + if (!global) { + return p->gpu_va; + } + + return p->gpu_va_ro + (NVGPU_CPU_PAGE_SIZE * p->page_idx); +} + +/* + * Return the index into the sea bitmap + */ +u64 nvgpu_semaphore_pool_get_page_idx(struct nvgpu_semaphore_pool *p) +{ + return p->page_idx; +} diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_priv.h b/drivers/gpu/nvgpu/common/semaphore/semaphore_priv.h new file mode 100644 index 000000000..0ff3d8b75 --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_priv.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#ifndef NVGPU_SEMAPHORE_PRIV_H +#define NVGPU_SEMAPHORE_PRIV_H + +#include +#include +#include +#include +#include +#include +#include + +struct gk20a; + +/* + * The number of channels to get a sema from a VM's pool is determined by the + * pool size (one page) divided by this sema size. + */ +#define SEMAPHORE_SIZE 16U +/* + * Max number of VMs that can be used is 512. This of course needs to be fixed + * to be dynamic but still fast. + */ +#define SEMAPHORE_POOL_COUNT 512U + +/* + * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple + * channels can share a VM each channel gets it's own HW semaphore from the + * pool. Channels then allocate regular semaphores - basically just a value that + * signifies when a particular job is done. + */ +struct nvgpu_semaphore_sea { + struct nvgpu_list_node pool_list; /* List of pools in this sea. */ + struct gk20a *gk20a; + + size_t size; /* Number of pages available. */ + u64 gpu_va; /* GPU virtual address of sema sea. */ + u64 map_size; /* Size of the mapping. */ + + /* + * TODO: + * List of pages that we use to back the pools. The number of pages + * should grow dynamically since allocating 512 pages for all VMs at + * once would be a tremendous waste. + */ + int page_count; /* Pages allocated to pools. */ + + /* + * The read-only memory for the entire semaphore sea. Each semaphore + * pool needs a sub-nvgpu_mem that will be mapped as RW in its address + * space. This sea_mem cannot be freed until all semaphore_pools have + * been freed. + */ + struct nvgpu_mem sea_mem; + + /* + * Can't use a regular allocator here since the full range of pools are + * not always allocated. Instead just use a bitmap. + */ + DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); + + struct nvgpu_mutex sea_lock; /* Lock alloc/free calls. */ +}; + +/* + * A semaphore pool. Each address space will own exactly one of these. + */ +struct nvgpu_semaphore_pool { + struct nvgpu_list_node pool_list_entry; /* Node for list of pools. */ + u64 gpu_va; /* GPU access to the pool. */ + u64 gpu_va_ro; /* GPU access to the pool. */ + u64 page_idx; /* Index into sea bitmap. */ + + DECLARE_BITMAP(semas_alloced, NVGPU_CPU_PAGE_SIZE / SEMAPHORE_SIZE); + + struct nvgpu_semaphore_sea *sema_sea; /* Sea that owns this pool. */ + + struct nvgpu_mutex pool_lock; + + /* + * This is the address spaces's personal RW table. Other channels will + * ultimately map this page as RO. 
This is a sub-nvgpu_mem from the + * sea's mem. + */ + struct nvgpu_mem rw_mem; + + bool mapped; + + /* + * Sometimes a channel and its VM can be released before other channels + * are done waiting on it. This ref count ensures that the pool doesn't + * go away until all semaphores using this pool are cleaned up first. + */ + struct nvgpu_ref ref; +}; + +struct nvgpu_semaphore_loc { + struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */ + u32 offset; /* Byte offset into the pool. */ +}; + +/* + * Underlying semaphore data structure. This semaphore can be shared amongst + * instances of nvgpu_semaphore via the location in its pool. + */ +struct nvgpu_hw_semaphore { + struct nvgpu_semaphore_loc location; + nvgpu_atomic_t next_value; /* Next available value. */ + u32 chid; /* Owner, for debugging */ +}; + +/* + * A semaphore which the rest of the driver actually uses. This consists of a + * reference to a real semaphore location and a value to wait for. This allows + * one physical semaphore to be shared among an essentially infinite number of + * submits. + */ +struct nvgpu_semaphore { + struct gk20a *g; + struct nvgpu_semaphore_loc location; + + nvgpu_atomic_t value; + bool ready_to_wait; + + struct nvgpu_ref ref; +}; + + +static inline int semaphore_bitmap_alloc(unsigned long *bitmap, + unsigned long len) +{ + unsigned long idx = find_first_zero_bit(bitmap, len); + + if (idx == len) { + return -ENOSPC; + } + + nvgpu_set_bit((u32)idx, bitmap); + + return (int)idx; +} + +/* + * Check if "racer" is over "goal" with wraparound handling. + */ +static inline bool nvgpu_semaphore_value_released(u32 goal, u32 racer) +{ + /* + * Handle wraparound with the same heuristic as the hardware does: + * although the integer will eventually wrap around, consider a sema + * released against a threshold if its value has passed that threshold + * but has not wrapped over half of the u32 range over that threshold; + * such wrapping is unlikely to happen during a sema lifetime. + * + * Values for [goal, goal + 0x7fffffff] are considered signaled; that's + * precisely half of the 32-bit space. If racer == goal + 0x80000000, + * then it needs 0x80000000 increments to wrap again and signal. + * + * Unsigned arithmetic is used because it's well-defined. This is + * effectively the same as: signed_racer - signed_goal > 0. + */ + + return racer - goal < 0x80000000U; +} + +#endif /* NVGPU_SEMAPHORE_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_sea.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_sea.c new file mode 100644 index 000000000..a185ab880 --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_sea.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + +void nvgpu_semaphore_sea_lock(struct nvgpu_semaphore_sea *s) +{ + gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock..."); + nvgpu_mutex_acquire(&s->sea_lock); + gpu_sema_verbose_dbg(s->gk20a, "Sema lock aquried!"); +} + +void nvgpu_semaphore_sea_unlock(struct nvgpu_semaphore_sea *s) +{ + nvgpu_mutex_release(&s->sea_lock); + gpu_sema_verbose_dbg(s->gk20a, "Released sema lock"); +} + +static int semaphore_sea_grow(struct nvgpu_semaphore_sea *sea) +{ + int ret = 0; + struct gk20a *g = sea->gk20a; + u32 i; + + nvgpu_semaphore_sea_lock(sea); + + ret = nvgpu_dma_alloc_sys(g, + NVGPU_CPU_PAGE_SIZE * SEMAPHORE_POOL_COUNT, + &sea->sea_mem); + if (ret != 0) { + goto out; + } + + sea->size = SEMAPHORE_POOL_COUNT; + sea->map_size = SEMAPHORE_POOL_COUNT * NVGPU_CPU_PAGE_SIZE; + + /* + * Start the semaphores at values that will soon overflow the 32-bit + * integer range. This way any buggy comparisons would start to fail + * sooner rather than later. + */ + for (i = 0U; i < NVGPU_CPU_PAGE_SIZE * SEMAPHORE_POOL_COUNT; i += 4U) { + nvgpu_mem_wr(g, &sea->sea_mem, i, 0xfffffff0U); + } + +out: + nvgpu_semaphore_sea_unlock(sea); + return ret; +} + + +/* + * Return the sema_sea pointer. + */ +struct nvgpu_semaphore_sea *nvgpu_semaphore_get_sea(struct gk20a *g) +{ + return g->sema_sea; +} + +void nvgpu_semaphore_sea_allocate_gpu_va(struct nvgpu_semaphore_sea *s, + struct nvgpu_allocator *a, u64 base, u64 len, u32 page_size) +{ + s->gpu_va = nvgpu_alloc_fixed(a, base, len, page_size); +} + +u64 nvgpu_semaphore_sea_get_gpu_va(struct nvgpu_semaphore_sea *s) +{ + return s->gpu_va; +} + +/* + * Create the semaphore sea. Only create it once - subsequent calls to this will + * return the originally created sea pointer. + */ +struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g) +{ + if (g->sema_sea != NULL) { + return g->sema_sea; + } + + g->sema_sea = nvgpu_kzalloc(g, sizeof(*g->sema_sea)); + if (g->sema_sea == NULL) { + return NULL; + } + + g->sema_sea->size = 0; + g->sema_sea->page_count = 0; + g->sema_sea->gk20a = g; + nvgpu_init_list_node(&g->sema_sea->pool_list); + nvgpu_mutex_init(&g->sema_sea->sea_lock); + + if (semaphore_sea_grow(g->sema_sea) != 0) { + goto cleanup; + } + + gpu_sema_dbg(g, "Created semaphore sea!"); + return g->sema_sea; + +cleanup: + nvgpu_mutex_destroy(&g->sema_sea->sea_lock); + nvgpu_kfree(g, g->sema_sea); + g->sema_sea = NULL; + gpu_sema_dbg(g, "Failed to creat semaphore sea!"); + return NULL; +} + +void nvgpu_semaphore_sea_destroy(struct gk20a *g) +{ + if (g->sema_sea == NULL) { + return; + } + + nvgpu_dma_free(g, &g->sema_sea->sea_mem); + nvgpu_mutex_destroy(&g->sema_sea->sea_lock); + nvgpu_kfree(g, g->sema_sea); + g->sema_sea = NULL; +} diff --git a/drivers/gpu/nvgpu/common/sim/sim.c b/drivers/gpu/nvgpu/common/sim/sim.c new file mode 100644 index 000000000..34e9b5d20 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sim/sim.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) +{ + nvgpu_os_writel(v, sim->regs + r); +} + +u32 sim_readl(struct sim_nvgpu *sim, u32 r) +{ + return nvgpu_os_readl(sim->regs + r); +} + +int nvgpu_alloc_sim_buffer(struct gk20a *g, struct nvgpu_mem *mem) +{ + int err = 0; + + if (!nvgpu_mem_is_valid(mem)) { + err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem); + } + + return err; +} + +void nvgpu_free_sim_buffer(struct gk20a *g, struct nvgpu_mem *mem) +{ + if (nvgpu_mem_is_valid(mem)) { + nvgpu_dma_free(g, mem); + } + + (void) memset(mem, 0, sizeof(*mem)); +} + +void nvgpu_free_sim_support(struct gk20a *g) +{ + nvgpu_free_sim_buffer(g, &g->sim->send_bfr); + nvgpu_free_sim_buffer(g, &g->sim->recv_bfr); + nvgpu_free_sim_buffer(g, &g->sim->msg_bfr); +} + +void nvgpu_remove_sim_support(struct gk20a *g) +{ + if (g->sim) { + nvgpu_free_sim_support(g); + } +} + +void sim_write_hdr(struct gk20a *g, u32 func, u32 size) +{ + /*memset(g->sim->msg_bfr.kvaddr,0,min(NVGPU_CPU_PAGE_SIZE,size));*/ + *sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v(); + *sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v(); + *sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v(); + *sim_msg_hdr(g, sim_msg_function_r()) = func; + *sim_msg_hdr(g, sim_msg_length_r()) = size + sim_msg_header_size(); +} + +static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset) +{ + u8 *cpu_va; + + cpu_va = (u8 *)g->sim->send_bfr.cpu_va; + + return (u32 *)(cpu_va + byte_offset); +} + +static int rpc_send_message(struct gk20a *g) +{ + /* calculations done in units of u32s */ + u32 send_base = sim_send_put_pointer_v(g->sim->send_ring_put) * 2; + u32 dma_offset = send_base + sim_dma_r()/sizeof(u32); + u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32); + + *sim_send_ring_bfr(g, dma_offset*sizeof(u32)) = + sim_dma_target_phys_pci_coherent_f() | + sim_dma_status_valid_f() | + sim_dma_size_4kb_f() | + sim_dma_addr_lo_f(nvgpu_mem_get_addr(g, &g->sim->msg_bfr) + >> sim_dma_addr_lo_b()); + + *sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = + u64_hi32(nvgpu_mem_get_addr(g, &g->sim->msg_bfr)); + + *sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim->sequence_base++; + + g->sim->send_ring_put = (g->sim->send_ring_put + 2 * sizeof(u32)) + % SIM_BFR_SIZE; + + /* Update the put pointer. 
This will trap into the host. */ + sim_writel(g->sim, sim_send_put_r(), g->sim->send_ring_put); + + return 0; +} + +static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset) +{ + u8 *cpu_va; + + cpu_va = (u8 *)g->sim->recv_bfr.cpu_va; + + return (u32 *)(cpu_va + byte_offset); +} + +static int rpc_recv_poll(struct gk20a *g) +{ + u64 recv_phys_addr; + + /* XXX This read is not required (?) */ + /*pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET);*/ + + /* Poll the recv ring get pointer in an infinite loop*/ + do { + g->sim->recv_ring_put = sim_readl(g->sim, sim_recv_put_r()); + } while (g->sim->recv_ring_put == g->sim->recv_ring_get); + + /* process all replies */ + while (g->sim->recv_ring_put != g->sim->recv_ring_get) { + /* these are in u32 offsets*/ + u32 dma_lo_offset = + sim_recv_put_pointer_v(g->sim->recv_ring_get)*2 + 0; + u32 dma_hi_offset = dma_lo_offset + 1; + u32 recv_phys_addr_lo = sim_dma_addr_lo_v( + *sim_recv_ring_bfr(g, dma_lo_offset*4)); + u32 recv_phys_addr_hi = sim_dma_hi_addr_v( + *sim_recv_ring_bfr(g, dma_hi_offset*4)); + + recv_phys_addr = (u64)recv_phys_addr_hi << 32 | + (u64)recv_phys_addr_lo << sim_dma_addr_lo_b(); + + if (recv_phys_addr != + nvgpu_mem_get_addr(g, &g->sim->msg_bfr)) { + nvgpu_err(g, "%s Error in RPC reply", + __func__); + return -1; + } + + /* Update GET pointer */ + g->sim->recv_ring_get = (g->sim->recv_ring_get + 2*sizeof(u32)) + % SIM_BFR_SIZE; + + sim_writel(g->sim, sim_recv_get_r(), g->sim->recv_ring_get); + + g->sim->recv_ring_put = sim_readl(g->sim, sim_recv_put_r()); + } + + return 0; +} + +int issue_rpc_and_wait(struct gk20a *g) +{ + int err; + + err = rpc_send_message(g); + if (err != 0) { + nvgpu_err(g, "%s failed rpc_send_message", + __func__); + return err; + } + + err = rpc_recv_poll(g); + if (err != 0) { + nvgpu_err(g, "%s failed rpc_recv_poll", + __func__); + return err; + } + + /* Now check if RPC really succeeded */ + if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) { + nvgpu_err(g, "%s received failed status!", + __func__); + return -(*sim_msg_hdr(g, sim_msg_result_r())); + } + return 0; +} + +static void nvgpu_sim_esc_readl(struct gk20a *g, + const char *path, u32 index, u32 *data) +{ + int err; + size_t pathlen = strlen(path); + u32 data_offset; + + sim_write_hdr(g, sim_msg_function_sim_escape_read_v(), + sim_escape_read_hdr_size()); + *sim_msg_param(g, 0) = index; + *sim_msg_param(g, 4) = sizeof(u32); + data_offset = round_up(0xc + pathlen + 1, sizeof(u32)); + *sim_msg_param(g, 8) = data_offset; + strcpy((char *)sim_msg_param(g, 0xc), path); + + err = issue_rpc_and_wait(g); + + if (err == 0) { + nvgpu_memcpy((u8 *)data, (u8 *)sim_msg_param(g, data_offset), + sizeof(u32)); + } else { + *data = 0xffffffff; + WARN(1, "issue_rpc_and_wait failed err=%d", err); + } +} + +static int nvgpu_sim_init_late(struct gk20a *g) +{ + u64 phys; + int err = -ENOMEM; + + if (!g->sim) { + return 0; + } + + nvgpu_info(g, "sim init late"); + + /* allocate sim event/msg buffers */ + err = nvgpu_alloc_sim_buffer(g, &g->sim->send_bfr); + err = err || nvgpu_alloc_sim_buffer(g, &g->sim->recv_bfr); + err = err || nvgpu_alloc_sim_buffer(g, &g->sim->msg_bfr); + + if (err != 0) { + goto fail; + } + + /*mark send ring invalid*/ + sim_writel(g->sim, sim_send_ring_r(), sim_send_ring_status_invalid_f()); + + /*read get pointer and make equal to put*/ + g->sim->send_ring_put = sim_readl(g->sim, sim_send_get_r()); + sim_writel(g->sim, sim_send_put_r(), g->sim->send_ring_put); + + /*write send ring address and make it 
valid*/ + phys = nvgpu_mem_get_addr(g, &g->sim->send_bfr); + sim_writel(g->sim, sim_send_ring_hi_r(), + sim_send_ring_hi_addr_f(u64_hi32(phys))); + sim_writel(g->sim, sim_send_ring_r(), + sim_send_ring_status_valid_f() | + sim_send_ring_target_phys_pci_coherent_f() | + sim_send_ring_size_4kb_f() | + sim_send_ring_addr_lo_f(phys >> sim_send_ring_addr_lo_b())); + + /*repeat for recv ring (but swap put,get as roles are opposite) */ + sim_writel(g->sim, sim_recv_ring_r(), sim_recv_ring_status_invalid_f()); + + /*read put pointer and make equal to get*/ + g->sim->recv_ring_get = sim_readl(g->sim, sim_recv_put_r()); + sim_writel(g->sim, sim_recv_get_r(), g->sim->recv_ring_get); + + /*write send ring address and make it valid*/ + phys = nvgpu_mem_get_addr(g, &g->sim->recv_bfr); + sim_writel(g->sim, sim_recv_ring_hi_r(), + sim_recv_ring_hi_addr_f(u64_hi32(phys))); + sim_writel(g->sim, sim_recv_ring_r(), + sim_recv_ring_status_valid_f() | + sim_recv_ring_target_phys_pci_coherent_f() | + sim_recv_ring_size_4kb_f() | + sim_recv_ring_addr_lo_f(phys >> sim_recv_ring_addr_lo_b())); + + return 0; + + fail: + nvgpu_free_sim_support(g); + return err; +} + +int nvgpu_init_sim_support(struct gk20a *g) +{ + if (!g->sim) { + return 0; + } + + g->sim->sim_init_late = nvgpu_sim_init_late; + g->sim->remove_support = nvgpu_remove_sim_support; + g->sim->esc_readl = nvgpu_sim_esc_readl; + return 0; +} diff --git a/drivers/gpu/nvgpu/common/sim/sim_netlist.c b/drivers/gpu/nvgpu/common/sim/sim_netlist.c new file mode 100644 index 000000000..97444821a --- /dev/null +++ b/drivers/gpu/nvgpu/common/sim/sim_netlist.c @@ -0,0 +1,820 @@ +/* + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +#include "nvgpu/nvgpu_next_sim.h" +#endif + +int nvgpu_init_sim_netlist_ctx_vars(struct gk20a *g) +{ + int err = -ENOMEM; + u32 i, temp; + u32 fecs_inst_count, fecs_data_count; + u32 gpccs_inst_count, gpccs_data_count; + struct netlist_av_list *sw_bundle_init; + struct netlist_av_list *sw_method_init; + struct netlist_aiv_list *sw_ctx_load; + struct netlist_av_list *sw_non_ctx_load; + struct netlist_av_list *sw_veid_bundle_init; + struct netlist_av64_list *sw_bundle64_init; +#ifdef CONFIG_NVGPU_DEBUGGER + struct netlist_aiv_list *sys_ctxsw_regs; + struct netlist_aiv_list *gpc_ctxsw_regs; + struct netlist_aiv_list *tpc_ctxsw_regs; +#ifdef CONFIG_NVGPU_GRAPHICS + struct netlist_aiv_list *zcull_gpc_ctxsw_regs; +#endif + struct netlist_aiv_list *pm_sys_ctxsw_regs; + struct netlist_aiv_list *pm_gpc_ctxsw_regs; + struct netlist_aiv_list *pm_tpc_ctxsw_regs; + struct netlist_aiv_list *ppc_ctxsw_regs; + struct netlist_aiv_list *etpc_ctxsw_regs; + struct netlist_aiv_list *pm_ppc_ctxsw_regs; + struct netlist_aiv_list *perf_sys_ctxsw_regs; + struct netlist_aiv_list *perf_sysrouter_ctxsw_regs; + struct netlist_aiv_list *perf_sys_control_ctxsw_regs; + struct netlist_aiv_list *perf_pma_ctxsw_regs; + struct netlist_aiv_list *perf_fbp_ctxsw_regs; + struct netlist_aiv_list *perf_fbprouter_ctxsw_regs; + struct netlist_aiv_list *perf_gpc_ctxsw_regs; + struct netlist_aiv_list *perf_gpcrouter_ctxsw_regs; + struct netlist_aiv_list *pm_ltc_ctxsw_regs; + struct netlist_aiv_list *pm_rop_ctxsw_regs; + struct netlist_aiv_list *pm_ucgpc_ctxsw_regs; + struct netlist_aiv_list *pm_cau_ctxsw_regs; + struct netlist_aiv_list *pm_fbpa_ctxsw_regs; + struct netlist_aiv_list *perf_fbp_control_ctxsw_regs; + struct netlist_aiv_list *perf_gpc_control_ctxsw_regs; + struct netlist_aiv_list *perf_pma_control_ctxsw_regs; +#endif /* CONFIG_NVGPU_DEBUGGER */ + struct netlist_u32_list *fecs_inst, *fecs_data; + struct netlist_u32_list *gpccs_inst, *gpccs_data; + u32 regs_base_index; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, + "querying grctx info from chiplib"); + + nvgpu_netlist_vars_set_dynamic(g, true); + + if (g->sim->esc_readl == NULL) { + nvgpu_err(g, "Invalid pointer to query function."); + err = -ENOENT; + return err; + } + + sw_bundle_init = nvgpu_netlist_get_sw_bundle_init_av_list(g); + sw_method_init = nvgpu_netlist_get_sw_method_init_av_list(g); + sw_ctx_load = nvgpu_netlist_get_sw_ctx_load_aiv_list(g); + sw_non_ctx_load = nvgpu_netlist_get_sw_non_ctx_load_av_list(g); + sw_veid_bundle_init = nvgpu_netlist_get_sw_veid_bundle_init_av_list(g); + sw_bundle64_init = nvgpu_netlist_get_sw_bundle64_init_av64_list(g); + +#ifdef CONFIG_NVGPU_DEBUGGER + sys_ctxsw_regs = nvgpu_netlist_get_sys_ctxsw_regs(g); + gpc_ctxsw_regs = nvgpu_netlist_get_gpc_ctxsw_regs(g); + tpc_ctxsw_regs = nvgpu_netlist_get_tpc_ctxsw_regs(g); +#ifdef CONFIG_NVGPU_GRAPHICS + zcull_gpc_ctxsw_regs = nvgpu_netlist_get_zcull_gpc_ctxsw_regs(g); +#endif + pm_sys_ctxsw_regs = nvgpu_netlist_get_pm_sys_ctxsw_regs(g); + pm_gpc_ctxsw_regs = nvgpu_netlist_get_pm_gpc_ctxsw_regs(g); + pm_tpc_ctxsw_regs = nvgpu_netlist_get_pm_tpc_ctxsw_regs(g); + ppc_ctxsw_regs = nvgpu_netlist_get_ppc_ctxsw_regs(g); + etpc_ctxsw_regs = nvgpu_netlist_get_etpc_ctxsw_regs(g); + + pm_ppc_ctxsw_regs = nvgpu_netlist_get_pm_ppc_ctxsw_regs(g); + perf_sys_ctxsw_regs = nvgpu_netlist_get_perf_sys_ctxsw_regs(g); + perf_sysrouter_ctxsw_regs = + nvgpu_netlist_get_perf_sys_router_ctxsw_regs(g); + 
perf_sys_control_ctxsw_regs = + nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g); + perf_pma_ctxsw_regs = nvgpu_netlist_get_perf_pma_ctxsw_regs(g); + perf_fbp_ctxsw_regs = nvgpu_netlist_get_fbp_ctxsw_regs(g); + perf_fbprouter_ctxsw_regs = + nvgpu_netlist_get_fbp_router_ctxsw_regs(g); + perf_gpc_ctxsw_regs = nvgpu_netlist_get_perf_gpc_ctxsw_regs(g); + perf_gpcrouter_ctxsw_regs = + nvgpu_netlist_get_gpc_router_ctxsw_regs(g); + pm_ltc_ctxsw_regs = nvgpu_netlist_get_pm_ltc_ctxsw_regs(g); + pm_rop_ctxsw_regs = nvgpu_netlist_get_pm_rop_ctxsw_regs(g); + pm_ucgpc_ctxsw_regs = nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(g); + pm_cau_ctxsw_regs = nvgpu_netlist_get_pm_cau_ctxsw_regs(g); + pm_fbpa_ctxsw_regs = nvgpu_netlist_get_pm_fbpa_ctxsw_regs(g); + perf_fbp_control_ctxsw_regs = + nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(g); + perf_gpc_control_ctxsw_regs = + nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(g); + perf_pma_control_ctxsw_regs = + nvgpu_netlist_get_perf_pma_control_ctxsw_regs(g); + +#endif /* CONFIG_NVGPU_DEBUGGER */ + + fecs_inst = nvgpu_netlist_get_fecs_inst(g); + fecs_data = nvgpu_netlist_get_fecs_data(g); + gpccs_inst = nvgpu_netlist_get_gpccs_inst(g); + gpccs_data = nvgpu_netlist_get_gpccs_data(g); + + /* query sizes and counts */ + g->sim->esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0, + &fecs_inst_count); + nvgpu_netlist_set_fecs_inst_count(g, fecs_inst_count); + g->sim->esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0, + &fecs_data_count); + nvgpu_netlist_set_fecs_data_count(g, fecs_data_count); + g->sim->esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0, + &gpccs_inst_count); + nvgpu_netlist_set_gpccs_inst_count(g, gpccs_inst_count); + g->sim->esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0, + &gpccs_data_count); + nvgpu_netlist_set_gpccs_data_count(g, gpccs_data_count); + g->sim->esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp); + nvgpu_netlist_vars_set_buffer_size(g, (temp << 2)); + g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0, + &sw_bundle_init->count); + g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0, + &sw_method_init->count); + g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0, + &sw_ctx_load->count); + g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT_SIZE", 0, + &sw_veid_bundle_init->count); + g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT_SIZE", 0, + &sw_bundle64_init->count); + g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG_SIZE", 0, + &sw_non_ctx_load->count); +#ifdef CONFIG_NVGPU_DEBUGGER + g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0, + &sys_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0, + &gpc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0, + &tpc_ctxsw_regs->count); +#ifdef CONFIG_NVGPU_GRAPHICS + g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0, + &zcull_gpc_ctxsw_regs->count); +#endif + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0, + &pm_sys_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0, + &pm_gpc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0, + &pm_tpc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0, + &ppc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC_COUNT", 0, + &etpc_ctxsw_regs->count); + + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_PPC_COUNT", 0, + &pm_ppc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYS_COUNT", 0, + &perf_sys_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYSROUTER_COUNT", 0, + &perf_sysrouter_ctxsw_regs->count); + g->sim->esc_readl(g, 
"GRCTX_REG_LIST_PERF_SYS_CONTROL_COUNT", 0, + &perf_sys_control_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA_COUNT", 0, + &perf_pma_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP_COUNT", 0, + &perf_fbp_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBPROUTER_COUNT", 0, + &perf_fbprouter_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC_COUNT", 0, + &perf_gpc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPCROUTER_COUNT", 0, + &perf_gpcrouter_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_LTC_COUNT", 0, + &pm_ltc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_ROP_COUNT", 0, + &pm_rop_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_UNICAST_GPC_COUNT", 0, + &pm_ucgpc_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_CAU_COUNT", 0, + &pm_cau_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_FBPA_COUNT", 0, + &pm_fbpa_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP_CONTROL_COUNT", 0, + &perf_fbp_control_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC_CONTROL_COUNT", 0, + &perf_gpc_control_ctxsw_regs->count); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA_CONTROL_COUNT", 0, + &perf_pma_control_ctxsw_regs->count); + +#endif /* CONFIG_NVGPU_DEBUGGER */ + + if (nvgpu_netlist_alloc_u32_list(g, fecs_inst) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_u32_list(g, fecs_data) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_u32_list(g, gpccs_inst) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_u32_list(g, gpccs_data) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_av_list(g, sw_bundle_init) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_av64_list(g, sw_bundle64_init) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_av_list(g, sw_method_init) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, sw_ctx_load) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_av_list(g, sw_non_ctx_load) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_av_list(g, sw_veid_bundle_init) == NULL) { + goto fail; + } +#ifdef CONFIG_NVGPU_DEBUGGER + if (nvgpu_netlist_alloc_aiv_list(g, sys_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, gpc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, tpc_ctxsw_regs) == NULL) { + goto fail; + } +#ifdef CONFIG_NVGPU_GRAPHICS + if (nvgpu_netlist_alloc_aiv_list(g, zcull_gpc_ctxsw_regs) == NULL) { + goto fail; + } +#endif + if (nvgpu_netlist_alloc_aiv_list(g, ppc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_sys_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_gpc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_tpc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, etpc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_ppc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_sys_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_sysrouter_ctxsw_regs) + == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_sys_control_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_pma_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_fbp_ctxsw_regs) == NULL) { + goto fail; + } + if 
(nvgpu_netlist_alloc_aiv_list(g, perf_fbprouter_ctxsw_regs) + == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_gpc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_gpcrouter_ctxsw_regs) + == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_ltc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_rop_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_ucgpc_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_cau_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, pm_fbpa_ctxsw_regs) == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_fbp_control_ctxsw_regs) + == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_gpc_control_ctxsw_regs) + == NULL) { + goto fail; + } + if (nvgpu_netlist_alloc_aiv_list(g, perf_pma_control_ctxsw_regs) + == NULL) { + goto fail; + } + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_init_sim_netlist_ctxsw_regs(g); +#endif +#endif /* CONFIG_NVGPU_DEBUGGER */ +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_init_sim_netlist_ctx_vars(g); +#endif + + for (i = 0; i < nvgpu_netlist_get_fecs_inst_count(g); i++) { + g->sim->esc_readl(g, "GRCTX_UCODE_INST_FECS", + i, &fecs_inst->l[i]); + } + + for (i = 0; i < nvgpu_netlist_get_fecs_data_count(g); i++) { + g->sim->esc_readl(g, "GRCTX_UCODE_DATA_FECS", + i, &fecs_data->l[i]); + } + + for (i = 0; i < nvgpu_netlist_get_gpccs_inst_count(g); i++) { + g->sim->esc_readl(g, "GRCTX_UCODE_INST_GPCCS", + i, &gpccs_inst->l[i]); + } + + for (i = 0; i < nvgpu_netlist_get_gpccs_data_count(g); i++) { + g->sim->esc_readl(g, "GRCTX_UCODE_DATA_GPCCS", + i, &gpccs_data->l[i]); + } + + for (i = 0; i < sw_bundle_init->count; i++) { + struct netlist_av *l = sw_bundle_init->l; + g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE", + i, &l[i].value); + } + + for (i = 0; i < sw_method_init->count; i++) { + struct netlist_av *l = sw_method_init->l; + g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE", + i, &l[i].value); + } + + for (i = 0; i < sw_ctx_load->count; i++) { + struct netlist_aiv *l = sw_ctx_load->l; + g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE", + i, &l[i].value); + } + + for (i = 0; i < sw_non_ctx_load->count; i++) { + struct netlist_av *l = sw_non_ctx_load->l; + g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG:REG", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_NONCTXSW_REG:VALUE", + i, &l[i].value); + } + + for (i = 0; i < sw_veid_bundle_init->count; i++) { + struct netlist_av *l = sw_veid_bundle_init->l; + + g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_SW_VEID_BUNDLE_INIT:VALUE", + i, &l[i].value); + } + + for (i = 0; i < sw_bundle64_init->count; i++) { + struct netlist_av64 *l = sw_bundle64_init->l; + + g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:VALUE_LO", + i, &l[i].value_lo); + g->sim->esc_readl(g, "GRCTX_SW_BUNDLE64_INIT:VALUE_HI", + i, &l[i].value_hi); + } + +#ifdef CONFIG_NVGPU_DEBUGGER + for (i = 0; i < sys_ctxsw_regs->count; i++) { + struct netlist_aiv *l 
= sys_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE", + i, &l[i].value); + } + + for (i = 0; i < gpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = gpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < tpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = tpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < ppc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = ppc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE", + i, &l[i].value); + } + +#ifdef CONFIG_NVGPU_GRAPHICS + for (i = 0; i < zcull_gpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = zcull_gpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE", + i, &l[i].value); + } +#endif + + for (i = 0; i < pm_sys_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_sys_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE", + i, &l[i].value); + } + + for (i = 0; i < pm_gpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_gpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE", + i, &l[i].value); + } + + for (i = 0; i < pm_tpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_tpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE", + i, &l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_ETPC"); + for (i = 0; i < etpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = etpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_ETPC:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PM_PPC"); + for (i = 0; i < pm_ppc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_ppc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_PPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_PPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_PPC:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | 
gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_SYS"); + for (i = 0; i < perf_sys_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_sys_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYS:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYS:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYS:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_SYSROUTER"); + for (i = 0; i < perf_sysrouter_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_sysrouter_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYSROUTER:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYSROUTER:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYSROUTER:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_SYS_CONTROL"); + for (i = 0; i < perf_sys_control_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_sys_control_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYS_CONTROL:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYS_CONTROL:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_SYS_CONTROL:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_PMA"); + for (i = 0; i < perf_pma_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_pma_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_FBP"); + for (i = 0; i < perf_fbp_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_fbp_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_FBPROUTER"); + for (i = 0; i < perf_fbprouter_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_fbprouter_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBPROUTER:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBPROUTER:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBPROUTER:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_GPC"); + for (i = 0; i < perf_gpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_gpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC:ADDR", + i, &l[i].addr); + 
g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_GPCROUTER"); + for (i = 0; i < perf_gpcrouter_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_gpcrouter_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPCROUTER:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPCROUTER:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPCROUTER:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PM_LTC"); + for (i = 0; i < pm_ltc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_ltc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_LTC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_LTC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_LTC:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PM_ROP"); + for (i = 0; i < pm_rop_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_rop_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_ROP:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_ROP:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_ROP:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PM_UNICAST_GPC"); + for (i = 0; i < pm_ucgpc_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_ucgpc_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_UNICAST_GPC:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_UNICAST_GPC:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_UNICAST_GPC:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PM_CAU"); + for (i = 0; i < pm_cau_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_cau_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_CAU:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_CAU:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_CAU:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PM_FBPA_COUNT"); + for (i = 0; i < pm_fbpa_ctxsw_regs->count; i++) { + struct netlist_aiv *l = pm_fbpa_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_FBPA:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_FBPA:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PM_FBPA:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query 
GRCTX_REG_LIST_PERF_FBP_CONTROL"); + for (i = 0; i < perf_fbp_control_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_fbp_control_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP_CONTROL:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP_CONTROL:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_FBP_CONTROL:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_GPC_CONTROL"); + for (i = 0; i < perf_gpc_control_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_gpc_control_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC_CONTROL:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC_CONTROL:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_GPC_CONTROL:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "query GRCTX_REG_LIST_PERF_PMA_CONTROL"); + for (i = 0; i < perf_pma_control_ctxsw_regs->count; i++) { + struct netlist_aiv *l = perf_pma_control_ctxsw_regs->l; + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA_CONTROL:ADDR", + i, &l[i].addr); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA_CONTROL:INDEX", + i, &l[i].index); + g->sim->esc_readl(g, "GRCTX_REG_LIST_PERF_PMA_CONTROL:VALUE", + i, &l[i].value); + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, + "addr:0x%#08x index:0x%08x value:0x%08x", + l[i].addr, l[i].index, l[i].value); + } + +#endif /* CONFIG_NVGPU_DEBUGGER */ + + g->netlist_valid = true; + + g->sim->esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0, + ®s_base_index); + nvgpu_netlist_vars_set_regs_base_index(g, regs_base_index); + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib"); + return 0; +fail: + nvgpu_err(g, "failed querying grctx info from chiplib"); + + nvgpu_kfree(g, fecs_inst->l); + nvgpu_kfree(g, fecs_data->l); + nvgpu_kfree(g, gpccs_inst->l); + nvgpu_kfree(g, gpccs_data->l); + nvgpu_kfree(g, sw_bundle_init->l); + nvgpu_kfree(g, sw_bundle64_init->l); + nvgpu_kfree(g, sw_method_init->l); + nvgpu_kfree(g, sw_ctx_load->l); + nvgpu_kfree(g, sw_non_ctx_load->l); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_init_sim_netlist_ctx_vars_free(g); +#endif + nvgpu_kfree(g, sw_veid_bundle_init->l); +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_kfree(g, sys_ctxsw_regs->l); + nvgpu_kfree(g, gpc_ctxsw_regs->l); + nvgpu_kfree(g, tpc_ctxsw_regs->l); +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_kfree(g, zcull_gpc_ctxsw_regs->l); +#endif + nvgpu_kfree(g, ppc_ctxsw_regs->l); + nvgpu_kfree(g, pm_sys_ctxsw_regs->l); + nvgpu_kfree(g, pm_gpc_ctxsw_regs->l); + nvgpu_kfree(g, pm_tpc_ctxsw_regs->l); + nvgpu_kfree(g, etpc_ctxsw_regs->l); + nvgpu_kfree(g, pm_ppc_ctxsw_regs->l); + nvgpu_kfree(g, perf_sys_ctxsw_regs->l); + nvgpu_kfree(g, perf_sysrouter_ctxsw_regs->l); + nvgpu_kfree(g, perf_pma_ctxsw_regs->l); + nvgpu_kfree(g, perf_fbp_ctxsw_regs->l); + nvgpu_kfree(g, perf_fbprouter_ctxsw_regs->l); + nvgpu_kfree(g, perf_gpc_ctxsw_regs->l); + nvgpu_kfree(g, perf_gpcrouter_ctxsw_regs->l); + nvgpu_kfree(g, pm_ltc_ctxsw_regs->l); + nvgpu_kfree(g, pm_ucgpc_ctxsw_regs->l); + nvgpu_kfree(g, pm_cau_ctxsw_regs->l); + nvgpu_kfree(g, pm_fbpa_ctxsw_regs->l); + nvgpu_kfree(g, perf_fbp_control_ctxsw_regs->l); + nvgpu_kfree(g, 
perf_gpc_control_ctxsw_regs->l); + nvgpu_kfree(g, perf_pma_control_ctxsw_regs->l); +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) + nvgpu_next_init_sim_netlist_ctxsw_regs_free(g); +#endif +#endif /* CONFIG_NVGPU_DEBUGGER */ + + return err; +} diff --git a/drivers/gpu/nvgpu/common/sim/sim_pci.c b/drivers/gpu/nvgpu/common/sim/sim_pci.c new file mode 100644 index 000000000..8b5d518f0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sim/sim_pci.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline u32 pci_sim_msg_header_size(void) +{ + return 32U; +} + +static inline u32 *pci_sim_msg_param(struct gk20a *g, u32 byte_offset) +{ + /* starts after msg header/cmn */ + return sim_msg_bfr(g, byte_offset + pci_sim_msg_header_size()); +} + +static inline void pci_sim_write_hdr(struct gk20a *g, u32 func, u32 size) +{ + *sim_msg_hdr(g, sim_msg_header_version_r()) = + sim_msg_header_version_major_tot_v() | + sim_msg_header_version_minor_tot_v(); + *sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v(); + *sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v(); + *sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v(); + *sim_msg_hdr(g, sim_msg_function_r()) = func; + *sim_msg_hdr(g, sim_msg_length_r()) = + size + pci_sim_msg_header_size(); +} + +static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset) +{ + u8 *cpu_va; + + cpu_va = (u8 *)g->sim->send_bfr.cpu_va; + + return (u32 *)(cpu_va + byte_offset); +} + +static int rpc_send_message(struct gk20a *g) +{ + /* calculations done in units of u32s */ + u32 send_base = sim_send_put_pointer_v(g->sim->send_ring_put) * 2; + u32 dma_offset = send_base + sim_dma_r()/sizeof(u32); + u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32); + + *sim_send_ring_bfr(g, dma_offset*sizeof(u32)) = + sim_dma_target_phys_pci_coherent_f() | + sim_dma_status_valid_f() | + sim_dma_size_4kb_f() | + sim_dma_addr_lo_f(nvgpu_mem_get_phys_addr(g, &g->sim->msg_bfr) + >> sim_dma_addr_lo_b()); + + *sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) = + u64_hi32(nvgpu_mem_get_phys_addr(g, &g->sim->msg_bfr)); + + *sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim->sequence_base++; + + g->sim->send_ring_put = (g->sim->send_ring_put + 2 * sizeof(u32)) % + SIM_BFR_SIZE; + + /* Update the put pointer. 
This will trap into the host. */ + sim_writel(g->sim, sim_send_put_r(), g->sim->send_ring_put); + + return 0; +} + +static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset) +{ + u8 *cpu_va; + + cpu_va = (u8 *)g->sim->recv_bfr.cpu_va; + + return (u32 *)(cpu_va + byte_offset); +} + +static int rpc_recv_poll(struct gk20a *g) +{ + u64 recv_phys_addr; + + /* Poll the recv ring get pointer in an infinite loop */ + do { + g->sim->recv_ring_put = sim_readl(g->sim, sim_recv_put_r()); + } while (g->sim->recv_ring_put == g->sim->recv_ring_get); + + /* process all replies */ + while (g->sim->recv_ring_put != g->sim->recv_ring_get) { + /* these are in u32 offsets */ + u32 dma_lo_offset = + sim_recv_put_pointer_v(g->sim->recv_ring_get)*2 + 0; + u32 dma_hi_offset = dma_lo_offset + 1; + u32 recv_phys_addr_lo = sim_dma_addr_lo_v( + *sim_recv_ring_bfr(g, dma_lo_offset*4)); + u32 recv_phys_addr_hi = sim_dma_hi_addr_v( + *sim_recv_ring_bfr(g, dma_hi_offset*4)); + + recv_phys_addr = (u64)recv_phys_addr_hi << 32 | + (u64)recv_phys_addr_lo << sim_dma_addr_lo_b(); + + if (recv_phys_addr != + nvgpu_mem_get_phys_addr(g, &g->sim->msg_bfr)) { + nvgpu_err(g, "Error in RPC reply"); + return -EINVAL; + } + + /* Update GET pointer */ + g->sim->recv_ring_get = (g->sim->recv_ring_get + 2*sizeof(u32)) + % SIM_BFR_SIZE; + + sim_writel(g->sim, sim_recv_get_r(), g->sim->recv_ring_get); + + g->sim->recv_ring_put = sim_readl(g->sim, sim_recv_put_r()); + } + + return 0; +} + +static int pci_issue_rpc_and_wait(struct gk20a *g) +{ + int err; + + err = rpc_send_message(g); + if (err != 0) { + nvgpu_err(g, "failed rpc_send_message"); + return err; + } + + err = rpc_recv_poll(g); + if (err != 0) { + nvgpu_err(g, "failed rpc_recv_poll"); + return err; + } + + /* Now check if RPC really succeeded */ + if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) { + nvgpu_err(g, "received failed status!"); + return -EINVAL; + } + return 0; +} + +static void nvgpu_sim_esc_readl(struct gk20a *g, + const char *path, u32 index, u32 *data) +{ + int err; + size_t pathlen = strlen(path); + u32 data_offset; + + pci_sim_write_hdr(g, sim_msg_function_sim_escape_read_v(), + sim_escape_read_hdr_size()); + *pci_sim_msg_param(g, 0) = index; + *pci_sim_msg_param(g, 4) = sizeof(u32); + data_offset = round_up(pathlen + 1, sizeof(u32)); + *pci_sim_msg_param(g, 8) = data_offset; + strcpy((char *)pci_sim_msg_param(g, sim_escape_read_hdr_size()), path); + + err = pci_issue_rpc_and_wait(g); + + if (err == 0) { + nvgpu_memcpy((u8 *)data, + (u8 *)pci_sim_msg_param(g, + nvgpu_safe_add_u32(data_offset, + sim_escape_read_hdr_size())), + sizeof(u32)); + } else { + *data = 0xffffffff; + WARN(1, "pci_issue_rpc_and_wait failed err=%d", err); + } +} + +static int nvgpu_sim_init_late(struct gk20a *g) +{ + u64 phys; + int err = -ENOMEM; + + nvgpu_info(g, "sim init late pci"); + + if (!g->sim) { + return 0; + } + + /* allocate sim event/msg buffers */ + err = nvgpu_alloc_sim_buffer(g, &g->sim->send_bfr); + err = err || nvgpu_alloc_sim_buffer(g, &g->sim->recv_bfr); + err = err || nvgpu_alloc_sim_buffer(g, &g->sim->msg_bfr); + + if (err != 0) { + goto fail; + } + + /* mark send ring invalid */ + sim_writel(g->sim, sim_send_ring_r(), sim_send_ring_status_invalid_f()); + + /* read get pointer and make equal to put */ + g->sim->send_ring_put = sim_readl(g->sim, sim_send_get_r()); + sim_writel(g->sim, sim_send_put_r(), g->sim->send_ring_put); + + /* write send ring address and make it valid */ + phys = nvgpu_mem_get_phys_addr(g, &g->sim->send_bfr); + 
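+	/*
+	 * The 64-bit buffer address is split across two registers: the upper
+	 * 32 bits go into SEND_RING_HI, while the low bits are shifted down
+	 * by sim_send_ring_addr_lo_b() and packed into SEND_RING together
+	 * with the status, target and size fields.
+	 */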
sim_writel(g->sim, sim_send_ring_hi_r(), + sim_send_ring_hi_addr_f(u64_hi32(phys))); + sim_writel(g->sim, sim_send_ring_r(), + sim_send_ring_status_valid_f() | + sim_send_ring_target_phys_pci_coherent_f() | + sim_send_ring_size_4kb_f() | + sim_send_ring_addr_lo_f(phys >> sim_send_ring_addr_lo_b())); + + /* repeat for recv ring (but swap put,get as roles are opposite) */ + sim_writel(g->sim, sim_recv_ring_r(), sim_recv_ring_status_invalid_f()); + + /* read put pointer and make equal to get */ + g->sim->recv_ring_get = sim_readl(g->sim, sim_recv_put_r()); + sim_writel(g->sim, sim_recv_get_r(), g->sim->recv_ring_get); + + /* write send ring address and make it valid */ + phys = nvgpu_mem_get_phys_addr(g, &g->sim->recv_bfr); + sim_writel(g->sim, sim_recv_ring_hi_r(), + sim_recv_ring_hi_addr_f(u64_hi32(phys))); + sim_writel(g->sim, sim_recv_ring_r(), + sim_recv_ring_status_valid_f() | + sim_recv_ring_target_phys_pci_coherent_f() | + sim_recv_ring_size_4kb_f() | + sim_recv_ring_addr_lo_f(phys >> sim_recv_ring_addr_lo_b())); + + return 0; + fail: + nvgpu_free_sim_support(g); + return err; +} + +int nvgpu_init_sim_support_pci(struct gk20a *g) +{ + + if(!g->sim) { + return 0; + } + + g->sim->sim_init_late = nvgpu_sim_init_late; + g->sim->remove_support = nvgpu_remove_sim_support; + g->sim->esc_readl = nvgpu_sim_esc_readl; + return 0; + +} diff --git a/drivers/gpu/nvgpu/common/swdebug/profile.c b/drivers/gpu/nvgpu/common/swdebug/profile.c new file mode 100644 index 000000000..53c632214 --- /dev/null +++ b/drivers/gpu/nvgpu/common/swdebug/profile.c @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * A simple profiler, capable of generating simple stats for a set of samples. + */ + +/* + * The sample array is a 1d array comprised of repeating rows of data. To + * index the array as though it were a row-major matrix, we need to do some + * simple math. + */ +static inline u32 matrix_to_linear_index(struct nvgpu_swprofiler *p, + u32 row, u32 col) +{ + return (row * p->psample_len) + col; +} + +/* + * Just check the samples field; it'll be allocated for an enabled profiler. + * This is an intrisically racy call; don't rely on it to determine whether the + * underlying pointers/fields really are initialized or not. 
+ * + * However, since this doesn't take the profiler lock, if you use it under the + * profiler lock, you can be sure the state won't change while you hold the + * lock. + */ +bool nvgpu_swprofile_is_enabled(struct nvgpu_swprofiler *p) +{ + return p->samples != NULL; +} + +void nvgpu_swprofile_initialize(struct gk20a *g, + struct nvgpu_swprofiler *p, + const char *col_names[]) +{ + if (p->col_names != NULL) { + /* + * Profiler is already initialized. + */ + return; + } + + nvgpu_mutex_init(&p->lock); + p->g = g; + + p->col_names = col_names; + + p->psample_len = 0U; + while (col_names[p->psample_len] != NULL) { + p->psample_len++; + } +} + +int nvgpu_swprofile_open(struct gk20a *g, struct nvgpu_swprofiler *p) +{ + int ret = 0; + + nvgpu_mutex_acquire(&p->lock); + + /* + * If this profiler is already opened, just take a ref and return. + */ + if (p->samples != NULL) { + nvgpu_ref_get(&p->ref); + nvgpu_mutex_release(&p->lock); + return 0; + } + + /* + * Otherwise allocate the necessary data structures, etc. + */ + p->samples = nvgpu_vzalloc(g, + PROFILE_ENTRIES * p->psample_len * + sizeof(*p->samples)); + if (p->samples == NULL) { + ret = -ENOMEM; + goto fail; + } + + p->samples_start = nvgpu_vzalloc(g, + PROFILE_ENTRIES * sizeof(*p->samples_start)); + if (p->samples_start == NULL) { + ret = -ENOMEM; + goto fail; + } + + nvgpu_ref_init(&p->ref); + + nvgpu_mutex_release(&p->lock); + + return 0; + +fail: + if (p->samples != NULL) { + nvgpu_vfree(g, p->samples); + p->samples = NULL; + } + nvgpu_mutex_release(&p->lock); + + return ret; +} + +static void nvgpu_swprofile_free(struct nvgpu_ref *ref) +{ + struct nvgpu_swprofiler *p = container_of(ref, struct nvgpu_swprofiler, ref); + + nvgpu_vfree(p->g, p->samples); + nvgpu_vfree(p->g, p->samples_start); + p->samples = NULL; + p->samples_start = NULL; +} + +void nvgpu_swprofile_close(struct nvgpu_swprofiler *p) +{ + nvgpu_ref_put(&p->ref, nvgpu_swprofile_free); +} + +static void nvgpu_profile_print_col_header(struct nvgpu_swprofiler *p, + struct nvgpu_debug_context *o) +{ + u32 i; + + for (i = 0U; i < p->psample_len; i++) { + gk20a_debug_output(o, " %15s", p->col_names[i]); + } + gk20a_debug_output(o, "\n"); + +} + +/* + * Note: this does _not_ lock the profiler. This is a conscious choice. If we + * do lock the profiler then there's the possibility that you get bad data due + * to the snapshot blocking on some other user printing the contents of the + * profiler. + * + * Instead, this way, it's possible that someone printing the data in the + * profiler gets a sample that's a mix of old and new. That's not great, but + * IMO worse than a completely bogus sample. + * + * Also it's really quite unlikely for this race to happen in practice as the + * print function is executed as a result of a debugfs call. + */ +void nvgpu_swprofile_snapshot(struct nvgpu_swprofiler *p, u32 idx) +{ + u32 index; + + /* + * Handle two cases: the first allows calling code to simply skip + * any profiling by passing in a NULL profiler; see the CDE code + * for this. The second case is if a profiler is not "opened". + */ + if (p == NULL || p->samples == NULL) { + return; + } + + /* + * p->sample_index is the current row, aka sample, we are writing to. + * idx is the column - i.e the sub-sample. 
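+	 * For example, with psample_len == 4, sample_index == 2 and idx == 1,
+	 * the timestamp lands in samples[2 * 4 + 1], i.e. slot 9.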
+ */ + index = matrix_to_linear_index(p, p->sample_index, idx); + + p->samples[index] = nvgpu_current_time_ns(); +} + +void nvgpu_swprofile_begin_sample(struct nvgpu_swprofiler *p) +{ + nvgpu_mutex_acquire(&p->lock); + + if (p == NULL || p->samples == NULL) { + nvgpu_mutex_release(&p->lock); + return; + } + + p->sample_index++; + + /* Handle wrap. */ + if (p->sample_index >= PROFILE_ENTRIES) { + p->sample_index = 0U; + } + + /* + * Reference time for subsequent subsamples in this sample. + */ + p->samples_start[p->sample_index] = nvgpu_current_time_ns(); + + nvgpu_mutex_release(&p->lock); +} + +static int profile_cmp(const void *a, const void *b) +{ + return *((const u64 *) a) - *((const u64 *) b); +} + +#define PERCENTILE_WIDTH 5 +#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) + +static u32 nvgpu_swprofile_build_ranges(struct nvgpu_swprofiler *p, + u64 *storage, + u64 *percentiles, + u32 index_end, + u32 index_start) +{ + u32 i; + u32 nelem = 0U; + + /* + * Iterate through a column and build a temporary slice array of samples + * so that we can sort them without corrupting the current data. + * + * Note that we have to first convert the row/column indexes into linear + * indexes to access the underlying sample array. + */ + for (i = 0; i < PROFILE_ENTRIES; i++) { + u32 linear_idx_start = matrix_to_linear_index(p, i, index_start); + u32 linear_idx_end = matrix_to_linear_index(p, i, index_end); + + if (p->samples[linear_idx_end] <= + p->samples[linear_idx_start]) { + /* This is an invalid element */ + continue; + } + + storage[nelem] = p->samples[linear_idx_end] - + p->samples[linear_idx_start]; + nelem++; + } + + /* sort it */ + sort(storage, nelem, sizeof(u64), profile_cmp, NULL); + + /* build ranges */ + for (i = 0; i < PERCENTILE_RANGES; i++) { + percentiles[i] = nelem < PERCENTILE_RANGES ? 0 : + storage[(PERCENTILE_WIDTH * (i + 1) * nelem)/100 - 1]; + } + + return nelem; +} + +/* + * Print a list of percentiles spaced by 5%. Note that the debug_context needs + * to be special here. _Most_ print functions in NvGPU automatically add a new + * line to the end of each print statement. This function _specifically_ + * requires that your debug print function does _NOT_ do this. + */ +void nvgpu_swprofile_print_ranges(struct gk20a *g, + struct nvgpu_swprofiler *p, + struct nvgpu_debug_context *o) +{ + u32 nelem = 0U, i, j; + u64 *sorted_data = NULL; + u64 *percentiles = NULL; + + nvgpu_mutex_acquire(&p->lock); + + if (p->samples == NULL) { + gk20a_debug_output(o, "Profiler not enabled.\n"); + goto done; + } + + sorted_data = nvgpu_vzalloc(g, + PROFILE_ENTRIES * p->psample_len * + sizeof(u64)); + percentiles = nvgpu_vzalloc(g, + PERCENTILE_RANGES * p->psample_len * + sizeof(u64)); + if (!sorted_data || !percentiles) { + nvgpu_err(g, "vzalloc: OOM!"); + goto done; + } + + /* + * Loop over each column; sort the column's data and then build + * percentile ranges based on that sorted data. + */ + for (i = 0U; i < p->psample_len; i++) { + nelem = nvgpu_swprofile_build_ranges(p, + &sorted_data[i * PROFILE_ENTRIES], + &percentiles[i * PERCENTILE_RANGES], + i, 0U); + } + + gk20a_debug_output(o, "Samples: %u\n", nelem); + gk20a_debug_output(o, "%6s", "Perc"); + nvgpu_profile_print_col_header(p, o); + + gk20a_debug_output(o, "%6s", "----"); + for (i = 0U; i < p->psample_len; i++) { + gk20a_debug_output(o, " %15s", "---------------"); + } + gk20a_debug_output(o, "\n"); + + /* + * percentiles is another matrix, but this time it's using column major indexing. 
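+	 * The i-th percentile of subsample j therefore lives at
+	 * percentiles[(j * PERCENTILE_RANGES) + i], which is how the inner
+	 * loop below walks one printed row across all columns.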
+ */ + for (i = 0U; i < PERCENTILE_RANGES; i++) { + gk20a_debug_output(o, "%3upc ", PERCENTILE_WIDTH * (i + 1)); + for (j = 0U; j < p->psample_len; j++) { + gk20a_debug_output(o, " %15llu", + percentiles[(j * PERCENTILE_RANGES) + i]); + } + gk20a_debug_output(o, "\n"); + } + gk20a_debug_output(o, "\n"); + +done: + nvgpu_vfree(g, sorted_data); + nvgpu_vfree(g, percentiles); + nvgpu_mutex_release(&p->lock); +} + +/* + * Print raw data for the profiler. Can be useful if you want to do more sophisticated + * analysis in python or something like that. + * + * Note this requires a debug context that does not automatically add newlines. + */ +void nvgpu_swprofile_print_raw_data(struct gk20a *g, + struct nvgpu_swprofiler *p, + struct nvgpu_debug_context *o) +{ + u32 i, j; + + nvgpu_mutex_acquire(&p->lock); + + if (p->samples == NULL) { + gk20a_debug_output(o, "Profiler not enabled.\n"); + goto done; + } + + gk20a_debug_output(o, "max samples: %u, sample len: %u\n", + PROFILE_ENTRIES, p->psample_len); + + nvgpu_profile_print_col_header(p, o); + + for (i = 0U; i < PROFILE_ENTRIES; i++) { + for (j = 0U; j < p->psample_len; j++) { + u32 index = matrix_to_linear_index(p, i, j); + + gk20a_debug_output(o, " %15llu", + p->samples[index] - p->samples_start[i]); + } + gk20a_debug_output(o, "\n"); + } + +done: + nvgpu_mutex_release(&p->lock); +} + +/* + * Print stats for a single column. This covers: + * + * Min + * Max + * Mean + * Median + * Sigma ^ 2 + * + * Note that the results array has to be at least 5 entries long. Storage should be + * an array that is at least PROFILE_ENTRIES long. This is used for working out the + * median - we need a sorted sample set for that. + * + * Note: this skips empty samples. + * + * Note: there's a limit to the sensitivity of these profiling stats. For things that + * happen faster than the granularity of the underlying timer, you'll need to use + * something more sophisticated. It's ok to have some zeros, but too many and you + * won't get a very interesting picture of the data. + */ +static u32 nvgpu_swprofile_subsample_basic_stats(struct gk20a *g, + struct nvgpu_swprofiler *p, + u32 subsample, + u64 *results, + u64 *storage) +{ + u64 sum = 0U, samples = 0U; + u64 min = U64_MAX, max = 0U; + u64 mean, median; + u64 sigma_2 = 0U; + u32 i; + + /* + * First, let's work out min, max, sum, and number of samples of data. With this we + * can then get the mean, median, and sigma^2. + */ + for (i = 0U; i < PROFILE_ENTRIES; i++) { + u32 ss = matrix_to_linear_index(p, i, subsample); + u64 sample = p->samples[ss] - p->samples_start[i]; + + if (p->samples_start[i] == 0U) { + continue; + } + + if (sample < min) { + min = sample; + } + if (sample > max) { + max = sample; + } + + storage[samples] = sample; + sum += sample; + samples += 1U; + } + + /* + * If min is still U64_MAX it means that we almost certainly did not actually + * get a single valid sample. + */ + if (min == U64_MAX) { + min = 0U; + } + + /* With the sorted list of samples we can easily compute the median. */ + sort(storage, samples, sizeof(u64), profile_cmp, NULL); + + mean = sum / samples; + median = storage[samples / 2]; + + /* Compute the sample variance (i.e sigma squared). */ + for (i = 0U; i < samples; i++) { + sigma_2 += storage[i] * storage[i]; + } + + /* Remember: _sample_ variance. 
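+	 * It is computed as sum(x^2) / (n - 1) - mean^2, so at least two
+	 * valid samples are needed for the result to be meaningful.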
*/ + sigma_2 /= (samples - 1U); + sigma_2 -= (mean * mean); + + results[0] = min; + results[1] = max; + results[2] = mean; + results[3] = median; + results[4] = sigma_2; + + return samples; +} + +/* + * Print the following stats for each column: + * + * Min, Max, Mean, Median, Sigma^2 + */ +void nvgpu_swprofile_print_basic_stats(struct gk20a *g, + struct nvgpu_swprofiler *p, + struct nvgpu_debug_context *o) +{ + u32 i; + const char *fmt_header = "%-18s %15s %15s %15s %15s %15s\n"; + const char *fmt_output = "%-18s %15llu %15llu %15llu %15llu %15llu\n"; + u64 *storage; + u32 samples = 0U; + + if (p->samples == NULL) { + gk20a_debug_output(o, "Profiler not enabled.\n"); + return; + } + + storage = nvgpu_kzalloc(g, sizeof(u64) * PROFILE_ENTRIES); + if (storage == NULL) { + gk20a_debug_output(o, "OOM!"); + return; + } + + nvgpu_mutex_acquire(&p->lock); + + gk20a_debug_output(o, fmt_header, + "SubSample", "Min", "Max", + "Mean", "Median", "Sigma^2"); + gk20a_debug_output(o, fmt_header, + "---------", "---", "---", + "----", "------", "-------"); + + for (i = 0U; i < p->psample_len; i++) { + u64 results[5]; + + samples = nvgpu_swprofile_subsample_basic_stats(g, p, i, + results, storage); + + gk20a_debug_output(o, fmt_output, p->col_names[i], + results[0], results[1], + results[2], results[3], results[4]); + } + + gk20a_debug_output(o, "Number of samples: %u\n", samples); + + nvgpu_mutex_release(&p->lock); + nvgpu_kfree(g, storage); +} diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c new file mode 100644 index 000000000..205b5e02b --- /dev/null +++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c @@ -0,0 +1,104 @@ +/* + * GK20A Channel Synchronization Abstraction + * + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "channel_sync_priv.h" + +struct nvgpu_channel_sync *nvgpu_channel_sync_create(struct nvgpu_channel *c) +{ + if (nvgpu_has_syncpoints(c->g)) { + return nvgpu_channel_sync_syncpt_create(c); + } else { +#ifdef CONFIG_NVGPU_SW_SEMAPHORE + return nvgpu_channel_sync_semaphore_create(c); +#else + return NULL; +#endif + } +} + +bool nvgpu_channel_sync_needs_os_fence_framework(struct gk20a *g) +{ + return !nvgpu_has_syncpoints(g); +} + +int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd, + struct priv_cmd_entry **entry, u32 max_wait_cmds) +{ + return s->ops->wait_fence_fd(s, fd, entry, max_wait_cmds); +} + +int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, + bool need_sync_fence) +{ + return s->ops->incr(s, entry, fence, need_sync_fence); +} + +int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, + bool wfi, bool need_sync_fence) +{ + return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence); +} + +void nvgpu_channel_sync_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + s->ops->mark_progress(s, register_irq); +} + +void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s) +{ + s->ops->set_min_eq_max(s); +} + +void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s) +{ + nvgpu_atomic_inc(&s->refcount); +} + +bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s) +{ + return nvgpu_atomic_dec_and_test(&s->refcount); +} + +void nvgpu_channel_sync_destroy(struct nvgpu_channel_sync *sync) +{ + sync->ops->destroy(sync); +} diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h new file mode 100644 index 000000000..4916b5ba5 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h @@ -0,0 +1,83 @@ +/* + * Nvgpu Channel Synchronization Abstraction + * + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CHANNEL_SYNC_PRIV_H +#define NVGPU_CHANNEL_SYNC_PRIV_H + +/* + * These APIs are used for job synchronization that we know about in the + * driver. 
If submits happen in userspace only, none of this will be needed and + * won't be included. This is here just to double check for now. + */ +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + +#include +#include + +struct priv_cmd_entry; +struct nvgpu_fence_type; +struct nvgpu_channel_sync_ops; + +/* + * This struct is private and should not be used directly. Users should + * instead use the public APIs starting with nvgpu_channel_sync_* + */ +struct nvgpu_channel_sync { + nvgpu_atomic_t refcount; + const struct nvgpu_channel_sync_ops *ops; +}; + +/* + * This struct is private and should not be used directly. Users should + * instead use the public APIs starting with nvgpu_channel_sync_* + */ +struct nvgpu_channel_sync_ops { + int (*wait_fence_raw)(struct nvgpu_channel_sync *s, u32 id, u32 thresh, + struct priv_cmd_entry **entry); + + int (*wait_fence_fd)(struct nvgpu_channel_sync *s, int fd, + struct priv_cmd_entry **entry, u32 max_wait_cmds); + + int (*incr)(struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, + struct nvgpu_fence_type *fence, + bool need_sync_fence); + + int (*incr_user)(struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, + struct nvgpu_fence_type *fence, + bool wfi, + bool need_sync_fence); + + void (*mark_progress)(struct nvgpu_channel_sync *s, + bool register_irq); + + void (*set_min_eq_max)(struct nvgpu_channel_sync *s); + + void (*destroy)(struct nvgpu_channel_sync *s); +}; + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + +#endif /* NVGPU_CHANNEL_SYNC_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c new file mode 100644 index 000000000..4233d4507 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c @@ -0,0 +1,389 @@ +/* + * GK20A Channel Synchronization Abstraction + * + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "channel_sync_priv.h" + +struct nvgpu_channel_sync_semaphore { + struct nvgpu_channel_sync base; + struct nvgpu_channel *c; + struct nvgpu_hw_semaphore *hw_sema; +}; + +static struct nvgpu_channel_sync_semaphore * +nvgpu_channel_sync_semaphore_from_base(struct nvgpu_channel_sync *base) +{ + return (struct nvgpu_channel_sync_semaphore *) + ((uintptr_t)base - + offsetof(struct nvgpu_channel_sync_semaphore, base)); +} + +#ifndef CONFIG_NVGPU_SYNCFD_NONE +static void add_sema_wait_cmd(struct gk20a *g, struct nvgpu_channel *c, + struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd) +{ + int ch = c->chid; + u64 va; + + /* acquire just needs to read the mem. */ + va = nvgpu_semaphore_gpu_ro_va(s); + + g->ops.sync.sema.add_wait_cmd(g, cmd, s, va); + gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3llu" + "va=0x%llx cmd=%p", + ch, nvgpu_semaphore_get_value(s), + nvgpu_semaphore_get_hw_pool_page_idx(s), + va, cmd); +} + +static void channel_sync_semaphore_gen_wait_cmd(struct nvgpu_channel *c, + struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd, + u32 wait_cmd_size) +{ + bool has_incremented; + + if (sema == NULL) { + /* came from an expired sync fence */ + nvgpu_priv_cmdbuf_append_zeros(c->g, wait_cmd, wait_cmd_size); + } else { + has_incremented = nvgpu_semaphore_can_wait(sema); + nvgpu_assert(has_incremented); + add_sema_wait_cmd(c->g, c, sema, wait_cmd); + nvgpu_semaphore_put(sema); + } +} +#endif + +static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c, + struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd, + bool wfi, struct nvgpu_hw_semaphore *hw_sema) +{ + int ch = c->chid; + u64 va; + + /* release will need to write back to the semaphore memory. */ + va = nvgpu_semaphore_gpu_rw_va(s); + + /* find the right sema next_value to write (like syncpt's max). 
*/ + nvgpu_semaphore_prepare(s, hw_sema); + + g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, wfi); + gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3llu" + "va=0x%llx entry=%p", + ch, nvgpu_semaphore_get_value(s), + nvgpu_semaphore_read(s), + nvgpu_semaphore_get_hw_pool_page_idx(s), + va, cmd); +} + +static int channel_sync_semaphore_wait_fd( + struct nvgpu_channel_sync *s, int fd, + struct priv_cmd_entry **entry, u32 max_wait_cmds) +{ +#ifndef CONFIG_NVGPU_SYNCFD_NONE + struct nvgpu_channel_sync_semaphore *sema = + nvgpu_channel_sync_semaphore_from_base(s); + struct nvgpu_channel *c = sema->c; + + struct nvgpu_os_fence os_fence = {0}; + struct nvgpu_os_fence_sema os_fence_sema = {0}; + int err; + u32 wait_cmd_size, i, num_fences; + struct nvgpu_semaphore *semaphore = NULL; + + err = nvgpu_os_fence_fdget(&os_fence, c, fd); + if (err != 0) { + return err; + } + + err = nvgpu_os_fence_get_semas(&os_fence_sema, &os_fence); + if (err != 0) { + goto cleanup; + } + + num_fences = nvgpu_os_fence_sema_get_num_semaphores(&os_fence_sema); + + if (num_fences == 0U) { + goto cleanup; + } + + if ((max_wait_cmds != 0U) && (num_fences > max_wait_cmds)) { + err = -EINVAL; + goto cleanup; + } + + wait_cmd_size = c->g->ops.sync.sema.get_wait_cmd_size(); + err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, + wait_cmd_size * num_fences, entry); + if (err != 0) { + goto cleanup; + } + + for (i = 0; i < num_fences; i++) { + nvgpu_os_fence_sema_extract_nth_semaphore( + &os_fence_sema, i, &semaphore); + channel_sync_semaphore_gen_wait_cmd(c, semaphore, *entry, + wait_cmd_size); + } + +cleanup: + os_fence.ops->drop_ref(&os_fence); + return err; +#else + struct nvgpu_channel_sync_semaphore *sema = + nvgpu_channel_sync_semaphore_from_base(s); + + nvgpu_err(sema->c->g, + "trying to use sync fds with CONFIG_NVGPU_SYNCFD_NONE"); + return -ENODEV; +#endif +} + +static int channel_sync_semaphore_incr_common( + struct nvgpu_channel_sync *s, bool wfi_cmd, + struct priv_cmd_entry **incr_cmd, + struct nvgpu_fence_type *fence, + bool need_sync_fence) +{ + u32 incr_cmd_size; + struct nvgpu_channel_sync_semaphore *sp = + nvgpu_channel_sync_semaphore_from_base(s); + struct nvgpu_channel *c = sp->c; + struct nvgpu_semaphore *semaphore; + int err = 0; + struct nvgpu_os_fence os_fence = {0}; + + semaphore = nvgpu_semaphore_alloc(sp->hw_sema); + if (semaphore == NULL) { + nvgpu_err(c->g, + "ran out of semaphores"); + return -ENOMEM; + } + + incr_cmd_size = c->g->ops.sync.sema.get_incr_cmd_size(); + err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, incr_cmd_size, incr_cmd); + if (err != 0) { + goto clean_up_sema; + } + + /* Release the completion semaphore. */ + add_sema_incr_cmd(c->g, c, semaphore, *incr_cmd, wfi_cmd, sp->hw_sema); + + if (need_sync_fence) { + err = nvgpu_os_fence_sema_create(&os_fence, c, semaphore); + + if (err != 0) { + goto clean_up_cmdbuf; + } + } + + nvgpu_fence_from_semaphore(fence, semaphore, &c->semaphore_wq, os_fence); + + return 0; + +clean_up_cmdbuf: + nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, *incr_cmd); +clean_up_sema: + nvgpu_semaphore_put(semaphore); + return err; +} + +static int channel_sync_semaphore_incr( + struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, + struct nvgpu_fence_type *fence, + bool need_sync_fence) +{ + /* Don't put wfi cmd to this one since we're not returning + * a fence to user space. 
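+	 * channel_sync_semaphore_incr_user() below passes the caller's wfi
+	 * flag through instead.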
*/ + return channel_sync_semaphore_incr_common(s, + false /* no wfi */, + entry, fence, need_sync_fence); +} + +static int channel_sync_semaphore_incr_user( + struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, + struct nvgpu_fence_type *fence, + bool wfi, + bool need_sync_fence) +{ +#ifndef CONFIG_NVGPU_SYNCFD_NONE + int err; + + err = channel_sync_semaphore_incr_common(s, wfi, entry, fence, + need_sync_fence); + if (err != 0) { + return err; + } + + return 0; +#else + struct nvgpu_channel_sync_semaphore *sema = + nvgpu_channel_sync_semaphore_from_base(s); + + nvgpu_err(sema->c->g, + "trying to use sync fds with CONFIG_NVGPU_SYNCFD_NONE"); + return -ENODEV; +#endif +} + +static void channel_sync_semaphore_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + struct nvgpu_channel_sync_semaphore *sp = + nvgpu_channel_sync_semaphore_from_base(s); + + (void)nvgpu_hw_semaphore_update_next(sp->hw_sema); + /* + * register_irq is ignored: there is only one semaphore interrupt that + * triggers nvgpu_channel_update() and it's always active. + */ +} + +static void channel_sync_semaphore_set_min_eq_max(struct nvgpu_channel_sync *s) +{ + struct nvgpu_channel_sync_semaphore *sp = + nvgpu_channel_sync_semaphore_from_base(s); + struct nvgpu_channel *c = sp->c; + bool updated; + + updated = nvgpu_hw_semaphore_reset(sp->hw_sema); + + if (updated) { + nvgpu_cond_broadcast_interruptible(&c->semaphore_wq); + } +} + +static void channel_sync_semaphore_destroy(struct nvgpu_channel_sync *s) +{ + struct nvgpu_channel_sync_semaphore *sema = + nvgpu_channel_sync_semaphore_from_base(s); + + struct nvgpu_channel *c = sema->c; + struct gk20a *g = c->g; + + if (c->has_os_fence_framework_support && + g->os_channel.os_fence_framework_inst_exists(c)) { + g->os_channel.destroy_os_fence_framework(c); + } + nvgpu_hw_semaphore_free(sema->hw_sema); + + nvgpu_kfree(g, sema); +} + +static const struct nvgpu_channel_sync_ops channel_sync_semaphore_ops = { + .wait_fence_fd = channel_sync_semaphore_wait_fd, + .incr = channel_sync_semaphore_incr, + .incr_user = channel_sync_semaphore_incr_user, + .mark_progress = channel_sync_semaphore_mark_progress, + .set_min_eq_max = channel_sync_semaphore_set_min_eq_max, + .destroy = channel_sync_semaphore_destroy, +}; + +/* Converts a valid struct nvgpu_channel_sync ptr to + * struct nvgpu_channel_sync_syncpt ptr else return NULL. 
+ */ +struct nvgpu_channel_sync_semaphore * + nvgpu_channel_sync_to_semaphore(struct nvgpu_channel_sync *sync) +{ + struct nvgpu_channel_sync_semaphore *sema = NULL; + if (sync->ops == &channel_sync_semaphore_ops) { + sema = nvgpu_channel_sync_semaphore_from_base(sync); + } + + return sema; +} + +struct nvgpu_hw_semaphore * +nvgpu_channel_sync_semaphore_hw_sema(struct nvgpu_channel_sync_semaphore *sema) +{ + return sema->hw_sema; +} + +struct nvgpu_channel_sync * +nvgpu_channel_sync_semaphore_create(struct nvgpu_channel *c) +{ + struct nvgpu_channel_sync_semaphore *sema; + struct gk20a *g = c->g; + int asid = -1; + int err; + + if (c->vm == NULL) { + nvgpu_do_assert(); + return NULL; + } + + sema = nvgpu_kzalloc(c->g, sizeof(*sema)); + if (sema == NULL) { + return NULL; + } + sema->c = c; + + err = nvgpu_hw_semaphore_init(c->vm, c->chid, &sema->hw_sema); + if (err != 0) { + goto err_free_sema; + } + + if (c->vm->as_share != NULL) { + asid = c->vm->as_share->id; + } + + if (c->has_os_fence_framework_support) { + /*Init the sync_timeline for this channel */ + err = g->os_channel.init_os_fence_framework(c, + "gk20a_ch%d_as%d", c->chid, asid); + + if (err != 0) { + goto err_free_hw_sema; + } + } + + nvgpu_atomic_set(&sema->base.refcount, 0); + sema->base.ops = &channel_sync_semaphore_ops; + + return &sema->base; + +err_free_hw_sema: + nvgpu_hw_semaphore_free(sema->hw_sema); +err_free_sema: + nvgpu_kfree(g, sema); + return NULL; +} diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c new file mode 100644 index 000000000..740eb3143 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c @@ -0,0 +1,419 @@ +/* + * GK20A Channel Synchronization Abstraction + * + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#if !defined(CONFIG_NVGPU_SYNCFD_NONE) && !defined(CONFIG_TEGRA_GK20A_NVHOST_HOST1X) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "channel_sync_priv.h" + +struct nvgpu_channel_sync_syncpt { + struct nvgpu_channel_sync base; + struct nvgpu_channel *c; + struct nvgpu_nvhost_dev *nvhost; + u32 id; + struct nvgpu_mem syncpt_buf; + u32 max_thresh; +}; + +static struct nvgpu_channel_sync_syncpt * +nvgpu_channel_sync_syncpt_from_base(struct nvgpu_channel_sync *base) +{ + return (struct nvgpu_channel_sync_syncpt *) + ((uintptr_t)base - + offsetof(struct nvgpu_channel_sync_syncpt, base)); +} + +static void channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c, + u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd) +{ + nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", + id, c->vm->syncpt_ro_map_gpu_va); + c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd, id, thresh, + c->vm->syncpt_ro_map_gpu_va); +} + +static int channel_sync_syncpt_wait_raw(struct nvgpu_channel_sync_syncpt *s, + u32 id, u32 thresh, struct priv_cmd_entry **wait_cmd) +{ + struct nvgpu_channel *c = s->c; + int err = 0; + u32 wait_cmd_size = c->g->ops.sync.syncpt.get_wait_cmd_size(); + + if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(s->nvhost, id)) { + return -EINVAL; + } + + err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, wait_cmd_size, wait_cmd); + if (err != 0) { + return err; + } + + channel_sync_syncpt_gen_wait_cmd(c, id, thresh, *wait_cmd); + + return 0; +} + +#ifndef CONFIG_NVGPU_SYNCFD_NONE +struct gen_wait_cmd_iter_data { + struct nvgpu_channel *c; + struct priv_cmd_entry *wait_cmd; +}; + +static int gen_wait_cmd_iter(struct nvhost_ctrl_sync_fence_info info, void *d) +{ + struct gen_wait_cmd_iter_data *data = d; + + channel_sync_syncpt_gen_wait_cmd(data->c, info.id, info.thresh, + data->wait_cmd); + return 0; +} + +static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd, + struct priv_cmd_entry **wait_cmd, u32 max_wait_cmds) +{ + struct nvgpu_os_fence os_fence = {0}; + struct nvgpu_os_fence_syncpt os_fence_syncpt = {0}; + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + struct nvgpu_channel *c = sp->c; + struct gen_wait_cmd_iter_data iter_data = { + .c = c + }; + u32 num_fences, wait_cmd_size; + int err = 0; + + err = nvgpu_os_fence_fdget(&os_fence, c, fd); + if (err != 0) { + return -EINVAL; + } + + err = nvgpu_os_fence_get_syncpts(&os_fence_syncpt, &os_fence); + if (err != 0) { + goto cleanup; + } + + num_fences = nvgpu_os_fence_syncpt_get_num_syncpoints(&os_fence_syncpt); + + if (num_fences == 0U) { + goto cleanup; + } + + if ((max_wait_cmds != 0U) && (num_fences > max_wait_cmds)) { + err = -EINVAL; + goto cleanup; + } + + wait_cmd_size = c->g->ops.sync.syncpt.get_wait_cmd_size(); + err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, + wait_cmd_size * num_fences, wait_cmd); + if (err != 0) { + goto cleanup; + } + + iter_data.wait_cmd = *wait_cmd; + + nvgpu_os_fence_syncpt_foreach_pt(&os_fence_syncpt, + gen_wait_cmd_iter, &iter_data); + +cleanup: + os_fence.ops->drop_ref(&os_fence); + return err; +} +#else /* CONFIG_NVGPU_SYNCFD_NONE */ +static int channel_sync_syncpt_wait_fd(struct nvgpu_channel_sync *s, int fd, + struct priv_cmd_entry **wait_cmd, u32 max_wait_cmds) +{ + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + nvgpu_err(sp->c->g, + "trying to use sync fds with 
CONFIG_NVGPU_SYNCFD_NONE"); + return -ENODEV; +} +#endif /* CONFIG_NVGPU_SYNCFD_NONE */ + +static void channel_sync_syncpt_update(void *priv, int nr_completed) +{ + struct nvgpu_channel *ch = priv; + + nvgpu_channel_update(ch); + + /* note: channel_get() is in channel_sync_syncpt_mark_progress() */ + nvgpu_channel_put(ch); +} + +static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s, + bool wfi_cmd, + struct priv_cmd_entry **incr_cmd, + struct nvgpu_fence_type *fence, + bool need_sync_fence) +{ + u32 thresh; + int err; + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + struct nvgpu_channel *c = sp->c; + struct nvgpu_os_fence os_fence = {0}; + struct gk20a *g = c->g; + + err = nvgpu_priv_cmdbuf_alloc(c->priv_cmd_q, + g->ops.sync.syncpt.get_incr_cmd_size(wfi_cmd), + incr_cmd); + if (err != 0) { + return err; + } + + nvgpu_log(g, gpu_dbg_info, "sp->id %d gpu va %llx", + sp->id, sp->syncpt_buf.gpu_va); + g->ops.sync.syncpt.add_incr_cmd(g, *incr_cmd, + sp->id, sp->syncpt_buf.gpu_va, wfi_cmd); + + thresh = nvgpu_wrapping_add_u32(sp->max_thresh, + g->ops.sync.syncpt.get_incr_per_release()); + + if (need_sync_fence) { + err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost, + sp->id, thresh); + + if (err != 0) { + goto clean_up_priv_cmd; + } + } + + nvgpu_fence_from_syncpt(fence, sp->nvhost, sp->id, thresh, os_fence); + + return 0; + +clean_up_priv_cmd: + nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, *incr_cmd); + return err; +} + +static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, + struct nvgpu_fence_type *fence, + bool need_sync_fence) +{ + /* Don't put wfi cmd to this one since we're not returning + * a fence to user space. */ + return channel_sync_syncpt_incr_common(s, false, entry, fence, + need_sync_fence); +} + +static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s, + struct priv_cmd_entry **entry, + struct nvgpu_fence_type *fence, + bool wfi, + bool need_sync_fence) +{ + /* Need to do 'wfi + host incr' since we return the fence + * to user space. */ + return channel_sync_syncpt_incr_common(s, wfi, entry, fence, + need_sync_fence); +} + +static void channel_sync_syncpt_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + struct nvgpu_channel *c = sp->c; + struct gk20a *g = c->g; + + sp->max_thresh = nvgpu_wrapping_add_u32(sp->max_thresh, + g->ops.sync.syncpt.get_incr_per_release()); + + if (register_irq) { + struct nvgpu_channel *referenced = nvgpu_channel_get(c); + + WARN_ON(referenced == NULL); + + if (referenced != NULL) { + /* + * note: the matching channel_put() is in + * channel_sync_syncpt_update() that gets called when + * the job completes. + */ + + int err = nvgpu_nvhost_intr_register_notifier( + sp->nvhost, + sp->id, sp->max_thresh, + channel_sync_syncpt_update, c); + if (err != 0) { + nvgpu_channel_put(referenced); + } + + /* + * This never fails in practice. If it does, we won't + * be getting a completion signal to free the job + * resources, but maybe this succeeds on a possible + * subsequent submit, and the channel closure path will + * eventually mark everything completed anyway. 
+ */ + WARN(err != 0, + "failed to set submit complete interrupt"); + } + } +} + +int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s, + u32 id, u32 thresh, struct priv_cmd_entry **entry) +{ + return channel_sync_syncpt_wait_raw(s, id, thresh, entry); +} + +static void channel_sync_syncpt_set_min_eq_max(struct nvgpu_channel_sync *s) +{ + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + + nvgpu_nvhost_syncpt_set_minval(sp->nvhost, sp->id, sp->max_thresh); +} + +static u32 channel_sync_syncpt_get_id(struct nvgpu_channel_sync_syncpt *sp) +{ + return sp->id; +} + +static void channel_sync_syncpt_destroy(struct nvgpu_channel_sync *s) +{ + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + + + sp->c->g->ops.sync.syncpt.free_buf(sp->c, &sp->syncpt_buf); + + channel_sync_syncpt_set_min_eq_max(s); + nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost, sp->id); + nvgpu_kfree(sp->c->g, sp); +} + +u32 nvgpu_channel_sync_get_syncpt_id(struct nvgpu_channel_sync_syncpt *s) +{ + return channel_sync_syncpt_get_id(s); +} + +static const struct nvgpu_channel_sync_ops channel_sync_syncpt_ops = { + .wait_fence_fd = channel_sync_syncpt_wait_fd, + .incr = channel_sync_syncpt_incr, + .incr_user = channel_sync_syncpt_incr_user, + .mark_progress = channel_sync_syncpt_mark_progress, + .set_min_eq_max = channel_sync_syncpt_set_min_eq_max, + .destroy = channel_sync_syncpt_destroy, +}; + +struct nvgpu_channel_sync_syncpt * +nvgpu_channel_sync_to_syncpt(struct nvgpu_channel_sync *sync) +{ + struct nvgpu_channel_sync_syncpt *syncpt = NULL; + + if (sync->ops == &channel_sync_syncpt_ops) { + syncpt = nvgpu_channel_sync_syncpt_from_base(sync); + } + + return syncpt; +} + +struct nvgpu_channel_sync * +nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c) +{ + struct nvgpu_channel_sync_syncpt *sp; + char syncpt_name[32]; + int err; + + sp = nvgpu_kzalloc(c->g, sizeof(*sp)); + if (sp == NULL) { + return NULL; + } + + sp->c = c; + sp->nvhost = c->g->nvhost; + + snprintf(syncpt_name, sizeof(syncpt_name), + "%s_%d", c->g->name, c->chid); + + sp->id = nvgpu_nvhost_get_syncpt_client_managed(sp->nvhost, + syncpt_name); + + /** + * This is a fix to handle invalid value of a syncpt. + * Once nvhost update the return value as NVGPU_INVALID_SYNCPT_ID, + * we can remove the zero check. 
+ */ + if ((nvgpu_is_errata_present(c->g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0)) && + (sp->id == 0U)) { + nvgpu_err(c->g, "failed to get free syncpt"); + goto err_free; + } + if (sp->id == NVGPU_INVALID_SYNCPT_ID) { + nvgpu_err(c->g, "failed to get free syncpt"); + goto err_free; + } + + err = sp->c->g->ops.sync.syncpt.alloc_buf(sp->c, sp->id, + &sp->syncpt_buf); + + if (err != 0) { + nvgpu_err(c->g, "failed to allocate syncpoint buffer"); + goto err_put; + } + + err = nvgpu_nvhost_syncpt_read_ext_check(sp->nvhost, sp->id, + &sp->max_thresh); + + if (err != 0) { + goto err_free_buf; + } + + nvgpu_atomic_set(&sp->base.refcount, 0); + sp->base.ops = &channel_sync_syncpt_ops; + + return &sp->base; + +err_free_buf: + sp->c->g->ops.sync.syncpt.free_buf(sp->c, &sp->syncpt_buf); +err_put: + nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost, sp->id); +err_free: + nvgpu_kfree(c->g, sp); + return NULL; +} diff --git a/drivers/gpu/nvgpu/common/sync/channel_user_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_user_syncpt.c new file mode 100644 index 000000000..86f072371 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sync/channel_user_syncpt.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "channel_user_syncpt_priv.h" + +static int user_sync_build_debug_name(struct nvgpu_channel *ch, + char *buf, size_t capacity) +{ + struct gk20a *g = ch->g; + int n; + + (void)strncpy(buf, g->name, capacity); + capacity = nvgpu_safe_sub_u64(capacity, strlen(g->name)); + + (void)strncat(buf, "_", capacity); + capacity = nvgpu_safe_sub_u64(capacity, strlen("_")); + /* + * however, nvgpu_strnadd_u32 expects capacity to include the + * terminating byte, so add it back + */ + capacity = nvgpu_safe_add_u64(capacity, 1); + + n = nvgpu_strnadd_u32(&buf[strlen(buf)], ch->chid, + capacity, 10); + if (n == 0) { + nvgpu_err(g, "strnadd failed!"); + return -EINVAL; + } + capacity = nvgpu_safe_sub_u64(capacity, nvgpu_safe_cast_s32_to_u64(n)); + /* nul byte */ + capacity = nvgpu_safe_sub_u64(capacity, 1UL); + + (void)strncat(buf, "_user", capacity); + /* make sure it didn't get truncated */ + capacity = nvgpu_safe_sub_u64(capacity, strlen("_user")); + + return 0; +} + +struct nvgpu_channel_user_syncpt * +nvgpu_channel_user_syncpt_create(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + struct nvgpu_channel_user_syncpt *s; + char syncpt_name[SYNCPT_NAME_SZ] = {0}; /* e.g. gp10b_42_user */ + int err; + + s = nvgpu_kzalloc(ch->g, sizeof(*s)); + if (s == NULL) { + return NULL; + } + + s->ch = ch; + s->nvhost = g->nvhost; + + err = user_sync_build_debug_name(ch, syncpt_name, + SYNCPT_NAME_SZ - 1UL); + if (err < 0) { + goto err_free; + } + + s->syncpt_id = nvgpu_nvhost_get_syncpt_client_managed(s->nvhost, + syncpt_name); + + /** + * This is a fix to handle invalid value of a syncpt. + * Once nvhost update the return value as NVGPU_INVALID_SYNCPT_ID, + * we can remove the zero check. + */ + if ((nvgpu_is_errata_present(g, NVGPU_ERRATA_SYNCPT_INVALID_ID_0)) && + (s->syncpt_id == 0U)) { + nvgpu_err(g, "failed to get free syncpt"); + goto err_free; + } + if (s->syncpt_id == NVGPU_INVALID_SYNCPT_ID) { + nvgpu_err(g, "failed to get free syncpt"); + goto err_free; + } + + err = g->ops.sync.syncpt.alloc_buf(ch, s->syncpt_id, &s->syncpt_buf); + if (err != 0) { + nvgpu_err(g, "failed to allocate syncpoint buffer"); + goto err_put; + } + + return s; +err_put: + nvgpu_nvhost_syncpt_put_ref_ext(s->nvhost, s->syncpt_id); +err_free: + nvgpu_kfree(g, s); + return NULL; +} + +u32 nvgpu_channel_user_syncpt_get_id(struct nvgpu_channel_user_syncpt *s) +{ + return s->syncpt_id; +} + +u64 nvgpu_channel_user_syncpt_get_address(struct nvgpu_channel_user_syncpt *s) +{ + return s->syncpt_buf.gpu_va; +} + +void nvgpu_channel_user_syncpt_set_safe_state(struct nvgpu_channel_user_syncpt *s) +{ + nvgpu_nvhost_syncpt_set_safe_state(s->nvhost, s->syncpt_id); +} + +void nvgpu_channel_user_syncpt_destroy(struct nvgpu_channel_user_syncpt *s) +{ + struct gk20a *g = s->ch->g; + + g->ops.sync.syncpt.free_buf(s->ch, &s->syncpt_buf); + + nvgpu_nvhost_syncpt_put_ref_ext(s->nvhost, s->syncpt_id); + nvgpu_kfree(g, s); +} diff --git a/drivers/gpu/nvgpu/common/sync/channel_user_syncpt_priv.h b/drivers/gpu/nvgpu/common/sync/channel_user_syncpt_priv.h new file mode 100644 index 000000000..2757e0105 --- /dev/null +++ b/drivers/gpu/nvgpu/common/sync/channel_user_syncpt_priv.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#ifndef NVGPU_USER_SYNCPT_PRIV_H +#define NVGPU_USER_SYNCPT_PRIV_H + +#include +#include + +struct nvgpu_channel; +struct nvgpu_nvhost_dev; + +struct nvgpu_channel_user_syncpt { + struct nvgpu_channel *ch; + struct nvgpu_nvhost_dev *nvhost; + uint32_t syncpt_id; + struct nvgpu_mem syncpt_buf; +}; + +#define SYNCPT_NAME_SZ 32UL + +#endif /* NVGPU_USER_SYNC_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/therm/therm.c b/drivers/gpu/nvgpu/common/therm/therm.c new file mode 100644 index 000000000..9f0c73ba2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/therm/therm.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +int nvgpu_init_therm_support(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + if (g->ops.therm.init_therm_setup_hw != NULL) { + err = g->ops.therm.init_therm_setup_hw(g); + } + if (err != 0) { + return err; + } + + nvgpu_cg_slcg_therm_load_enable(g); + +#ifdef CONFIG_DEBUG_FS + if (g->ops.therm.therm_debugfs_init) + g->ops.therm.therm_debugfs_init(g); +#endif + + return err; +} diff --git a/drivers/gpu/nvgpu/common/utils/enabled.c b/drivers/gpu/nvgpu/common/utils/enabled.c new file mode 100644 index 000000000..aa3f2d22b --- /dev/null +++ b/drivers/gpu/nvgpu/common/utils/enabled.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +/** + * Array of flag names + */ +#define DEFINE_FLAG(flag, desc) [flag] = nvgpu_stringify(flag) +static const char *enabled_flag_names[NVGPU_MAX_ENABLED_BITS + 1U] = { + ENABLED_FLAGS +}; +#undef DEFINE_FLAG + +/** + * Array of flag descriptions + */ +#define DEFINE_FLAG(flag, desc) [flag] = desc +static const char *enabled_flag_desc[NVGPU_MAX_ENABLED_BITS + 1U] = { + ENABLED_FLAGS +}; +#undef DEFINE_FLAG + +void nvgpu_print_enabled_flags(struct gk20a *g) +{ + u32 i; + + nvgpu_log(g, gpu_dbg_info, "NVGPU support flags status"); + nvgpu_log(g, gpu_dbg_info, "%-55.55s %-6.6s %s", + "Flag", "Status", "Description"); + nvgpu_log(g, gpu_dbg_info, "%-55.55s %-6.6s %s", + "----", "------", "-----------"); + + for (i = 0U; i < U32(NVGPU_MAX_ENABLED_BITS); i++) { + nvgpu_log(g, gpu_dbg_info, "%-55.55s %-6.6s %s", + enabled_flag_names[i], + nvgpu_is_enabled(g, i) ? "true" : "false", + enabled_flag_desc[i]); + } +} + +int nvgpu_init_enabled_flags(struct gk20a *g) +{ + /* + * Zero all flags initially. Flags that should be set to non-zero states + * can be done so during driver init. + */ + g->enabled_flags = nvgpu_kzalloc(g, + BITS_TO_LONGS(U32(NVGPU_MAX_ENABLED_BITS)) * + sizeof(unsigned long)); + if (g->enabled_flags == NULL) { + return -ENOMEM; + } + + return 0; +} + +/* + * Call this on driver shutdown! 
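+ *
+ * Typical lifecycle, as a rough illustration (SOME_FLAG stands in for any
+ * real NVGPU_* enabled flag):
+ *
+ *   err = nvgpu_init_enabled_flags(g);        early in driver init
+ *   nvgpu_set_enabled(g, SOME_FLAG, true);    during driver init
+ *   if (nvgpu_is_enabled(g, SOME_FLAG)) ...   at runtime
+ *   nvgpu_free_enabled_flags(g);              on driver shutdown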
+ */ +void nvgpu_free_enabled_flags(struct gk20a *g) +{ + nvgpu_kfree(g, g->enabled_flags); +} + +bool nvgpu_is_enabled(struct gk20a *g, u32 flag) +{ + if (flag < NVGPU_MAX_ENABLED_BITS) { + return nvgpu_test_bit(flag, g->enabled_flags); + } else { + return 0; + } +} + +void nvgpu_set_enabled(struct gk20a *g, u32 flag, bool state) +{ + if (flag >= NVGPU_MAX_ENABLED_BITS) { + return; + } + + if (state) { + nvgpu_set_bit(flag, g->enabled_flags); + } else { + nvgpu_clear_bit(flag, g->enabled_flags); + } +} diff --git a/drivers/gpu/nvgpu/common/utils/errata.c b/drivers/gpu/nvgpu/common/utils/errata.c new file mode 100644 index 000000000..8a6182405 --- /dev/null +++ b/drivers/gpu/nvgpu/common/utils/errata.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +/** + * Array of flag names + */ +#define DEFINE_ERRATA(flag, chip, desc) [flag] = nvgpu_stringify(flag) +static const char *errata_flag_names[NVGPU_MAX_ERRATA_BITS + 1U] = { + ERRATA_FLAGS_NEXT + ERRATA_FLAGS +}; +#undef DEFINE_ERRATA + +/** + * Array of flag descriptions + */ +#define DEFINE_ERRATA(flag, chip, desc) [flag] = desc +static const char *errata_flag_desc[NVGPU_MAX_ERRATA_BITS + 1U] = { + ERRATA_FLAGS_NEXT + ERRATA_FLAGS +}; +#undef DEFINE_ERRATA + +/** + * Array of chips where errata was first discovered + */ +#define DEFINE_ERRATA(flag, chip, desc) [flag] = chip +static const char *errata_flag_chip[NVGPU_MAX_ERRATA_BITS + 1U] = { + ERRATA_FLAGS_NEXT + ERRATA_FLAGS +}; +#undef DEFINE_ERRATA + +void nvgpu_print_errata_flags(struct gk20a *g) +{ + u32 i; + + nvgpu_log(g, gpu_dbg_info, "NVGPU Erratas present in chip"); + nvgpu_log(g, gpu_dbg_info, "%-55.55s %-5.5s %s", + "Flag", "Chip", "Description"); + nvgpu_log(g, gpu_dbg_info, "%-55.55s %-5.5s %s", + "----", "-----", "-----------"); + + for (i = 0U; i < U32(NVGPU_MAX_ERRATA_BITS); i++) { + /* Only print erratas present in chip */ + if (nvgpu_is_errata_present(g, i)) { + nvgpu_log(g, gpu_dbg_info, "%-55.55s %-5.5s %s", + errata_flag_names[i], + errata_flag_chip[i], + errata_flag_desc[i]); + } + } +} + +int nvgpu_init_errata_flags(struct gk20a *g) +{ + /* + * Zero all flags initially. Flags that should be set to non-zero states + * can be done so during hal init. 
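+	 * As a rough illustration of the intended flow: a chip's HAL init
+	 * calls nvgpu_set_errata(g, <flag>, true) for each erratum it
+	 * carries, and code elsewhere guards its workaround with
+	 * nvgpu_is_errata_present(), e.g. the NVGPU_ERRATA_SYNCPT_INVALID_ID_0
+	 * check in channel_sync_syncpt.c above.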
+ */ + g->errata_flags = nvgpu_kzalloc(g, + BITS_TO_LONGS(U32(NVGPU_MAX_ERRATA_BITS)) * + sizeof(unsigned long)); + if (g->errata_flags == NULL) { + return -ENOMEM; + } + + return 0; +} + +/* + * Call this on driver shutdown! + */ +void nvgpu_free_errata_flags(struct gk20a *g) +{ + nvgpu_kfree(g, g->errata_flags); +} + +bool nvgpu_is_errata_present(struct gk20a *g, u32 flag) +{ + if (flag < NVGPU_MAX_ERRATA_BITS) { + return nvgpu_test_bit(flag, g->errata_flags); + } else { + return 0; + } +} + +void nvgpu_set_errata(struct gk20a *g, u32 flag, bool state) +{ + if (flag >= NVGPU_MAX_ERRATA_BITS) { + return; + } + + if (state) { + nvgpu_set_bit(flag, g->errata_flags); + } else { + nvgpu_clear_bit(flag, g->errata_flags); + } +} diff --git a/drivers/gpu/nvgpu/common/utils/rbtree.c b/drivers/gpu/nvgpu/common/utils/rbtree.c new file mode 100644 index 000000000..cf881eab2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/utils/rbtree.c @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +/* + * rotate node x to left + */ +static void rotate_left(struct nvgpu_rbtree_node **root, + struct nvgpu_rbtree_node *x) +{ + struct nvgpu_rbtree_node *y = x->right; + + /* establish x->right link */ + x->right = y->left; + if (y->left != NULL) { + y->left->parent = x; + } + + /* establish y->parent link */ + y->parent = x->parent; + if (x->parent != NULL) { + if (x == x->parent->left) { + x->parent->left = y; + } else { + x->parent->right = y; + } + } else { + *root = y; + } + + /* link x and y */ + y->left = x; + x->parent = y; +} + +/* + * rotate node x to right + */ +static void rotate_right(struct nvgpu_rbtree_node **root, + struct nvgpu_rbtree_node *x) +{ + struct nvgpu_rbtree_node *y = x->left; + + /* establish x->left link */ + x->left = y->right; + if (y->right != NULL) { + y->right->parent = x; + } + + /* establish y->parent link */ + y->parent = x->parent; + if (x->parent != NULL) { + if (x == x->parent->right) { + x->parent->right = y; + } else { + x->parent->left = y; + } + } else { + *root = y; + } + + /* link x and y */ + y->right = x; + x->parent = y; +} + +/* + * maintain red-black tree balance after inserting node x + */ +static void insert_fixup(struct nvgpu_rbtree_node **root, + struct nvgpu_rbtree_node *x) +{ + /* check red-black properties */ + while ((x != *root) && x->parent->is_red) { + /* we have a violation */ + if (x->parent == x->parent->parent->left) { + struct nvgpu_rbtree_node *y = x->parent->parent->right; + + if ((y != NULL) && (y->is_red)) { + /* uncle is RED */ + x->parent->is_red = false; + y->is_red = false; + x->parent->parent->is_red = true; + x = x->parent->parent; + } else { + /* uncle is BLACK */ + if (x == x->parent->right) { + /* make x a left child */ + x = x->parent; + rotate_left(root, x); + } + + /* recolor and rotate */ + x->parent->is_red = false; + x->parent->parent->is_red = true; + rotate_right(root, x->parent->parent); + } + } else { + /* mirror image of above code */ + struct nvgpu_rbtree_node *y = x->parent->parent->left; + + if ((y != NULL) && (y->is_red)) { + /* uncle is RED */ + x->parent->is_red = false; + y->is_red = false; + x->parent->parent->is_red = true; + x = x->parent->parent; + } else { + /* uncle is BLACK */ + if (x == x->parent->left) { + x = x->parent; + rotate_right(root, x); + } + x->parent->is_red = false; + x->parent->parent->is_red = true; + rotate_left(root, x->parent->parent); + } + } + } + + (*root)->is_red = false; +} + +void nvgpu_rbtree_insert(struct nvgpu_rbtree_node *new_node, + struct nvgpu_rbtree_node **root) +{ + struct nvgpu_rbtree_node *curr; + struct nvgpu_rbtree_node *parent; + + /* find future parent */ + curr = *root; + parent = NULL; + + while (curr != NULL) { + parent = curr; + if (new_node->key_start < curr->key_start) { + curr = curr->left; + } else if (new_node->key_start > curr->key_start) { + curr = curr->right; + } else { + return; /* duplicate entry */ + } + } + + /* the caller allocated the node already, just fix the links */ + new_node->parent = parent; + new_node->left = NULL; + new_node->right = NULL; + new_node->is_red = true; + + /* insert node in tree */ + if (parent != NULL) { + if (new_node->key_start < parent->key_start) { + parent->left = new_node; + } else { + parent->right = new_node; + } + } else { + *root = new_node; + } + + insert_fixup(root, new_node); +} + +/* + * helper function for delete_fixup_*_child to test if node has no red + * children + */ +static bool has_no_red_children(struct nvgpu_rbtree_node *w) +{ + return (w == NULL) || + 
(((w->left == NULL) || (!w->left->is_red)) && + ((w->right == NULL) || (!w->right->is_red))); +} + +/* delete_fixup handling if x is the left child */ +static void delete_fixup_left_child(struct nvgpu_rbtree_node **root, + struct nvgpu_rbtree_node *parent_of_x, + struct nvgpu_rbtree_node **x) +{ + struct nvgpu_rbtree_node *w = parent_of_x->right; + + if ((w != NULL) && (w->is_red)) { + w->is_red = false; + parent_of_x->is_red = true; + rotate_left(root, parent_of_x); + w = parent_of_x->right; + } + + if (has_no_red_children(w)) { + if (w != NULL) { + w->is_red = true; + } + *x = parent_of_x; + } else { + if ((w->right == NULL) || (!w->right->is_red)) { + w->left->is_red = false; + w->is_red = true; + rotate_right(root, w); + w = parent_of_x->right; + } + w->is_red = parent_of_x->is_red; + parent_of_x->is_red = false; + w->right->is_red = false; + rotate_left(root, parent_of_x); + *x = *root; + } +} + +/* delete_fixup handling if x is the right child */ +static void delete_fixup_right_child(struct nvgpu_rbtree_node **root, + struct nvgpu_rbtree_node *parent_of_x, + struct nvgpu_rbtree_node **x) +{ + struct nvgpu_rbtree_node *w = parent_of_x->left; + + if ((w != NULL) && (w->is_red)) { + w->is_red = false; + parent_of_x->is_red = true; + rotate_right(root, parent_of_x); + w = parent_of_x->left; + } + + if (has_no_red_children(w)) { + if (w != NULL) { + w->is_red = true; + } + *x = parent_of_x; + } else { + if ((w->left == NULL) || (!w->left->is_red)) { + w->right->is_red = false; + w->is_red = true; + rotate_left(root, w); + w = parent_of_x->left; + } + w->is_red = parent_of_x->is_red; + parent_of_x->is_red = false; + w->left->is_red = false; + rotate_right(root, parent_of_x); + *x = *root; + } +} + +/* + * maintain red-black tree balance after deleting node x + */ +static void delete_fixup(struct nvgpu_rbtree_node **root, + struct nvgpu_rbtree_node *parent_of_x, + struct nvgpu_rbtree_node *x) +{ + while ((x != *root) && ((x == NULL) || (!x->is_red))) { + /* + * NULL nodes are sentinel nodes. If we delete a sentinel + * node (x==NULL) it must have a parent node (or be the root). 
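+		 * (In red-black terms every NULL child counts as a black
+		 * leaf, so unlinking a black node leaves the path through
+		 * parent_of_x one black node short; this loop walks upward,
+		 * recoloring and rotating, until that extra "black" is
+		 * absorbed or the root is reached.)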
+ * Hence, parent_of_x == NULL with + * x==NULL is never possible (tree invariant) + */ + if (parent_of_x == NULL) { + nvgpu_assert(x != NULL); + parent_of_x = x->parent; + continue; + } + + if (x == parent_of_x->left) { + delete_fixup_left_child(root, parent_of_x, &x); + } else { + delete_fixup_right_child(root, parent_of_x, &x); + } + parent_of_x = x->parent; + } + + if (x != NULL) { + x->is_red = false; + } +} + +static void swap_in_new_child(struct nvgpu_rbtree_node *old, + struct nvgpu_rbtree_node *new, + struct nvgpu_rbtree_node **root) +{ + if (old->parent != NULL) { + if (old == old->parent->left) { + old->parent->left = new; + } else { + old->parent->right = new; + } + } else { + *root = new; + } +} + +static void adopt_children(struct nvgpu_rbtree_node *old, + struct nvgpu_rbtree_node *new) +{ + new->left = old->left; + if (old->left != NULL) { + old->left->parent = new; + } + + new->right = old->right; + if (old->right != NULL) { + old->right->parent = new; + } +} + +void nvgpu_rbtree_unlink(struct nvgpu_rbtree_node *node, + struct nvgpu_rbtree_node **root) +{ + struct nvgpu_rbtree_node *x; + struct nvgpu_rbtree_node *y; + struct nvgpu_rbtree_node *z; + struct nvgpu_rbtree_node *parent_of_x; + bool y_was_black; + + z = node; + + /* unlink */ + if ((z->left == NULL) || (z->right == NULL)) { + /* y has a SENTINEL node as a child */ + y = z; + } else { + /* find tree successor */ + y = z->right; + while (y->left != NULL) { + y = y->left; + } + } + + /* x is y's only child */ + if (y->left != NULL) { + x = y->left; + } else { + x = y->right; + } + + /* remove y from the parent chain */ + parent_of_x = y->parent; + if (x != NULL) { + x->parent = parent_of_x; + } + /* update the parent's links */ + swap_in_new_child(y, x, root); + + y_was_black = !y->is_red; + if (y != z) { + /* we need to replace z with y so + * the memory for z can be freed + */ + y->parent = z->parent; + swap_in_new_child(z, y, root); + + y->is_red = z->is_red; + + adopt_children(z, y); + + if (parent_of_x == z) { + parent_of_x = y; + } + } + + if (y_was_black) { + delete_fixup(root, parent_of_x, x); + } +} + +void nvgpu_rbtree_search(u64 key_start, struct nvgpu_rbtree_node **node, + struct nvgpu_rbtree_node *root) +{ + struct nvgpu_rbtree_node *curr = root; + + while (curr != NULL) { + if (key_start < curr->key_start) { + curr = curr->left; + } else if (key_start > curr->key_start) { + curr = curr->right; + } else { + *node = curr; + return; + } + } + + *node = NULL; +} + +void nvgpu_rbtree_range_search(u64 key, + struct nvgpu_rbtree_node **node, + struct nvgpu_rbtree_node *root) +{ + struct nvgpu_rbtree_node *curr = root; + + while (curr != NULL) { + if ((key >= curr->key_start) && + (key < curr->key_end)) { + *node = curr; + return; + } else if (key < curr->key_start) { + curr = curr->left; + } else { + curr = curr->right; + } + } + + *node = NULL; +} + +void nvgpu_rbtree_less_than_search(u64 key_start, + struct nvgpu_rbtree_node **node, + struct nvgpu_rbtree_node *root) +{ + struct nvgpu_rbtree_node *curr = root; + + while (curr != NULL) { + if (key_start <= curr->key_start) { + curr = curr->left; + } else { + *node = curr; + curr = curr->right; + } + } +} + +void nvgpu_rbtree_enum_start(u64 key_start, struct nvgpu_rbtree_node **node, + struct nvgpu_rbtree_node *root) +{ + *node = NULL; + + if (root != NULL) { + struct nvgpu_rbtree_node *curr = root; + + while (curr != NULL) { + if (key_start < curr->key_start) { + *node = curr; + curr = curr->left; + } else if (key_start > curr->key_start) { + curr = 
curr->right; + } else { + *node = curr; + break; + } + } + } +} + +void nvgpu_rbtree_enum_next(struct nvgpu_rbtree_node **node, + struct nvgpu_rbtree_node *root) +{ + struct nvgpu_rbtree_node *curr = NULL; + + if ((root != NULL) && (*node != NULL)) { + /* if we don't have a right subtree return the parent */ + curr = *node; + + /* pick the leftmost node of the right subtree ? */ + if (curr->right != NULL) { + curr = curr->right; + while (curr->left != NULL) { + curr = curr->left; + } + } else { + /* go up until we find the right inorder node */ + for (curr = curr->parent; + curr != NULL; + curr = curr->parent) { + if (curr->key_start > (*node)->key_start) { + break; + } + } + } + } + + *node = curr; +} diff --git a/drivers/gpu/nvgpu/common/utils/string.c b/drivers/gpu/nvgpu/common/utils/string.c new file mode 100644 index 000000000..2cb50c691 --- /dev/null +++ b/drivers/gpu/nvgpu/common/utils/string.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +void +nvgpu_memcpy(u8 *destb, const u8 *srcb, size_t n) +{ + (void) memcpy(destb, srcb, n); +} + +int +nvgpu_memcmp(const u8 *b1, const u8 *b2, size_t n) +{ + return memcmp(b1, b2, n); +} + +int nvgpu_strnadd_u32(char *dst, const u32 value, size_t size, u32 radix) +{ + int n; + u32 v; + char *p; + u32 digit; + + if ((radix < 2U) || (radix > 16U)) { + return 0; + } + + if (size > ((u64)(INT_MAX))) { + return 0; + } + + /* how many digits do we need ? */ + n = 0; + v = value; + do { + n = nvgpu_safe_add_s32(n, 1); + v = v / radix; + } while (v > 0U); + + /* bail out if there is not room for '\0' */ + if (n >= (s32)size) { + return 0; + } + + /* number of digits (not including '\0') */ + p = dst + n; + + /* terminate with '\0' */ + *p = '\0'; + p--; + + v = value; + do { + digit = v % radix; + *p = "0123456789abcdef"[digit]; + v = v / radix; + p--; + } + while (v > 0U); + + return n; +} + +bool nvgpu_mem_is_word_aligned(struct gk20a *g, u8 *addr) +{ + if (((unsigned long)addr % 4UL) != 0UL) { + nvgpu_log_info(g, "addr not 4-byte aligned"); + return false; + } + + return true; +} diff --git a/drivers/gpu/nvgpu/common/utils/worker.c b/drivers/gpu/nvgpu/common/utils/worker.c new file mode 100644 index 000000000..df3eef734 --- /dev/null +++ b/drivers/gpu/nvgpu/common/utils/worker.c @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +static void nvgpu_worker_pre_process(struct nvgpu_worker *worker) +{ + if (worker->ops->pre_process != NULL) { + worker->ops->pre_process(worker); + } +} + +static bool nvgpu_worker_wakeup_condition(struct nvgpu_worker *worker) +{ + bool ret = false; + + if (worker->ops->wakeup_condition != NULL) { + ret = worker->ops->wakeup_condition(worker); + } + return ret; +} + +static u32 nvgpu_worker_wakeup_timeout( + struct nvgpu_worker *worker) +{ + u32 timeout = 0U; + + if (worker->ops->wakeup_timeout != NULL) { + timeout = worker->ops->wakeup_timeout( + worker); + } + return timeout; +} + +static bool nvgpu_worker_wakeup_early_exit(struct nvgpu_worker *worker) +{ + bool ret = false; + + if (worker->ops->wakeup_early_exit != NULL) { + ret = worker->ops->wakeup_early_exit(worker); + } + return ret; +} + +static void nvgpu_worker_wakeup_process_item(struct nvgpu_worker *worker, + struct nvgpu_list_node *work_item) +{ + nvgpu_assert(worker->ops->wakeup_process_item != NULL); + + worker->ops->wakeup_process_item(work_item); +} + +static void nvgpu_worker_wakeup_post_process( + struct nvgpu_worker *worker) +{ + if (worker->ops->wakeup_post_process != NULL) { + worker->ops->wakeup_post_process(worker); + } +} + +/** + * Tell the worker that potentially more work needs to be done. + * + * Increase the work counter to synchronize the worker with the new work. Wake + * up the worker. If the worker was already running, it will handle this work + * before going to sleep. + */ +static int nvgpu_worker_wakeup(struct nvgpu_worker *worker) +{ + int put; + struct gk20a *g = worker->g; + + nvgpu_log_fn(g, " "); + + put = nvgpu_atomic_inc_return(&worker->put); + nvgpu_cond_signal_interruptible(&worker->wq); + + return put; +} + +static bool nvgpu_worker_pending(struct nvgpu_worker *worker, int get) +{ + bool pending = nvgpu_atomic_read(&worker->put) != get; + + /* We don't need barriers because they are implicit in locking */ + return pending; +} + +/** + * Process the queued works for the worker thread serially. + * + * Flush all the work items in the queue one by one. This may block timeout + * handling for a short while, as these are serialized. 
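+ *
+ * For orientation, a minimal client of this worker framework looks roughly
+ * like the sketch below (the my_* names are placeholders; wakeup_process_item
+ * is the only mandatory nvgpu_worker_ops hook, the others may be left NULL):
+ *
+ *   static void my_process_item(struct nvgpu_list_node *work_item)
+ *   {
+ *           ... resolve the owning job from work_item and complete it ...
+ *   }
+ *
+ *   static const struct nvgpu_worker_ops my_worker_ops = {
+ *           .wakeup_process_item = my_process_item,
+ *   };
+ *
+ *   nvgpu_worker_init_name(&my_worker, "my_worker", g->name);
+ *   err = nvgpu_worker_init(g, &my_worker, &my_worker_ops);
+ *   ...
+ *   err = nvgpu_worker_enqueue(&my_worker, &my_job->worker_item);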
+ */ +static void nvgpu_worker_process(struct nvgpu_worker *worker, int *get) +{ + struct gk20a *g = worker->g; + + while (nvgpu_worker_pending(worker, *get)) { + struct nvgpu_list_node *work_item = NULL; + + nvgpu_spinlock_acquire(&worker->items_lock); + if (!nvgpu_list_empty(&worker->items)) { + work_item = worker->items.next; + nvgpu_list_del(work_item); + } + nvgpu_spinlock_release(&worker->items_lock); + + if (work_item == NULL) { + /* + * Woke up for some other reason, but there are no + * other reasons than a work item added in the items + * list currently, so warn and ack the message. + */ + nvgpu_info(g, "Spurious worker event!"); + ++*get; + break; + } + + nvgpu_worker_wakeup_process_item(worker, work_item); + ++*get; + } +} + +/* + * Process all work items found in the work queue. + */ +static int nvgpu_worker_poll_work(void *arg) +{ + struct nvgpu_worker *worker = (struct nvgpu_worker *)arg; + int get = 0; + + nvgpu_worker_pre_process(worker); + + while (!nvgpu_thread_should_stop(&worker->poll_task)) { + int ret; + + ret = NVGPU_COND_WAIT_INTERRUPTIBLE( + &worker->wq, + nvgpu_worker_pending(worker, get) || + nvgpu_worker_wakeup_condition(worker), + nvgpu_worker_wakeup_timeout(worker)); + + if (nvgpu_worker_wakeup_early_exit(worker)) { + break; + } + + if (ret == 0) { + nvgpu_worker_process(worker, &get); + } + + nvgpu_worker_wakeup_post_process(worker); + } + return 0; +} + +static int nvgpu_worker_start(struct nvgpu_worker *worker) +{ + int err = 0; + + if (nvgpu_thread_is_running(&worker->poll_task)) { + return err; + } + + nvgpu_mutex_acquire(&worker->start_lock); + + /* + * Mutexes have implicit barriers, so there is no risk of a thread + * having a stale copy of the poll_task variable as the call to + * thread_is_running is volatile + */ + + if (nvgpu_thread_is_running(&worker->poll_task)) { + nvgpu_mutex_release(&worker->start_lock); + return err; + } + + err = nvgpu_thread_create(&worker->poll_task, worker, + nvgpu_worker_poll_work, worker->thread_name); + + nvgpu_mutex_release(&worker->start_lock); + return err; +} + +bool nvgpu_worker_should_stop(struct nvgpu_worker *worker) +{ + return nvgpu_thread_should_stop(&worker->poll_task); +} + +int nvgpu_worker_enqueue(struct nvgpu_worker *worker, + struct nvgpu_list_node *work_item) +{ + int err; + struct gk20a *g = worker->g; + + /* + * Warn if worker thread cannot run + */ + err = nvgpu_worker_start(worker); + if (err != 0) { + nvgpu_do_assert_print(g, "nvgpu_worker %s cannot run!", + worker->thread_name); + return -1; + } + + nvgpu_spinlock_acquire(&worker->items_lock); + if (!nvgpu_list_empty(work_item)) { + /* + * Already queued, so will get processed eventually. + * The worker is probably awake already. 
+ */ + nvgpu_spinlock_release(&worker->items_lock); + return -1; + } + nvgpu_list_add_tail(work_item, &worker->items); + nvgpu_spinlock_release(&worker->items_lock); + + (void) nvgpu_worker_wakeup(worker); + + return 0; +} + +void nvgpu_worker_init_name(struct nvgpu_worker *worker, + const char* worker_name, const char *gpu_name) +{ + /* + * Maximum character size of worker thread name + * Note: 1 is subtracted to account for null character + */ + size_t worker_name_size = sizeof(worker->thread_name) - 1U; + + /* Number of characters that can be used for thread name */ + size_t num_free_chars = worker_name_size; + + /* Terminate thread name with NULL character */ + worker->thread_name[0] = '\0'; + + (void) strncat(worker->thread_name, worker_name, num_free_chars); + + num_free_chars = worker_name_size - strlen(worker->thread_name); + + (void) strncat(worker->thread_name, "_", num_free_chars); + + num_free_chars = worker_name_size - strlen(worker->thread_name); + + (void) strncat(worker->thread_name, gpu_name, num_free_chars); +} + +int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker, + const struct nvgpu_worker_ops *ops) +{ + int err; + + worker->g = g; + nvgpu_atomic_set(&worker->put, 0); + (void) nvgpu_cond_init(&worker->wq); + nvgpu_init_list_node(&worker->items); + nvgpu_spinlock_init(&worker->items_lock); + nvgpu_mutex_init(&worker->start_lock); + + worker->ops = ops; + + err = nvgpu_worker_start(worker); + if (err != 0) { + nvgpu_err(g, "failed to start worker poller thread %s", + worker->thread_name); + return err; + } + return 0; +} + +void nvgpu_worker_deinit(struct nvgpu_worker *worker) +{ + nvgpu_mutex_acquire(&worker->start_lock); + nvgpu_thread_stop(&worker->poll_task); + nvgpu_mutex_release(&worker->start_lock); +} diff --git a/drivers/gpu/nvgpu/common/vbios/bios.c b/drivers/gpu/nvgpu/common/vbios/bios.c new file mode 100644 index 000000000..937d74722 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vbios/bios.c @@ -0,0 +1,882 @@ +/* + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "bios_sw_gv100.h" +#include "bios_sw_tu104.h" + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) +#include "nvgpu_next_gpuid.h" +#endif + +static void nvgpu_bios_parse_bit(struct gk20a *g, u32 offset); + +int nvgpu_bios_devinit(struct gk20a *g, + struct nvgpu_bios *bios) +{ + if (bios == NULL) { + return 0; + } + + if (bios->devinit_bios != NULL) { + return bios->devinit_bios(g); + } else { + return 0; + } +} + +int nvgpu_bios_preos_wait_for_halt(struct gk20a *g, + struct nvgpu_bios *bios) +{ + if (bios == NULL) { + return 0; + } + + if (bios->preos_wait_for_halt != NULL) { + return bios->preos_wait_for_halt(g); + } else { + return 0; + } +} + +bool nvgpu_bios_check_dgpu(struct gk20a *g, u32 ver) +{ + bool is_supported; + + switch (ver) { + + case NVGPU_GPUID_GV100: + case NVGPU_GPUID_TU104: +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) + case NVGPU_NEXT_DGPU_GPUID: +#endif + is_supported = true; + break; + + default: + is_supported = false; + break; + } + + return is_supported; +} + +u32 nvgpu_bios_get_vbios_version(struct gk20a *g) +{ + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl); + u32 vbios_version; + + switch (ver) { + + case NVGPU_GPUID_GV100: + case NVGPU_GPUID_TU104: + if (nvgpu_platform_is_silicon(g)) { + vbios_version = g->bios->vbios_version; + } else { + vbios_version = 0; + } + break; + + default: + vbios_version = 0; + break; + } + + return vbios_version; +} + +u8 nvgpu_bios_get_vbios_oem_version(struct gk20a *g) +{ + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl); + u8 vbios_oem_version; + + switch (ver) { + + case NVGPU_GPUID_GV100: + case NVGPU_GPUID_TU104: + if (nvgpu_platform_is_silicon(g)) { + vbios_oem_version = g->bios->vbios_oem_version; + } else { + vbios_oem_version = 0; + } + break; + + default: + vbios_oem_version = 0; + break; + } + + return vbios_oem_version; +} + +struct bit_token *nvgpu_bios_get_bit_token(struct gk20a *g, u8 token_id) +{ + struct bit_token *token = NULL; + + switch (token_id) { + case NVGPU_BIOS_CLOCK_TOKEN: + token = g->bios->clock_token; + break; + + case NVGPU_BIOS_PERF_TOKEN: + token = g->bios->perf_token; + break; + + case NVGPU_BIOS_VIRT_TOKEN: + token = g->bios->virt_token; + break; + + default: + token = NULL; + break; + } + return token; +} + +int nvgpu_bios_sw_init(struct gk20a *g) +{ + u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl); + int err = 0; + +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + goto done; + } +#endif + + if (nvgpu_bios_check_dgpu(g, ver) == false) { + goto done; + } + + if (g->bios != NULL) { + /* skip alloc/reinit for unrailgate sequence */ + nvgpu_pmu_dbg(g, "skip bios init for unrailgate sequence"); + goto done; + } + + g->bios = (struct nvgpu_bios *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_bios)); + if (g->bios == NULL) { + err = -ENOMEM; + goto done; + } + + switch (ver) { +#ifdef CONFIG_NVGPU_DGPU + case NVGPU_GPUID_GV100: + nvgpu_gv100_bios_sw_init(g, g->bios); + break; + + case NVGPU_GPUID_TU104: + nvgpu_tu104_bios_sw_init(g, g->bios); + break; + +#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) + case NVGPU_NEXT_DGPU_GPUID: + /* + * TODO + * After IFR region removal from bios image this can + * be replaced with nvgpu_tu104_bios_sw_init. 
+ */ + err = tu104_bios_verify_devinit(g); + break; +#endif +#endif + default: + goto clean_bios; + } + + if ((g->bios)->init != NULL) { + err = g->bios->init(g); + if (err != 0) { + nvgpu_falcon_sw_free(g, FALCON_ID_FECS); + goto clean_bios; + } + } +done: + return err; + +clean_bios: + nvgpu_kfree(g, g->bios); + g->bios = NULL; + return err; + +} + +void nvgpu_bios_sw_deinit(struct gk20a *g, struct nvgpu_bios *bios) +{ + if (bios == NULL) { + return; + } else { + nvgpu_kfree(g, bios); + } +} + +static u16 nvgpu_bios_rdu16(struct gk20a *g, u32 offset) +{ + u32 val; + + nvgpu_assert(offset < U32_MAX); + /* + * cast up to u32 because cast to u16 promotes into an int and + * causes a CERT-C INT31 violation + */ + val = nvgpu_safe_add_u32(U32(g->bios->data[offset+1U]) << U32(8), + U32(g->bios->data[offset])); + return nvgpu_safe_cast_u32_to_u16(val); +} + +static u32 nvgpu_bios_rdu32(struct gk20a *g, u32 offset) +{ + u32 val; + + nvgpu_assert(offset <= (U32_MAX - 3U)); + val = nvgpu_safe_add_u32(U32(g->bios->data[offset+3U]) << U32(24), + U32(g->bios->data[offset+2U]) << U32(16)); + val = nvgpu_safe_add_u32(val, U32(g->bios->data[offset+1U]) << U32(8)); + val = nvgpu_safe_add_u32(val, U32(g->bios->data[offset])); + return val; +} + +int nvgpu_bios_parse_rom(struct gk20a *g) +{ + u32 offset = 0; + u8 last = 0; + bool found = false; + unsigned int i; + + while (last == 0U) { + struct pci_exp_rom pci_rom; + struct pci_data_struct pci_data; + struct pci_ext_data_struct pci_ext_data; + + nvgpu_memcpy((u8 *)&pci_rom, (u8 *)(g->bios->data + offset), + sizeof(struct pci_exp_rom)); + nvgpu_log_fn(g, "pci rom sig %04x ptr %04x block %x", + pci_rom.sig, pci_rom.pci_data_struct_ptr, + pci_rom.size_of_block); + + if (pci_rom.sig != PCI_EXP_ROM_SIG && + pci_rom.sig != PCI_EXP_ROM_SIG_NV) { + nvgpu_err(g, "invalid VBIOS signature"); + return -EINVAL; + } + + nvgpu_memcpy((u8 *)&pci_data, (u8 *)(g->bios->data + offset + + pci_rom.pci_data_struct_ptr), + sizeof(struct pci_data_struct)); + nvgpu_log_fn(g, "pci data sig %08x len %d image len %x type %x last %d max %08x", + pci_data.sig, pci_data.pci_data_struct_len, + pci_data.image_len, pci_data.code_type, + pci_data.last_image, + pci_data.max_runtime_image_len); + + /* Get Base ROM Size */ + if (pci_data.code_type == + PCI_DATA_STRUCTURE_CODE_TYPE_VBIOS_BASE) { + g->bios->base_rom_size = nvgpu_safe_mult_u32( + (u32)pci_data.image_len, + PCI_ROM_IMAGE_BLOCK_SIZE); + nvgpu_log_fn(g, "Base ROM Size: %x", + g->bios->base_rom_size); + } + + /* Get Expansion ROM offset: + * In the UEFI case, the expansion ROM where the Perf tables + * are located is not necessarily immediately after the base + * VBIOS image. Some VBIOS images uses a "private image" layout, + * where the order of the images is the VBIOS base block, + * the UEFI ROM, the expansion ROM, and then the cert. So we + * need to add the UEFI ROM size to offsets within the + * expansion ROM. 
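+ * As a hypothetical illustration (image lengths are in
+ * PCI_ROM_IMAGE_BLOCK_SIZE units, normally 512 bytes per the
+ * PCI spec): a base image_len of 0x4C gives base_rom_size
+ * 0x9800, and a UEFI image_len of 0x30 gives
+ * expansion_rom_offset 0x6000, so a table pointer of 0xA000
+ * read from the BIT tokens really lives at 0x10000 in
+ * g->bios->data. The adjustment is applied where the pointers
+ * are consumed: nvgpu_bios_get_perf_table_ptrs() rebases any
+ * pointer beyond base_rom_size, and
+ * nvgpu_bios_parse_falcon_ucode_table() /
+ * nvgpu_bios_parse_falcon_data_v2() retry a failed parse with
+ * expansion_rom_offset added.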
+ */ + if (pci_data.code_type == + PCI_DATA_STRUCTURE_CODE_TYPE_VBIOS_UEFI) { + u32 ext_offset; + + ext_offset = nvgpu_safe_add_u32(offset, + pci_rom.pci_data_struct_ptr); + ext_offset = nvgpu_safe_add_u32(ext_offset, + pci_data.pci_data_struct_len); + ext_offset = nvgpu_safe_add_u32(ext_offset, + 0xfU) & ~0xfU; + nvgpu_memcpy((u8 *)&pci_ext_data, (u8 *)(g->bios->data + + ext_offset), + sizeof(struct pci_ext_data_struct)); + nvgpu_log_fn(g, "pci ext data sig %08x rev %x len %x sub_image_len %x priv_last %d flags %x", + pci_ext_data.sig, + pci_ext_data.nv_pci_data_ext_rev, + pci_ext_data.nv_pci_data_ext_len, + pci_ext_data.sub_image_len, + pci_ext_data.priv_last_image, + pci_ext_data.flags); + + nvgpu_log_fn(g, "expansion rom offset %x", + nvgpu_safe_mult_u32((u32)pci_data.image_len, + PCI_ROM_IMAGE_BLOCK_SIZE)); + g->bios->expansion_rom_offset = nvgpu_safe_mult_u32( + (u32)pci_data.image_len, + PCI_ROM_IMAGE_BLOCK_SIZE); + offset = nvgpu_safe_add_u32(offset, + nvgpu_safe_mult_u32( + (u32)pci_ext_data.sub_image_len, + PCI_ROM_IMAGE_BLOCK_SIZE)); + last = pci_ext_data.priv_last_image; + } else { + offset = nvgpu_safe_add_u32(offset, + nvgpu_safe_mult_u32( + (u32)pci_data.image_len, + PCI_ROM_IMAGE_BLOCK_SIZE)); + last = pci_data.last_image; + } + } + + nvgpu_log_info(g, "read bios"); + for (i = 0; i < nvgpu_safe_sub_u64(g->bios->size, 6ULL); i++) { + if (nvgpu_bios_rdu16(g, i) == BIT_HEADER_ID && + nvgpu_bios_rdu32(g, i+2U) == BIT_HEADER_SIGNATURE) { + nvgpu_bios_parse_bit(g, i); + found = true; + } + } + + if (!found) { + return -EINVAL; + } else { + return 0; + } +} + +static void nvgpu_bios_parse_biosdata(struct gk20a *g, u32 offset) +{ + struct biosdata bios_data; + + nvgpu_memcpy((u8 *)&bios_data, &g->bios->data[offset], + sizeof(bios_data)); + nvgpu_log_fn(g, "bios version %x, oem version %x", + bios_data.version, + bios_data.oem_version); + + g->bios->vbios_version = bios_data.version; + g->bios->vbios_oem_version = bios_data.oem_version; +} + +static void nvgpu_bios_parse_nvinit_ptrs(struct gk20a *g, u32 offset) +{ + struct nvinit_ptrs init_ptrs; + + nvgpu_memcpy((u8 *)&init_ptrs, &g->bios->data[offset], + sizeof(init_ptrs)); + nvgpu_log_fn(g, "devinit ptr %x size %d", init_ptrs.devinit_tables_ptr, + init_ptrs.devinit_tables_size); + nvgpu_log_fn(g, "bootscripts ptr %x size %d", init_ptrs.bootscripts_ptr, + init_ptrs.bootscripts_size); + + g->bios->devinit_tables = &g->bios->data[init_ptrs.devinit_tables_ptr]; + g->bios->devinit_tables_size = init_ptrs.devinit_tables_size; + g->bios->bootscripts = &g->bios->data[init_ptrs.bootscripts_ptr]; + g->bios->bootscripts_size = init_ptrs.bootscripts_size; + g->bios->condition_table_ptr = init_ptrs.condition_table_ptr; + g->bios->nvlink_config_data_offset = init_ptrs.nvlink_config_data_ptr; +} +static void nvgpu_bios_parse_memory_ptrs(struct gk20a *g, u16 offset, u8 version) +{ + struct memory_ptrs_v1 v1; + struct memory_ptrs_v2 v2; + + switch (version) { + case MEMORY_PTRS_V1: + nvgpu_memcpy((u8 *)&v1, &g->bios->data[offset], sizeof(v1)); + g->bios->mem_strap_data_count = v1.mem_strap_data_count; + g->bios->mem_strap_xlat_tbl_ptr = v1.mem_strap_xlat_tbl_ptr; + break; + case MEMORY_PTRS_V2: + nvgpu_memcpy((u8 *)&v2, &g->bios->data[offset], sizeof(v2)); + g->bios->mem_strap_data_count = v2.mem_strap_data_count; + g->bios->mem_strap_xlat_tbl_ptr = v2.mem_strap_xlat_tbl_ptr; + break; + default: + nvgpu_err(g, "unknown vbios memory table version %x", version); + break; + } + + return; +} + +static void nvgpu_bios_parse_bios_board_id_ptrs(struct 
gk20a *g, u16 offset) +{ + struct bios_board_id board; + + nvgpu_memcpy((u8 *)&board, &g->bios->data[offset], sizeof(board)); + g->bios->vbios_board_id = board.board_id; +} + +static void nvgpu_bios_parse_devinit_appinfo(struct gk20a *g, u32 dmem_offset) +{ + struct devinit_engine_interface interface; + + nvgpu_memcpy((u8 *)&interface, &g->bios->devinit.dmem[dmem_offset], + sizeof(interface)); + nvgpu_log_fn(g, "devinit version %x tables phys %x script phys %x size %d", + interface.version, + interface.tables_phys_base, + interface.script_phys_base, + interface.script_size); + + if (interface.version != 1U) { + return; + } + g->bios->devinit_tables_phys_base = interface.tables_phys_base; + g->bios->devinit_script_phys_base = interface.script_phys_base; +} + +static int nvgpu_bios_parse_appinfo_table(struct gk20a *g, u32 offset) +{ + struct application_interface_table_hdr_v1 hdr; + u32 i; + + nvgpu_memcpy((u8 *)&hdr, &g->bios->data[offset], sizeof(hdr)); + + nvgpu_log_fn(g, "appInfoHdr ver %d size %d entrySize %d entryCount %d", + hdr.version, hdr.header_size, + hdr.entry_size, hdr.entry_count); + + if (hdr.version != 1U) { + return 0; + } + + offset = nvgpu_safe_add_u32(offset, + nvgpu_safe_cast_u64_to_u32(sizeof(hdr))); + for (i = 0U; i < hdr.entry_count; i++) { + struct application_interface_entry_v1 entry; + + nvgpu_memcpy((u8 *)&entry, &g->bios->data[offset], + sizeof(entry)); + + nvgpu_log_fn(g, "appInfo id %d dmem_offset %d", + entry.id, entry.dmem_offset); + + if (entry.id == APPINFO_ID_DEVINIT) { + nvgpu_bios_parse_devinit_appinfo(g, entry.dmem_offset); + } + + offset = nvgpu_safe_add_u32(offset, hdr.entry_size); + } + + return 0; +} + +static int nvgpu_bios_parse_falcon_ucode_desc(struct gk20a *g, + struct nvgpu_bios_ucode *ucode, u32 offset) +{ + union falcon_ucode_desc udesc; + struct falcon_ucode_desc_v2 desc; + u8 version; + u16 desc_size; + int ret = 0; + + nvgpu_memcpy((u8 *)&udesc, &g->bios->data[offset], sizeof(udesc)); + + if (FALCON_UCODE_IS_VERSION_AVAILABLE(udesc)) { + version = FALCON_UCODE_GET_VERSION(udesc); + desc_size = FALCON_UCODE_GET_DESC_SIZE(udesc); + } else { + size_t tmp_size = sizeof(udesc.v1); + + version = 1; + nvgpu_assert(tmp_size <= (size_t)U16_MAX); + desc_size = U16(tmp_size); + } + + switch (version) { + case 1: + desc.stored_size = udesc.v1.hdr_size.stored_size; + desc.uncompressed_size = udesc.v1.uncompressed_size; + desc.virtual_entry = udesc.v1.virtual_entry; + desc.interface_offset = udesc.v1.interface_offset; + desc.imem_phys_base = udesc.v1.imem_phys_base; + desc.imem_load_size = udesc.v1.imem_load_size; + desc.imem_virt_base = udesc.v1.imem_virt_base; + desc.imem_sec_base = udesc.v1.imem_sec_base; + desc.imem_sec_size = udesc.v1.imem_sec_size; + desc.dmem_offset = udesc.v1.dmem_offset; + desc.dmem_phys_base = udesc.v1.dmem_phys_base; + desc.dmem_load_size = udesc.v1.dmem_load_size; + break; + case 2: + nvgpu_memcpy((u8 *)&desc, (u8 *)&udesc, sizeof(udesc.v2)); + break; + default: + nvgpu_log_info(g, "invalid version"); + ret = -EINVAL; + break; + } + if (ret != 0) { + return ret; + } + + nvgpu_log_info(g, "falcon ucode desc version %x len %x", version, desc_size); + + nvgpu_log_info(g, "falcon ucode desc stored size %x uncompressed size %x", + desc.stored_size, desc.uncompressed_size); + nvgpu_log_info(g, "falcon ucode desc virtualEntry %x, interfaceOffset %x", + desc.virtual_entry, desc.interface_offset); + nvgpu_log_info(g, "falcon ucode IMEM phys base %x, load size %x virt base %x sec base %x sec size %x", + desc.imem_phys_base, 
desc.imem_load_size, + desc.imem_virt_base, desc.imem_sec_base, + desc.imem_sec_size); + nvgpu_log_info(g, "falcon ucode DMEM offset %x phys base %x, load size %x", + desc.dmem_offset, desc.dmem_phys_base, + desc.dmem_load_size); + + if (desc.stored_size != desc.uncompressed_size) { + nvgpu_log_info(g, "does not match"); + return -EINVAL; + } + + ucode->code_entry_point = desc.virtual_entry; + ucode->bootloader = &g->bios->data[offset] + desc_size; + ucode->bootloader_phys_base = desc.imem_phys_base; + ucode->bootloader_size = nvgpu_safe_sub_u32(desc.imem_load_size, + desc.imem_sec_size); + ucode->ucode = ucode->bootloader + ucode->bootloader_size; + ucode->phys_base = nvgpu_safe_add_u32(ucode->bootloader_phys_base, + ucode->bootloader_size); + ucode->size = desc.imem_sec_size; + ucode->dmem = ucode->bootloader + desc.dmem_offset; + ucode->dmem_phys_base = desc.dmem_phys_base; + ucode->dmem_size = desc.dmem_load_size; + + offset = nvgpu_safe_add_u32(offset, U32(desc_size)); + offset = nvgpu_safe_add_u32(offset, desc.dmem_offset); + offset = nvgpu_safe_add_u32(offset, desc.interface_offset); + ret = nvgpu_bios_parse_appinfo_table(g, offset); + + return ret; +} + +static int nvgpu_bios_parse_falcon_ucode_table(struct gk20a *g, u32 offset) +{ + struct falcon_ucode_table_hdr_v1 hdr; + u32 i; + + nvgpu_memcpy((u8 *)&hdr, &g->bios->data[offset], sizeof(hdr)); + nvgpu_log_fn(g, "falcon ucode table ver %d size %d entrySize %d entryCount %d descVer %d descSize %d", + hdr.version, hdr.header_size, + hdr.entry_size, hdr.entry_count, + hdr.desc_version, hdr.desc_size); + + if (hdr.version != 1U) { + return -EINVAL; + } + + offset = nvgpu_safe_add_u32(offset, hdr.header_size); + + for (i = 0U; i < hdr.entry_count; i++) { + struct falcon_ucode_table_entry_v1 entry; + + nvgpu_memcpy((u8 *)&entry, &g->bios->data[offset], + sizeof(entry)); + + nvgpu_log_fn(g, "falcon ucode table entry appid %x targetId %x descPtr %x", + entry.application_id, entry.target_id, + entry.desc_ptr); + + if (entry.target_id == TARGET_ID_PMU && + entry.application_id == APPLICATION_ID_DEVINIT) { + int err; + + err = nvgpu_bios_parse_falcon_ucode_desc(g, + &g->bios->devinit, entry.desc_ptr); + if (err != 0) { + err = nvgpu_bios_parse_falcon_ucode_desc(g, + &g->bios->devinit, + nvgpu_safe_add_u32(entry.desc_ptr, + g->bios->expansion_rom_offset)); + } + + if (err != 0) { + nvgpu_err(g, + "could not parse devinit ucode desc"); + } + } else if (entry.target_id == TARGET_ID_PMU && + entry.application_id == APPLICATION_ID_PRE_OS) { + int err; + + err = nvgpu_bios_parse_falcon_ucode_desc(g, + &g->bios->preos, entry.desc_ptr); + if (err != 0) { + err = nvgpu_bios_parse_falcon_ucode_desc(g, + &g->bios->preos, + nvgpu_safe_add_u32(entry.desc_ptr, + g->bios->expansion_rom_offset)); + } + + if (err != 0) { + nvgpu_err(g, + "could not parse preos ucode desc"); + } + } else { + nvgpu_log_info(g, "App_id: %u and target_id: %u" + " combination not supported.", + entry.application_id, + entry.target_id); + } + + offset = nvgpu_safe_add_u32(offset, hdr.entry_size); + } + + return 0; +} + +static void nvgpu_bios_parse_falcon_data_v2(struct gk20a *g, u32 offset) +{ + struct falcon_data_v2 falcon_data; + int err; + + nvgpu_memcpy((u8 *)&falcon_data, &g->bios->data[offset], + sizeof(falcon_data)); + nvgpu_log_fn(g, "falcon ucode table ptr %x", + falcon_data.falcon_ucode_table_ptr); + err = nvgpu_bios_parse_falcon_ucode_table(g, + falcon_data.falcon_ucode_table_ptr); + if (err != 0) { + err = nvgpu_bios_parse_falcon_ucode_table(g, + 
nvgpu_safe_add_u32(falcon_data.falcon_ucode_table_ptr, + g->bios->expansion_rom_offset)); + } + + if (err != 0) { + nvgpu_err(g, "could not parse falcon ucode table"); + } +} + +void *nvgpu_bios_get_perf_table_ptrs(struct gk20a *g, + struct bit_token *ptoken, u8 table_id) +{ + u32 perf_table_id_offset = 0; + u8 *perf_table_ptr = NULL; + u8 data_size = 4; + + if (ptoken != NULL) { + + if (ptoken->token_id == TOKEN_ID_VIRT_PTRS) { + perf_table_id_offset = + *((u16 *)((uintptr_t)g->bios->data + + ptoken->data_ptr + + (U16(table_id) * + U16(PERF_PTRS_WIDTH_16)))); + data_size = PERF_PTRS_WIDTH_16; + } else { + perf_table_id_offset = + *((u32 *)((uintptr_t)g->bios->data + + ptoken->data_ptr + + (U16(table_id) * + U16(PERF_PTRS_WIDTH)))); + data_size = PERF_PTRS_WIDTH; + } + } else { + return (void *)perf_table_ptr; + } + + if (table_id < (ptoken->data_size/data_size)) { + + nvgpu_log_info(g, "Perf_Tbl_ID-offset 0x%x Tbl_ID_Ptr-offset- 0x%x", + (ptoken->data_ptr + + (U16(table_id) * U16(data_size))), + perf_table_id_offset); + + if (perf_table_id_offset != 0U) { + /* check if perf_table_id_offset is beyond base rom */ + if (perf_table_id_offset > g->bios->base_rom_size) { + u32 tmp_index = nvgpu_safe_add_u32( + g->bios->expansion_rom_offset, + perf_table_id_offset); + perf_table_ptr = &g->bios->data[tmp_index]; + } else { + perf_table_ptr = + &g->bios->data[perf_table_id_offset]; + } + } else { + nvgpu_warn(g, "PERF TABLE ID %d is NULL", + table_id); + } + } else { + nvgpu_warn(g, "INVALID PERF TABLE ID - %d ", table_id); + } + + return (void *)perf_table_ptr; +} + +static void nvgpu_bios_parse_bit(struct gk20a *g, u32 offset) +{ + struct bios_bit bit; + struct bit_token token; + u32 i; + + nvgpu_log_fn(g, " "); + nvgpu_memcpy((u8 *)&bit, &g->bios->data[offset], sizeof(bit)); + + nvgpu_log_info(g, "BIT header: %04x %08x", bit.id, bit.signature); + nvgpu_log_info(g, "tokens: %d entries * %d bytes", + bit.token_entries, bit.token_size); + + offset = nvgpu_safe_add_u32(offset, bit.header_size); + for (i = 0U; i < bit.token_entries; i++) { + nvgpu_memcpy((u8 *)&token, &g->bios->data[offset], + sizeof(token)); + + nvgpu_log_info(g, "BIT token id %d ptr %d size %d ver %d", + token.token_id, token.data_ptr, + token.data_size, token.data_version); + + switch (token.token_id) { + case TOKEN_ID_BIOSDATA: + nvgpu_bios_parse_biosdata(g, token.data_ptr); + break; + case TOKEN_ID_NVINIT_PTRS: + nvgpu_bios_parse_nvinit_ptrs(g, token.data_ptr); + break; + case TOKEN_ID_FALCON_DATA: + if (token.data_version == 2U) { + nvgpu_bios_parse_falcon_data_v2(g, + token.data_ptr); + } + break; + case TOKEN_ID_PERF_PTRS: + g->bios->perf_token = + (struct bit_token *) + ((uintptr_t)g->bios->data + offset); + break; + case TOKEN_ID_CLOCK_PTRS: + g->bios->clock_token = + (struct bit_token *) + ((uintptr_t)g->bios->data + offset); + break; + case TOKEN_ID_VIRT_PTRS: + g->bios->virt_token = + (struct bit_token *) + ((uintptr_t)g->bios->data + offset); + break; + case TOKEN_ID_MEMORY_PTRS: + nvgpu_bios_parse_memory_ptrs(g, token.data_ptr, + token.data_version); + break; + case TOKEN_ID_BIOS_BOARD_ID_PTRS: + nvgpu_bios_parse_bios_board_id_ptrs(g, + token.data_ptr); + break; + default: + nvgpu_log_info(g, "Token id %d not supported", + token.token_id); + break; + } + + offset = nvgpu_safe_add_u32(offset, bit.token_size); + } + nvgpu_log_fn(g, "done"); +} + +static u32 nvgpu_bios_readbyte_impl(struct gk20a *g, u32 offset) +{ + return g->bios->data[offset]; +} + +u8 nvgpu_bios_read_u8(struct gk20a *g, u32 offset) +{ + u32 val = 
nvgpu_bios_readbyte_impl(g, offset); + + return nvgpu_safe_cast_u32_to_u8(val); +} + +s8 nvgpu_bios_read_s8(struct gk20a *g, u32 offset) +{ + u32 val; + + val = nvgpu_bios_readbyte_impl(g, offset); + val = ((val & 0x80U) != 0U) ? (val | ~0xffU) : val; + + return nvgpu_safe_cast_u32_to_s8(val); +} + +u16 nvgpu_bios_read_u16(struct gk20a *g, u32 offset) +{ + u32 val; + + nvgpu_assert(offset < U32_MAX); + /* + * cast up to u32 because cast to u16 promotes into an int and + * causes a CERT-C INT31 violation + */ + val = U32(nvgpu_bios_readbyte_impl(g, offset) | + (U32(nvgpu_bios_readbyte_impl(g, offset+1U)) << 8U)); + + return nvgpu_safe_cast_u32_to_u16(val); +} + +u32 nvgpu_bios_read_u32(struct gk20a *g, u32 offset) +{ + u32 val; + + nvgpu_assert(offset < (U32_MAX - 3U)); + val = U32(nvgpu_bios_readbyte_impl(g, offset) | + (nvgpu_bios_readbyte_impl(g, offset+1U) << 8U) | + (nvgpu_bios_readbyte_impl(g, offset+2U) << 16U) | + (nvgpu_bios_readbyte_impl(g, offset+3U) << 24U)); + + return val; +} + +#ifdef CONFIG_NVGPU_DGPU +bool nvgpu_bios_wait_for_init_done(struct gk20a *g) +{ + struct nvgpu_timeout timeout; + int err; + + err = nvgpu_timeout_init(g, &timeout, + NVGPU_BIOS_DEVINIT_VERIFY_TIMEOUT_MS, NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + return false; + } + + /* Wait till vbios is completed */ + do { + if (g->bios_is_init == true) { + return true; + } + nvgpu_msleep(NVGPU_BIOS_DEVINIT_VERIFY_COMPLETION_MS); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (g->bios_is_init == true) { + return true; + } else { + return false; + } +} +#endif diff --git a/drivers/gpu/nvgpu/common/vbios/bios_sw_gv100.c b/drivers/gpu/nvgpu/common/vbios/bios_sw_gv100.c new file mode 100644 index 000000000..ad6371f09 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vbios/bios_sw_gv100.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "bios_sw_gv100.h" + +#define BIOS_SIZE 0x90000 + +#define PMU_BOOT_TIMEOUT_DEFAULT 100U /* usec */ +#define PMU_BOOT_TIMEOUT_MAX 2000000U /* usec */ + +#define SCRATCH_PREOS_PROGRESS 6U +#define PREOS_PROGRESS_MASK(r) (((r) >> 12U) & 0xfU) +#define PREOS_PROGRESS_NOT_STARTED 0U +#define PREOS_PROGRESS_STARTED 1U +#define PREOS_PROGRESS_EXIT 2U +#define PREOS_PROGRESS_EXIT_SECUREMODE 3U +#define PREOS_PROGRESS_ABORTED 6U + +#define SCRATCH_PMU_EXIT_AND_HALT 1U +#define PMU_EXIT_AND_HALT_SET(r, v) (((r) & ~0x200U) | (v)) +#define PMU_EXIT_AND_HALT_YES BIT32(9) + +#define SCRATCH_PRE_OS_RELOAD 1U +#define PRE_OS_RELOAD_SET(r, v) (((r) & ~0x100U) | (v)) +#define PRE_OS_RELOAD_YES BIT32(8) + + +void gv100_bios_preos_reload_check(struct gk20a *g) +{ + u32 progress = g->ops.bus.read_sw_scratch(g, SCRATCH_PREOS_PROGRESS); + + if (PREOS_PROGRESS_MASK(progress) != PREOS_PROGRESS_NOT_STARTED) { + u32 reload = g->ops.bus.read_sw_scratch(g, + SCRATCH_PRE_OS_RELOAD); + + g->ops.bus.write_sw_scratch(g, SCRATCH_PRE_OS_RELOAD, + PRE_OS_RELOAD_SET(reload, PRE_OS_RELOAD_YES)); + } +} + +int gv100_bios_preos_wait_for_halt(struct gk20a *g) +{ + int err = -EINVAL; + u32 progress; + u32 tmp; + bool preos_completed; + struct nvgpu_timeout timeout; + + nvgpu_udelay(PMU_BOOT_TIMEOUT_DEFAULT); + + /* Check the progress */ + progress = g->ops.bus.read_sw_scratch(g, SCRATCH_PREOS_PROGRESS); + + if (PREOS_PROGRESS_MASK(progress) == PREOS_PROGRESS_STARTED) { + err = 0; + + /* Complete the handshake */ + tmp = g->ops.bus.read_sw_scratch(g, SCRATCH_PMU_EXIT_AND_HALT); + + g->ops.bus.write_sw_scratch(g, SCRATCH_PMU_EXIT_AND_HALT, + PMU_EXIT_AND_HALT_SET(tmp, PMU_EXIT_AND_HALT_YES)); + + err = nvgpu_timeout_init(g, &timeout, + PMU_BOOT_TIMEOUT_MAX / + PMU_BOOT_TIMEOUT_DEFAULT, + NVGPU_TIMER_RETRY_TIMER); + if (err != 0) { + nvgpu_err(g, "NVGPU timeout init failed"); + return err; + } + + do { + progress = g->ops.bus.read_sw_scratch(g, + SCRATCH_PREOS_PROGRESS); + preos_completed = g->ops.falcon.is_falcon_cpu_halted( + g->pmu->flcn) && + (PREOS_PROGRESS_MASK(progress) == + PREOS_PROGRESS_EXIT); + + nvgpu_udelay(PMU_BOOT_TIMEOUT_DEFAULT); + } while (!preos_completed && + (nvgpu_timeout_expired(&timeout) == 0)); + } + + return err; +} + +int gv100_bios_devinit(struct gk20a *g) +{ + int err = 0; + bool devinit_completed; + struct nvgpu_timeout timeout; + u32 top_scratch1_reg; + + nvgpu_log_fn(g, " "); + + if (nvgpu_falcon_reset(g->pmu->flcn) != 0) { + err = -ETIMEDOUT; + goto out; + } + + err = nvgpu_falcon_copy_to_imem(g->pmu->flcn, + g->bios->devinit.bootloader_phys_base, + g->bios->devinit.bootloader, + g->bios->devinit.bootloader_size, + 0, 0, g->bios->devinit.bootloader_phys_base >> 8); + if (err != 0) { + nvgpu_err(g, "bios devinit bootloader copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_copy_to_imem(g->pmu->flcn, g->bios->devinit.phys_base, + g->bios->devinit.ucode, + g->bios->devinit.size, + 0, 1, g->bios->devinit.phys_base >> 8); + if (err != 0) { + nvgpu_err(g, "bios devinit ucode copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_copy_to_dmem(g->pmu->flcn, + g->bios->devinit.dmem_phys_base, + g->bios->devinit.dmem, + g->bios->devinit.dmem_size, + 0); + if (err != 0) { + nvgpu_err(g, "bios devinit dmem copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_copy_to_dmem(g->pmu->flcn, + g->bios->devinit_tables_phys_base, + g->bios->devinit_tables, + g->bios->devinit_tables_size, + 0); + if (err != 0) { + nvgpu_err(g, 
"fbios devinit tables copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_copy_to_dmem(g->pmu->flcn, + g->bios->devinit_script_phys_base, + g->bios->bootscripts, + g->bios->bootscripts_size, + 0); + if (err != 0) { + nvgpu_err(g, "bios devinit bootscripts copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_bootstrap(g->pmu->flcn, + g->bios->devinit.code_entry_point); + if (err != 0) { + nvgpu_err(g, "falcon bootstrap failed %d", err); + goto out; + } + + err = nvgpu_timeout_init(g, &timeout, + PMU_BOOT_TIMEOUT_MAX / + PMU_BOOT_TIMEOUT_DEFAULT, + NVGPU_TIMER_RETRY_TIMER); + if (err != 0) { + nvgpu_err(g, "nvgpu timeout init failed %d", err); + goto out; + } + do { + top_scratch1_reg = g->ops.top.read_top_scratch1_reg(g); + devinit_completed = ((g->ops.falcon.is_falcon_cpu_halted( + g->pmu->flcn) != 0U) && + (g->ops.top.top_scratch1_devinit_completed(g, + top_scratch1_reg)) != 0U); + + nvgpu_udelay(PMU_BOOT_TIMEOUT_DEFAULT); + } while (!devinit_completed && (nvgpu_timeout_expired(&timeout) == 0)); + + if (nvgpu_timeout_peek_expired(&timeout)) { + err = -ETIMEDOUT; + goto out; + } + + err = nvgpu_falcon_clear_halt_intr_status(g->pmu->flcn, + nvgpu_get_poll_timeout(g)); + if (err != 0) { + nvgpu_err(g, "falcon_clear_halt_intr_status failed %d", err); + goto out; + } + +out: + nvgpu_log_fn(g, "done"); + return err; +} + +int gv100_bios_init(struct gk20a *g) +{ + unsigned int i; + int err; + + nvgpu_log_fn(g, " "); + + if (g->bios_is_init) { + return 0; + } + + nvgpu_log_info(g, "reading bios from EEPROM"); + g->bios->size = BIOS_SIZE; + g->bios->data = nvgpu_vmalloc(g, BIOS_SIZE); + if (g->bios->data == NULL) { + return -ENOMEM; + } + + if (g->ops.xve.disable_shadow_rom != NULL) { + g->ops.xve.disable_shadow_rom(g); + } + + for (i = 0U; i < g->bios->size/4U; i++) { + u32 val = be32_to_cpu(gk20a_readl(g, 0x300000U + i*4U)); + + g->bios->data[(i*4U)] = (val >> 24U) & 0xffU; + g->bios->data[(i*4U)+1U] = (val >> 16U) & 0xffU; + g->bios->data[(i*4U)+2U] = (val >> 8U) & 0xffU; + g->bios->data[(i*4U)+3U] = val & 0xffU; + } + + if (g->ops.xve.enable_shadow_rom != NULL) { + g->ops.xve.enable_shadow_rom(g); + } + + err = nvgpu_bios_parse_rom(g); + if (err != 0) { + goto free_firmware; + } + + if (g->bios->verify_version != NULL) { + if (g->bios->verify_version(g) < 0) { + err = -EINVAL; + goto free_firmware; + } + } + + nvgpu_log_fn(g, "done"); + + err = nvgpu_bios_devinit(g, g->bios); + if (err != 0) { + nvgpu_err(g, "devinit failed"); + goto free_firmware; + } + + if (nvgpu_is_enabled(g, NVGPU_PMU_RUN_PREOS) && + (g->bios->preos_bios != NULL)) { + err = g->bios->preos_bios(g); + if (err != 0) { + nvgpu_err(g, "pre-os failed"); + goto free_firmware; + } + } + + if (g->bios->verify_devinit != NULL) { + err = g->bios->verify_devinit(g); + if (err != 0) { + nvgpu_err(g, "devinit status verification failed"); + goto free_firmware; + } + } + + g->bios_is_init = true; + + return 0; + +free_firmware: + if (g->bios->data != NULL) { + nvgpu_vfree(g, g->bios->data); + } + return err; +} + +int gv100_bios_preos(struct gk20a *g) +{ + int err = 0; + + nvgpu_log_fn(g, " "); + + if (nvgpu_falcon_reset(g->pmu->flcn) != 0) { + err = -ETIMEDOUT; + goto out; + } + + if (g->bios->preos_reload_check != NULL) { + g->bios->preos_reload_check(g); + } + + err = nvgpu_falcon_copy_to_imem(g->pmu->flcn, + g->bios->preos.bootloader_phys_base, + g->bios->preos.bootloader, + g->bios->preos.bootloader_size, + 0, 0, g->bios->preos.bootloader_phys_base >> 8); + + if (err != 0) { + nvgpu_err(g, "bios preos bootloader 
copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_copy_to_imem(g->pmu->flcn, g->bios->preos.phys_base, + g->bios->preos.ucode, + g->bios->preos.size, + 0, 1, g->bios->preos.phys_base >> 8); + + if (err != 0) { + nvgpu_err(g, "bios preos ucode copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_copy_to_dmem(g->pmu->flcn, g->bios->preos.dmem_phys_base, + g->bios->preos.dmem, + g->bios->preos.dmem_size, + 0); + + if (err != 0) { + nvgpu_err(g, "bios preos dmem copy failed %d", err); + goto out; + } + + err = nvgpu_falcon_bootstrap(g->pmu->flcn, + g->bios->preos.code_entry_point); + + if (err != 0) { + nvgpu_err(g, "falcon bootstrap failed %d", err); + goto out; + } + + err = nvgpu_bios_preos_wait_for_halt(g, g->bios); + if (err != 0) { + nvgpu_err(g, "preos_wait_for_halt failed %d", err); + goto out; + } + + err = nvgpu_falcon_clear_halt_intr_status(g->pmu->flcn, + nvgpu_get_poll_timeout(g)); + if (err != 0) { + nvgpu_err(g, "falcon_clear_halt_intr_status failed %d", err); + goto out; + } + +out: + nvgpu_log_fn(g, "done"); + return err; +} + +void nvgpu_gv100_bios_sw_init(struct gk20a *g, + struct nvgpu_bios *bios) +{ + bios->init = gv100_bios_init; + bios->verify_version = NULL; + bios->preos_wait_for_halt = gv100_bios_preos_wait_for_halt; + bios->preos_reload_check = gv100_bios_preos_reload_check; + bios->preos_bios = gv100_bios_preos; + bios->devinit_bios = gv100_bios_devinit; + bios->verify_devinit = NULL; +} diff --git a/drivers/gpu/nvgpu/common/vbios/bios_sw_gv100.h b/drivers/gpu/nvgpu/common/vbios/bios_sw_gv100.h new file mode 100644 index 000000000..aff9ee602 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vbios/bios_sw_gv100.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_BIOS_SW_GV100_H +#define NVGPU_BIOS_SW_GV100_H + +struct gk20a; +struct nvgpu_bios; + +void gv100_bios_preos_reload_check(struct gk20a *g); +int gv100_bios_preos_wait_for_halt(struct gk20a *g); +int gv100_bios_devinit(struct gk20a *g); +int gv100_bios_preos(struct gk20a *g); +int gv100_bios_init(struct gk20a *g); +void nvgpu_gv100_bios_sw_init(struct gk20a *g, + struct nvgpu_bios *bios); + +#endif /* NVGPU_BIOS_SW_GV100_H */ diff --git a/drivers/gpu/nvgpu/common/vbios/bios_sw_tu104.c b/drivers/gpu/nvgpu/common/vbios/bios_sw_tu104.c new file mode 100644 index 000000000..f9b49bd88 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vbios/bios_sw_tu104.c @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#include "bios_sw_gv100.h" +#include "bios_sw_tu104.h" + +#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT_PROGRESS_MASK \ + 0xFFU +#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT_PROGRESS_COMPLETED \ + 0xFFU + +#define NVGPU_PG189_MIN_VBIOS 0x90041800U + +#define NVGPU_PG189_0600_VBIOS 0x90049500U +#define NVGPU_PG189_0600_QS_VBIOS 0x9004A200U +#define NVGPU_PG189_0601_VBIOS 0x90045a00U +#define NVGPU_PG189_0610_QS_VBIOS 0x90049100U +#define NVGPU_PG189_0601_QS_VBIOS 0x90049600U + +struct nvgpu_vbios_board { + u16 board_id; + u32 vbios_version; +}; + +#define NVGPU_PG189_NUM_VBIOS_BOARDS 5U + +static struct nvgpu_vbios_board vbios_boards[NVGPU_PG189_NUM_VBIOS_BOARDS] = { + /* SKU 600 ES/CS, SKU 606*/ + [0] = { + .board_id = 0x0068, + .vbios_version = NVGPU_PG189_0600_VBIOS, + }, + /* SKU 600 QS */ + [1] = { + .board_id = 0x0183, + .vbios_version = NVGPU_PG189_0600_QS_VBIOS, + }, + /* SKU 601 CS */ + [2] = { + .board_id = 0x00E8, + .vbios_version = NVGPU_PG189_0601_VBIOS, + }, + /* SKU 610 QS */ + [3] = { + .board_id = 0x01a3, + .vbios_version = NVGPU_PG189_0610_QS_VBIOS, + }, + /* SKU 601 QS */ + [4] = { + .board_id = 0x01cc, + .vbios_version = NVGPU_PG189_0601_QS_VBIOS, + }, +}; + +static int tu104_bios_verify_version(struct gk20a *g) +{ + struct nvgpu_vbios_board *board = NULL; + u32 i; + + nvgpu_info(g, "VBIOS board id %04x", g->bios->vbios_board_id); + + nvgpu_info(g, "VBIOS version %08x:%02x\n", + g->bios->vbios_version, + g->bios->vbios_oem_version); + + if (g->bios->vbios_version < NVGPU_PG189_MIN_VBIOS) { + nvgpu_err(g, "unsupported VBIOS version %08x", + g->bios->vbios_version); + return -EINVAL; + } + + for (i = 0; i < NVGPU_PG189_NUM_VBIOS_BOARDS; i++) { + if (g->bios->vbios_board_id == vbios_boards[i].board_id) { + board = &vbios_boards[i]; + } + } + + if (board == NULL) { + nvgpu_warn(g, "unknown board id %04x", + g->bios->vbios_board_id); + return 0; + } + + if ((board->vbios_version != 0U) && + (g->bios->vbios_version < board->vbios_version)) { + nvgpu_warn(g, "VBIOS version should be at least %08x", + board->vbios_version); + } + + return 0; +} + +int tu104_bios_verify_devinit(struct gk20a *g) +{ + struct nvgpu_timeout timeout; + u32 val; + u32 aon_secure_scratch_reg; + int err; + + err = nvgpu_timeout_init(g, &timeout, + NVGPU_BIOS_DEVINIT_VERIFY_TIMEOUT_MS, NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + return err; + } + + do { + aon_secure_scratch_reg = g->ops.bios.get_aon_secure_scratch_reg(g, 0); + val = nvgpu_readl(g, aon_secure_scratch_reg); + val &= NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT_PROGRESS_MASK; + + if (val == NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT_PROGRESS_COMPLETED) { + nvgpu_log_info(g, "devinit complete"); + return 0; + } + + nvgpu_udelay(NVGPU_BIOS_DEVINIT_VERIFY_DELAY_US); + } while (nvgpu_timeout_expired(&timeout) == 0); + + return -ETIMEDOUT; +} + +int tu104_bios_init(struct gk20a *g) +{ +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + return 0; + } +#endif + + return gv100_bios_init(g); +} + +void nvgpu_tu104_bios_sw_init(struct gk20a *g, + struct nvgpu_bios *bios) +{ + bios->init = tu104_bios_init; + bios->verify_version = tu104_bios_verify_version; + bios->preos_wait_for_halt = NULL; + bios->preos_reload_check = NULL; + bios->preos_bios = NULL; + bios->devinit_bios = NULL; + bios->verify_devinit = tu104_bios_verify_devinit; +} + diff --git a/drivers/gpu/nvgpu/common/vbios/bios_sw_tu104.h b/drivers/gpu/nvgpu/common/vbios/bios_sw_tu104.h new file mode 
100644 index 000000000..eb1db454b --- /dev/null +++ b/drivers/gpu/nvgpu/common/vbios/bios_sw_tu104.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_BIOS_SW_TU104_H +#define NVGPU_BIOS_SW_TU104_H + +#define NVGPU_BIOS_DEVINIT_VERIFY_TIMEOUT_MS 1000U +#define NVGPU_BIOS_DEVINIT_VERIFY_DELAY_US 10U +#define NVGPU_BIOS_DEVINIT_VERIFY_COMPLETION_MS 1U + +struct gk20a; + +int tu104_bios_verify_devinit(struct gk20a *g); +int tu104_bios_init(struct gk20a *g); +void nvgpu_tu104_bios_sw_init(struct gk20a *g, + struct nvgpu_bios *bios); + +#endif /* NVGPU_BIOS_SW_TU104_H */ diff --git a/drivers/gpu/nvgpu/common/vbios/nvlink_bios.c b/drivers/gpu/nvgpu/common/vbios/nvlink_bios.c new file mode 100644 index 000000000..b2588a5fb --- /dev/null +++ b/drivers/gpu/nvgpu/common/vbios/nvlink_bios.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +int nvgpu_bios_get_nvlink_config_data(struct gk20a *g) +{ + int ret = 0; + struct nvlink_config_data_hdr_v1 config; + + if (g->bios->nvlink_config_data_offset == 0U) { + return -EINVAL; + } + + nvgpu_memcpy((u8 *)&config, + &g->bios->data[g->bios->nvlink_config_data_offset], + sizeof(config)); + + if (config.version != NVLINK_CONFIG_DATA_HDR_VER_10) { + nvgpu_err(g, "unsupported nvlink bios version: 0x%x", + config.version); + return -EINVAL; + } + + switch (config.hdr_size) { + case NVLINK_CONFIG_DATA_HDR_12_SIZE: + g->nvlink.ac_coupling_mask = config.ac_coupling_mask; + g->nvlink.train_at_boot = config.train_at_boot; + g->nvlink.link_disable_mask = config.link_disable_mask; + g->nvlink.link_mode_mask = config.link_mode_mask; + g->nvlink.link_refclk_mask = config.link_refclk_mask; + break; + case NVLINK_CONFIG_DATA_HDR_11_SIZE: + g->nvlink.train_at_boot = config.train_at_boot; + g->nvlink.link_disable_mask = config.link_disable_mask; + g->nvlink.link_mode_mask = config.link_mode_mask; + g->nvlink.link_refclk_mask = config.link_refclk_mask; + break; + case NVLINK_CONFIG_DATA_HDR_10_SIZE: + g->nvlink.link_disable_mask = config.link_disable_mask; + g->nvlink.link_mode_mask = config.link_mode_mask; + g->nvlink.link_refclk_mask = config.link_refclk_mask; + break; + default: + nvgpu_err(g, "invalid nvlink bios config size"); + ret = -EINVAL; + break; + } + + return ret; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c new file mode 100644 index 000000000..aeb4c2cd2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c @@ -0,0 +1,53 @@ +/* + * Virtualized GPU CBC + * + * Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "cbc_vgpu.h" + +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + u32 max_comptag_lines = 0; + int err; + + nvgpu_log_fn(g, " "); + + cbc->comptags_per_cacheline = priv->constants.comptags_per_cacheline; + max_comptag_lines = priv->constants.comptag_lines; + + if (max_comptag_lines < 2) { + return -ENXIO; + } + + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); + if (err) { + return err; + } + + cbc->max_comptag_lines = max_comptag_lines; + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h new file mode 100644 index 000000000..bc9e5614d --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CBC_VGPU_H +#define NVGPU_CBC_VGPU_H + +#ifdef CONFIG_NVGPU_COMPRESSION + +struct gk20a; +struct nvgpu_cbc; + +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); + +#endif +#endif /* NVGPU_CBC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/ce_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ce_vgpu.c new file mode 100644 index 000000000..04bd8b5db --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ce_vgpu.c @@ -0,0 +1,36 @@ +/* + * Virtualized GPU CE2 + * + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include + +u32 vgpu_ce_get_num_pce(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.num_pce; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/clk_vgpu.c new file mode 100644 index 000000000..066a8fc02 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/clk_vgpu.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "clk_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err; + unsigned long ret = 0; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_SYSCLK: + case CTRL_CLK_DOMAIN_GPCCLK: + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + } else { + /* return frequency in Hz */ + ret = p->rate; + } + break; + case CTRL_CLK_DOMAIN_PWRCLK: + nvgpu_err(g, "unsupported clock: %u", api_domain); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return ret; +} + +static int vgpu_clk_set_rate(struct gk20a *g, + u32 api_domain, unsigned long rate) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + + p->rate = rate; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + } + break; + case CTRL_CLK_DOMAIN_PWRCLK: + nvgpu_err(g, "unsupported clock: %u", api_domain); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) +{ + unsigned long *freqs; + int num_freqs = 0; + int err; + unsigned long ret = 0; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + err = vgpu_clk_get_freqs(g, &freqs, &num_freqs); + if (err == 0) { + /* return freq in Hz */ + ret = freqs[num_freqs - 1]; + } + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return ret; +} + +static int vgpu_clk_get_round_rate(struct gk20a *g, u32 api_domain, + unsigned long rate_target, unsigned long *rounded_rate) +{ + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + *rounded_rate = rate_target; + err = 0; + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +static int vgpu_clk_get_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz) +{ + unsigned long *freqs; + int num_freqs = 0; + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + err = vgpu_clk_get_freqs(g, &freqs, &num_freqs); + if (err == 0) { + /* return freq in MHz */ + *min_mhz = (u16)(freqs[0] / 1000000); + *max_mhz = (u16)(freqs[num_freqs - 1] / 1000000); + } + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +static int vgpu_clk_get_f_points(struct gk20a *g, + u32 api_domain, u32 *num_points, u16 *freqs_mhz) +{ + unsigned long *freqs; + int num_freqs = 0; + u32 i; + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + err = vgpu_clk_get_freqs(g, &freqs, &num_freqs); + if (err) { + return err; + } + + if (num_points == NULL) { + return -EINVAL; + } + + if (*num_points != 0U) { + if (freqs == NULL || (*num_points > (u32)num_freqs)) { + return -EINVAL; + } + } + + if (*num_points == 0) { + *num_points = num_freqs; + } else { + for (i = 0; i < *num_points; i++) { + /* return freq in MHz */ + freqs_mhz[i] = (u16)(freqs[i] / 1000000); + } + } + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +void vgpu_init_clk_support(struct gk20a *g) +{ + g->ops.clk.get_rate = vgpu_clk_get_rate; + g->ops.clk.set_rate = vgpu_clk_set_rate; + g->ops.clk.get_maxrate = vgpu_clk_get_maxrate; + g->ops.clk.clk_get_round_rate = vgpu_clk_get_round_rate; + g->ops.clk.get_clk_range = vgpu_clk_get_range; + g->ops.clk.clk_domain_get_f_points = vgpu_clk_get_f_points; + g->ops.clk.measure_freq = nvgpu_clk_measure_freq; +} + +int vgpu_clk_get_freqs(struct gk20a *g, unsigned long **freqs_out, + int *num_freqs) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_get_gpu_freq_table_params *p = + &msg.params.get_gpu_freq_table; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + u64 *freqs; + int err = 0; + void *handle = NULL; + size_t oob_size; + unsigned int i; + + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&priv->vgpu_clk_get_freq_lock); + + if (priv->freqs != NULL) { + goto done; + } + + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; + msg.handle = vgpu_get_handle(g); + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + goto done; + } + + handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, (void **)&freqs, &oob_size); + if (!handle) { + nvgpu_err(g, "failed to get ivm handle"); + err = -EINVAL; + goto done; + } + + priv->freqs = nvgpu_kzalloc(g, sizeof(*priv->freqs) * (p->num_freqs)); + if (!priv->freqs) { + nvgpu_err(g, "failed to allocate memory"); + vgpu_ivc_oob_put_ptr(handle); + err = -ENOMEM; + goto done; + } + priv->num_freqs = p->num_freqs; + + for (i = 0; i < priv->num_freqs; i++) { + /* store frequency in Hz */ + priv->freqs[i] = (unsigned long)(freqs[i]); + } + + vgpu_ivc_oob_put_ptr(handle); + +done: + if (err == 0) { + *num_freqs = priv->num_freqs; + *freqs_out = priv->freqs; + } + + nvgpu_mutex_release(&priv->vgpu_clk_get_freq_lock); + + return err; +} + +int vgpu_clk_cap_rate(struct gk20a *g, unsigned long rate) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err = 0; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + p->rate = rate; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + return err; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/clk_vgpu.h new file mode 100644 index 000000000..ec3d0a387 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/clk_vgpu.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CLK_VGPU_H +#define NVGPU_CLK_VGPU_H + +void vgpu_init_clk_support(struct gk20a *g); +int vgpu_clk_get_freqs(struct gk20a *g, unsigned long **freqs, int *num_freqs); +int vgpu_clk_cap_rate(struct gk20a *g, unsigned long rate); +#endif /* NVGPU_CLK_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c new file mode 100644 index 000000000..14c5c0ea5 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2015-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debugger_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_exec_regops(struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_dbg_reg_op *ops, + u32 num_ops, + u32 ctx_wr_count, + u32 ctx_rd_count, + u32 *flags) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_reg_ops_params *p = &msg.params.reg_ops; + void *oob; + size_t oob_size, ops_size; + void *handle = NULL; + int err = 0; + + nvgpu_log_fn(g, " "); + BUG_ON(sizeof(*ops) != sizeof(struct tegra_vgpu_reg_op)); + + handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + &oob, &oob_size); + if (!handle) { + return -EINVAL; + } + + ops_size = sizeof(*ops) * num_ops; + if (oob_size < ops_size) { + err = -ENOMEM; + goto fail; + } + + nvgpu_memcpy((u8 *)oob, (u8 *)ops, ops_size); + + msg.cmd = TEGRA_VGPU_CMD_REG_OPS; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg ? tsg->tsgid : U32_MAX; + p->num_ops = num_ops; + p->flags = *flags; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err == 0) { + nvgpu_memcpy((u8 *)ops, (u8 *)oob, ops_size); + } + *flags = p->flags; + +fail: + vgpu_ivc_oob_put_ptr(handle); + return err; +} + +int vgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s, + bool disable_powergate) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_set_powergate_params *p = &msg.params.set_powergate; + int err = 0; + u32 mode; + struct gk20a *g = dbg_s->g; + + nvgpu_log_fn(g, " "); + + /* Just return if requested mode is the same as the session's mode */ + if (disable_powergate) { + if (dbg_s->is_pg_disabled) { + return 0; + } + dbg_s->is_pg_disabled = true; + mode = TEGRA_VGPU_POWERGATE_MODE_DISABLE; + } else { + if (!dbg_s->is_pg_disabled) { + return 0; + } + dbg_s->is_pg_disabled = false; + mode = TEGRA_VGPU_POWERGATE_MODE_ENABLE; + } + + msg.cmd = TEGRA_VGPU_CMD_SET_POWERGATE; + msg.handle = vgpu_get_handle(dbg_s->g); + p->mode = mode; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + return err; +} + +int vgpu_tsg_set_long_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_tsg_timeslice_params *p = + &msg.params.tsg_timeslice; + int err; + struct gk20a *g = tsg->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_TSG_SET_LONG_TIMESLICE; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + p->timeslice_us = timeslice_us; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + if (!err) { + tsg->timeslice_us = timeslice_us; + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h new file mode 100644 index 000000000..f2fcaa651 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_DBG_VGPU_H +#define NVGPU_DBG_VGPU_H + +struct dbg_session_gk20a; +struct nvgpu_dbg_reg_op; +struct gk20a; +struct nvgpu_channel; + +int vgpu_exec_regops(struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_dbg_reg_op *ops, + u32 num_ops, + u32 ctx_wr_count, + u32 ctx_rd_count, + u32 *flags); +int vgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s, + bool disable_powergate); +int vgpu_tsg_set_long_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us); + +#endif /* NVGPU_DBG_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/ecc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ecc_vgpu.c new file mode 100644 index 000000000..e65ac4398 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ecc_vgpu.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
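The debugger and clock calls above all follow the same RPC shape, which repeats throughout this commit: fill a struct tegra_vgpu_cmd_msg with a TEGRA_VGPU_CMD_* id, the vgpu handle and command-specific params, send it with vgpu_comm_sendrecv(), and fold the transport status and the server's msg.ret into a single error. A condensed sketch of that idiom; the command id and params are placeholders, not a real request.

/*
 * Sketch of the recurring vgpu RPC idiom. The 0 stands in for one of the
 * TEGRA_VGPU_CMD_* values and the params union member is omitted.
 */
static int example_vgpu_rpc(struct gk20a *g)
{
	struct tegra_vgpu_cmd_msg msg = {};
	int err;

	msg.cmd = 0;			/* e.g. TEGRA_VGPU_CMD_... */
	msg.handle = vgpu_get_handle(g);
	/* msg.params.<command specific fields> = ...; */

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	/* a transport failure wins, otherwise report the server's verdict */
	err = err ? err : msg.ret;
	if (err != 0) {
		nvgpu_err(g, "%s failed - %d", __func__, err);
	}

	return err;
}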
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "common/vgpu/ecc_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_ecc_get_info(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_ecc_info_params *p = &msg.params.ecc_info; + struct tegra_vgpu_ecc_info_entry *entry; + struct vgpu_ecc_stat *stats; + void *handle; + int err, i, count; + size_t oob_size; + + msg.cmd = TEGRA_VGPU_CMD_GET_ECC_INFO; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (unlikely(err)) { + nvgpu_err(g, "vgpu get_ecc_info failed, err=%d", err); + return err; + } + + count = p->ecc_stats_count; + + handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + (void **)&entry, &oob_size); + if (unlikely(!handle)) + return -EINVAL; + + if (unlikely(oob_size < count * sizeof(*entry))) { + err = -E2BIG; + goto out; + } + + stats = nvgpu_kzalloc(g, count * sizeof(*stats)); + if (unlikely(!stats)) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < count; i++) { + stats[i].ecc_id = entry[i].ecc_id; + (void) strncpy(stats[i].name, entry[i].name, + NVGPU_ECC_STAT_NAME_MAX_SIZE); + } + + priv->ecc_stats = stats; + priv->ecc_stats_count = count; +out: + vgpu_ivc_oob_put_ptr(handle); + return err; +} + +void vgpu_ecc_remove_info(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + priv->ecc_stats_count = 0; + + if (priv->ecc_stats) { + nvgpu_kfree(g, priv->ecc_stats); + priv->ecc_stats = NULL; + } +} diff --git a/drivers/gpu/nvgpu/common/vgpu/ecc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/ecc_vgpu.h new file mode 100644 index 000000000..3a66d92dd --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ecc_vgpu.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
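vgpu_exec_regops() and vgpu_ecc_get_info() above move their bulk payloads through the out-of-band (OOB) area of the IVC command queue rather than through the fixed-size message. A reduced sketch of that exchange with a hypothetical payload argument; the real callers copy register ops or ECC stat entries here.

/* Illustration only: the OOB get/check/copy/put sequence used above. */
static int example_oob_copy(struct gk20a *g, const u8 *payload, size_t len)
{
	void *oob;
	size_t oob_size;
	void *handle;
	int err = 0;

	handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(),
				TEGRA_VGPU_QUEUE_CMD, &oob, &oob_size);
	if (handle == NULL) {
		return -EINVAL;
	}

	/* the shared area is fixed size; reject payloads that do not fit */
	if (oob_size < len) {
		err = -ENOMEM;
		goto out;
	}

	nvgpu_memcpy((u8 *)oob, payload, len);

	/* ... send the tegra_vgpu_cmd_msg describing the payload here ... */

out:
	vgpu_ivc_oob_put_ptr(handle);
	return err;
}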
+ */ + +#ifndef _ECC_VGPU_H_ +#define _ECC_VGPU_H_ + +#include +#include /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */ + +struct gk20a; + +struct vgpu_ecc_stat { + u32 ecc_id; + char name[NVGPU_ECC_STAT_NAME_MAX_SIZE + 1]; +}; + +int vgpu_ecc_get_info(struct gk20a *g); +void vgpu_ecc_remove_info(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/fb/fb_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fb/fb_vgpu.c new file mode 100644 index 000000000..2de618a63 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fb/fb_vgpu.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "common/vgpu/ivc/comm_vgpu.h" +#include "fb_vgpu.h" + +#ifdef CONFIG_NVGPU_DEBUGGER + +void vgpu_fb_set_mmu_debug_mode(struct gk20a *g, bool enable) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_fb_set_mmu_debug_mode_params *p = + &msg.params.fb_set_mmu_debug_mode; + int err; + + msg.cmd = TEGRA_VGPU_CMD_FB_SET_MMU_DEBUG_MODE; + msg.handle = vgpu_get_handle(g); + p->enable = enable ? 1U : 0U; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err != 0 ? err : msg.ret; + if (err != 0) { + nvgpu_err(g, + "fb set mmu debug mode failed err %d", err); + } +} + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/fb/fb_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fb/fb_vgpu.h new file mode 100644 index 000000000..6b0ebeb51 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fb/fb_vgpu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef FB_VGPU_H +#define FB_VGPU_H + +#ifdef CONFIG_NVGPU_DEBUGGER + +void vgpu_fb_set_mmu_debug_mode(struct gk20a *g, bool enable); + +#endif + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/fbp/fbp_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fbp/fbp_vgpu.c new file mode 100644 index 000000000..1b31a6a53 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fbp/fbp_vgpu.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "fbp_vgpu.h" +#include "common/fbp/fbp_priv.h" + +int vgpu_fbp_init_support(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct nvgpu_fbp *fbp; + u32 i; + + if (g->fbp != NULL) { + return 0; + } + + fbp = nvgpu_kzalloc(g, sizeof(*fbp)); + if (fbp == NULL) { + return -ENOMEM; + } + + fbp->num_fbps = priv->constants.num_fbps; + fbp->max_fbps_count = priv->constants.num_fbps; + fbp->fbp_en_mask = priv->constants.fbp_en_mask; + + fbp->fbp_rop_l2_en_mask = + nvgpu_kzalloc(g, fbp->max_fbps_count * sizeof(u32)); + if (fbp->fbp_rop_l2_en_mask == NULL) { + nvgpu_kfree(g, fbp); + return -ENOMEM; + } + + for (i = 0U; i < fbp->max_fbps_count; i++) { + fbp->fbp_rop_l2_en_mask[i] = priv->constants.l2_en_mask[i]; + } + + g->fbp = fbp; + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fbp/fbp_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fbp/fbp_vgpu.h new file mode 100644 index 000000000..395c256da --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fbp/fbp_vgpu.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_FBP_VGPU_H +#define NVGPU_FBP_VGPU_H + +struct gk20a; + +int vgpu_fbp_init_support(struct gk20a *g); + +#endif /* NVGPU_FBP_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/channel_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/channel_vgpu.c new file mode 100644 index 000000000..8eff9a145 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/channel_vgpu.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "common/vgpu/ivc/comm_vgpu.h" +#include "channel_vgpu.h" + +void vgpu_channel_bind(struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + struct gk20a *g = ch->g; + + nvgpu_log_info(g, "bind channel %d", ch->chid); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND; + msg.handle = vgpu_get_handle(ch->g); + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + nvgpu_smp_wmb(); + nvgpu_atomic_set(&ch->bound, true); +} + +void vgpu_channel_unbind(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + if (nvgpu_atomic_cmpxchg(&ch->bound, true, false)) { + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND; + msg.handle = vgpu_get_handle(ch->g); + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + } + +} + +int vgpu_channel_alloc_inst(struct gk20a *g, struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX; + msg.handle = vgpu_get_handle(g); + p->id = ch->chid; + p->runlist_id = ch->runlist->id; + p->pid = (u64)ch->pid; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + nvgpu_err(g, "fail"); + return -ENOMEM; + } + + ch->virt_ctx = p->handle; + nvgpu_log_fn(g, "done"); + return 0; +} + +void vgpu_channel_free_inst(struct gk20a *g, struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} + +void vgpu_channel_enable(struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ENABLE; + msg.handle = vgpu_get_handle(ch->g); + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} + +void vgpu_channel_disable(struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE; + msg.handle = vgpu_get_handle(ch->g); + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} + +u32 vgpu_channel_count(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.num_channels; +} + +void vgpu_channel_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_channel *ch) +{ + /* + * If error code is already set, this mmu fault + * was triggered as part of recovery from other + * error condition. + * Don't overwrite error flag. 
+ */ + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); + + /* mark channel as faulted */ + nvgpu_channel_set_unserviceable(ch); + + /* unblock pending waits */ + nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); + nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); +} + +void vgpu_channel_set_error_notifier(struct gk20a *g, + struct tegra_vgpu_channel_set_error_notifier *p) +{ + struct nvgpu_channel *ch; + + if (p->chid >= g->fifo.num_channels) { + nvgpu_err(g, "invalid chid %d", p->chid); + return; + } + + ch = &g->fifo.channel[p->chid]; + g->ops.channel.set_error_notifier(ch, p->error); +} + +void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid) +{ + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + + if (ch == NULL) { + nvgpu_err(g, "invalid channel id %d", chid); + return; + } + + nvgpu_channel_set_unserviceable(ch); + g->ops.channel.abort_clean_up(ch); + nvgpu_channel_put(ch); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/channel_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/channel_vgpu.h new file mode 100644 index 000000000..7b3f33518 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/channel_vgpu.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CHANNEL_VGPU_H +#define NVGPU_CHANNEL_VGPU_H + +struct gk20a; +struct nvgpu_channel; + +void vgpu_channel_bind(struct nvgpu_channel *ch); +void vgpu_channel_unbind(struct nvgpu_channel *ch); +int vgpu_channel_alloc_inst(struct gk20a *g, struct nvgpu_channel *ch); +void vgpu_channel_free_inst(struct gk20a *g, struct nvgpu_channel *ch); +void vgpu_channel_enable(struct nvgpu_channel *ch); +void vgpu_channel_disable(struct nvgpu_channel *ch); +u32 vgpu_channel_count(struct gk20a *g); +void vgpu_channel_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_channel *ch); +void vgpu_channel_set_error_notifier(struct gk20a *g, + struct tegra_vgpu_channel_set_error_notifier *p); +void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c new file mode 100644 index 000000000..d9141d111 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c @@ -0,0 +1,136 @@ +/* + * Virtualized GPU Fifo + * + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "fifo_vgpu.h" +#include "channel_vgpu.h" +#include "tsg_vgpu.h" + +void vgpu_fifo_cleanup_sw(struct gk20a *g) +{ + u32 i; + struct nvgpu_fifo *f = &g->fifo; + + for (i = 0U; i < f->max_engines; i++) { + if (f->host_engines[i] == NULL) { + continue; + } + + /* + * Cast to (void *) to get rid of the constness. + */ + nvgpu_kfree(g, (void *)f->host_engines[i]); + } + nvgpu_fifo_cleanup_sw_common(g); +} + +int vgpu_fifo_setup_sw(struct gk20a *g) +{ + struct nvgpu_fifo *f = &g->fifo; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + int err = 0; + + nvgpu_log_fn(g, " "); + + if (f->sw_ready) { + nvgpu_log_fn(g, "skip init"); + return 0; + } + + err = nvgpu_fifo_setup_sw_common(g); + if (err != 0) { + nvgpu_err(g, "fifo sw setup failed, err=%d", err); + return err; + } + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + err = nvgpu_channel_worker_init(g); + if (err) { + goto clean_up; + } +#endif + + f->channel_base = priv->constants.channel_base; + + f->sw_ready = true; + + nvgpu_log_fn(g, "done"); + return 0; + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT +clean_up: + nvgpu_fifo_cleanup_sw_common(g); +#endif + + return err; +} + +int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info) +{ + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, info->chid); + + nvgpu_log_fn(g, " "); + + nvgpu_err(g, "fifo intr (%d) on ch %u", + info->type, info->chid); + + switch (info->type) { + case TEGRA_VGPU_FIFO_INTR_PBDMA: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_PBDMA_ERROR); + break; + case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); + break; + case TEGRA_VGPU_FIFO_INTR_MMU_FAULT: + vgpu_tsg_set_ctx_mmu_error(g, info->chid); + nvgpu_channel_abort(ch, false); + break; + default: + WARN_ON(1); + break; + } + + nvgpu_channel_put(ch); + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.h new file mode 100644 index 000000000..d369a5cc0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_FIFO_VGPU_H +#define NVGPU_FIFO_VGPU_H + +#include + +struct gk20a; +struct tegra_vgpu_fifo_intr_info; + +int vgpu_fifo_setup_sw(struct gk20a *g); +void vgpu_fifo_cleanup_sw(struct gk20a *g); +int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info); + +#endif /* NVGPU_FIFO_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/preempt_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/preempt_vgpu.c new file mode 100644 index 000000000..03ada501c --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/preempt_vgpu.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "preempt_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + + nvgpu_log_fn(g, " "); + + if (!nvgpu_atomic_read(&ch->bound)) { + return 0; + } + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + if (err || msg.ret) { + nvgpu_err(g, + "preempt channel %d failed", ch->chid); + err = -ENOMEM; + } + + return err; +} + +int vgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_tsg_preempt_params *p = + &msg.params.tsg_preempt; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_TSG_PREEMPT; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + + if (err) { + nvgpu_err(g, + "preempt tsg %u failed", tsg->tsgid); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/preempt_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/preempt_vgpu.h new file mode 100644 index 000000000..ce142d892 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/preempt_vgpu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PREEMPT_VGPU_H +#define NVGPU_PREEMPT_VGPU_H + +struct gk20a; +struct nvgpu_channel; +struct nvgpu_tsg; + +int vgpu_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch); +int vgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/ramfc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/ramfc_vgpu.c new file mode 100644 index 000000000..762f2593d --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/ramfc_vgpu.c @@ -0,0 +1,52 @@ +/* + * Virtualized GPU Channel RAMFC + * + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "ramfc_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, + u32 gpfifo_entries, u64 pbdma_acquire_timeout, u32 flags) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc; + int err; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC; + msg.handle = vgpu_get_handle(ch->g); + p->handle = ch->virt_ctx; + p->gpfifo_va = gpfifo_base; + p->num_entries = gpfifo_entries; + p->userd_addr = ch->userd_iova; + p->iova = 0; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? -ENOMEM : 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/ramfc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/ramfc_vgpu.h new file mode 100644 index 000000000..64b9110c1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/ramfc_vgpu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_RAMFC_VGPU_H +#define NVGPU_RAMFC_VGPU_H + +#include + +struct nvgpu_channel; + +int vgpu_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, + u32 gpfifo_entries, u64 pbdma_acquire_timeout, u32 flags); + +#endif /* NVGPU_RAMFC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c new file mode 100644 index 000000000..30b9a48df --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c @@ -0,0 +1,212 @@ +/* + * Virtualized GPU Runlist + * + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "runlist_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +static int vgpu_submit_runlist(struct gk20a *g, u64 handle, u8 runlist_id, + u16 *runlist, u32 num_entries) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_runlist_params *p; + int err; + void *oob_handle; + void *oob; + size_t size, oob_size; + + oob_handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + &oob, &oob_size); + if (!oob_handle) { + return -EINVAL; + } + + size = sizeof(*runlist) * num_entries; + if (oob_size < size) { + err = -ENOMEM; + goto done; + } + + msg.cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST; + msg.handle = handle; + p = &msg.params.runlist; + p->runlist_id = runlist_id; + p->num_entries = num_entries; + + nvgpu_memcpy((u8 *)oob, (u8 *)runlist, size); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + err = (err || msg.ret) ? 
-1 : 0; + +done: + vgpu_ivc_oob_put_ptr(oob_handle); + return err; +} + +static bool vgpu_runlist_modify_active_locked(struct gk20a *g, u32 runlist_id, + struct nvgpu_channel *ch, bool add) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + + runlist = f->runlists[runlist_id]; + + if (add) { + if (nvgpu_test_and_set_bit(ch->chid, + runlist->active_channels)) { + return false; + /* was already there */ + } + } else { + if (!nvgpu_test_and_clear_bit(ch->chid, + runlist->active_channels)) { + /* wasn't there */ + return false; + } + } + + return true; +} + +static void vgpu_runlist_reconstruct_locked(struct gk20a *g, u32 runlist_id, + bool add_entries) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + + runlist = f->runlists[runlist_id]; + + if (add_entries) { + u16 *runlist_entry; + u32 count = 0; + unsigned long chid; + + runlist_entry = runlist->mem[0].cpu_va; + + nvgpu_assert(f->num_channels <= (unsigned int)U16_MAX); + for_each_set_bit(chid, + runlist->active_channels, f->num_channels) { + nvgpu_log_info(g, "add channel %lu to runlist", chid); + *runlist_entry++ = (u16)chid; + count++; + } + + runlist->count = count; + } else { + runlist->count = 0; + } +} + +static int vgpu_runlist_update_locked(struct gk20a *g, u32 runlist_id, + struct nvgpu_channel *ch, bool add, + bool wait_for_finish) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_runlist *runlist; + bool add_entries; + + nvgpu_log_fn(g, " "); + + if (ch != NULL) { + bool update = vgpu_runlist_modify_active_locked(g, runlist_id, + ch, add); + if (!update) { + /* no change in runlist contents */ + return 0; + } + /* had a channel to update, so reconstruct */ + add_entries = true; + } else { + /* no channel; add means update all, !add means clear all */ + add_entries = add; + } + + runlist = f->runlists[runlist_id]; + + vgpu_runlist_reconstruct_locked(g, runlist_id, add_entries); + + return vgpu_submit_runlist(g, vgpu_get_handle(g), runlist_id, + runlist->mem[0].cpu_va, runlist->count); +} + +/* add/remove a channel from runlist + special cases below: runlist->active_channels will NOT be changed. + (ch == NULL && !add) means remove all active channels from runlist. + (ch == NULL && add) means restore all active channels on runlist. 
*/ +static int vgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_channel *ch, + bool add, bool wait_for_finish) +{ + u32 ret = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_mutex_acquire(&rl->runlist_lock); + + ret = vgpu_runlist_update_locked(g, rl->id, ch, add, + wait_for_finish); + + nvgpu_mutex_release(&rl->runlist_lock); + return ret; +} + +int vgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_channel *ch, + bool add, bool wait_for_finish) +{ + nvgpu_assert(ch != NULL); + + return vgpu_runlist_do_update(g, rl, ch, add, wait_for_finish); +} + +int vgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl, + bool add, bool wait_for_finish) +{ + return vgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish); +} + +u32 vgpu_runlist_length_max(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.num_channels; +} + +u32 vgpu_runlist_entry_size(struct gk20a *g) +{ + /* + * This is not the HW format you're looking for (see + * vgpu_fifo_update_runlist_locked(), vgpu_submit_runlist()) + */ + return (u32)sizeof(u16); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h new file mode 100644 index 000000000..f7efe330e --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h @@ -0,0 +1,35 @@ +/* + * Virtualized GPU Runlist + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +struct gk20a; +struct nvgpu_channel; +struct nvgpu_runlist; + +int vgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl, + struct nvgpu_channel *ch, + bool add, bool wait_for_finish); +int vgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl, + bool add, bool wait_for_finish); +u32 vgpu_runlist_length_max(struct gk20a *g); +u32 vgpu_runlist_entry_size(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/tsg_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/tsg_vgpu.c new file mode 100644 index 000000000..5f3d37d4b --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/tsg_vgpu.c @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 
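The runlist header above exposes two wrappers around the same locked update path: vgpu_runlist_update() adds or removes a single channel, while vgpu_runlist_reload() passes ch == NULL so that add means rebuild the runlist from all active channels and !add means clear it, without touching active_channels. A hedged usage sketch with a hypothetical caller:

/* Illustration only: typical calls into the vgpu runlist interface. */
static int example_runlist_ops(struct gk20a *g, struct nvgpu_runlist *rl,
			struct nvgpu_channel *ch)
{
	int err;

	/* make one channel resident on the runlist */
	err = vgpu_runlist_update(g, rl, ch, true, false);
	if (err != 0) {
		return err;
	}

	/* drop every active channel, e.g. around a heavyweight operation */
	err = vgpu_runlist_reload(g, rl, false, false);
	if (err != 0) {
		return err;
	}

	/*
	 * Restore them. Note the entries submitted to the server are plain
	 * u16 channel ids (see vgpu_runlist_entry_size()), not the hardware
	 * runlist format.
	 */
	return vgpu_runlist_reload(g, rl, true, false);
}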
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "tsg_vgpu.h" +#include "channel_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_tsg_open(struct nvgpu_tsg *tsg) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_tsg_open_rel_params *p = + &msg.params.tsg_open; + int err; + struct gk20a *g = tsg->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_TSG_OPEN; + msg.handle = vgpu_get_handle(tsg->g); + p->tsg_id = tsg->tsgid; + p->pid = tsg->tgid; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(tsg->g, + "vgpu_tsg_open failed, tsgid %d", tsg->tsgid); + } + + return err; +} + +void vgpu_tsg_release(struct nvgpu_tsg *tsg) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_tsg_open_rel_params *p = + &msg.params.tsg_release; + int err; + struct gk20a *g = tsg->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_TSG_RELEASE; + msg.handle = vgpu_get_handle(tsg->g); + p->tsg_id = tsg->tsgid; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(tsg->g, + "vgpu_tsg_release failed, tsgid %d", tsg->tsgid); + } +} + +void vgpu_tsg_enable(struct nvgpu_tsg *tsg) +{ + struct gk20a *g = tsg->g; + struct nvgpu_channel *ch; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + g->ops.channel.enable(ch); + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); +} + +int vgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg = {}; + int err; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + struct tegra_vgpu_tsg_bind_unbind_channel_params *p = + &msg.params.tsg_bind_unbind_channel; + + msg.cmd = TEGRA_VGPU_CMD_TSG_BIND_CHANNEL; + p->tsg_id = tsg->tsgid; + p->ch_handle = ch->virt_ctx; + p->runlist_id = ch->runlist->id; + } else { + struct tegra_vgpu_tsg_bind_channel_ex_params *p = + &msg.params.tsg_bind_channel_ex; + + msg.cmd = TEGRA_VGPU_CMD_TSG_BIND_CHANNEL_EX; + p->tsg_id = tsg->tsgid; + p->ch_handle = ch->virt_ctx; + p->runlist_id = ch->runlist->id; + p->subctx_id = ch->subctx_id; + p->runqueue_sel = ch->runqueue_sel; + } + + msg.handle = vgpu_get_handle(g); + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (err) { + nvgpu_err(g, "vgpu_tsg_bind_channel failed, ch %d tsgid %d", + ch->chid, tsg->tsgid); + } + + return err; +} + +int vgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_tsg_bind_unbind_channel_params *p = + &msg.params.tsg_bind_unbind_channel; + int err; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_TSG_UNBIND_CHANNEL; + msg.handle = vgpu_get_handle(g); + p->ch_handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + + return err; +} + +int vgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_tsg_timeslice_params *p = + &msg.params.tsg_timeslice; + int err; + struct gk20a *g = tsg->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_TSG_SET_TIMESLICE; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + p->timeslice_us = timeslice; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + if (!err) { + tsg->timeslice_us = timeslice; + } + + return err; +} + +int vgpu_set_sm_exception_type_mask(struct nvgpu_channel *ch, + u32 exception_mask) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_set_sm_exception_type_mask_params *p = + &msg.params.set_sm_exception_mask; + int err = 0; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_SET_SM_EXCEPTION_TYPE_MASK; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + p->mask = exception_mask; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + + return err; +} + +int vgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 new_level) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_tsg_runlist_interleave_params *p = + &msg.params.tsg_interleave; + int err; + struct gk20a *g = tsg->g; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_TSG_SET_RUNLIST_INTERLEAVE; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + p->level = new_level; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + return err ? err : msg.ret; +} + +int vgpu_tsg_force_reset_ch(struct nvgpu_channel *ch, + u32 err_code, bool verbose) +{ + struct nvgpu_tsg *tsg = NULL; + struct nvgpu_channel *ch_tsg = NULL; + struct gk20a *g = ch->g; + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_channel_config_params *p = + &msg.params.channel_config; + int err; + + nvgpu_log_fn(g, " "); + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + + nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list, + nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch_tsg)) { + nvgpu_channel_set_error_notifier(g, ch_tsg, + err_code); + nvgpu_channel_set_unserviceable(ch_tsg); + nvgpu_channel_put(ch_tsg); + } + } + + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + } else { + nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + } + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET; + msg.handle = vgpu_get_handle(ch->g); + p->handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + if (!err) { + nvgpu_channel_abort(ch, false); + } + return err ? 
err : msg.ret; +} + +u32 vgpu_tsg_default_timeslice_us(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.default_timeslice_us; +} + +void vgpu_tsg_set_ctx_mmu_error(struct gk20a *g, u32 chid) +{ + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid); + struct nvgpu_tsg *tsg = NULL; + + if (ch == NULL) { + return; + } + + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + struct nvgpu_channel *ch_tsg = NULL; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + + nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list, + nvgpu_channel, ch_entry) { + if (nvgpu_channel_get(ch_tsg)) { + vgpu_channel_set_ctx_mmu_error(g, ch_tsg); + nvgpu_channel_put(ch_tsg); + } + } + + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + } else { + nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + } +} + +void vgpu_tsg_handle_event(struct gk20a *g, + struct tegra_vgpu_channel_event_info *info) +{ + struct nvgpu_tsg *tsg; + + if (!info->is_tsg) { + nvgpu_err(g, "channel event posted"); + return; + } + + if (info->id >= g->fifo.num_channels || + info->event_id >= TEGRA_VGPU_CHANNEL_EVENT_ID_MAX) { + nvgpu_err(g, "invalid channel event"); + return; + } + + tsg = &g->fifo.tsg[info->id]; + g->ops.tsg.post_event_id(tsg, info->event_id); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/tsg_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/tsg_vgpu.h new file mode 100644 index 000000000..30f20c8a3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/tsg_vgpu.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_TSG_VGPU_H +#define NVGPU_TSG_VGPU_H + +#include + +struct gk20a; +struct nvgpu_channel; +struct nvgpu_tsg; + +int vgpu_tsg_open(struct nvgpu_tsg *tsg); +void vgpu_tsg_release(struct nvgpu_tsg *tsg); +void vgpu_tsg_enable(struct nvgpu_tsg *tsg); +int vgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch); +int vgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch); +int vgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice); +int vgpu_set_sm_exception_type_mask(struct nvgpu_channel *ch, + u32 exception_mask); +int vgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 new_level); +int vgpu_tsg_force_reset_ch(struct nvgpu_channel *ch, + u32 err_code, bool verbose); +u32 vgpu_tsg_default_timeslice_us(struct gk20a *g); +void vgpu_tsg_set_ctx_mmu_error(struct gk20a *g, u32 chid); +void vgpu_tsg_handle_event(struct gk20a *g, + struct tegra_vgpu_channel_event_info *info); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/userd_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/userd_vgpu.c new file mode 100644 index 000000000..11847e72d --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/userd_vgpu.c @@ -0,0 +1,39 @@ +/* + * Virtualized GPU USERD + * + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "userd_vgpu.h" + +int vgpu_userd_setup_sw(struct gk20a *g) +{ + return nvgpu_userd_init_slabs(g); +} + +void vgpu_userd_cleanup_sw(struct gk20a *g) +{ + nvgpu_userd_free_slabs(g); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/userd_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/userd_vgpu.h new file mode 100644 index 000000000..d3f2cd1a3 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/userd_vgpu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef USERD_VGPU_H +#define USERD_VGPU_H + +struct gk20a; + +int vgpu_userd_setup_sw(struct gk20a *g); +void vgpu_userd_cleanup_sw(struct gk20a *g); + +#endif /* USERD_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c new file mode 100644 index 000000000..53f3495b4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c @@ -0,0 +1,379 @@ +/* + * Virtualized GPU Graphics + * + * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/gr/ctx_priv.h" + +#include "ctx_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_gr_alloc_gr_ctx(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image = + nvgpu_gr_get_golden_image_ptr(g); + u32 golden_image_size; + int err; + + nvgpu_log_fn(g, " "); + + golden_image_size = + nvgpu_gr_obj_ctx_get_golden_image_size(gr_golden_image); + if (golden_image_size == 0) { + return -EINVAL; + } + + gr_ctx->mem.gpu_va = nvgpu_vm_alloc_va(vm, + golden_image_size, + GMMU_PAGE_SIZE_KERNEL); + + if (!gr_ctx->mem.gpu_va) { + return -ENOMEM; + } + gr_ctx->mem.size = golden_image_size; + gr_ctx->mem.aperture = APERTURE_SYSMEM; + + msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; + msg.handle = vgpu_get_handle(g); + p->as_handle = vm->handle; + p->gr_ctx_va = gr_ctx->mem.gpu_va; + p->tsg_id = gr_ctx->tsgid; +#ifdef CONFIG_NVGPU_SM_DIVERSITY + p->sm_diversity_config = gr_ctx->sm_diversity_config; +#else + p->sm_diversity_config = NVGPU_DEFAULT_SM_DIVERSITY_CONFIG; +#endif + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + + if (unlikely(err)) { + nvgpu_err(g, "fail to alloc gr_ctx"); + nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, + GMMU_PAGE_SIZE_KERNEL); + gr_ctx->mem.aperture = APERTURE_INVALID; + } + + return err; +} + +void vgpu_gr_free_gr_ctx(struct gk20a *g, + struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) +{ + nvgpu_log_fn(g, " "); + + if (gr_ctx->mem.gpu_va) { + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; + int err; + + msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; + msg.handle = vgpu_get_handle(g); + p->tsg_id = gr_ctx->tsgid; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, + GMMU_PAGE_SIZE_KERNEL); + + vgpu_gr_unmap_global_ctx_buffers(g, gr_ctx, vm); + vgpu_gr_free_patch_ctx(g, vm, gr_ctx); + vgpu_gr_free_pm_ctx(g, vm, gr_ctx); + +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); + nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); +#endif + + (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); + } +} + +int vgpu_gr_alloc_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *ch_vm, u64 virt_ctx) +{ + struct patch_desc *patch_ctx; + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; + int err; + + nvgpu_log_fn(g, " "); + + patch_ctx = &gr_ctx->patch_ctx; + patch_ctx->mem.size = 1024 * sizeof(u32); + patch_ctx->mem.gpu_va = nvgpu_vm_alloc_va(ch_vm, + patch_ctx->mem.size, + GMMU_PAGE_SIZE_KERNEL); + if (!patch_ctx->mem.gpu_va) { + return -ENOMEM; + } + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX; + msg.handle = vgpu_get_handle(g); + p->handle = virt_ctx; + p->patch_ctx_va = patch_ctx->mem.gpu_va; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va, + GMMU_PAGE_SIZE_KERNEL); + err = -ENOMEM; + } + + return err; +} + +void vgpu_gr_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx) +{ + struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; + + nvgpu_log_fn(g, " "); + + if (patch_ctx->mem.gpu_va) { + /* server will free on channel close */ + + nvgpu_vm_free_va(vm, patch_ctx->mem.gpu_va, + GMMU_PAGE_SIZE_KERNEL); + patch_ctx->mem.gpu_va = 0; + } +} + +int vgpu_gr_alloc_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm) +{ + struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; + struct nvgpu_gr_hwpm_map *gr_hwpm_map = nvgpu_gr_get_hwpm_map_ptr(g); + + nvgpu_log_fn(g, " "); + + if (pm_ctx->mem.gpu_va != 0ULL) { + return 0; + } + + pm_ctx->mem.gpu_va = nvgpu_vm_alloc_va(vm, + nvgpu_gr_hwpm_map_get_size(gr_hwpm_map), + GMMU_PAGE_SIZE_KERNEL); + + if (!pm_ctx->mem.gpu_va) { + nvgpu_err(g, "failed to map pm ctxt buffer"); + return -ENOMEM; + } + + pm_ctx->mem.size = nvgpu_gr_hwpm_map_get_size(gr_hwpm_map); + return 0; +} + +void vgpu_gr_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx) +{ + struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; + + nvgpu_log_fn(g, " "); + + /* check if hwpm was ever initialized. 
If not, nothing to do */ + if (pm_ctx->mem.gpu_va == 0) { + return; + } + + /* server will free on channel close */ + + nvgpu_vm_free_va(vm, pm_ctx->mem.gpu_va, + GMMU_PAGE_SIZE_KERNEL); + pm_ctx->mem.gpu_va = 0; +} + +void vgpu_gr_unmap_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *ch_vm) +{ + u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va; + u32 i; + + nvgpu_log_fn(g, " "); + + if (gr_ctx->global_ctx_buffer_mapped) { + /* server will unmap on channel close */ + + for (i = 0; i < NVGPU_GR_CTX_VA_COUNT; i++) { + if (g_bfr_va[i]) { + nvgpu_vm_free_va(ch_vm, g_bfr_va[i], + GMMU_PAGE_SIZE_KERNEL); + g_bfr_va[i] = 0; + } + } + + gr_ctx->global_ctx_buffer_mapped = false; + } +} + +int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *ch_vm, u64 virt_ctx) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; + u64 *g_bfr_va; + u64 gpu_va; + u32 i; + int err; + + nvgpu_log_fn(g, " "); + + g_bfr_va = gr_ctx->global_ctx_buffer_va; + + /* + * MIG supports only compute class. + * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are supported. + */ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* Circular Buffer */ + gpu_va = nvgpu_vm_alloc_va(ch_vm, + nvgpu_gr_global_ctx_get_size(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR), + GMMU_PAGE_SIZE_KERNEL); + + if (!gpu_va) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_CIRCULAR_VA] = gpu_va; + + /* Attribute Buffer */ + gpu_va = nvgpu_vm_alloc_va(ch_vm, + nvgpu_gr_global_ctx_get_size(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE), + GMMU_PAGE_SIZE_KERNEL); + + if (!gpu_va) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_ATTRIBUTE_VA] = gpu_va; + + /* Page Pool */ + gpu_va = nvgpu_vm_alloc_va(ch_vm, + nvgpu_gr_global_ctx_get_size(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL), + GMMU_PAGE_SIZE_KERNEL); + if (!gpu_va) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_PAGEPOOL_VA] = gpu_va; + + /* RTV circular buffer */ + if (nvgpu_gr_global_ctx_get_size(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER) != 0U) { + gpu_va = nvgpu_vm_alloc_va(ch_vm, + nvgpu_gr_global_ctx_get_size(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER), + GMMU_PAGE_SIZE_KERNEL); + if (!gpu_va) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA] = gpu_va; + } + } + + /* Priv register Access Map */ + gpu_va = nvgpu_vm_alloc_va(ch_vm, + nvgpu_gr_global_ctx_get_size(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP), + GMMU_PAGE_SIZE_KERNEL); + if (!gpu_va) { + goto clean_up; + } + g_bfr_va[NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA] = gpu_va; + + /* FECS trace Buffer */ +#ifdef CONFIG_NVGPU_FECS_TRACE + gpu_va = nvgpu_vm_alloc_va(ch_vm, + nvgpu_gr_global_ctx_get_size(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER), + GMMU_PAGE_SIZE_KERNEL); + + if (!gpu_va) + goto clean_up; + + g_bfr_va[NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA] = gpu_va; +#endif + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX; + msg.handle = vgpu_get_handle(g); + p->handle = virt_ctx; + p->cb_va = g_bfr_va[NVGPU_GR_CTX_CIRCULAR_VA]; + p->attr_va = g_bfr_va[NVGPU_GR_CTX_ATTRIBUTE_VA]; + p->page_pool_va = g_bfr_va[NVGPU_GR_CTX_PAGEPOOL_VA]; + p->priv_access_map_va = g_bfr_va[NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA]; + p->rtv_cb_va = g_bfr_va[NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA]; +#ifdef CONFIG_NVGPU_FECS_TRACE + p->fecs_trace_va = 
g_bfr_va[NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA];
+#endif
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (err || msg.ret) {
+		goto clean_up;
+	}
+
+	gr_ctx->global_ctx_buffer_mapped = true;
+	return 0;
+
+ clean_up:
+	for (i = 0; i < NVGPU_GR_CTX_VA_COUNT; i++) {
+		if (g_bfr_va[i]) {
+			nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
+					GMMU_PAGE_SIZE_KERNEL);
+			g_bfr_va[i] = 0;
+		}
+	}
+	return -ENOMEM;
+}
+
+/* load saved fresh copy of golden image into channel gr_ctx */
+int vgpu_gr_load_golden_ctx_image(struct gk20a *g, u64 virt_ctx)
+{
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
+	int err;
+
+	nvgpu_log_fn(g, " ");
+
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX;
+	msg.handle = vgpu_get_handle(g);
+	p->handle = virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+	return (err || msg.ret) ? -1 : 0;
+}
diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h
new file mode 100644
index 000000000..5e1ed2e67
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#ifndef CTX_VGPU_H +#define CTX_VGPU_H + +struct gk20a; +struct nvgpu_gr_ctx; +struct vm_gk20a; +struct nvgpu_gr_global_ctx_buffer_desc; + +int vgpu_gr_alloc_gr_ctx(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm); +void vgpu_gr_free_gr_ctx(struct gk20a *g, + struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); +int vgpu_gr_alloc_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *ch_vm, u64 virt_ctx); +void vgpu_gr_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx); +int vgpu_gr_alloc_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm); +void vgpu_gr_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx); +void vgpu_gr_unmap_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *ch_vm); +int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct vm_gk20a *ch_vm, u64 virt_ctx); +int vgpu_gr_load_golden_ctx_image(struct gk20a *g, u64 virt_ctx); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/fecs_trace_vgpu.c new file mode 100644 index 000000000..3ff39b3a6 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/gr/fecs_trace_vgpu.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fecs_trace_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_fecs_trace_init(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst; + u32 mempool; + int err; + + nvgpu_log_fn(g, " "); + + if (g->fecs_trace) { + nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false); + return 0; + } + + vcst = nvgpu_kzalloc(g, sizeof(*vcst)); + if (!vcst) { + nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false); + return -ENOMEM; + } + + err = nvgpu_dt_read_u32_index(g, "mempool-fecs-trace", 1, &mempool); + if (err) { + nvgpu_info(g, "does not support fecs trace"); + nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false); + goto fail; + } + + vcst->cookie = vgpu_ivm_mempool_reserve(mempool); + if ((vcst->cookie == NULL) || + ((unsigned long)vcst->cookie >= (unsigned long)-MAX_ERRNO)) { + nvgpu_info(g, + "mempool %u reserve failed", mempool); + vcst->cookie = NULL; + err = -EINVAL; + goto fail; + } + + vcst->buf = vgpu_ivm_mempool_map(vcst->cookie); + if (!vcst->buf) { + nvgpu_info(g, "ioremap_cache failed"); + err = -EINVAL; + goto fail; + } + vcst->header = vcst->buf; + vcst->num_entries = vcst->header->num_ents; + if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { + nvgpu_err(g, "entry size mismatch"); + goto fail; + } + vcst->entries = (struct nvgpu_gpu_ctxsw_trace_entry *)( + (char *)vcst->buf + sizeof(*vcst->header)); + g->fecs_trace = (struct nvgpu_gr_fecs_trace *)vcst; + + return 0; +fail: + if (vcst->cookie != NULL && vcst->buf != NULL) { + vgpu_ivm_mempool_unmap(vcst->cookie, vcst->buf); + } + if (vcst->cookie) { + vgpu_ivm_mempool_unreserve(vcst->cookie); + } + nvgpu_kfree(g, vcst); + return err; +} + +int vgpu_fecs_trace_deinit(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + vgpu_ivm_mempool_unmap(vcst->cookie, vcst->buf); + vgpu_ivm_mempool_unreserve(vcst->cookie); + nvgpu_kfree(g, vcst); + return 0; +} + +int vgpu_fecs_trace_enable(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, + .handle = vgpu_get_handle(g), + }; + int err; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + vcst->enabled = !err; + return err; +} + +int vgpu_fecs_trace_disable(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, + .handle = vgpu_get_handle(g), + }; + int err; + + vcst->enabled = false; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +bool vgpu_fecs_trace_is_enabled(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + return (vcst && vcst->enabled); +} + +int vgpu_fecs_trace_poll(struct gk20a *g) +{ + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, + .handle = vgpu_get_handle(g), + }; + int err; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + WARN_ON(err); + return err; +} + +int vgpu_free_user_buffer(struct gk20a *g) +{ + return 0; +} + + +#ifdef CONFIG_NVGPU_FECS_TRACE +int vgpu_fecs_trace_max_entries(struct gk20a *g, + struct nvgpu_gpu_ctxsw_trace_filter *filter) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + return vcst->header->num_ents; +} + +int vgpu_fecs_trace_set_filter(struct gk20a *g, + struct nvgpu_gpu_ctxsw_trace_filter *filter) +{ + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, + .handle = vgpu_get_handle(g), + }; + struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; + int err; + + (void) memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +#endif /* CONFIG_NVGPU_FECS_TRACE */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/fecs_trace_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/fecs_trace_vgpu.h new file mode 100644 index 000000000..734bb6eed --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/gr/fecs_trace_vgpu.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_FECS_TRACE_VGPU_H +#define NVGPU_FECS_TRACE_VGPU_H + +#include + +struct gk20a; +struct vm_area_struct; +struct nvgpu_gpu_ctxsw_trace_filter; +struct tegra_hv_ivm_cookie; +struct nvgpu_gpu_ctxsw_trace_entry; +struct nvgpu_ctxsw_ring_header_internal; + +struct vgpu_fecs_trace { + struct tegra_hv_ivm_cookie *cookie; + struct nvgpu_ctxsw_ring_header_internal *header; + struct nvgpu_gpu_ctxsw_trace_entry *entries; + int num_entries; + bool enabled; + void *buf; +}; + +void vgpu_fecs_trace_data_update(struct gk20a *g); +int vgpu_fecs_trace_init(struct gk20a *g); +int vgpu_fecs_trace_deinit(struct gk20a *g); +int vgpu_fecs_trace_enable(struct gk20a *g); +int vgpu_fecs_trace_disable(struct gk20a *g); +bool vgpu_fecs_trace_is_enabled(struct gk20a *g); +int vgpu_fecs_trace_poll(struct gk20a *g); +int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size); +int vgpu_free_user_buffer(struct gk20a *g); +void vgpu_get_mmap_user_buffer_info(struct gk20a *g, + void **mmapaddr, size_t *mmapsize); +int vgpu_fecs_trace_max_entries(struct gk20a *g, + struct nvgpu_gpu_ctxsw_trace_filter *filter); +int vgpu_fecs_trace_set_filter(struct gk20a *g, + struct nvgpu_gpu_ctxsw_trace_filter *filter); +struct tegra_hv_ivm_cookie *vgpu_fecs_trace_get_ivm(struct gk20a *g); + +#endif /* NVGPU_FECS_TRACE_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c new file mode 100644 index 000000000..a898272e6 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -0,0 +1,1605 @@ +/* + * Virtualized GPU Graphics + * + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NVGPU_GRAPHICS +#include +#include +#endif +#include +#include +#include +#include +#include + +#include "gr_vgpu.h" +#include "ctx_vgpu.h" +#include "subctx_vgpu.h" + +#include "common/vgpu/perf/cyclestats_snapshot_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +#include "common/gr/gr_config_priv.h" +#include "common/gr/gr_falcon_priv.h" +#include "common/gr/gr_intr_priv.h" +#include "common/gr/ctx_priv.h" +#ifdef CONFIG_NVGPU_GRAPHICS +#include "common/gr/zcull_priv.h" +#include "common/gr/zbc_priv.h" +#endif +#include "common/gr/gr_priv.h" + +static int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm, u32 class, + u32 graphics_preempt_mode, + u32 compute_preempt_mode); +static int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm, + u32 class, + u32 flags); + +void vgpu_gr_detect_sm_arch(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + g->params.sm_arch_sm_version = + priv->constants.sm_arch_sm_version; + g->params.sm_arch_spa_version = + priv->constants.sm_arch_spa_version; + g->params.sm_arch_warp_count = + priv->constants.sm_arch_warp_count; +} + +static int vgpu_gr_commit_inst(struct nvgpu_channel *c, u64 gpu_va) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; + int err; + struct gk20a *g = c->g; + + nvgpu_log_fn(g, " "); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + err = vgpu_alloc_subctx_header(g, &c->subctx, c->vm, + c->virt_ctx); + if (err != 0) { + return err; + } + } + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX; + msg.handle = vgpu_get_handle(c->g); + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + if (err || msg.ret) { + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + vgpu_free_subctx_header(g, c->subctx, c->vm, + c->virt_ctx); + } + return -1; + } else { + return 0; + } +} + +static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g, + struct nvgpu_channel *c, bool patch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX; + msg.handle = vgpu_get_handle(g); + p->handle = c->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? 
-1 : 0; +} + +int vgpu_gr_init_ctx_state(struct gk20a *g, + struct nvgpu_gr_falcon_query_sizes *sizes) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + sizes->golden_image_size = priv->constants.golden_ctx_size; + sizes->pm_ctxsw_image_size = priv->constants.hwpm_ctx_size; + if (!sizes->golden_image_size || + !sizes->pm_ctxsw_image_size) { + return -ENXIO; + } + +#ifdef CONFIG_NVGPU_GRAPHICS + sizes->zcull_image_size = priv->constants.zcull_ctx_size; + if (sizes->zcull_image_size == 0U) { + return -ENXIO; + } +#endif + + sizes->preempt_image_size = + priv->constants.preempt_ctx_size; + if (!sizes->preempt_image_size) { + return -EINVAL; + } + + return 0; +} + +int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) +{ + struct nvgpu_gr *gr = g->gr; + u32 size; + + nvgpu_log_fn(g, " "); + + gr->global_ctx_buffer = nvgpu_gr_global_ctx_desc_alloc(g); + if (gr->global_ctx_buffer == NULL) { + return -ENOMEM; + } + + size = g->ops.gr.init.get_global_ctx_cb_buffer_size(g); + nvgpu_log_info(g, "cb_buffer_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR, size); + + size = g->ops.gr.init.get_global_ctx_pagepool_buffer_size(g); + nvgpu_log_info(g, "pagepool_buffer_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL, size); + + size = g->ops.gr.init.get_global_attr_cb_size(g, + nvgpu_gr_config_get_tpc_count(g->gr->config), + nvgpu_gr_config_get_max_tpc_count(g->gr->config)); + nvgpu_log_info(g, "attr_buffer_size : %u", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, size); + + if (g->ops.gr.init.get_rtv_cb_size != NULL) { + size = g->ops.gr.init.get_rtv_cb_size(g); + nvgpu_log_info(g, "rtv_circular_buffer_size : %u", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, size); + } + + size = NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_SIZE; + nvgpu_log_info(g, "priv_access_map_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, size); + +#ifdef CONFIG_NVGPU_FECS_TRACE + size = nvgpu_gr_fecs_trace_buffer_size(g); + nvgpu_log_info(g, "fecs_trace_buffer_size : %d", size); + + nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, size); +#endif + return 0; +} + +int vgpu_gr_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, u32 flags) +{ + struct gk20a *g = c->g; + struct nvgpu_gr_ctx *gr_ctx = NULL; + struct nvgpu_gr *gr = g->gr; + struct nvgpu_tsg *tsg = NULL; + int err = 0; + + nvgpu_log_fn(g, " "); + + /* an address space needs to have been bound at this point.*/ + if (!nvgpu_channel_as_bound(c)) { + nvgpu_err(g, "not bound to address space at time" + " of grctx allocation"); + return -EINVAL; + } + + if (!g->ops.gpu_class.is_valid(class_num)) { + nvgpu_err(g, "invalid obj class 0x%x", class_num); + err = -EINVAL; + goto out; + } + c->obj_class = class_num; + + tsg = nvgpu_tsg_from_ch(c); + if (tsg == NULL) { + return -EINVAL; + } + + gr_ctx = tsg->gr_ctx; + + if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { + tsg->vm = c->vm; + nvgpu_vm_get(tsg->vm); + nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid); + err = vgpu_gr_alloc_gr_ctx(g, gr_ctx, c->vm); + if (err) { + nvgpu_err(g, + "fail to allocate TSG gr ctx buffer, err=%d", + err); + nvgpu_vm_put(tsg->vm); + tsg->vm = NULL; + goto out; + } + + /* allocate patch buffer */ + err = vgpu_gr_alloc_patch_ctx(g, 
gr_ctx, c->vm, c->virt_ctx); + if (err) { + nvgpu_err(g, "fail to allocate patch buffer"); + goto out; + } + + vgpu_gr_init_ctxsw_preemption_mode(g, gr_ctx, + c->vm, + class_num, + flags); + + /* map global buffer to channel gpu_va and commit */ + err = vgpu_gr_map_global_ctx_buffers(g, gr_ctx, + gr->global_ctx_buffer, c->vm, + c->virt_ctx); + if (err) { + nvgpu_err(g, "fail to map global ctx buffer"); + goto out; + } + + err = vgpu_gr_commit_global_ctx_buffers(g, c, true); + if (err) { + nvgpu_err(g, "fail to commit global ctx buffers"); + goto out; + } + + /* commit gr ctx buffer */ + err = vgpu_gr_commit_inst(c, + nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + if (err) { + nvgpu_err(g, "fail to commit gr ctx buffer"); + goto out; + } + + /* load golden image */ + err = nvgpu_pg_elpg_protected_call(g, + vgpu_gr_load_golden_ctx_image(g, c->virt_ctx)); + if (err) { + nvgpu_err(g, "fail to load golden ctx image"); + goto out; + } + } else { + /* commit gr ctx buffer */ + err = vgpu_gr_commit_inst(c, + nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + if (err) { + nvgpu_err(g, "fail to commit gr ctx buffer"); + goto out; + } +#ifdef CONFIG_NVGPU_FECS_TRACE + /* for fecs bind channel */ + err = nvgpu_pg_elpg_protected_call(g, + vgpu_gr_load_golden_ctx_image(g, c->virt_ctx)); + if (err) { + nvgpu_err(g, "fail to load golden ctx image"); + goto out; + } +#endif + } + + /* PM ctxt switch is off by default */ + nvgpu_gr_ctx_set_pm_ctx_pm_mode(gr_ctx, + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()); + + nvgpu_log_fn(g, "done"); + return 0; +out: + /* 1. gr_ctx, patch_ctx and global ctx buffer mapping + can be reused so no need to release them. + 2. golden image load is a one time thing so if + they pass, no need to undo. */ + nvgpu_err(g, "fail"); + return err; +} + +static int vgpu_gr_init_gr_config(struct gk20a *g, struct nvgpu_gr *gr) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct nvgpu_gr_config *config; + u32 gpc_index; + u32 sm_per_tpc; + u32 pes_index; + int err = -ENOMEM; + + nvgpu_log_fn(g, " "); + + gr->config = nvgpu_kzalloc(g, sizeof(*gr->config)); + if (gr->config == NULL) { + return -ENOMEM; + } + + config = gr->config; + + config->g = g; + + config->max_gpc_count = priv->constants.max_gpc_count; + config->gpc_count = priv->constants.gpc_count; + config->gpc_mask = priv->constants.gpc_mask; + config->max_tpc_per_gpc_count = priv->constants.max_tpc_per_gpc_count; + + config->max_tpc_count = config->max_gpc_count * config->max_tpc_per_gpc_count; + + config->gpc_tpc_count = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32)); + if (!config->gpc_tpc_count) { + goto cleanup; + } + + config->gpc_tpc_mask = nvgpu_kzalloc(g, config->gpc_count * sizeof(u32)); + if (!config->gpc_tpc_mask) { + goto cleanup; + } + + sm_per_tpc = priv->constants.sm_per_tpc; + gr->config->sm_to_cluster = nvgpu_kzalloc(g, config->gpc_count * + config->max_tpc_per_gpc_count * + sm_per_tpc * + sizeof(struct nvgpu_sm_info)); + if (!gr->config->sm_to_cluster) { + goto cleanup; + } + +#ifdef CONFIG_NVGPU_SM_DIVERSITY + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) { + config->sm_to_cluster_redex_config = + nvgpu_kzalloc(g, config->gpc_count * + config->max_tpc_per_gpc_count * + sm_per_tpc * + sizeof(struct nvgpu_sm_info)); + if (config->sm_to_cluster_redex_config == NULL) { + nvgpu_err(g, "sm_to_cluster_redex_config == NULL"); + goto cleanup; + } + } +#endif + + config->tpc_count = 0; + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + config->gpc_tpc_count[gpc_index] = + 
priv->constants.gpc_tpc_count[gpc_index]; + + config->tpc_count += config->gpc_tpc_count[gpc_index]; + + if (g->ops.gr.config.get_gpc_tpc_mask) { + gr->config->gpc_tpc_mask[gpc_index] = + g->ops.gr.config.get_gpc_tpc_mask(g, + g->gr->config, gpc_index); + } + } + + config->pe_count_per_gpc = + nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); + if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) { + nvgpu_do_assert_print(g, "too many pes per gpc %u\n", + config->pe_count_per_gpc); + goto cleanup; + } + if (config->pe_count_per_gpc > TEGRA_VGPU_MAX_PES_COUNT_PER_GPC) { + nvgpu_err(g, "pe_count_per_gpc %d is too big!", + config->pe_count_per_gpc); + goto cleanup; + } + + if (config->gpc_ppc_count == NULL) { + config->gpc_ppc_count = nvgpu_kzalloc(g, config->gpc_count * + sizeof(u32)); + } else { + (void) memset(config->gpc_ppc_count, 0, config->gpc_count * + sizeof(u32)); + } + + for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) { + config->gpc_ppc_count[gpc_index] = + priv->constants.gpc_ppc_count[gpc_index]; + + for (pes_index = 0u; pes_index < config->pe_count_per_gpc; + pes_index++) { + u32 pes_tpc_count, pes_tpc_mask; + + if (config->pes_tpc_count[pes_index] == NULL) { + config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, + config->gpc_count * sizeof(u32)); + config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, + config->gpc_count * sizeof(u32)); + if (config->pes_tpc_count[pes_index] == NULL || + config->pes_tpc_mask[pes_index] == NULL) { + goto cleanup; + } + } + + pes_tpc_count = priv->constants. + pes_tpc_count[TEGRA_VGPU_MAX_PES_COUNT_PER_GPC * + gpc_index + pes_index]; + pes_tpc_mask = priv->constants. + pes_tpc_mask[TEGRA_VGPU_MAX_PES_COUNT_PER_GPC * + gpc_index + pes_index]; + config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count; + config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask; + } + } + + err = g->ops.gr.config.init_sm_id_table(g, g->gr->config); + if (err) { + goto cleanup; + } + return 0; +cleanup: + nvgpu_err(g, "out of memory"); + + for (pes_index = 0u; pes_index < config->pe_count_per_gpc; pes_index++) { + nvgpu_kfree(g, config->pes_tpc_count[pes_index]); + config->pes_tpc_count[pes_index] = NULL; + nvgpu_kfree(g, config->pes_tpc_mask[pes_index]); + config->pes_tpc_mask[pes_index] = NULL; + } + + nvgpu_kfree(g, config->gpc_ppc_count); + config->gpc_ppc_count = NULL; + + nvgpu_kfree(g, config->gpc_tpc_count); + config->gpc_tpc_count = NULL; + + nvgpu_kfree(g, config->gpc_tpc_mask); + config->gpc_tpc_mask = NULL; + + if (config->sm_to_cluster != NULL) { + nvgpu_kfree(g, config->sm_to_cluster); + config->sm_to_cluster = NULL; + } + +#ifdef CONFIG_NVGPU_SM_DIVERSITY + if (config->sm_to_cluster_redex_config != NULL) { + nvgpu_kfree(g, config->sm_to_cluster_redex_config); + config->sm_to_cluster_redex_config = NULL; + } +#endif + + return err; +} + +#ifdef CONFIG_NVGPU_GRAPHICS +static int vgpu_gr_init_gr_zcull(struct gk20a *g, struct nvgpu_gr *gr, + u32 size) +{ + nvgpu_log_fn(g, " "); + + gr->zcull = nvgpu_kzalloc(g, sizeof(*gr->zcull)); + if (gr->zcull == NULL) { + return -ENOMEM; + } + + gr->zcull->zcull_ctxsw_image_size = size; + + return 0; +} +int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c, + u64 zcull_va, u32 mode) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL; + msg.handle = vgpu_get_handle(g); + p->handle = c->virt_ctx; + p->zcull_va = zcull_va; + p->mode = mode; + err 
= vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? -ENOMEM : 0; +} + +int vgpu_gr_get_zcull_info(struct gk20a *g, + struct nvgpu_gr_config *gr_config, + struct nvgpu_gr_zcull *zcull, + struct nvgpu_gr_zcull_info *zcull_params) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + return -ENOMEM; + } + + zcull_params->width_align_pixels = p->width_align_pixels; + zcull_params->height_align_pixels = p->height_align_pixels; + zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots; + zcull_params->aliquot_total = p->aliquot_total; + zcull_params->region_byte_multiplier = p->region_byte_multiplier; + zcull_params->region_header_size = p->region_header_size; + zcull_params->subregion_header_size = p->subregion_header_size; + zcull_params->subregion_width_align_pixels = + p->subregion_width_align_pixels; + zcull_params->subregion_height_align_pixels = + p->subregion_height_align_pixels; + zcull_params->subregion_count = p->subregion_count; + + return 0; +} +#endif + +u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, + u32 gpc_index) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.gpc_tpc_mask[gpc_index]; +} + +u32 vgpu_gr_get_max_fbps_count(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + return priv->constants.num_fbps; +} + +u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + return priv->constants.ltc_per_fbp; +} + +u32 vgpu_gr_get_max_lts_per_ltc(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + return priv->constants.max_lts_per_ltc; +} + +#ifdef CONFIG_NVGPU_GRAPHICS +int vgpu_gr_add_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_entry *zbc_val) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_zbc_set_table_params *p = &msg.params.zbc_set_table; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_ZBC_SET_TABLE; + msg.handle = vgpu_get_handle(g); + + p->type = zbc_val->type; + p->format = zbc_val->format; + switch (p->type) { + case NVGPU_GR_ZBC_TYPE_COLOR: + nvgpu_memcpy((u8 *)p->color_ds, (u8 *)zbc_val->color_ds, + sizeof(p->color_ds)); + nvgpu_memcpy((u8 *)p->color_l2, (u8 *)zbc_val->color_l2, + sizeof(p->color_l2)); + break; + case NVGPU_GR_ZBC_TYPE_DEPTH: + p->depth = zbc_val->depth; + break; + default: + return -EINVAL; + } + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? 
-ENOMEM : 0; +} + +int vgpu_gr_query_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_query_params *query_params) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_zbc_query_table_params *p = + &msg.params.zbc_query_table; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_ZBC_QUERY_TABLE; + msg.handle = vgpu_get_handle(g); + + p->type = query_params->type; + p->index_size = query_params->index_size; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + return -ENOMEM; + } + + switch (query_params->type) { + case NVGPU_GR_ZBC_TYPE_COLOR: + nvgpu_memcpy((u8 *)query_params->color_ds, (u8 *)p->color_ds, + sizeof(query_params->color_ds)); + nvgpu_memcpy((u8 *)query_params->color_l2, (u8 *)p->color_l2, + sizeof(query_params->color_l2)); + break; + case NVGPU_GR_ZBC_TYPE_DEPTH: + query_params->depth = p->depth; + break; + case NVGPU_GR_ZBC_TYPE_INVALID: + query_params->index_size = p->index_size; + break; + default: + return -EINVAL; + } + query_params->ref_cnt = p->ref_cnt; + query_params->format = p->format; + + return 0; +} +#endif + +static void vgpu_remove_gr_support(struct gk20a *g) +{ + struct nvgpu_gr *gr = g->gr; + + nvgpu_log_fn(gr->g, " "); + + nvgpu_kfree(gr->g, gr->config->sm_to_cluster); + gr->config->sm_to_cluster = NULL; + +#ifdef CONFIG_NVGPU_SM_DIVERSITY + if (gr->config->sm_to_cluster_redex_config != NULL) { + nvgpu_kfree(g, gr->config->sm_to_cluster_redex_config); + gr->config->sm_to_cluster_redex_config = NULL; + } +#endif + + nvgpu_gr_config_deinit(gr->g, gr->config); + +#ifdef CONFIG_NVGPU_GRAPHICS + nvgpu_gr_zcull_deinit(gr->g, gr->zcull); +#endif + + nvgpu_gr_free(g); +} + +static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) +{ + struct nvgpu_gr *gr = g->gr; + int err; + + nvgpu_log_fn(g, " "); + + if (gr->sw_ready) { + nvgpu_log_fn(g, "skip init"); + return 0; + } + + gr->g = g; + + err = g->ops.gr.falcon.init_ctx_state(g, &gr->falcon->sizes); + if (err) { + goto clean_up; + } + + err = vgpu_gr_init_gr_config(g, gr); + if (err) { + goto clean_up; + } + + err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image, + nvgpu_gr_falcon_get_golden_image_size(g->gr->falcon)); + if (err != 0) { + goto clean_up; + } + +#ifdef CONFIG_NVGPU_DEBUGGER + err = nvgpu_gr_hwpm_map_init(g, &g->gr->hwpm_map, + nvgpu_gr_falcon_get_pm_ctxsw_image_size(g->gr->falcon)); + if (err != 0) { + nvgpu_err(g, "hwpm_map init failed"); + goto clean_up; + } +#endif + +#ifdef CONFIG_NVGPU_GRAPHICS + err = vgpu_gr_init_gr_zcull(g, gr, + nvgpu_gr_falcon_get_zcull_image_size(g->gr->falcon)); + if (err) { + goto clean_up; + } +#endif + + err = vgpu_gr_alloc_global_ctx_buffers(g); + if (err) { + goto clean_up; + } + + gr->gr_ctx_desc = nvgpu_gr_ctx_desc_alloc(g); + if (gr->gr_ctx_desc == NULL) { + goto clean_up; + } + +#ifdef CONFIG_NVGPU_GRAPHICS + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, + NVGPU_GR_CTX_PREEMPT_CTXSW, + nvgpu_gr_falcon_get_preempt_image_size(g->gr->falcon)); + } +#endif + + nvgpu_spinlock_init(&g->gr->intr->ch_tlb_lock); + + gr->remove_support = vgpu_remove_gr_support; + gr->sw_ready = true; + + nvgpu_log_fn(g, "done"); + return 0; + +clean_up: + nvgpu_err(g, "fail"); + vgpu_remove_gr_support(g); + return err; +} + +int vgpu_init_gr_support(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + return vgpu_gr_init_gr_setup_sw(g); +} + +int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) +{ + struct nvgpu_channel *ch = nvgpu_channel_from_id(g, info->chid); + + 
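+	/*
+	 * GR interrupts are forwarded per channel by the vGPU server: notify
+	 * and semaphore interrupts simply wake the channel's wait queues
+	 * below, while most other types are surfaced to the channel's error
+	 * notifier.
+	 */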
nvgpu_log_fn(g, " "); + + if (!ch) { + return 0; + } + + if (info->type != TEGRA_VGPU_GR_INTR_NOTIFY && + info->type != TEGRA_VGPU_GR_INTR_SEMAPHORE) { + nvgpu_err(g, "gr intr (%d) on ch %u", info->type, info->chid); + } + + switch (info->type) { + case TEGRA_VGPU_GR_INTR_NOTIFY: + nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); + break; + case TEGRA_VGPU_GR_INTR_SEMAPHORE: + nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq); + break; + case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT); + break; + case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); + case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD: + break; + case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); + break; + case TEGRA_VGPU_GR_INTR_FECS_ERROR: + break; + case TEGRA_VGPU_GR_INTR_CLASS_ERROR: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); + break; + case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); + break; + case TEGRA_VGPU_GR_INTR_EXCEPTION: + g->ops.channel.set_error_notifier(ch, + NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); + break; +#ifdef CONFIG_NVGPU_DEBUGGER + case TEGRA_VGPU_GR_INTR_SM_EXCEPTION: + g->ops.debugger.post_events(ch); + break; +#endif + default: + WARN_ON(1); + break; + } + + nvgpu_channel_put(ch); + return 0; +} + +int vgpu_gr_set_sm_debug_mode(struct gk20a *g, + struct nvgpu_channel *ch, u64 sms, bool enable) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_sm_debug_mode *p = &msg.params.sm_debug_mode; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_SET_SM_DEBUG_MODE; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + p->sms = sms; + p->enable = (u32)enable; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + return err ? err : msg.ret; +} + +int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, + struct nvgpu_tsg *tsg, bool enable) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_SMPC_CTXSW_MODE; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + + if (enable) { + p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; + } else { + p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; + } + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + return err ? err : msg.ret; +} + +int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, + u32 gr_instance_id, struct nvgpu_tsg *tsg, u64 gpu_va, u32 mode) +{ + struct nvgpu_gr_ctx *gr_ctx; + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; + int err; + + nvgpu_log_fn(g, " "); + + if (gpu_va) { + nvgpu_err(g, "gpu_va suppose to be allocated by this function."); + return -EINVAL; + } + + gr_ctx = tsg->gr_ctx; + + if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW) { + /* + * send command to enable HWPM only once - otherwise server + * will return an error due to using the same GPU VA twice. 
+ */ + + if (nvgpu_gr_ctx_get_pm_ctx_pm_mode(gr_ctx) == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) { + return 0; + } + p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; + } else if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { + if (nvgpu_gr_ctx_get_pm_ctx_pm_mode(gr_ctx) == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { + return 0; + } + p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; + } else if ((mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { + if (nvgpu_gr_ctx_get_pm_ctx_pm_mode(gr_ctx) == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { + return 0; + } + p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW; + } else { + nvgpu_err(g, "invalid hwpm context switch mode"); + return -EINVAL; + } + + if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { + /* Allocate buffer if necessary */ + err = vgpu_gr_alloc_pm_ctx(g, tsg->gr_ctx, tsg->vm); + if (err != 0) { + nvgpu_err(g, + "failed to allocate pm ctxt buffer"); + return err; + } + } + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + p->gpu_va = nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + err = err ? err : msg.ret; + if (!err) { + if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW) { + nvgpu_gr_ctx_set_pm_ctx_pm_mode(gr_ctx, + g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()); + } else if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { + nvgpu_gr_ctx_set_pm_ctx_pm_mode(gr_ctx, + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()); + } else { + nvgpu_gr_ctx_set_pm_ctx_pm_mode(gr_ctx, + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()); + } + } + + return err; +} + +int vgpu_gr_clear_sm_error_state(struct gk20a *g, + struct nvgpu_channel *ch, u32 sm_id) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_clear_sm_error_state *p = + &msg.params.clear_sm_error_state; + struct nvgpu_tsg *tsg; + int err; + + tsg = nvgpu_tsg_from_ch(ch); + if (!tsg) { + return -EINVAL; + } + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + msg.cmd = TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + p->sm_id = sm_id; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + (void) memset(&tsg->sm_error_states[sm_id], 0, + sizeof(*tsg->sm_error_states)); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return err ? 
err : msg.ret;
+}
+
+static int vgpu_gr_suspend_resume_contexts(struct gk20a *g,
+		struct dbg_session_gk20a *dbg_s,
+		int *ctx_resident_ch_fd, u32 cmd)
+{
+	struct dbg_session_channel_data *ch_data;
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_suspend_resume_contexts *p;
+	size_t n;
+	int channel_fd = -1;
+	int err = 0;
+	void *handle = NULL;
+	u16 *oob;
+	size_t oob_size;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
+
+	handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(),
+			TEGRA_VGPU_QUEUE_CMD,
+			(void **)&oob, &oob_size);
+	if (!handle) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	n = 0;
+	nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
+			dbg_session_channel_data, ch_entry) {
+		n++;
+	}
+
+	if (oob_size < n * sizeof(u16)) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	msg.cmd = cmd;
+	msg.handle = vgpu_get_handle(g);
+	p = &msg.params.suspend_contexts;
+	p->num_channels = n;
+	n = 0;
+	nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
+			dbg_session_channel_data, ch_entry) {
+		oob[n++] = (u16)ch_data->chid;
+	}
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (err || msg.ret) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	if (p->resident_chid != (u16)~0) {
+		nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
+				dbg_session_channel_data, ch_entry) {
+			if (ch_data->chid == p->resident_chid) {
+				channel_fd = ch_data->channel_fd;
+				break;
+			}
+		}
+	}
+
+done:
+	if (handle) {
+		vgpu_ivc_oob_put_ptr(handle);
+	}
+	nvgpu_mutex_release(&dbg_s->ch_list_lock);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	*ctx_resident_ch_fd = channel_fd;
+	return err;
+}
+
+int vgpu_gr_suspend_contexts(struct gk20a *g,
+		struct dbg_session_gk20a *dbg_s,
+		int *ctx_resident_ch_fd)
+{
+	return vgpu_gr_suspend_resume_contexts(g, dbg_s,
+			ctx_resident_ch_fd, TEGRA_VGPU_CMD_SUSPEND_CONTEXTS);
+}
+
+int vgpu_gr_resume_contexts(struct gk20a *g,
+		struct dbg_session_gk20a *dbg_s,
+		int *ctx_resident_ch_fd)
+{
+	return vgpu_gr_suspend_resume_contexts(g, dbg_s,
+			ctx_resident_ch_fd, TEGRA_VGPU_CMD_RESUME_CONTEXTS);
+}
+
+void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
+		struct tegra_vgpu_sm_esr_info *info)
+{
+	struct nvgpu_tsg_sm_error_state *sm_error_states;
+	struct nvgpu_tsg *tsg;
+	u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
+
+	if (info->sm_id >= no_of_sm) {
+		nvgpu_err(g, "invalid sm_id %d / %d", info->sm_id, no_of_sm);
+		return;
+	}
+
+	if (info->tsg_id >= g->fifo.num_channels) {
+		nvgpu_err(g, "invalid tsg_id in sm esr event");
+		return;
+	}
+
+	tsg = nvgpu_tsg_check_and_get_from_id(g, info->tsg_id);
+	if (tsg == NULL) {
+		nvgpu_err(g, "invalid tsg");
+		return;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	sm_error_states = &tsg->sm_error_states[info->sm_id];
+
+	sm_error_states->hww_global_esr = info->hww_global_esr;
+	sm_error_states->hww_warp_esr = info->hww_warp_esr;
+	sm_error_states->hww_warp_esr_pc = info->hww_warp_esr_pc;
+	sm_error_states->hww_global_esr_report_mask =
+		info->hww_global_esr_report_mask;
+	sm_error_states->hww_warp_esr_report_mask =
+		info->hww_warp_esr_report_mask;
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+}
+
+int vgpu_gr_init_sm_id_table(struct gk20a *g, struct nvgpu_gr_config *gr_config)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_vsms_mapping_params *p = &msg.params.vsms_mapping;
+	struct tegra_vgpu_vsms_mapping_entry *entry;
+	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
+	struct nvgpu_sm_info *sm_info;
+	int err;
+	size_t oob_size;
+	void *handle =
NULL; + u32 sm_id; + u32 max_sm; + u32 sm_config; + + msg.cmd = TEGRA_VGPU_CMD_GET_VSMS_MAPPING; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, + "get vsms mapping failed err %d", err); + return err; + } + + handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + (void **)&entry, &oob_size); + if (!handle) { + return -EINVAL; + } + + max_sm = gr_config->gpc_count * + gr_config->max_tpc_per_gpc_count * + priv->constants.sm_per_tpc; + if (p->num_sm > max_sm) { + return -EINVAL; + } + + if ((p->num_sm * sizeof(*entry) * + priv->constants.max_sm_diversity_config_count) > oob_size) { + return -EINVAL; + } + + gr_config->no_of_sm = p->num_sm; + for (sm_config = NVGPU_DEFAULT_SM_DIVERSITY_CONFIG; + sm_config < priv->constants.max_sm_diversity_config_count; + sm_config++) { + for (sm_id = 0; sm_id < p->num_sm; sm_id++, entry++) { +#ifdef CONFIG_NVGPU_SM_DIVERSITY + sm_info = + ((sm_config == NVGPU_DEFAULT_SM_DIVERSITY_CONFIG) ? + nvgpu_gr_config_get_sm_info(gr_config, sm_id) : + nvgpu_gr_config_get_redex_sm_info( + gr_config, sm_id)); +#else + sm_info = nvgpu_gr_config_get_sm_info(gr_config, sm_id); +#endif + sm_info->tpc_index = entry->tpc_index; + sm_info->gpc_index = entry->gpc_index; + sm_info->sm_index = entry->sm_index; + sm_info->global_tpc_index = entry->global_tpc_index; + } + } + vgpu_ivc_oob_put_ptr(handle); + + return 0; +} + +int vgpu_gr_update_pc_sampling(struct nvgpu_channel *ch, bool enable) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_channel_update_pc_sampling *p = + &msg.params.update_pc_sampling; + struct gk20a *g; + int err = -EINVAL; + + if (!ch->g) { + return err; + } + g = ch->g; + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_UPDATE_PC_SAMPLING; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + if (enable) { + p->mode = TEGRA_VGPU_ENABLE_SAMPLING; + } else { + p->mode = TEGRA_VGPU_DISABLE_SAMPLING; + } + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + + return err ? err : msg.ret; +} + +void vgpu_gr_init_cyclestats(struct gk20a *g) +{ +#if defined(CONFIG_NVGPU_CYCLESTATS) + bool snapshots_supported = true; + + /* cyclestats not supported on vgpu */ + nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, false); + + if (vgpu_css_init(g) != 0) { + snapshots_supported = false; + } + + nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT, + snapshots_supported); +#endif +} + +static int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm, + u32 class, + u32 flags) +{ + u32 graphics_preempt_mode = 0; + u32 compute_preempt_mode = 0; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + int err; + + nvgpu_log_fn(g, " "); + +#ifdef CONFIG_NVGPU_GRAPHICS + if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) { + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } +#endif +#ifdef CONFIG_NVGPU_CILP + if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) { + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } +#endif + + if (priv->constants.force_preempt_mode && !graphics_preempt_mode && + !compute_preempt_mode) { +#ifdef CONFIG_NVGPU_GRAPHICS + graphics_preempt_mode = g->ops.gpu_class.is_valid_gfx(class) ? + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP : 0; +#endif + compute_preempt_mode = + g->ops.gpu_class.is_valid_compute(class) ? 
+ NVGPU_PREEMPTION_MODE_COMPUTE_CTA : 0; + } + + if (graphics_preempt_mode || compute_preempt_mode) { + err = vgpu_gr_set_ctxsw_preemption_mode(g, gr_ctx, vm, + class, graphics_preempt_mode, compute_preempt_mode); + if (err) { + nvgpu_err(g, + "set_ctxsw_preemption_mode failed"); + return err; + } + } + + nvgpu_log_fn(g, "done"); + return 0; +} + +static int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct vm_gk20a *vm, u32 class, + u32 graphics_preempt_mode, + u32 compute_preempt_mode) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p = + &msg.params.gr_bind_ctxsw_buffers; + int err = 0; + +#ifdef CONFIG_NVGPU_GRAPHICS + if (g->ops.gpu_class.is_valid_gfx(class) && + g->gr->gr_ctx_desc->force_preemption_gfxp) { + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } +#endif + +#ifdef CONFIG_NVGPU_CILP + if (g->ops.gpu_class.is_valid_compute(class) && + g->gr->gr_ctx_desc->force_preemption_cilp) { + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } +#endif + + /* check for invalid combinations */ + if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0)) { + return -EINVAL; + } + +#if defined(CONFIG_NVGPU_CILP) && defined(CONFIG_NVGPU_GRAPHICS) + if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) && + (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP)) { + return -EINVAL; + } +#endif + + /* set preemption modes */ + switch (graphics_preempt_mode) { +#ifdef CONFIG_NVGPU_GRAPHICS + case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: + { + u32 spill_size = g->ops.gr.init.get_ctx_spill_size(g); + u32 pagepool_size = g->ops.gr.init.get_ctx_pagepool_size(g); + u32 betacb_size = g->ops.gr.init.get_ctx_betacb_size(g); + u32 attrib_cb_size = + g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size, + nvgpu_gr_config_get_tpc_count(g->gr->config), + nvgpu_gr_config_get_max_tpc_count(g->gr->config)); + u32 rtv_cb_size; + struct nvgpu_mem *desc; + + nvgpu_log_info(g, "gfxp context preempt size=%d", + g->gr->falcon->sizes.preempt_image_size); + nvgpu_log_info(g, "gfxp context spill size=%d", spill_size); + nvgpu_log_info(g, "gfxp context pagepool size=%d", pagepool_size); + nvgpu_log_info(g, "gfxp context attrib cb size=%d", + attrib_cb_size); + + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, + NVGPU_GR_CTX_SPILL_CTXSW, spill_size); + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, + NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size); + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, + NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size); + + if (g->ops.gr.init.get_gfxp_rtv_cb_size != NULL) { + rtv_cb_size = g->ops.gr.init.get_gfxp_rtv_cb_size(g); + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, + NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size); + } + + err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx, + g->gr->gr_ctx_desc, vm); + if (err != 0) { + nvgpu_err(g, "cannot allocate ctxsw buffers"); + goto fail; + } + + desc = nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx); + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->size; + + desc = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx); + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->size; + + desc = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx); + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] = + desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] = desc->size; + + desc = 
nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx); + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] = + desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] = desc->size; + + desc = nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer(gr_ctx); + p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_RTVCB] = + desc->gpu_va; + p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_RTVCB] = desc->size; + + nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx, + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP); + p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_GFX_GFXP; + break; + } +#endif + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx, + graphics_preempt_mode); + break; + + default: + break; + } + + if (g->ops.gpu_class.is_valid_compute(class)) { + switch (compute_preempt_mode) { + case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: + nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx, + NVGPU_PREEMPTION_MODE_COMPUTE_WFI); + p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_WFI; + break; + case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: + nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx, + NVGPU_PREEMPTION_MODE_COMPUTE_CTA); + p->mode = + TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CTA; + break; +#ifdef CONFIG_NVGPU_CILP + case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: + nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx, + NVGPU_PREEMPTION_MODE_COMPUTE_CILP); + p->mode = + TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CILP; + break; +#endif + default: + break; + } + } + + if ( +#ifdef CONFIG_NVGPU_GRAPHICS + (nvgpu_gr_ctx_get_graphics_preemption_mode(gr_ctx) != 0U) || +#endif + (nvgpu_gr_ctx_get_compute_preemption_mode(gr_ctx) != 0U)) { + msg.cmd = TEGRA_VGPU_CMD_BIND_GR_CTXSW_BUFFERS; + msg.handle = vgpu_get_handle(g); + p->tsg_id = nvgpu_gr_ctx_get_tsgid(gr_ctx); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + err = -ENOMEM; + goto fail; + } + } + + return err; + +fail: + nvgpu_err(g, "%s failed %d", __func__, err); + return err; +} + +int vgpu_gr_set_preemption_mode(struct nvgpu_channel *ch, + u32 graphics_preempt_mode, u32 compute_preempt_mode, + u32 gr_instance_id) +{ + struct nvgpu_gr_ctx *gr_ctx; + struct gk20a *g = ch->g; + struct nvgpu_tsg *tsg; + struct vm_gk20a *vm; + u32 class; + int err; + + class = ch->obj_class; + if (!class) { + return -EINVAL; + } + + tsg = nvgpu_tsg_from_ch(ch); + if (!tsg) { + return -EINVAL; + } + + vm = tsg->vm; + gr_ctx = tsg->gr_ctx; + +#ifdef CONFIG_NVGPU_GRAPHICS + /* skip setting anything if both modes are already set */ + if (graphics_preempt_mode && + (graphics_preempt_mode == + nvgpu_gr_ctx_get_graphics_preemption_mode(gr_ctx))) { + graphics_preempt_mode = 0; + } +#endif + + if (compute_preempt_mode && + (compute_preempt_mode == + nvgpu_gr_ctx_get_compute_preemption_mode(gr_ctx))) { + compute_preempt_mode = 0; + } + + if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) { + return 0; + } + + err = vgpu_gr_set_ctxsw_preemption_mode(g, gr_ctx, vm, class, + graphics_preempt_mode, + compute_preempt_mode); + if (err) { + nvgpu_err(g, "set_ctxsw_preemption_mode failed"); + return err; + } + + return err; +} + +u32 vgpu_gr_get_max_gpc_count(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.max_gpc_count; +} + +u32 vgpu_gr_get_gpc_count(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.gpc_count; +} + +u32 vgpu_gr_get_gpc_mask(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.gpc_mask; +} 
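+/*
+ * The getters above (max_gpc_count, gpc_count, gpc_mask) only read values
+ * cached in vgpu_priv_data by vgpu_get_constants() at init time, so they
+ * never issue a call to the RM server. The debugger helpers below do
+ * perform a round trip, using the same convention as the rest of this
+ * file: fill msg.cmd and msg.handle, send the fixed-size message with
+ * vgpu_comm_sendrecv(), then fold the transport error and the server-side
+ * status into one return value. As a minimal sketch of that pattern (the
+ * command name here is a placeholder, not a real TEGRA_VGPU_CMD_* value):
+ *
+ *	struct tegra_vgpu_cmd_msg msg = {};
+ *	int err;
+ *
+ *	msg.cmd = TEGRA_VGPU_CMD_EXAMPLE;
+ *	msg.handle = vgpu_get_handle(g);
+ *	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+ *	err = err ? err : msg.ret;
+ */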
+ +#ifdef CONFIG_NVGPU_DEBUGGER + +u64 vgpu_gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_get_tpc_exception_en_status_params *p = + &msg.params.get_tpc_exception_status; + u64 tpc_exception_en = 0U; + int err = 0; + + msg.cmd = TEGRA_VGPU_CMD_GET_TPC_EXCEPTION_EN_STATUS; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, + "get tpc enabled exception failed err %d", err); + return err; + } + + tpc_exception_en = p->tpc_exception_en_sm_mask; + return tpc_exception_en; +} + +int vgpu_gr_set_mmu_debug_mode(struct gk20a *g, + struct nvgpu_channel *ch, bool enable) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gr_set_mmu_debug_mode_params *p = + &msg.params.gr_set_mmu_debug_mode; + int err; + + msg.cmd = TEGRA_VGPU_CMD_GR_SET_MMU_DEBUG_MODE; + msg.handle = vgpu_get_handle(g); + p->ch_handle = ch->virt_ctx; + p->enable = enable ? 1U : 0U; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err != 0 ? err : msg.ret; + if (err != 0) { + nvgpu_err(g, + "gr set mmu debug mode failed err %d", err); + } + + return err; +} + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h new file mode 100644 index 000000000..5a2fe2d7a --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_GR_VGPU_H +#define NVGPU_GR_VGPU_H + +#include + +struct gk20a; +struct nvgpu_channel; +struct gr_gk20a; +#ifdef CONFIG_NVGPU_GRAPHICS +struct nvgpu_gr_zcull_info; +struct nvgpu_gr_zcull; +struct nvgpu_gr_zbc; +struct nvgpu_gr_zbc_entry; +struct nvgpu_gr_zbc_query_params; +#endif +struct dbg_session_gk20a; +struct nvgpu_tsg; +struct vm_gk20a; +struct nvgpu_gr_ctx; +struct tegra_vgpu_gr_intr_info; +struct tegra_vgpu_sm_esr_info; +struct nvgpu_gr_falcon_query_sizes; + +void vgpu_gr_detect_sm_arch(struct gk20a *g); +int vgpu_gr_init_ctx_state(struct gk20a *g, + struct nvgpu_gr_falcon_query_sizes *sizes); +int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g); +void vgpu_gr_free_channel_ctx(struct nvgpu_channel *c, bool is_tsg); +void vgpu_gr_free_tsg_ctx(struct nvgpu_tsg *tsg); +int vgpu_gr_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, u32 flags); +u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, + u32 gpc_index); +u32 vgpu_gr_get_max_fbps_count(struct gk20a *g); +u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g); +u32 vgpu_gr_get_max_lts_per_ltc(struct gk20a *g); +#ifdef CONFIG_NVGPU_GRAPHICS +int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c, + u64 zcull_va, u32 mode); +int vgpu_gr_get_zcull_info(struct gk20a *g, + struct nvgpu_gr_config *gr_config, + struct nvgpu_gr_zcull *zcull, + struct nvgpu_gr_zcull_info *zcull_params); +int vgpu_gr_add_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_entry *zbc_val); +int vgpu_gr_query_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc, + struct nvgpu_gr_zbc_query_params *query_params); +#endif +int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, + struct nvgpu_tsg *tsg, bool enable); +int vgpu_gr_set_sm_debug_mode(struct gk20a *g, + struct nvgpu_channel *ch, u64 sms, bool enable); +int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, + u32 gr_instance_id, struct nvgpu_tsg *tsg, u64 gpu_va, u32 mode); +int vgpu_gr_clear_sm_error_state(struct gk20a *g, + struct nvgpu_channel *ch, u32 sm_id); +int vgpu_gr_suspend_contexts(struct gk20a *g, + struct dbg_session_gk20a *dbg_s, + int *ctx_resident_ch_fd); +int vgpu_gr_resume_contexts(struct gk20a *g, + struct dbg_session_gk20a *dbg_s, + int *ctx_resident_ch_fd); +int vgpu_gr_init_sm_id_table(struct gk20a *g, + struct nvgpu_gr_config *gr_config); +int vgpu_gr_update_pc_sampling(struct nvgpu_channel *ch, bool enable); +void vgpu_gr_init_cyclestats(struct gk20a *g); +int vgpu_gr_set_preemption_mode(struct nvgpu_channel *ch, + u32 graphics_preempt_mode, u32 compute_preempt_mode, + u32 gr_instance_id); +int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info); +void vgpu_gr_handle_sm_esr_event(struct gk20a *g, + struct tegra_vgpu_sm_esr_info *info); +int vgpu_init_gr_support(struct gk20a *g); +u32 vgpu_gr_get_max_gpc_count(struct gk20a *g); +u32 vgpu_gr_get_gpc_count(struct gk20a *g); +u32 vgpu_gr_get_gpc_mask(struct gk20a *g); +#ifdef CONFIG_NVGPU_DEBUGGER +u64 vgpu_gr_gk20a_tpc_enabled_exceptions(struct gk20a *g); +int vgpu_gr_set_mmu_debug_mode(struct gk20a *g, + struct nvgpu_channel *ch, bool enable); +#endif + +#endif /* NVGPU_GR_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.c new file mode 100644 index 000000000..754119c52 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "common/gr/subctx_priv.h" + +#include "subctx_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_alloc_subctx_header(struct gk20a *g, + struct nvgpu_gr_subctx **gr_subctx, + struct vm_gk20a *vm, u64 virt_ctx) +{ + struct nvgpu_gr_subctx *subctx; + struct nvgpu_mem *ctxheader; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_alloc_ctx_header_params *p = + &msg.params.alloc_ctx_header; + int err; + + subctx = nvgpu_kzalloc(g, sizeof(*subctx)); + if (subctx == NULL) { + return -ENOMEM; + } + + ctxheader = &subctx->ctx_header; + + msg.cmd = TEGRA_VGPU_CMD_ALLOC_CTX_HEADER; + msg.handle = vgpu_get_handle(g); + p->ch_handle = virt_ctx; + p->ctx_header_va = nvgpu_vm_alloc_va(vm, + g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(), + GMMU_PAGE_SIZE_KERNEL); + if (p->ctx_header_va == 0U) { + nvgpu_err(g, "alloc va failed for ctx_header"); + err = -ENOMEM; + goto fail; + } + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (unlikely(err != 0)) { + nvgpu_err(g, "alloc ctx_header failed err %d", err); + nvgpu_vm_free_va(vm, p->ctx_header_va, + GMMU_PAGE_SIZE_KERNEL); + goto fail; + } + ctxheader->gpu_va = p->ctx_header_va; + + *gr_subctx = subctx; + return 0; + +fail: + nvgpu_kfree(g, subctx); + return err; +} + +void vgpu_free_subctx_header(struct gk20a *g, struct nvgpu_gr_subctx *subctx, + struct vm_gk20a *vm, u64 virt_ctx) +{ + struct nvgpu_mem *ctxheader; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_free_ctx_header_params *p = + &msg.params.free_ctx_header; + int err; + + if (subctx != NULL) { + ctxheader = &subctx->ctx_header; + + msg.cmd = TEGRA_VGPU_CMD_FREE_CTX_HEADER; + msg.handle = vgpu_get_handle(g); + p->ch_handle = virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (unlikely(err != 0)) { + nvgpu_err(g, "free ctx_header failed err %d", err); + } + nvgpu_vm_free_va(vm, ctxheader->gpu_va, + GMMU_PAGE_SIZE_KERNEL); + ctxheader->gpu_va = 0; + nvgpu_kfree(g, subctx); + } +} + +void vgpu_gr_setup_free_subctx(struct nvgpu_channel *c) +{ + vgpu_free_subctx_header(c->g, c->subctx, c->vm, c->virt_ctx); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.h new file mode 100644 index 000000000..c3ae21c07 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_SUBCTX_VGPU_H +#define NVGPU_SUBCTX_VGPU_H + +struct gk20a; +struct nvgpu_gr_subctx; +struct vm_gk20a; +struct nvgpu_channel; + +int vgpu_alloc_subctx_header(struct gk20a *g, + struct nvgpu_gr_subctx **gr_subctx, + struct vm_gk20a *vm, u64 virt_ctx); +void vgpu_free_subctx_header(struct gk20a *g, struct nvgpu_gr_subctx *subctx, + struct vm_gk20a *vm, u64 virt_ctx); +void vgpu_gr_setup_free_subctx(struct nvgpu_channel *c); + +#endif /* NVGPU_SUBCTX_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c new file mode 100644 index 000000000..2e81ebb20 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "init_vgpu.h" +#include "hal/vgpu/init/init_hal_vgpu.h" +#include "common/vgpu/gr/fecs_trace_vgpu.h" +#include "common/vgpu/fifo/fifo_vgpu.h" +#include "common/vgpu/mm/mm_vgpu.h" +#include "common/vgpu/gr/gr_vgpu.h" +#include "common/vgpu/fbp/fbp_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +u64 vgpu_connect(void) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_connect_params *p = &msg.params.connect; + int err; + + msg.cmd = TEGRA_VGPU_CMD_CONNECT; + p->module = TEGRA_VGPU_MODULE_GPU; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + return (err || msg.ret) ? 0 : p->handle; +} + +void vgpu_remove_support_common(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct tegra_vgpu_intr_msg msg; + int err; + +#ifdef CONFIG_NVGPU_DEBUGGER + if (g->dbg_regops_tmp_buf) { + nvgpu_kfree(g, g->dbg_regops_tmp_buf); + } +#endif + + nvgpu_gr_remove_support(g); + + if (g->ops.grmgr.remove_gr_manager != NULL) { + if (g->ops.grmgr.remove_gr_manager(g) != 0) { + nvgpu_err(g, "g->ops.grmgr.remove_gr_manager-failed"); + } + } + + if (g->fifo.remove_support) { + g->fifo.remove_support(&g->fifo); + } + + nvgpu_pmu_remove_support(g, g->pmu); + + if (g->mm.remove_support) { + g->mm.remove_support(&g->mm); + } + +#if defined(CONFIG_NVGPU_CYCLESTATS) + nvgpu_free_cyclestats_snapshot_data(g); +#endif + + nvgpu_fbp_remove_support(g); + + msg.event = TEGRA_VGPU_EVENT_ABORT; + err = vgpu_ivc_send(vgpu_ivc_get_peer_self(), TEGRA_VGPU_QUEUE_INTR, + &msg, sizeof(msg)); + WARN_ON(err); + nvgpu_thread_stop(&priv->intr_handler); + + nvgpu_clk_arb_cleanup_arbiter(g); + + nvgpu_mutex_destroy(&g->clk_arb_enable_lock); + nvgpu_mutex_destroy(&priv->vgpu_clk_get_freq_lock); + + nvgpu_kfree(g, priv->freqs); +} + +int vgpu_init_gpu_characteristics(struct gk20a *g) +{ + int err; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + err = nvgpu_init_gpu_characteristics(g); + if (err != 0) { + nvgpu_err(g, "failed to init GPU characteristics"); + return err; + } + + /* features vgpu does not support */ + nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SM_TTU, priv->constants.support_sm_ttu != 0U); + nvgpu_set_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT, false); + + /* per-device identifier */ + g->per_device_identifier = priv->constants.per_device_identifier; + + return 0; +} + +int vgpu_get_constants(struct gk20a *g) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_constants_params *p; + void *oob_handle; + size_t oob_size; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + int err; + + nvgpu_log_fn(g, " "); + + oob_handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + (void **)&p, &oob_size); + if (!oob_handle || oob_size < sizeof(*p)) { + return -EINVAL; + } + + msg.cmd = TEGRA_VGPU_CMD_GET_CONSTANTS; + 
msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + + if (unlikely(err)) { + nvgpu_err(g, "%s failed, err=%d", __func__, err); + goto fail; + } + + nvgpu_smp_rmb(); + + if (unlikely(p->gpc_count > TEGRA_VGPU_MAX_GPC_COUNT || + p->max_tpc_per_gpc_count > TEGRA_VGPU_MAX_TPC_COUNT_PER_GPC)) { + nvgpu_err(g, "gpc_count %d max_tpc_per_gpc %d overflow", + (int)p->gpc_count, (int)p->max_tpc_per_gpc_count); + err = -EINVAL; + goto fail; + } + + priv->constants = *p; +fail: + vgpu_ivc_oob_put_ptr(oob_handle); + return err; +} + +int vgpu_finalize_poweron_common(struct gk20a *g) +{ + int err; + + nvgpu_log_fn(g, " "); + + vgpu_detect_chip(g); + err = vgpu_init_hal(g); + if (err != 0) { + return err; + } + + err = nvgpu_device_init(g); + if (err != 0) { + nvgpu_err(g, "failed to init devices"); + return err; + } + + err = nvgpu_init_ltc_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init ltc"); + return err; + } + + err = vgpu_init_mm_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init gk20a mm"); + return err; + } + + err = nvgpu_fifo_init_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init gk20a fifo"); + return err; + } + + err = vgpu_fbp_init_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init gk20a fbp"); + return err; + } + + err = g->ops.grmgr.init_gr_manager(g); + if (err != 0) { + nvgpu_err(g, "failed to init gk20a grmgr"); + return err; + } + + err = nvgpu_gr_alloc(g); + if (err != 0) { + nvgpu_err(g, "couldn't allocate gr memory"); + return err; + } + + err = vgpu_init_gr_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init gk20a gr"); + return err; + } + + err = nvgpu_clk_arb_init_arbiter(g); + if (err != 0) { + nvgpu_err(g, "failed to init clk arb"); + return err; + } + +#ifdef CONFIG_NVGPU_COMPRESSION + err = nvgpu_cbc_init_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init cbc"); + return err; + } +#endif + + err = g->ops.chip_init_gpu_characteristics(g); + if (err != 0) { + nvgpu_err(g, "failed to init GPU characteristics"); + return err; + } + + err = g->ops.channel.resume_all_serviceable_ch(g); + if (err != 0) { + nvgpu_err(g, "Failed to resume channels"); + return err; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.h new file mode 100644 index 000000000..b3d058c09 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef INIT_VGPU_H +#define INIT_VGPU_H + +struct gk20a; + +u64 vgpu_connect(void); +void vgpu_remove_support_common(struct gk20a *g); +int vgpu_init_gpu_characteristics(struct gk20a *g); +int vgpu_get_constants(struct gk20a *g); +int vgpu_finalize_poweron_common(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/intr/intr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/intr/intr_vgpu.c new file mode 100644 index 000000000..a41f8c9d4 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/intr/intr_vgpu.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +#include + +#include "intr_vgpu.h" +#include "common/vgpu/gr/fecs_trace_vgpu.h" +#include "common/vgpu/fifo/fifo_vgpu.h" +#include "common/vgpu/fifo/channel_vgpu.h" +#include "common/vgpu/fifo/tsg_vgpu.h" +#include "common/vgpu/mm/mm_vgpu.h" +#include "common/vgpu/gr/gr_vgpu.h" + +int vgpu_intr_thread(void *dev_id) +{ + struct gk20a *g = dev_id; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + while (true) { + struct tegra_vgpu_intr_msg *msg; + u32 sender; + void *handle; + size_t size; + int err; + + err = vgpu_ivc_recv(TEGRA_VGPU_QUEUE_INTR, &handle, + (void **)&msg, &size, &sender); + if (err == -ETIME) { + continue; + } + if (err != 0) { + nvgpu_do_assert_print(g, + "Unexpected vgpu_ivc_recv err=%d", err); + continue; + } + + if (msg->event == TEGRA_VGPU_EVENT_ABORT) { + vgpu_ivc_release(handle); + break; + } + + switch (msg->event) { + case TEGRA_VGPU_EVENT_INTR: + if (msg->unit == TEGRA_VGPU_INTR_GR) { + vgpu_gr_isr(g, &msg->info.gr_intr); + } else if (msg->unit == TEGRA_VGPU_INTR_FIFO) { + vgpu_fifo_isr(g, &msg->info.fifo_intr); + } + break; +#ifdef CONFIG_NVGPU_FECS_TRACE + case TEGRA_VGPU_EVENT_FECS_TRACE: + vgpu_fecs_trace_data_update(g); + break; +#endif + case TEGRA_VGPU_EVENT_CHANNEL: + vgpu_tsg_handle_event(g, &msg->info.channel_event); + break; + case TEGRA_VGPU_EVENT_SM_ESR: + vgpu_gr_handle_sm_esr_event(g, &msg->info.sm_esr); + break; + case TEGRA_VGPU_EVENT_SEMAPHORE_WAKEUP: + g->ops.semaphore_wakeup(g, + !!msg->info.sem_wakeup.post_events); + break; + case TEGRA_VGPU_EVENT_CHANNEL_CLEANUP: + vgpu_channel_abort_cleanup(g, + msg->info.ch_cleanup.chid); + break; + case TEGRA_VGPU_EVENT_SET_ERROR_NOTIFIER: + vgpu_channel_set_error_notifier(g, + &msg->info.set_error_notifier); + break; + default: + nvgpu_err(g, "unknown event %u", msg->event); + break; + } + + vgpu_ivc_release(handle); + } + + while (!nvgpu_thread_should_stop(&priv->intr_handler)) { + nvgpu_msleep(10); + } + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/intr/intr_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/intr/intr_vgpu.h new file mode 100644 index 000000000..d067b8987 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/intr/intr_vgpu.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef INTR_VGPU_H +#define INTR_VGPU_H + +int vgpu_intr_thread(void *dev_id); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/ivc/comm_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ivc/comm_vgpu.c new file mode 100644 index 000000000..c2863a00c --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ivc/comm_vgpu.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "comm_vgpu.h" + +int vgpu_comm_init(struct gk20a *g) +{ + size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES }; + + return vgpu_ivc_init(g, 3, queue_sizes, TEGRA_VGPU_QUEUE_CMD, + ARRAY_SIZE(queue_sizes)); +} + +void vgpu_comm_deinit(void) +{ + size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES }; + + vgpu_ivc_deinit(TEGRA_VGPU_QUEUE_CMD, ARRAY_SIZE(queue_sizes)); +} + +int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in, + size_t size_out) +{ + void *handle; + size_t size = size_in; + void *data = msg; + int err; + + err = vgpu_ivc_sendrecv(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size); + if (err == 0) { + WARN_ON(size < size_out); + nvgpu_memcpy((u8 *)msg, (u8 *)data, size_out); + vgpu_ivc_release(handle); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/ivc/comm_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/ivc/comm_vgpu.h new file mode 100644 index 000000000..699b3ba09 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ivc/comm_vgpu.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef COMM_VGPU_H +#define COMM_VGPU_H + +struct gk20a; +struct tegra_vgpu_cmd_msg; + +int vgpu_comm_init(struct gk20a *g); +void vgpu_comm_deinit(void); +int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in, + size_t size_out); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c new file mode 100644 index 000000000..c8a369707 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c @@ -0,0 +1,102 @@ +/* + * Virtualized GPU L2 + * + * Copyright (c) 2014-2021 NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "common/vgpu/ivc/comm_vgpu.h" +#include "ltc_vgpu.h" + +u64 vgpu_determine_L2_size_bytes(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + return priv->constants.l2_size; +} + +void vgpu_ltc_init_fs_state(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct nvgpu_ltc *ltc = g->ltc; + + nvgpu_log_fn(g, " "); + + ltc->ltc_count = priv->constants.ltc_count; + ltc->cacheline_size = priv->constants.cacheline_size; + ltc->slices_per_ltc = priv->constants.slices_per_ltc; +} + +#ifdef CONFIG_NVGPU_DEBUGGER + +int vgpu_ltc_get_max_ways_evict_last(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 *num_ways) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_l2_max_ways_evict_last_params *p = + &msg.params.l2_max_ways_evict_last; + int err; + + msg.cmd = TEGRA_VGPU_CMD_TSG_GET_L2_MAX_WAYS_EVICT_LAST; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + + if (unlikely(err)) { + nvgpu_err(g, "failed to get L2 max ways evict last, err %d", + err); + } else { + *num_ways = p->num_ways; + } + + return err; +} + +int vgpu_ltc_set_max_ways_evict_last(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 num_ways) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_l2_max_ways_evict_last_params *p = + &msg.params.l2_max_ways_evict_last; + int err; + + msg.cmd = TEGRA_VGPU_CMD_TSG_SET_L2_MAX_WAYS_EVICT_LAST; + msg.handle = vgpu_get_handle(g); + p->tsg_id = tsg->tsgid; + p->num_ways = num_ways; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + + if (unlikely(err)) { + nvgpu_err(g, "failed to set L2 max ways evict last, err %d", + err); + } + + return err; +} + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h new file mode 100644 index 000000000..3588a539b --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_LTC_VGPU_H +#define NVGPU_LTC_VGPU_H + +struct gk20a; +struct gr_gk20a; + +u64 vgpu_determine_L2_size_bytes(struct gk20a *g); +void vgpu_ltc_init_fs_state(struct gk20a *g); + +#ifdef CONFIG_NVGPU_DEBUGGER +int vgpu_ltc_get_max_ways_evict_last(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 *num_ways); +int vgpu_ltc_set_max_ways_evict_last(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 num_ways); +#endif + +#endif /* NVGPU_LTC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c new file mode 100644 index 000000000..baab36505 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c @@ -0,0 +1,395 @@ +/* + * Virtualized GPU Memory Management + * + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mm_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +static int vgpu_init_mm_setup_sw(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + + nvgpu_log_fn(g, " "); + + if (mm->sw_ready) { + nvgpu_log_fn(g, "skip init"); + return 0; + } + + nvgpu_mutex_init(&mm->tlb_lock); + nvgpu_mutex_init(&mm->priv_lock); + + mm->g = g; + + /*TBD: make channel vm size configurable */ + g->ops.mm.get_default_va_sizes(NULL, &mm->channel.user_size, + &mm->channel.kernel_size); + + nvgpu_log_info(g, "channel vm size: user %dMB kernel %dMB", + (int)(mm->channel.user_size >> 20), + (int)(mm->channel.kernel_size >> 20)); + + mm->sw_ready = true; + + return 0; +} + +int vgpu_init_mm_support(struct gk20a *g) +{ + int err; + + nvgpu_log_fn(g, " "); + + err = vgpu_init_mm_setup_sw(g); + if (err) { + return err; + } + + return err; +} + +void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + u32 pgsz_idx, + bool va_allocated, + enum gk20a_mem_rw_flag rw_flag, + bool sparse, + struct vm_gk20a_mapping_batch *batch) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_map_params *p = &msg.params.as_map; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP; + msg.handle = vgpu_get_handle(g); + p->handle = vm->handle; + p->gpu_va = vaddr; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + nvgpu_err(g, "failed to update gmmu ptes on unmap"); + } + + if (va_allocated) { + nvgpu_vm_free_va(vm, vaddr, pgsz_idx); + } + /* TLB invalidate handled on server side */ +} + +int vgpu_vm_bind_channel(struct vm_gk20a *vm, + struct nvgpu_channel *ch) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share; + int err; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, " "); + + ch->vm = vm; + msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE; + msg.handle = vgpu_get_handle(ch->g); + p->as_handle = vm->handle; + p->chan_handle = ch->virt_ctx; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + if (err || msg.ret) { + ch->vm = NULL; + err = -ENOMEM; + } + + if (ch->vm) { + nvgpu_vm_get(ch->vm); + } + + return err; +} + +static int vgpu_cache_maint(u64 handle, u8 op) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint; + int err; + + msg.cmd = 
TEGRA_VGPU_CMD_CACHE_MAINT; + msg.handle = handle; + p->op = op; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); + return err; +} + +int vgpu_mm_fb_flush(struct gk20a *g) +{ + + nvgpu_log_fn(g, " "); + + return vgpu_cache_maint(vgpu_get_handle(g), TEGRA_VGPU_FB_FLUSH); +} + +void vgpu_mm_l2_invalidate(struct gk20a *g) +{ + + nvgpu_log_fn(g, " "); + + (void) vgpu_cache_maint(vgpu_get_handle(g), TEGRA_VGPU_L2_MAINT_INV); +} + +int vgpu_mm_l2_flush(struct gk20a *g, bool invalidate) +{ + u8 op; + + nvgpu_log_fn(g, " "); + + if (invalidate) { + op = TEGRA_VGPU_L2_MAINT_FLUSH_INV; + } else { + op = TEGRA_VGPU_L2_MAINT_FLUSH; + } + + return vgpu_cache_maint(vgpu_get_handle(g), op); +} + +int vgpu_mm_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb) +{ + nvgpu_log_fn(g, " "); + + nvgpu_err(g, "call to RM server not supported"); + return 0; +} + +#ifdef CONFIG_NVGPU_DEBUGGER +void vgpu_mm_mmu_set_debug_mode(struct gk20a *g, bool enable) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_mmu_debug_mode *p = &msg.params.mmu_debug_mode; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_SET_MMU_DEBUG_MODE; + msg.handle = vgpu_get_handle(g); + p->enable = (u32)enable; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} +#endif + +static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc, + u64 addr, u64 size, size_t *oob_size) +{ + if (*oob_size < sizeof(*mem_desc)) { + return -ENOMEM; + } + + mem_desc->addr = addr; + mem_desc->length = size; + *oob_size -= sizeof(*mem_desc); + return 0; +} + +u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, + u64 map_offset, + struct nvgpu_sgt *sgt, + u64 buffer_offset, + u64 size, + u32 pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool clear_ctags, + bool sparse, + bool priv, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture) +{ + int err = 0; + struct gk20a *g = gk20a_from_vm(vm); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; + struct tegra_vgpu_mem_desc *mem_desc; + u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; + u64 buffer_size = NVGPU_ALIGN(size, SZ_4K); + u64 space_to_skip = buffer_offset; + u32 mem_desc_count = 0, i; + void *handle = NULL; + size_t oob_size; + u8 prot; + void *sgl; + + nvgpu_log_fn(g, " "); + + /* FIXME: add support for sparse mappings */ + + if (!sgt) { + nvgpu_do_assert_print(g, "NULL SGT"); + return 0; + } + + if (nvgpu_iommuable(g)) { + nvgpu_do_assert_print(g, "MM should not be IOMMU-able"); + return 0; + } + + if (space_to_skip & (page_size - 1)) { + return 0; + } + + (void) memset(&msg, 0, sizeof(msg)); + + /* Allocate (or validate when map_offset != 0) the virtual address. */ + if (!map_offset) { + map_offset = nvgpu_vm_alloc_va(vm, size, pgsz_idx); + if (!map_offset) { + nvgpu_err(g, "failed to allocate va space"); + err = -ENOMEM; + goto fail; + } + } + + handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + (void **)&mem_desc, &oob_size); + if (!handle) { + err = -EINVAL; + goto fail; + } + sgl = sgt->sgl; + + /* Align size to page size */ + size = NVGPU_ALIGN(size, page_size); + + while (sgl) { + u64 phys_addr; + u64 chunk_length; + + /* + * Cut out sgl ents for space_to_skip. 
+ */ + if (space_to_skip && + space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { + space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); + sgl = nvgpu_sgt_get_next(sgt, sgl); + continue; + } + + phys_addr = nvgpu_sgt_get_phys(g, sgt, sgl) + space_to_skip; + chunk_length = min(size, + nvgpu_sgt_get_length(sgt, sgl) - space_to_skip); + + if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, + chunk_length, &oob_size)) { + err = -ENOMEM; + goto fail; + } + + space_to_skip = 0; + size -= chunk_length; + sgl = nvgpu_sgt_get_next(sgt, sgl); + + if (size == 0) { + break; + } + } + + if (rw_flag == gk20a_mem_flag_read_only) { + prot = TEGRA_VGPU_MAP_PROT_READ_ONLY; + } else if (rw_flag == gk20a_mem_flag_write_only) { + prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY; + } else { + prot = TEGRA_VGPU_MAP_PROT_NONE; + } + + if (pgsz_idx == GMMU_PAGE_SIZE_KERNEL) { + pgsz_idx = GMMU_PAGE_SIZE_SMALL; + } + + msg.cmd = TEGRA_VGPU_CMD_AS_MAP_EX; + msg.handle = vgpu_get_handle(g); + p->handle = vm->handle; + p->gpu_va = map_offset; + p->size = buffer_size; + p->mem_desc_count = mem_desc_count; + nvgpu_assert(pgsz_idx <= U32(U8_MAX)); + p->pgsz_idx = U8(pgsz_idx); + p->iova = 0; + p->kind = kind_v; + if (flags & NVGPU_VM_MAP_CACHEABLE) { + p->flags = TEGRA_VGPU_MAP_CACHEABLE; + } + if (flags & NVGPU_VM_MAP_IO_COHERENT) { + p->flags |= TEGRA_VGPU_MAP_IO_COHERENT; + } + if (flags & NVGPU_VM_MAP_L3_ALLOC) { + p->flags |= TEGRA_VGPU_MAP_L3_ALLOC; + } + if (flags & NVGPU_VM_MAP_PLATFORM_ATOMIC) { + p->flags |= TEGRA_VGPU_MAP_PLATFORM_ATOMIC; + } + + p->prot = prot; + p->ctag_offset = ctag_offset; + p->clear_ctags = clear_ctags; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + goto fail; + } + + /* TLB invalidate handled on server side */ + + vgpu_ivc_oob_put_ptr(handle); + return map_offset; +fail: + if (handle) { + vgpu_ivc_oob_put_ptr(handle); + } + nvgpu_err(g, "Failed: err=%d, msg.ret=%d", err, msg.ret); + nvgpu_err(g, + " Map: %-5s GPU virt %#-12llx +%#-9llx " + "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " + "kind=%#02x APT=%-6s", + vm->name, map_offset, buffer_size, buffer_offset, + vm->gmmu_page_sizes[pgsz_idx] >> 10, + nvgpu_gmmu_perm_str(rw_flag), + kind_v, "SYSMEM"); + for (i = 0; i < mem_desc_count; i++) { + nvgpu_err(g, " > 0x%010llx + 0x%llx", + mem_desc[i].addr, mem_desc[i].length); + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.h new file mode 100644 index 000000000..b0f66bf11 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_MM_VGPU_H +#define NVGPU_MM_VGPU_H + +struct nvgpu_mem; +struct nvgpu_channel; +struct vm_gk20a_mapping_batch; +struct gk20a_as_share; +struct vm_gk20a; +enum gk20a_mem_rw_flag; +struct nvgpu_sgt; +enum nvgpu_aperture; + +void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + u32 pgsz_idx, + bool va_allocated, + enum gk20a_mem_rw_flag rw_flag, + bool sparse, + struct vm_gk20a_mapping_batch *batch); +int vgpu_vm_bind_channel(struct vm_gk20a *vm, + struct nvgpu_channel *ch); +int vgpu_mm_fb_flush(struct gk20a *g); +void vgpu_mm_l2_invalidate(struct gk20a *g); +int vgpu_mm_l2_flush(struct gk20a *g, bool invalidate); +int vgpu_mm_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb); +#ifdef CONFIG_NVGPU_DEBUGGER +void vgpu_mm_mmu_set_debug_mode(struct gk20a *g, bool enable); +#endif +u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, + u64 map_offset, + struct nvgpu_sgt *sgt, + u64 buffer_offset, + u64 size, + u32 pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool clear_ctags, + bool sparse, + bool priv, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture); +int vgpu_init_mm_support(struct gk20a *g); + +#endif /* NVGPU_MM_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/mm/vm_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/mm/vm_vgpu.c new file mode 100644 index 000000000..ad123ac7c --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/mm/vm_vgpu.c @@ -0,0 +1,81 @@ +/* + * Virtualized GPU VM + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/vgpu/ivc/comm_vgpu.h" + +/* + * This is called by the common VM init routine to handle vGPU specifics of + * intializing a VM on a vGPU. This alone is not enough to init a VM. See + * nvgpu_vm_init(). 
+ */ +int vgpu_vm_as_alloc_share(struct gk20a *g, struct vm_gk20a *vm) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_share_params *p = &msg.params.as_share; + int err; + + msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; + msg.handle = vgpu_get_handle(g); + p->size = vm->va_limit; + p->big_page_size = vm->big_page_size; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err || msg.ret) { + return -ENOMEM; + } + + vm->handle = p->handle; + + return 0; +} + +/* + * Similar to vgpu_vm_as_alloc_share() this is called as part of the cleanup + * path for VMs. This alone is not enough to remove a VM - + * see nvgpu_vm_remove(). + */ +void vgpu_vm_as_free_share(struct vm_gk20a *vm) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_as_share_params *p = &msg.params.as_share; + int err; + + msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; + msg.handle = vgpu_get_handle(g); + p->handle = vm->handle; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + WARN_ON(err || msg.ret); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.c new file mode 100644 index 000000000..a0ec94c4c --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cyclestats_snapshot_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_css_init(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct tegra_hv_ivm_cookie *cookie; + u32 mempool; + int err; + + err = nvgpu_dt_read_u32_index(g, "mempool-css", 1, &mempool); + if (err) { + nvgpu_err(g, "dt missing mempool-css"); + return err; + } + + cookie = vgpu_ivm_mempool_reserve(mempool); + if ((cookie == NULL) || + ((unsigned long)cookie >= (unsigned long)-MAX_ERRNO)) { + nvgpu_err(g, "mempool %u reserve failed", mempool); + return -EINVAL; + } + + priv->css_cookie = cookie; + + return 0; +} + +u32 vgpu_css_get_buffer_size(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + nvgpu_log_fn(g, " "); + + if (NULL == priv->css_cookie) { + return 0U; + } + + return vgpu_ivm_get_size(priv->css_cookie); +} + +static int vgpu_css_init_snapshot_buffer(struct gk20a *g) +{ + struct gk20a_cs_snapshot *data = g->cs_data; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + void *buf = NULL; + int err; + u64 size; + + nvgpu_log_fn(g, " "); + + if (data->hw_snapshot) { + return 0; + } + + if (NULL == priv->css_cookie) { + return -EINVAL; + } + + size = vgpu_ivm_get_size(priv->css_cookie); + /* Make sure buffer size is large enough */ + if (size < CSS_MIN_HW_SNAPSHOT_SIZE) { + nvgpu_info(g, "mempool size 0x%llx too small", size); + err = -ENOMEM; + goto fail; + } + + buf = vgpu_ivm_mempool_map(priv->css_cookie); + if (!buf) { + nvgpu_info(g, "vgpu_ivm_mempool_map failed"); + err = -EINVAL; + goto fail; + } + + data->hw_snapshot = buf; + data->hw_end = data->hw_snapshot + + size / sizeof(struct gk20a_cs_snapshot_fifo_entry); + data->hw_get = data->hw_snapshot; + (void) memset(data->hw_snapshot, 0xff, size); + return 0; +fail: + return err; +} + +void vgpu_css_release_snapshot_buffer(struct gk20a *g) +{ + struct gk20a_cs_snapshot *data = g->cs_data; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + if (!data->hw_snapshot) { + return; + } + + vgpu_ivm_mempool_unmap(priv->css_cookie, data->hw_snapshot); + data->hw_snapshot = NULL; + + nvgpu_log_info(g, "cyclestats(vgpu): buffer for snapshots released\n"); +} + +int vgpu_css_flush_snapshots(struct nvgpu_channel *ch, + u32 *pending, bool *hw_overflow) +{ + struct gk20a *g = ch->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_channel_cyclestats_snapshot_params *p; + struct gk20a_cs_snapshot *data = g->cs_data; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; + msg.handle = vgpu_get_handle(g); + p = &msg.params.cyclestats_snapshot; + p->handle = ch->virt_ctx; + p->subcmd = TEGRA_VGPU_CYCLE_STATS_SNAPSHOT_CMD_FLUSH; + p->buf_info = (uintptr_t)data->hw_get - (uintptr_t)data->hw_snapshot; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + err = (err || msg.ret) ? 
-1 : 0; + + *pending = p->buf_info; + *hw_overflow = p->hw_overflow; + + return err; +} + +static int vgpu_css_attach(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + struct gk20a *g = ch->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_channel_cyclestats_snapshot_params *p = + &msg.params.cyclestats_snapshot; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + p->subcmd = TEGRA_VGPU_CYCLE_STATS_SNAPSHOT_CMD_ATTACH; + p->perfmon_count = cs_client->perfmon_count; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "failed"); + } else { + cs_client->perfmon_start = p->perfmon_start; + } + + return err; +} + +int vgpu_css_detach(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + struct gk20a *g = ch->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_channel_cyclestats_snapshot_params *p = + &msg.params.cyclestats_snapshot; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + p->subcmd = TEGRA_VGPU_CYCLE_STATS_SNAPSHOT_CMD_DETACH; + p->perfmon_start = cs_client->perfmon_start; + p->perfmon_count = cs_client->perfmon_count; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "failed"); + } + + return err; +} + +int vgpu_css_enable_snapshot_buffer(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + int ret; + + ret = vgpu_css_attach(ch, cs_client); + if (ret) { + return ret; + } + + ret = vgpu_css_init_snapshot_buffer(ch->g); + return ret; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.h new file mode 100644 index 000000000..24b86bed8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_CSS_VGPU_H +#define NVGPU_CSS_VGPU_H + +#include + +struct gk20a; +struct nvgpu_channel; +struct gk20a_cs_snapshot_client; + +int vgpu_css_init(struct gk20a *g); +void vgpu_css_release_snapshot_buffer(struct gk20a *g); +int vgpu_css_flush_snapshots(struct nvgpu_channel *ch, + u32 *pending, bool *hw_overflow); +int vgpu_css_detach(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client); +int vgpu_css_enable_snapshot_buffer(struct nvgpu_channel *ch, + struct gk20a_cs_snapshot_client *cs_client); +u32 vgpu_css_get_buffer_size(struct gk20a *g); +#endif /* NVGPU_CSS_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.c new file mode 100644 index 000000000..ef9b0a1d2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "perf_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +static int vgpu_sendrecv_perfbuf_cmd(struct gk20a *g, u64 offset, u32 size) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->perfbuf.vm; + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_perfbuf_mgt_params *p = + &msg.params.perfbuf_management; + int err; + + msg.cmd = TEGRA_VGPU_CMD_PERFBUF_MGT; + msg.handle = vgpu_get_handle(g); + + p->vm_handle = vm->handle; + p->offset = offset; + p->size = size; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + return err; +} + +int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size) +{ + return vgpu_sendrecv_perfbuf_cmd(g, offset, size); +} + +int vgpu_perfbuffer_disable(struct gk20a *g) +{ + return vgpu_sendrecv_perfbuf_cmd(g, 0, 0); +} + +static int vgpu_sendrecv_perfbuf_inst_block_cmd(struct gk20a *g, u32 mode) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = mm->perfbuf.vm; + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_perfbuf_inst_block_mgt_params *p = + &msg.params.perfbuf_inst_block_management; + int err; + + msg.cmd = TEGRA_VGPU_CMD_PERFBUF_INST_BLOCK_MGT; + msg.handle = vgpu_get_handle(g); + + p->vm_handle = vm->handle; + p->mode = mode; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + return err; +} + +int vgpu_perfbuffer_init_inst_block(struct gk20a *g) +{ + return vgpu_sendrecv_perfbuf_inst_block_cmd(g, + TEGRA_VGPU_PROF_PERFBUF_INST_BLOCK_INIT); +} + +void vgpu_perfbuffer_deinit_inst_block(struct gk20a *g) +{ + vgpu_sendrecv_perfbuf_inst_block_cmd(g, + TEGRA_VGPU_PROF_PERFBUF_INST_BLOCK_DEINIT); +} + +int vgpu_perf_update_get_put(struct gk20a *g, u64 bytes_consumed, + bool update_available_bytes, u64 *put_ptr, + bool *overflowed) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_perf_update_get_put_params *p = + &msg.params.perf_updat_get_put; + int err; + + msg.cmd = TEGRA_VGPU_CMD_PERF_UPDATE_GET_PUT; + msg.handle = vgpu_get_handle(g); + + p->bytes_consumed = bytes_consumed; + p->update_available_bytes = (u8)update_available_bytes; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + + if (err == 0) { + if (put_ptr != NULL) { + *put_ptr = p->put_ptr; + } + if (overflowed != NULL) { + *overflowed = (bool)p->overflowed; + } + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.h new file mode 100644 index 000000000..6737d2a35 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/perf/perf_vgpu.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PERF_VGPU_H +#define NVGPU_PERF_VGPU_H + +struct gk20a; + +int vgpu_perfbuffer_enable(struct gk20a *g, u64 offset, u32 size); +int vgpu_perfbuffer_disable(struct gk20a *g); + +int vgpu_perfbuffer_init_inst_block(struct gk20a *g); +void vgpu_perfbuffer_deinit_inst_block(struct gk20a *g); + +int vgpu_perf_update_get_put(struct gk20a *g, u64 bytes_consumed, + bool update_available_bytes, u64 *put_ptr, + bool *overflowed); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/pm_reservation_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/pm_reservation_vgpu.c new file mode 100644 index 000000000..25d7d3b7b --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/pm_reservation_vgpu.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "pm_reservation_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_pm_reservation_acquire(struct gk20a *g, u32 reservation_id, + enum nvgpu_profiler_pm_resource_type pm_resource, + enum nvgpu_profiler_pm_reservation_scope scope, + u32 vmid) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_prof_mgt_params *p = &msg.params.prof_management; + int err = 0; + + msg.cmd = TEGRA_VGPU_CMD_PROF_MGT; + msg.handle = vgpu_get_handle(g); + + p->mode = TEGRA_VGPU_PROF_PM_RESERVATION_ACQUIRE; + p->reservation_id = reservation_id; + p->pm_resource = pm_resource; + p->scope = scope; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + return err; +} + +int vgpu_pm_reservation_release(struct gk20a *g, u32 reservation_id, + enum nvgpu_profiler_pm_resource_type pm_resource, + u32 vmid) +{ + struct tegra_vgpu_cmd_msg msg; + struct tegra_vgpu_prof_mgt_params *p = &msg.params.prof_management; + int err = 0; + + msg.cmd = TEGRA_VGPU_CMD_PROF_MGT; + msg.handle = vgpu_get_handle(g); + + p->mode = TEGRA_VGPU_PROF_PM_RESERVATION_RELEASE; + p->reservation_id = reservation_id; + p->pm_resource = pm_resource; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + return err; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/pm_reservation_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/pm_reservation_vgpu.h new file mode 100644 index 000000000..e7f9103ba --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/pm_reservation_vgpu.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PM_RESERVATION_VGPU_H +#define NVGPU_PM_RESERVATION_VGPU_H + +#include + +struct gk20a; +enum nvgpu_profiler_pm_resource_type; +enum nvgpu_profiler_pm_reservation_scope; + +int vgpu_pm_reservation_acquire(struct gk20a *g, u32 reservation_id, + enum nvgpu_profiler_pm_resource_type pm_resource, + enum nvgpu_profiler_pm_reservation_scope scope, + u32 vmid); +int vgpu_pm_reservation_release(struct gk20a *g, u32 reservation_id, + enum nvgpu_profiler_pm_resource_type pm_resource, + u32 vmid); + +#endif /* NVGPU_PM_RESERVATION_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.c new file mode 100644 index 000000000..f943482b8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.c @@ -0,0 +1,200 @@ +/* + * Tegra GPU Virtualization Interfaces to Server + * + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "common/vgpu/ivc/comm_vgpu.h" +#include "profiler_vgpu.h" + +int vgpu_profiler_bind_hwpm(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_prof_bind_unbind_params *p = + &msg.params.prof_bind_unbind; + int err; + + nvgpu_assert(gr_instance_id == 0U); + + if (is_ctxsw) { + err = g->ops.gr.update_hwpm_ctxsw_mode(g, gr_instance_id, + tsg, 0, NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW); + if (err != 0) { + return err; + } + } + + msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND; + msg.handle = vgpu_get_handle(g); + + p->subcmd = TEGRA_VGPU_PROF_BIND_HWPM; + p->is_ctxsw = is_ctxsw; + p->tsg_id = tsg != NULL ? tsg->tsgid : NVGPU_INVALID_TSG_ID; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + return err; +} + +int vgpu_profiler_unbind_hwpm(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_prof_bind_unbind_params *p = + &msg.params.prof_bind_unbind; + int err; + + nvgpu_assert(gr_instance_id == 0U); + + msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND; + msg.handle = vgpu_get_handle(g); + + p->subcmd = TEGRA_VGPU_PROF_UNBIND_HWPM; + p->is_ctxsw = is_ctxsw; + p->tsg_id = tsg != NULL ? tsg->tsgid : NVGPU_INVALID_TSG_ID; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + return err; +} + +int vgpu_profiler_bind_hwpm_streamout(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg, + u64 pma_buffer_va, + u32 pma_buffer_size, + u64 pma_bytes_available_buffer_va) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_prof_bind_unbind_params *p = + &msg.params.prof_bind_unbind; + int err; + + nvgpu_assert(gr_instance_id == 0U); + + if (is_ctxsw) { + err = g->ops.gr.update_hwpm_ctxsw_mode(g, gr_instance_id, + tsg, 0, + NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW); + if (err != 0) { + return err; + } + } + + msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND; + msg.handle = vgpu_get_handle(g); + + p->subcmd = TEGRA_VGPU_PROF_BIND_HWPM_STREAMOUT; + p->is_ctxsw = is_ctxsw; + p->tsg_id = tsg != NULL ? tsg->tsgid : NVGPU_INVALID_TSG_ID; + p->pma_buffer_va = pma_buffer_va; + p->pma_buffer_size = pma_buffer_size; + p->pma_bytes_available_buffer_va = pma_bytes_available_buffer_va; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + return err; +} + +int vgpu_profiler_unbind_hwpm_streamout(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg, + void *pma_bytes_available_buffer_cpuva, + bool smpc_reserved) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_prof_bind_unbind_params *p = + &msg.params.prof_bind_unbind; + int err; + + nvgpu_assert(gr_instance_id == 0U); + + msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND; + msg.handle = vgpu_get_handle(g); + + p->subcmd = TEGRA_VGPU_PROF_UNBIND_HWPM_STREAMOUT; + p->is_ctxsw = is_ctxsw; + p->tsg_id = tsg != NULL ? tsg->tsgid : NVGPU_INVALID_TSG_ID; + p->smpc_reserved = (u8)smpc_reserved; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + return err; +} + +int vgpu_profiler_bind_smpc(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_prof_bind_unbind_params *p = + &msg.params.prof_bind_unbind; + int err; + + nvgpu_assert(gr_instance_id == 0U); + + msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND; + msg.handle = vgpu_get_handle(g); + + p->subcmd = TEGRA_VGPU_PROF_BIND_SMPC; + p->is_ctxsw = is_ctxsw; + p->tsg_id = tsg != NULL ? tsg->tsgid : NVGPU_INVALID_TSG_ID; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + return err; +} + +int vgpu_profiler_unbind_smpc(struct gk20a *g, + bool is_ctxsw, + struct nvgpu_tsg *tsg) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_prof_bind_unbind_params *p = + &msg.params.prof_bind_unbind; + int err; + + msg.cmd = TEGRA_VGPU_CMD_PROF_BIND_UNBIND; + msg.handle = vgpu_get_handle(g); + + p->subcmd = TEGRA_VGPU_PROF_UNBIND_SMPC; + p->is_ctxsw = is_ctxsw; + p->tsg_id = tsg != NULL ? tsg->tsgid : NVGPU_INVALID_TSG_ID; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + return err; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.h new file mode 100644 index 000000000..ce95fb6bf --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/profiler/profiler_vgpu.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_PROFILER_VGPU_H +#define NVGPU_PROFILER_VGPU_H + +int vgpu_profiler_bind_hwpm(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg); + +int vgpu_profiler_unbind_hwpm(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg); + +int vgpu_profiler_bind_hwpm_streamout(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg, + u64 pma_buffer_va, + u32 pma_buffer_size, + u64 pma_bytes_available_buffer_va); + +int vgpu_profiler_unbind_hwpm_streamout(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg, + void *pma_bytes_available_buffer_cpuva, + bool smpc_reserved); + +int vgpu_profiler_bind_smpc(struct gk20a *g, + u32 gr_instance_id, + bool is_ctxsw, + struct nvgpu_tsg *tsg); + +int vgpu_profiler_unbind_smpc(struct gk20a *g, + bool is_ctxsw, + struct nvgpu_tsg *tsg); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/ptimer/ptimer_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ptimer/ptimer_vgpu.c new file mode 100644 index 000000000..867259826 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ptimer/ptimer_vgpu.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include +#include + +#include "ptimer_vgpu.h" +#include "common/vgpu/ivc/comm_vgpu.h" + +int vgpu_read_ptimer(struct gk20a *g, u64 *value) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_read_ptimer_params *p = &msg.params.read_ptimer; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_READ_PTIMER; + msg.handle = vgpu_get_handle(g); + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err == 0) { + *value = p->time; + } else { + nvgpu_err(g, "vgpu read ptimer failed, err=%d", err); + } + + return err; +} + +int vgpu_get_timestamps_zipper(struct gk20a *g, + u32 source_id, u32 count, + struct nvgpu_cpu_time_correlation_sample *samples) +{ + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_get_timestamps_zipper_params *p = + &msg.params.get_timestamps_zipper; + int err; + u32 i; + + nvgpu_log_fn(g, " "); + + if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) { + nvgpu_err(g, "count %u overflow", count); + return -EINVAL; + } + + msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER; + msg.handle = vgpu_get_handle(g); + p->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC; + p->count = count; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err != 0) { + nvgpu_err(g, "vgpu get timestamps zipper failed, err=%d", err); + return err; + } + + for (i = 0; i < count; i++) { + samples[i].cpu_timestamp = p->samples[i].cpu_timestamp; + samples[i].gpu_timestamp = p->samples[i].gpu_timestamp; + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/ptimer/ptimer_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/ptimer/ptimer_vgpu.h new file mode 100644 index 000000000..5f91be836 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/ptimer/ptimer_vgpu.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef PTIMER_VGPU_H +#define PTIMER_VGPU_H + +struct gk20a; +struct nvgpu_cpu_time_correlation_sample; + +int vgpu_read_ptimer(struct gk20a *g, u64 *value); +int vgpu_get_timestamps_zipper(struct gk20a *g, + u32 source_id, u32 count, + struct nvgpu_cpu_time_correlation_sample *samples); + +#endif diff --git a/drivers/gpu/nvgpu/common/vgpu/top/top_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/top/top_vgpu.c new file mode 100644 index 000000000..82d4ad311 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/top/top_vgpu.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include + +#include "top_vgpu.h" + +/* + * Similar to how the real HW version works, just read a device out of the vGPU + * device list one at a time. The core device management code will manage the + * actual device lists for us. + */ +struct nvgpu_device *vgpu_top_parse_next_dev(struct gk20a *g, u32 *token) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct tegra_vgpu_engines_info *engines = &priv->constants.engines_info; + struct nvgpu_device *dev; + + /* + * Check to see if we are done parsing engines. + */ + if (*token >= engines->num_engines) { + return NULL; + } + + dev = nvgpu_kzalloc(g, sizeof(*dev)); + if (!dev) { + return NULL; + } + + /* + * Copy the engine data into the device and return it to our caller. + */ + dev->type = engines->info[*token].engine_enum; + dev->engine_id = engines->info[*token].engine_id; + dev->intr_id = nvgpu_ffs(engines->info[*token].intr_mask) - 1; + dev->reset_id = nvgpu_ffs(engines->info[*token].reset_mask) - 1; + dev->runlist_id = engines->info[*token].runlist_id; + dev->pbdma_id = engines->info[*token].pbdma_id; + dev->inst_id = engines->info[*token].inst_id; + dev->pri_base = engines->info[*token].pri_base; + dev->fault_id = engines->info[*token].fault_id; + + (*token)++; + + return dev; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/top/top_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/top/top_vgpu.h new file mode 100644 index 000000000..1f3afd93e --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/top/top_vgpu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef COMMON_VGPU_TOP_VGPU_H +#define COMMON_VGPU_TOP_VGPU_H + +#include + +struct gk20a; +struct nvgpu_device; + +struct nvgpu_device *vgpu_top_parse_next_dev(struct gk20a *g, u32 *token); + +#endif diff --git a/drivers/gpu/nvgpu/doxygen/Doxyfile.safety b/drivers/gpu/nvgpu/doxygen/Doxyfile.safety new file mode 100644 index 000000000..e23368c53 --- /dev/null +++ b/drivers/gpu/nvgpu/doxygen/Doxyfile.safety @@ -0,0 +1,2444 @@ +# Doxyfile 1.8.11 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "nvgpu-driver" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. 
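+# For example, a hypothetical one-line brief for this project (illustrative
+# value only, not part of the original configuration) might look like:
+# PROJECT_BRIEF = "nvgpu driver safety build documentation"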
+ +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. 
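+# For example, an illustrative setting that strips common lead-in phrases
+# (values shown are assumptions, following the format described above):
+# ABBREVIATE_BRIEF = "The $name class" "The $name file" is provides specifies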
+ +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. 
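+# For instance (illustrative only), with this tag left at NO a block such as
+#   /// Returns the snapshot buffer size.
+#   /// Additional detail follows on later lines.
+# is treated as a detailed description rather than a brief one.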
+ +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the later case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. 
+# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. 
+ +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. 
+ +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. 
+ +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. 
+# The default value is: YES.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of the
+# FILE_VERSION_FILTER tag, and <input-file> is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
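+# Illustrative only (this configuration leaves LAYOUT_FILE empty): one could
+# generate the default layout file and then point doxygen at an edited copy:
+#   doxygen -l DoxygenLayout.xml
+#   LAYOUT_FILE = DoxygenLayout.xml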
+ +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. 
Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. + +@INCLUDE_PATH = doxygen +@INCLUDE = Doxyfile.sources.safety +@INCLUDE = ../../../userspace/SWUTS.sources + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f, *.for, *.tcl, +# *.vhd, *.vhdl, *.ucf, *.qsf, *.as and *.js. + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. 
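+# As a hypothetical illustration (example fragments are not used in this
+# configuration), restricting example snippets to C sources and headers would
+# look like:
+#   EXAMPLE_PATTERNS = *.c *.h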
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+#   <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
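+# For instance (illustrative only, not what is configured below), removing all
+# source code from the generated output would require both:
+#   SOURCE_BROWSER = NO
+#   VERBATIM_HEADERS = NO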
+# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse-libclang=ON option for CMake. +# The default value is: NO. 
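+# Hypothetical example (clang assisted parsing stays disabled here): if it were
+# enabled, compiler flags could be forwarded through CLANG_OPTIONS, e.g.
+#   CLANG_ASSISTED_PARSING = YES
+#   CLANG_OPTIONS = -std=gnu11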
+ +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
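+# Illustrative workflow (not used here; the file names are placeholders):
+# generate the default header, edit it, then reference the edited copy:
+#   doxygen -w html my_header.html my_footer.html my_stylesheet.css
+#   HTML_HEADER = my_header.html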
+ +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. 
Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. 
com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
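+# Hypothetical example (Qt help generation is disabled in this configuration;
+# the values below are placeholders):
+#   GENERATE_QHP = YES
+#   QHP_NAMESPACE = org.example.project
+#   QHP_VIRTUAL_FOLDER = doc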
+ +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. 
The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. 
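+# For example (illustrative only, not done here), switching to opaque formula
+# images would mean setting:
+#   FORMULA_TRANSPARENT = NO
+# and then deleting any existing form_*.png files from the HTML output
+# directory so that they are regenerated.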
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# http://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want the formulas to look prettier in the HTML output.
+# When enabled you may also need to install MathJax separately and configure the
+# path to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from http://www.mathjax.org before deployment.
+# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/