Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-22 17:36:20 +03:00)
The following compilation errors are reported with CONFIG_DEBUG_FS=n:

  pci.c:246:7: error: 'struct mm_gk20a' has no member named 'ltc_enabled'
  pci.c:247:7: error: 'struct mm_gk20a' has no member named 'ltc_enabled_debug'

A further error is sometimes reported:

  gk20a_allocator.c:1163:13: error: 'gk20a_alloc_debugfs_init' defined but not used

Fixed by adding '#ifdef CONFIG_DEBUG_FS' around debug-only code.

The following variables from gk20a.c are not initialized on the CONFIG_DEBUG_FS=n
path, which causes a kernel oops when booting:

  gk20a->mm.bypass_smmu = platform->bypass_smmu;
  gk20a->mm.disable_bigpage = platform->disable_bigpage;
  gk20a->mm.has_physical_mode = true;

Fix this by moving them out of the '#ifdef CONFIG_DEBUG_FS' section.

Bug 1778001

Change-Id: Ic2da36b3f500882748ee46a5150903244b697761
Signed-off-by: David Pu <dpu@nvidia.com>
Reviewed-on: http://git-master/r/1167358
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
2274 lines
55 KiB
C
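The sketch below illustrates the pattern this change applies: debugfs-only members stay behind '#ifdef CONFIG_DEBUG_FS', while the mm fields consumed on every boot path are initialized unconditionally. The field names mirror those set in gk20a_probe() further down, but the helper name example_init_mm_flags() is made up for illustration; treat this as a sketch of the pattern rather than the literal patch.

/*
 * Illustrative sketch only: keep debugfs-only state behind
 * CONFIG_DEBUG_FS, and initialize the mm fields used on every boot
 * path unconditionally so a CONFIG_DEBUG_FS=n kernel does not oops
 * on uninitialized values.
 */
static void example_init_mm_flags(struct gk20a *gk20a,
				  struct gk20a_platform *platform)
{
	/* Always initialized, with or without debugfs. */
	gk20a->mm.bypass_smmu = platform->bypass_smmu;
	gk20a->mm.disable_bigpage = platform->disable_bigpage;
	gk20a->mm.has_physical_mode = true;

#ifdef CONFIG_DEBUG_FS
	/* These members only exist when debugfs support is compiled in. */
	gk20a->mm.ltc_enabled = true;
	gk20a->mm.ltc_enabled_debug = true;
#endif
}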
/*
 * GK20A Graphics
 *
 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/export.h>
#include <linux/file.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/pm_runtime.h>
#include <linux/thermal.h>
#include <asm/cacheflush.h>
#include <linux/debugfs.h>
#include <linux/spinlock.h>
#include <linux/tegra-powergate.h>
#include <linux/tegra_pm_domains.h>
#include <linux/clk/tegra.h>
#include <linux/kthread.h>
#include <linux/platform/tegra/common.h>
#include <linux/reset.h>

#include <linux/sched.h>
#include <linux/version.h>

#include "gk20a.h"
#include "debug_gk20a.h"
#include "ctrl_gk20a.h"
#include "hw_mc_gk20a.h"
#include "hw_timer_gk20a.h"
#include "hw_bus_gk20a.h"
#include "hw_sim_gk20a.h"
#include "hw_top_gk20a.h"
#include "hw_ltc_gk20a.h"
#include "hw_gr_gk20a.h"
#include "hw_fb_gk20a.h"
#include "gk20a_scale.h"
#include "ctxsw_trace_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "gk20a_allocator.h"
#include "hal.h"
#include "vgpu/vgpu.h"
#include "pci.h"

#define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h>

#ifdef CONFIG_ARCH_TEGRA_18x_SOC
#include "nvgpu_gpuid_t18x.h"
#endif

#ifdef CONFIG_TEGRA_19x_GPU
#include "nvgpu_gpuid_t19x.h"
#endif

#ifdef CONFIG_ARM64
#define __cpuc_flush_dcache_area __flush_dcache_area
#endif

#define CLASS_NAME "nvidia-gpu"
/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */

#define GK20A_NUM_CDEVS 7

#define EMC3D_DEFAULT_RATIO 750

#if defined(GK20A_DEBUG)
u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
u32 gk20a_dbg_ftrace;
#endif

#define GK20A_WAIT_FOR_IDLE_MS 2000

static int gk20a_pm_prepare_poweroff(struct device *dev);

static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
{
	gk20a_get_platform(&pdev->dev)->g = gk20a;
}

static const struct file_operations gk20a_channel_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_channel_release,
	.open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_channel_ioctl,
#endif
	.unlocked_ioctl = gk20a_channel_ioctl,
};

static const struct file_operations gk20a_ctrl_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctrl_dev_release,
	.open = gk20a_ctrl_dev_open,
	.unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
};

static const struct file_operations gk20a_dbg_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_dbg_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
	.poll = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static const struct file_operations gk20a_as_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_as_dev_release,
	.open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_as_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_as_dev_ioctl,
};

/*
 * Note: We use a different 'open' to trigger handling of the profiler session.
 * Most of the code is shared between them... Though, at some point if the
 * code does get too tangled trying to handle each in the same path we can
 * separate them cleanly.
 */
static const struct file_operations gk20a_prof_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_prof_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
	/* .mmap = gk20a_prof_gpu_dev_mmap,*/
	/*int (*mmap) (struct file *, struct vm_area_struct *);*/
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static const struct file_operations gk20a_tsg_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_tsg_dev_release,
	.open = gk20a_tsg_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_tsg_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_tsg_dev_ioctl,
};

static const struct file_operations gk20a_ctxsw_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctxsw_dev_release,
	.open = gk20a_ctxsw_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctxsw_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
	.poll = gk20a_ctxsw_dev_poll,
	.read = gk20a_ctxsw_dev_read,
	.mmap = gk20a_ctxsw_dev_mmap,
};

static inline void sim_writel(struct gk20a *g, u32 r, u32 v)
{
	writel(v, g->sim.regs+r);
}

static inline u32 sim_readl(struct gk20a *g, u32 r)
{
	return readl(g->sim.regs+r);
}
/*
 * Locks out the driver from accessing GPU registers. This prevents access to
 * these registers after the GPU has been clock or power gated. This should help
 * find annoying bugs where register reads and writes are silently dropped
 * after the GPU has been turned off. On older chips these reads and writes can
 * also lock the entire CPU up.
 */
int gk20a_lockout_registers(struct gk20a *g)
{
	g->regs = NULL;
	g->bar1 = NULL;

	return 0;
}

/*
 * Undoes gk20a_lockout_registers().
 */
int gk20a_restore_registers(struct gk20a *g)
{
	g->regs = g->regs_saved;
	g->bar1 = g->bar1_saved;

	return 0;
}
|
|
|
|
static void kunmap_and_free_iopage(void **kvaddr, struct page **page)
|
|
{
|
|
if (*kvaddr) {
|
|
kunmap(*kvaddr);
|
|
*kvaddr = NULL;
|
|
}
|
|
if (*page) {
|
|
__free_page(*page);
|
|
*page = NULL;
|
|
}
|
|
}
|
|
|
|
static void gk20a_free_sim_support(struct gk20a *g)
|
|
{
|
|
/* free sim mappings, bfrs */
|
|
kunmap_and_free_iopage(&g->sim.send_bfr.kvaddr,
|
|
&g->sim.send_bfr.page);
|
|
|
|
kunmap_and_free_iopage(&g->sim.recv_bfr.kvaddr,
|
|
&g->sim.recv_bfr.page);
|
|
|
|
kunmap_and_free_iopage(&g->sim.msg_bfr.kvaddr,
|
|
&g->sim.msg_bfr.page);
|
|
}
|
|
|
|
static void gk20a_remove_sim_support(struct sim_gk20a *s)
|
|
{
|
|
struct gk20a *g = s->g;
|
|
if (g->sim.regs)
|
|
sim_writel(g, sim_config_r(), sim_config_mode_disabled_v());
|
|
gk20a_free_sim_support(g);
|
|
}
|
|
|
|
static int alloc_and_kmap_iopage(struct device *d,
|
|
void **kvaddr,
|
|
u64 *phys,
|
|
struct page **page)
|
|
{
|
|
int err = 0;
|
|
*page = alloc_page(GFP_KERNEL);
|
|
|
|
if (!*page) {
|
|
err = -ENOMEM;
|
|
dev_err(d, "couldn't allocate io page\n");
|
|
goto fail;
|
|
}
|
|
|
|
*kvaddr = kmap(*page);
|
|
if (!*kvaddr) {
|
|
err = -ENOMEM;
|
|
dev_err(d, "couldn't kmap io page\n");
|
|
goto fail;
|
|
}
|
|
*phys = page_to_phys(*page);
|
|
return 0;
|
|
|
|
fail:
|
|
kunmap_and_free_iopage(kvaddr, page);
|
|
return err;
|
|
|
|
}
|
|
|
|
static void __iomem *gk20a_ioremap_resource(struct platform_device *dev, int i,
|
|
struct resource **out)
|
|
{
|
|
struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i);
|
|
if (!r)
|
|
return NULL;
|
|
if (out)
|
|
*out = r;
|
|
return devm_ioremap_resource(&dev->dev, r);
|
|
}
|
|
|
|
/* TBD: strip from released */
|
|
static int gk20a_init_sim_support(struct platform_device *pdev)
|
|
{
|
|
int err = 0;
|
|
struct device *dev = &pdev->dev;
|
|
struct gk20a *g = get_gk20a(dev);
|
|
u64 phys;
|
|
|
|
g->sim.g = g;
|
|
g->sim.regs = gk20a_ioremap_resource(pdev, GK20A_SIM_IORESOURCE_MEM,
|
|
&g->sim.reg_mem);
|
|
if (IS_ERR(g->sim.regs)) {
|
|
dev_err(dev, "failed to remap gk20a sim regs\n");
|
|
err = PTR_ERR(g->sim.regs);
|
|
goto fail;
|
|
}
|
|
|
|
/* allocate sim event/msg buffers */
|
|
err = alloc_and_kmap_iopage(dev, &g->sim.send_bfr.kvaddr,
|
|
&g->sim.send_bfr.phys,
|
|
&g->sim.send_bfr.page);
|
|
|
|
err = err || alloc_and_kmap_iopage(dev, &g->sim.recv_bfr.kvaddr,
|
|
&g->sim.recv_bfr.phys,
|
|
&g->sim.recv_bfr.page);
|
|
|
|
err = err || alloc_and_kmap_iopage(dev, &g->sim.msg_bfr.kvaddr,
|
|
&g->sim.msg_bfr.phys,
|
|
&g->sim.msg_bfr.page);
|
|
|
|
if (!(g->sim.send_bfr.kvaddr && g->sim.recv_bfr.kvaddr &&
|
|
g->sim.msg_bfr.kvaddr)) {
|
|
dev_err(dev, "couldn't allocate all sim buffers\n");
|
|
goto fail;
|
|
}
|
|
|
|
/*mark send ring invalid*/
|
|
sim_writel(g, sim_send_ring_r(), sim_send_ring_status_invalid_f());
|
|
|
|
/*read get pointer and make equal to put*/
|
|
g->sim.send_ring_put = sim_readl(g, sim_send_get_r());
|
|
sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);
|
|
|
|
/*write send ring address and make it valid*/
|
|
phys = g->sim.send_bfr.phys;
|
|
sim_writel(g, sim_send_ring_hi_r(),
|
|
sim_send_ring_hi_addr_f(u64_hi32(phys)));
|
|
sim_writel(g, sim_send_ring_r(),
|
|
sim_send_ring_status_valid_f() |
|
|
sim_send_ring_target_phys_pci_coherent_f() |
|
|
sim_send_ring_size_4kb_f() |
|
|
sim_send_ring_addr_lo_f(phys >> PAGE_SHIFT));
|
|
|
|
/*repeat for recv ring (but swap put,get as roles are opposite) */
|
|
sim_writel(g, sim_recv_ring_r(), sim_recv_ring_status_invalid_f());
|
|
|
|
/*read put pointer and make equal to get*/
|
|
g->sim.recv_ring_get = sim_readl(g, sim_recv_put_r());
|
|
sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);
|
|
|
|
/*write send ring address and make it valid*/
|
|
phys = g->sim.recv_bfr.phys;
|
|
sim_writel(g, sim_recv_ring_hi_r(),
|
|
sim_recv_ring_hi_addr_f(u64_hi32(phys)));
|
|
sim_writel(g, sim_recv_ring_r(),
|
|
sim_recv_ring_status_valid_f() |
|
|
sim_recv_ring_target_phys_pci_coherent_f() |
|
|
sim_recv_ring_size_4kb_f() |
|
|
sim_recv_ring_addr_lo_f(phys >> PAGE_SHIFT));
|
|
|
|
g->sim.remove_support = gk20a_remove_sim_support;
|
|
return 0;
|
|
|
|
fail:
|
|
gk20a_free_sim_support(g);
|
|
return err;
|
|
}
|
|
|
|
static inline u32 sim_msg_header_size(void)
{
	return 24; /*TBD: fix the header to get this from NV_VGPU_MSG_HEADER*/
}
|
|
|
|
static inline u32 *sim_msg_bfr(struct gk20a *g, u32 byte_offset)
|
|
{
|
|
return (u32 *)(g->sim.msg_bfr.kvaddr + byte_offset);
|
|
}
|
|
|
|
static inline u32 *sim_msg_hdr(struct gk20a *g, u32 byte_offset)
|
|
{
|
|
return sim_msg_bfr(g, byte_offset); /*starts at 0*/
|
|
}
|
|
|
|
static inline u32 *sim_msg_param(struct gk20a *g, u32 byte_offset)
|
|
{
|
|
/*starts after msg header/cmn*/
|
|
return sim_msg_bfr(g, byte_offset + sim_msg_header_size());
|
|
}
|
|
|
|
static inline void sim_write_hdr(struct gk20a *g, u32 func, u32 size)
|
|
{
|
|
/*memset(g->sim.msg_bfr.kvaddr,0,min(PAGE_SIZE,size));*/
|
|
*sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v();
|
|
*sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v();
|
|
*sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v();
|
|
*sim_msg_hdr(g, sim_msg_function_r()) = func;
|
|
*sim_msg_hdr(g, sim_msg_length_r()) = size + sim_msg_header_size();
|
|
}
|
|
|
|
static inline u32 sim_escape_read_hdr_size(void)
|
|
{
|
|
return 12; /*TBD: fix NV_VGPU_SIM_ESCAPE_READ_HEADER*/
|
|
}
|
|
|
|
static u32 *sim_send_ring_bfr(struct gk20a *g, u32 byte_offset)
|
|
{
|
|
return (u32 *)(g->sim.send_bfr.kvaddr + byte_offset);
|
|
}
|
|
|
|
static int rpc_send_message(struct gk20a *g)
|
|
{
|
|
/* calculations done in units of u32s */
|
|
u32 send_base = sim_send_put_pointer_v(g->sim.send_ring_put) * 2;
|
|
u32 dma_offset = send_base + sim_dma_r()/sizeof(u32);
|
|
u32 dma_hi_offset = send_base + sim_dma_hi_r()/sizeof(u32);
|
|
|
|
*sim_send_ring_bfr(g, dma_offset*sizeof(u32)) =
|
|
sim_dma_target_phys_pci_coherent_f() |
|
|
sim_dma_status_valid_f() |
|
|
sim_dma_size_4kb_f() |
|
|
sim_dma_addr_lo_f(g->sim.msg_bfr.phys >> PAGE_SHIFT);
|
|
|
|
*sim_send_ring_bfr(g, dma_hi_offset*sizeof(u32)) =
|
|
u64_hi32(g->sim.msg_bfr.phys);
|
|
|
|
*sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim.sequence_base++;
|
|
|
|
g->sim.send_ring_put = (g->sim.send_ring_put + 2 * sizeof(u32)) %
|
|
PAGE_SIZE;
|
|
|
|
__cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
|
|
__cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
|
|
__cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);
|
|
|
|
/* Update the put pointer. This will trap into the host. */
|
|
sim_writel(g, sim_send_put_r(), g->sim.send_ring_put);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline u32 *sim_recv_ring_bfr(struct gk20a *g, u32 byte_offset)
|
|
{
|
|
return (u32 *)(g->sim.recv_bfr.kvaddr + byte_offset);
|
|
}
|
|
|
|
static int rpc_recv_poll(struct gk20a *g)
|
|
{
|
|
u64 recv_phys_addr;
|
|
|
|
/* XXX This read is not required (?) */
|
|
/*pVGpu->recv_ring_get = VGPU_REG_RD32(pGpu, NV_VGPU_RECV_GET);*/
|
|
|
|
/* Poll the recv ring get pointer in an infinite loop*/
|
|
do {
|
|
g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
|
|
} while (g->sim.recv_ring_put == g->sim.recv_ring_get);
|
|
|
|
/* process all replies */
|
|
while (g->sim.recv_ring_put != g->sim.recv_ring_get) {
|
|
/* these are in u32 offsets*/
|
|
u32 dma_lo_offset =
|
|
sim_recv_put_pointer_v(g->sim.recv_ring_get)*2 + 0;
|
|
u32 dma_hi_offset = dma_lo_offset + 1;
|
|
u32 recv_phys_addr_lo = sim_dma_addr_lo_v(
|
|
*sim_recv_ring_bfr(g, dma_lo_offset*4));
|
|
u32 recv_phys_addr_hi = sim_dma_hi_addr_v(
|
|
*sim_recv_ring_bfr(g, dma_hi_offset*4));
|
|
|
|
recv_phys_addr = (u64)recv_phys_addr_hi << 32 |
|
|
(u64)recv_phys_addr_lo << PAGE_SHIFT;
|
|
|
|
if (recv_phys_addr != g->sim.msg_bfr.phys) {
|
|
dev_err(dev_from_gk20a(g), "%s Error in RPC reply\n",
|
|
__func__);
|
|
return -1;
|
|
}
|
|
|
|
/* Update GET pointer */
|
|
g->sim.recv_ring_get = (g->sim.recv_ring_get + 2*sizeof(u32)) %
|
|
PAGE_SIZE;
|
|
|
|
__cpuc_flush_dcache_area(g->sim.msg_bfr.kvaddr, PAGE_SIZE);
|
|
__cpuc_flush_dcache_area(g->sim.send_bfr.kvaddr, PAGE_SIZE);
|
|
__cpuc_flush_dcache_area(g->sim.recv_bfr.kvaddr, PAGE_SIZE);
|
|
|
|
sim_writel(g, sim_recv_get_r(), g->sim.recv_ring_get);
|
|
|
|
g->sim.recv_ring_put = sim_readl(g, sim_recv_put_r());
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int issue_rpc_and_wait(struct gk20a *g)
|
|
{
|
|
int err;
|
|
|
|
err = rpc_send_message(g);
|
|
if (err) {
|
|
dev_err(dev_from_gk20a(g), "%s failed rpc_send_message\n",
|
|
__func__);
|
|
return err;
|
|
}
|
|
|
|
err = rpc_recv_poll(g);
|
|
if (err) {
|
|
dev_err(dev_from_gk20a(g), "%s failed rpc_recv_poll\n",
|
|
__func__);
|
|
return err;
|
|
}
|
|
|
|
/* Now check if RPC really succeeded */
|
|
if (*sim_msg_hdr(g, sim_msg_result_r()) != sim_msg_result_success_v()) {
|
|
dev_err(dev_from_gk20a(g), "%s received failed status!\n",
|
|
__func__);
|
|
return -(*sim_msg_hdr(g, sim_msg_result_r()));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int gk20a_sim_esc_read(struct gk20a *g, char *path, u32 index, u32 count, u32 *data)
|
|
{
|
|
int err;
|
|
size_t pathlen = strlen(path);
|
|
u32 data_offset;
|
|
|
|
sim_write_hdr(g, sim_msg_function_sim_escape_read_v(),
|
|
sim_escape_read_hdr_size());
|
|
*sim_msg_param(g, 0) = index;
|
|
*sim_msg_param(g, 4) = count;
|
|
data_offset = roundup(0xc + pathlen + 1, sizeof(u32));
|
|
*sim_msg_param(g, 8) = data_offset;
|
|
strcpy((char *)sim_msg_param(g, 0xc), path);
|
|
|
|
err = issue_rpc_and_wait(g);
|
|
|
|
if (!err)
|
|
memcpy(data, sim_msg_param(g, data_offset), count);
|
|
return err;
|
|
}
|
|
|
|
static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id)
|
|
{
|
|
struct gk20a *g = dev_id;
|
|
|
|
return g->ops.mc.isr_stall(g);
|
|
}
|
|
|
|
static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id)
|
|
{
|
|
struct gk20a *g = dev_id;
|
|
|
|
return g->ops.mc.isr_nonstall(g);
|
|
}
|
|
|
|
void gk20a_pbus_isr(struct gk20a *g)
|
|
{
|
|
u32 val, err_code;
|
|
val = gk20a_readl(g, bus_intr_0_r());
|
|
if (val & (bus_intr_0_pri_squash_m() |
|
|
bus_intr_0_pri_fecserr_m() |
|
|
bus_intr_0_pri_timeout_m())) {
|
|
gk20a_err(dev_from_gk20a(g), "pmc_enable : 0x%x",
|
|
gk20a_readl(g, mc_enable_r()));
|
|
gk20a_err(dev_from_gk20a(g), "NV_PBUS_INTR_0 : 0x%x", val);
|
|
gk20a_err(g->dev,
|
|
"NV_PTIMER_PRI_TIMEOUT_SAVE_0: 0x%x\n",
|
|
gk20a_readl(g, timer_pri_timeout_save_0_r()));
|
|
gk20a_err(g->dev,
|
|
"NV_PTIMER_PRI_TIMEOUT_SAVE_1: 0x%x\n",
|
|
gk20a_readl(g, timer_pri_timeout_save_1_r()));
|
|
err_code = gk20a_readl(g, timer_pri_timeout_fecs_errcode_r());
|
|
gk20a_err(g->dev,
|
|
"NV_PTIMER_PRI_TIMEOUT_FECS_ERRCODE: 0x%x\n",
|
|
err_code);
|
|
if (err_code == 0xbadf13)
|
|
gk20a_err(g->dev,
|
|
"NV_PGRAPH_PRI_GPC0_GPCCS_FS_GPC: 0x%x\n",
|
|
gk20a_readl(g, gr_gpc0_fs_gpc_r()));
|
|
|
|
}
|
|
|
|
if (val)
|
|
gk20a_err(g->dev,
|
|
"Unhandled pending pbus interrupt\n");
|
|
|
|
gk20a_writel(g, bus_intr_0_r(), val);
|
|
}
|
|
|
|
static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
|
|
{
|
|
struct gk20a *g = dev_id;
|
|
return g->ops.mc.isr_thread_stall(g);
|
|
}
|
|
|
|
static irqreturn_t gk20a_intr_thread_nonstall(int irq, void *dev_id)
|
|
{
|
|
struct gk20a *g = dev_id;
|
|
return g->ops.mc.isr_thread_nonstall(g);
|
|
}
|
|
|
|
void gk20a_remove_support(struct device *dev)
|
|
{
|
|
struct gk20a *g = get_gk20a(dev);
|
|
|
|
#ifdef CONFIG_TEGRA_COMMON
|
|
tegra_unregister_idle_unidle();
|
|
#endif
|
|
|
|
if (g->pmu.remove_support)
|
|
g->pmu.remove_support(&g->pmu);
|
|
|
|
if (g->gr.remove_support)
|
|
g->gr.remove_support(&g->gr);
|
|
|
|
if (g->fifo.remove_support)
|
|
g->fifo.remove_support(&g->fifo);
|
|
|
|
if (g->mm.remove_support)
|
|
g->mm.remove_support(&g->mm);
|
|
|
|
if (g->sim.remove_support)
|
|
g->sim.remove_support(&g->sim);
|
|
|
|
release_firmware(g->pmu_fw);
|
|
|
|
/* free mappings to registers, etc */
|
|
|
|
if (g->regs) {
|
|
iounmap(g->regs);
|
|
g->regs = NULL;
|
|
}
|
|
if (g->bar1) {
|
|
iounmap(g->bar1);
|
|
g->bar1 = NULL;
|
|
}
|
|
}
|
|
|
|
static int gk20a_init_support(struct platform_device *dev)
|
|
{
|
|
int err = 0;
|
|
struct gk20a *g = get_gk20a(&dev->dev);
|
|
|
|
#ifdef CONFIG_TEGRA_COMMON
|
|
tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle);
|
|
#endif
|
|
|
|
g->regs = gk20a_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
|
|
&g->reg_mem);
|
|
if (IS_ERR(g->regs)) {
|
|
dev_err(dev_from_gk20a(g), "failed to remap gk20a registers\n");
|
|
err = PTR_ERR(g->regs);
|
|
goto fail;
|
|
}
|
|
|
|
g->bar1 = gk20a_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
|
|
&g->bar1_mem);
|
|
if (IS_ERR(g->bar1)) {
|
|
dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
|
|
err = PTR_ERR(g->bar1);
|
|
goto fail;
|
|
}
|
|
|
|
g->regs_saved = g->regs;
|
|
g->bar1_saved = g->bar1;
|
|
|
|
/* Get interrupt numbers */
|
|
g->irq_nonstall = platform_get_irq(dev, 1);
|
|
if (g->irq_stall < 0 || g->irq_nonstall < 0) {
|
|
err = -ENXIO;
|
|
goto fail;
|
|
}
|
|
|
|
if (tegra_cpu_is_asim()) {
|
|
err = gk20a_init_sim_support(dev);
|
|
if (err)
|
|
goto fail;
|
|
}
|
|
|
|
mutex_init(&g->dbg_sessions_lock);
|
|
mutex_init(&g->client_lock);
|
|
mutex_init(&g->ch_wdt_lock);
|
|
mutex_init(&g->poweroff_lock);
|
|
|
|
g->remove_support = gk20a_remove_support;
|
|
return 0;
|
|
|
|
fail:
|
|
gk20a_remove_support(&dev->dev);
|
|
return err;
|
|
}
|
|
|
|
static int gk20a_pm_prepare_poweroff(struct device *dev)
|
|
{
|
|
struct gk20a *g = get_gk20a(dev);
|
|
int ret = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
mutex_lock(&g->poweroff_lock);
|
|
|
|
if (!g->power_on)
|
|
goto done;
|
|
|
|
/*
|
|
* After this point, gk20a interrupts should not get
|
|
* serviced.
|
|
*/
|
|
disable_irq(g->irq_stall);
|
|
disable_irq(g->irq_nonstall);
|
|
|
|
gk20a_scale_suspend(dev);
|
|
|
|
/* cancel any pending cde work */
|
|
gk20a_cde_suspend(g);
|
|
|
|
ret = gk20a_channel_suspend(g);
|
|
if (ret)
|
|
goto done;
|
|
|
|
/* disable elpg before gr or fifo suspend */
|
|
ret |= gk20a_pmu_destroy(g);
|
|
|
|
ret |= gk20a_gr_suspend(g);
|
|
ret |= gk20a_mm_suspend(g);
|
|
ret |= gk20a_fifo_suspend(g);
|
|
|
|
/* Disable GPCPLL */
|
|
if (g->ops.clk.suspend_clk_support)
|
|
ret |= g->ops.clk.suspend_clk_support(g);
|
|
|
|
g->power_on = false;
|
|
|
|
/* Stop CPU from accessing the GPU registers. */
|
|
gk20a_lockout_registers(g);
|
|
|
|
done:
|
|
mutex_unlock(&g->poweroff_lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_detect_chip(struct gk20a *g)
|
|
{
|
|
struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
|
|
u32 mc_boot_0_value;
|
|
|
|
if (gpu->arch)
|
|
return 0;
|
|
|
|
mc_boot_0_value = gk20a_readl(g, mc_boot_0_r());
|
|
gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
|
|
NVGPU_GPU_ARCHITECTURE_SHIFT;
|
|
gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
|
|
gpu->rev =
|
|
(mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
|
|
mc_boot_0_minor_revision_v(mc_boot_0_value);
|
|
|
|
gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
|
|
g->gpu_characteristics.arch,
|
|
g->gpu_characteristics.impl,
|
|
g->gpu_characteristics.rev);
|
|
|
|
return gpu_init_hal(g);
|
|
}
|
|
|
|
int gk20a_pm_finalize_poweron(struct device *dev)
|
|
{
|
|
struct gk20a *g = get_gk20a(dev);
|
|
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
|
int err, nice_value;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (g->power_on)
|
|
return 0;
|
|
|
|
trace_gk20a_finalize_poweron(dev_name(dev));
|
|
|
|
err = gk20a_restore_registers(g);
|
|
if (err)
|
|
return err;
|
|
|
|
nice_value = task_nice(current);
|
|
set_user_nice(current, -20);
|
|
|
|
g->power_on = true;
|
|
|
|
err = gk20a_detect_chip(g);
|
|
if (err)
|
|
goto done;
|
|
|
|
if (g->ops.bios.init)
|
|
err = g->ops.bios.init(g);
|
|
if (err)
|
|
goto done;
|
|
|
|
if (!tegra_platform_is_silicon())
|
|
gk20a_writel(g, bus_intr_en_0_r(), 0x0);
|
|
else
|
|
gk20a_writel(g, bus_intr_en_0_r(),
|
|
bus_intr_en_0_pri_squash_m() |
|
|
bus_intr_en_0_pri_fecserr_m() |
|
|
bus_intr_en_0_pri_timeout_m());
|
|
|
|
if (g->ops.clk.disable_slowboot)
|
|
g->ops.clk.disable_slowboot(g);
|
|
|
|
gk20a_reset_priv_ring(g);
|
|
|
|
/* TBD: move this after graphics init in which blcg/slcg is enabled.
|
|
This function removes SlowdownOnBoot which applies 32x divider
|
|
on gpcpll bypass path. The purpose of slowdown is to save power
|
|
during boot but it also significantly slows down gk20a init on
|
|
simulation and emulation. We should remove SOB after graphics power
|
|
saving features (blcg/slcg) are enabled. For now, do it here. */
|
|
if (g->ops.clk.init_clk_support) {
|
|
err = g->ops.clk.init_clk_support(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init gk20a clk");
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/* enable pri timeout only on silicon */
|
|
if (tegra_platform_is_silicon()) {
|
|
gk20a_writel(g,
|
|
timer_pri_timeout_r(),
|
|
timer_pri_timeout_period_f(0x186A0) |
|
|
timer_pri_timeout_en_en_enabled_f());
|
|
} else {
|
|
gk20a_writel(g,
|
|
timer_pri_timeout_r(),
|
|
timer_pri_timeout_period_f(0x186A0) |
|
|
timer_pri_timeout_en_en_disabled_f());
|
|
}
|
|
|
|
err = gk20a_init_fifo_reset_enable_hw(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to reset gk20a fifo");
|
|
goto done;
|
|
}
|
|
|
|
if (g->ops.ltc.init_fs_state)
|
|
g->ops.ltc.init_fs_state(g);
|
|
|
|
err = gk20a_init_mm_support(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init gk20a mm");
|
|
goto done;
|
|
}
|
|
|
|
err = gk20a_init_fifo_support(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init gk20a fifo");
|
|
goto done;
|
|
}
|
|
|
|
g->ops.mc.intr_enable(g);
|
|
|
|
err = gk20a_enable_gr_hw(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to enable gr");
|
|
goto done;
|
|
}
|
|
|
|
if (g->ops.pmu.prepare_ucode)
|
|
err = g->ops.pmu.prepare_ucode(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init pmu ucode");
|
|
goto done;
|
|
}
|
|
|
|
err = gk20a_init_pmu_support(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init gk20a pmu");
|
|
goto done;
|
|
}
|
|
|
|
err = gk20a_init_gr_support(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init gk20a gr");
|
|
goto done;
|
|
}
|
|
|
|
err = gk20a_init_therm_support(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init gk20a therm");
|
|
goto done;
|
|
}
|
|
|
|
err = g->ops.chip_init_gpu_characteristics(g);
|
|
if (err) {
|
|
gk20a_err(dev, "failed to init gk20a gpu characteristics");
|
|
goto done;
|
|
}
|
|
|
|
err = gk20a_ctxsw_trace_init(g);
|
|
if (err)
|
|
gk20a_warn(dev, "could not initialize ctxsw tracing");
|
|
|
|
/* Restore the debug setting */
|
|
g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl);
|
|
|
|
gk20a_channel_resume(g);
|
|
set_user_nice(current, nice_value);
|
|
|
|
gk20a_scale_resume(dev);
|
|
|
|
trace_gk20a_finalize_poweron_done(dev_name(dev));
|
|
|
|
if (platform->has_cde)
|
|
gk20a_init_cde_support(g);
|
|
|
|
enable_irq(g->irq_stall);
|
|
if (g->irq_stall != g->irq_nonstall)
|
|
enable_irq(g->irq_nonstall);
|
|
|
|
done:
|
|
return err;
|
|
}
|
|
|
|
static struct of_device_id tegra_gk20a_of_match[] = {
|
|
#ifdef CONFIG_TEGRA_GK20A
|
|
{ .compatible = "nvidia,tegra124-gk20a",
|
|
.data = &gk20a_tegra_platform },
|
|
{ .compatible = "nvidia,tegra210-gm20b",
|
|
.data = &gm20b_tegra_platform },
|
|
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
|
{ .compatible = TEGRA_18x_GPU_COMPAT_TEGRA,
|
|
.data = &t18x_gpu_tegra_platform },
|
|
#endif
|
|
#ifdef CONFIG_TEGRA_19x_GPU
|
|
{ .compatible = TEGRA_19x_GPU_COMPAT_TEGRA,
|
|
.data = &t19x_gpu_tegra_platform },
|
|
#endif
|
|
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
|
|
{ .compatible = "nvidia,tegra124-gk20a-vgpu",
|
|
.data = &vgpu_tegra_platform },
|
|
#endif
|
|
#else
|
|
{ .compatible = "nvidia,tegra124-gk20a",
|
|
.data = &gk20a_generic_platform },
|
|
{ .compatible = "nvidia,tegra210-gm20b",
|
|
.data = &gk20a_generic_platform },
|
|
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
|
{ .compatible = TEGRA_18x_GPU_COMPAT_TEGRA,
|
|
.data = &gk20a_generic_platform },
|
|
#endif
|
|
|
|
#endif
|
|
{ .compatible = "nvidia,generic-gk20a",
|
|
.data = &gk20a_generic_platform },
|
|
{ .compatible = "nvidia,generic-gm20b",
|
|
.data = &gk20a_generic_platform },
|
|
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
|
{ .compatible = TEGRA_18x_GPU_COMPAT_GENERIC,
|
|
.data = &gk20a_generic_platform },
|
|
#endif
|
|
{ },
|
|
};
|
|
|
|
static int gk20a_create_device(
|
|
struct device *dev, int devno,
|
|
const char *interface_name, const char *cdev_name,
|
|
struct cdev *cdev, struct device **out,
|
|
const struct file_operations *ops,
|
|
struct class *class)
|
|
{
|
|
struct device *subdev;
|
|
int err;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
cdev_init(cdev, ops);
|
|
cdev->owner = THIS_MODULE;
|
|
|
|
err = cdev_add(cdev, devno, 1);
|
|
if (err) {
|
|
dev_err(dev, "failed to add %s cdev\n", cdev_name);
|
|
return err;
|
|
}
|
|
|
|
subdev = device_create(class, NULL, devno, NULL,
|
|
interface_name, cdev_name);
|
|
|
|
	if (IS_ERR(subdev)) {
		err = PTR_ERR(subdev);
		cdev_del(cdev);
		dev_err(dev, "failed to create %s device for %s\n",
			cdev_name, dev_name(dev));
		return err;
	}
|
|
|
|
*out = subdev;
|
|
return 0;
|
|
}
|
|
|
|
void gk20a_user_deinit(struct device *dev, struct class *class)
|
|
{
|
|
struct gk20a *g = gk20a_from_dev(dev);
|
|
|
|
if (g->channel.node) {
|
|
device_destroy(class, g->channel.cdev.dev);
|
|
cdev_del(&g->channel.cdev);
|
|
}
|
|
|
|
if (g->as.node) {
|
|
device_destroy(class, g->as.cdev.dev);
|
|
cdev_del(&g->as.cdev);
|
|
}
|
|
|
|
if (g->ctrl.node) {
|
|
device_destroy(class, g->ctrl.cdev.dev);
|
|
cdev_del(&g->ctrl.cdev);
|
|
}
|
|
|
|
if (g->dbg.node) {
|
|
device_destroy(class, g->dbg.cdev.dev);
|
|
cdev_del(&g->dbg.cdev);
|
|
}
|
|
|
|
if (g->prof.node) {
|
|
device_destroy(class, g->prof.cdev.dev);
|
|
cdev_del(&g->prof.cdev);
|
|
}
|
|
|
|
if (g->tsg.node) {
|
|
device_destroy(class, g->tsg.cdev.dev);
|
|
cdev_del(&g->tsg.cdev);
|
|
}
|
|
|
|
if (g->ctxsw.node) {
|
|
device_destroy(class, g->ctxsw.cdev.dev);
|
|
cdev_del(&g->ctxsw.cdev);
|
|
}
|
|
|
|
if (g->cdev_region)
|
|
unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS);
|
|
}
|
|
|
|
int gk20a_user_init(struct device *dev, const char *interface_name,
|
|
struct class *class)
|
|
{
|
|
int err;
|
|
dev_t devno;
|
|
struct gk20a *g = gk20a_from_dev(dev);
|
|
|
|
err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev));
|
|
if (err) {
|
|
dev_err(dev, "failed to allocate devno\n");
|
|
goto fail;
|
|
}
|
|
g->cdev_region = devno;
|
|
|
|
err = gk20a_create_device(dev, devno++, interface_name, "",
|
|
&g->channel.cdev, &g->channel.node,
|
|
&gk20a_channel_ops,
|
|
class);
|
|
if (err)
|
|
goto fail;
|
|
|
|
err = gk20a_create_device(dev, devno++, interface_name, "-as",
|
|
&g->as.cdev, &g->as.node,
|
|
&gk20a_as_ops,
|
|
class);
|
|
if (err)
|
|
goto fail;
|
|
|
|
err = gk20a_create_device(dev, devno++, interface_name, "-ctrl",
|
|
&g->ctrl.cdev, &g->ctrl.node,
|
|
&gk20a_ctrl_ops,
|
|
class);
|
|
if (err)
|
|
goto fail;
|
|
|
|
err = gk20a_create_device(dev, devno++, interface_name, "-dbg",
|
|
&g->dbg.cdev, &g->dbg.node,
|
|
&gk20a_dbg_ops,
|
|
class);
|
|
if (err)
|
|
goto fail;
|
|
|
|
err = gk20a_create_device(dev, devno++, interface_name, "-prof",
|
|
&g->prof.cdev, &g->prof.node,
|
|
&gk20a_prof_ops,
|
|
class);
|
|
if (err)
|
|
goto fail;
|
|
|
|
err = gk20a_create_device(dev, devno++, interface_name, "-tsg",
|
|
&g->tsg.cdev, &g->tsg.node,
|
|
&gk20a_tsg_ops,
|
|
class);
|
|
if (err)
|
|
goto fail;
|
|
|
|
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
|
err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw",
|
|
&g->ctxsw.cdev, &g->ctxsw.node,
|
|
&gk20a_ctxsw_ops,
|
|
class);
|
|
if (err)
|
|
goto fail;
|
|
#endif
|
|
|
|
|
|
return 0;
|
|
fail:
|
|
gk20a_user_deinit(dev, &nvgpu_class);
|
|
return err;
|
|
}
|
|
|
|
struct channel_gk20a *gk20a_get_channel_from_file(int fd)
|
|
{
|
|
struct channel_gk20a *ch;
|
|
struct file *f = fget(fd);
|
|
if (!f)
|
|
return NULL;
|
|
|
|
if (f->f_op != &gk20a_channel_ops) {
|
|
fput(f);
|
|
return NULL;
|
|
}
|
|
|
|
ch = (struct channel_gk20a *)f->private_data;
|
|
fput(f);
|
|
return ch;
|
|
}
|
|
|
|
static int gk20a_pm_enable_clk(struct device *dev)
|
|
{
|
|
int index = 0;
|
|
struct gk20a_platform *platform;
|
|
|
|
platform = dev_get_drvdata(dev);
|
|
if (!platform)
|
|
return -EINVAL;
|
|
|
|
for (index = 0; index < platform->num_clks; index++) {
|
|
int err = 0;
|
|
if (platform->clk[index])
|
|
err = clk_prepare_enable(platform->clk[index]);
|
|
if (err)
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_pm_disable_clk(struct device *dev)
|
|
{
|
|
int index = 0;
|
|
struct gk20a_platform *platform;
|
|
|
|
platform = dev_get_drvdata(dev);
|
|
if (!platform)
|
|
return -EINVAL;
|
|
|
|
for (index = 0; index < platform->num_clks; index++) {
|
|
if (platform->clk[index])
|
|
clk_disable_unprepare(platform->clk[index]);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void gk20a_pm_shutdown(struct platform_device *pdev)
|
|
{
|
|
struct gk20a_platform *platform = platform_get_drvdata(pdev);
|
|
|
|
dev_info(&pdev->dev, "shutting down");
|
|
|
|
/* If GPU is already railgated,
|
|
* just prevent more requests, and return */
|
|
if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
|
|
#ifdef CONFIG_PM
|
|
__pm_runtime_disable(&pdev->dev, false);
|
|
#endif
|
|
return;
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_PM
|
|
/* Prevent more requests by disabling Runtime PM */
|
|
__pm_runtime_disable(&pdev->dev, false);
|
|
#endif
|
|
|
|
/* Be ready for rail-gate after this point */
|
|
if (gk20a_gpu_is_virtual(&pdev->dev))
|
|
vgpu_pm_prepare_poweroff(&pdev->dev);
|
|
else
|
|
gk20a_pm_prepare_poweroff(&pdev->dev);
|
|
|
|
dev_info(&pdev->dev, "shut down complete\n");
|
|
}
|
|
|
|
#ifdef CONFIG_PM
|
|
static const struct dev_pm_ops gk20a_pm_ops = {
|
|
#if defined(CONFIG_PM) && !defined(CONFIG_PM_GENERIC_DOMAINS)
|
|
.runtime_resume = gk20a_pm_enable_clk,
|
|
.runtime_suspend = gk20a_pm_disable_clk,
|
|
#endif
|
|
};
|
|
#endif
|
|
|
|
static int _gk20a_pm_railgate(struct device *dev)
|
|
{
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
int ret = 0;
|
|
if (platform->railgate)
|
|
ret = platform->railgate(dev);
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_pm_railgate(struct generic_pm_domain *domain)
|
|
{
|
|
struct gk20a_domain_data *gk20a_domain = container_of(domain,
|
|
struct gk20a_domain_data, gpd);
|
|
struct gk20a *g = gk20a_domain->gk20a;
|
|
|
|
return _gk20a_pm_railgate(g->dev);
|
|
}
|
|
|
|
static int _gk20a_pm_unrailgate(struct device *dev)
|
|
{
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
int ret = 0;
|
|
|
|
if (platform->unrailgate) {
|
|
mutex_lock(&platform->railgate_lock);
|
|
ret = platform->unrailgate(dev);
|
|
mutex_unlock(&platform->railgate_lock);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_pm_unrailgate(struct generic_pm_domain *domain)
|
|
{
|
|
struct gk20a_domain_data *gk20a_domain = container_of(domain,
|
|
struct gk20a_domain_data, gpd);
|
|
struct gk20a *g = gk20a_domain->gk20a;
|
|
trace_gk20a_pm_unrailgate(dev_name(g->dev));
|
|
|
|
return _gk20a_pm_unrailgate(g->dev);
|
|
}
|
|
|
|
static int gk20a_pm_suspend(struct device *dev)
|
|
{
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
int ret = 0;
|
|
|
|
#ifdef CONFIG_PM
|
|
if (atomic_read(&dev->power.usage_count) > 1)
|
|
return -EBUSY;
|
|
#endif
|
|
|
|
ret = gk20a_pm_prepare_poweroff(dev);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (platform->suspend)
|
|
platform->suspend(dev);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_pm_resume(struct device *dev)
|
|
{
|
|
return gk20a_pm_finalize_poweron(dev);
|
|
}
|
|
|
|
#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
|
|
static int gk20a_pm_initialise_domain(struct device *dev)
|
|
{
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
struct dev_power_governor *pm_domain_gov = NULL;
|
|
struct generic_pm_domain *domain = dev_to_genpd(dev);
|
|
|
|
if (IS_ERR(domain))
|
|
return 0;
|
|
|
|
#ifdef CONFIG_PM
|
|
if (!platform->can_railgate)
|
|
pm_domain_gov = &pm_domain_always_on_gov;
|
|
#endif
|
|
domain->gov = pm_domain_gov;
|
|
|
|
if (platform->railgate_delay)
|
|
pm_genpd_set_poweroff_delay(domain, platform->railgate_delay);
|
|
|
|
device_set_wakeup_capable(dev, 0);
|
|
return 0;
|
|
}
|
|
|
|
#else
|
|
static int gk20a_pm_initialise_domain(struct device *dev)
|
|
{
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
struct dev_power_governor *pm_domain_gov = NULL;
|
|
struct generic_pm_domain *domain = NULL;
|
|
int ret = 0;
|
|
struct gk20a_domain_data *gpu_gpd_data = (struct gk20a_domain_data *)
|
|
kzalloc(sizeof(struct gk20a_domain_data), GFP_KERNEL);
|
|
|
|
if (!gpu_gpd_data)
|
|
return -ENOMEM;
|
|
|
|
gpu_gpd_data->gk20a = platform->g;
|
|
domain = &gpu_gpd_data->gpd;
|
|
|
|
domain->name = "gpu";
|
|
|
|
#ifdef CONFIG_PM
|
|
if (!platform->can_railgate)
|
|
pm_domain_gov = &pm_domain_always_on_gov;
|
|
#endif
|
|
|
|
pm_genpd_init(domain, pm_domain_gov, true);
|
|
|
|
domain->power_off = gk20a_pm_railgate;
|
|
domain->power_on = gk20a_pm_unrailgate;
|
|
domain->dev_ops.start = gk20a_pm_enable_clk;
|
|
domain->dev_ops.stop = gk20a_pm_disable_clk;
|
|
domain->dev_ops.save_state = gk20a_pm_prepare_poweroff;
|
|
domain->dev_ops.restore_state = gk20a_pm_finalize_poweron;
|
|
domain->dev_ops.suspend = gk20a_pm_suspend;
|
|
domain->dev_ops.resume = gk20a_pm_resume;
|
|
|
|
device_set_wakeup_capable(dev, 0);
|
|
ret = pm_genpd_add_device(domain, dev);
|
|
|
|
if (platform->railgate_delay)
|
|
pm_genpd_set_poweroff_delay(domain, platform->railgate_delay);
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
int gk20a_pm_init(struct device *dev)
|
|
{
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
int err = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/* Initialise pm runtime */
|
|
if (platform->railgate_delay) {
|
|
pm_runtime_set_autosuspend_delay(dev,
|
|
platform->railgate_delay);
|
|
pm_runtime_use_autosuspend(dev);
|
|
}
|
|
|
|
pm_runtime_enable(dev);
|
|
if (!pm_runtime_enabled(dev))
|
|
gk20a_pm_enable_clk(dev);
|
|
|
|
/* Enable runtime railgating if possible. If not,
|
|
* turn on the rail now. */
|
|
if (platform->can_railgate && IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
|
|
_gk20a_pm_railgate(dev);
|
|
else if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
|
|
_gk20a_pm_unrailgate(dev);
|
|
|
|
/* genpd will take care of runtime power management if it is enabled */
|
|
if (IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
|
|
err = gk20a_pm_initialise_domain(dev);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int gk20a_secure_page_alloc(struct platform_device *pdev)
|
|
{
|
|
struct gk20a_platform *platform = platform_get_drvdata(pdev);
|
|
int err = 0;
|
|
|
|
if (platform->secure_page_alloc) {
|
|
err = platform->secure_page_alloc(&pdev->dev);
|
|
if (!err)
|
|
platform->secure_alloc_ready = true;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static struct of_device_id tegra_gpu_domain_match[] = {
|
|
{.compatible = "nvidia,tegra124-gpu-pd"},
|
|
{.compatible = "nvidia,tegra132-gpu-pd"},
|
|
{.compatible = "nvidia,tegra210-gpu-pd"},
|
|
{.compatible = "nvidia,tegra186-gpu-pd"},
|
|
{},
|
|
};
|
|
|
|
static int gk20a_probe(struct platform_device *dev)
|
|
{
|
|
struct gk20a *gk20a;
|
|
int err;
|
|
struct gk20a_platform *platform = NULL;
|
|
|
|
#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
|
|
struct gk20a_domain_data *gk20a_domain;
|
|
#endif
|
|
|
|
if (dev->dev.of_node) {
|
|
const struct of_device_id *match;
|
|
|
|
match = of_match_device(tegra_gk20a_of_match, &dev->dev);
|
|
if (match)
|
|
platform = (struct gk20a_platform *)match->data;
|
|
} else
|
|
platform = (struct gk20a_platform *)dev->dev.platform_data;
|
|
|
|
if (!platform) {
|
|
dev_err(&dev->dev, "no platform data\n");
|
|
return -ENODATA;
|
|
}
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
platform_set_drvdata(dev, platform);
|
|
|
|
if (gk20a_gpu_is_virtual(&dev->dev))
|
|
return vgpu_probe(dev);
|
|
|
|
gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
|
|
if (!gk20a) {
|
|
dev_err(&dev->dev, "couldn't allocate gk20a support");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
init_waitqueue_head(&gk20a->sw_irq_stall_last_handled_wq);
|
|
init_waitqueue_head(&gk20a->sw_irq_nonstall_last_handled_wq);
|
|
|
|
#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
|
|
gk20a_domain = container_of(dev_to_genpd(&dev->dev),
|
|
struct gk20a_domain_data, gpd);
|
|
gk20a_domain->gk20a = gk20a;
|
|
#endif
|
|
|
|
set_gk20a(dev, gk20a);
|
|
gk20a->dev = &dev->dev;
|
|
|
|
gk20a->irq_stall = platform_get_irq(dev, 0);
|
|
gk20a->irq_nonstall = platform_get_irq(dev, 1);
|
|
if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
|
|
return -ENXIO;
|
|
err = devm_request_threaded_irq(&dev->dev,
|
|
gk20a->irq_stall,
|
|
gk20a_intr_isr_stall,
|
|
gk20a_intr_thread_stall,
|
|
0, "gk20a_stall", gk20a);
|
|
if (err) {
|
|
dev_err(&dev->dev,
|
|
"failed to request stall intr irq @ %d\n",
|
|
gk20a->irq_stall);
|
|
return err;
|
|
}
|
|
err = devm_request_threaded_irq(&dev->dev,
|
|
gk20a->irq_nonstall,
|
|
gk20a_intr_isr_nonstall,
|
|
gk20a_intr_thread_nonstall,
|
|
0, "gk20a_nonstall", gk20a);
|
|
if (err) {
|
|
dev_err(&dev->dev,
|
|
"failed to request non-stall intr irq @ %d\n",
|
|
gk20a->irq_nonstall);
|
|
return err;
|
|
}
|
|
disable_irq(gk20a->irq_stall);
|
|
if (gk20a->irq_stall != gk20a->irq_nonstall)
|
|
disable_irq(gk20a->irq_nonstall);
|
|
|
|
err = gk20a_user_init(&dev->dev, INTERFACE_NAME, &nvgpu_class);
|
|
if (err)
|
|
return err;
|
|
|
|
gk20a_init_support(dev);
|
|
|
|
init_rwsem(&gk20a->busy_lock);
|
|
mutex_init(&platform->railgate_lock);
|
|
|
|
spin_lock_init(&gk20a->mc_enable_lock);
|
|
|
|
#ifdef CONFIG_RESET_CONTROLLER
|
|
platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
|
|
if (IS_ERR(platform->reset_control))
|
|
platform->reset_control = NULL;
|
|
#endif
|
|
|
|
gk20a_debug_init(&dev->dev);
|
|
|
|
/* Initialize the platform interface. */
|
|
err = platform->probe(&dev->dev);
|
|
if (err) {
|
|
dev_err(&dev->dev, "platform probe failed");
|
|
return err;
|
|
}
|
|
|
|
err = gk20a_secure_page_alloc(dev);
|
|
if (err)
|
|
dev_err(&dev->dev,
|
|
"failed to allocate secure buffer %d\n", err);
|
|
|
|
err = gk20a_pm_init(&dev->dev);
|
|
if (err) {
|
|
dev_err(&dev->dev, "pm init failed");
|
|
return err;
|
|
}
|
|
|
|
gk20a->emc3d_ratio = EMC3D_DEFAULT_RATIO;
|
|
|
|
/* Initialise scaling */
|
|
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
|
|
gk20a_scale_init(&dev->dev);
|
|
|
|
/* Set DMA parameters to allow larger sgt lists */
|
|
dev->dev.dma_parms = &gk20a->dma_parms;
|
|
dma_set_max_seg_size(&dev->dev, UINT_MAX);
|
|
|
|
gk20a->gr_idle_timeout_default =
|
|
CONFIG_GK20A_DEFAULT_TIMEOUT;
|
|
if (tegra_platform_is_silicon())
|
|
gk20a->timeouts_enabled = true;
|
|
|
|
gk20a->runlist_interleave = true;
|
|
|
|
gk20a->timeslice_low_priority_us = 1300;
|
|
gk20a->timeslice_medium_priority_us = 2600;
|
|
gk20a->timeslice_high_priority_us = 5200;
|
|
|
|
	/* Set up initial power settings. For non-silicon platforms, disable
	 * power features; for silicon platforms, read from platform data. */
|
|
gk20a->slcg_enabled =
|
|
tegra_platform_is_silicon() ? platform->enable_slcg : false;
|
|
gk20a->blcg_enabled =
|
|
tegra_platform_is_silicon() ? platform->enable_blcg : false;
|
|
gk20a->elcg_enabled =
|
|
tegra_platform_is_silicon() ? platform->enable_elcg : false;
|
|
gk20a->elpg_enabled =
|
|
tegra_platform_is_silicon() ? platform->enable_elpg : false;
|
|
gk20a->aelpg_enabled =
|
|
tegra_platform_is_silicon() ? platform->enable_aelpg : false;
|
|
|
|
/* set default values to aelpg parameters */
|
|
gk20a->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
|
|
gk20a->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
|
|
gk20a->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
|
|
gk20a->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US;
|
|
gk20a->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
|
|
|
|
if (platform->late_probe) {
|
|
err = platform->late_probe(&dev->dev);
|
|
if (err) {
|
|
dev_err(&dev->dev, "late probe failed");
|
|
return err;
|
|
}
|
|
}
|
|
|
|
gk20a_create_sysfs(&dev->dev);
|
|
|
|
gk20a->mm.bypass_smmu = platform->bypass_smmu;
|
|
gk20a->mm.disable_bigpage = platform->disable_bigpage;
|
|
gk20a->mm.has_physical_mode = true;
|
|
|
|
#ifdef CONFIG_DEBUG_FS
|
|
spin_lock_init(&gk20a->debugfs_lock);
|
|
gk20a->mm.ltc_enabled = true;
|
|
gk20a->mm.ltc_enabled_debug = true;
|
|
gk20a->debugfs_ltc_enabled =
|
|
debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->mm.ltc_enabled_debug);
|
|
gk20a->mm.ltc_enabled_debug = true;
|
|
gk20a->debugfs_gr_idle_timeout_default =
|
|
debugfs_create_u32("gr_idle_timeout_default_us",
|
|
S_IRUGO|S_IWUSR, platform->debugfs,
|
|
&gk20a->gr_idle_timeout_default);
|
|
gk20a->debugfs_timeouts_enabled =
|
|
debugfs_create_bool("timeouts_enabled",
|
|
S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->timeouts_enabled);
|
|
gk20a->debugfs_bypass_smmu =
|
|
debugfs_create_bool("bypass_smmu",
|
|
S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->mm.bypass_smmu);
|
|
gk20a->debugfs_disable_bigpage =
|
|
debugfs_create_bool("disable_bigpage",
|
|
S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->mm.disable_bigpage);
|
|
|
|
gk20a->debugfs_timeslice_low_priority_us =
|
|
debugfs_create_u32("timeslice_low_priority_us",
|
|
S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->timeslice_low_priority_us);
|
|
|
|
gk20a->debugfs_timeslice_medium_priority_us =
|
|
debugfs_create_u32("timeslice_medium_priority_us",
|
|
S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->timeslice_medium_priority_us);
|
|
|
|
gk20a->debugfs_timeslice_high_priority_us =
|
|
debugfs_create_u32("timeslice_high_priority_us",
|
|
S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->timeslice_high_priority_us);
|
|
|
|
gk20a->debugfs_runlist_interleave =
|
|
debugfs_create_bool("runlist_interleave",
|
|
S_IRUGO|S_IWUSR,
|
|
platform->debugfs,
|
|
&gk20a->runlist_interleave);
|
|
|
|
gr_gk20a_debugfs_init(gk20a);
|
|
gk20a_pmu_debugfs_init(&dev->dev);
|
|
gk20a_cde_debugfs_init(&dev->dev);
|
|
gk20a_alloc_debugfs_init(dev);
|
|
gk20a_mm_debugfs_init(&dev->dev);
|
|
gk20a_fifo_debugfs_init(&dev->dev);
|
|
#endif
|
|
|
|
gk20a_init_gr(gk20a);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __exit gk20a_remove(struct platform_device *pdev)
|
|
{
|
|
struct device *dev = &pdev->dev;
|
|
struct gk20a *g = get_gk20a(dev);
|
|
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
|
struct gk20a_domain_data *gk20a_gpd;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (gk20a_gpu_is_virtual(dev))
|
|
return vgpu_remove(pdev);
|
|
|
|
if (platform->has_cde)
|
|
gk20a_cde_destroy(g);
|
|
|
|
gk20a_ctxsw_trace_cleanup(g);
|
|
|
|
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
|
|
gk20a_scale_exit(dev);
|
|
|
|
if (g->remove_support)
|
|
g->remove_support(dev);
|
|
|
|
gk20a_user_deinit(dev, &nvgpu_class);
|
|
|
|
debugfs_remove_recursive(platform->debugfs);
|
|
debugfs_remove_recursive(platform->debugfs_alias);
|
|
|
|
gk20a_remove_sysfs(dev);
|
|
|
|
if (platform->secure_buffer.destroy)
|
|
platform->secure_buffer.destroy(dev,
|
|
&platform->secure_buffer);
|
|
|
|
gk20a_gpd = container_of(&g, struct gk20a_domain_data, gk20a);
|
|
gk20a_gpd->gk20a = NULL;
|
|
kfree(gk20a_gpd);
|
|
|
|
if (pm_runtime_enabled(dev))
|
|
pm_runtime_disable(dev);
|
|
else
|
|
gk20a_pm_disable_clk(dev);
|
|
|
|
if (platform->remove)
|
|
platform->remove(dev);
|
|
|
|
set_gk20a(pdev, NULL);
|
|
kfree(g);
|
|
|
|
gk20a_dbg_fn("removed");
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct platform_driver gk20a_driver = {
|
|
.probe = gk20a_probe,
|
|
.remove = __exit_p(gk20a_remove),
|
|
.shutdown = gk20a_pm_shutdown,
|
|
.driver = {
|
|
.owner = THIS_MODULE,
|
|
.name = "gk20a",
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
|
|
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
|
|
#endif
|
|
#ifdef CONFIG_OF
|
|
.of_match_table = tegra_gk20a_of_match,
|
|
#endif
|
|
#ifdef CONFIG_PM
|
|
.pm = &gk20a_pm_ops,
|
|
#endif
|
|
}
|
|
};
|
|
|
|
#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
|
|
|
|
|
|
static int _gk20a_init_domain(struct device_node *np,
|
|
struct generic_pm_domain *gpd)
|
|
{
|
|
bool is_off = false;
|
|
|
|
gpd->name = (char *)np->name;
|
|
|
|
if (of_property_read_bool(np, "is_off"))
|
|
is_off = true;
|
|
|
|
pm_genpd_init(gpd, NULL, is_off);
|
|
|
|
gpd->power_on = gk20a_pm_unrailgate;
|
|
gpd->power_off = gk20a_pm_railgate;
|
|
gpd->dev_ops.start = gk20a_pm_enable_clk;
|
|
gpd->dev_ops.stop = gk20a_pm_disable_clk;
|
|
gpd->dev_ops.save_state = gk20a_pm_prepare_poweroff;
|
|
gpd->dev_ops.restore_state = gk20a_pm_finalize_poweron;
|
|
gpd->dev_ops.suspend = gk20a_pm_suspend;
|
|
gpd->dev_ops.resume = gk20a_pm_resume;
|
|
|
|
of_genpd_add_provider_simple(np, gpd);
|
|
gpd->of_node = of_node_get(np);
|
|
|
|
genpd_pm_subdomain_attach(gpd);
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_domain_init(struct of_device_id *matches)
|
|
{
|
|
int ret = 0;
|
|
struct device_node *np;
|
|
struct gk20a_domain_data *gk20a_domain;
|
|
|
|
np = of_find_matching_node(NULL, matches);
|
|
if (!np)
|
|
return -ENOENT;
|
|
|
|
gk20a_domain = (struct gk20a_domain_data *)kzalloc
|
|
(sizeof(struct gk20a_domain_data), GFP_KERNEL);
|
|
if (!gk20a_domain)
|
|
return -ENOMEM;
|
|
|
|
ret = _gk20a_init_domain(np, &gk20a_domain->gpd);
|
|
|
|
return ret;
|
|
}
|
|
#else
|
|
static int gk20a_domain_init(struct of_device_id *matches)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
struct class nvgpu_class = {
|
|
.owner = THIS_MODULE,
|
|
.name = CLASS_NAME,
|
|
};
|
|
|
|
static int __init gk20a_init(void)
|
|
{
|
|
|
|
int ret;
|
|
|
|
ret = class_register(&nvgpu_class);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = gk20a_domain_init(tegra_gpu_domain_match);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = nvgpu_pci_init();
|
|
if (ret)
|
|
return ret;
|
|
|
|
return platform_driver_register(&gk20a_driver);
|
|
}
|
|
|
|
static void __exit gk20a_exit(void)
|
|
{
|
|
nvgpu_pci_exit();
|
|
platform_driver_unregister(&gk20a_driver);
|
|
class_unregister(&nvgpu_class);
|
|
}
|
|
|
|
void gk20a_busy_noresume(struct device *dev)
|
|
{
|
|
pm_runtime_get_noresume(dev);
|
|
}
|
|
|
|
int gk20a_busy(struct device *dev)
|
|
{
|
|
int ret = 0;
|
|
struct gk20a *g = get_gk20a(dev);
|
|
#ifdef CONFIG_PM
|
|
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
|
#endif
|
|
|
|
down_read(&g->busy_lock);
|
|
|
|
#ifdef CONFIG_PM
|
|
if (platform->busy) {
|
|
ret = platform->busy(dev);
|
|
if (ret < 0) {
|
|
dev_err(dev, "%s: failed to poweron platform dependency\n",
|
|
__func__);
|
|
goto fail;
|
|
}
|
|
}
|
|
|
|
ret = pm_runtime_get_sync(dev);
|
|
if (ret < 0) {
|
|
pm_runtime_put_noidle(dev);
|
|
if (platform->idle)
|
|
platform->idle(dev);
|
|
goto fail;
|
|
}
|
|
#else
|
|
if (!g->power_on) {
|
|
ret = gk20a_gpu_is_virtual(dev) ?
|
|
vgpu_pm_finalize_poweron(dev)
|
|
: gk20a_pm_finalize_poweron(dev);
|
|
if (ret)
|
|
goto fail;
|
|
}
|
|
#endif
|
|
gk20a_scale_notify_busy(dev);
|
|
|
|
fail:
|
|
up_read(&g->busy_lock);
|
|
|
|
return ret < 0 ? ret : 0;
|
|
}
|
|
|
|
void gk20a_idle(struct device *dev)
|
|
{
|
|
#ifdef CONFIG_PM
|
|
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
|
if (atomic_read(&dev->power.usage_count) == 1)
|
|
gk20a_scale_notify_idle(dev);
|
|
pm_runtime_mark_last_busy(dev);
|
|
pm_runtime_put_sync_autosuspend(dev);
|
|
|
|
if (platform->idle)
|
|
platform->idle(dev);
|
|
#else
|
|
gk20a_scale_notify_idle(dev);
|
|
#endif
|
|
}
|
|
|
|
void gk20a_disable(struct gk20a *g, u32 units)
|
|
{
|
|
u32 pmc;
|
|
|
|
gk20a_dbg(gpu_dbg_info, "pmc disable: %08x\n", units);
|
|
|
|
spin_lock(&g->mc_enable_lock);
|
|
pmc = gk20a_readl(g, mc_enable_r());
|
|
pmc &= ~units;
|
|
gk20a_writel(g, mc_enable_r(), pmc);
|
|
spin_unlock(&g->mc_enable_lock);
|
|
}
|
|
|
|
void gk20a_enable(struct gk20a *g, u32 units)
|
|
{
|
|
u32 pmc;
|
|
|
|
gk20a_dbg(gpu_dbg_info, "pmc enable: %08x\n", units);
|
|
|
|
spin_lock(&g->mc_enable_lock);
|
|
pmc = gk20a_readl(g, mc_enable_r());
|
|
pmc |= units;
|
|
gk20a_writel(g, mc_enable_r(), pmc);
|
|
gk20a_readl(g, mc_enable_r());
|
|
spin_unlock(&g->mc_enable_lock);
|
|
|
|
udelay(20);
|
|
}
|
|
|
|
void gk20a_reset(struct gk20a *g, u32 units)
|
|
{
|
|
gk20a_disable(g, units);
|
|
if (units & gk20a_fifo_get_all_ce_engine_reset_mask(g))
|
|
udelay(500);
|
|
else
|
|
udelay(20);
|
|
gk20a_enable(g, units);
|
|
}
|
|
|
|
#ifdef CONFIG_PM
|
|
/**
|
|
* __gk20a_do_idle() - force the GPU to idle and railgate
|
|
*
|
|
* In success, this call MUST be balanced by caller with __gk20a_do_unidle()
|
|
*
|
|
* Acquires two locks : &g->busy_lock and &platform->railgate_lock
|
|
* In success, we hold these locks and return
|
|
* In failure, we release these locks and return
|
|
*/
|
|
int __gk20a_do_idle(struct device *dev, bool force_reset)
|
|
{
|
|
struct gk20a *g = get_gk20a(dev);
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
unsigned long timeout = jiffies +
|
|
msecs_to_jiffies(GK20A_WAIT_FOR_IDLE_MS);
|
|
int ref_cnt;
|
|
bool is_railgated;
|
|
|
|
/* acquire busy lock to block other busy() calls */
|
|
down_write(&g->busy_lock);
|
|
|
|
/* acquire railgate lock to prevent unrailgate in midst of do_idle() */
|
|
mutex_lock(&platform->railgate_lock);
|
|
|
|
/* check if it is already railgated ? */
|
|
if (platform->is_railgated(dev))
|
|
return 0;
|
|
|
|
/*
|
|
* release railgate_lock, prevent suspend by incrementing usage counter,
|
|
* re-acquire railgate_lock
|
|
*/
|
|
mutex_unlock(&platform->railgate_lock);
|
|
pm_runtime_get_sync(dev);
|
|
mutex_lock(&platform->railgate_lock);
|
|
|
|
/* check and wait until GPU is idle (with a timeout) */
|
|
do {
|
|
msleep(1);
|
|
ref_cnt = atomic_read(&dev->power.usage_count);
|
|
} while (ref_cnt != 1 && time_before(jiffies, timeout));
|
|
|
|
if (ref_cnt != 1) {
|
|
gk20a_err(dev, "failed to idle - refcount %d != 1\n",
|
|
ref_cnt);
|
|
goto fail_drop_usage_count;
|
|
}
|
|
|
|
/* check if global force_reset flag is set */
|
|
force_reset |= platform->force_reset_in_do_idle;
|
|
|
|
if (platform->can_railgate && !force_reset) {
|
|
/*
|
|
* Case 1 : GPU railgate is supported
|
|
*
|
|
* if GPU is now idle, we will have only one ref count,
|
|
* drop this ref which will rail gate the GPU
|
|
*/
|
|
pm_runtime_put_sync(dev);
|
|
|
|
/* add sufficient delay to allow GPU to rail gate */
|
|
msleep(platform->railgate_delay);
|
|
|
|
timeout = jiffies + msecs_to_jiffies(GK20A_WAIT_FOR_IDLE_MS);
|
|
|
|
/* check in loop if GPU is railgated or not */
|
|
do {
|
|
msleep(1);
|
|
is_railgated = platform->is_railgated(dev);
|
|
} while (!is_railgated && time_before(jiffies, timeout));
|
|
|
|
if (is_railgated) {
|
|
return 0;
|
|
} else {
|
|
gk20a_err(dev, "failed to idle in timeout\n");
|
|
goto fail_timeout;
|
|
}
|
|
} else {
|
|
/*
|
|
* Case 2 : GPU railgate is not supported or we explicitly
|
|
* do not want to depend on runtime PM
|
|
*
|
|
* if GPU is now idle, call prepare_poweroff() to save the
|
|
* state and then do explicit railgate
|
|
*
|
|
* __gk20a_do_unidle() needs to unrailgate, call
|
|
* finalize_poweron(), and then call pm_runtime_put_sync()
|
|
* to balance the GPU usage counter
|
|
*/
|
|
|
|
/* Save the GPU state */
|
|
gk20a_pm_prepare_poweroff(dev);
|
|
|
|
gk20a_pm_disable_clk(dev);
|
|
|
|
/* railgate GPU */
|
|
platform->railgate(dev);
|
|
|
|
udelay(10);
|
|
|
|
g->forced_reset = true;
|
|
return 0;
|
|
}
|
|
|
|
fail_drop_usage_count:
|
|
pm_runtime_put_noidle(dev);
|
|
fail_timeout:
|
|
mutex_unlock(&platform->railgate_lock);
|
|
up_write(&g->busy_lock);
|
|
return -EBUSY;
|
|
}
|
|
|
|
/**
|
|
* gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called
|
|
* from outside of GPU driver
|
|
*
|
|
* In success, this call MUST be balanced by caller with gk20a_do_unidle()
|
|
*/
|
|
int gk20a_do_idle(void)
|
|
{
|
|
struct device_node *node =
|
|
of_find_matching_node(NULL, tegra_gk20a_of_match);
|
|
struct platform_device *pdev = of_find_device_by_node(node);
|
|
|
|
int ret = __gk20a_do_idle(&pdev->dev, true);
|
|
|
|
of_node_put(node);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle()
|
|
*/
|
|
int __gk20a_do_unidle(struct device *dev)
|
|
{
|
|
struct gk20a *g = get_gk20a(dev);
|
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
|
|
|
if (g->forced_reset) {
|
|
/*
|
|
* If we did a forced-reset/railgate
|
|
* then unrailgate the GPU here first
|
|
*/
|
|
platform->unrailgate(dev);
|
|
|
|
gk20a_pm_enable_clk(dev);
|
|
|
|
/* restore the GPU state */
|
|
gk20a_pm_finalize_poweron(dev);
|
|
|
|
/* balance GPU usage counter */
|
|
pm_runtime_put_sync(dev);
|
|
|
|
g->forced_reset = false;
|
|
}
|
|
|
|
/* release the lock and open up all other busy() calls */
|
|
mutex_unlock(&platform->railgate_lock);
|
|
up_write(&g->busy_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* gk20a_do_unidle() - wrap up for __gk20a_do_unidle()
|
|
*/
|
|
int gk20a_do_unidle(void)
|
|
{
|
|
struct device_node *node =
|
|
of_find_matching_node(NULL, tegra_gk20a_of_match);
|
|
struct platform_device *pdev = of_find_device_by_node(node);
|
|
|
|
int ret = __gk20a_do_unidle(&pdev->dev);
|
|
|
|
of_node_put(node);
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
int gk20a_init_gpu_characteristics(struct gk20a *g)
|
|
{
|
|
struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
|
|
struct gk20a_platform *platform = dev_get_drvdata(g->dev);
|
|
|
|
gpu->L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g);
|
|
gpu->on_board_video_memory_size = 0; /* integrated GPU */
|
|
|
|
gpu->num_gpc = g->gr.gpc_count;
|
|
gpu->max_gpc_count = g->gr.max_gpc_count;
|
|
|
|
gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
|
|
|
|
gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
|
|
|
|
gpu->big_page_size = g->mm.pmu.vm.big_page_size;
|
|
gpu->compression_page_size = g->ops.fb.compression_page_size(g);
|
|
gpu->pde_coverage_bit_count =
|
|
gk20a_mm_pde_coverage_bit_count(&g->mm.pmu.vm);
|
|
|
|
if (g->mm.disable_bigpage) {
|
|
gpu->big_page_size = 0;
|
|
gpu->available_big_page_sizes = 0;
|
|
} else {
|
|
gpu->available_big_page_sizes = gpu->big_page_size;
|
|
if (g->ops.mm.get_big_page_sizes)
|
|
gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
|
|
}
|
|
|
|
gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
|
|
| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
|
|
|
|
if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g))
|
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
|
|
|
|
if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
|
|
gk20a_platform_has_syncpoints(g->dev))
|
|
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
|
|
|
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
|
|
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
|
|
|
|
gpu->gpc_mask = (1 << g->gr.gpc_count)-1;
|
|
|
|
g->ops.gr.detect_sm_arch(g);
|
|
|
|
if (g->ops.gr.init_cyclestats)
|
|
g->ops.gr.init_cyclestats(g);
|
|
|
|
gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
|
|
gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
|
|
gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
|
|
gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
|
|
gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
|
|
gpu->gpu_va_bit_count = 40;
|
|
|
|
memcpy(gpu->chipname, g->ops.name, strlen(g->ops.name));
|
|
gpu->max_fbps_count = g->ops.gr.get_max_fbps_count(g);
|
|
gpu->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
|
|
gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
|
|
gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
|
|
g->ops.gr.get_rop_l2_en_mask(g);
|
|
gpu->gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
|
|
gpu->gr_gobs_per_comptagline_per_slice =
|
|
g->gr.gobs_per_comptagline_per_slice;
|
|
gpu->num_ltc = g->ltc_count;
|
|
gpu->lts_per_ltc = g->gr.slices_per_ltc;
|
|
gpu->cbc_cache_line_size = g->gr.cacheline_size;
|
|
gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline;
|
|
|
|
gpu->map_buffer_batch_limit = 256;
|
|
|
|
if (platform->clk_round_rate)
|
|
gpu->max_freq = platform->clk_round_rate(g->dev, UINT_MAX);
|
|
|
|
g->ops.gr.get_preemption_mode_flags(g, &g->gr.preemption_mode_rec);
|
|
gpu->graphics_preemption_mode_flags =
|
|
g->gr.preemption_mode_rec.graphics_preemption_mode_flags;
|
|
gpu->compute_preemption_mode_flags =
|
|
g->gr.preemption_mode_rec.compute_preemption_mode_flags;
|
|
gpu->default_graphics_preempt_mode =
|
|
g->gr.preemption_mode_rec.default_graphics_preempt_mode;
|
|
gpu->default_compute_preempt_mode =
|
|
g->gr.preemption_mode_rec.default_compute_preempt_mode;
|
|
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct firmware *
|
|
do_request_firmware(struct device *dev, const char *prefix, const char *fw_name)
|
|
{
|
|
const struct firmware *fw;
|
|
char *fw_path = NULL;
|
|
int path_len, err;
|
|
|
|
if (prefix) {
|
|
path_len = strlen(prefix) + strlen(fw_name);
|
|
path_len += 2; /* for the path separator and zero terminator*/
|
|
|
|
fw_path = kzalloc(sizeof(*fw_path) * path_len, GFP_KERNEL);
|
|
if (!fw_path)
|
|
return NULL;
|
|
|
|
sprintf(fw_path, "%s/%s", prefix, fw_name);
|
|
fw_name = fw_path;
|
|
}
|
|
|
|
err = request_firmware(&fw, fw_name, dev);
|
|
kfree(fw_path);
|
|
if (err)
|
|
return NULL;
|
|
return fw;
|
|
}
|
|
|
|
/* This is a simple wrapper around request_firmware that takes 'fw_name' and
|
|
* applies an IP specific relative path prefix to it. The caller is
|
|
* responsible for calling release_firmware later. */
|
|
const struct firmware *
|
|
gk20a_request_firmware(struct gk20a *g, const char *fw_name)
|
|
{
|
|
struct device *dev = g->dev;
|
|
const struct firmware *fw;
|
|
|
|
/* current->fs is NULL when calling from SYS_EXIT.
|
|
Add a check here to prevent crash in request_firmware */
|
|
if (!current->fs || !fw_name)
|
|
return NULL;
|
|
|
|
BUG_ON(!g->ops.name);
|
|
fw = do_request_firmware(dev, g->ops.name, fw_name);
|
|
|
|
#ifdef CONFIG_TEGRA_GK20A
|
|
/* TO BE REMOVED - Support loading from legacy SOC specific path. */
|
|
if (!fw) {
|
|
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
|
fw = do_request_firmware(dev, platform->soc_name, fw_name);
|
|
}
|
|
#endif
|
|
|
|
if (!fw) {
|
|
dev_err(dev, "failed to get firmware\n");
|
|
return NULL;
|
|
}
|
|
|
|
return fw;
|
|
}
|
|
|
|
int gk20a_read_ptimer(struct gk20a *g, u64 *value)
|
|
{
|
|
const unsigned int max_iterations = 3;
|
|
unsigned int i = 0;
|
|
u32 gpu_timestamp_hi_prev = 0;
|
|
|
|
if (!value)
|
|
return -EINVAL;
|
|
|
|
/* Note. The GPU nanosecond timer consists of two 32-bit
|
|
* registers (high & low). To detect a possible low register
|
|
* wrap-around between the reads, we need to read the high
|
|
* register before and after low. The wraparound happens
|
|
* approximately once per 4 secs. */
|
|
|
|
/* get initial gpu_timestamp_hi value */
|
|
gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
|
|
|
|
for (i = 0; i < max_iterations; ++i) {
|
|
u32 gpu_timestamp_hi = 0;
|
|
u32 gpu_timestamp_lo = 0;
|
|
|
|
gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
|
|
gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
|
|
|
|
if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
|
|
*value = (((u64)gpu_timestamp_hi) << 32) |
|
|
gpu_timestamp_lo;
|
|
return 0;
|
|
}
|
|
|
|
/* wrap-around detected, retry */
|
|
gpu_timestamp_hi_prev = gpu_timestamp_hi;
|
|
}
|
|
|
|
/* too many iterations, bail out */
|
|
gk20a_err(dev_from_gk20a(g), "failed to read ptimer");
|
|
return -EBUSY;
|
|
}
|
|
|
|
MODULE_LICENSE("GPL v2");
|
|
module_init(gk20a_init);
|
|
module_exit(gk20a_exit);
|