mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
Use busy looping on L2 and TLB maintenance operations. This speeds them up by an order of magnitude. Also add trace points to measure performance for memory ops and interrupt processing. Change-Id: Ic4a8525d3d946b2b8f57b4b8ddcfc61605619399 Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/681640
179 lines
4.5 KiB
C
179 lines
4.5 KiB
C
/*
 * GK20A memory interface
 *
 * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
|
|
|
|
#include <linux/types.h>
|
|
#include <trace/events/gk20a.h>
|
|
|
|
#include "gk20a.h"
|
|
#include "mc_gk20a.h"
|
|
#include "hw_mc_gk20a.h"
|
|
|
|
irqreturn_t mc_gk20a_isr_stall(struct gk20a *g)
|
|
{
|
|
u32 mc_intr_0;
|
|
|
|
trace_mc_gk20a_intr_stall(g->dev->name);
|
|
|
|
if (!g->power_on)
|
|
return IRQ_NONE;
|
|
|
|
/* not from gpu when sharing irq with others */
|
|
mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
|
|
if (unlikely(!mc_intr_0))
|
|
return IRQ_NONE;
|
|
|
|
gk20a_writel(g, mc_intr_en_0_r(),
|
|
mc_intr_en_0_inta_disabled_f());
|
|
|
|
/* flush previous write */
|
|
gk20a_readl(g, mc_intr_en_0_r());
|
|
|
|
trace_mc_gk20a_intr_stall_done(g->dev->name);
|
|
|
|
return IRQ_WAKE_THREAD;
|
|
}
|
|
|
|
irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g)
|
|
{
|
|
u32 mc_intr_1;
|
|
|
|
if (!g->power_on)
|
|
return IRQ_NONE;
|
|
|
|
/* not from gpu when sharing irq with others */
|
|
mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
|
|
if (unlikely(!mc_intr_1))
|
|
return IRQ_NONE;
|
|
|
|
gk20a_writel(g, mc_intr_en_1_r(),
|
|
mc_intr_en_1_inta_disabled_f());
|
|
|
|
/* flush previous write */
|
|
gk20a_readl(g, mc_intr_en_1_r());
|
|
|
|
return IRQ_WAKE_THREAD;
|
|
}
|
|
|
|
irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
|
|
{
|
|
u32 mc_intr_0;
|
|
|
|
gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
|
|
|
|
trace_mc_gk20a_intr_thread_stall(g->dev->name);
|
|
|
|
mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
|
|
|
|
gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
|
|
|
|
if (mc_intr_0 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
|
|
gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
|
|
if (mc_intr_0 & BIT(g->fifo.engine_info[ENGINE_CE2_GK20A].intr_id))
|
|
gk20a_ce2_isr(g);
|
|
if (mc_intr_0 & mc_intr_0_pfifo_pending_f())
|
|
gk20a_fifo_isr(g);
|
|
if (mc_intr_0 & mc_intr_0_pmu_pending_f())
|
|
gk20a_pmu_isr(g);
|
|
if (mc_intr_0 & mc_intr_0_priv_ring_pending_f())
|
|
gk20a_priv_ring_isr(g);
|
|
if (mc_intr_0 & mc_intr_0_ltc_pending_f())
|
|
g->ops.ltc.isr(g);
|
|
if (mc_intr_0 & mc_intr_0_pbus_pending_f())
|
|
gk20a_pbus_isr(g);
|
|
|
|
gk20a_writel(g, mc_intr_en_0_r(),
|
|
mc_intr_en_0_inta_hardware_f());
|
|
|
|
/* flush previous write */
|
|
gk20a_readl(g, mc_intr_en_0_r());
|
|
|
|
trace_mc_gk20a_intr_thread_stall_done(g->dev->name);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g)
|
|
{
|
|
u32 mc_intr_1;
|
|
|
|
gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
|
|
|
|
mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
|
|
|
|
gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
|
|
|
|
if (mc_intr_1 & mc_intr_0_pfifo_pending_f())
|
|
gk20a_fifo_nonstall_isr(g);
|
|
if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
|
|
gk20a_gr_nonstall_isr(g);
|
|
if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_CE2_GK20A].intr_id))
|
|
gk20a_ce2_nonstall_isr(g);
|
|
|
|
gk20a_writel(g, mc_intr_en_1_r(),
|
|
mc_intr_en_1_inta_hardware_f());
|
|
|
|
/* flush previous write */
|
|
gk20a_readl(g, mc_intr_en_1_r());
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
void mc_gk20a_intr_enable(struct gk20a *g)
|
|
{
|
|
u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
|
|
|
|
gk20a_writel(g, mc_intr_mask_1_r(),
|
|
mc_intr_0_pfifo_pending_f()
|
|
| eng_intr_mask);
|
|
gk20a_writel(g, mc_intr_en_1_r(),
|
|
mc_intr_en_1_inta_hardware_f());
|
|
|
|
gk20a_writel(g, mc_intr_mask_0_r(),
|
|
mc_intr_0_pfifo_pending_f()
|
|
| mc_intr_0_priv_ring_pending_f()
|
|
| mc_intr_0_ltc_pending_f()
|
|
| mc_intr_0_pbus_pending_f()
|
|
| eng_intr_mask);
|
|
gk20a_writel(g, mc_intr_en_0_r(),
|
|
mc_intr_en_0_inta_hardware_f());
|
|
}
|
|
|
|
/*
 * Set or clear unit bits in one of the interrupt mask registers.
 *
 * @g:           gpu instance whose registers are accessed
 * @enable:      true to OR @mask into the register, false to clear those bits
 * @is_stalling: true selects mc_intr_mask_0, false selects mc_intr_mask_1
 * @mask:        bits to set or clear
 *
 * Performs a plain read-modify-write of the selected register; no locking
 * is done here.
 */
void mc_gk20a_intr_unit_config(struct gk20a *g, bool enable,
		bool is_stalling, u32 mask)
{
	u32 reg = is_stalling ? mc_intr_mask_0_r() : mc_intr_mask_1_r();
	u32 val = gk20a_readl(g, reg);

	if (enable)
		val |= mask;
	else
		val &= ~mask;

	gk20a_writel(g, reg, val);
}
|
|
|
|
void gk20a_init_mc(struct gpu_ops *gops)
|
|
{
|
|
gops->mc.intr_enable = mc_gk20a_intr_enable;
|
|
gops->mc.intr_unit_config = mc_gk20a_intr_unit_config;
|
|
gops->mc.isr_stall = mc_gk20a_isr_stall;
|
|
gops->mc.isr_nonstall = mc_gk20a_isr_nonstall;
|
|
gops->mc.isr_thread_stall = mc_gk20a_intr_thread_stall;
|
|
gops->mc.isr_thread_nonstall = mc_gk20a_intr_thread_nonstall;
|
|
}
|