mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
Rewrite PMU boot sequence as a state machine. At PMU power-up send initial messages, and reset state machine. At each reply from PMU, do the next stage of PMU boot and set state. As now PMU and FECS boot are independent, we need to ensure engine idle before saving ZBC. Change-Id: I1ea747ab794ef08f1784eeabfdae7655d585ff21 Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/410205
6861 lines
198 KiB
C
6861 lines
198 KiB
C
/*
 * GK20A Graphics
 *
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
|
|
|
|
#include <linux/delay.h> /* for udelay */
|
|
#include <linux/mm.h> /* for totalram_pages */
|
|
#include <linux/scatterlist.h>
|
|
#include <linux/tegra-soc.h>
|
|
#include <linux/nvhost_dbg_gpu_ioctl.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/firmware.h>
|
|
#include <linux/nvhost.h>
|
|
|
|
#include "gk20a.h"
|
|
#include "kind_gk20a.h"
|
|
#include "gr_ctx_gk20a.h"
|
|
|
|
#include "hw_ccsr_gk20a.h"
|
|
#include "hw_ctxsw_prog_gk20a.h"
|
|
#include "hw_fifo_gk20a.h"
|
|
#include "hw_gr_gk20a.h"
|
|
#include "hw_gmmu_gk20a.h"
|
|
#include "hw_mc_gk20a.h"
|
|
#include "hw_ram_gk20a.h"
|
|
#include "hw_pri_ringmaster_gk20a.h"
|
|
#include "hw_pri_ringstation_sys_gk20a.h"
|
|
#include "hw_pri_ringstation_gpc_gk20a.h"
|
|
#include "hw_pri_ringstation_fbp_gk20a.h"
|
|
#include "hw_proj_gk20a.h"
|
|
#include "hw_top_gk20a.h"
|
|
#include "hw_ltc_gk20a.h"
|
|
#include "hw_fb_gk20a.h"
|
|
#include "hw_therm_gk20a.h"
|
|
#include "hw_pbdma_gk20a.h"
|
|
#include "gr_pri_gk20a.h"
|
|
#include "regops_gk20a.h"
|
|
#include "dbg_gpu_gk20a.h"
|
|
|
|
#define BLK_SIZE (256)
|
|
|
|
/* Forward declarations for helpers defined later in this file. */

static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);

/* global ctx buffer: allocate/free and map/unmap per channel */
static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
					   struct channel_gk20a *c);
static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);

/* channel gr ctx buffer */
static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
					 struct channel_gk20a *c);
static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);

/* channel patch ctx buffer */
static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
					    struct channel_gk20a *c);
static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);

/* golden ctx image */
static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
					  struct channel_gk20a *c);
static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
					  struct channel_gk20a *c);
|
|
|
|
void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
|
|
{
|
|
int i;
|
|
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_os_r : %d",
|
|
gk20a_readl(g, gr_fecs_os_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_cpuctl_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_cpuctl_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_idlestate_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_idlestate_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox0_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_mailbox0_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_mailbox1_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_mailbox1_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_irqstat_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_irqstat_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmode_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_irqmode_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_irqmask_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_irqmask_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_irqdest_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_irqdest_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_debug1_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_debug1_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_debuginfo_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_debuginfo_r()));
|
|
|
|
for (i = 0; i < gr_fecs_ctxsw_mailbox__size_1_v(); i++)
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_ctxsw_mailbox_r(%d) : 0x%x",
|
|
i, gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(i)));
|
|
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_engctl_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_engctl_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_curctx_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_curctx_r()));
|
|
gk20a_err(dev_from_gk20a(g), "gr_fecs_nxtctx_r : 0x%x",
|
|
gk20a_readl(g, gr_fecs_nxtctx_r()));
|
|
|
|
gk20a_writel(g, gr_fecs_icd_cmd_r(),
|
|
gr_fecs_icd_cmd_opc_rreg_f() |
|
|
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_IMB));
|
|
gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_IMB : 0x%x",
|
|
gk20a_readl(g, gr_fecs_icd_rdata_r()));
|
|
|
|
gk20a_writel(g, gr_fecs_icd_cmd_r(),
|
|
gr_fecs_icd_cmd_opc_rreg_f() |
|
|
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_DMB));
|
|
gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_DMB : 0x%x",
|
|
gk20a_readl(g, gr_fecs_icd_rdata_r()));
|
|
|
|
gk20a_writel(g, gr_fecs_icd_cmd_r(),
|
|
gr_fecs_icd_cmd_opc_rreg_f() |
|
|
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CSW));
|
|
gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CSW : 0x%x",
|
|
gk20a_readl(g, gr_fecs_icd_rdata_r()));
|
|
|
|
gk20a_writel(g, gr_fecs_icd_cmd_r(),
|
|
gr_fecs_icd_cmd_opc_rreg_f() |
|
|
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_CTX));
|
|
gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_CTX : 0x%x",
|
|
gk20a_readl(g, gr_fecs_icd_rdata_r()));
|
|
|
|
gk20a_writel(g, gr_fecs_icd_cmd_r(),
|
|
gr_fecs_icd_cmd_opc_rreg_f() |
|
|
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_EXCI));
|
|
gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_EXCI : 0x%x",
|
|
gk20a_readl(g, gr_fecs_icd_rdata_r()));
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
gk20a_writel(g, gr_fecs_icd_cmd_r(),
|
|
gr_fecs_icd_cmd_opc_rreg_f() |
|
|
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_PC));
|
|
gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_PC : 0x%x",
|
|
gk20a_readl(g, gr_fecs_icd_rdata_r()));
|
|
|
|
gk20a_writel(g, gr_fecs_icd_cmd_r(),
|
|
gr_fecs_icd_cmd_opc_rreg_f() |
|
|
gr_fecs_icd_cmd_idx_f(PMU_FALCON_REG_SP));
|
|
gk20a_err(dev_from_gk20a(g), "FECS_FALCON_REG_SP : 0x%x",
|
|
gk20a_readl(g, gr_fecs_icd_rdata_r()));
|
|
}
|
|
}
|
|
|
|
static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
|
|
{
|
|
u32 i, ucode_u32_size;
|
|
const u32 *ucode_u32_data;
|
|
u32 checksum;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
|
|
gr_gpccs_dmemc_blk_f(0) |
|
|
gr_gpccs_dmemc_aincw_f(1)));
|
|
|
|
ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
|
|
ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
|
|
|
|
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
|
gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
|
|
checksum += ucode_u32_data[i];
|
|
}
|
|
|
|
gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
|
|
gr_fecs_dmemc_blk_f(0) |
|
|
gr_fecs_dmemc_aincw_f(1)));
|
|
|
|
ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
|
|
ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
|
|
|
|
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
|
gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
|
|
checksum += ucode_u32_data[i];
|
|
}
|
|
gk20a_dbg_fn("done");
|
|
}
|
|
|
|
static void gr_gk20a_load_falcon_imem(struct gk20a *g)
|
|
{
|
|
u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
|
|
const u32 *ucode_u32_data;
|
|
u32 tag, i, pad_start, pad_end;
|
|
u32 checksum;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
cfg = gk20a_readl(g, gr_fecs_cfg_r());
|
|
fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
|
|
|
|
cfg = gk20a_readl(g, gr_gpc0_cfg_r());
|
|
gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
|
|
|
|
/* Use the broadcast address to access all of the GPCCS units. */
|
|
gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
|
|
gr_gpccs_imemc_blk_f(0) |
|
|
gr_gpccs_imemc_aincw_f(1)));
|
|
|
|
/* Setup the tags for the instruction memory. */
|
|
tag = 0;
|
|
gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
|
|
|
|
ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
|
|
ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
|
|
|
|
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
|
if (i && ((i % (256/sizeof(u32))) == 0)) {
|
|
tag++;
|
|
gk20a_writel(g, gr_gpccs_imemt_r(0),
|
|
gr_gpccs_imemt_tag_f(tag));
|
|
}
|
|
gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
|
|
checksum += ucode_u32_data[i];
|
|
}
|
|
|
|
pad_start = i*4;
|
|
pad_end = pad_start+(256-pad_start%256)+256;
|
|
for (i = pad_start;
|
|
(i < gpccs_imem_size * 256) && (i < pad_end);
|
|
i += 4) {
|
|
if (i && ((i % 256) == 0)) {
|
|
tag++;
|
|
gk20a_writel(g, gr_gpccs_imemt_r(0),
|
|
gr_gpccs_imemt_tag_f(tag));
|
|
}
|
|
gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
|
|
}
|
|
|
|
gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
|
|
gr_fecs_imemc_blk_f(0) |
|
|
gr_fecs_imemc_aincw_f(1)));
|
|
|
|
/* Setup the tags for the instruction memory. */
|
|
tag = 0;
|
|
gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
|
|
|
|
ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
|
|
ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
|
|
|
|
for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
|
|
if (i && ((i % (256/sizeof(u32))) == 0)) {
|
|
tag++;
|
|
gk20a_writel(g, gr_fecs_imemt_r(0),
|
|
gr_fecs_imemt_tag_f(tag));
|
|
}
|
|
gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
|
|
checksum += ucode_u32_data[i];
|
|
}
|
|
|
|
pad_start = i*4;
|
|
pad_end = pad_start+(256-pad_start%256)+256;
|
|
for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
|
|
if (i && ((i % 256) == 0)) {
|
|
tag++;
|
|
gk20a_writel(g, gr_fecs_imemt_r(0),
|
|
gr_fecs_imemt_tag_f(tag));
|
|
}
|
|
gk20a_writel(g, gr_fecs_imemd_r(0), 0);
|
|
}
|
|
}
|
|
|
|
static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
|
|
u32 expect_delay)
|
|
{
|
|
u32 delay = expect_delay;
|
|
bool gr_enabled;
|
|
bool ctxsw_active;
|
|
bool gr_busy;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
do {
|
|
/* fmodel: host gets fifo_engine_status(gr) from gr
|
|
only when gr_status is read */
|
|
gk20a_readl(g, gr_status_r());
|
|
|
|
gr_enabled = gk20a_readl(g, mc_enable_r()) &
|
|
mc_enable_pgraph_enabled_f();
|
|
|
|
ctxsw_active = gk20a_readl(g,
|
|
fifo_engine_status_r(ENGINE_GR_GK20A)) &
|
|
fifo_engine_status_ctxsw_in_progress_f();
|
|
|
|
gr_busy = gk20a_readl(g, gr_engine_status_r()) &
|
|
gr_engine_status_value_busy_f();
|
|
|
|
if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
usleep_range(delay, delay * 2);
|
|
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
|
|
|
} while (time_before(jiffies, end_jiffies)
|
|
|| !tegra_platform_is_silicon());
|
|
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"timeout, ctxsw busy : %d, gr busy : %d",
|
|
ctxsw_active, gr_busy);
|
|
|
|
return -EAGAIN;
|
|
}
|
|
|
|
static int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long end_jiffies,
|
|
u32 expect_delay)
|
|
{
|
|
u32 val;
|
|
u32 delay = expect_delay;
|
|
|
|
if (tegra_platform_is_linsim())
|
|
return 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
do {
|
|
val = gk20a_readl(g, gr_status_r());
|
|
|
|
if (!gr_status_fe_method_upper_v(val) &&
|
|
!gr_status_fe_method_lower_v(val) &&
|
|
!gr_status_fe_gi_v(val)) {
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
usleep_range(delay, delay * 2);
|
|
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
|
} while (time_before(jiffies, end_jiffies)
|
|
|| !tegra_platform_is_silicon());
|
|
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"timeout, fe busy : %x", val);
|
|
|
|
return -EAGAIN;
|
|
}
|
|
static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask)
|
|
{
|
|
u32 delay = GR_IDLE_CHECK_DEFAULT;
|
|
unsigned long end_jiffies = jiffies +
|
|
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
u32 reg;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (!tegra_platform_is_linsim()) {
|
|
/* Force clocks on */
|
|
gk20a_writel(g, gr_fe_pwr_mode_r(),
|
|
gr_fe_pwr_mode_req_send_f() |
|
|
gr_fe_pwr_mode_mode_force_on_f());
|
|
|
|
/* Wait for the clocks to indicate that they are on */
|
|
do {
|
|
reg = gk20a_readl(g, gr_fe_pwr_mode_r());
|
|
|
|
if (gr_fe_pwr_mode_req_v(reg) ==
|
|
gr_fe_pwr_mode_req_done_v())
|
|
break;
|
|
|
|
usleep_range(delay, delay * 2);
|
|
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
|
|
|
} while (time_before(jiffies, end_jiffies));
|
|
|
|
if (!time_before(jiffies, end_jiffies)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to force the clocks on\n");
|
|
WARN_ON(1);
|
|
}
|
|
}
|
|
if (rst_mask) {
|
|
gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask);
|
|
} else {
|
|
gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
|
|
gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
|
|
}
|
|
|
|
/* we need to read the reset register *and* wait for a moment to ensure
|
|
* reset propagation */
|
|
|
|
gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
|
|
udelay(20);
|
|
|
|
gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
|
|
gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
|
|
gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
|
|
|
|
/* we need to readl the reset and then wait a small moment after that */
|
|
gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
|
|
udelay(20);
|
|
|
|
if (!tegra_platform_is_linsim()) {
|
|
/* Set power mode back to auto */
|
|
gk20a_writel(g, gr_fe_pwr_mode_r(),
|
|
gr_fe_pwr_mode_req_send_f() |
|
|
gr_fe_pwr_mode_mode_auto_f());
|
|
|
|
/* Wait for the request to complete */
|
|
end_jiffies = jiffies +
|
|
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
do {
|
|
reg = gk20a_readl(g, gr_fe_pwr_mode_r());
|
|
|
|
if (gr_fe_pwr_mode_req_v(reg) ==
|
|
gr_fe_pwr_mode_req_done_v())
|
|
break;
|
|
|
|
usleep_range(delay, delay * 2);
|
|
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
|
|
|
} while (time_before(jiffies, end_jiffies));
|
|
|
|
if (!time_before(jiffies, end_jiffies))
|
|
gk20a_warn(dev_from_gk20a(g),
|
|
"failed to set power mode to auto\n");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Evaluate one GR_IS_UCODE_OP_* comparison of @reg against @ref.
 *
 * Returns 1 if the condition holds, 0 if it does not (GR_IS_UCODE_OP_SKIP
 * never holds), and -EINVAL for an unknown opcode.
 */
static int gr_gk20a_ucode_op_eval(u32 opc, u32 reg, u32 ref)
{
	switch (opc) {
	case GR_IS_UCODE_OP_EQUAL:
		return reg == ref;
	case GR_IS_UCODE_OP_NOT_EQUAL:
		return reg != ref;
	case GR_IS_UCODE_OP_AND:
		return (reg & ref) != 0;
	case GR_IS_UCODE_OP_LESSER:
		return reg < ref;
	case GR_IS_UCODE_OP_LESSER_EQUAL:
		return reg <= ref;
	case GR_IS_UCODE_OP_SKIP:
		/* no check requested */
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * Poll FECS ctxsw mailbox @mailbox_id until a success or failure condition
 * is met, or until the GR idle timeout expires (timeout only applies on
 * silicon).
 *
 * @mailbox_ret:  if non-NULL, receives the last mailbox value read.
 * @opc_success:  GR_IS_UCODE_OP_* comparison applied against @mailbox_ok.
 * @opc_fail:     GR_IS_UCODE_OP_* comparison applied against @mailbox_fail.
 *
 * On every iteration the success condition is evaluated first and the
 * failure condition second, so a matching failure condition (or an invalid
 * opcode) overrides a matching success condition.  A success match on the
 * iteration in which the timeout is detected still counts as success.
 *
 * Returns 0 on success and -1 on timeout or failure; in both error cases
 * the FECS falcon state is dumped to the log.
 *
 * Change from the previous version: the loop no longer sleeps once the
 * outcome has been decided.
 */
static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
				   u32 *mailbox_ret, u32 opc_success,
				   u32 mailbox_ok, u32 opc_fail,
				   u32 mailbox_fail)
{
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	u32 check = WAIT_UCODE_LOOP;
	u32 reg;
	int match;

	gk20a_dbg_fn("");

	for (;;) {
		/* flag the timeout first; a success match below may still
		 * override it with one final look at the mailbox */
		if (!time_before(jiffies, end_jiffies) &&
		    tegra_platform_is_silicon())
			check = WAIT_UCODE_TIMEOUT;

		reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));

		if (mailbox_ret)
			*mailbox_ret = reg;

		match = gr_gk20a_ucode_op_eval(opc_success, reg, mailbox_ok);
		if (match < 0) {
			gk20a_err(dev_from_gk20a(g),
				  "invalid success opcode 0x%x", opc_success);
			check = WAIT_UCODE_ERROR;
		} else if (match) {
			check = WAIT_UCODE_OK;
		}

		match = gr_gk20a_ucode_op_eval(opc_fail, reg, mailbox_fail);
		if (match < 0) {
			gk20a_err(dev_from_gk20a(g),
				  "invalid fail opcode 0x%x", opc_fail);
			check = WAIT_UCODE_ERROR;
		} else if (match) {
			check = WAIT_UCODE_ERROR;
		}

		/* outcome known: do not waste another sleep period */
		if (check != WAIT_UCODE_LOOP)
			break;

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	}

	if (check == WAIT_UCODE_TIMEOUT) {
		gk20a_err(dev_from_gk20a(g),
			  "timeout waiting on ucode response");
		gk20a_fecs_dump_falcon_stats(g);
		return -1;
	} else if (check == WAIT_UCODE_ERROR) {
		gk20a_err(dev_from_gk20a(g),
			  "ucode method failed on mailbox=%d value=0x%08x",
			  mailbox_id, reg);
		gk20a_fecs_dump_falcon_stats(g);
		return -1;
	}

	gk20a_dbg_fn("done");
	return 0;
}
|
|
|
|
/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
|
|
* We should replace most, if not all, fecs method calls to this instead. */
|
|
struct fecs_method_op_gk20a {
|
|
struct {
|
|
u32 addr;
|
|
u32 data;
|
|
} method;
|
|
|
|
struct {
|
|
u32 id;
|
|
u32 data;
|
|
u32 clr;
|
|
u32 *ret;
|
|
u32 ok;
|
|
u32 fail;
|
|
} mailbox;
|
|
|
|
struct {
|
|
u32 ok;
|
|
u32 fail;
|
|
} cond;
|
|
|
|
};
|
|
|
|
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
|
|
struct fecs_method_op_gk20a op)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
int ret;
|
|
|
|
mutex_lock(&gr->fecs_mutex);
|
|
|
|
if (op.mailbox.id != 0)
|
|
gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
|
|
op.mailbox.data);
|
|
|
|
gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
|
|
gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
|
|
|
|
gk20a_writel(g, gr_fecs_method_data_r(), op.method.data);
|
|
gk20a_writel(g, gr_fecs_method_push_r(),
|
|
gr_fecs_method_push_adr_f(op.method.addr));
|
|
|
|
/* op.mb.id == 4 cases require waiting for completion on
|
|
* for op.mb.id == 0 */
|
|
if (op.mailbox.id == 4)
|
|
op.mailbox.id = 0;
|
|
|
|
ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
|
|
op.cond.ok, op.mailbox.ok,
|
|
op.cond.fail, op.mailbox.fail);
|
|
|
|
mutex_unlock(&gr->fecs_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Send a context-switch control method (@fecs_method) to FECS and wait for
 * the pass/fail value to appear in mailbox 1.
 *
 * @ret: optional; receives the mailbox value that was polled.
 *
 * Returns the result of gr_gk20a_submit_fecs_method_op().
 */
int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
{
	struct fecs_method_op_gk20a op = {
		.method = {
			.addr = fecs_method,
			.data = ~0,
		},
		.mailbox = {
			.id   = 1, /*sideband?*/
			.data = ~0,
			.clr  = ~0,
			.ret  = ret,
			.ok   = gr_fecs_ctxsw_mailbox_value_pass_v(),
			.fail = gr_fecs_ctxsw_mailbox_value_fail_v(),
		},
		.cond = {
			.ok   = GR_IS_UCODE_OP_EQUAL,
			.fail = GR_IS_UCODE_OP_EQUAL,
		},
	};

	return gr_gk20a_submit_fecs_method_op(g, op);
}
|
|
|
|
/* Stop processing (stall) context switches at FECS.
|
|
* The caller must hold the dbg_sessions_lock, else if mutliple stop methods
|
|
* are sent to the ucode in sequence, it can get into an undefined state. */
|
|
int gr_gk20a_disable_ctxsw(struct gk20a *g)
|
|
{
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
|
|
return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_stop_ctxsw_v(), 0);
|
|
}
|
|
|
|
/* Start processing (continue) context switches at FECS */
|
|
int gr_gk20a_enable_ctxsw(struct gk20a *g)
|
|
{
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
|
|
return gr_gk20a_ctrl_ctxsw(g, gr_fecs_method_push_adr_start_ctxsw_v(), 0);
|
|
}
|
|
|
|
|
|
/*
 * Record the GR context buffer address @gpu_va in the channel's instance
 * block: the low word carries bits 31:12 of the address together with the
 * cs_wfi and virtual-mode flags, the high word carries the upper 32 bits.
 *
 * Returns 0, or -ENOMEM if the instance block has no CPU mapping.
 */
static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
{
	void *inst;
	u32 wfi_lo, wfi_hi;

	gk20a_dbg_fn("");

	inst = c->inst_block.cpuva;
	if (inst == NULL)
		return -ENOMEM;

	wfi_lo = u64_lo32(gpu_va) >> 12;
	wfi_hi = u64_hi32(gpu_va);

	gk20a_mem_wr32(inst, ram_in_gr_wfi_target_w(),
		       ram_in_gr_cs_wfi_f() |
		       ram_in_gr_wfi_mode_virtual_f() |
		       ram_in_gr_wfi_ptr_lo_f(wfi_lo));

	gk20a_mem_wr32(inst, ram_in_gr_wfi_ptr_hi_w(),
		       ram_in_gr_wfi_ptr_hi_f(wfi_hi));

	return 0;
}
|
|
|
|
/*
|
|
* Context state can be written directly or "patched" at times.
|
|
* So that code can be used in either situation it is written
|
|
* using a series _ctx_patch_write(..., patch) statements.
|
|
* However any necessary cpu map/unmap and gpu l2 invalidates
|
|
* should be minimized (to avoid doing it once per patch write).
|
|
* Before a sequence of these set up with "_ctx_patch_write_begin"
|
|
* and close with "_ctx_patch_write_end."
|
|
*/
|
|
int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
|
|
struct channel_ctx_gk20a *ch_ctx)
|
|
{
|
|
/* being defensive still... */
|
|
if (ch_ctx->patch_ctx.cpu_va) {
|
|
gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?");
|
|
return -EBUSY;
|
|
}
|
|
|
|
ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages,
|
|
PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT,
|
|
0, pgprot_dmacoherent(PAGE_KERNEL));
|
|
|
|
if (!ch_ctx->patch_ctx.cpu_va)
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
|
|
struct channel_ctx_gk20a *ch_ctx)
|
|
{
|
|
/* being defensive still... */
|
|
if (!ch_ctx->patch_ctx.cpu_va) {
|
|
gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?");
|
|
return -EINVAL;
|
|
}
|
|
|
|
vunmap(ch_ctx->patch_ctx.cpu_va);
|
|
ch_ctx->patch_ctx.cpu_va = NULL;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Write @data to register @addr, either directly or via the channel's
 * patch context.
 *
 * @patch == false: the value is written straight to the register and
 *                  @ch_ctx is not used (it may be NULL).
 * @patch == true:  the (addr, data) pair is appended to the patch buffer
 *                  of @ch_ctx and patch_ctx.data_count is incremented.  If
 *                  the patch buffer is not currently mapped, it is mapped
 *                  for this one write and unmapped again, and a message is
 *                  logged.
 *
 * Returns 0 on success, -EINVAL if @patch is set but @ch_ctx is NULL, or
 * the error from gr_gk20a_ctx_patch_write_begin().
 *
 * Change from the previous version: a NULL @ch_ctx with @patch set used to
 * hit BUG_ON(), which made the -EINVAL return that followed unreachable.
 * It now triggers a WARN and returns -EINVAL.
 *
 * NOTE(review): data_count is not bounds-checked against patch_ctx.size
 * here; confirm that callers cannot overflow the patch buffer.
 */
int gr_gk20a_ctx_patch_write(struct gk20a *g,
			     struct channel_ctx_gk20a *ch_ctx,
			     u32 addr, u32 data, bool patch)
{
	u32 patch_slot;
	void *patch_ptr;
	bool mapped_here = false;

	if (!patch) {
		gk20a_writel(g, addr, data);
		return 0;
	}

	if (WARN_ON(!ch_ctx))
		return -EINVAL;

	/* we added an optimization prolog, epilog
	 * to get rid of unnecessary maps and l2 invals.
	 * but be defensive still... */
	if (!ch_ctx->patch_ctx.cpu_va) {
		int err;

		gk20a_err(dev_from_gk20a(g),
			  "per-write ctx patch begin?");
		/* yes, gr_gk20a_ctx_patch_smpc causes this one */
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
		if (err)
			return err;
		mapped_here = true;
	}

	/* each patch entry is two consecutive words: address, then data */
	patch_ptr = ch_ctx->patch_ctx.cpu_va;
	patch_slot = ch_ctx->patch_ctx.data_count * 2;

	gk20a_mem_wr32(patch_ptr, patch_slot++, addr);
	gk20a_mem_wr32(patch_ptr, patch_slot++, data);

	ch_ctx->patch_ctx.data_count++;

	if (mapped_here)
		gr_gk20a_ctx_patch_write_end(g, ch_ctx);

	return 0;
}
|
|
|
|
static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
|
|
struct channel_gk20a *c)
|
|
{
|
|
u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa
|
|
>> ram_in_base_shift_v());
|
|
u32 ret;
|
|
|
|
gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
|
|
c->hw_chid, inst_base_ptr);
|
|
|
|
ret = gr_gk20a_submit_fecs_method_op(g,
|
|
(struct fecs_method_op_gk20a) {
|
|
.method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
|
|
.method.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
|
|
gr_fecs_current_ctx_target_vid_mem_f() |
|
|
gr_fecs_current_ctx_valid_f(1)),
|
|
.mailbox = { .id = 0, .data = 0,
|
|
.clr = 0x30,
|
|
.ret = NULL,
|
|
.ok = 0x10,
|
|
.fail = 0x20, },
|
|
.cond.ok = GR_IS_UCODE_OP_AND,
|
|
.cond.fail = GR_IS_UCODE_OP_AND});
|
|
if (ret)
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"bind channel instance failed");
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
|
|
bool disable_fifo)
|
|
{
|
|
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
|
|
u32 va_lo, va_hi, va;
|
|
int ret = 0;
|
|
void *ctx_ptr = NULL;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
|
|
PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
|
|
0, pgprot_dmacoherent(PAGE_KERNEL));
|
|
if (!ctx_ptr)
|
|
return -ENOMEM;
|
|
|
|
if (ch_ctx->zcull_ctx.gpu_va == 0 &&
|
|
ch_ctx->zcull_ctx.ctx_sw_mode ==
|
|
ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
|
|
ret = -EINVAL;
|
|
goto clean_up;
|
|
}
|
|
|
|
va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
|
|
va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
|
|
va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
|
|
|
|
if (disable_fifo) {
|
|
ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
|
|
if (ret) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to disable gr engine activity\n");
|
|
goto clean_up;
|
|
}
|
|
}
|
|
|
|
gk20a_mm_fb_flush(g);
|
|
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
|
|
ch_ctx->zcull_ctx.ctx_sw_mode);
|
|
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va);
|
|
|
|
if (disable_fifo) {
|
|
ret = gk20a_fifo_enable_engine_activity(g, gr_info);
|
|
if (ret) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to enable gr engine activity\n");
|
|
goto clean_up;
|
|
}
|
|
}
|
|
|
|
clean_up:
|
|
vunmap(ctx_ptr);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Program the circular-buffer manager: the DS constraint logic sizes, the
 * PD alpha/beta max output, and for every PPC of every GPC the attribute
 * (cbm_cfg) and alpha (cbm_cfg2) buffer start offset and size.
 *
 * Attribute regions are laid out first, one after another starting at
 * offset 0; alpha regions follow, starting at tpc_count * attrib_cb_size.
 *
 * When @patch is set the writes go to the patch context of @c (mapped once
 * around the whole sequence); otherwise they go directly to the registers
 * and @c is not dereferenced.
 *
 * Returns 0, or the error from gr_gk20a_ctx_patch_write_begin().
 */
static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
			struct channel_gk20a *c, bool patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = NULL;
	u32 attrib_off = 0;
	u32 alpha_off;
	u32 max_output;
	u32 gpc, ppc;

	gk20a_dbg_fn("");

	if (patch) {
		int err;

		ch_ctx = &c->ch_ctx;
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
		if (err)
			return err;
	}

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
		gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
		gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
		patch);

	max_output = (gr->alpha_cb_default_size *
		      gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) /
		     gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

	/* alpha regions start right behind all attribute regions */
	alpha_off = attrib_off + gr->tpc_count * gr->attrib_cb_size;

	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
		u32 gpc_base = proj_gpc_stride_v() * gpc;

		for (ppc = 0; ppc < gr->gpc_ppc_count[gpc]; ppc++) {
			u32 ppc_base = gpc_base +
				proj_ppc_in_gpc_stride_v() * ppc;
			u32 attrib_size = gr->attrib_cb_default_size *
				gr->pes_tpc_count[ppc][gpc];
			u32 alpha_size = gr->alpha_cb_default_size *
				gr->pes_tpc_count[ppc][gpc];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_cfg_r() + ppc_base,
				gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
				gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_off) |
				gr_gpc0_ppc0_cbm_cfg_size_f(attrib_size), patch);

			attrib_off += gr->attrib_cb_size *
				gr->pes_tpc_count[ppc][gpc];

			gr_gk20a_ctx_patch_write(g, ch_ctx,
				gr_gpc0_ppc0_cbm_cfg2_r() + ppc_base,
				gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_off) |
				gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_size), patch);

			alpha_off += gr->alpha_cb_size *
				gr->pes_tpc_count[ppc][gpc];
		}
	}

	if (patch)
		gr_gk20a_ctx_patch_write_end(g, ch_ctx);

	return 0;
}
|
|
|
|
/*
 * Commit the addresses and sizes of the channel's global context buffers
 * (pagepool, bundle circular buffer, attribute circular buffer) through
 * the per-chip g->ops.gr hooks.
 *
 * Each address is the channel's GPU VA for that buffer, shifted right by
 * the register's alignment bits so that it fits in 32 bits.  When @patch
 * is set, the patch context is mapped once around all the writes.
 *
 * Returns 0, or the error from gr_gk20a_ctx_patch_write_begin().
 */
static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
			struct channel_gk20a *c, bool patch)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct gr_gk20a *gr = &g->gr;
	u64 addr;
	u32 size;

	gk20a_dbg_fn("");

	if (patch) {
		int err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);

		if (err)
			return err;
	}

	/* global pagepool buffer */
	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
		gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
	       (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
		(32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));

	size = gr->global_ctx_buffer[PAGEPOOL].size /
	       gr_scc_pagepool_total_pages_byte_granularity_v();

	/* the hardware maximum is written as a dedicated value */
	if (size == gr_scc_pagepool_total_pages_hwmax_value_v())
		size = gr_scc_pagepool_total_pages_hwmax_v();

	gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d",
		       addr, size);

	g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch);

	/* global bundle cb */
	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
		gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
	       (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
		(32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));

	size = gr->bundle_cb_default_size;

	gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d",
		       addr, size);

	g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch);

	/* global attrib cb */
	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
		gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
	       (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
		(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));

	gk20a_dbg_info("attrib cb addr : 0x%016llx", addr);

	g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch);

	if (patch)
		gr_gk20a_ctx_patch_write_end(g, ch_ctx);

	return 0;
}
|
|
|
|
static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g,
|
|
struct channel_ctx_gk20a *ch_ctx,
|
|
u64 addr, bool patch)
|
|
{
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
|
|
gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
|
|
gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);
|
|
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
|
|
gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
|
|
gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);
|
|
}
|
|
|
|
/*
 * Commit the bundle circular buffer base @addr and @size to the SCC and
 * GPC setup registers, then program the PD token and state limits.
 *
 * The state limit is derived from g->gr.bundle_cb_default_size, converted
 * between the bundle-cb and state-limit granularities and clamped to
 * g->gr.min_gpm_fifo_depth.  All writes go directly to the registers or to
 * the patch context depending on @patch.
 */
static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g,
					     struct channel_ctx_gk20a *ch_ctx,
					     u64 addr, u64 size, bool patch)
{
	u32 state_limit;

	/* SCC view of the buffer */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
				 gr_scc_bundle_cb_base_addr_39_8_f(addr),
				 patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
				 gr_scc_bundle_cb_size_div_256b_f(size) |
				 gr_scc_bundle_cb_size_valid_true_f(),
				 patch);

	/* GPC setup view of the same buffer */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_base_r(),
				 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr),
				 patch);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_size_r(),
				 gr_gpcs_setup_bundle_cb_size_div_256b_f(size) |
				 gr_gpcs_setup_bundle_cb_size_valid_true_f(),
				 patch);

	/* data for state_limit */
	state_limit = (g->gr.bundle_cb_default_size *
		       gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
		      gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();

	state_limit = min_t(u32, state_limit, g->gr.min_gpm_fifo_depth);

	gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
		       g->gr.bundle_cb_token_limit, state_limit);

	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
				 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
				 gr_pd_ab_dist_cfg2_state_limit_f(state_limit),
				 patch);
}
|
|
|
|
/*
 * Commit the timeslice mode to the graphics engine registers.
 *
 * The current values of the GPM PD cfg, PD alpha/beta dist cfg0, DS debug
 * and MPC VTG debug registers are read, OR-ed with the enable or disable
 * field value that matches gr->timeslice_mode, and written back through
 * gr_gk20a_ctx_patch_write().  When timeslicing is enabled the PE VAF and
 * PES VSC/VPC registers additionally get their fast-mode-switch field set.
 *
 * With @patch set, the writes are bracketed by
 * gr_gk20a_ctx_patch_write_begin()/_end() on the channel's context; without
 * it, ch_ctx is passed down as NULL.
 *
 * Returns 0, or the error from gr_gk20a_ctx_patch_write_begin().
 */
static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = NULL;
	u32 gpm_pd_cfg, pd_ab_dist_cfg0, ds_debug, mpc_vtg_debug;
	u32 pe_vaf = 0, pe_vsc_vpc = 0;
	bool timeslice_on;
	int err;

	gk20a_dbg_fn("");

	gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
	pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
	ds_debug = gk20a_readl(g, gr_ds_debug_r());
	mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());

	if (patch) {
		ch_ctx = &c->ch_ctx;
		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
		if (err)
			return err;
	}

	timeslice_on = (gr->timeslice_mode ==
			gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v());

	if (timeslice_on) {
		/* the PE registers are only touched in the enabled case */
		pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
		pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());

		gpm_pd_cfg |= gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f();
		pe_vaf |= gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f();
		pe_vsc_vpc |= gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f();
		pd_ab_dist_cfg0 |= gr_pd_ab_dist_cfg0_timeslice_enable_en_f();
		ds_debug |= gr_ds_debug_timeslice_mode_enable_f();
		mpc_vtg_debug |= gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f();
	} else {
		gpm_pd_cfg |= gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f();
		pd_ab_dist_cfg0 |= gr_pd_ab_dist_cfg0_timeslice_enable_dis_f();
		ds_debug |= gr_ds_debug_timeslice_mode_disable_f();
		mpc_vtg_debug |= gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f();
	}

	/* write order: GPM PD cfg, [PE VAF, PES VSC/VPC], PD cfg0, DS, MPC VTG */
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(),
				 gpm_pd_cfg, patch);
	if (timeslice_on) {
		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(),
					 pe_vaf, patch);
		gr_gk20a_ctx_patch_write(g, ch_ctx,
					 gr_gpcs_tpcs_pes_vsc_vpc_r(),
					 pe_vsc_vpc, patch);
	}
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(),
				 pd_ab_dist_cfg0, patch);
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, patch);
	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(),
				 mpc_vtg_debug, patch);

	if (patch)
		gr_gk20a_ctx_patch_write_end(g, ch_ctx);

	return 0;
}
|
|
|
|
int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
u32 norm_entries, norm_shift;
|
|
u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
|
|
u32 map0, map1, map2, map3, map4, map5;
|
|
|
|
if (!gr->map_tiles)
|
|
return -1;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
gk20a_writel(g, gr_crstr_map_table_cfg_r(),
|
|
gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
|
|
gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
|
|
|
|
map0 = gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) |
|
|
gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) |
|
|
gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) |
|
|
gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) |
|
|
gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) |
|
|
gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]);
|
|
|
|
map1 = gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) |
|
|
gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) |
|
|
gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) |
|
|
gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) |
|
|
gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) |
|
|
gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]);
|
|
|
|
map2 = gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) |
|
|
gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) |
|
|
gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) |
|
|
gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) |
|
|
gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) |
|
|
gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]);
|
|
|
|
map3 = gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) |
|
|
gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) |
|
|
gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) |
|
|
gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) |
|
|
gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) |
|
|
gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]);
|
|
|
|
map4 = gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) |
|
|
gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) |
|
|
gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) |
|
|
gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) |
|
|
gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) |
|
|
gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]);
|
|
|
|
map5 = gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) |
|
|
gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) |
|
|
gr_crstr_gpc_map5_tile32_f(0) |
|
|
gr_crstr_gpc_map5_tile33_f(0) |
|
|
gr_crstr_gpc_map5_tile34_f(0) |
|
|
gr_crstr_gpc_map5_tile35_f(0);
|
|
|
|
gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
|
|
gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
|
|
gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
|
|
gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
|
|
gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
|
|
gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
|
|
|
|
switch (gr->tpc_count) {
|
|
case 1:
|
|
norm_shift = 4;
|
|
break;
|
|
case 2:
|
|
case 3:
|
|
norm_shift = 3;
|
|
break;
|
|
case 4:
|
|
case 5:
|
|
case 6:
|
|
case 7:
|
|
norm_shift = 2;
|
|
break;
|
|
case 8:
|
|
case 9:
|
|
case 10:
|
|
case 11:
|
|
case 12:
|
|
case 13:
|
|
case 14:
|
|
case 15:
|
|
norm_shift = 1;
|
|
break;
|
|
default:
|
|
norm_shift = 0;
|
|
break;
|
|
}
|
|
|
|
norm_entries = gr->tpc_count << norm_shift;
|
|
coeff5_mod = (1 << 5) % norm_entries;
|
|
coeff6_mod = (1 << 6) % norm_entries;
|
|
coeff7_mod = (1 << 7) % norm_entries;
|
|
coeff8_mod = (1 << 8) % norm_entries;
|
|
coeff9_mod = (1 << 9) % norm_entries;
|
|
coeff10_mod = (1 << 10) % norm_entries;
|
|
coeff11_mod = (1 << 11) % norm_entries;
|
|
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
|
|
gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
|
|
gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
|
|
gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
|
|
gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
|
|
gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
|
|
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
|
|
gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
|
|
gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
|
|
gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
|
|
gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
|
|
gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
|
|
gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
|
|
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
|
|
gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
|
|
|
|
gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
|
|
gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
|
|
gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
|
|
|
|
gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
|
|
gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
|
|
gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
|
|
gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
|
|
gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
|
|
gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Return the number of bits set in @mask. */
static inline u32 count_bits(u32 mask)
{
	u32 bits_set = 0;

	/* every pass drops the lowest set bit, so passes == set bits */
	while (mask != 0) {
		mask &= mask - 1;
		bits_set++;
	}

	return bits_set;
}
|
|
|
|
/*
 * Return @num with its @clear_count lowest set bits cleared.  If @num has
 * fewer set bits than @clear_count the result is 0.
 */
static inline u32 clear_count_bits(u32 num, u32 clear_count)
{
	while (num != 0 && clear_count != 0) {
		num &= num - 1;		/* drop the lowest set bit */
		clear_count--;
	}

	return num;
}
|
|
|
|
static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
|
|
struct gr_gk20a *gr)
|
|
{
|
|
u32 table_index_bits = 5;
|
|
u32 rows = (1 << table_index_bits);
|
|
u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows;
|
|
|
|
u32 row;
|
|
u32 index;
|
|
u32 gpc_index;
|
|
u32 gpcs_per_reg = 4;
|
|
u32 pes_index;
|
|
u32 tpc_count_pes;
|
|
u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
|
|
|
|
u32 alpha_target, beta_target;
|
|
u32 alpha_bits, beta_bits;
|
|
u32 alpha_mask, beta_mask, partial_mask;
|
|
u32 reg_offset;
|
|
bool assign_alpha;
|
|
|
|
u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()];
|
|
u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()];
|
|
u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()];
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
|
|
memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
|
|
memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
|
|
|
|
for (row = 0; row < rows; ++row) {
|
|
alpha_target = max_t(u32, gr->tpc_count * row / rows, 1);
|
|
beta_target = gr->tpc_count - alpha_target;
|
|
|
|
assign_alpha = (alpha_target < beta_target);
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg);
|
|
alpha_mask = beta_mask = 0;
|
|
|
|
for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) {
|
|
tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index];
|
|
|
|
if (assign_alpha) {
|
|
alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes;
|
|
beta_bits = tpc_count_pes - alpha_bits;
|
|
} else {
|
|
beta_bits = (beta_target == 0) ? 0 : tpc_count_pes;
|
|
alpha_bits = tpc_count_pes - beta_bits;
|
|
}
|
|
|
|
partial_mask = gr->pes_tpc_mask[pes_index][gpc_index];
|
|
partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits);
|
|
alpha_mask |= partial_mask;
|
|
|
|
partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask;
|
|
beta_mask |= partial_mask;
|
|
|
|
alpha_target -= min(alpha_bits, alpha_target);
|
|
beta_target -= min(beta_bits, beta_target);
|
|
|
|
if ((alpha_bits > 0) || (beta_bits > 0))
|
|
assign_alpha = !assign_alpha;
|
|
}
|
|
|
|
switch (gpc_index % gpcs_per_reg) {
|
|
case 0:
|
|
map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask);
|
|
map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask);
|
|
break;
|
|
case 1:
|
|
map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask);
|
|
map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask);
|
|
break;
|
|
case 2:
|
|
map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask);
|
|
map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask);
|
|
break;
|
|
case 3:
|
|
map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask);
|
|
map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask);
|
|
break;
|
|
}
|
|
map_reg_used[reg_offset] = true;
|
|
}
|
|
}
|
|
|
|
for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) {
|
|
if (map_reg_used[index]) {
|
|
gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]);
|
|
gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
u32 tpc_index, gpc_index;
|
|
u32 tpc_offset, gpc_offset;
|
|
u32 sm_id = 0, gpc_id = 0;
|
|
u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
|
|
u32 tpc_per_gpc;
|
|
u32 max_ways_evict = INVALID_MAX_WAYS;
|
|
u32 l1c_dbg_reg_val;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
gpc_offset = proj_gpc_stride_v() * gpc_index;
|
|
if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
|
|
tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
|
|
|
|
gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
|
|
gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
|
|
gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
|
|
gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
|
|
gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
|
|
gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
|
|
gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
|
|
gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
|
|
|
|
sm_id_to_gpc_id[sm_id] = gpc_index;
|
|
sm_id++;
|
|
}
|
|
|
|
gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
|
|
gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
|
|
gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
|
|
gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
|
|
}
|
|
}
|
|
|
|
for (tpc_index = 0, gpc_id = 0;
|
|
tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
|
|
tpc_index++, gpc_id += 8) {
|
|
|
|
if (gpc_id >= gr->gpc_count)
|
|
gpc_id = 0;
|
|
|
|
tpc_per_gpc =
|
|
gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
|
|
gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
|
|
gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
|
|
gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
|
|
gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
|
|
gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
|
|
gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
|
|
gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);
|
|
|
|
gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
|
|
gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
|
|
}
|
|
|
|
/* gr__setup_pd_mapping stubbed for gk20a */
|
|
gr_gk20a_setup_rop_mapping(g, gr);
|
|
if (g->ops.gr.setup_alpha_beta_tables)
|
|
g->ops.gr.setup_alpha_beta_tables(g, gr);
|
|
|
|
if (gr->num_fbps == 1)
|
|
max_ways_evict = 9;
|
|
|
|
if (max_ways_evict != INVALID_MAX_WAYS)
|
|
g->ops.ltc.set_max_ways_evict_last(g, max_ways_evict);
|
|
|
|
for (gpc_index = 0;
|
|
gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
|
|
gpc_index += 4) {
|
|
|
|
gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
|
|
gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) ||
|
|
gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) ||
|
|
gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) ||
|
|
gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
|
|
}
|
|
|
|
gk20a_writel(g, gr_cwd_fs_r(),
|
|
gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
|
|
gr_cwd_fs_num_tpcs_f(gr->tpc_count));
|
|
|
|
gk20a_writel(g, gr_bes_zrop_settings_r(),
|
|
gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
|
|
gk20a_writel(g, gr_bes_crop_settings_r(),
|
|
gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));
|
|
|
|
/* turn on cya15 bit for a default val that missed the cut */
|
|
l1c_dbg_reg_val = gk20a_readl(g, gr_gpc0_tpc0_l1c_dbg_r());
|
|
l1c_dbg_reg_val |= gr_gpc0_tpc0_l1c_dbg_cya15_en_f();
|
|
gk20a_writel(g, gr_gpc0_tpc0_l1c_dbg_r(), l1c_dbg_reg_val);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Ask FECS to save the context image of channel @c.
 *
 * @save_type is used as the FECS method address.  The method data is the
 * channel's instance block pointer (cpu_pa shifted right by
 * ram_in_base_shift_v()), tagged as vid_mem and valid.  Mailbox 0 is used,
 * with 3 as the clear value, 1 as the "ok" value and 2 as the "fail"
 * value, both compared with GR_IS_UCODE_OP_AND.
 *
 * Returns the result of gr_gk20a_submit_fecs_method_op(); a non-zero
 * result is also logged.
 */
static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
{
	struct gk20a *g = c->g;
	u32 inst_base_ptr =
		u64_lo32(c->inst_block.cpu_pa >> ram_in_base_shift_v());
	struct fecs_method_op_gk20a op = {
		.method = {
			.addr = save_type,
			.data = (gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
				 gr_fecs_current_ctx_target_vid_mem_f() |
				 gr_fecs_current_ctx_valid_f(1)),
		},
		.mailbox = {
			.id = 0,
			.data = 0,
			.clr = 3,
			.ret = NULL,
			.ok = 1,
			.fail = 2,
		},
		.cond = {
			.ok = GR_IS_UCODE_OP_AND,
			.fail = GR_IS_UCODE_OP_AND,
		},
	};
	int ret;

	gk20a_dbg_fn("");

	ret = gr_gk20a_submit_fecs_method_op(g, op);
	if (ret)
		gk20a_err(dev_from_gk20a(g), "save context image failed");

	return ret;
}
|
|
|
|
static u32 gk20a_init_sw_bundle(struct gk20a *g)
|
|
{
|
|
struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
|
|
u32 last_bundle_data = 0;
|
|
u32 err = 0;
|
|
int i;
|
|
unsigned long end_jiffies = jiffies +
|
|
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
|
|
/* enable pipe mode override */
|
|
gk20a_writel(g, gr_pipe_bundle_config_r(),
|
|
gr_pipe_bundle_config_override_pipe_mode_enabled_f());
|
|
|
|
/* load bundle init */
|
|
for (i = 0; i < sw_bundle_init->count; i++) {
|
|
err |= gr_gk20a_wait_fe_idle(g, end_jiffies,
|
|
GR_IDLE_CHECK_DEFAULT);
|
|
if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
|
|
gk20a_writel(g, gr_pipe_bundle_data_r(),
|
|
sw_bundle_init->l[i].value);
|
|
last_bundle_data = sw_bundle_init->l[i].value;
|
|
}
|
|
|
|
gk20a_writel(g, gr_pipe_bundle_address_r(),
|
|
sw_bundle_init->l[i].addr);
|
|
|
|
if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
|
|
GR_GO_IDLE_BUNDLE)
|
|
err |= gr_gk20a_wait_idle(g, end_jiffies,
|
|
GR_IDLE_CHECK_DEFAULT);
|
|
}
|
|
|
|
/* disable pipe mode override */
|
|
gk20a_writel(g, gr_pipe_bundle_config_r(),
|
|
gr_pipe_bundle_config_override_pipe_mode_disabled_f());
|
|
|
|
return err;
|
|
}
|
|
|
|
/* init global golden image from a fresh gr_ctx in channel ctx.
|
|
save a copy in local_golden_image in ctx_vars */
|
|
static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
|
|
struct channel_gk20a *c)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
|
u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
|
|
u32 ctx_header_words;
|
|
u32 i;
|
|
u32 data;
|
|
void *ctx_ptr = NULL;
|
|
void *gold_ptr = NULL;
|
|
u32 err = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/* golden ctx is global to all channels. Although only the first
|
|
channel initializes golden image, driver needs to prevent multiple
|
|
channels from initializing golden ctx at the same time */
|
|
mutex_lock(&gr->ctx_mutex);
|
|
|
|
if (gr->ctx_vars.golden_image_initialized)
|
|
goto clean_up;
|
|
|
|
err = gr_gk20a_fecs_ctx_bind_channel(g, c);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gk20a_init_sw_bundle(g);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gr_gk20a_elpg_protected_call(g,
|
|
gr_gk20a_commit_global_ctx_buffers(g, c, false));
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].pages,
|
|
PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].size) >>
|
|
PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL));
|
|
if (!gold_ptr)
|
|
goto clean_up;
|
|
|
|
ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
|
|
PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
|
|
0, pgprot_dmacoherent(PAGE_KERNEL));
|
|
if (!ctx_ptr)
|
|
goto clean_up;
|
|
|
|
ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
|
|
ctx_header_words >>= 2;
|
|
|
|
gk20a_mm_l2_flush(g, true);
|
|
|
|
for (i = 0; i < ctx_header_words; i++) {
|
|
data = gk20a_mem_rd32(ctx_ptr, i);
|
|
gk20a_mem_wr32(gold_ptr, i, data);
|
|
}
|
|
|
|
gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0,
|
|
ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
|
|
|
|
gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0);
|
|
|
|
gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
|
|
|
|
gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
|
|
|
|
if (gr->ctx_vars.local_golden_image == NULL) {
|
|
|
|
gr->ctx_vars.local_golden_image =
|
|
kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL);
|
|
|
|
if (gr->ctx_vars.local_golden_image == NULL) {
|
|
err = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
|
|
gr->ctx_vars.local_golden_image[i] =
|
|
gk20a_mem_rd32(gold_ptr, i);
|
|
}
|
|
|
|
gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
|
|
|
|
gr->ctx_vars.golden_image_initialized = true;
|
|
|
|
gk20a_writel(g, gr_fecs_current_ctx_r(),
|
|
gr_fecs_current_ctx_valid_false_f());
|
|
|
|
clean_up:
|
|
if (err)
|
|
gk20a_err(dev_from_gk20a(g), "fail");
|
|
else
|
|
gk20a_dbg_fn("done");
|
|
|
|
if (gold_ptr)
|
|
vunmap(gold_ptr);
|
|
if (ctx_ptr)
|
|
vunmap(ctx_ptr);
|
|
|
|
mutex_unlock(&gr->ctx_mutex);
|
|
return err;
|
|
}
|
|
|
|
int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
|
|
struct channel_gk20a *c,
|
|
bool enable_smpc_ctxsw)
|
|
{
|
|
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
|
void *ctx_ptr = NULL;
|
|
u32 data;
|
|
|
|
/* Channel gr_ctx buffer is gpu cacheable.
|
|
Flush and invalidate before cpu update. */
|
|
gk20a_mm_l2_flush(g, true);
|
|
|
|
ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
|
|
PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
|
|
0, pgprot_dmacoherent(PAGE_KERNEL));
|
|
if (!ctx_ptr)
|
|
return -ENOMEM;
|
|
|
|
data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
|
|
data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
|
|
data |= enable_smpc_ctxsw ?
|
|
ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
|
|
ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
|
|
data);
|
|
|
|
vunmap(ctx_ptr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* load saved fresh copy of gloden image into channel gr_ctx */
|
|
static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
|
|
struct channel_gk20a *c)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
|
u32 virt_addr_lo;
|
|
u32 virt_addr_hi;
|
|
u32 i, v, data;
|
|
int ret = 0;
|
|
void *ctx_ptr = NULL;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (gr->ctx_vars.local_golden_image == NULL)
|
|
return -1;
|
|
|
|
/* Channel gr_ctx buffer is gpu cacheable.
|
|
Flush and invalidate before cpu update. */
|
|
gk20a_mm_l2_flush(g, true);
|
|
|
|
ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
|
|
PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
|
|
0, pgprot_dmacoherent(PAGE_KERNEL));
|
|
if (!ctx_ptr)
|
|
return -ENOMEM;
|
|
|
|
for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
|
|
gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);
|
|
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
|
|
|
|
virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
|
|
virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
|
|
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
|
|
ch_ctx->patch_ctx.data_count);
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0,
|
|
virt_addr_lo);
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0,
|
|
virt_addr_hi);
|
|
|
|
/* no user for client managed performance counter ctx */
|
|
ch_ctx->pm_ctx.ctx_sw_mode =
|
|
ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
|
|
data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
|
|
data = data & ~ctxsw_prog_main_image_pm_mode_m();
|
|
data |= ch_ctx->pm_ctx.ctx_sw_mode;
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
|
|
data);
|
|
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0);
|
|
|
|
/* set priv access map */
|
|
virt_addr_lo =
|
|
u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
|
virt_addr_hi =
|
|
u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
|
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0,
|
|
ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f());
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0,
|
|
virt_addr_lo);
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0,
|
|
virt_addr_hi);
|
|
/* disable verif features */
|
|
v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0);
|
|
v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
|
|
v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
|
|
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v);
|
|
|
|
|
|
vunmap(ctx_ptr);
|
|
|
|
if (tegra_platform_is_linsim()) {
|
|
u32 inst_base_ptr =
|
|
u64_lo32(c->inst_block.cpu_pa
|
|
>> ram_in_base_shift_v());
|
|
|
|
ret = gr_gk20a_submit_fecs_method_op(g,
|
|
(struct fecs_method_op_gk20a) {
|
|
.method.data =
|
|
(gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
|
|
gr_fecs_current_ctx_target_vid_mem_f() |
|
|
gr_fecs_current_ctx_valid_f(1)),
|
|
.method.addr =
|
|
gr_fecs_method_push_adr_restore_golden_v(),
|
|
.mailbox = {
|
|
.id = 0, .data = 0,
|
|
.clr = ~0, .ret = NULL,
|
|
.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
|
|
.fail = 0},
|
|
.cond.ok = GR_IS_UCODE_OP_EQUAL,
|
|
.cond.fail = GR_IS_UCODE_OP_SKIP});
|
|
|
|
if (ret)
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"restore context image failed");
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Start the GPCCS and FECS falcons.
 *
 * FECS ctxsw mailbox 0 is cleared with an all-ones clear value, the
 * require_ctx field of both DMA control registers is written as 0, and
 * then startcpu is written to GPCCS first and FECS second.
 */
static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
{
	gk20a_dbg_fn("");

	/* mailbox 0: clear every bit */
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
		     gr_fecs_ctxsw_mailbox_clear_value_f(~0));

	/* DMA control: require_ctx = 0 on both falcons */
	gk20a_writel(g, gr_gpccs_dmactl_r(),
		     gr_gpccs_dmactl_require_ctx_f(0));
	gk20a_writel(g, gr_fecs_dmactl_r(),
		     gr_fecs_dmactl_require_ctx_f(0));

	/* kick the CPUs, GPCCS before FECS */
	gk20a_writel(g, gr_gpccs_cpuctl_r(),
		     gr_gpccs_cpuctl_startcpu_f(1));
	gk20a_writel(g, gr_fecs_cpuctl_r(),
		     gr_fecs_cpuctl_startcpu_f(1));

	gk20a_dbg_fn("done");
}
|
|
|
|
static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
|
|
{
|
|
struct mm_gk20a *mm = &g->mm;
|
|
struct vm_gk20a *vm = &mm->pmu.vm;
|
|
struct device *d = dev_from_gk20a(g);
|
|
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
|
|
void *inst_ptr;
|
|
u32 pde_addr_lo;
|
|
u32 pde_addr_hi;
|
|
u64 pde_addr;
|
|
dma_addr_t iova;
|
|
|
|
/* Alloc mem of inst block */
|
|
ucode_info->inst_blk_desc.size = ram_in_alloc_size_v();
|
|
ucode_info->inst_blk_desc.cpuva = dma_alloc_coherent(d,
|
|
ucode_info->inst_blk_desc.size,
|
|
&iova,
|
|
GFP_KERNEL);
|
|
if (!ucode_info->inst_blk_desc.cpuva) {
|
|
gk20a_err(d, "failed to allocate memory\n");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
ucode_info->inst_blk_desc.iova = iova;
|
|
ucode_info->inst_blk_desc.cpu_pa = gk20a_get_phys_from_iova(d,
|
|
ucode_info->inst_blk_desc.iova);
|
|
|
|
inst_ptr = ucode_info->inst_blk_desc.cpuva;
|
|
|
|
/* Set inst block */
|
|
gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
|
|
u64_lo32(vm->va_limit) | 0xFFF);
|
|
gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
|
|
ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
|
|
|
|
pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
|
|
pde_addr_lo = u64_lo32(pde_addr >> 12);
|
|
pde_addr_hi = u64_hi32(pde_addr);
|
|
gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
|
|
ram_in_page_dir_base_target_vid_mem_f() |
|
|
ram_in_page_dir_base_vol_true_f() |
|
|
ram_in_page_dir_base_lo_f(pde_addr_lo));
|
|
gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
|
|
ram_in_page_dir_base_hi_f(pde_addr_hi));
|
|
|
|
/* Map ucode surface to GMMU */
|
|
ucode_info->ucode_gpuva = gk20a_gmmu_map(vm,
|
|
&ucode_info->surface_desc.sgt,
|
|
ucode_info->surface_desc.size,
|
|
0, /* flags */
|
|
gk20a_mem_flag_read_only);
|
|
if (!ucode_info->ucode_gpuva) {
|
|
gk20a_err(d, "failed to update gmmu ptes\n");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Place one ucode segment of @size bytes at the running offset *@offset
 * and advance *@offset to the next BLK_SIZE-aligned position behind it.
 */
static void gr_gk20a_init_ctxsw_ucode_segment(
	struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
{
	u32 start = *offset;

	p_seg->offset = start;
	p_seg->size = size;

	*offset = ALIGN(start + size, BLK_SIZE);
}
|
|
|
|
/*
 * Lay out the boot, code and data segments of one falcon's ucode, in that
 * order, starting at the running offset *@offset.
 *
 * The boot entry point and IMEM offset are taken from @bootdesc; the boot
 * segment size is the bootloader size rounded up to a multiple of
 * sizeof(u32).  @code_size and @data_size are given in bytes.  *@offset is
 * advanced past all three segments.
 */
static void gr_gk20a_init_ctxsw_ucode_segments(
	struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
	struct gk20a_ctxsw_bootloader_desc *bootdesc,
	u32 code_size, u32 data_size)
{
	const u32 boot_bytes = ALIGN(bootdesc->size, sizeof(u32));

	segments->boot_imem_offset = bootdesc->imem_offset;
	segments->boot_entry = bootdesc->entry_point;

	gr_gk20a_init_ctxsw_ucode_segment(&segments->boot, offset, boot_bytes);
	gr_gk20a_init_ctxsw_ucode_segment(&segments->code, offset, code_size);
	gr_gk20a_init_ctxsw_ucode_segment(&segments->data, offset, data_size);
}
|
|
|
|
static int gr_gk20a_copy_ctxsw_ucode_segments(
|
|
u8 *buf,
|
|
struct gk20a_ctxsw_ucode_segments *segments,
|
|
u32 *bootimage,
|
|
u32 *code, u32 *data)
|
|
{
|
|
memcpy(buf + segments->boot.offset, bootimage, segments->boot.size);
|
|
memcpy(buf + segments->code.offset, code, segments->code.size);
|
|
memcpy(buf + segments->data.offset, data, segments->data.size);
|
|
return 0;
|
|
}
|
|
|
|
static int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
|
|
{
|
|
struct device *d = dev_from_gk20a(g);
|
|
struct mm_gk20a *mm = &g->mm;
|
|
struct vm_gk20a *vm = &mm->pmu.vm;
|
|
struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
|
|
struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
|
|
const struct firmware *fecs_fw;
|
|
const struct firmware *gpccs_fw;
|
|
u32 *fecs_boot_image;
|
|
u32 *gpccs_boot_image;
|
|
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
|
|
u8 *buf;
|
|
u32 ucode_size;
|
|
int err = 0;
|
|
dma_addr_t iova;
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
|
|
fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE);
|
|
if (!fecs_fw) {
|
|
gk20a_err(d, "failed to load fecs ucode!!");
|
|
return -ENOENT;
|
|
}
|
|
|
|
fecs_boot_desc = (void *)fecs_fw->data;
|
|
fecs_boot_image = (void *)(fecs_fw->data +
|
|
sizeof(struct gk20a_ctxsw_bootloader_desc));
|
|
|
|
gpccs_fw = gk20a_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE);
|
|
if (!gpccs_fw) {
|
|
release_firmware(fecs_fw);
|
|
gk20a_err(d, "failed to load gpccs ucode!!");
|
|
return -ENOENT;
|
|
}
|
|
|
|
gpccs_boot_desc = (void *)gpccs_fw->data;
|
|
gpccs_boot_image = (void *)(gpccs_fw->data +
|
|
sizeof(struct gk20a_ctxsw_bootloader_desc));
|
|
|
|
ucode_size = 0;
|
|
gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->fecs, &ucode_size,
|
|
fecs_boot_desc,
|
|
g->gr.ctx_vars.ucode.fecs.inst.count * sizeof(u32),
|
|
g->gr.ctx_vars.ucode.fecs.data.count * sizeof(u32));
|
|
gr_gk20a_init_ctxsw_ucode_segments(&ucode_info->gpccs, &ucode_size,
|
|
gpccs_boot_desc,
|
|
g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
|
|
g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
|
|
|
|
ucode_info->surface_desc.size = ucode_size;
|
|
dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
|
|
ucode_info->surface_desc.cpuva = dma_alloc_attrs(d,
|
|
ucode_info->surface_desc.size,
|
|
&iova,
|
|
GFP_KERNEL,
|
|
&attrs);
|
|
if (!ucode_info->surface_desc.cpuva) {
|
|
gk20a_err(d, "memory allocation failed\n");
|
|
err = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
ucode_info->surface_desc.iova = iova;
|
|
err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt,
|
|
ucode_info->surface_desc.cpuva,
|
|
ucode_info->surface_desc.iova,
|
|
ucode_info->surface_desc.size);
|
|
if (err) {
|
|
gk20a_err(d, "failed to create sg table\n");
|
|
goto clean_up;
|
|
}
|
|
|
|
buf = (u8 *)ucode_info->surface_desc.cpuva;
|
|
if (!buf) {
|
|
gk20a_err(d, "failed to map surface desc buffer");
|
|
err = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
|
|
fecs_boot_image,
|
|
g->gr.ctx_vars.ucode.fecs.inst.l,
|
|
g->gr.ctx_vars.ucode.fecs.data.l);
|
|
|
|
release_firmware(fecs_fw);
|
|
fecs_fw = NULL;
|
|
|
|
gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs,
|
|
gpccs_boot_image,
|
|
g->gr.ctx_vars.ucode.gpccs.inst.l,
|
|
g->gr.ctx_vars.ucode.gpccs.data.l);
|
|
|
|
release_firmware(gpccs_fw);
|
|
gpccs_fw = NULL;
|
|
|
|
err = gr_gk20a_init_ctxsw_ucode_vaspace(g);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
|
|
|
|
return 0;
|
|
|
|
clean_up:
|
|
if (ucode_info->ucode_gpuva)
|
|
gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva,
|
|
ucode_info->surface_desc.size, gk20a_mem_flag_none);
|
|
if (ucode_info->surface_desc.sgt)
|
|
gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
|
|
if (ucode_info->surface_desc.cpuva)
|
|
dma_free_attrs(d, ucode_info->surface_desc.size,
|
|
ucode_info->surface_desc.cpuva,
|
|
ucode_info->surface_desc.iova,
|
|
&attrs);
|
|
ucode_info->surface_desc.cpuva = NULL;
|
|
ucode_info->surface_desc.iova = 0;
|
|
|
|
release_firmware(gpccs_fw);
|
|
gpccs_fw = NULL;
|
|
release_firmware(fecs_fw);
|
|
fecs_fw = NULL;
|
|
|
|
return err;
|
|
}
|
|
|
|
static void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
|
|
{
|
|
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
|
|
int retries = 20;
|
|
phys_addr_t inst_ptr;
|
|
u32 val;
|
|
|
|
while ((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) &
|
|
gr_fecs_ctxsw_status_1_arb_busy_m()) && retries) {
|
|
udelay(2);
|
|
retries--;
|
|
}
|
|
if (!retries) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"arbiter idle timeout, status: %08x",
|
|
gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
|
|
}
|
|
|
|
gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
|
|
|
|
inst_ptr = ucode_info->inst_blk_desc.cpu_pa;
|
|
gk20a_writel(g, gr_fecs_new_ctx_r(),
|
|
gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
|
|
gr_fecs_new_ctx_target_m() |
|
|
gr_fecs_new_ctx_valid_m());
|
|
|
|
gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
|
|
gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
|
|
gr_fecs_arb_ctx_ptr_target_m());
|
|
|
|
gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
|
|
|
|
/* Wait for arbiter command to complete */
|
|
retries = 20;
|
|
val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
|
|
while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
|
|
udelay(2);
|
|
retries--;
|
|
val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
|
|
}
|
|
if (!retries)
|
|
gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
|
|
|
|
gk20a_writel(g, gr_fecs_current_ctx_r(),
|
|
gr_fecs_current_ctx_ptr_f(inst_ptr >> 12) |
|
|
gr_fecs_current_ctx_target_m() |
|
|
gr_fecs_current_ctx_valid_m());
|
|
/* Send command to arbiter to flush */
|
|
gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s());
|
|
|
|
retries = 20;
|
|
val = (gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()));
|
|
while (gr_fecs_arb_ctx_cmd_cmd_v(val) && retries) {
|
|
udelay(2);
|
|
retries--;
|
|
val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r());
|
|
}
|
|
if (!retries)
|
|
gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
|
|
}
|
|
|
|
/*
 * Boot one ctxsw falcon from the ucode surface using its bootloader.
 *
 * @addr_base:  GPU virtual address of the ucode surface
 * @segments:   boot/code/data layout of this falcon inside the surface
 * @reg_offset: added to every FECS register address used here, so the same
 *              sequence can drive another falcon's register block
 *
 * Writes the bootloader header into DMEM, DMAs the bootloader into IMEM in
 * 256-byte blocks, sets the boot vector and starts the falcon CPU.
 * Always returns 0.
 */
static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	/* surface addresses are programmed in units of 256 bytes */
	const u32 code_addr =
		u64_lo32((addr_base + segments->code.offset) >> 8);
	const u32 data_addr =
		u64_lo32((addr_base + segments->data.offset) >> 8);
	const u32 boot_addr =
		u64_lo32((addr_base + segments->boot.offset) >> 8);
	/* bootloader header, in the order it is streamed into DMEM */
	const u32 boot_hdr[] = {
		0,
		code_addr,
		0,
		segments->code.size,
		0,
		data_addr,
		segments->data.size,
		code_addr,
		0,
		0,
	};
	/* number of 256-byte blocks covering the bootloader, rounded up */
	const u32 nr_blocks = (segments->boot.size + 0xFF) >> 8;
	const u32 imem_dst = segments->boot_imem_offset;
	u32 w;
	u32 blk;

	gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
		     gr_fecs_dmactl_require_ctx_f(0));

	/*
	 * Copy falcon bootloader header into dmem at offset 0.
	 * Configure dmem port 0 for auto-incrementing writes starting at dmem
	 * offset 0.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
		     gr_fecs_dmemc_offs_f(0) |
		     gr_fecs_dmemc_blk_f(0) |
		     gr_fecs_dmemc_aincw_f(1));

	/* Write out the actual data */
	for (w = 0; w < sizeof(boot_hdr) / sizeof(boot_hdr[0]); w++)
		gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), boot_hdr[w]);

	/*
	 * Set the base FB address for the DMA transfer. Subtract off the 256
	 * byte IMEM block offset such that the relative FB and IMEM offsets
	 * match, allowing the IMEM tags to be properly created.
	 */
	gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
		     boot_addr - (imem_dst >> 8));

	for (blk = 0; blk < nr_blocks; blk++) {
		const u32 blk_offs = imem_dst + (blk << 8);

		/* Setup destination IMEM offset */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
			     blk_offs);

		/* Setup source offset (relative to BASE) */
		gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
			     blk_offs);

		gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
			     gr_fecs_dmatrfcmd_imem_f(0x01) |
			     gr_fecs_dmatrfcmd_write_f(0x00) |
			     gr_fecs_dmatrfcmd_size_f(0x06) |
			     gr_fecs_dmatrfcmd_ctxdma_f(0));
	}

	/* Specify the falcon boot vector */
	gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
		     gr_fecs_bootvec_vec_f(segments->boot_entry));

	/* Write to CPUCTL to start the falcon */
	gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
		     gr_fecs_cpuctl_startcpu_f(0x01));

	return 0;
}
|
|
|
|
static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
|
|
{
|
|
struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
|
|
u64 addr_base = ucode_info->ucode_gpuva;
|
|
|
|
gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
|
|
|
|
gr_gk20a_load_falcon_bind_instblk(g);
|
|
|
|
g->ops.gr.falcon_load_ucode(g, addr_base,
|
|
&g->ctxsw_ucode_info.fecs, 0);
|
|
|
|
g->ops.gr.falcon_load_ucode(g, addr_base,
|
|
&g->ctxsw_ucode_info.gpccs,
|
|
gr_gpcs_gpccs_falcon_hwcfg_r() -
|
|
gr_fecs_falcon_hwcfg_r());
|
|
}
|
|
|
|
static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
u32 ret;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (tegra_platform_is_linsim()) {
|
|
gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
|
|
gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
|
|
gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
|
|
gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
|
|
}
|
|
|
|
/*
|
|
* In case bootloader is not supported, revert to the old way of
|
|
* loading gr ucode, without the faster bootstrap routine.
|
|
*/
|
|
if (g->gpu_characteristics.arch == NVHOST_GPU_ARCH_GM200) {
|
|
gr_gk20a_load_falcon_dmem(g);
|
|
gr_gk20a_load_falcon_imem(g);
|
|
gr_gk20a_start_falcon_ucode(g);
|
|
} else {
|
|
if (!gr->skip_ucode_init)
|
|
gr_gk20a_init_ctxsw_ucode(g);
|
|
gr_gk20a_load_falcon_with_bootloader(g);
|
|
gr->skip_ucode_init = true;
|
|
}
|
|
|
|
ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
|
|
GR_IS_UCODE_OP_EQUAL,
|
|
eUcodeHandshakeInitComplete,
|
|
GR_IS_UCODE_OP_SKIP, 0);
|
|
if (ret) {
|
|
gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout");
|
|
return ret;
|
|
}
|
|
|
|
if (support_gk20a_pmu())
|
|
gk20a_writel(g, gr_fecs_current_ctx_r(),
|
|
gr_fecs_current_ctx_valid_false_f());
|
|
|
|
gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
|
|
gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
|
|
gk20a_writel(g, gr_fecs_method_push_r(),
|
|
gr_fecs_method_push_adr_set_watchdog_timeout_f());
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
u32 golden_ctx_image_size = 0;
|
|
u32 zcull_ctx_image_size = 0;
|
|
u32 pm_ctx_image_size = 0;
|
|
u32 ret;
|
|
struct fecs_method_op_gk20a op = {
|
|
.mailbox = { .id = 0, .data = 0,
|
|
.clr = ~0, .ok = 0, .fail = 0},
|
|
.method.data = 0,
|
|
.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
|
|
.cond.fail = GR_IS_UCODE_OP_SKIP,
|
|
};
|
|
|
|
gk20a_dbg_fn("");
|
|
op.method.addr = gr_fecs_method_push_adr_discover_image_size_v();
|
|
op.mailbox.ret = &golden_ctx_image_size;
|
|
ret = gr_gk20a_submit_fecs_method_op(g, op);
|
|
if (ret) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"query golden image size failed");
|
|
return ret;
|
|
}
|
|
op.method.addr = gr_fecs_method_push_adr_discover_zcull_image_size_v();
|
|
op.mailbox.ret = &zcull_ctx_image_size;
|
|
ret = gr_gk20a_submit_fecs_method_op(g, op);
|
|
if (ret) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"query zcull ctx image size failed");
|
|
return ret;
|
|
}
|
|
op.method.addr = gr_fecs_method_push_adr_discover_pm_image_size_v();
|
|
op.mailbox.ret = &pm_ctx_image_size;
|
|
ret = gr_gk20a_submit_fecs_method_op(g, op);
|
|
if (ret) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"query pm ctx image size failed");
|
|
return ret;
|
|
}
|
|
|
|
if (!g->gr.ctx_vars.golden_image_size &&
|
|
!g->gr.ctx_vars.zcull_ctxsw_image_size) {
|
|
g->gr.ctx_vars.golden_image_size = golden_ctx_image_size;
|
|
g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size;
|
|
} else {
|
|
/* hw is different after railgating? */
|
|
BUG_ON(g->gr.ctx_vars.golden_image_size != golden_ctx_image_size);
|
|
BUG_ON(g->gr.ctx_vars.zcull_ctxsw_image_size != zcull_ctx_image_size);
|
|
}
|
|
|
|
g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
static void gk20a_gr_destroy_ctx_buffer(struct platform_device *pdev,
|
|
struct gr_ctx_buffer_desc *desc)
|
|
{
|
|
struct device *dev = &pdev->dev;
|
|
gk20a_free_sgtable(&desc->sgt);
|
|
dma_free_attrs(dev, desc->size, desc->pages,
|
|
desc->iova, &desc->attrs);
|
|
}
|
|
|
|
static int gk20a_gr_alloc_ctx_buffer(struct platform_device *pdev,
|
|
struct gr_ctx_buffer_desc *desc,
|
|
size_t size)
|
|
{
|
|
struct device *dev = &pdev->dev;
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
dma_addr_t iova;
|
|
int err = 0;
|
|
|
|
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
|
|
|
|
desc->pages = dma_alloc_attrs(&pdev->dev, size, &iova,
|
|
GFP_KERNEL, &attrs);
|
|
if (!desc->pages)
|
|
return -ENOMEM;
|
|
|
|
desc->iova = iova;
|
|
desc->size = size;
|
|
desc->attrs = attrs;
|
|
desc->destroy = gk20a_gr_destroy_ctx_buffer;
|
|
err = gk20a_get_sgtable_from_pages(&pdev->dev, &desc->sgt, desc->pages,
|
|
desc->iova, desc->size);
|
|
if (err) {
|
|
dma_free_attrs(dev, desc->size, desc->pages,
|
|
desc->iova, &desc->attrs);
|
|
memset(desc, 0, sizeof(*desc));
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
|
|
{
|
|
struct gk20a_platform *platform = platform_get_drvdata(g->dev);
|
|
struct gr_gk20a *gr = &g->gr;
|
|
int i, attr_buffer_size, err;
|
|
struct platform_device *pdev = g->dev;
|
|
|
|
u32 cb_buffer_size = gr->bundle_cb_default_size *
|
|
gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
|
|
|
|
u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
|
|
gr_scc_pagepool_total_pages_byte_granularity_v();
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
|
|
|
|
gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
|
|
|
|
err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[CIRCULAR],
|
|
cb_buffer_size);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
if (platform->secure_alloc)
|
|
platform->secure_alloc(pdev,
|
|
&gr->global_ctx_buffer[CIRCULAR_VPR],
|
|
cb_buffer_size);
|
|
|
|
gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
|
|
|
|
err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[PAGEPOOL],
|
|
pagepool_buffer_size);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
if (platform->secure_alloc)
|
|
platform->secure_alloc(pdev,
|
|
&gr->global_ctx_buffer[PAGEPOOL_VPR],
|
|
pagepool_buffer_size);
|
|
|
|
gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
|
|
|
|
err = gk20a_gr_alloc_ctx_buffer(pdev, &gr->global_ctx_buffer[ATTRIBUTE],
|
|
attr_buffer_size);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
if (platform->secure_alloc)
|
|
platform->secure_alloc(pdev,
|
|
&gr->global_ctx_buffer[ATTRIBUTE_VPR],
|
|
attr_buffer_size);
|
|
|
|
gk20a_dbg_info("golden_image_size : %d",
|
|
gr->ctx_vars.golden_image_size);
|
|
|
|
err = gk20a_gr_alloc_ctx_buffer(pdev,
|
|
&gr->global_ctx_buffer[GOLDEN_CTX],
|
|
gr->ctx_vars.golden_image_size);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
gk20a_dbg_info("priv_access_map_size : %d",
|
|
gr->ctx_vars.priv_access_map_size);
|
|
|
|
err = gk20a_gr_alloc_ctx_buffer(pdev,
|
|
&gr->global_ctx_buffer[PRIV_ACCESS_MAP],
|
|
gr->ctx_vars.priv_access_map_size);
|
|
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
|
|
clean_up:
|
|
gk20a_err(dev_from_gk20a(g), "fail");
|
|
for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
|
|
if (gr->global_ctx_buffer[i].destroy) {
|
|
gr->global_ctx_buffer[i].destroy(pdev,
|
|
&gr->global_ctx_buffer[i]);
|
|
}
|
|
}
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
|
|
{
|
|
struct platform_device *pdev = g->dev;
|
|
struct gr_gk20a *gr = &g->gr;
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
u32 i;
|
|
|
|
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
|
|
|
|
for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
|
|
gr->global_ctx_buffer[i].destroy(pdev,
|
|
&gr->global_ctx_buffer[i]);
|
|
}
|
|
|
|
gk20a_dbg_fn("done");
|
|
}
|
|
|
|
static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
|
|
struct channel_gk20a *c)
|
|
{
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
|
|
u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
|
|
struct gr_gk20a *gr = &g->gr;
|
|
struct sg_table *sgt;
|
|
u64 size;
|
|
u64 gpu_va;
|
|
u32 i;
|
|
gk20a_dbg_fn("");
|
|
|
|
/* Circular Buffer */
|
|
if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].sgt == NULL)) {
|
|
sgt = gr->global_ctx_buffer[CIRCULAR].sgt;
|
|
size = gr->global_ctx_buffer[CIRCULAR].size;
|
|
} else {
|
|
sgt = gr->global_ctx_buffer[CIRCULAR_VPR].sgt;
|
|
size = gr->global_ctx_buffer[CIRCULAR_VPR].size;
|
|
}
|
|
|
|
gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
|
|
NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
|
|
gk20a_mem_flag_none);
|
|
if (!gpu_va)
|
|
goto clean_up;
|
|
g_bfr_va[CIRCULAR_VA] = gpu_va;
|
|
g_bfr_size[CIRCULAR_VA] = size;
|
|
|
|
/* Attribute Buffer */
|
|
if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt == NULL)) {
|
|
sgt = gr->global_ctx_buffer[ATTRIBUTE].sgt;
|
|
size = gr->global_ctx_buffer[ATTRIBUTE].size;
|
|
} else {
|
|
sgt = gr->global_ctx_buffer[ATTRIBUTE_VPR].sgt;
|
|
size = gr->global_ctx_buffer[ATTRIBUTE_VPR].size;
|
|
}
|
|
|
|
gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
|
|
NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
|
|
gk20a_mem_flag_none);
|
|
if (!gpu_va)
|
|
goto clean_up;
|
|
g_bfr_va[ATTRIBUTE_VA] = gpu_va;
|
|
g_bfr_size[ATTRIBUTE_VA] = size;
|
|
|
|
/* Page Pool */
|
|
if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].sgt == NULL)) {
|
|
sgt = gr->global_ctx_buffer[PAGEPOOL].sgt;
|
|
size = gr->global_ctx_buffer[PAGEPOOL].size;
|
|
} else {
|
|
sgt = gr->global_ctx_buffer[PAGEPOOL_VPR].sgt;
|
|
size = gr->global_ctx_buffer[PAGEPOOL_VPR].size;
|
|
}
|
|
|
|
gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
|
|
NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
|
|
gk20a_mem_flag_none);
|
|
if (!gpu_va)
|
|
goto clean_up;
|
|
g_bfr_va[PAGEPOOL_VA] = gpu_va;
|
|
g_bfr_size[PAGEPOOL_VA] = size;
|
|
|
|
/* Golden Image */
|
|
sgt = gr->global_ctx_buffer[GOLDEN_CTX].sgt;
|
|
size = gr->global_ctx_buffer[GOLDEN_CTX].size;
|
|
gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
|
|
gk20a_mem_flag_none);
|
|
if (!gpu_va)
|
|
goto clean_up;
|
|
g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
|
|
g_bfr_size[GOLDEN_CTX_VA] = size;
|
|
|
|
/* Priv register Access Map */
|
|
sgt = gr->global_ctx_buffer[PRIV_ACCESS_MAP].sgt;
|
|
size = gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;
|
|
gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
|
|
gk20a_mem_flag_none);
|
|
if (!gpu_va)
|
|
goto clean_up;
|
|
g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
|
|
g_bfr_size[PRIV_ACCESS_MAP_VA] = size;
|
|
|
|
c->ch_ctx.global_ctx_buffer_mapped = true;
|
|
return 0;
|
|
|
|
clean_up:
|
|
for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
|
|
if (g_bfr_va[i]) {
|
|
gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
|
|
gr->global_ctx_buffer[i].size,
|
|
gk20a_mem_flag_none);
|
|
g_bfr_va[i] = 0;
|
|
}
|
|
}
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
|
|
{
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
|
|
u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
|
|
u32 i;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
|
|
if (g_bfr_va[i]) {
|
|
gk20a_gmmu_unmap(ch_vm, g_bfr_va[i],
|
|
g_bfr_size[i],
|
|
gk20a_mem_flag_none);
|
|
g_bfr_va[i] = 0;
|
|
g_bfr_size[i] = 0;
|
|
}
|
|
}
|
|
c->ch_ctx.global_ctx_buffer_mapped = false;
|
|
}
|
|
|
|
static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
|
|
struct channel_gk20a *c)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
struct device *d = dev_from_gk20a(g);
|
|
struct sg_table *sgt;
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
int err = 0;
|
|
dma_addr_t iova;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (gr->ctx_vars.buffer_size == 0)
|
|
return 0;
|
|
|
|
/* alloc channel gr ctx buffer */
|
|
gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
|
|
gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
|
|
|
|
gr_ctx->size = gr->ctx_vars.buffer_total_size;
|
|
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
|
|
gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size,
|
|
&iova, GFP_KERNEL, &attrs);
|
|
if (!gr_ctx->pages)
|
|
return -ENOMEM;
|
|
|
|
gr_ctx->iova = iova;
|
|
err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages,
|
|
gr_ctx->iova, gr_ctx->size);
|
|
if (err)
|
|
goto err_free;
|
|
|
|
gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size,
|
|
NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
|
|
gk20a_mem_flag_none);
|
|
if (!gr_ctx->gpu_va)
|
|
goto err_free_sgt;
|
|
|
|
gk20a_free_sgtable(&sgt);
|
|
|
|
return 0;
|
|
|
|
err_free_sgt:
|
|
gk20a_free_sgtable(&sgt);
|
|
err_free:
|
|
dma_free_attrs(d, gr_ctx->size,
|
|
gr_ctx->pages, gr_ctx->iova, &attrs);
|
|
gr_ctx->pages = NULL;
|
|
gr_ctx->iova = 0;
|
|
|
|
return err;
|
|
}
|
|
|
|
static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
|
|
{
|
|
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
struct gk20a *g = c->g;
|
|
struct device *d = dev_from_gk20a(g);
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (!ch_ctx->gr_ctx.gpu_va)
|
|
return;
|
|
|
|
gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va,
|
|
ch_ctx->gr_ctx.size, gk20a_mem_flag_none);
|
|
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
|
|
dma_free_attrs(d, ch_ctx->gr_ctx.size,
|
|
ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs);
|
|
ch_ctx->gr_ctx.pages = NULL;
|
|
ch_ctx->gr_ctx.iova = 0;
|
|
}
|
|
|
|
static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
|
|
struct channel_gk20a *c)
|
|
{
|
|
struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
|
|
struct device *d = dev_from_gk20a(g);
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
struct sg_table *sgt;
|
|
int err = 0;
|
|
dma_addr_t iova;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
patch_ctx->size = 128 * sizeof(u32);
|
|
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
|
|
patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size,
|
|
&iova, GFP_KERNEL,
|
|
&attrs);
|
|
if (!patch_ctx->pages)
|
|
return -ENOMEM;
|
|
|
|
patch_ctx->iova = iova;
|
|
err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages,
|
|
patch_ctx->iova, patch_ctx->size);
|
|
if (err)
|
|
goto err_free;
|
|
|
|
patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size,
|
|
0, gk20a_mem_flag_none);
|
|
if (!patch_ctx->gpu_va)
|
|
goto err_free_sgtable;
|
|
|
|
gk20a_free_sgtable(&sgt);
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
|
|
err_free_sgtable:
|
|
gk20a_free_sgtable(&sgt);
|
|
err_free:
|
|
dma_free_attrs(d, patch_ctx->size,
|
|
patch_ctx->pages, patch_ctx->iova, &attrs);
|
|
patch_ctx->pages = NULL;
|
|
patch_ctx->iova = 0;
|
|
gk20a_err(dev_from_gk20a(g), "fail");
|
|
return err;
|
|
}
|
|
|
|
static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
|
|
{
|
|
struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (patch_ctx->gpu_va)
|
|
gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va,
|
|
patch_ctx->size, gk20a_mem_flag_none);
|
|
patch_ctx->gpu_va = 0;
|
|
patch_ctx->data_count = 0;
|
|
}
|
|
|
|
static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
|
|
{
|
|
struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
|
|
struct gk20a *g = c->g;
|
|
struct device *d = dev_from_gk20a(g);
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
gr_gk20a_unmap_channel_patch_ctx(c);
|
|
|
|
if (patch_ctx->pages) {
|
|
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
|
|
dma_free_attrs(d, patch_ctx->size,
|
|
patch_ctx->pages, patch_ctx->iova, &attrs);
|
|
patch_ctx->pages = NULL;
|
|
patch_ctx->iova = 0;
|
|
}
|
|
}
|
|
|
|
void gk20a_free_channel_ctx(struct channel_gk20a *c)
|
|
{
|
|
gr_gk20a_unmap_global_ctx_buffers(c);
|
|
gr_gk20a_free_channel_patch_ctx(c);
|
|
gr_gk20a_free_channel_gr_ctx(c);
|
|
|
|
/* zcull_ctx, pm_ctx */
|
|
|
|
memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
|
|
|
|
c->num_objects = 0;
|
|
c->first_init = false;
|
|
}
|
|
|
|
/*
 * Tell whether class_num is one of the object classes accepted here:
 * KEPLER_COMPUTE_A, KEPLER_C, FERMI_TWOD_A or KEPLER_DMA_COPY_A.
 */
static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num)
{
	switch (class_num) {
	case KEPLER_COMPUTE_A:
	case KEPLER_C:
	case FERMI_TWOD_A:
	case KEPLER_DMA_COPY_A:
		return true;
	default:
		return false;
	}
}
|
|
|
|
int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
|
|
struct nvhost_alloc_obj_ctx_args *args)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
|
|
int err = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/* an address space needs to have been bound at this point.*/
|
|
if (!gk20a_channel_as_bound(c)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"not bound to address space at time"
|
|
" of grctx allocation");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!g->ops.gr.is_valid_class(g, args->class_num)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"invalid obj class 0x%x", args->class_num);
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/* allocate gr ctx buffer */
|
|
if (ch_ctx->gr_ctx.pages == NULL) {
|
|
err = gr_gk20a_alloc_channel_gr_ctx(g, c);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to allocate gr ctx buffer");
|
|
goto out;
|
|
}
|
|
c->obj_class = args->class_num;
|
|
} else {
|
|
/*TBD: needs to be more subtle about which is being allocated
|
|
* as some are allowed to be allocated along same channel */
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"too many classes alloc'd on same channel");
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/* commit gr ctx buffer */
|
|
err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to commit gr ctx buffer");
|
|
goto out;
|
|
}
|
|
|
|
/* allocate patch buffer */
|
|
if (ch_ctx->patch_ctx.pages == NULL) {
|
|
err = gr_gk20a_alloc_channel_patch_ctx(g, c);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to allocate patch buffer");
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/* map global buffer to channel gpu_va and commit */
|
|
if (!ch_ctx->global_ctx_buffer_mapped) {
|
|
err = gr_gk20a_map_global_ctx_buffers(g, c);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to map global ctx buffer");
|
|
goto out;
|
|
}
|
|
gr_gk20a_elpg_protected_call(g,
|
|
gr_gk20a_commit_global_ctx_buffers(g, c, true));
|
|
}
|
|
|
|
/* tweak any perf parameters per-context here */
|
|
if (args->class_num == KEPLER_COMPUTE_A) {
|
|
int begin_err;
|
|
u32 tex_lock_disable_mask =
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_m() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_m() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_m() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_m() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_m() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_m();
|
|
|
|
u32 texlock = gk20a_readl(g, gr_gpcs_tpcs_sm_sch_texlock_r());
|
|
|
|
texlock = (texlock & ~tex_lock_disable_mask) |
|
|
(gr_gpcs_tpcs_sm_sch_texlock_tex_hash_disable_f() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_disable_f() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_disable_f() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_disable_f() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_disable_f() |
|
|
gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f());
|
|
|
|
begin_err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
|
|
|
|
if (!begin_err) {
|
|
err = gr_gk20a_ctx_patch_write(g, ch_ctx,
|
|
gr_gpcs_tpcs_sm_sch_texlock_r(),
|
|
texlock, true);
|
|
}
|
|
if ((begin_err || err)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to set texlock for compute class");
|
|
}
|
|
if (!begin_err)
|
|
gr_gk20a_ctx_patch_write_end(g, ch_ctx);
|
|
}
|
|
|
|
/* init golden image, ELPG enabled after this is done */
|
|
err = gr_gk20a_init_golden_ctx_image(g, c);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to init golden ctx image");
|
|
goto out;
|
|
}
|
|
|
|
/* load golden image */
|
|
if (!c->first_init) {
|
|
err = gr_gk20a_elpg_protected_call(g,
|
|
gr_gk20a_load_golden_ctx_image(g, c));
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to load golden ctx image");
|
|
goto out;
|
|
}
|
|
c->first_init = true;
|
|
}
|
|
|
|
c->num_objects++;
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
out:
|
|
/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
|
|
can be reused so no need to release them.
|
|
2. golden image init and load is a one time thing so if
|
|
they pass, no need to undo. */
|
|
gk20a_err(dev_from_gk20a(g), "fail");
|
|
return err;
|
|
}
|
|
|
|
int gk20a_free_obj_ctx(struct channel_gk20a *c,
|
|
struct nvhost_free_obj_ctx_args *args)
|
|
{
|
|
unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (c->num_objects == 0)
|
|
return 0;
|
|
|
|
c->num_objects--;
|
|
|
|
if (c->num_objects == 0) {
|
|
c->first_init = false;
|
|
gk20a_disable_channel(c,
|
|
!c->has_timedout,
|
|
timeout);
|
|
gr_gk20a_unmap_channel_patch_ctx(c);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void gk20a_remove_gr_support(struct gr_gk20a *gr)
|
|
{
|
|
struct gk20a *g = gr->g;
|
|
struct device *d = dev_from_gk20a(g);
|
|
DEFINE_DMA_ATTRS(attrs);
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
gr_gk20a_free_global_ctx_buffers(g);
|
|
|
|
dma_free_coherent(d, gr->mmu_wr_mem.size,
|
|
gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
|
|
gr->mmu_wr_mem.cpuva = NULL;
|
|
gr->mmu_wr_mem.iova = 0;
|
|
dma_free_coherent(d, gr->mmu_rd_mem.size,
|
|
gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova);
|
|
gr->mmu_rd_mem.cpuva = NULL;
|
|
gr->mmu_rd_mem.iova = 0;
|
|
|
|
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
|
|
dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages,
|
|
gr->compbit_store.base_iova, &attrs);
|
|
|
|
memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc));
|
|
memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc));
|
|
memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
|
|
|
|
kfree(gr->gpc_tpc_count);
|
|
kfree(gr->gpc_zcb_count);
|
|
kfree(gr->gpc_ppc_count);
|
|
kfree(gr->pes_tpc_count[0]);
|
|
kfree(gr->pes_tpc_count[1]);
|
|
kfree(gr->pes_tpc_mask[0]);
|
|
kfree(gr->pes_tpc_mask[1]);
|
|
kfree(gr->gpc_skip_mask);
|
|
kfree(gr->map_tiles);
|
|
gr->gpc_tpc_count = NULL;
|
|
gr->gpc_zcb_count = NULL;
|
|
gr->gpc_ppc_count = NULL;
|
|
gr->pes_tpc_count[0] = NULL;
|
|
gr->pes_tpc_count[1] = NULL;
|
|
gr->pes_tpc_mask[0] = NULL;
|
|
gr->pes_tpc_mask[1] = NULL;
|
|
gr->gpc_skip_mask = NULL;
|
|
gr->map_tiles = NULL;
|
|
|
|
kfree(gr->ctx_vars.ucode.fecs.inst.l);
|
|
kfree(gr->ctx_vars.ucode.fecs.data.l);
|
|
kfree(gr->ctx_vars.ucode.gpccs.inst.l);
|
|
kfree(gr->ctx_vars.ucode.gpccs.data.l);
|
|
kfree(gr->ctx_vars.sw_bundle_init.l);
|
|
kfree(gr->ctx_vars.sw_method_init.l);
|
|
kfree(gr->ctx_vars.sw_ctx_load.l);
|
|
kfree(gr->ctx_vars.sw_non_ctx_load.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.sys.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
|
|
kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);
|
|
|
|
kfree(gr->ctx_vars.local_golden_image);
|
|
gr->ctx_vars.local_golden_image = NULL;
|
|
|
|
gk20a_allocator_destroy(&gr->comp_tags);
|
|
}
|
|
|
|
static void gr_gk20a_bundle_cb_defaults(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
|
|
gr->bundle_cb_default_size =
|
|
gr_scc_bundle_cb_size_div_256b__prod_v();
|
|
gr->min_gpm_fifo_depth =
|
|
gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
|
gr->bundle_cb_token_limit =
|
|
gr_pd_ab_dist_cfg2_token_limit_init_v();
|
|
}
|
|
|
|
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
u32 gpc_index, pes_index;
|
|
u32 pes_tpc_mask;
|
|
u32 pes_tpc_count;
|
|
u32 pes_heavy_index;
|
|
u32 gpc_new_skip_mask;
|
|
u32 tmp;
|
|
|
|
tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
|
|
gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
|
|
|
|
tmp = gk20a_readl(g, top_num_gpcs_r());
|
|
gr->max_gpc_count = top_num_gpcs_value_v(tmp);
|
|
|
|
tmp = gk20a_readl(g, top_num_fbps_r());
|
|
gr->max_fbps_count = top_num_fbps_value_v(tmp);
|
|
|
|
tmp = gk20a_readl(g, top_tpc_per_gpc_r());
|
|
gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
|
|
|
|
gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
|
|
|
|
tmp = gk20a_readl(g, top_num_fbps_r());
|
|
gr->sys_count = top_num_fbps_value_v(tmp);
|
|
|
|
tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
|
|
gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
|
|
|
|
gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
|
|
gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();
|
|
|
|
if (!gr->gpc_count) {
|
|
gk20a_err(dev_from_gk20a(g), "gpc_count==0!");
|
|
goto clean_up;
|
|
}
|
|
|
|
gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
|
gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
|
gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
|
gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
|
gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
|
gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
|
gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
|
gr->gpc_skip_mask =
|
|
kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
|
|
GFP_KERNEL);
|
|
|
|
if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
|
|
!gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
|
|
!gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
|
|
goto clean_up;
|
|
|
|
gr->ppc_count = 0;
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
|
|
|
|
gr->gpc_tpc_count[gpc_index] =
|
|
gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
|
|
gr->tpc_count += gr->gpc_tpc_count[gpc_index];
|
|
|
|
gr->gpc_zcb_count[gpc_index] =
|
|
gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
|
|
gr->zcb_count += gr->gpc_zcb_count[gpc_index];
|
|
|
|
gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc;
|
|
gr->ppc_count += gr->gpc_ppc_count[gpc_index];
|
|
for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
|
|
|
|
tmp = gk20a_readl(g,
|
|
gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
|
|
gpc_index * proj_gpc_stride_v());
|
|
|
|
pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
|
|
pes_tpc_count = count_bits(pes_tpc_mask);
|
|
|
|
gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
|
|
gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
|
|
}
|
|
|
|
gpc_new_skip_mask = 0;
|
|
if (gr->pes_tpc_count[0][gpc_index] +
|
|
gr->pes_tpc_count[1][gpc_index] == 5) {
|
|
pes_heavy_index =
|
|
gr->pes_tpc_count[0][gpc_index] >
|
|
gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
|
|
|
|
gpc_new_skip_mask =
|
|
gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
|
|
(gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
|
|
(gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
|
|
|
|
} else if ((gr->pes_tpc_count[0][gpc_index] +
|
|
gr->pes_tpc_count[1][gpc_index] == 4) &&
|
|
(gr->pes_tpc_count[0][gpc_index] !=
|
|
gr->pes_tpc_count[1][gpc_index])) {
|
|
pes_heavy_index =
|
|
gr->pes_tpc_count[0][gpc_index] >
|
|
gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
|
|
|
|
gpc_new_skip_mask =
|
|
gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
|
|
(gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
|
|
(gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
|
|
}
|
|
gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
|
|
}
|
|
|
|
gk20a_dbg_info("fbps: %d", gr->num_fbps);
|
|
gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
|
|
gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
|
|
gk20a_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
|
|
gk20a_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
|
|
gk20a_dbg_info("max_tpc_count: %d", gr->max_tpc_count);
|
|
gk20a_dbg_info("sys_count: %d", gr->sys_count);
|
|
gk20a_dbg_info("gpc_count: %d", gr->gpc_count);
|
|
gk20a_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc);
|
|
gk20a_dbg_info("tpc_count: %d", gr->tpc_count);
|
|
gk20a_dbg_info("ppc_count: %d", gr->ppc_count);
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
|
gk20a_dbg_info("gpc_tpc_count[%d] : %d",
|
|
gpc_index, gr->gpc_tpc_count[gpc_index]);
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
|
gk20a_dbg_info("gpc_zcb_count[%d] : %d",
|
|
gpc_index, gr->gpc_zcb_count[gpc_index]);
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
|
gk20a_dbg_info("gpc_ppc_count[%d] : %d",
|
|
gpc_index, gr->gpc_ppc_count[gpc_index]);
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
|
gk20a_dbg_info("gpc_skip_mask[%d] : %d",
|
|
gpc_index, gr->gpc_skip_mask[gpc_index]);
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
|
for (pes_index = 0;
|
|
pes_index < gr->pe_count_per_gpc;
|
|
pes_index++)
|
|
gk20a_dbg_info("pes_tpc_count[%d][%d] : %d",
|
|
pes_index, gpc_index,
|
|
gr->pes_tpc_count[pes_index][gpc_index]);
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
|
for (pes_index = 0;
|
|
pes_index < gr->pe_count_per_gpc;
|
|
pes_index++)
|
|
gk20a_dbg_info("pes_tpc_mask[%d][%d] : %d",
|
|
pes_index, gpc_index,
|
|
gr->pes_tpc_mask[pes_index][gpc_index]);
|
|
|
|
g->ops.gr.bundle_cb_defaults(g);
|
|
g->ops.gr.cb_size_default(g);
|
|
g->ops.gr.calc_global_ctx_buffer_size(g);
|
|
gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
|
|
|
|
gk20a_dbg_info("bundle_cb_default_size: %d",
|
|
gr->bundle_cb_default_size);
|
|
gk20a_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
|
|
gk20a_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
|
|
gk20a_dbg_info("attrib_cb_default_size: %d",
|
|
gr->attrib_cb_default_size);
|
|
gk20a_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size);
|
|
gk20a_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size);
|
|
gk20a_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size);
|
|
gk20a_dbg_info("timeslice_mode: %d", gr->timeslice_mode);
|
|
|
|
return 0;
|
|
|
|
clean_up:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
struct device *d = dev_from_gk20a(g);
|
|
dma_addr_t iova;
|
|
|
|
gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
|
|
|
|
gr->mmu_wr_mem.size = gr->mmu_wr_mem_size;
|
|
gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size,
|
|
&iova, GFP_KERNEL);
|
|
if (!gr->mmu_wr_mem.cpuva)
|
|
goto err;
|
|
|
|
gr->mmu_wr_mem.iova = iova;
|
|
|
|
gr->mmu_rd_mem.size = gr->mmu_rd_mem_size;
|
|
gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size,
|
|
&iova, GFP_KERNEL);
|
|
if (!gr->mmu_rd_mem.cpuva)
|
|
goto err_free_wr_mem;
|
|
|
|
gr->mmu_rd_mem.iova = iova;
|
|
return 0;
|
|
|
|
err_free_wr_mem:
|
|
dma_free_coherent(d, gr->mmu_wr_mem.size,
|
|
gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
|
|
gr->mmu_wr_mem.cpuva = NULL;
|
|
gr->mmu_wr_mem.iova = 0;
|
|
err:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/*
 * The first 18 primes, consulted as candidate row offsets.  The table is
 * lookup data only, hence const.
 */
static const u32 prime_set[18] = {
	2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
|
|
|
|
static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
s32 comm_denom;
|
|
s32 mul_factor;
|
|
s32 *init_frac = NULL;
|
|
s32 *init_err = NULL;
|
|
s32 *run_err = NULL;
|
|
s32 *sorted_num_tpcs = NULL;
|
|
s32 *sorted_to_unsorted_gpc_map = NULL;
|
|
u32 gpc_index;
|
|
u32 gpc_mark = 0;
|
|
u32 num_tpc;
|
|
u32 max_tpc_count = 0;
|
|
u32 swap;
|
|
u32 tile_count;
|
|
u32 index;
|
|
bool delete_map = false;
|
|
bool gpc_sorted;
|
|
int ret = 0;
|
|
|
|
init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
|
|
init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
|
|
run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
|
|
sorted_num_tpcs =
|
|
kzalloc(proj_scal_max_gpcs_v() *
|
|
proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
|
|
GFP_KERNEL);
|
|
sorted_to_unsorted_gpc_map =
|
|
kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
|
|
|
|
if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
|
|
sorted_to_unsorted_gpc_map)) {
|
|
ret = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
|
|
|
|
if (gr->tpc_count == 3)
|
|
gr->map_row_offset = 2;
|
|
else if (gr->tpc_count < 3)
|
|
gr->map_row_offset = 1;
|
|
else {
|
|
gr->map_row_offset = 3;
|
|
|
|
for (index = 1; index < 18; index++) {
|
|
u32 prime = prime_set[index];
|
|
if ((gr->tpc_count % prime) != 0) {
|
|
gr->map_row_offset = prime;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (gr->tpc_count) {
|
|
case 15:
|
|
gr->map_row_offset = 6;
|
|
break;
|
|
case 14:
|
|
gr->map_row_offset = 5;
|
|
break;
|
|
case 13:
|
|
gr->map_row_offset = 2;
|
|
break;
|
|
case 11:
|
|
gr->map_row_offset = 7;
|
|
break;
|
|
case 10:
|
|
gr->map_row_offset = 6;
|
|
break;
|
|
case 7:
|
|
case 5:
|
|
gr->map_row_offset = 1;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (gr->map_tiles) {
|
|
if (gr->map_tile_count != gr->tpc_count)
|
|
delete_map = true;
|
|
|
|
for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
|
|
if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count)
|
|
delete_map = true;
|
|
}
|
|
|
|
if (delete_map) {
|
|
kfree(gr->map_tiles);
|
|
gr->map_tiles = NULL;
|
|
gr->map_tile_count = 0;
|
|
}
|
|
}
|
|
|
|
if (gr->map_tiles == NULL) {
|
|
gr->map_tile_count = proj_scal_max_gpcs_v();
|
|
|
|
gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
|
|
if (gr->map_tiles == NULL) {
|
|
ret = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
|
|
sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
|
|
}
|
|
|
|
gpc_sorted = false;
|
|
while (!gpc_sorted) {
|
|
gpc_sorted = true;
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
|
|
if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
|
|
gpc_sorted = false;
|
|
swap = sorted_num_tpcs[gpc_index];
|
|
sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
|
|
sorted_num_tpcs[gpc_index + 1] = swap;
|
|
swap = sorted_to_unsorted_gpc_map[gpc_index];
|
|
sorted_to_unsorted_gpc_map[gpc_index] =
|
|
sorted_to_unsorted_gpc_map[gpc_index + 1];
|
|
sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
|
if (gr->gpc_tpc_count[gpc_index] > max_tpc_count)
|
|
max_tpc_count = gr->gpc_tpc_count[gpc_index];
|
|
|
|
mul_factor = gr->gpc_count * max_tpc_count;
|
|
if (mul_factor & 0x1)
|
|
mul_factor = 2;
|
|
else
|
|
mul_factor = 1;
|
|
|
|
comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
num_tpc = sorted_num_tpcs[gpc_index];
|
|
|
|
init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
|
|
|
|
if (num_tpc != 0)
|
|
init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
|
|
else
|
|
init_err[gpc_index] = 0;
|
|
|
|
run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
|
|
}
|
|
|
|
while (gpc_mark < gr->tpc_count) {
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
if ((run_err[gpc_index] * 2) >= comm_denom) {
|
|
gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
|
|
run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
|
|
} else
|
|
run_err[gpc_index] += init_frac[gpc_index];
|
|
}
|
|
}
|
|
}
|
|
|
|
clean_up:
|
|
kfree(init_frac);
|
|
kfree(init_err);
|
|
kfree(run_err);
|
|
kfree(sorted_num_tpcs);
|
|
kfree(sorted_to_unsorted_gpc_map);
|
|
|
|
if (ret)
|
|
gk20a_err(dev_from_gk20a(g), "fail");
|
|
else
|
|
gk20a_dbg_fn("done");
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
struct gr_zcull_gk20a *zcull = &gr->zcull;
|
|
|
|
zcull->aliquot_width = gr->tpc_count * 16;
|
|
zcull->aliquot_height = 16;
|
|
|
|
zcull->width_align_pixels = gr->tpc_count * 16;
|
|
zcull->height_align_pixels = 32;
|
|
|
|
zcull->aliquot_size =
|
|
zcull->aliquot_width * zcull->aliquot_height;
|
|
|
|
/* assume no floor sweeping since we only have 1 tpc in 1 gpc */
|
|
zcull->pixel_squares_by_aliquots =
|
|
gr->zcb_count * 16 * 16 * gr->tpc_count /
|
|
(gr->gpc_count * gr->gpc_tpc_count[0]);
|
|
|
|
zcull->total_aliquots =
|
|
gr_gpc0_zcull_total_ram_size_num_aliquots_f(
|
|
gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
|
|
|
|
return 0;
|
|
}
|
|
|
|
u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
/* assuming gr has already been initialized */
|
|
return gr->ctx_vars.zcull_ctxsw_image_size;
|
|
}
|
|
|
|
/*
 * Record the zcull buffer GPU VA and context-switch mode in the channel's
 * zcull context, then apply them through gr_gk20a_ctx_zcull_setup().
 * Returns that call's result.
 */
int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
			struct channel_gk20a *c, u64 zcull_va, u32 mode)
{
	c->ch_ctx.zcull_ctx.gpu_va = zcull_va;
	c->ch_ctx.zcull_ctx.ctx_sw_mode = mode;

	/* TBD: don't disable channel in sw method processing */
	return gr_gk20a_ctx_zcull_setup(g, c, true);
}
|
|
|
|
int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
|
|
struct gr_zcull_info *zcull_params)
|
|
{
|
|
struct gr_zcull_gk20a *zcull = &gr->zcull;
|
|
|
|
zcull_params->width_align_pixels = zcull->width_align_pixels;
|
|
zcull_params->height_align_pixels = zcull->height_align_pixels;
|
|
zcull_params->pixel_squares_by_aliquots =
|
|
zcull->pixel_squares_by_aliquots;
|
|
zcull_params->aliquot_total = zcull->total_aliquots;
|
|
|
|
zcull_params->region_byte_multiplier =
|
|
gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
|
|
zcull_params->region_header_size =
|
|
proj_scal_litter_num_gpcs_v() *
|
|
gr_zcull_save_restore_header_bytes_per_gpc_v();
|
|
|
|
zcull_params->subregion_header_size =
|
|
proj_scal_litter_num_gpcs_v() *
|
|
gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
|
|
|
|
zcull_params->subregion_width_align_pixels =
|
|
gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
|
|
zcull_params->subregion_height_align_pixels =
|
|
gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
|
|
zcull_params->subregion_count = gr_zcull_subregion_qty_v();
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Program one ZBC colour entry at @index into the L2 and DS tables and
 * mirror it in gr->zbc_col_tbl (bumping its ref_cnt).
 *
 * GR engine activity is disabled and the engine idled around the update.
 *
 * Returns 0 on success.  On failure returns the first error seen: from
 * disabling engine activity, from waiting for idle (in which case nothing
 * is programmed), or from re-enabling engine activity.
 */
static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
				  struct zbc_entry *color_val, u32 index)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
	u32 i;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	int ret;
	int enable_err;

	ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"failed to disable gr engine activity\n");
		return ret;
	}

	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"failed to idle graphics\n");
		goto clean_up;
	}

	/* update l2 table */
	g->ops.ltc.set_zbc_color_entry(g, color_val, index);

	/* update ds table */
	gk20a_writel(g, gr_ds_zbc_color_r_r(),
		gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
	gk20a_writel(g, gr_ds_zbc_color_g_r(),
		gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
	gk20a_writel(g, gr_ds_zbc_color_b_r(),
		gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
	gk20a_writel(g, gr_ds_zbc_color_a_r(),
		gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));

	gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
		gr_ds_zbc_color_fmt_val_f(color_val->format));

	gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
		gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));

	/* trigger the write */
	gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
		gr_ds_zbc_tbl_ld_select_c_f() |
		gr_ds_zbc_tbl_ld_action_write_f() |
		gr_ds_zbc_tbl_ld_trigger_active_f());

	/* update local copy */
	for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
		gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
		gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
	}
	gr->zbc_col_tbl[index].format = color_val->format;
	gr->zbc_col_tbl[index].ref_cnt++;

clean_up:
	/*
	 * Always re-enable the engine, but do not let a successful
	 * re-enable hide an earlier idle failure from the caller.
	 */
	enable_err = gk20a_fifo_enable_engine_activity(g, gr_info);
	if (enable_err) {
		gk20a_err(dev_from_gk20a(g),
			"failed to enable gr engine activity\n");
		if (!ret)
			ret = enable_err;
	}

	return ret;
}
|
|
|
|
/*
 * Program one ZBC depth entry at @index into the L2 and DS tables and
 * mirror it in gr->zbc_dep_tbl (bumping its ref_cnt).
 *
 * GR engine activity is disabled and the engine idled around the update.
 *
 * Returns 0 on success.  On failure returns the first error seen: from
 * disabling engine activity, from waiting for idle (in which case nothing
 * is programmed), or from re-enabling engine activity.
 */
static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
				  struct zbc_entry *depth_val, u32 index)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	int ret;
	int enable_err;

	ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"failed to disable gr engine activity\n");
		return ret;
	}

	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"failed to idle graphics\n");
		goto clean_up;
	}

	/* update l2 table */
	g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);

	/* update ds table */
	gk20a_writel(g, gr_ds_zbc_z_r(),
		gr_ds_zbc_z_val_f(depth_val->depth));

	gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
		gr_ds_zbc_z_fmt_val_f(depth_val->format));

	gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
		gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));

	/* trigger the write */
	gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
		gr_ds_zbc_tbl_ld_select_z_f() |
		gr_ds_zbc_tbl_ld_action_write_f() |
		gr_ds_zbc_tbl_ld_trigger_active_f());

	/* update local copy */
	gr->zbc_dep_tbl[index].depth = depth_val->depth;
	gr->zbc_dep_tbl[index].format = depth_val->format;
	gr->zbc_dep_tbl[index].ref_cnt++;

clean_up:
	/*
	 * Always re-enable the engine, but do not let a successful
	 * re-enable hide an earlier idle failure from the caller.
	 */
	enable_err = gk20a_fifo_enable_engine_activity(g, gr_info);
	if (enable_err) {
		gk20a_err(dev_from_gk20a(g),
			"failed to enable gr engine activity\n");
		if (!ret)
			ret = enable_err;
	}

	return ret;
}
|
|
|
|
/*
 * Call gk20a_pmu_save_zbc() for @entries entries while GR engine activity
 * is disabled and the engine is idle.
 *
 * If engine activity cannot be disabled nothing else is done.  If the
 * engine does not idle, the PMU call is skipped but engine activity is
 * still re-enabled.  Failures are only logged.
 */
void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
{
	struct fifo_engine_info_gk20a *gr_engine =
		&g->fifo.engine_info[ENGINE_GR_GK20A];
	unsigned long deadline = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 status;

	status = gk20a_fifo_disable_engine_activity(g, gr_engine, true);
	if (status) {
		gk20a_err(dev_from_gk20a(g),
			"failed to disable gr engine activity\n");
		return;
	}

	status = gr_gk20a_wait_idle(g, deadline, GR_IDLE_CHECK_DEFAULT);
	if (status) {
		gk20a_err(dev_from_gk20a(g),
			"failed to idle graphics\n");
	} else {
		/* update zbc */
		gk20a_pmu_save_zbc(g, entries);
	}

	status = gk20a_fifo_enable_engine_activity(g, gr_engine);
	if (status)
		gk20a_err(dev_from_gk20a(g),
			"failed to enable gr engine activity\n");
}
|
|
|
|
int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
|
|
struct zbc_entry *zbc_val)
|
|
{
|
|
struct zbc_color_table *c_tbl;
|
|
struct zbc_depth_table *d_tbl;
|
|
u32 i, ret = -ENOMEM;
|
|
bool added = false;
|
|
u32 entries;
|
|
|
|
/* no endian swap ? */
|
|
|
|
mutex_lock(&gr->zbc_lock);
|
|
switch (zbc_val->type) {
|
|
case GK20A_ZBC_TYPE_COLOR:
|
|
/* search existing tables */
|
|
for (i = 0; i < gr->max_used_color_index; i++) {
|
|
|
|
c_tbl = &gr->zbc_col_tbl[i];
|
|
|
|
if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format &&
|
|
memcmp(c_tbl->color_ds, zbc_val->color_ds,
|
|
sizeof(zbc_val->color_ds)) == 0) {
|
|
|
|
if (memcmp(c_tbl->color_l2, zbc_val->color_l2,
|
|
sizeof(zbc_val->color_l2))) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"zbc l2 and ds color don't match with existing entries");
|
|
ret = -EINVAL;
|
|
goto err_mutex;
|
|
}
|
|
added = true;
|
|
c_tbl->ref_cnt++;
|
|
ret = 0;
|
|
break;
|
|
}
|
|
}
|
|
/* add new table */
|
|
if (!added &&
|
|
gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
|
|
|
|
c_tbl =
|
|
&gr->zbc_col_tbl[gr->max_used_color_index];
|
|
WARN_ON(c_tbl->ref_cnt != 0);
|
|
|
|
ret = gr_gk20a_add_zbc_color(g, gr,
|
|
zbc_val, gr->max_used_color_index);
|
|
|
|
if (!ret)
|
|
gr->max_used_color_index++;
|
|
}
|
|
break;
|
|
case GK20A_ZBC_TYPE_DEPTH:
|
|
/* search existing tables */
|
|
for (i = 0; i < gr->max_used_depth_index; i++) {
|
|
|
|
d_tbl = &gr->zbc_dep_tbl[i];
|
|
|
|
if (d_tbl->ref_cnt &&
|
|
d_tbl->depth == zbc_val->depth &&
|
|
d_tbl->format == zbc_val->format) {
|
|
added = true;
|
|
d_tbl->ref_cnt++;
|
|
ret = 0;
|
|
break;
|
|
}
|
|
}
|
|
/* add new table */
|
|
if (!added &&
|
|
gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
|
|
|
|
d_tbl =
|
|
&gr->zbc_dep_tbl[gr->max_used_depth_index];
|
|
WARN_ON(d_tbl->ref_cnt != 0);
|
|
|
|
ret = gr_gk20a_add_zbc_depth(g, gr,
|
|
zbc_val, gr->max_used_depth_index);
|
|
|
|
if (!ret)
|
|
gr->max_used_depth_index++;
|
|
}
|
|
break;
|
|
default:
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"invalid zbc table type %d", zbc_val->type);
|
|
ret = -EINVAL;
|
|
goto err_mutex;
|
|
}
|
|
|
|
if (!added && ret == 0) {
|
|
/* update zbc for elpg only when new entry is added */
|
|
entries = max(gr->max_used_color_index,
|
|
gr->max_used_depth_index);
|
|
gr_gk20a_pmu_save_zbc(g, entries);
|
|
}
|
|
|
|
err_mutex:
|
|
mutex_unlock(&gr->zbc_lock);
|
|
return ret;
|
|
}
|
|
|
|
/* get a zbc table entry specified by index
|
|
* return table size when type is invalid */
|
|
int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
|
|
struct zbc_query_params *query_params)
|
|
{
|
|
u32 index = query_params->index_size;
|
|
u32 i;
|
|
|
|
switch (query_params->type) {
|
|
case GK20A_ZBC_TYPE_INVALID:
|
|
query_params->index_size = GK20A_ZBC_TABLE_SIZE;
|
|
break;
|
|
case GK20A_ZBC_TYPE_COLOR:
|
|
if (index >= GK20A_ZBC_TABLE_SIZE) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"invalid zbc color table index\n");
|
|
return -EINVAL;
|
|
}
|
|
for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
|
|
query_params->color_l2[i] =
|
|
gr->zbc_col_tbl[index].color_l2[i];
|
|
query_params->color_ds[i] =
|
|
gr->zbc_col_tbl[index].color_ds[i];
|
|
}
|
|
query_params->format = gr->zbc_col_tbl[index].format;
|
|
query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
|
|
break;
|
|
case GK20A_ZBC_TYPE_DEPTH:
|
|
if (index >= GK20A_ZBC_TABLE_SIZE) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"invalid zbc depth table index\n");
|
|
return -EINVAL;
|
|
}
|
|
query_params->depth = gr->zbc_dep_tbl[index].depth;
|
|
query_params->format = gr->zbc_dep_tbl[index].format;
|
|
query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
|
|
break;
|
|
default:
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"invalid zbc table type\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int gr_gk20a_load_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
int i, ret;
|
|
|
|
mutex_init(&gr->zbc_lock);
|
|
for (i = 0; i < gr->max_used_color_index; i++) {
|
|
struct zbc_color_table *c_tbl = &gr->zbc_col_tbl[i];
|
|
struct zbc_entry zbc_val;
|
|
|
|
zbc_val.type = GK20A_ZBC_TYPE_COLOR;
|
|
memcpy(zbc_val.color_ds,
|
|
c_tbl->color_ds, sizeof(zbc_val.color_ds));
|
|
memcpy(zbc_val.color_l2,
|
|
c_tbl->color_l2, sizeof(zbc_val.color_l2));
|
|
zbc_val.format = c_tbl->format;
|
|
|
|
ret = gr_gk20a_add_zbc_color(g, gr, &zbc_val, i);
|
|
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
for (i = 0; i < gr->max_used_depth_index; i++) {
|
|
struct zbc_depth_table *d_tbl = &gr->zbc_dep_tbl[i];
|
|
struct zbc_entry zbc_val;
|
|
|
|
zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
|
|
zbc_val.depth = d_tbl->depth;
|
|
zbc_val.format = d_tbl->format;
|
|
|
|
ret = gr_gk20a_add_zbc_depth(g, gr, &zbc_val, i);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
struct zbc_entry zbc_val;
|
|
u32 i, err;
|
|
|
|
/* load default color table */
|
|
zbc_val.type = GK20A_ZBC_TYPE_COLOR;
|
|
|
|
zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
|
|
for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
|
|
zbc_val.color_ds[i] = 0;
|
|
zbc_val.color_l2[i] = 0;
|
|
}
|
|
err = gr_gk20a_add_zbc(g, gr, &zbc_val);
|
|
|
|
zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
|
|
for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
|
|
zbc_val.color_ds[i] = 0xffffffff;
|
|
zbc_val.color_l2[i] = 0x3f800000;
|
|
}
|
|
err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
|
|
|
|
zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
|
|
for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
|
|
zbc_val.color_ds[i] = 0;
|
|
zbc_val.color_l2[i] = 0;
|
|
}
|
|
err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
|
|
|
|
zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
|
|
for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
|
|
zbc_val.color_ds[i] = 0x3f800000;
|
|
zbc_val.color_l2[i] = 0x3f800000;
|
|
}
|
|
err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
|
|
|
|
if (!err)
|
|
gr->max_default_color_index = 4;
|
|
else {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to load default zbc color table\n");
|
|
return err;
|
|
}
|
|
|
|
/* load default depth table */
|
|
zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
|
|
|
|
zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
|
|
zbc_val.depth = 0;
|
|
err = gr_gk20a_add_zbc(g, gr, &zbc_val);
|
|
|
|
zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
|
|
zbc_val.depth = 0x3f800000;
|
|
err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
|
|
|
|
if (!err)
|
|
gr->max_default_depth_index = 2;
|
|
else {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to load default zbc depth table\n");
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Add @zbc_val to the ZBC tables, running gr_gk20a_add_zbc() through
 * gr_gk20a_elpg_protected_call().  Returns the value that call yields.
 */
int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
			struct zbc_entry *zbc_val)
{
	int status;

	gk20a_dbg_fn("");

	status = gr_gk20a_elpg_protected_call(g,
			gr_gk20a_add_zbc(g, gr, zbc_val));

	return status;
}
|
|
|
|
/*
 * Set the block clock field of the therm gate-control register of
 * @engine to the RUN or AUTO setting.
 *
 * Any other @mode is logged and the register is left unwritten.
 */
void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine)
{
	u32 reg = gk20a_readl(g, therm_gate_ctrl_r(engine));
	u32 blk_clk;

	if (mode == BLCG_RUN) {
		blk_clk = therm_gate_ctrl_blk_clk_run_f();
	} else if (mode == BLCG_AUTO) {
		blk_clk = therm_gate_ctrl_blk_clk_auto_f();
	} else {
		gk20a_err(dev_from_gk20a(g),
			"invalid blcg mode %d", mode);
		return;
	}

	reg = set_field(reg, therm_gate_ctrl_blk_clk_m(), blk_clk);
	gk20a_writel(g, therm_gate_ctrl_r(engine), reg);
}
|
|
|
|
/*
 * Set the engine clock field of the therm gate-control register of
 * @engine to the RUN, STOP or AUTO setting, program the idle filter, and
 * clear the FECS and HUBMMU idle-filter values.
 *
 * An invalid @mode is only logged: the idle-filter programming and the
 * register writes below are still performed.
 */
void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine)
{
	u32 reg, filter;

	reg = gk20a_readl(g, therm_gate_ctrl_r(engine));

	if (mode == ELCG_RUN) {
		reg = set_field(reg,
				therm_gate_ctrl_eng_clk_m(),
				therm_gate_ctrl_eng_clk_run_f());
		reg = set_field(reg,
				therm_gate_ctrl_eng_pwr_m(),
				/* set elpg to auto to meet hw expectation */
				therm_gate_ctrl_eng_pwr_auto_f());
	} else if (mode == ELCG_STOP) {
		reg = set_field(reg,
				therm_gate_ctrl_eng_clk_m(),
				therm_gate_ctrl_eng_clk_stop_f());
	} else if (mode == ELCG_AUTO) {
		reg = set_field(reg,
				therm_gate_ctrl_eng_clk_m(),
				therm_gate_ctrl_eng_clk_auto_f());
	} else {
		gk20a_err(dev_from_gk20a(g),
			"invalid elcg mode %d", mode);
	}

	if (tegra_platform_is_linsim())
		reg = set_field(reg,
				therm_gate_ctrl_eng_delay_after_m(),
				therm_gate_ctrl_eng_delay_after_f(4));

	/* 2 * (1 << 9) = 1024 clks */
	reg = set_field(reg,
			therm_gate_ctrl_eng_idle_filt_exp_m(),
			therm_gate_ctrl_eng_idle_filt_exp_f(9));
	reg = set_field(reg,
			therm_gate_ctrl_eng_idle_filt_mant_m(),
			therm_gate_ctrl_eng_idle_filt_mant_f(2));
	gk20a_writel(g, therm_gate_ctrl_r(engine), reg);

	/* default fecs_idle_filter to 0 */
	filter = gk20a_readl(g, therm_fecs_idle_filter_r());
	filter &= ~therm_fecs_idle_filter_value_m();
	gk20a_writel(g, therm_fecs_idle_filter_r(), filter);

	/* default hubmmu_idle_filter to 0 */
	filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
	filter &= ~therm_hubmmu_idle_filter_value_m();
	gk20a_writel(g, therm_hubmmu_idle_filter_r(), filter);
}
|
|
|
|
static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
|
|
{
|
|
u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
|
|
u32 *zcull_map_tiles, *zcull_bank_counters;
|
|
u32 map_counter;
|
|
u32 rcp_conserv;
|
|
u32 offset;
|
|
bool floorsweep = false;
|
|
|
|
if (!gr->map_tiles)
|
|
return -1;
|
|
|
|
zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
|
|
proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
|
|
if (!zcull_map_tiles) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to allocate zcull temp buffers");
|
|
return -ENOMEM;
|
|
}
|
|
zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
|
|
proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
|
|
|
|
if (!zcull_bank_counters) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to allocate zcull temp buffers");
|
|
kfree(zcull_map_tiles);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
|
|
zcull_map_tiles[map_counter] =
|
|
zcull_bank_counters[gr->map_tiles[map_counter]];
|
|
zcull_bank_counters[gr->map_tiles[map_counter]]++;
|
|
}
|
|
|
|
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(),
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));
|
|
|
|
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));
|
|
|
|
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));
|
|
|
|
gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
|
|
gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));
|
|
|
|
kfree(zcull_map_tiles);
|
|
kfree(zcull_bank_counters);
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
|
|
gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
|
|
|
|
if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
|
|
gpc_zcull_count < gpc_tpc_count) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
|
|
gpc_zcull_count, gpc_tpc_count, gpc_index);
|
|
return -EINVAL;
|
|
}
|
|
if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
|
|
gpc_zcull_count != 0)
|
|
floorsweep = true;
|
|
}
|
|
|
|
/* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */
|
|
rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v();
|
|
|
|
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
|
|
offset = gpc_index * proj_gpc_stride_v();
|
|
|
|
if (floorsweep) {
|
|
gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
|
|
gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
|
|
gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
|
|
gr->max_zcull_per_gpc_count));
|
|
} else {
|
|
gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
|
|
gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
|
|
gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
|
|
gr->gpc_tpc_count[gpc_index]));
|
|
}
|
|
|
|
gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
|
|
gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
|
|
gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
|
|
|
|
gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
|
|
gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
|
|
}
|
|
|
|
gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
|
|
gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
|
|
{
|
|
/* enable tpc exception forwarding */
|
|
gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(),
|
|
gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f());
|
|
|
|
/* enable gpc exception forwarding */
|
|
gk20a_writel(g, gr_gpc0_gpccs_gpc_exception_en_r(),
|
|
gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f());
|
|
}
|
|
|
|
|
|
void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
|
|
{
|
|
/* enable exceptions */
|
|
gk20a_writel(g, gr_fe_hww_esr_r(),
|
|
gr_fe_hww_esr_en_enable_f() |
|
|
gr_fe_hww_esr_reset_active_f());
|
|
gk20a_writel(g, gr_memfmt_hww_esr_r(),
|
|
gr_memfmt_hww_esr_en_enable_f() |
|
|
gr_memfmt_hww_esr_reset_active_f());
|
|
gk20a_writel(g, gr_scc_hww_esr_r(),
|
|
gr_scc_hww_esr_en_enable_f() |
|
|
gr_scc_hww_esr_reset_active_f());
|
|
gk20a_writel(g, gr_mme_hww_esr_r(),
|
|
gr_mme_hww_esr_en_enable_f() |
|
|
gr_mme_hww_esr_reset_active_f());
|
|
gk20a_writel(g, gr_pd_hww_esr_r(),
|
|
gr_pd_hww_esr_en_enable_f() |
|
|
gr_pd_hww_esr_reset_active_f());
|
|
gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */
|
|
gr_sked_hww_esr_reset_active_f());
|
|
gk20a_writel(g, gr_ds_hww_esr_r(),
|
|
gr_ds_hww_esr_en_enabled_f() |
|
|
gr_ds_hww_esr_reset_task_f());
|
|
gk20a_writel(g, gr_ds_hww_report_mask_r(),
|
|
gr_ds_hww_report_mask_sph0_err_report_f() |
|
|
gr_ds_hww_report_mask_sph1_err_report_f() |
|
|
gr_ds_hww_report_mask_sph2_err_report_f() |
|
|
gr_ds_hww_report_mask_sph3_err_report_f() |
|
|
gr_ds_hww_report_mask_sph4_err_report_f() |
|
|
gr_ds_hww_report_mask_sph5_err_report_f() |
|
|
gr_ds_hww_report_mask_sph6_err_report_f() |
|
|
gr_ds_hww_report_mask_sph7_err_report_f() |
|
|
gr_ds_hww_report_mask_sph8_err_report_f() |
|
|
gr_ds_hww_report_mask_sph9_err_report_f() |
|
|
gr_ds_hww_report_mask_sph10_err_report_f() |
|
|
gr_ds_hww_report_mask_sph11_err_report_f() |
|
|
gr_ds_hww_report_mask_sph12_err_report_f() |
|
|
gr_ds_hww_report_mask_sph13_err_report_f() |
|
|
gr_ds_hww_report_mask_sph14_err_report_f() |
|
|
gr_ds_hww_report_mask_sph15_err_report_f() |
|
|
gr_ds_hww_report_mask_sph16_err_report_f() |
|
|
gr_ds_hww_report_mask_sph17_err_report_f() |
|
|
gr_ds_hww_report_mask_sph18_err_report_f() |
|
|
gr_ds_hww_report_mask_sph19_err_report_f() |
|
|
gr_ds_hww_report_mask_sph20_err_report_f() |
|
|
gr_ds_hww_report_mask_sph21_err_report_f() |
|
|
gr_ds_hww_report_mask_sph22_err_report_f() |
|
|
gr_ds_hww_report_mask_sph23_err_report_f());
|
|
}
|
|
|
|
static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g)
|
|
{
|
|
/* setup sm warp esr report masks */
|
|
gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
|
|
|
|
/* setup sm global esr report mask */
|
|
gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
|
|
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
|
|
gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f());
|
|
}
|
|
|
|
static int gk20a_init_gr_setup_hw(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
|
|
struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
|
|
u32 data;
|
|
u32 addr_lo, addr_hi;
|
|
u64 addr;
|
|
unsigned long end_jiffies = jiffies +
|
|
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
u32 fe_go_idle_timeout_save;
|
|
u32 last_method_data = 0;
|
|
u32 i, err;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (g->ops.gr.init_gpc_mmu)
|
|
g->ops.gr.init_gpc_mmu(g);
|
|
|
|
/* slcg prod values */
|
|
g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled);
|
|
g->ops.clock_gating.slcg_perf_load_gating_prod(g, g->slcg_enabled);
|
|
|
|
/* init mmu debug buffer */
|
|
addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova);
|
|
addr_lo = u64_lo32(addr);
|
|
addr_hi = u64_hi32(addr);
|
|
addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) |
|
|
(addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v()));
|
|
|
|
gk20a_writel(g, fb_mmu_debug_wr_r(),
|
|
fb_mmu_debug_wr_aperture_vid_mem_f() |
|
|
fb_mmu_debug_wr_vol_false_f() |
|
|
fb_mmu_debug_wr_addr_v(addr));
|
|
|
|
addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova);
|
|
addr_lo = u64_lo32(addr);
|
|
addr_hi = u64_hi32(addr);
|
|
addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) |
|
|
(addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v()));
|
|
|
|
gk20a_writel(g, fb_mmu_debug_rd_r(),
|
|
fb_mmu_debug_rd_aperture_vid_mem_f() |
|
|
fb_mmu_debug_rd_vol_false_f() |
|
|
fb_mmu_debug_rd_addr_v(addr));
|
|
|
|
/* load gr floorsweeping registers */
|
|
data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
|
|
data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
|
|
gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
|
|
gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
|
|
|
|
gr_gk20a_zcull_init_hw(g, gr);
|
|
|
|
g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled);
|
|
g->ops.clock_gating.pg_gr_load_gating_prod(g, true);
|
|
|
|
if (g->elcg_enabled) {
|
|
gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
|
|
gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
|
|
} else {
|
|
gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A);
|
|
gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A);
|
|
}
|
|
|
|
/* Bug 1340570: increase the clock timeout to avoid potential
|
|
* operation failure at high gpcclk rate. Default values are 0x400.
|
|
*/
|
|
gk20a_writel(g, pri_ringstation_sys_master_config_r(0x15), 0x800);
|
|
gk20a_writel(g, pri_ringstation_gpc_master_config_r(0xa), 0x800);
|
|
gk20a_writel(g, pri_ringstation_fbp_master_config_r(0x8), 0x800);
|
|
|
|
/* enable fifo access */
|
|
gk20a_writel(g, gr_gpfifo_ctl_r(),
|
|
gr_gpfifo_ctl_access_enabled_f() |
|
|
gr_gpfifo_ctl_semaphore_access_enabled_f());
|
|
|
|
/* TBD: reload gr ucode when needed */
|
|
|
|
/* enable interrupts */
|
|
gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
|
|
gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
|
|
|
|
/* enable fecs error interrupts */
|
|
gk20a_writel(g, gr_fecs_host_int_enable_r(),
|
|
gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
|
|
gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
|
|
gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
|
|
gr_fecs_host_int_enable_watchdog_enable_f());
|
|
|
|
g->ops.gr.enable_hww_exceptions(g);
|
|
g->ops.gr.set_hww_esr_report_mask(g);
|
|
|
|
/* enable per GPC exceptions */
|
|
gk20a_gr_enable_gpc_exceptions(g);
|
|
|
|
/* TBD: ECC for L1/SM */
|
|
/* TBD: enable per BE exceptions */
|
|
|
|
/* reset and enable all exceptions */
|
|
gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
|
|
gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
|
|
gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
|
|
gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
|
|
gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
|
|
gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
|
|
|
|
/* ignore status from some units */
|
|
data = gk20a_readl(g, gr_status_mask_r());
|
|
gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask);
|
|
|
|
if (gr->sw_ready)
|
|
gr_gk20a_load_zbc_table(g, gr);
|
|
else
|
|
gr_gk20a_load_zbc_default_table(g, gr);
|
|
|
|
g->ops.ltc.init_cbc(g, gr);
|
|
|
|
/* load ctx init */
|
|
for (i = 0; i < sw_ctx_load->count; i++)
|
|
gk20a_writel(g, sw_ctx_load->l[i].addr,
|
|
sw_ctx_load->l[i].value);
|
|
|
|
err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
|
|
if (err)
|
|
goto out;
|
|
|
|
/* save and disable fe_go_idle */
|
|
fe_go_idle_timeout_save =
|
|
gk20a_readl(g, gr_fe_go_idle_timeout_r());
|
|
gk20a_writel(g, gr_fe_go_idle_timeout_r(),
|
|
(fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) |
|
|
gr_fe_go_idle_timeout_count_disabled_f());
|
|
|
|
/* override a few ctx state registers */
|
|
g->ops.gr.commit_global_cb_manager(g, NULL, false);
|
|
gr_gk20a_commit_global_timeslice(g, NULL, false);
|
|
|
|
/* floorsweep anything left */
|
|
g->ops.gr.init_fs_state(g);
|
|
|
|
err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
|
|
if (err)
|
|
goto restore_fe_go_idle;
|
|
|
|
restore_fe_go_idle:
|
|
/* restore fe_go_idle */
|
|
gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save);
|
|
|
|
if (err || gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT))
|
|
goto out;
|
|
|
|
/* load method init */
|
|
if (sw_method_init->count) {
|
|
gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
|
|
sw_method_init->l[0].value);
|
|
gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
|
|
gr_pri_mme_shadow_raw_index_write_trigger_f() |
|
|
sw_method_init->l[0].addr);
|
|
last_method_data = sw_method_init->l[0].value;
|
|
}
|
|
for (i = 1; i < sw_method_init->count; i++) {
|
|
if (sw_method_init->l[i].value != last_method_data) {
|
|
gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
|
|
sw_method_init->l[i].value);
|
|
last_method_data = sw_method_init->l[i].value;
|
|
}
|
|
gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
|
|
gr_pri_mme_shadow_raw_index_write_trigger_f() |
|
|
sw_method_init->l[i].addr);
|
|
}
|
|
|
|
err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
|
|
if (err)
|
|
goto out;
|
|
|
|
out:
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_init_gr_prepare(struct gk20a *g)
|
|
{
|
|
u32 gpfifo_ctrl, pmc_en;
|
|
u32 err = 0;
|
|
|
|
/* disable fifo access */
|
|
pmc_en = gk20a_readl(g, mc_enable_r());
|
|
if (pmc_en & mc_enable_pgraph_enabled_f()) {
|
|
gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r());
|
|
gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f();
|
|
gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl);
|
|
}
|
|
|
|
/* reset gr engine */
|
|
gk20a_reset(g, mc_enable_pgraph_enabled_f()
|
|
| mc_enable_blg_enabled_f()
|
|
| mc_enable_perfmon_enabled_f());
|
|
|
|
/* enable fifo access */
|
|
gk20a_writel(g, gr_gpfifo_ctl_r(),
|
|
gr_gpfifo_ctl_access_enabled_f() |
|
|
gr_gpfifo_ctl_semaphore_access_enabled_f());
|
|
|
|
if (!g->gr.ctx_vars.valid) {
|
|
err = gr_gk20a_init_ctx_vars(g, &g->gr);
|
|
if (err)
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to load gr init ctx");
|
|
}
|
|
return err;
|
|
}
|
|
|
|
static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g)
|
|
{
|
|
int retries = GR_IDLE_CHECK_MAX / GR_IDLE_CHECK_DEFAULT;
|
|
bool fecs_scrubbing;
|
|
bool gpccs_scrubbing;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
do {
|
|
fecs_scrubbing = gk20a_readl(g, gr_fecs_dmactl_r()) &
|
|
(gr_fecs_dmactl_imem_scrubbing_m() |
|
|
gr_fecs_dmactl_dmem_scrubbing_m());
|
|
|
|
gpccs_scrubbing = gk20a_readl(g, gr_gpccs_dmactl_r()) &
|
|
(gr_gpccs_dmactl_imem_scrubbing_m() |
|
|
gr_gpccs_dmactl_imem_scrubbing_m());
|
|
|
|
if (!fecs_scrubbing && !gpccs_scrubbing) {
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
udelay(GR_IDLE_CHECK_DEFAULT);
|
|
} while (--retries || !tegra_platform_is_silicon());
|
|
|
|
gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
|
|
return -ETIMEDOUT;
|
|
}
|
|
|
|
static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
|
|
unsigned long end_jiffies = jiffies +
|
|
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
u32 i, err = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/* enable interrupts */
|
|
gk20a_writel(g, gr_intr_r(), ~0);
|
|
gk20a_writel(g, gr_intr_en_r(), ~0);
|
|
|
|
/* reset ctx switch state */
|
|
gr_gk20a_ctx_reset(g, 0);
|
|
|
|
/* clear scc ram */
|
|
gk20a_writel(g, gr_scc_init_r(),
|
|
gr_scc_init_ram_trigger_f());
|
|
|
|
/* load non_ctx init */
|
|
for (i = 0; i < sw_non_ctx_load->count; i++)
|
|
gk20a_writel(g, sw_non_ctx_load->l[i].addr,
|
|
sw_non_ctx_load->l[i].value);
|
|
|
|
err = gr_gk20a_wait_mem_scrubbing(g);
|
|
if (err)
|
|
goto out;
|
|
|
|
err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
|
|
if (err)
|
|
goto out;
|
|
|
|
err = gr_gk20a_load_ctxsw_ucode(g, gr);
|
|
if (err)
|
|
goto out;
|
|
|
|
/* this appears query for sw states but fecs actually init
|
|
ramchain, etc so this is hw init */
|
|
err = gr_gk20a_init_ctx_state(g, gr);
|
|
if (err)
|
|
goto out;
|
|
|
|
out:
|
|
if (err)
|
|
gk20a_err(dev_from_gk20a(g), "fail");
|
|
else
|
|
gk20a_dbg_fn("done");
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* XXX Merge this list with the debugger/profiler
|
|
* session regops whitelists?
|
|
*/
|
|
static u32 wl_addr_gk20a[] = {
|
|
/* this list must be sorted (low to high) */
|
|
0x404468, /* gr_pri_mme_max_instructions */
|
|
0x408944, /* gr_pri_bes_crop_hww_esr */
|
|
0x418800, /* gr_pri_gpcs_setup_debug */
|
|
0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
|
|
0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
|
|
0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
|
|
0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
|
|
};
|
|
|
|
static int gr_gk20a_init_access_map(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
void *data;
|
|
int err = 0;
|
|
u32 w, nr_pages =
|
|
DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
|
|
PAGE_SIZE);
|
|
|
|
data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].pages,
|
|
PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].size) >>
|
|
PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL));
|
|
if (!data) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to map priv access map memory");
|
|
err = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
memset(data, 0x0, PAGE_SIZE * nr_pages);
|
|
|
|
for (w = 0; w < ARRAY_SIZE(wl_addr_gk20a); w++) {
|
|
u32 map_bit, map_byte, map_shift;
|
|
map_bit = wl_addr_gk20a[w] >> 2;
|
|
map_byte = map_bit >> 3;
|
|
map_shift = map_bit & 0x7; /* i.e. 0-7 */
|
|
gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d",
|
|
wl_addr_gk20a[w], map_byte, map_shift);
|
|
((u8 *)data)[map_byte] |= 1 << map_shift;
|
|
}
|
|
|
|
clean_up:
|
|
if (data)
|
|
vunmap(data);
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_init_gr_setup_sw(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
int err;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (gr->sw_ready) {
|
|
gk20a_dbg_fn("skip init");
|
|
return 0;
|
|
}
|
|
|
|
gr->g = g;
|
|
|
|
err = gr_gk20a_init_gr_config(g, gr);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gr_gk20a_init_mmu_sw(g, gr);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gr_gk20a_init_map_tiles(g, gr);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
if (tegra_cpu_is_asim())
|
|
gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */
|
|
else {
|
|
gk20a_dbg_info("total ram pages : %lu", totalram_pages);
|
|
gr->max_comptag_mem = totalram_pages
|
|
>> (10 - (PAGE_SHIFT - 10));
|
|
}
|
|
err = g->ops.ltc.init_comptags(g, gr);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gr_gk20a_init_zcull(g, gr);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gr_gk20a_alloc_global_ctx_buffers(g);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gr_gk20a_init_access_map(g);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
mutex_init(&gr->ctx_mutex);
|
|
spin_lock_init(&gr->ch_tlb_lock);
|
|
|
|
gr->remove_support = gk20a_remove_gr_support;
|
|
gr->sw_ready = true;
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
|
|
clean_up:
|
|
gk20a_err(dev_from_gk20a(g), "fail");
|
|
gk20a_remove_gr_support(gr);
|
|
return err;
|
|
}
|
|
|
|
int gk20a_init_gr_support(struct gk20a *g)
|
|
{
|
|
u32 err;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
err = gk20a_init_gr_prepare(g);
|
|
if (err)
|
|
return err;
|
|
|
|
/* this is required before gr_gk20a_init_ctx_state */
|
|
mutex_init(&g->gr.fecs_mutex);
|
|
|
|
err = gk20a_init_gr_reset_enable_hw(g);
|
|
if (err)
|
|
return err;
|
|
|
|
err = gk20a_init_gr_setup_sw(g);
|
|
if (err)
|
|
return err;
|
|
|
|
err = gk20a_init_gr_setup_hw(g);
|
|
if (err)
|
|
return err;
|
|
|
|
/* GR is inialized, signal possible waiters */
|
|
g->gr.initialized = true;
|
|
wake_up(&g->gr.init_wq);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Wait until GR is initialized */
|
|
void gk20a_gr_wait_initialized(struct gk20a *g)
|
|
{
|
|
wait_event(g->gr.init_wq, g->gr.initialized);
|
|
}
|
|
|
|
#define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
|
|
#define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280
|
|
#define NVA297_SET_SHADER_EXCEPTIONS 0x1528
|
|
#define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528
|
|
|
|
#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
|
|
|
|
struct gr_isr_data {
|
|
u32 addr;
|
|
u32 data_lo;
|
|
u32 data_hi;
|
|
u32 curr_ctx;
|
|
u32 chid;
|
|
u32 offset;
|
|
u32 sub_chan;
|
|
u32 class_num;
|
|
};
|
|
|
|
/*
 * SET_SHADER_EXCEPTIONS sw method: when data equals
 * NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE both SM ESR report masks
 * are cleared; for any other value they are programmed with the full
 * set of warp and global error report bits.
 */
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data)
{
	u32 warp_report_mask = 0;
	u32 global_report_mask = 0;

	gk20a_dbg_fn("");

	if (data != NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE) {
		/* sm warp esr report mask */
		warp_report_mask =
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
			gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f();

		/* sm global esr report mask */
		global_report_mask =
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() |
			gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f();
	}

	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
		     warp_report_mask);
	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
		     global_report_mask);
}
|
|
|
|
/*
 * SET_CIRCULAR_BUFFER_SIZE sw method: apply a new beta circular buffer
 * size (data * 4, clamped to gr->attrib_cb_size) to the DS constraint
 * logic and to the CBM config register of every PPC in every GPC.
 */
static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val, offset;
	u32 cfg_reg;
	u32 cb_size = data * 4;

	gk20a_dbg_fn("");

	if (cb_size > gr->attrib_cb_size)
		cb_size = gr->attrib_cb_size;

	val = gk20a_readl(g, gr_ds_tga_constraintlogic_r());
	val &= ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0);
	val |= gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size);
	gk20a_writel(g, gr_ds_tga_constraintlogic_r(), val);

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {
			cfg_reg = gr_gpc0_ppc0_cbm_cfg_r() + stride +
				proj_ppc_in_gpc_stride_v() * ppc_index;

			val = gk20a_readl(g, cfg_reg);
			offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);

			val = set_field(val,
				gr_gpc0_ppc0_cbm_cfg_size_m(),
				gr_gpc0_ppc0_cbm_cfg_size_f(cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));

			/*
			 * The register is written twice: first with
			 * start_offset bumped by one, then with the
			 * original start_offset restored.
			 */
			val = set_field(val,
				gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
				(offset + 1));
			gk20a_writel(g, cfg_reg, val);

			val = set_field(val,
				gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
				offset);
			gk20a_writel(g, cfg_reg, val);
		}
	}
}
|
|
|
|
/*
 * SET_ALPHA_CIRCULAR_BUFFER_SIZE sw method: apply a new alpha circular
 * buffer size (data * 4, clamped to gr->alpha_cb_size) to the DS
 * constraint logic, the PD alpha/beta max output setting, and the CBM
 * cfg2 register of every PPC in every GPC.
 */
static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 cfg2_reg;
	u32 pd_ab_max_output;
	u32 alpha_cb_size = data * 4;

	gk20a_dbg_fn("");

	if (alpha_cb_size > gr->alpha_cb_size)
		alpha_cb_size = gr->alpha_cb_size;

	val = gk20a_readl(g, gr_ds_tga_constraintlogic_r());
	val &= ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0);
	val |= gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size);
	gk20a_writel(g, gr_ds_tga_constraintlogic_r(), val);

	/* convert from cbm size granularity to pd max-output granularity */
	pd_ab_max_output = alpha_cb_size *
		gr_gpc0_ppc0_cbm_cfg_size_granularity_v() /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
		     gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {
			cfg2_reg = gr_gpc0_ppc0_cbm_cfg2_r() + stride +
				proj_ppc_in_gpc_stride_v() * ppc_index;

			val = gk20a_readl(g, cfg2_reg);
			val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
				gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));
			gk20a_writel(g, cfg2_reg, val);
		}
	}
}
|
|
|
|
int gk20a_gr_reset(struct gk20a *g)
|
|
{
|
|
int err;
|
|
u32 size;
|
|
|
|
err = gk20a_init_gr_prepare(g);
|
|
if (err)
|
|
return err;
|
|
|
|
err = gk20a_init_gr_reset_enable_hw(g);
|
|
if (err)
|
|
return err;
|
|
|
|
err = gk20a_init_gr_setup_hw(g);
|
|
if (err)
|
|
return err;
|
|
|
|
size = 0;
|
|
err = gr_gk20a_fecs_get_reglist_img_size(g, &size);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to query fecs pg buffer size");
|
|
return err;
|
|
}
|
|
|
|
err = gr_gk20a_fecs_set_reglist_bind_inst(g,
|
|
g->mm.pmu.inst_block.cpu_pa);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to bind pmu inst to gr");
|
|
return err;
|
|
}
|
|
|
|
err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.pmu_va);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"fail to set pg buffer pmu va");
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Emulate a software method for the KEPLER_COMPUTE_A and KEPLER_C
 * classes. The method is selected by (offset << 2).
 *
 * Returns 0 when the method was handled, or when class_num is neither
 * of the two classes; returns -EINVAL for an unknown method of a known
 * class.
 */
static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr,
				     u32 class_num, u32 offset, u32 data)
{
	const u32 method = offset << 2;

	gk20a_dbg_fn("");

	if (class_num == KEPLER_COMPUTE_A) {
		if (method != NVA0C0_SET_SHADER_EXCEPTIONS)
			return -EINVAL;
		gk20a_gr_set_shader_exceptions(g, data);
	}

	if (class_num == KEPLER_C) {
		switch (method) {
		case NVA297_SET_SHADER_EXCEPTIONS:
			gk20a_gr_set_shader_exceptions(g, data);
			break;
		case NVA297_SET_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_circular_buffer_size(g, data);
			break;
		case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_alpha_circular_buffer_size(g, data);
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}
|
|
|
|
static int gk20a_gr_handle_semaphore_timeout_pending(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data->chid];
|
|
gk20a_dbg_fn("");
|
|
gk20a_set_error_notifier(ch,
|
|
NVHOST_CHANNEL_GR_SEMAPHORE_TIMEOUT);
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"gr semaphore timeout\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
static int gk20a_gr_intr_illegal_notify_pending(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data->chid];
|
|
gk20a_dbg_fn("");
|
|
gk20a_set_error_notifier(ch,
|
|
NVHOST_CHANNEL_GR_ILLEGAL_NOTIFY);
|
|
/* This is an unrecoverable error, reset is needed */
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"gr semaphore timeout\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
static int gk20a_gr_handle_illegal_method(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
int ret = g->ops.gr.handle_sw_method(g, isr_data->addr,
|
|
isr_data->class_num, isr_data->offset,
|
|
isr_data->data_lo);
|
|
if (ret)
|
|
gk20a_err(dev_from_gk20a(g), "invalid method class 0x%08x"
|
|
", offset 0x%08x address 0x%08x\n",
|
|
isr_data->class_num, isr_data->offset, isr_data->addr);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_gr_handle_illegal_class(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data->chid];
|
|
gk20a_dbg_fn("");
|
|
gk20a_set_error_notifier(ch,
|
|
NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"invalid class 0x%08x, offset 0x%08x",
|
|
isr_data->class_num, isr_data->offset);
|
|
return -EINVAL;
|
|
}
|
|
|
|
static int gk20a_gr_handle_fecs_error(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data->chid];
|
|
u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_intr_r());
|
|
gk20a_dbg_fn("");
|
|
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"unhandled fecs error interrupt 0x%08x for channel %u",
|
|
gr_fecs_intr, ch->hw_chid);
|
|
|
|
gk20a_writel(g, gr_fecs_intr_r(), gr_fecs_intr);
|
|
return -EINVAL;
|
|
}
|
|
|
|
static int gk20a_gr_handle_class_error(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data->chid];
|
|
gk20a_dbg_fn("");
|
|
|
|
gk20a_set_error_notifier(ch,
|
|
NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"class error 0x%08x, offset 0x%08x",
|
|
isr_data->class_num, isr_data->offset);
|
|
return -EINVAL;
|
|
}
|
|
|
|
static int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data->chid];
|
|
|
|
wake_up(&ch->semaphore_wq);
|
|
|
|
return 0;
|
|
}
|
|
|
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
|
static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
|
|
u32 offset)
|
|
{
|
|
/* support only 24-bit 4-byte aligned offsets */
|
|
bool valid = !(offset & 0xFF000003);
|
|
/* whitelist check */
|
|
valid = valid &&
|
|
is_bar0_global_offset_whitelisted_gk20a(offset);
|
|
/* resource size check in case there was a problem
|
|
* with allocating the assumed size of bar0 */
|
|
valid = valid &&
|
|
offset < resource_size(g->reg_mem);
|
|
return valid;
|
|
}
|
|
#endif
|
|
|
|
static int gk20a_gr_handle_notify_pending(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data->chid];
|
|
|
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
|
void *virtual_address;
|
|
u32 buffer_size;
|
|
u32 offset;
|
|
u32 new_offset;
|
|
bool exit;
|
|
struct share_buffer_head *sh_hdr;
|
|
u32 raw_reg;
|
|
u64 mask_orig;
|
|
u64 v = 0;
|
|
struct gk20a_cyclestate_buffer_elem *op_elem;
|
|
/* GL will never use payload 0 for cycle state */
|
|
if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
|
|
return 0;
|
|
|
|
mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
|
|
|
|
virtual_address = ch->cyclestate.cyclestate_buffer;
|
|
buffer_size = ch->cyclestate.cyclestate_buffer_size;
|
|
offset = isr_data->data_lo;
|
|
exit = false;
|
|
while (!exit) {
|
|
if (offset >= buffer_size) {
|
|
WARN_ON(1);
|
|
break;
|
|
}
|
|
|
|
sh_hdr = (struct share_buffer_head *)
|
|
((char *)virtual_address + offset);
|
|
|
|
if (sh_hdr->size < sizeof(struct share_buffer_head)) {
|
|
WARN_ON(1);
|
|
break;
|
|
}
|
|
new_offset = offset + sh_hdr->size;
|
|
|
|
switch (sh_hdr->operation) {
|
|
case OP_END:
|
|
exit = true;
|
|
break;
|
|
|
|
case BAR0_READ32:
|
|
case BAR0_WRITE32:
|
|
{
|
|
bool valid;
|
|
op_elem =
|
|
(struct gk20a_cyclestate_buffer_elem *)
|
|
sh_hdr;
|
|
valid = is_valid_cyclestats_bar0_offset_gk20a(g,
|
|
op_elem->offset_bar0);
|
|
if (!valid) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"invalid cycletstats op offset: 0x%x\n",
|
|
op_elem->offset_bar0);
|
|
|
|
sh_hdr->failed = exit = true;
|
|
break;
|
|
}
|
|
|
|
|
|
mask_orig =
|
|
((1ULL <<
|
|
(op_elem->last_bit + 1))
|
|
-1)&~((1ULL <<
|
|
op_elem->first_bit)-1);
|
|
|
|
raw_reg =
|
|
gk20a_readl(g,
|
|
op_elem->offset_bar0);
|
|
|
|
switch (sh_hdr->operation) {
|
|
case BAR0_READ32:
|
|
op_elem->data =
|
|
(raw_reg & mask_orig)
|
|
>> op_elem->first_bit;
|
|
break;
|
|
|
|
case BAR0_WRITE32:
|
|
v = 0;
|
|
if ((unsigned int)mask_orig !=
|
|
(unsigned int)~0) {
|
|
v = (unsigned int)
|
|
(raw_reg & ~mask_orig);
|
|
}
|
|
|
|
v |= ((op_elem->data
|
|
<< op_elem->first_bit)
|
|
& mask_orig);
|
|
|
|
gk20a_writel(g,
|
|
op_elem->offset_bar0,
|
|
(unsigned int)v);
|
|
break;
|
|
default:
|
|
/* nop ok?*/
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
/* no operation content case */
|
|
exit = true;
|
|
break;
|
|
}
|
|
sh_hdr->completed = true;
|
|
offset = new_offset;
|
|
}
|
|
mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
|
|
#endif
|
|
gk20a_dbg_fn("");
|
|
wake_up(&ch->notifier_wq);
|
|
return 0;
|
|
}
|
|
|
|
/* Used by sw interrupt thread to translate current ctx to chid.
|
|
* For performance, we don't want to go through 128 channels every time.
|
|
* A small tlb is used here to cache translation */
|
|
static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct gr_gk20a *gr = &g->gr;
|
|
u32 chid = -1;
|
|
u32 i;
|
|
|
|
spin_lock(&gr->ch_tlb_lock);
|
|
|
|
/* check cache first */
|
|
for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
|
|
if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
|
|
chid = gr->chid_tlb[i].hw_chid;
|
|
goto unlock;
|
|
}
|
|
}
|
|
|
|
/* slow path */
|
|
for (chid = 0; chid < f->num_channels; chid++)
|
|
if (f->channel[chid].in_use) {
|
|
if ((u32)(f->channel[chid].inst_block.cpu_pa >>
|
|
ram_in_base_shift_v()) ==
|
|
gr_fecs_current_ctx_ptr_v(curr_ctx))
|
|
break;
|
|
}
|
|
|
|
if (chid >= f->num_channels) {
|
|
chid = -1;
|
|
goto unlock;
|
|
}
|
|
|
|
/* add to free tlb entry */
|
|
for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
|
|
if (gr->chid_tlb[i].curr_ctx == 0) {
|
|
gr->chid_tlb[i].curr_ctx = curr_ctx;
|
|
gr->chid_tlb[i].hw_chid = chid;
|
|
goto unlock;
|
|
}
|
|
}
|
|
|
|
/* no free entry, flush one */
|
|
gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
|
|
gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;
|
|
|
|
gr->channel_tlb_flush_index =
|
|
(gr->channel_tlb_flush_index + 1) &
|
|
(GR_CHANNEL_MAP_TLB_SIZE - 1);
|
|
|
|
unlock:
|
|
spin_unlock(&gr->ch_tlb_lock);
|
|
return chid;
|
|
}
|
|
|
|
/*
 * Assert the SM stop trigger on gpc0/tpc0 and poll until the SM reports
 * that it is locked down.
 *
 * @global_esr_mask: global ESR bits that do not count as a pending error.
 *
 * Returns 0 once the SM is locked down or no error is pending (the stop
 * trigger is de-asserted in that case), -EFAULT when an mmu fault is
 * pending while mmu debug mode is disabled, and -EAGAIN on timeout.
 * The stop trigger is left asserted on both failure paths. On non-silicon
 * platforms the polling loop does not time out.
 */
static int gk20a_gr_lock_down_sm(struct gk20a *g, u32 global_esr_mask)
{
	unsigned long deadline = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 poll_us = GR_IDLE_CHECK_DEFAULT;
	bool mmu_debug = gk20a_mm_mmu_debug_mode_enabled(g);
	u32 ctrl;

	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locking down SM");

	/* assert stop trigger */
	ctrl = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
	ctrl |= gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
	gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), ctrl);

	/* wait for the sm to lock down */
	for (;;) {
		u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
		u32 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
		u32 status = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_status0_r());
		bool locked =
			gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(status) ==
			gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v();
		bool warp_error =
			gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) !=
			gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v();
		bool global_error = (global_esr & ~global_esr_mask) != 0;

		if (locked || !(warp_error || global_error)) {
			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "locked down SM");

			/* de-assert stop trigger */
			ctrl &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
			gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r(), ctrl);

			return 0;
		}

		/* if an mmu fault is pending and mmu debug mode is not
		 * enabled, the sm will never lock down. */
		if (!mmu_debug && gk20a_fifo_mmu_fault_pending(g)) {
			gk20a_err(dev_from_gk20a(g),
				"mmu fault pending, sm will never lock down!");
			return -EFAULT;
		}

		/* back off exponentially, capped at GR_IDLE_CHECK_MAX */
		usleep_range(poll_us, poll_us * 2);
		poll_us = min_t(u32, poll_us << 1, GR_IDLE_CHECK_MAX);

		/* only give up on silicon, and only once the deadline passed */
		if (!time_before(jiffies, deadline) &&
		    tegra_platform_is_silicon())
			break;
	}

	gk20a_err(dev_from_gk20a(g), "timed out while trying to lock down SM");

	return -EAGAIN;
}
|
|
|
|
bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
|
|
{
|
|
u32 dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
|
|
|
|
/* check if an sm debugger is attached */
|
|
if (gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(dbgr_control0) ==
|
|
gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v())
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
 * Clear the SM hardware warnings on gpc0/tpc0: write @global_esr back to
 * the global ESR register, then write the "no error" value to the warp ESR
 * register.
 */
static void gk20a_gr_clear_sm_hww(struct gk20a *g, u32 global_esr)
{
	u32 no_warp_error = gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f();

	/* clear the global hww */
	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r(), global_esr);

	/* clear the warp hww */
	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r(), no_warp_error);
}
|
|
|
|
static struct channel_gk20a *
|
|
channel_from_hw_chid(struct gk20a *g, u32 hw_chid)
|
|
{
|
|
return g->fifo.channel+hw_chid;
|
|
}
|
|
|
|
static int gk20a_gr_handle_sm_exception(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
int ret = 0;
|
|
bool do_warp_sync = false;
|
|
/* these three interrupts don't require locking down the SM. They can
|
|
* be handled by usermode clients as they aren't fatal. Additionally,
|
|
* usermode clients may wish to allow some warps to execute while others
|
|
* are at breakpoints, as opposed to fatal errors where all warps should
|
|
* halt. */
|
|
u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
|
|
gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
|
|
gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
|
|
u32 global_esr, warp_esr;
|
|
bool sm_debugger_attached = gk20a_gr_sm_debugger_attached(g);
|
|
struct channel_gk20a *fault_ch;
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
|
|
|
|
global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
|
|
warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r());
|
|
|
|
/* if an sm debugger is attached, disable forwarding of tpc exceptions.
|
|
* the debugger will reenable exceptions after servicing them. */
|
|
if (sm_debugger_attached) {
|
|
u32 tpc_exception_en = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r());
|
|
tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f();
|
|
gk20a_writel(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), tpc_exception_en);
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM debugger attached");
|
|
}
|
|
|
|
/* if a debugger is present and an error has occurred, do a warp sync */
|
|
if (sm_debugger_attached && ((warp_esr != 0) || ((global_esr & ~global_mask) != 0))) {
|
|
gk20a_dbg(gpu_dbg_intr, "warp sync needed");
|
|
do_warp_sync = true;
|
|
}
|
|
|
|
if (do_warp_sync) {
|
|
ret = gk20a_gr_lock_down_sm(g, global_mask);
|
|
if (ret) {
|
|
gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
/* finally, signal any client waiting on an event */
|
|
fault_ch = channel_from_hw_chid(g, isr_data->chid);
|
|
if (fault_ch)
|
|
gk20a_dbg_gpu_post_events(fault_ch);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_gr_handle_tpc_exception(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
int ret = 0;
|
|
u32 tpc_exception = gk20a_readl(g, gr_gpcs_tpcs_tpccs_tpc_exception_r());
|
|
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
|
|
|
|
/* check if an sm exeption is pending */
|
|
if (gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(tpc_exception) ==
|
|
gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v()) {
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "SM exception pending");
|
|
ret = gk20a_gr_handle_sm_exception(g, isr_data);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_gr_handle_gpc_exception(struct gk20a *g,
|
|
struct gr_isr_data *isr_data)
|
|
{
|
|
int ret = 0;
|
|
u32 gpc_exception = gk20a_readl(g, gr_gpcs_gpccs_gpc_exception_r());
|
|
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
|
|
|
|
/* check if tpc 0 has an exception */
|
|
if (gr_gpcs_gpccs_gpc_exception_tpc_v(gpc_exception) ==
|
|
gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v()) {
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "TPC exception pending");
|
|
ret = gk20a_gr_handle_tpc_exception(g, isr_data);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int gk20a_gr_isr(struct gk20a *g)
|
|
{
|
|
struct gr_isr_data isr_data;
|
|
u32 grfifo_ctl;
|
|
u32 obj_table;
|
|
int need_reset = 0;
|
|
u32 gr_intr = gk20a_readl(g, gr_intr_r());
|
|
|
|
gk20a_dbg_fn("");
|
|
gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
|
|
|
|
if (!gr_intr)
|
|
return 0;
|
|
|
|
grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
|
|
grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
|
|
grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
|
|
|
|
gk20a_writel(g, gr_gpfifo_ctl_r(),
|
|
grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
|
|
gr_gpfifo_ctl_semaphore_access_f(0));
|
|
|
|
isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
|
|
isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
|
|
isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
|
|
isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
|
|
isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
|
|
isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
|
|
obj_table = gk20a_readl(g,
|
|
gr_fe_object_table_r(isr_data.sub_chan));
|
|
isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
|
|
|
|
isr_data.chid =
|
|
gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
|
|
if (isr_data.chid == -1) {
|
|
gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
|
|
isr_data.curr_ctx);
|
|
goto clean_up;
|
|
}
|
|
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
|
|
"channel %d: addr 0x%08x, "
|
|
"data 0x%08x 0x%08x,"
|
|
"ctx 0x%08x, offset 0x%08x, "
|
|
"subchannel 0x%08x, class 0x%08x",
|
|
isr_data.chid, isr_data.addr,
|
|
isr_data.data_hi, isr_data.data_lo,
|
|
isr_data.curr_ctx, isr_data.offset,
|
|
isr_data.sub_chan, isr_data.class_num);
|
|
|
|
if (gr_intr & gr_intr_notify_pending_f()) {
|
|
gk20a_gr_handle_notify_pending(g, &isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_notify_reset_f());
|
|
gr_intr &= ~gr_intr_notify_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_semaphore_pending_f()) {
|
|
gk20a_gr_handle_semaphore_pending(g, &isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_semaphore_reset_f());
|
|
gr_intr &= ~gr_intr_semaphore_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_semaphore_timeout_pending_f()) {
|
|
need_reset |= gk20a_gr_handle_semaphore_timeout_pending(g,
|
|
&isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_semaphore_reset_f());
|
|
gr_intr &= ~gr_intr_semaphore_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_illegal_notify_pending_f()) {
|
|
need_reset |= gk20a_gr_intr_illegal_notify_pending(g,
|
|
&isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_illegal_notify_reset_f());
|
|
gr_intr &= ~gr_intr_illegal_notify_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_illegal_method_pending_f()) {
|
|
need_reset |= gk20a_gr_handle_illegal_method(g, &isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_illegal_method_reset_f());
|
|
gr_intr &= ~gr_intr_illegal_method_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_illegal_class_pending_f()) {
|
|
need_reset |= gk20a_gr_handle_illegal_class(g, &isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_illegal_class_reset_f());
|
|
gr_intr &= ~gr_intr_illegal_class_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_fecs_error_pending_f()) {
|
|
need_reset |= gk20a_gr_handle_fecs_error(g, &isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_fecs_error_reset_f());
|
|
gr_intr &= ~gr_intr_fecs_error_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_class_error_pending_f()) {
|
|
need_reset |= gk20a_gr_handle_class_error(g, &isr_data);
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_class_error_reset_f());
|
|
gr_intr &= ~gr_intr_class_error_pending_f();
|
|
}
|
|
|
|
/* this one happens if someone tries to hit a non-whitelisted
|
|
* register using set_falcon[4] */
|
|
if (gr_intr & gr_intr_firmware_method_pending_f()) {
|
|
need_reset |= true;
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "firmware method intr pending\n");
|
|
gk20a_writel(g, gr_intr_r(),
|
|
gr_intr_firmware_method_reset_f());
|
|
gr_intr &= ~gr_intr_firmware_method_pending_f();
|
|
}
|
|
|
|
if (gr_intr & gr_intr_exception_pending_f()) {
|
|
u32 exception = gk20a_readl(g, gr_exception_r());
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch = &f->channel[isr_data.chid];
|
|
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
|
|
|
|
if (exception & gr_exception_fe_m()) {
|
|
u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
|
|
gk20a_dbg(gpu_dbg_intr, "fe warning %08x\n", fe);
|
|
gk20a_writel(g, gr_fe_hww_esr_r(), fe);
|
|
need_reset |= -EFAULT;
|
|
}
|
|
|
|
/* check if a gpc exception has occurred */
|
|
if (exception & gr_exception_gpc_m() && need_reset == 0) {
|
|
u32 exception1 = gk20a_readl(g, gr_exception1_r());
|
|
u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r());
|
|
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC exception pending");
|
|
|
|
/* if no sm debugger is present, clean up the channel */
|
|
if (!gk20a_gr_sm_debugger_attached(g)) {
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
|
|
"SM debugger not attached, clearing interrupt");
|
|
need_reset |= -EFAULT;
|
|
} else {
|
|
/* check if gpc 0 has an exception */
|
|
if (exception1 & gr_exception1_gpc_0_pending_f())
|
|
need_reset |= gk20a_gr_handle_gpc_exception(g, &isr_data);
|
|
/* clear the hwws, also causes tpc and gpc
|
|
* exceptions to be cleared */
|
|
gk20a_gr_clear_sm_hww(g, global_esr);
|
|
}
|
|
|
|
if (need_reset)
|
|
gk20a_set_error_notifier(ch,
|
|
NVHOST_CHANNEL_GR_ERROR_SW_NOTIFY);
|
|
}
|
|
|
|
gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
|
|
gr_intr &= ~gr_intr_exception_pending_f();
|
|
}
|
|
|
|
if (need_reset)
|
|
gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true);
|
|
|
|
clean_up:
|
|
gk20a_writel(g, gr_gpfifo_ctl_r(),
|
|
grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
|
|
gr_gpfifo_ctl_semaphore_access_f(1));
|
|
|
|
if (gr_intr)
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"unhandled gr interrupt 0x%08x", gr_intr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int gk20a_gr_nonstall_isr(struct gk20a *g)
|
|
{
|
|
u32 gr_intr = gk20a_readl(g, gr_intr_nonstall_r());
|
|
u32 clear_intr = 0;
|
|
|
|
gk20a_dbg(gpu_dbg_intr, "pgraph nonstall intr %08x", gr_intr);
|
|
|
|
if (gr_intr & gr_intr_nonstall_trap_pending_f()) {
|
|
gk20a_channel_semaphore_wakeup(g);
|
|
clear_intr |= gr_intr_nonstall_trap_pending_f();
|
|
}
|
|
|
|
gk20a_writel(g, gr_intr_nonstall_r(), clear_intr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Submit the FECS "discover reglist image size" method.
 *
 * The op uses mailbox 0; the mailbox value is returned through @size and
 * the op is considered successful when that value is not equal to 0.
 * @size must not be NULL (BUG otherwise).
 *
 * Returns the result of gr_gk20a_submit_fecs_method_op().
 */
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
{
	struct fecs_method_op_gk20a op = {
		.method.addr = gr_fecs_method_push_adr_discover_reglist_image_size_v(),
		.method.data = 1,
		.mailbox.id = 0,
		.mailbox.data = 0,
		.mailbox.clr = ~0,
		.mailbox.ret = size,
		.mailbox.ok = 0,
		.mailbox.fail = 0,
		.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
		.cond.fail = GR_IS_UCODE_OP_SKIP,
	};

	BUG_ON(!size);

	return gr_gk20a_submit_fecs_method_op(g, op);
}
|
|
|
|
/*
 * Submit the FECS "set reglist bind instance" method.
 *
 * Mailbox 4 carries a context descriptor built from @addr >> 12, the
 * valid flag and the vid_mem target. The op is considered successful when
 * the mailbox reads back equal to 1.
 *
 * Returns the result of gr_gk20a_submit_fecs_method_op().
 */
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
{
	struct fecs_method_op_gk20a op = {
		.method.addr = gr_fecs_method_push_adr_set_reglist_bind_instance_v(),
		.method.data = 1,
		.mailbox.id = 4,
		.mailbox.data = (gr_fecs_current_ctx_ptr_f(addr >> 12) |
				 gr_fecs_current_ctx_valid_f(1) |
				 gr_fecs_current_ctx_target_vid_mem_f()),
		.mailbox.clr = ~0,
		.mailbox.ret = NULL,
		.mailbox.ok = 1,
		.mailbox.fail = 0,
		.cond.ok = GR_IS_UCODE_OP_EQUAL,
		.cond.fail = GR_IS_UCODE_OP_SKIP,
	};

	return gr_gk20a_submit_fecs_method_op(g, op);
}
|
|
|
|
/*
 * Submit the FECS "set reglist virtual address" method.
 *
 * Mailbox 4 carries the low 32 bits of @pmu_va >> 8. The op is considered
 * successful when the mailbox reads back equal to 1.
 *
 * Returns the result of gr_gk20a_submit_fecs_method_op().
 */
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va)
{
	struct fecs_method_op_gk20a op = {
		.method.addr = gr_fecs_method_push_adr_set_reglist_virtual_address_v(),
		.method.data = 1,
		.mailbox.id = 4,
		.mailbox.data = u64_lo32(pmu_va >> 8),
		.mailbox.clr = ~0,
		.mailbox.ret = NULL,
		.mailbox.ok = 1,
		.mailbox.fail = 0,
		.cond.ok = GR_IS_UCODE_OP_EQUAL,
		.cond.fail = GR_IS_UCODE_OP_SKIP,
	};

	return gr_gk20a_submit_fecs_method_op(g, op);
}
|
|
|
|
int gk20a_gr_suspend(struct gk20a *g)
|
|
{
|
|
unsigned long end_jiffies = jiffies +
|
|
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
u32 ret = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
|
|
if (ret)
|
|
return ret;
|
|
|
|
gk20a_writel(g, gr_gpfifo_ctl_r(),
|
|
gr_gpfifo_ctl_access_disabled_f());
|
|
|
|
/* disable gr intr */
|
|
gk20a_writel(g, gr_intr_r(), 0);
|
|
gk20a_writel(g, gr_intr_en_r(), 0);
|
|
|
|
/* disable all exceptions */
|
|
gk20a_writel(g, gr_exception_r(), 0);
|
|
gk20a_writel(g, gr_exception_en_r(), 0);
|
|
gk20a_writel(g, gr_exception1_r(), 0);
|
|
gk20a_writel(g, gr_exception1_en_r(), 0);
|
|
gk20a_writel(g, gr_exception2_r(), 0);
|
|
gk20a_writel(g, gr_exception2_en_r(), 0);
|
|
|
|
gk20a_gr_flush_channel_tlb(&g->gr);
|
|
|
|
g->gr.initialized = false;
|
|
|
|
gk20a_dbg_fn("done");
|
|
return ret;
|
|
}
|
|
|
|
static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
|
u32 addr,
|
|
bool is_quad, u32 quad,
|
|
u32 *context_buffer,
|
|
u32 context_buffer_size,
|
|
u32 *priv_offset);
|
|
|
|
/* This function will decode a priv address and return the partition type and numbers. */
|
|
int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
|
|
int *addr_type, /* enum ctxsw_addr_type */
|
|
u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
|
|
u32 *broadcast_flags)
|
|
{
|
|
u32 gpc_addr;
|
|
u32 ppc_address;
|
|
u32 ppc_broadcast_addr;
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
|
|
|
|
/* setup defaults */
|
|
ppc_address = 0;
|
|
ppc_broadcast_addr = 0;
|
|
*addr_type = CTXSW_ADDR_TYPE_SYS;
|
|
*broadcast_flags = PRI_BROADCAST_FLAGS_NONE;
|
|
*gpc_num = 0;
|
|
*tpc_num = 0;
|
|
*ppc_num = 0;
|
|
*be_num = 0;
|
|
|
|
if (pri_is_gpc_addr(addr)) {
|
|
*addr_type = CTXSW_ADDR_TYPE_GPC;
|
|
gpc_addr = pri_gpccs_addr_mask(addr);
|
|
if (pri_is_gpc_addr_shared(addr)) {
|
|
*addr_type = CTXSW_ADDR_TYPE_GPC;
|
|
*broadcast_flags |= PRI_BROADCAST_FLAGS_GPC;
|
|
} else
|
|
*gpc_num = pri_get_gpc_num(addr);
|
|
|
|
if (pri_is_tpc_addr(gpc_addr)) {
|
|
*addr_type = CTXSW_ADDR_TYPE_TPC;
|
|
if (pri_is_tpc_addr_shared(gpc_addr)) {
|
|
*broadcast_flags |= PRI_BROADCAST_FLAGS_TPC;
|
|
return 0;
|
|
}
|
|
*tpc_num = pri_get_tpc_num(gpc_addr);
|
|
}
|
|
return 0;
|
|
} else if (pri_is_be_addr(addr)) {
|
|
*addr_type = CTXSW_ADDR_TYPE_BE;
|
|
if (pri_is_be_addr_shared(addr)) {
|
|
*broadcast_flags |= PRI_BROADCAST_FLAGS_BE;
|
|
return 0;
|
|
}
|
|
*be_num = pri_get_be_num(addr);
|
|
return 0;
|
|
} else {
|
|
*addr_type = CTXSW_ADDR_TYPE_SYS;
|
|
return 0;
|
|
}
|
|
/* PPC!?!?!?! */
|
|
|
|
/*NOTREACHED*/
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
 * Expand a PPC broadcast address into one unicast address per PPC of
 * @gpc_num (g->gr.pe_count_per_gpc entries), appending them to
 * @priv_addr_table starting at index *@t and advancing *@t accordingly.
 * The caller must provide enough room in the table.
 *
 * Always returns 0.
 */
static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
					      u32 gpc_num,
					      u32 *priv_addr_table, u32 *t)
{
	u32 ppc_offset;
	u32 ppc;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);

	ppc_offset = pri_ppccs_addr_mask(addr);

	for (ppc = 0; ppc < g->gr.pe_count_per_gpc; ppc++) {
		priv_addr_table[*t] = pri_ppc_addr(ppc_offset, gpc_num, ppc);
		(*t)++;
	}

	return 0;
}
|
|
|
|
/*
|
|
* The context buffer is indexed using BE broadcast addresses and GPC/TPC
|
|
* unicast addresses. This function will convert a BE unicast address to a BE
|
|
* broadcast address and split a GPC/TPC broadcast address into a table of
|
|
* GPC/TPC addresses. The addresses generated by this function can be
|
|
* successfully processed by gr_gk20a_find_priv_offset_in_buffer
|
|
*/
|
|
static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
|
|
u32 addr,
|
|
u32 *priv_addr_table,
|
|
u32 *num_registers)
|
|
{
|
|
int addr_type; /*enum ctxsw_addr_type */
|
|
u32 gpc_num, tpc_num, ppc_num, be_num;
|
|
u32 broadcast_flags;
|
|
u32 t;
|
|
int err;
|
|
|
|
t = 0;
|
|
*num_registers = 0;
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
|
|
|
|
err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
|
|
&gpc_num, &tpc_num, &ppc_num, &be_num,
|
|
&broadcast_flags);
|
|
gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
|
|
if (err)
|
|
return err;
|
|
|
|
if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
|
|
(addr_type == CTXSW_ADDR_TYPE_BE)) {
|
|
/* The BE broadcast registers are included in the compressed PRI
|
|
* table. Convert a BE unicast address to a broadcast address
|
|
* so that we can look up the offset. */
|
|
if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
|
|
!(broadcast_flags & PRI_BROADCAST_FLAGS_BE))
|
|
priv_addr_table[t++] = pri_be_shared_addr(addr);
|
|
else
|
|
priv_addr_table[t++] = addr;
|
|
|
|
*num_registers = t;
|
|
return 0;
|
|
}
|
|
|
|
/* The GPC/TPC unicast registers are included in the compressed PRI
|
|
* tables. Convert a GPC/TPC broadcast address to unicast addresses so
|
|
* that we can look up the offsets. */
|
|
if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
|
|
for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
|
|
|
|
if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
|
|
for (tpc_num = 0;
|
|
tpc_num < g->gr.gpc_tpc_count[gpc_num];
|
|
tpc_num++)
|
|
priv_addr_table[t++] =
|
|
pri_tpc_addr(pri_tpccs_addr_mask(addr),
|
|
gpc_num, tpc_num);
|
|
|
|
else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
|
|
err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
|
|
priv_addr_table, &t);
|
|
if (err)
|
|
return err;
|
|
} else
|
|
priv_addr_table[t++] =
|
|
pri_gpc_addr(pri_gpccs_addr_mask(addr),
|
|
gpc_num);
|
|
}
|
|
} else {
|
|
if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
|
|
for (tpc_num = 0;
|
|
tpc_num < g->gr.gpc_tpc_count[gpc_num];
|
|
tpc_num++)
|
|
priv_addr_table[t++] =
|
|
pri_tpc_addr(pri_tpccs_addr_mask(addr),
|
|
gpc_num, tpc_num);
|
|
else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
|
|
err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
|
|
priv_addr_table, &t);
|
|
else
|
|
priv_addr_table[t++] = addr;
|
|
}
|
|
|
|
*num_registers = t;
|
|
return 0;
|
|
}
|
|
|
|
int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
|
|
u32 addr,
|
|
u32 max_offsets,
|
|
u32 *offsets, u32 *offset_addrs,
|
|
u32 *num_offsets,
|
|
bool is_quad, u32 quad)
|
|
{
|
|
u32 i;
|
|
u32 priv_offset = 0;
|
|
u32 *priv_registers;
|
|
u32 num_registers = 0;
|
|
int err = 0;
|
|
u32 potential_offsets = proj_scal_litter_num_gpcs_v() *
|
|
proj_scal_litter_num_tpc_per_gpc_v();
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
|
|
|
|
/* implementation is crossed-up if either of these happen */
|
|
if (max_offsets > potential_offsets)
|
|
return -EINVAL;
|
|
|
|
if (!g->gr.ctx_vars.golden_image_initialized)
|
|
return -ENODEV;
|
|
|
|
priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL);
|
|
if (IS_ERR_OR_NULL(priv_registers)) {
|
|
gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets);
|
|
err = PTR_ERR(priv_registers);
|
|
goto cleanup;
|
|
}
|
|
memset(offsets, 0, sizeof(u32) * max_offsets);
|
|
memset(offset_addrs, 0, sizeof(u32) * max_offsets);
|
|
*num_offsets = 0;
|
|
|
|
gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers);
|
|
|
|
if ((max_offsets > 1) && (num_registers > max_offsets)) {
|
|
err = -EINVAL;
|
|
goto cleanup;
|
|
}
|
|
|
|
if ((max_offsets == 1) && (num_registers > 1))
|
|
num_registers = 1;
|
|
|
|
if (!g->gr.ctx_vars.local_golden_image) {
|
|
gk20a_dbg_fn("no context switch header info to work with");
|
|
err = -EINVAL;
|
|
goto cleanup;
|
|
}
|
|
|
|
for (i = 0; i < num_registers; i++) {
|
|
err = gr_gk20a_find_priv_offset_in_buffer(g,
|
|
priv_registers[i],
|
|
is_quad, quad,
|
|
g->gr.ctx_vars.local_golden_image,
|
|
g->gr.ctx_vars.golden_image_size,
|
|
&priv_offset);
|
|
if (err) {
|
|
gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x",
|
|
addr); /*, grPriRegStr(addr)));*/
|
|
goto cleanup;
|
|
}
|
|
|
|
offsets[i] = priv_offset;
|
|
offset_addrs[i] = priv_registers[i];
|
|
}
|
|
|
|
*num_offsets = num_registers;
|
|
|
|
cleanup:
|
|
|
|
if (!IS_ERR_OR_NULL(priv_registers))
|
|
kfree(priv_registers);
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Setup some register tables. This looks hacky; our
|
|
* register/offset functions are just that, functions.
|
|
* So they can't be used as initializers... TBD: fix to
|
|
* generate consts at least on an as-needed basis.
|
|
*/
|
|
static const u32 _num_ovr_perf_regs = 17;
|
|
static u32 _ovr_perf_regs[17] = { 0, };
|
|
/* Following are the blocks of registers that the ucode
|
|
stores in the extended region.*/
|
|
/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
|
|
static const u32 _num_sm_dsm_perf_regs = 5;
|
|
/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
|
|
static const u32 _num_sm_dsm_perf_ctrl_regs = 4;
|
|
static u32 _sm_dsm_perf_regs[5];
|
|
static u32 _sm_dsm_perf_ctrl_regs[4];
|
|
|
|
static void init_sm_dsm_reg_info(void)
|
|
{
|
|
if (_ovr_perf_regs[0] != 0)
|
|
return;
|
|
|
|
_ovr_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r();
|
|
_ovr_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r();
|
|
_ovr_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
|
|
_ovr_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
|
|
_ovr_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r();
|
|
_ovr_perf_regs[5] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r();
|
|
_ovr_perf_regs[6] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r();
|
|
_ovr_perf_regs[7] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r();
|
|
_ovr_perf_regs[8] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r();
|
|
_ovr_perf_regs[9] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r();
|
|
_ovr_perf_regs[10] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r();
|
|
_ovr_perf_regs[11] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r();
|
|
_ovr_perf_regs[12] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r();
|
|
_ovr_perf_regs[13] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_r();
|
|
_ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r();
|
|
_ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r();
|
|
_ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r();
|
|
|
|
|
|
_sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r();
|
|
_sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r();
|
|
_sm_dsm_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r();
|
|
_sm_dsm_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r();
|
|
_sm_dsm_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r();
|
|
|
|
_sm_dsm_perf_ctrl_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r();
|
|
_sm_dsm_perf_ctrl_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r();
|
|
_sm_dsm_perf_ctrl_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r();
|
|
_sm_dsm_perf_ctrl_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r();
|
|
|
|
}
|
|
|
|
/* TBD: would like to handle this elsewhere, at a higher level.
|
|
* these are currently constructed in a "test-then-write" style
|
|
* which makes it impossible to know externally whether a ctx
|
|
* write will actually occur. so later we should put a lazy,
|
|
* map-and-hold system in the patch write state */
|
|
int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
|
|
struct channel_ctx_gk20a *ch_ctx,
|
|
u32 addr, u32 data,
|
|
u8 *context)
|
|
{
|
|
u32 num_gpc = g->gr.gpc_count;
|
|
u32 num_tpc;
|
|
u32 tpc, gpc, reg;
|
|
u32 chk_addr;
|
|
u32 vaddr_lo;
|
|
u32 vaddr_hi;
|
|
u32 tmp;
|
|
|
|
init_sm_dsm_reg_info();
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
|
|
|
|
for (reg = 0; reg < _num_ovr_perf_regs; reg++) {
|
|
for (gpc = 0; gpc < num_gpc; gpc++) {
|
|
num_tpc = g->gr.gpc_tpc_count[gpc];
|
|
for (tpc = 0; tpc < num_tpc; tpc++) {
|
|
chk_addr = ((proj_gpc_stride_v() * gpc) +
|
|
(proj_tpc_in_gpc_stride_v() * tpc) +
|
|
_ovr_perf_regs[reg]);
|
|
if (chk_addr != addr)
|
|
continue;
|
|
/* reset the patch count from previous
|
|
runs,if ucode has already processed
|
|
it */
|
|
tmp = gk20a_mem_rd32(context +
|
|
ctxsw_prog_main_image_patch_count_o(), 0);
|
|
|
|
if (!tmp)
|
|
ch_ctx->patch_ctx.data_count = 0;
|
|
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx,
|
|
addr, data, true);
|
|
|
|
vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
|
|
vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
|
|
|
|
gk20a_mem_wr32(context +
|
|
ctxsw_prog_main_image_patch_count_o(),
|
|
0, ch_ctx->patch_ctx.data_count);
|
|
gk20a_mem_wr32(context +
|
|
ctxsw_prog_main_image_patch_adr_lo_o(),
|
|
0, vaddr_lo);
|
|
gk20a_mem_wr32(context +
|
|
ctxsw_prog_main_image_patch_adr_hi_o(),
|
|
0, vaddr_hi);
|
|
|
|
/* we're not caching these on cpu side,
|
|
but later watch for it */
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Program the quad/half read selectors of the TPC that @offset addresses.
 *
 * Bit 0 of @quad goes into the read_quad_ctl field of the TPC's SM
 * HALFCTL_CTRL register and bit 1 into the read_half_ctl field of its SM
 * DEBUG_SFE_CONTROL register. The GPC and TPC numbers are decoded from
 * @offset.
 */
static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
{
	u32 quad_sel = quad & 0x1;		/* first bit tells us quad */
	u32 half_sel = (quad >> 1) & 0x1;	/* second bit tells us half */
	u32 gpc_idx, tpc_idx;
	u32 unit_base;
	u32 reg_addr;
	u32 val;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset);

	gpc_idx = pri_get_gpc_num(offset);
	tpc_idx = pri_get_tpc_num(pri_gpccs_addr_mask(offset));

	/* distance of this GPC/TPC from the gpc0/tpc0 registers */
	unit_base = gpc_idx * proj_gpc_stride_v() +
		    tpc_idx * proj_tpc_in_gpc_stride_v();

	/* quad select */
	reg_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + unit_base;
	val = gk20a_readl(g, reg_addr);
	val = set_field(val,
		gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(),
		gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_sel));
	gk20a_writel(g, reg_addr, val);

	/* half select */
	reg_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + unit_base;
	val = gk20a_readl(g, reg_addr);
	val = set_field(val,
		gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(),
		gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_sel));
	gk20a_writel(g, reg_addr, val);
}
|
|
|
|
#define ILLEGAL_ID (~0)
|
|
|
|
static inline bool check_main_image_header_magic(void *context)
|
|
{
|
|
u32 magic = gk20a_mem_rd32(context +
|
|
ctxsw_prog_main_image_magic_value_o(), 0);
|
|
gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic);
|
|
return magic == ctxsw_prog_main_image_magic_value_v_value_v();
|
|
}
|
|
static inline bool check_local_header_magic(void *context)
|
|
{
|
|
u32 magic = gk20a_mem_rd32(context +
|
|
ctxsw_prog_local_magic_value_o(), 0);
|
|
gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic);
|
|
return magic == ctxsw_prog_local_magic_value_v_value_v();
|
|
|
|
}
|
|
|
|
/*
 * Size in bytes of a ctxsw ucode header.
 * most likely dupe of ctxsw_gpccs_header__size_1_v()
 */
static inline int ctxsw_prog_ucode_header_size_in_bytes(void)
{
	enum { UCODE_HEADER_SIZE_BYTES = 256 };

	return UCODE_HEADER_SIZE_BYTES;
}
|
|
|
|
void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g,
|
|
u32 *num_sm_dsm_perf_regs,
|
|
u32 **sm_dsm_perf_regs,
|
|
u32 *perf_register_stride)
|
|
{
|
|
*num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
|
|
*sm_dsm_perf_regs = _sm_dsm_perf_regs;
|
|
*perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
|
|
}
|
|
|
|
void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
|
|
u32 *num_sm_dsm_perf_ctrl_regs,
|
|
u32 **sm_dsm_perf_ctrl_regs,
|
|
u32 *ctrl_register_stride)
|
|
{
|
|
*num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
|
|
*sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
|
|
*ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
|
|
}
|
|
|
|
static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
|
|
u32 addr,
|
|
bool is_quad, u32 quad,
|
|
u32 *context_buffer,
|
|
u32 context_buffer_size,
|
|
u32 *priv_offset)
|
|
{
|
|
u32 i, data32;
|
|
u32 gpc_num, tpc_num;
|
|
u32 num_gpcs, num_tpcs;
|
|
u32 chk_addr;
|
|
u32 ext_priv_offset, ext_priv_size;
|
|
void *context;
|
|
u32 offset_to_segment, offset_to_segment_end;
|
|
u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
|
|
u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
|
|
u32 num_ext_gpccs_ext_buffer_segments;
|
|
u32 inter_seg_offset;
|
|
u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1);
|
|
u32 max_tpc_count;
|
|
u32 *sm_dsm_perf_ctrl_regs = NULL;
|
|
u32 num_sm_dsm_perf_ctrl_regs = 0;
|
|
u32 *sm_dsm_perf_regs = NULL;
|
|
u32 num_sm_dsm_perf_regs = 0;
|
|
u32 buffer_segments_size = 0;
|
|
u32 marker_size = 0;
|
|
u32 control_register_stride = 0;
|
|
u32 perf_register_stride = 0;
|
|
|
|
/* Only have TPC registers in extended region, so if not a TPC reg,
|
|
then return error so caller can look elsewhere. */
|
|
if (pri_is_gpc_addr(addr)) {
|
|
u32 gpc_addr = 0;
|
|
gpc_num = pri_get_gpc_num(addr);
|
|
gpc_addr = pri_gpccs_addr_mask(addr);
|
|
if (pri_is_tpc_addr(gpc_addr))
|
|
tpc_num = pri_get_tpc_num(gpc_addr);
|
|
else
|
|
return -EINVAL;
|
|
|
|
gk20a_dbg_info(" gpc = %d tpc = %d",
|
|
gpc_num, tpc_num);
|
|
} else
|
|
return -EINVAL;
|
|
|
|
buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
|
|
/* note below is in words/num_registers */
|
|
marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
|
|
|
|
context = context_buffer;
|
|
/* sanity check main header */
|
|
if (!check_main_image_header_magic(context)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Invalid main header: magic value");
|
|
return -EINVAL;
|
|
}
|
|
num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
|
|
if (gpc_num >= num_gpcs) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"GPC 0x%08x is greater than total count 0x%08x!\n",
|
|
gpc_num, num_gpcs);
|
|
return -EINVAL;
|
|
}
|
|
|
|
data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0);
|
|
ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
|
|
if (0 == ext_priv_size) {
|
|
gk20a_dbg_info(" No extended memory in context buffer");
|
|
return -EINVAL;
|
|
}
|
|
ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);
|
|
|
|
offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
|
|
offset_to_segment_end = offset_to_segment +
|
|
(ext_priv_size * buffer_segments_size);
|
|
|
|
/* check local header magic */
|
|
context += ctxsw_prog_ucode_header_size_in_bytes();
|
|
if (!check_local_header_magic(context)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Invalid local header: magic value\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* See if the incoming register address is in the first table of
|
|
* registers. We check this by decoding only the TPC addr portion.
|
|
* If we get a hit on the TPC bit, we then double check the address
|
|
* by computing it from the base gpc/tpc strides. Then make sure
|
|
* it is a real match.
|
|
*/
|
|
g->ops.gr.get_sm_dsm_perf_regs(g, &num_sm_dsm_perf_regs,
|
|
&sm_dsm_perf_regs,
|
|
&perf_register_stride);
|
|
|
|
init_sm_dsm_reg_info();
|
|
|
|
for (i = 0; i < num_sm_dsm_perf_regs; i++) {
|
|
if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) {
|
|
sm_dsm_perf_reg_id = i;
|
|
|
|
gk20a_dbg_info("register match: 0x%08x",
|
|
sm_dsm_perf_regs[i]);
|
|
|
|
chk_addr = (proj_gpc_base_v() +
|
|
(proj_gpc_stride_v() * gpc_num) +
|
|
proj_tpc_in_gpc_base_v() +
|
|
(proj_tpc_in_gpc_stride_v() * tpc_num) +
|
|
(sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask));
|
|
|
|
if (chk_addr != addr) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Oops addr miss-match! : 0x%08x != 0x%08x\n",
|
|
addr, chk_addr);
|
|
return -EINVAL;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Didn't find reg in supported group 1.
|
|
* so try the second group now */
|
|
g->ops.gr.get_sm_dsm_perf_ctrl_regs(g, &num_sm_dsm_perf_ctrl_regs,
|
|
&sm_dsm_perf_ctrl_regs,
|
|
&control_register_stride);
|
|
|
|
if (ILLEGAL_ID == sm_dsm_perf_reg_id) {
|
|
for (i = 0; i < num_sm_dsm_perf_ctrl_regs; i++) {
|
|
if ((addr & tpc_gpc_mask) ==
|
|
(sm_dsm_perf_ctrl_regs[i] & tpc_gpc_mask)) {
|
|
sm_dsm_perf_ctrl_reg_id = i;
|
|
|
|
gk20a_dbg_info("register match: 0x%08x",
|
|
sm_dsm_perf_ctrl_regs[i]);
|
|
|
|
chk_addr = (proj_gpc_base_v() +
|
|
(proj_gpc_stride_v() * gpc_num) +
|
|
proj_tpc_in_gpc_base_v() +
|
|
(proj_tpc_in_gpc_stride_v() * tpc_num) +
|
|
(sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] &
|
|
tpc_gpc_mask));
|
|
|
|
if (chk_addr != addr) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Oops addr miss-match! : 0x%08x != 0x%08x\n",
|
|
addr, chk_addr);
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((ILLEGAL_ID == sm_dsm_perf_ctrl_reg_id) &&
|
|
(ILLEGAL_ID == sm_dsm_perf_reg_id))
|
|
return -EINVAL;
|
|
|
|
/* Skip the FECS extended header, nothing there for us now. */
|
|
offset_to_segment += buffer_segments_size;
|
|
|
|
/* skip through the GPCCS extended headers until we get to the data for
|
|
* our GPC. The size of each gpc extended segment is enough to hold the
|
|
* max tpc count for the gpcs,in 256b chunks.
|
|
*/
|
|
|
|
max_tpc_count = proj_scal_litter_num_tpc_per_gpc_v();
|
|
|
|
num_ext_gpccs_ext_buffer_segments = (u32)((max_tpc_count + 1) / 2);
|
|
|
|
offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
|
|
buffer_segments_size * gpc_num);
|
|
|
|
num_tpcs = g->gr.gpc_tpc_count[gpc_num];
|
|
|
|
/* skip the head marker to start with */
|
|
inter_seg_offset = marker_size;
|
|
|
|
if (ILLEGAL_ID != sm_dsm_perf_ctrl_reg_id) {
|
|
/* skip over control regs of TPC's before the one we want.
|
|
* then skip to the register in this tpc */
|
|
inter_seg_offset = inter_seg_offset +
|
|
(tpc_num * control_register_stride) +
|
|
sm_dsm_perf_ctrl_reg_id;
|
|
} else {
|
|
/* skip all the control registers */
|
|
inter_seg_offset = inter_seg_offset +
|
|
(num_tpcs * control_register_stride);
|
|
|
|
/* skip the marker between control and counter segments */
|
|
inter_seg_offset += marker_size;
|
|
|
|
/* skip over counter regs of TPCs before the one we want */
|
|
inter_seg_offset = inter_seg_offset +
|
|
(tpc_num * perf_register_stride) *
|
|
ctxsw_prog_extended_num_smpc_quadrants_v();
|
|
|
|
/* skip over the register for the quadrants we do not want.
|
|
* then skip to the register in this tpc */
|
|
inter_seg_offset = inter_seg_offset +
|
|
(perf_register_stride * quad) +
|
|
sm_dsm_perf_reg_id;
|
|
}
|
|
|
|
/* set the offset to the segment offset plus the inter segment offset to
|
|
* our register */
|
|
offset_to_segment += (inter_seg_offset * 4);
|
|
|
|
/* last sanity check: did we somehow compute an offset outside the
|
|
* extended buffer? */
|
|
if (offset_to_segment > offset_to_segment_end) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Overflow ctxsw buffer! 0x%08x > 0x%08x\n",
|
|
offset_to_segment, offset_to_segment_end);
|
|
return -EINVAL;
|
|
}
|
|
|
|
*priv_offset = offset_to_segment;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
|
|
int addr_type,/* enum ctxsw_addr_type */
|
|
u32 pri_addr,
|
|
u32 gpc_num, u32 num_tpcs,
|
|
u32 num_ppcs, u32 ppc_mask,
|
|
u32 *priv_offset)
|
|
{
|
|
u32 i;
|
|
u32 address, base_address;
|
|
u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
|
|
u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
|
|
struct aiv_gk20a *reg;
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);
|
|
|
|
if (!g->gr.ctx_vars.valid)
|
|
return -EINVAL;
|
|
|
|
/* Process the SYS/BE segment. */
|
|
if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
|
|
(addr_type == CTXSW_ADDR_TYPE_BE)) {
|
|
for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
|
|
reg = &g->gr.ctx_vars.ctxsw_regs.sys.l[i];
|
|
address = reg->addr;
|
|
sys_offset = reg->index;
|
|
|
|
if (pri_addr == address) {
|
|
*priv_offset = sys_offset;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Process the TPC segment. */
|
|
if (addr_type == CTXSW_ADDR_TYPE_TPC) {
|
|
for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
|
|
for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
|
|
reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i];
|
|
address = reg->addr;
|
|
tpc_addr = pri_tpccs_addr_mask(address);
|
|
base_address = proj_gpc_base_v() +
|
|
(gpc_num * proj_gpc_stride_v()) +
|
|
proj_tpc_in_gpc_base_v() +
|
|
(tpc_num * proj_tpc_in_gpc_stride_v());
|
|
address = base_address + tpc_addr;
|
|
/*
|
|
* The data for the TPCs is interleaved in the context buffer.
|
|
* Example with num_tpcs = 2
|
|
* 0 1 2 3 4 5 6 7 8 9 10 11 ...
|
|
* 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
|
|
*/
|
|
tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
|
|
|
|
if (pri_addr == address) {
|
|
*priv_offset = tpc_offset;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Process the PPC segment. */
|
|
if (addr_type == CTXSW_ADDR_TYPE_PPC) {
|
|
for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
|
|
for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
|
|
reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i];
|
|
address = reg->addr;
|
|
ppc_addr = pri_ppccs_addr_mask(address);
|
|
base_address = proj_gpc_base_v() +
|
|
(gpc_num * proj_gpc_stride_v()) +
|
|
proj_ppc_in_gpc_base_v() +
|
|
(ppc_num * proj_ppc_in_gpc_stride_v());
|
|
address = base_address + ppc_addr;
|
|
/*
|
|
* The data for the PPCs is interleaved in the context buffer.
|
|
* Example with numPpcs = 2
|
|
* 0 1 2 3 4 5 6 7 8 9 10 11 ...
|
|
* 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
|
|
*/
|
|
ppc_offset = (reg->index * num_ppcs) + (ppc_num * 4);
|
|
|
|
if (pri_addr == address) {
|
|
*priv_offset = ppc_offset;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* Process the GPC segment. */
|
|
if (addr_type == CTXSW_ADDR_TYPE_GPC) {
|
|
for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
|
|
reg = &g->gr.ctx_vars.ctxsw_regs.gpc.l[i];
|
|
|
|
address = reg->addr;
|
|
gpc_addr = pri_gpccs_addr_mask(address);
|
|
gpc_offset = reg->index;
|
|
|
|
base_address = proj_gpc_base_v() +
|
|
(gpc_num * proj_gpc_stride_v());
|
|
address = base_address + gpc_addr;
|
|
|
|
if (pri_addr == address) {
|
|
*priv_offset = gpc_offset;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
 * Read the PPC layout of one GPC from its local context header.
 *
 * @g:             GPU instance
 * @context:       start of the GPCCS local header to read from
 * @num_ppcs:      out, PPC count decoded from the ppc_info word
 * @ppc_mask:      out, PPC mask decoded from the ppc_info word
 * @reg_ppc_count: out, number of entries in the ctxsw PPC register list
 *
 * Returns 0 on success, -EINVAL when the ctx vars are not valid or the PPC
 * register list is empty on a chip with more than one PES per GPC.
 */
static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
						void *context,
						u32 *num_ppcs, u32 *ppc_mask,
						u32 *reg_ppc_count)
{
	u32 pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
	u32 ppc_info;

	if (!g->gr.ctx_vars.valid)
		return -EINVAL;

	/*
	 * With a single PES per GPC the PES registers are kept in the GPC
	 * reglist, so an empty PPC list is only an error when there is more
	 * than one PES per GPC.
	 */
	if ((pes_per_gpc > 1) &&
	    (g->gr.ctx_vars.ctxsw_regs.ppc.count == 0))
		return -EINVAL;

	ppc_info = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0);

	*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(ppc_info);
	*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(ppc_info);

	*reg_ppc_count = g->gr.ctx_vars.ctxsw_regs.ppc.count;

	return 0;
}
|
|
|
|
|
|
|
|
/*
|
|
* This function will return the 32 bit offset for a priv register if it is
|
|
* present in the context buffer.
|
|
*/
|
|
static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
|
u32 addr,
|
|
bool is_quad, u32 quad,
|
|
u32 *context_buffer,
|
|
u32 context_buffer_size,
|
|
u32 *priv_offset)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
u32 i, data32;
|
|
int err;
|
|
int addr_type; /*enum ctxsw_addr_type */
|
|
u32 broadcast_flags;
|
|
u32 gpc_num, tpc_num, ppc_num, be_num;
|
|
u32 num_gpcs, num_tpcs, num_ppcs;
|
|
u32 offset;
|
|
u32 sys_priv_offset, gpc_priv_offset;
|
|
u32 ppc_mask, reg_list_ppc_count;
|
|
void *context;
|
|
u32 offset_to_segment;
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
|
|
|
|
err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
|
|
&gpc_num, &tpc_num, &ppc_num, &be_num,
|
|
&broadcast_flags);
|
|
if (err)
|
|
return err;
|
|
|
|
context = context_buffer;
|
|
if (!check_main_image_header_magic(context)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Invalid main header: magic value");
|
|
return -EINVAL;
|
|
}
|
|
num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
|
|
|
|
/* Parse the FECS local header. */
|
|
context += ctxsw_prog_ucode_header_size_in_bytes();
|
|
if (!check_local_header_magic(context)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Invalid FECS local header: magic value\n");
|
|
return -EINVAL;
|
|
}
|
|
data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
|
|
sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
|
|
|
|
/* If found in Ext buffer, ok.
|
|
* If it failed and we expected to find it there (quad offset)
|
|
* then return the error. Otherwise continue on.
|
|
*/
|
|
err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
|
|
addr, is_quad, quad, context_buffer,
|
|
context_buffer_size, priv_offset);
|
|
if (!err || (err && is_quad))
|
|
return err;
|
|
|
|
if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
|
|
(addr_type == CTXSW_ADDR_TYPE_BE)) {
|
|
/* Find the offset in the FECS segment. */
|
|
offset_to_segment = sys_priv_offset *
|
|
ctxsw_prog_ucode_header_size_in_bytes();
|
|
|
|
err = gr_gk20a_process_context_buffer_priv_segment(g,
|
|
addr_type, addr,
|
|
0, 0, 0, 0,
|
|
&offset);
|
|
if (err)
|
|
return err;
|
|
|
|
*priv_offset = (offset_to_segment + offset);
|
|
return 0;
|
|
}
|
|
|
|
if ((gpc_num + 1) > num_gpcs) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"GPC %d not in this context buffer.\n",
|
|
gpc_num);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Parse the GPCCS local header(s).*/
|
|
for (i = 0; i < num_gpcs; i++) {
|
|
context += ctxsw_prog_ucode_header_size_in_bytes();
|
|
if (!check_local_header_magic(context)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"Invalid GPCCS local header: magic value\n");
|
|
return -EINVAL;
|
|
|
|
}
|
|
data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
|
|
gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
|
|
|
|
err = gr_gk20a_determine_ppc_configuration(g, context,
|
|
&num_ppcs, &ppc_mask,
|
|
®_list_ppc_count);
|
|
if (err)
|
|
return err;
|
|
|
|
num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0);
|
|
|
|
if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"GPC %d TPC %d not in this context buffer.\n",
|
|
gpc_num, tpc_num);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Find the offset in the GPCCS segment.*/
|
|
if (i == gpc_num) {
|
|
offset_to_segment = gpc_priv_offset *
|
|
ctxsw_prog_ucode_header_size_in_bytes();
|
|
|
|
if (addr_type == CTXSW_ADDR_TYPE_TPC) {
|
|
/*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/
|
|
} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
|
|
/* The ucode stores TPC data before PPC data.
|
|
* Advance offset past TPC data to PPC data. */
|
|
offset_to_segment +=
|
|
((gr->ctx_vars.ctxsw_regs.tpc.count *
|
|
num_tpcs) << 2);
|
|
} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
|
|
/* The ucode stores TPC/PPC data before GPC data.
|
|
* Advance offset past TPC/PPC data to GPC data. */
|
|
/* note 1 PES_PER_GPC case */
|
|
u32 litter_num_pes_per_gpc =
|
|
proj_scal_litter_num_pes_per_gpc_v();
|
|
if (litter_num_pes_per_gpc > 1) {
|
|
offset_to_segment +=
|
|
(((gr->ctx_vars.ctxsw_regs.tpc.count *
|
|
num_tpcs) << 2) +
|
|
((reg_list_ppc_count * num_ppcs) << 2));
|
|
} else {
|
|
offset_to_segment +=
|
|
((gr->ctx_vars.ctxsw_regs.tpc.count *
|
|
num_tpcs) << 2);
|
|
}
|
|
} else {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
" Unknown address type.\n");
|
|
return -EINVAL;
|
|
}
|
|
err = gr_gk20a_process_context_buffer_priv_segment(g,
|
|
addr_type, addr,
|
|
i, num_tpcs,
|
|
num_ppcs, ppc_mask,
|
|
&offset);
|
|
if (err)
|
|
return -EINVAL;
|
|
|
|
*priv_offset = offset_to_segment + offset;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
|
|
/*
 * Execute a batch of debugger register operations against the graphics
 * context of channel @ch.
 *
 * @ch:             channel whose context is targeted
 * @ctx_ops:        array of register operations; read results and per-op
 *                  status are written back into it
 * @num_ops:        number of entries in @ctx_ops
 * @num_ctx_wr_ops: number of context write ops contained in @ctx_ops
 * @num_ctx_rd_ops: number of context read ops contained in @ctx_ops
 *
 * Context switching is disabled for the duration of the call. If @ch is the
 * context currently resident on the engine, the ops are applied directly to
 * the registers; otherwise they are applied to the context image in memory.
 * In both cases all writes are performed first, then all reads.
 * Ops of type GLOBAL are skipped.
 *
 * Returns 0 on success or a negative error code (failure to stop/restart
 * ctxsw, -ENOMEM).
 * NOTE(review): in the context-image path err still holds the result of the
 * last gr_gk20a_get_ctx_buffer_offsets() call when the function returns, so
 * an invalid offset on the last processed op makes the whole call fail while
 * one on an earlier op does not - confirm whether that is intended.
 */
int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
			  struct nvhost_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
{
	struct gk20a *g = ch->g;
	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
	void *ctx_ptr = NULL;
	int curr_gr_chid, curr_gr_ctx;
	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
	u32 i, j, offset, v;
	u32 max_offsets = proj_scal_litter_num_gpcs_v() *
		proj_scal_litter_num_tpc_per_gpc_v();
	u32 *offsets = NULL;
	u32 *offset_addrs = NULL;
	u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
	int err, pass;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
		   num_ctx_wr_ops, num_ctx_rd_ops);

	/* disable channel switching.
	 * at that point the hardware state can be inspected to
	 * determine if the context we're interested in is current.
	 */
	err = gr_gk20a_disable_ctxsw(g);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw");
		/* this should probably be ctx-fatal... */
		goto cleanup;
	}

	restart_gr_ctxsw = true;

	curr_gr_ctx  = gk20a_readl(g, gr_fecs_current_ctx_r());
	curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx);
	ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
	if (ch_is_curr_ctx) {
		/* context is resident: operate on the registers directly */
		for (pass = 0; pass < 2; pass++) {
			ctx_op_nr = 0;
			for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
				/* only do ctx ops and only on the right pass */
				if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
				    (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
				     ((pass == 1) && !reg_op_is_read(ctx_ops[i].op))))
					continue;

				/* if this is a quad access, setup for special access*/
				if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD)
						&& g->ops.gr.access_smpc_reg)
					g->ops.gr.access_smpc_reg(g,
							ctx_ops[i].quad,
							ctx_ops[i].offset);
				offset = ctx_ops[i].offset;

				if (pass == 0) { /* write pass */
					/* read-modify-write of the low word */
					v = gk20a_readl(g, offset);
					v &= ~ctx_ops[i].and_n_mask_lo;
					v |= ctx_ops[i].value_lo;
					gk20a_writel(g, offset, v);

					gk20a_dbg(gpu_dbg_gpu_dbg,
						   "direct wr: offset=0x%x v=0x%x",
						   offset, v);

					if (ctx_ops[i].op == REGOP(WRITE_64)) {
						/* high word lives at offset + 4 */
						v = gk20a_readl(g, offset + 4);
						v &= ~ctx_ops[i].and_n_mask_hi;
						v |= ctx_ops[i].value_hi;
						gk20a_writel(g, offset + 4, v);

						gk20a_dbg(gpu_dbg_gpu_dbg,
							   "direct wr: offset=0x%x v=0x%x",
							   offset + 4, v);
					}

				} else { /* read pass */
					ctx_ops[i].value_lo =
						gk20a_readl(g, offset);

					gk20a_dbg(gpu_dbg_gpu_dbg,
						   "direct rd: offset=0x%x v=0x%x",
						   offset, ctx_ops[i].value_lo);

					if (ctx_ops[i].op == REGOP(READ_64)) {
						ctx_ops[i].value_hi =
							gk20a_readl(g, offset + 4);

						/* trace the high word that was just read */
						gk20a_dbg(gpu_dbg_gpu_dbg,
							   "direct rd: offset=0x%x v=0x%x",
							   offset + 4, ctx_ops[i].value_hi);
					} else
						ctx_ops[i].value_hi = 0;
				}
				ctx_op_nr++;
			}
		}
		goto cleanup;
	}

	/* they're the same size, so just use one alloc for both */
	offsets = kzalloc(2 * sizeof(u32) * max_offsets, GFP_KERNEL);
	if (!offsets) {
		err = -ENOMEM;
		goto cleanup;
	}
	offset_addrs = offsets + max_offsets;

	/* would have been a variant of gr_gk20a_apply_instmem_overrides */
	/* recoded in-place instead.*/
	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
			0, pgprot_dmacoherent(PAGE_KERNEL));
	if (!ctx_ptr) {
		err = -ENOMEM;
		goto cleanup;
	}

	gk20a_mm_l2_flush(g, true);

	/* write to appropriate place in context image,
	 * first have to figure out where that really is */

	/* first pass is writes, second reads */
	for (pass = 0; pass < 2; pass++) {
		ctx_op_nr = 0;
		for (i = 0; (ctx_op_nr < num_ctx_ops[pass]) && (i < num_ops); ++i) {
			u32 num_offsets;

			/* only do ctx ops and only on the right pass */
			if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
			    (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
			     ((pass == 1) && !reg_op_is_read(ctx_ops[i].op))))
				continue;

			err = gr_gk20a_get_ctx_buffer_offsets(g,
						ctx_ops[i].offset,
						max_offsets,
						offsets, offset_addrs,
						&num_offsets,
						ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
						ctx_ops[i].quad);
			if (err) {
				/* flag the op and carry on with the next one */
				gk20a_dbg(gpu_dbg_gpu_dbg,
					   "ctx op invalid offset: offset=0x%x",
					   ctx_ops[i].offset);
				ctx_ops[i].status =
					NVHOST_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
				continue;
			}

			/* if this is a quad access, setup for special access*/
			if (ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD) &&
					g->ops.gr.access_smpc_reg)
				g->ops.gr.access_smpc_reg(g, ctx_ops[i].quad,
							 ctx_ops[i].offset);

			for (j = 0; j < num_offsets; j++) {
				/* sanity check, don't write outside, worst case */
				if (offsets[j] >= g->gr.ctx_vars.golden_image_size)
					continue;
				if (pass == 0) { /* write pass */
					v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0);
					v &= ~ctx_ops[i].and_n_mask_lo;
					v |= ctx_ops[i].value_lo;
					gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v);

					gk20a_dbg(gpu_dbg_gpu_dbg,
						   "context wr: offset=0x%x v=0x%x",
						   offsets[j], v);

					if (ctx_ops[i].op == REGOP(WRITE_64)) {
						v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0);
						v &= ~ctx_ops[i].and_n_mask_hi;
						v |= ctx_ops[i].value_hi;
						gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v);

						gk20a_dbg(gpu_dbg_gpu_dbg,
							   "context wr: offset=0x%x v=0x%x",
							   offsets[j] + 4, v);
					}

					/* check to see if we need to add a special WAR
					   for some of the SMPC perf regs */
					gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
							v, ctx_ptr);

				} else { /* read pass */
					/* reads always use the first offset */
					ctx_ops[i].value_lo =
						gk20a_mem_rd32(ctx_ptr + offsets[0], 0);

					gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
						   offsets[0], ctx_ops[i].value_lo);

					if (ctx_ops[i].op == REGOP(READ_64)) {
						ctx_ops[i].value_hi =
							gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0);

						gk20a_dbg(gpu_dbg_gpu_dbg,
							   "context rd: offset=0x%x v=0x%x",
							   offsets[0] + 4, ctx_ops[i].value_hi);
					} else
						ctx_ops[i].value_hi = 0;
				}
			}
			ctx_op_nr++;
		}
	}
#if 0
	/* flush cpu caches for the ctx buffer? only if cpu cached, of course.
	 * they aren't, yet */
	if (cached) {
		FLUSH_CPU_DCACHE(ctx_ptr,
			 sg_phys(ch_ctx->gr_ctx.mem.ref), size);
	}
#endif

 cleanup:
	/* kfree(NULL) is a no-op, so no guard is needed */
	kfree(offsets);

	if (ctx_ptr)
		vunmap(ctx_ptr);

	if (restart_gr_ctxsw) {
		int tmp_err = gr_gk20a_enable_ctxsw(g);
		if (tmp_err) {
			gk20a_err(dev_from_gk20a(g), "unable to restart ctxsw!\n");
			err = tmp_err;
		}
	}

	return err;
}
|
|
|
|
static void gr_gk20a_cb_size_default(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
|
|
gr->attrib_cb_default_size =
|
|
gr_gpc0_ppc0_cbm_cfg_size_default_v();
|
|
gr->alpha_cb_default_size =
|
|
gr_gpc0_ppc0_cbm_cfg2_size_default_v();
|
|
}
|
|
|
|
static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g)
|
|
{
|
|
struct gr_gk20a *gr = &g->gr;
|
|
int size;
|
|
|
|
gr->attrib_cb_size = gr->attrib_cb_default_size;
|
|
gr->alpha_cb_size = gr->alpha_cb_default_size
|
|
+ (gr->alpha_cb_default_size >> 1);
|
|
|
|
size = gr->attrib_cb_size *
|
|
gr_gpc0_ppc0_cbm_cfg_size_granularity_v() *
|
|
gr->max_tpc_count;
|
|
|
|
size += gr->alpha_cb_size *
|
|
gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() *
|
|
gr->max_tpc_count;
|
|
|
|
return size;
|
|
}
|
|
|
|
void gr_gk20a_commit_global_pagepool(struct gk20a *g,
|
|
struct channel_ctx_gk20a *ch_ctx,
|
|
u64 addr, u32 size, bool patch)
|
|
{
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
|
|
gr_scc_pagepool_base_addr_39_8_f(addr), patch);
|
|
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
|
|
gr_scc_pagepool_total_pages_f(size) |
|
|
gr_scc_pagepool_valid_true_f(), patch);
|
|
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
|
|
gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
|
|
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
|
|
gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
|
|
|
|
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(),
|
|
gr_pd_pagepool_total_pages_f(size) |
|
|
gr_pd_pagepool_valid_true_f(), patch);
|
|
}
|
|
|
|
void gk20a_init_gr(struct gk20a *g)
|
|
{
|
|
init_waitqueue_head(&g->gr.init_wq);
|
|
}
|
|
|
|
void gk20a_init_gr_ops(struct gpu_ops *gops)
|
|
{
|
|
gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
|
|
gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults;
|
|
gops->gr.cb_size_default = gr_gk20a_cb_size_default;
|
|
gops->gr.calc_global_ctx_buffer_size =
|
|
gr_gk20a_calc_global_ctx_buffer_size;
|
|
gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb;
|
|
gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb;
|
|
gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager;
|
|
gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool;
|
|
gops->gr.handle_sw_method = gr_gk20a_handle_sw_method;
|
|
gops->gr.set_alpha_circular_buffer_size =
|
|
gk20a_gr_set_circular_buffer_size;
|
|
gops->gr.set_circular_buffer_size =
|
|
gk20a_gr_set_alpha_circular_buffer_size;
|
|
gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
|
|
gops->gr.is_valid_class = gr_gk20a_is_valid_class;
|
|
gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs;
|
|
gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs;
|
|
gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep;
|
|
gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask;
|
|
gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables;
|
|
gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
|
|
}
|