gpu: nvgpu: support for hwpm context switching

Add support for HWPM (hardware performance monitor) context switching. A new
NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE debugger ioctl lets a bound channel enable
or disable PM context switching: the driver allocates and maps a per-channel PM
context buffer, programs the PM mode and buffer pointer in the context image
header, and builds an address-to-offset map so regops can read and write
context-switched PM registers.

Bug 1648200

Change-Id: I482899bf165cd2ef24bb8617be16df01218e462f
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1120450
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author: Peter Daifuku <pdaifuku@nvidia.com>
Date: 2016-03-09 19:10:20 -08:00
Committed by: Terje Bergstrom
Commit: 37155b65f1 (parent 6675c03603)
12 changed files with 849 additions and 65 deletions


@@ -54,6 +54,7 @@ struct channel_ctx_gk20a {
struct gr_ctx_desc *gr_ctx;
struct patch_desc patch_ctx;
struct zcull_ctx_desc zcull_ctx;
struct pm_ctx_desc pm_ctx;
u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
bool global_ctx_buffer_mapped;


@@ -457,6 +457,9 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args);
static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args);
static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
@@ -582,6 +585,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
(struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf);
break;
case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE:
err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s,
(struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf);
break;
case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS:
err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s,
(struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
@@ -880,7 +888,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
ch_gk20a = dbg_s->ch;
if (!ch_gk20a) {
- gk20a_err(dev_from_gk20a(dbg_s->g),
+ gk20a_err(dev_from_gk20a(g),
"no bound channel for smpc ctxsw mode update\n");
err = -EINVAL;
goto clean_up;
@@ -889,13 +897,48 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
if (err) {
- gk20a_err(dev_from_gk20a(dbg_s->g),
+ gk20a_err(dev_from_gk20a(g),
"error (%d) during smpc ctxsw mode update\n", err);
goto clean_up;
}
err = g->ops.regops.apply_smpc_war(dbg_s);
clean_up:
mutex_unlock(&g->dbg_sessions_lock);
return err;
}
static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
{
int err;
struct gk20a *g = get_gk20a(dbg_s->pdev);
struct channel_gk20a *ch_gk20a;
gk20a_dbg_fn("%s pm ctxsw mode = %d",
dev_name(dbg_s->dev), args->mode);
/* Take the global lock, since we'll be doing global regops */
mutex_lock(&g->dbg_sessions_lock);
ch_gk20a = dbg_s->ch;
if (!ch_gk20a) {
gk20a_err(dev_from_gk20a(g),
"no bound channel for pm ctxsw mode update\n");
err = -EINVAL;
goto clean_up;
}
err = gr_gk20a_update_hwpm_ctxsw_mode(g, ch_gk20a,
args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
if (err)
gk20a_err(dev_from_gk20a(g),
"error (%d) during pm ctxsw mode update\n", err);
/* gk20a would require a WAR to set the core PM_ENABLE bit, not
* added here with gk20a being deprecated
*/
clean_up:
mutex_unlock(&g->dbg_sessions_lock);
return err;


@@ -3,7 +3,7 @@
*
* GK20A Graphics Context
*
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -281,7 +281,60 @@ static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
netlist_num);
break;
case NETLIST_REGIONID_CTXREG_PMPPC:
- gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC skipped");
+ gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ppc);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_NVPERF_CTXREG_SYS:
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_CTXREG_SYS");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_NVPERF_FBP_CTXREGS:
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_FBP_CTXREGS");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.fbp);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_NVPERF_CTXREG_GPC:
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_CTXREG_GPC");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.perf_gpc);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_NVPERF_FBP_ROUTER:
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_FBP_ROUTER");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.fbp_router);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_NVPERF_GPC_ROUTER:
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_GPC_ROUTER");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.gpc_router);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_CTXREG_PMLTC:
gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMLTC");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ltc);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_CTXREG_PMFBPA:
gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMFBPA");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.pm_fbpa);
if (err)
goto clean_up;
break;
default:
gk20a_dbg_info("unrecognized region %d skipped", i);
@@ -319,6 +372,14 @@ clean_up:
kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_ppc.l);
kfree(g->gr.ctx_vars.ctxsw_regs.perf_sys.l);
kfree(g->gr.ctx_vars.ctxsw_regs.fbp.l);
kfree(g->gr.ctx_vars.ctxsw_regs.perf_gpc.l);
kfree(g->gr.ctx_vars.ctxsw_regs.fbp_router.l);
kfree(g->gr.ctx_vars.ctxsw_regs.gpc_router.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_ltc.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l);
release_firmware(netlist_fw);
err = -ENOENT;
}


@@ -1,7 +1,7 @@
/*
* GK20A Graphics Context
*
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -75,6 +75,13 @@ union __max_name {
#define NETLIST_REGIONID_NETLIST_NUM 18
#define NETLIST_REGIONID_CTXREG_PPC 19
#define NETLIST_REGIONID_CTXREG_PMPPC 20
#define NETLIST_REGIONID_NVPERF_CTXREG_SYS 21
#define NETLIST_REGIONID_NVPERF_FBP_CTXREGS 22
#define NETLIST_REGIONID_NVPERF_CTXREG_GPC 23
#define NETLIST_REGIONID_NVPERF_FBP_ROUTER 24
#define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25
#define NETLIST_REGIONID_CTXREG_PMLTC 26
#define NETLIST_REGIONID_CTXREG_PMFBPA 27
struct netlist_region {
u32 region_id;
@@ -114,6 +121,11 @@ struct u32_list_gk20a {
u32 count;
};
struct ctxsw_buf_offset_map_entry {
u32 addr; /* Register address */
u32 offset; /* Offset in ctxt switch buffer */
};
static inline
struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl)
{


@@ -27,6 +27,8 @@
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/nvhost.h>
#include <linux/sort.h>
#include <linux/bsearch.h>
#include <trace/events/gk20a.h>
#include "gk20a.h"
@@ -59,6 +61,10 @@
#include "ctxsw_trace_gk20a.h"
#define BLK_SIZE (256)
#define NV_PMM_FBP_STRIDE 0x1000
#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
#define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000
#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g);
static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
@@ -1591,9 +1597,17 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
u32 data;
int ret;
gk20a_dbg_fn("");
if (!ch_ctx->gr_ctx) {
gk20a_err(dev_from_gk20a(g), "no graphics context allocated");
return -EFAULT;
}
c->g->ops.fifo.disable_channel(c);
ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
if (ret) {
c->g->ops.fifo.enable_channel(c);
gk20a_err(dev_from_gk20a(g),
"failed to preempt channel\n");
return ret;
@@ -1603,11 +1617,18 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
Flush and invalidate before cpu update. */
g->ops.mm.l2_flush(g, true);
- if (!ch_ctx->gr_ctx) {
- gk20a_err(dev_from_gk20a(g), "no graphics context allocated");
- return -EFAULT;
- }
ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
0, pgprot_writecombine(PAGE_KERNEL));
- if (!ctx_ptr)
+ if (!ctx_ptr) {
c->g->ops.fifo.enable_channel(c);
return -ENOMEM;
}
data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
@@ -1620,13 +1641,137 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
vunmap(ctx_ptr);
/* enable channel */
- gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
- gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
- ccsr_channel_enable_set_true_f());
+ c->g->ops.fifo.enable_channel(c);
return 0;
}
int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
struct channel_gk20a *c,
bool enable_hwpm_ctxsw)
{
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
void *ctx_ptr = NULL;
void *pm_ctx_ptr;
u32 data, virt_addr;
int ret;
gk20a_dbg_fn("");
if (!ch_ctx->gr_ctx) {
gk20a_err(dev_from_gk20a(g), "no graphics context allocated");
return -EFAULT;
}
if (enable_hwpm_ctxsw) {
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
return 0;
} else {
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
return 0;
}
c->g->ops.fifo.disable_channel(c);
ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
if (ret) {
c->g->ops.fifo.enable_channel(c);
gk20a_err(dev_from_gk20a(g),
"failed to preempt channel\n");
return ret;
}
/* Channel gr_ctx buffer is gpu cacheable.
Flush and invalidate before cpu update. */
g->ops.mm.l2_flush(g, true);
if (enable_hwpm_ctxsw) {
/* Allocate buffer if necessary */
if (pm_ctx->mem.gpu_va == 0) {
ret = gk20a_gmmu_alloc_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
g->gr.ctx_vars.pm_ctxsw_image_size,
&pm_ctx->mem);
if (ret) {
c->g->ops.fifo.enable_channel(c);
gk20a_err(dev_from_gk20a(g),
"failed to allocate pm ctxt buffer");
return ret;
}
pm_ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
&pm_ctx->mem.sgt,
pm_ctx->mem.size,
NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
gk20a_mem_flag_none, true);
if (!pm_ctx->mem.gpu_va) {
gk20a_err(dev_from_gk20a(g),
"failed to map pm ctxt buffer");
gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
&pm_ctx->mem);
c->g->ops.fifo.enable_channel(c);
return -ENOMEM;
}
}
/* Now clear the buffer */
pm_ctx_ptr = vmap(pm_ctx->mem.pages,
PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT,
0, pgprot_writecombine(PAGE_KERNEL));
if (!pm_ctx_ptr) {
ret = -ENOMEM;
goto cleanup_pm_buf;
}
memset(pm_ctx_ptr, 0, pm_ctx->mem.size);
vunmap(pm_ctx_ptr);
}
ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
0, pgprot_writecombine(PAGE_KERNEL));
if (!ctx_ptr) {
ret = -ENOMEM;
goto cleanup_pm_buf;
}
data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
data = data & ~ctxsw_prog_main_image_pm_mode_m();
if (enable_hwpm_ctxsw) {
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
/* pack upper 32 bits of virtual address into a 32 bit number
* (256 byte boundary)
*/
virt_addr = (u32)(pm_ctx->mem.gpu_va >> 8);
} else {
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
virt_addr = 0;
}
data |= pm_ctx->pm_mode;
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data);
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr);
vunmap(ctx_ptr);
/* enable channel */
c->g->ops.fifo.enable_channel(c);
return 0;
cleanup_pm_buf:
gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size,
gk20a_mem_flag_none);
gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &pm_ctx->mem);
memset(&pm_ctx->mem, 0, sizeof(struct mem_desc));
c->g->ops.fifo.enable_channel(c);
return ret;
}
/* load saved fresh copy of gloden image into channel gr_ctx */
int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
struct channel_gk20a *c)
@@ -1635,6 +1780,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
u32 virt_addr_lo;
u32 virt_addr_hi;
u32 virt_addr = 0;
u32 i, v, data;
int ret = 0;
void *ctx_ptr = NULL;
@@ -1663,15 +1809,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
- /* no user for client managed performance counter ctx */
- data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
- data = data & ~ctxsw_prog_main_image_pm_mode_m();
- data |= ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
- gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
- data);
- gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0);
/* set priv access map */
virt_addr_lo =
u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
@@ -1708,6 +1845,32 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0,
virt_addr_hi);
/* Update main header region of the context buffer with the info needed
* for PM context switching, including mode and possibly a pointer to
* the PM backing store.
*/
if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
if (ch_ctx->pm_ctx.mem.gpu_va == 0) {
gk20a_err(dev_from_gk20a(g),
"context switched pm with no pm buffer!");
vunmap(ctx_ptr);
return -EFAULT;
}
/* pack upper 32 bits of virtual address into a 32 bit number
* (256 byte boundary)
*/
virt_addr = (u32)(ch_ctx->pm_ctx.mem.gpu_va >> 8);
} else
virt_addr = 0;
data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
data = data & ~ctxsw_prog_main_image_pm_mode_m();
data |= ch_ctx->pm_ctx.pm_mode;
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data);
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr);
vunmap(ctx_ptr);
if (tegra_platform_is_linsim()) {
@@ -2205,7 +2368,6 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
int gr_gk20a_init_ctx_state(struct gk20a *g)
{
- u32 pm_ctx_image_size;
u32 ret;
struct fecs_method_op_gk20a op = {
.mailbox = { .id = 0, .data = 0,
@@ -2237,7 +2399,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
}
op.method.addr =
gr_fecs_method_push_adr_discover_pm_image_size_v();
- op.mailbox.ret = &pm_ctx_image_size;
+ op.mailbox.ret = &g->gr.ctx_vars.pm_ctxsw_image_size;
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
if (ret) {
gk20a_err(dev_from_gk20a(g),
@@ -2641,14 +2803,30 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
patch_ctx->data_count = 0;
}
static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c)
{
struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx;
struct gk20a *g = c->g;
gk20a_dbg_fn("");
if (pm_ctx->mem.gpu_va) {
gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va,
pm_ctx->mem.size, gk20a_mem_flag_none);
gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &pm_ctx->mem);
}
}
void gk20a_free_channel_ctx(struct channel_gk20a *c)
{
gr_gk20a_unmap_global_ctx_buffers(c);
gr_gk20a_free_channel_patch_ctx(c);
gr_gk20a_free_channel_pm_ctx(c);
if (!gk20a_is_channel_marked_as_tsg(c))
gr_gk20a_free_channel_gr_ctx(c);
- /* zcull_ctx, pm_ctx */
+ /* zcull_ctx */
memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
@@ -2743,6 +2921,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
}
/* PM ctxt switch is off by default */
ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
/* commit gr ctx buffer */
err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
if (err) {
@@ -2983,6 +3164,10 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
kfree(gr->ctx_vars.local_golden_image);
gr->ctx_vars.local_golden_image = NULL;
if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
nvgpu_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
gk20a_comptag_allocator_destroy(&gr->comp_tags);
}
@@ -5828,6 +6013,10 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
u32 context_buffer_size,
u32 *priv_offset);
static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
u32 addr,
u32 *priv_offset);
/* This function will decode a priv address and return the partition type and numbers. */
static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
int *addr_type, /* enum ctxsw_addr_type */
@@ -6056,14 +6245,81 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
offset_addrs[i] = priv_registers[i];
}
*num_offsets = num_registers;
cleanup:
if (!IS_ERR_OR_NULL(priv_registers))
kfree(priv_registers);
return err;
}
int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
u32 addr,
u32 max_offsets,
u32 *offsets, u32 *offset_addrs,
u32 *num_offsets)
{
u32 i;
u32 priv_offset = 0;
u32 *priv_registers;
u32 num_registers = 0;
int err = 0;
struct gr_gk20a *gr = &g->gr;
u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
/* implementation is crossed-up if either of these happen */
if (max_offsets > potential_offsets)
return -EINVAL;
if (!g->gr.ctx_vars.golden_image_initialized)
return -ENODEV;
priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL);
if (ZERO_OR_NULL_PTR(priv_registers)) {
gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets);
return -ENOMEM;
}
memset(offsets, 0, sizeof(u32) * max_offsets);
memset(offset_addrs, 0, sizeof(u32) * max_offsets);
*num_offsets = 0;
gr_gk20a_create_priv_addr_table(g, addr, priv_registers, &num_registers);
if ((max_offsets > 1) && (num_registers > max_offsets)) {
err = -EINVAL;
goto cleanup;
}
if ((max_offsets == 1) && (num_registers > 1))
num_registers = 1;
if (!g->gr.ctx_vars.local_golden_image) {
gk20a_dbg_fn("no context switch header info to work with");
err = -EINVAL;
goto cleanup;
}
for (i = 0; i < num_registers; i++) {
err = gr_gk20a_find_priv_offset_in_pm_buffer(g,
priv_registers[i],
&priv_offset);
if (err) {
gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x",
addr); /*, grPriRegStr(addr)));*/
goto cleanup;
}
offsets[i] = priv_offset;
offset_addrs[i] = priv_registers[i];
}
*num_offsets = num_registers;
cleanup:
kfree(priv_registers);
return err;
}
/* Setup some register tables. This looks hacky; our
@@ -6638,8 +6894,6 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
return 0;
}
/*
* This function will return the 32 bit offset for a priv register if it is
* present in the context buffer.
@@ -6801,6 +7055,314 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
return -EINVAL;
}
static int map_cmp(const void *a, const void *b)
{
struct ctxsw_buf_offset_map_entry *e1 =
(struct ctxsw_buf_offset_map_entry *)a;
struct ctxsw_buf_offset_map_entry *e2 =
(struct ctxsw_buf_offset_map_entry *)b;
if (e1->addr < e2->addr)
return -1;
if (e1->addr > e2->addr)
return 1;
return 0;
}
static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
struct aiv_list_gk20a *regs,
u32 *count, u32 *offset,
u32 max_cnt, u32 base, u32 mask)
{
u32 idx;
u32 cnt = *count;
u32 off = *offset;
if ((cnt + regs->count) > max_cnt)
return -EINVAL;
for (idx = 0; idx < regs->count; idx++) {
map[cnt].addr = base + (regs->l[idx].addr & mask);
map[cnt++].offset = off;
off += 4;
}
*count = cnt;
*offset = off;
return 0;
}
/* Helper function to add register entries to the register map for all
* subunits
*/
static int add_ctxsw_buffer_map_entries_subunits(
struct ctxsw_buf_offset_map_entry *map,
struct aiv_list_gk20a *regs,
u32 *count, u32 *offset,
u32 max_cnt, u32 base,
u32 num_units, u32 stride, u32 mask)
{
u32 unit;
u32 idx;
u32 cnt = *count;
u32 off = *offset;
if ((cnt + (regs->count * num_units)) > max_cnt)
return -EINVAL;
/* Data is interleaved for units in ctxsw buffer */
for (idx = 0; idx < regs->count; idx++) {
for (unit = 0; unit < num_units; unit++) {
map[cnt].addr = base + (regs->l[idx].addr & mask) +
(unit * stride);
map[cnt++].offset = off;
off += 4;
}
}
*count = cnt;
*offset = off;
return 0;
}
static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
struct ctxsw_buf_offset_map_entry *map,
u32 *count, u32 *offset, u32 max_cnt)
{
u32 num_gpcs = g->gr.gpc_count;
u32 num_ppcs, num_tpcs, gpc_num, base;
for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
num_tpcs = g->gr.gpc_tpc_count[gpc_num];
base = proj_gpc_base_v() +
(proj_gpc_stride_v() * gpc_num) + proj_tpc_in_gpc_base_v();
if (add_ctxsw_buffer_map_entries_subunits(map,
&g->gr.ctx_vars.ctxsw_regs.pm_tpc,
count, offset, max_cnt, base, num_tpcs,
proj_tpc_in_gpc_stride_v(),
(proj_tpc_in_gpc_stride_v() - 1)))
return -EINVAL;
num_ppcs = g->gr.gpc_ppc_count[gpc_num];
base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num) +
proj_ppc_in_gpc_base_v();
if (add_ctxsw_buffer_map_entries_subunits(map,
&g->gr.ctx_vars.ctxsw_regs.pm_ppc,
count, offset, max_cnt, base, num_ppcs,
proj_ppc_in_gpc_stride_v(),
(proj_ppc_in_gpc_stride_v() - 1)))
return -EINVAL;
base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
&g->gr.ctx_vars.ctxsw_regs.pm_gpc,
count, offset, max_cnt, base,
(proj_gpc_stride_v() - 1)))
return -EINVAL;
base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
&g->gr.ctx_vars.ctxsw_regs.perf_gpc,
count, offset, max_cnt, base, ~0))
return -EINVAL;
base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
&g->gr.ctx_vars.ctxsw_regs.gpc_router,
count, offset, max_cnt, base, ~0))
return -EINVAL;
*offset = ALIGN(*offset, 256);
}
return 0;
}
/*
* PM CTXSW BUFFER LAYOUT :
*|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE
*| |
*| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words
*|---------------------------------------------|
*| |
*| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
*|---------------------------------------------|
*| PADDING for 256 byte alignment |
*|---------------------------------------------|<----256 byte aligned
*| LIST_compressed_nv_perf_fbp_ctx_regs |
*| |Space allocated: numRegs * n words (for n FB units)
*|---------------------------------------------|
*| LIST_compressed_nv_perf_fbprouter_ctx_regs |
*| |Space allocated: numRegs * n words (for n FB units)
*|---------------------------------------------|
*| LIST_compressed_pm_fbpa_ctx_regs |
*| |Space allocated: numRegs * n words (for n FB units)
*|---------------------------------------------|
*| LIST_compressed_pm_ltc_ctx_regs |
*| LTC0 LTS0 |
*| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
*| LTCn LTS0 |
*| LTC0 LTS1 |
*| LTC1 LTS1 |
*| LTCn LTS1 |
*| LTC0 LTSn |
*| LTC1 LTSn |
*| LTCn LTSn |
*|---------------------------------------------|
*| PADDING for 256 byte alignment |
*|---------------------------------------------|<----256 byte aligned
*| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
*| REG0 TPC1 | all the GPC/TPC register lists
*| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned
*| LIST_pm_ctx_reg_TPC REG1 TPC0 |
*| * numTpcs REG1 TPC1 |
*| LIST_pm_ctx_reg_PPC REG1 TPCn |
*| * numPpcs REGn TPC0 |
*| LIST_pm_ctx_reg_GPC REGn TPC1 |
*| LIST_nv_perf_ctx_reg_GPC REGn TPCn |
*| ---- |--
*| GPC1 . |
*| . |<----
*|---------------------------------------------|
*= =
*| GPCn |
*= =
*|---------------------------------------------|
*/
static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
{
u32 hwpm_ctxsw_buffer_size = g->gr.ctx_vars.pm_ctxsw_image_size;
u32 hwpm_ctxsw_reg_count_max;
u32 map_size;
u32 i, count = 0;
u32 offset = 0;
struct ctxsw_buf_offset_map_entry *map;
if (hwpm_ctxsw_buffer_size == 0) {
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
"no PM Ctxsw buffer memory in context buffer");
return -EINVAL;
}
hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
map = nvgpu_alloc(map_size, true);
if (!map)
return -ENOMEM;
/* Add entries from _LIST_pm_ctx_reg_SYS */
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
goto cleanup;
/* Add entries from _LIST_nv_perf_ctx_reg_SYS */
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys,
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
goto cleanup;
offset = ALIGN(offset, 256);
/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
&g->gr.ctx_vars.ctxsw_regs.fbp,
&count, &offset,
hwpm_ctxsw_reg_count_max, 0,
g->gr.num_fbps, NV_PMM_FBP_STRIDE, ~0))
goto cleanup;
/* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
&g->gr.ctx_vars.ctxsw_regs.fbp_router,
&count, &offset,
hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps,
NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0))
goto cleanup;
/* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
&g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
&count, &offset,
hwpm_ctxsw_reg_count_max, 0,
proj_scal_litter_num_fbpas_v(),
proj_fbpa_stride_v(), ~0))
goto cleanup;
/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
&g->gr.ctx_vars.ctxsw_regs.pm_ltc,
&count, &offset,
hwpm_ctxsw_reg_count_max, 0,
g->ltc_count, proj_ltc_stride_v(), ~0))
goto cleanup;
offset = ALIGN(offset, 256);
/* Add GPC entries */
if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
hwpm_ctxsw_reg_count_max))
goto cleanup;
if (offset > hwpm_ctxsw_buffer_size) {
gk20a_err(dev_from_gk20a(g), "offset > buffer size");
goto cleanup;
}
sort(map, count, sizeof(*map), map_cmp, NULL);
g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map = map;
g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map_count = count;
gk20a_dbg_info("Reg Addr => HWPM Ctxt switch buffer offset");
for (i = 0; i < count; i++)
gk20a_dbg_info("%08x => %08x", map[i].addr, map[i].offset);
return 0;
cleanup:
gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
nvgpu_free(map);
return -EINVAL;
}
/*
* This function will return the 32 bit offset for a priv register if it is
* present in the PM context buffer.
*/
static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
u32 addr,
u32 *priv_offset)
{
struct gr_gk20a *gr = &g->gr;
int err = 0;
u32 count;
struct ctxsw_buf_offset_map_entry *map, *result, map_key;
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
/* Create map of pri address and pm offset if necessary */
if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) {
err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g);
if (err)
return err;
}
*priv_offset = 0;
map = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map;
count = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map_count;
map_key.addr = addr;
result = bsearch(&map_key, map, count, sizeof(*map), map_cmp);
if (result)
*priv_offset = result->offset;
else {
gk20a_err(dev_from_gk20a(g), "Lookup failed for address 0x%x", addr);
err = -EINVAL;
}
return err;
}
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
{
int curr_gr_ctx, curr_gr_tsgid;
@@ -6840,6 +7402,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
struct gk20a *g = ch->g;
struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
void *ctx_ptr = NULL;
void *pm_ctx_ptr = NULL;
void *base_ptr = NULL;
bool ch_is_curr_ctx, restart_gr_ctxsw = false;
u32 i, j, offset, v;
struct gr_gk20a *gr = &g->gr;
@@ -6940,15 +7504,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
}
offset_addrs = offsets + max_offsets;
- /* would have been a variant of gr_gk20a_apply_instmem_overrides */
- /* recoded in-place instead.*/
- ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
- PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
- 0, pgprot_writecombine(PAGE_KERNEL));
- if (!ctx_ptr) {
- err = -ENOMEM;
- goto cleanup;
- }
err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
if (err)
goto cleanup;
@@ -6977,13 +7532,52 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
&num_offsets,
ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
ctx_ops[i].quad);
- if (err) {
+ if (!err) {
if (!ctx_ptr) {
/* would have been a variant of
* gr_gk20a_apply_instmem_overrides,
* recoded in-place instead.
*/
ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
0, pgprot_writecombine(PAGE_KERNEL));
if (!ctx_ptr) {
err = -ENOMEM;
goto cleanup;
}
}
base_ptr = ctx_ptr;
} else {
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
ctx_ops[i].offset,
max_offsets,
offsets, offset_addrs,
&num_offsets);
if (err) {
gk20a_dbg(gpu_dbg_gpu_dbg,
"ctx op invalid offset: offset=0x%x",
ctx_ops[i].offset);
ctx_ops[i].status =
NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
continue;
}
if (!pm_ctx_ptr) {
/* Make sure ctx buffer was initialized */
if (!ch_ctx->pm_ctx.mem.pages) {
gk20a_err(dev_from_gk20a(g),
"Invalid ctx buffer");
err = -EINVAL;
goto cleanup;
}
pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages,
PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT,
0, pgprot_writecombine(PAGE_KERNEL));
if (!pm_ctx_ptr) {
err = -ENOMEM;
goto cleanup;
}
}
base_ptr = pm_ctx_ptr;
}
/* if this is a quad access, setup for special access*/
@@ -6993,24 +7587,27 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
ctx_ops[i].offset);
for (j = 0; j < num_offsets; j++) {
- /* sanity check, don't write outside, worst case */
- if (offsets[j] >= g->gr.ctx_vars.golden_image_size)
+ /* sanity check gr ctxt offsets,
+ * don't write outside, worst case
+ */
+ if ((base_ptr == ctx_ptr) &&
+ (offsets[j] >= g->gr.ctx_vars.golden_image_size))
continue;
if (pass == 0) { /* write pass */
- v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0);
+ v = gk20a_mem_rd32(base_ptr + offsets[j], 0);
v &= ~ctx_ops[i].and_n_mask_lo;
v |= ctx_ops[i].value_lo;
- gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v);
+ gk20a_mem_wr32(base_ptr + offsets[j], 0, v);
gk20a_dbg(gpu_dbg_gpu_dbg,
"context wr: offset=0x%x v=0x%x",
offsets[j], v);
if (ctx_ops[i].op == REGOP(WRITE_64)) {
- v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0);
+ v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0);
v &= ~ctx_ops[i].and_n_mask_hi;
v |= ctx_ops[i].value_hi;
- gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v);
+ gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v);
gk20a_dbg(gpu_dbg_gpu_dbg,
"context wr: offset=0x%x v=0x%x",
@@ -7020,18 +7617,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
/* check to see if we need to add a special WAR
for some of the SMPC perf regs */
gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
- v, ctx_ptr);
+ v, base_ptr);
} else { /* read pass */
ctx_ops[i].value_lo =
- gk20a_mem_rd32(ctx_ptr + offsets[0], 0);
+ gk20a_mem_rd32(base_ptr + offsets[0], 0);
gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
offsets[0], ctx_ops[i].value_lo);
if (ctx_ops[i].op == REGOP(READ_64)) {
ctx_ops[i].value_hi =
- gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0);
+ gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0);
gk20a_dbg(gpu_dbg_gpu_dbg,
"context rd: offset=0x%x v=0x%x",
@@ -7062,6 +7659,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
if (ctx_ptr)
vunmap(ctx_ptr);
if (pm_ctx_ptr)
vunmap(pm_ctx_ptr);
if (restart_gr_ctxsw) {
int tmp_err = gr_gk20a_enable_ctxsw(g);
if (tmp_err) {

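The HWPM regops path above resolves a priv register address to an offset inside the PM context buffer by building a flat array of {address, offset} entries, sorting it once, and answering each query with a binary search through map_cmp(). Below is a minimal, self-contained userspace sketch of that lookup pattern, not part of the patch: the table contents are made-up example values, and libc qsort()/bsearch() stand in for the kernel sort()/bsearch() used by gr_gk20a_create_hwpm_ctxsw_buffer_offset_map() and gr_gk20a_find_priv_offset_in_pm_buffer().

/* Sketch of the sorted address->offset map lookup (userspace, libc). */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct map_entry {
	uint32_t addr;   /* priv register address */
	uint32_t offset; /* offset into the PM ctxsw buffer */
};

static int map_cmp(const void *a, const void *b)
{
	const struct map_entry *e1 = a;
	const struct map_entry *e2 = b;

	if (e1->addr < e2->addr)
		return -1;
	if (e1->addr > e2->addr)
		return 1;
	return 0;
}

int main(void)
{
	/* hypothetical map contents; the driver derives these from the
	 * netlist register lists plus per-unit base/stride arithmetic */
	struct map_entry map[] = {
		{ 0x00504700, 0x0200 },
		{ 0x00100200, 0x0000 },
		{ 0x00279000, 0x0104 },
	};
	size_t count = sizeof(map) / sizeof(map[0]);
	struct map_entry key = { .addr = 0x00279000 }, *result;

	qsort(map, count, sizeof(map[0]), map_cmp);          /* one-time sort */
	result = bsearch(&key, map, count, sizeof(map[0]), map_cmp);

	if (result)
		printf("addr 0x%08x -> buffer offset 0x%x\n",
		       result->addr, result->offset);
	else
		printf("addr 0x%08x is not context-switched\n", key.addr);
	return 0;
}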

@@ -198,8 +198,13 @@ struct gr_gk20a {
u32 golden_image_size;
u32 *local_golden_image;
u32 hwpm_ctxsw_buffer_offset_map_count;
struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;
u32 zcull_ctxsw_image_size;
u32 pm_ctxsw_image_size;
u32 buffer_header_size;
u32 priv_access_map_size;
@@ -219,6 +224,14 @@ struct gr_gk20a {
struct aiv_list_gk20a pm_sys;
struct aiv_list_gk20a pm_gpc;
struct aiv_list_gk20a pm_tpc;
struct aiv_list_gk20a pm_ppc;
struct aiv_list_gk20a perf_sys;
struct aiv_list_gk20a perf_gpc;
struct aiv_list_gk20a fbp;
struct aiv_list_gk20a fbp_router;
struct aiv_list_gk20a gpc_router;
struct aiv_list_gk20a pm_ltc;
struct aiv_list_gk20a pm_fbpa;
} ctxsw_regs;
int regs_base_index;
bool valid;
@@ -484,9 +497,17 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
u32 *offsets, u32 *offset_addrs,
u32 *num_offsets,
bool is_quad, u32 quad);
int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
u32 addr,
u32 max_offsets,
u32 *offsets, u32 *offset_addrs,
u32 *num_offsets);
int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
struct channel_gk20a *c,
bool enable_smpc_ctxsw);
int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
struct channel_gk20a *c,
bool enable_hwpm_ctxsw);
struct channel_ctx_gk20a;
int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,


@@ -94,6 +94,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
return 0x7 << 0;
}
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
{
return 0x1;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
return 0x0;

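The accessors above describe the PM mode field in the context image header (a 3-bit field at ctxsw_prog_main_image_pm_o()) and its CTXSW/NO_CTXSW values. gr_gk20a_update_hwpm_ctxsw_mode() and gr_gk20a_load_golden_ctx_image() read-modify-write that field and store the PM buffer GPU VA shifted right by 8, since the buffer is 256-byte aligned. The snippet below is a standalone illustration of that packing with made-up example values; it is not driver code.

/* Standalone sketch of the PM mode/pointer packing used in the patch. */
#include <stdio.h>
#include <stdint.h>

#define PM_MODE_MASK      (0x7u << 0) /* ctxsw_prog_main_image_pm_mode_m() */
#define PM_MODE_CTXSW     0x1u        /* ..._pm_mode_ctxsw_f() */
#define PM_MODE_NO_CTXSW  0x0u        /* ..._pm_mode_no_ctxsw_f() */

int main(void)
{
	uint32_t pm_word = 0xdeadbe00u;        /* pretend header contents */
	uint64_t pm_buf_gpu_va = 0x1234500ull; /* pretend 256B-aligned VA */

	/* clear the 3-bit mode field, then OR in the new mode */
	pm_word = (pm_word & ~PM_MODE_MASK) | PM_MODE_CTXSW;
	printf("pm word = 0x%08x\n", pm_word);
	/* 256-byte alignment means the low 8 bits are implied */
	printf("pm ptr  = 0x%08x\n", (uint32_t)(pm_buf_gpu_va >> 8));
	return 0;
}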

@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -70,6 +70,10 @@ static inline u32 proj_lts_stride_v(void)
{
return 0x00000400;
}
static inline u32 proj_fbpa_stride_v(void)
{
return 0x00001000;
}
static inline u32 proj_ppc_in_gpc_base_v(void)
{
return 0x00003000;
@@ -114,6 +118,10 @@ static inline u32 proj_scal_litter_num_fbps_v(void)
{
return 0x00000001;
}
static inline u32 proj_scal_litter_num_fbpas_v(void)
{
return 0x00000001;
}
static inline u32 proj_scal_litter_num_gpcs_v(void)
{
return 0x00000001;


@@ -1,7 +1,7 @@
/*
* GK20A memory management
*
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -73,6 +73,11 @@ struct zcull_ctx_desc {
u32 ctx_sw_mode;
};
struct pm_ctx_desc {
struct mem_desc mem;
u32 pm_mode;
};
struct gk20a;
struct gr_ctx_buffer_desc {
void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);


@@ -1,7 +1,7 @@
/*
* Tegra GK20A GPU Debugger Driver Register Ops
*
- * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -644,22 +644,31 @@ static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
valid = check_whitelists(dbg_s, op, offset + 4);
if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
op->offset,
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets,
op->type == REGOP(TYPE_GR_CTX_QUAD),
op->quad);
if (err) {
err = gr_gk20a_get_pm_ctx_buffer_offsets(dbg_s->g,
op->offset,
1,
&buf_offset_lo,
&buf_offset_addr,
&num_offsets);
if (err) {
op->status |= REGOP(STATUS_INVALID_OFFSET);
return -EINVAL;
}
}
if (!buf_offset_lo) {
op->status |= REGOP(STATUS_INVALID_OFFSET);
return -EINVAL;
}
}
if (!valid) {


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -70,6 +70,10 @@ static inline u32 proj_lts_stride_v(void)
{
return 0x00000200;
}
static inline u32 proj_fbpa_stride_v(void)
{
return 0x00001000;
}
static inline u32 proj_ppc_in_gpc_base_v(void)
{
return 0x00003000;
@@ -114,6 +118,10 @@ static inline u32 proj_scal_litter_num_fbps_v(void)
{
return 0x00000001;
}
static inline u32 proj_scal_litter_num_fbpas_v(void)
{
return 0x00000001;
}
static inline u32 proj_scal_litter_num_gpcs_v(void)
{
return 0x00000001;


@@ -647,8 +647,20 @@ struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args {
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 12, struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args)
/* PM Context Switch Mode */
#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW (0x00000001)
struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
__u32 mode;
__u32 reserved;
};
#define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args)
#define NVGPU_DBG_GPU_IOCTL_LAST \
- _IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE)
+ _IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE)
#define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args)
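With the uapi additions above, a debugger client can toggle PM context switching on its bound channel. The following is a rough sketch of such a caller, not part of this change: it assumes the session has already been bound to a channel through the existing dbg-gpu ioctls, that the uapi header is available as <linux/nvgpu.h>, and that the debugger device node is /dev/nvhost-dbg-gpu (names may differ by platform).

/* Userspace sketch: enable HWPM context switching on the bound channel. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h> /* NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE */

static int enable_hwpm_ctxsw(int dbg_fd)
{
	struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args args = {
		.mode = NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW,
	};

	/* asks the driver to allocate/map the PM context buffer for the
	 * bound channel and switch the context header to PM ctxsw mode */
	return ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE, &args);
}

int main(void)
{
	int fd = open("/dev/nvhost-dbg-gpu", O_RDWR); /* assumed node name */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* channel binding via the existing debugger ioctls is assumed here */
	if (enable_hwpm_ctxsw(fd) < 0)
		perror("NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE");
	close(fd);
	return 0;
}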