gpu: nvgpu: Make graphics context property of TSG

Move graphics context ownership to TSG instead of channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the
split between them was arbitrary. Move the context header to be a
property of the channel.

Bug 1842197

Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Terje Bergstrom authored 2017-12-15 09:04:15 -08:00
committed by mobile promotions
parent 6a73114788
commit 2f6698b863
33 changed files with 836 additions and 742 deletions
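
For orientation before the per-file diffs, a minimal sketch of the ownership model this change introduces. It is abbreviated from the structures touched below (field lists trimmed, no kernel headers, so it is not a compilable unit on its own), and the gr_ctx_from_ch() helper is purely illustrative rather than a function added by this patch: the graphics context now lives in the TSG and is shared by all of its channels, while only the subcontext header stays per-channel.

/* Abbreviated sketch of the post-change layout; see the diffs below
 * for the real field lists. */
struct nvgpu_gr_ctx {                   /* merges gr_ctx_desc + channel_ctx_gk20a */
        struct nvgpu_mem mem;           /* graphics context buffer */
        struct patch_desc patch_ctx;
        struct zcull_ctx_desc zcull_ctx;
        struct pm_ctx_desc pm_ctx;
        u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
        u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
        bool global_ctx_buffer_mapped;
        u32 graphics_preempt_mode;
        u32 compute_preempt_mode;
        u32 tsgid;                      /* owning TSG, used at free time */
};

struct tsg_gk20a {
        /* ... */
        struct nvgpu_gr_ctx gr_ctx;     /* replaces struct gr_ctx_desc *tsg_gr_ctx */
};

struct channel_gk20a {
        /* ... */
        struct ctx_header_desc ctx_header;  /* moved out of the removed ch_ctx */
};

/* Illustrative helper (not part of the patch): code that used to read
 * ch->ch_ctx.gr_ctx now resolves the TSG first. */
static struct nvgpu_gr_ctx *gr_ctx_from_ch(struct channel_gk20a *ch)
{
        struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);

        return tsg ? &tsg->gr_ctx : NULL;   /* bare channels have no gr ctx */
}

Allocation and free follow TSG lifetime accordingly: gk20a_tsg_release() frees the context when tsg->gr_ctx.mem is valid, replacing the per-channel free_channel_ctx path that the diffs below remove.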


@@ -91,8 +91,8 @@ static int gk20a_fifo_sched_debugfs_seq_show(
tsg->timeslice_us, tsg->timeslice_us,
ch->timeout_ms_max, ch->timeout_ms_max,
tsg->interleave_level, tsg->interleave_level,
ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, tsg->gr_ctx.graphics_preempt_mode,
ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); tsg->gr_ctx.compute_preempt_mode);
gk20a_channel_put(ch); gk20a_channel_put(ch);
} }
return 0; return 0;


@@ -85,10 +85,10 @@ static void gk20a_channel_trace_sched_param(
tsg_gk20a_from_ch(ch)->timeslice_us, tsg_gk20a_from_ch(ch)->timeslice_us,
ch->timeout_ms_max, ch->timeout_ms_max,
gk20a_fifo_interleave_level_name(tsg->interleave_level), gk20a_fifo_interleave_level_name(tsg->interleave_level),
gr_gk20a_graphics_preempt_mode_name(ch->ch_ctx.gr_ctx ? gr_gk20a_graphics_preempt_mode_name(
ch->ch_ctx.gr_ctx->graphics_preempt_mode : 0), tsg->gr_ctx.graphics_preempt_mode),
gr_gk20a_compute_preempt_mode_name(ch->ch_ctx.gr_ctx ? gr_gk20a_compute_preempt_mode_name(
ch->ch_ctx.gr_ctx->compute_preempt_mode : 0)); tsg->gr_ctx.compute_preempt_mode));
} }
/* /*


@@ -198,15 +198,10 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
arg->runlist_interleave = tsg->interleave_level; arg->runlist_interleave = tsg->interleave_level;
arg->timeslice = tsg->timeslice_us; arg->timeslice = tsg->timeslice_us;
if (tsg->tsg_gr_ctx) { arg->graphics_preempt_mode =
arg->graphics_preempt_mode = tsg->gr_ctx.graphics_preempt_mode;
tsg->tsg_gr_ctx->graphics_preempt_mode; arg->compute_preempt_mode =
arg->compute_preempt_mode = tsg->gr_ctx.compute_preempt_mode;
tsg->tsg_gr_ctx->compute_preempt_mode;
} else {
arg->graphics_preempt_mode = 0;
arg->compute_preempt_mode = 0;
}
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);


@@ -27,12 +27,11 @@
#include <nvgpu/hw/gp10b/hw_gr_gp10b.h> #include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags) u32 flags)
{ {
struct gr_ctx_desc *gr_ctx;
u32 graphics_preempt_mode = 0; u32 graphics_preempt_mode = 0;
u32 compute_preempt_mode = 0; u32 compute_preempt_mode = 0;
struct vgpu_priv_data *priv = vgpu_get_priv_data(g); struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
@@ -40,12 +39,10 @@ int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); err = vgpu_gr_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
if (err) if (err)
return err; return err;
gr_ctx = *__gr_ctx;
if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP)
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP)
@@ -84,7 +81,7 @@ fail:
} }
int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode) u32 compute_preempt_mode)
@@ -240,7 +237,7 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode) u32 compute_preempt_mode)
{ {
struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; struct nvgpu_gr_ctx *gr_ctx;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct tsg_gk20a *tsg; struct tsg_gk20a *tsg;
struct vm_gk20a *vm; struct vm_gk20a *vm;
@@ -251,6 +248,13 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
if (!class) if (!class)
return -EINVAL; return -EINVAL;
tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
vm = tsg->vm;
gr_ctx = &tsg->gr_ctx;
/* skip setting anything if both modes are already set */ /* skip setting anything if both modes are already set */
if (graphics_preempt_mode && if (graphics_preempt_mode &&
(graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) (graphics_preempt_mode == gr_ctx->graphics_preempt_mode))
@@ -263,13 +267,6 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) if (graphics_preempt_mode == 0 && compute_preempt_mode == 0)
return 0; return 0;
if (gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
vm = tsg->vm;
} else {
vm = ch->vm;
}
if (g->ops.gr.set_ctxsw_preemption_mode) { if (g->ops.gr.set_ctxsw_preemption_mode) {
err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
graphics_preempt_mode, graphics_preempt_mode,


@@ -20,12 +20,12 @@
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags); u32 flags);
int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode); u32 compute_preempt_mode);


@@ -112,7 +112,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
.free_channel_ctx = vgpu_gr_free_channel_ctx,
.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
.get_zcull_info = vgpu_gr_get_zcull_info, .get_zcull_info = vgpu_gr_get_zcull_info,


@@ -20,14 +20,18 @@
#include <nvgpu/kmem.h> #include <nvgpu/kmem.h>
#include <nvgpu/bug.h> #include <nvgpu/bug.h>
#include <nvgpu/dma.h>
#include <nvgpu/error_notifier.h> #include <nvgpu/error_notifier.h>
#include <nvgpu/dma.h> #include <nvgpu/dma.h>
#include "vgpu.h" #include "vgpu.h"
#include "gr_vgpu.h" #include "gr_vgpu.h"
#include "gk20a/dbg_gpu_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h"
#include "gk20a/channel_gk20a.h"
#include "gk20a/tsg_gk20a.h"
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
void vgpu_gr_detect_sm_arch(struct gk20a *g) void vgpu_gr_detect_sm_arch(struct gk20a *g)
{ {
@@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
struct vm_gk20a *ch_vm = c->vm; struct vm_gk20a *ch_vm = c->vm;
u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; struct tsg_gk20a *tsg;
u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; u64 *g_bfr_va;
u64 *g_bfr_size;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
u64 gpu_va; u64 gpu_va;
u32 i; u32 i;
@@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
/* FIXME: add VPR support */ tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return -EINVAL;
g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
/* Circular Buffer */ /* Circular Buffer */
gpu_va = __nvgpu_vm_alloc_va(ch_vm, gpu_va = __nvgpu_vm_alloc_va(ch_vm,
@@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
if (err || msg.ret) if (err || msg.ret)
goto clean_up; goto clean_up;
c->ch_ctx.global_ctx_buffer_mapped = true; tsg->gr_ctx.global_ctx_buffer_mapped = true;
return 0; return 0;
clean_up: clean_up:
@@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
return -ENOMEM; return -ENOMEM;
} }
static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg)
{ {
struct vm_gk20a *ch_vm = c->vm; struct vm_gk20a *ch_vm = tsg->vm;
u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
u32 i; u32 i;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
if (c->ch_ctx.global_ctx_buffer_mapped) { if (tsg->gr_ctx.global_ctx_buffer_mapped) {
struct tegra_vgpu_cmd_msg msg; /* server will unmap on channel close */
struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
int err;
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
msg.handle = vgpu_get_handle(c->g); if (g_bfr_va[i]) {
p->handle = c->virt_ctx; __nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); gmmu_page_size_kernel);
WARN_ON(err || msg.ret); g_bfr_va[i] = 0;
} g_bfr_size[i] = 0;
}
for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
if (g_bfr_va[i]) {
__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
gmmu_page_size_kernel);
g_bfr_va[i] = 0;
g_bfr_size[i] = 0;
} }
tsg->gr_ctx.global_ctx_buffer_mapped = false;
} }
c->ch_ctx.global_ctx_buffer_mapped = false;
} }
int vgpu_gr_alloc_gr_ctx(struct gk20a *g, int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags) u32 flags)
@@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
struct tegra_vgpu_cmd_msg msg = {0}; struct tegra_vgpu_cmd_msg msg = {0};
struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
struct gr_ctx_desc *gr_ctx;
int err; int err;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
if (!gr_ctx)
return -ENOMEM;
gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm,
gr_ctx->mem.size, gr->ctx_vars.buffer_total_size,
gmmu_page_size_kernel); gmmu_page_size_kernel);
if (!gr_ctx->mem.gpu_va) { if (!gr_ctx->mem.gpu_va)
nvgpu_kfree(g, gr_ctx);
return -ENOMEM; return -ENOMEM;
} gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
gr_ctx->mem.aperture = APERTURE_SYSMEM;
msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC;
msg.handle = vgpu_get_handle(g); msg.handle = vgpu_get_handle(g);
@@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
nvgpu_err(g, "fail to alloc gr_ctx"); nvgpu_err(g, "fail to alloc gr_ctx");
__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
gmmu_page_size_kernel); gmmu_page_size_kernel);
nvgpu_kfree(g, gr_ctx); gr_ctx->mem.aperture = APERTURE_INVALID;
} else { } else {
gr_ctx->virt_ctx = p->gr_ctx_handle; gr_ctx->virt_ctx = p->gr_ctx_handle;
*__gr_ctx = gr_ctx;
} }
return err; return err;
} }
void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
int err;
gk20a_dbg_fn("");
if (!gr_ctx || !gr_ctx->mem.gpu_va)
return;
msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
msg.handle = vgpu_get_handle(g);
p->gr_ctx_handle = gr_ctx->virt_ctx;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret);
__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
gmmu_page_size_kernel);
nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
nvgpu_kfree(g, gr_ctx);
}
static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
{
gk20a_dbg_fn("");
c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
c->ch_ctx.gr_ctx = NULL;
}
static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
struct channel_gk20a *c) struct channel_gk20a *c)
{ {
struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; struct tsg_gk20a *tsg;
struct patch_desc *patch_ctx;
struct vm_gk20a *ch_vm = c->vm; struct vm_gk20a *ch_vm = c->vm;
struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
@@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return -EINVAL;
patch_ctx = &tsg->gr_ctx.patch_ctx;
patch_ctx->mem.size = 128 * sizeof(u32); patch_ctx->mem.size = 128 * sizeof(u32);
patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm,
patch_ctx->mem.size, patch_ctx->mem.size,
@@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
return err; return err;
} }
static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg)
{ {
struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx;
struct vm_gk20a *ch_vm = c->vm;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
if (patch_ctx->mem.gpu_va) { if (patch_ctx->mem.gpu_va) {
struct tegra_vgpu_cmd_msg msg; /* server will free on channel close */
struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
int err;
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; __nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va,
msg.handle = vgpu_get_handle(c->g);
p->handle = c->virt_ctx;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret);
__nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
gmmu_page_size_kernel); gmmu_page_size_kernel);
patch_ctx->mem.gpu_va = 0; patch_ctx->mem.gpu_va = 0;
} }
} }
static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg)
{ {
struct tegra_vgpu_cmd_msg msg; struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx;
struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
int err;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
if (pm_ctx->mem.gpu_va == 0) if (pm_ctx->mem.gpu_va == 0)
return; return;
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX; /* server will free on channel close */
msg.handle = vgpu_get_handle(c->g);
p->handle = c->virt_ctx;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret);
__nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va, __nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va,
gmmu_page_size_kernel); gmmu_page_size_kernel);
pm_ctx->mem.gpu_va = 0; pm_ctx->mem.gpu_va = 0;
} }
void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) void vgpu_gr_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
{ {
struct tsg_gk20a *tsg;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
if (c->g->ops.fifo.free_channel_ctx_header) if (gr_ctx->mem.gpu_va) {
c->g->ops.fifo.free_channel_ctx_header(c); struct tegra_vgpu_cmd_msg msg;
vgpu_gr_unmap_global_ctx_buffers(c); struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
vgpu_gr_free_channel_patch_ctx(c); int err;
vgpu_gr_free_channel_pm_ctx(c);
if (!is_tsg)
vgpu_gr_free_channel_gr_ctx(c);
/* zcull_ctx, pm_ctx */ msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
msg.handle = vgpu_get_handle(g);
p->gr_ctx_handle = gr_ctx->virt_ctx;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret);
memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
gmmu_page_size_kernel);
c->first_init = false; tsg = &g->fifo.tsg[gr_ctx->tsgid];
vgpu_gr_unmap_global_ctx_buffers(tsg);
vgpu_gr_free_channel_patch_ctx(tsg);
vgpu_gr_free_channel_pm_ctx(tsg);
nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
memset(gr_ctx, 0, sizeof(*gr_ctx));
}
} }
static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c)
{ {
struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
struct tegra_vgpu_cmd_msg msg = {0}; struct tegra_vgpu_cmd_msg msg = {0};
struct tegra_vgpu_channel_bind_gr_ctx_params *p = struct tegra_vgpu_channel_bind_gr_ctx_params *p =
&msg.params.ch_bind_gr_ctx; &msg.params.ch_bind_gr_ctx;
int err; int err;
tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return -EINVAL;
gr_ctx = &tsg->gr_ctx;
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX;
msg.handle = vgpu_get_handle(c->g); msg.handle = vgpu_get_handle(c->g);
p->ch_handle = c->virt_ctx; p->ch_handle = c->virt_ctx;
@@ -474,7 +445,7 @@ static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c)
static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg)
{ {
struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx; struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
struct tegra_vgpu_cmd_msg msg = {0}; struct tegra_vgpu_cmd_msg msg = {0};
struct tegra_vgpu_tsg_bind_gr_ctx_params *p = struct tegra_vgpu_tsg_bind_gr_ctx_params *p =
&msg.params.tsg_bind_gr_ctx; &msg.params.tsg_bind_gr_ctx;
@@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
{ {
struct gk20a *g = c->g; struct gk20a *g = c->g;
struct fifo_gk20a *f = &g->fifo; struct fifo_gk20a *f = &g->fifo;
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; struct nvgpu_gr_ctx *gr_ctx = NULL;
struct tsg_gk20a *tsg = NULL; struct tsg_gk20a *tsg = NULL;
int err = 0; int err = 0;
@@ -515,95 +486,87 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
} }
c->obj_class = class_num; c->obj_class = class_num;
if (gk20a_is_channel_marked_as_tsg(c)) if (!gk20a_is_channel_marked_as_tsg(c))
tsg = &f->tsg[c->tsgid]; return -EINVAL;
if (!tsg) { tsg = &f->tsg[c->tsgid];
/* allocate gr ctx buffer */ gr_ctx = &tsg->gr_ctx;
if (!ch_ctx->gr_ctx) {
err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx, if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
c->vm, tsg->vm = c->vm;
class_num, nvgpu_vm_get(tsg->vm);
flags); err = g->ops.gr.alloc_gr_ctx(g, gr_ctx,
if (!err) c->vm,
err = vgpu_gr_ch_bind_gr_ctx(c); class_num,
if (err) { flags);
nvgpu_err(g, "fail to allocate gr ctx buffer"); if (!err)
goto out; err = vgpu_gr_tsg_bind_gr_ctx(tsg);
} if (err) {
} else {
/*TBD: needs to be more subtle about which is
* being allocated as some are allowed to be
* allocated along same channel */
nvgpu_err(g, nvgpu_err(g,
"too many classes alloc'd on same channel"); "fail to allocate TSG gr ctx buffer, err=%d", err);
err = -EINVAL; nvgpu_vm_put(tsg->vm);
tsg->vm = NULL;
goto out; goto out;
} }
} else {
if (!tsg->tsg_gr_ctx) {
tsg->vm = c->vm;
nvgpu_vm_get(tsg->vm);
err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx,
c->vm,
class_num,
flags);
if (!err)
err = vgpu_gr_tsg_bind_gr_ctx(tsg);
if (err) {
nvgpu_err(g,
"fail to allocate TSG gr ctx buffer, err=%d", err);
nvgpu_vm_put(tsg->vm);
tsg->vm = NULL;
goto out;
}
}
ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
err = vgpu_gr_ch_bind_gr_ctx(c); err = vgpu_gr_ch_bind_gr_ctx(c);
if (err) { if (err) {
nvgpu_err(g, "fail to bind gr ctx buffer"); nvgpu_err(g, "fail to bind gr ctx buffer");
goto out; goto out;
} }
}
/* commit gr ctx buffer */ /* commit gr ctx buffer */
err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
if (err) { if (err) {
nvgpu_err(g, "fail to commit gr ctx buffer"); nvgpu_err(g, "fail to commit gr ctx buffer");
goto out; goto out;
} }
/* allocate patch buffer */ /* allocate patch buffer */
if (ch_ctx->patch_ctx.mem.priv.pages == NULL) {
err = vgpu_gr_alloc_channel_patch_ctx(g, c); err = vgpu_gr_alloc_channel_patch_ctx(g, c);
if (err) { if (err) {
nvgpu_err(g, "fail to allocate patch buffer"); nvgpu_err(g, "fail to allocate patch buffer");
goto out; goto out;
} }
}
/* map global buffer to channel gpu_va and commit */ /* map global buffer to channel gpu_va and commit */
if (!ch_ctx->global_ctx_buffer_mapped) {
err = vgpu_gr_map_global_ctx_buffers(g, c); err = vgpu_gr_map_global_ctx_buffers(g, c);
if (err) { if (err) {
nvgpu_err(g, "fail to map global ctx buffer"); nvgpu_err(g, "fail to map global ctx buffer");
goto out; goto out;
} }
vgpu_gr_commit_global_ctx_buffers(g, c, true);
}
/* load golden image */ err = vgpu_gr_commit_global_ctx_buffers(g, c, true);
if (!c->first_init) { if (err) {
nvgpu_err(g, "fail to commit global ctx buffers");
goto out;
}
/* load golden image */
err = gr_gk20a_elpg_protected_call(g, err = gr_gk20a_elpg_protected_call(g,
vgpu_gr_load_golden_ctx_image(g, c)); vgpu_gr_load_golden_ctx_image(g, c));
if (err) { if (err) {
nvgpu_err(g, "fail to load golden ctx image"); nvgpu_err(g, "fail to load golden ctx image");
goto out; goto out;
} }
c->first_init = true; } else {
err = vgpu_gr_ch_bind_gr_ctx(c);
if (err) {
nvgpu_err(g, "fail to bind gr ctx buffer");
goto out;
}
/* commit gr ctx buffer */
err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
if (err) {
nvgpu_err(g, "fail to commit gr ctx buffer");
goto out;
}
} }
/* PM ctxt switch is off by default */
gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
gk20a_dbg_fn("done"); gk20a_dbg_fn("done");
return 0; return 0;
out: out:
@@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
struct channel_gk20a *ch, bool enable) struct channel_gk20a *ch, bool enable)
{ {
struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; struct tsg_gk20a *tsg;
struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; struct nvgpu_gr_ctx *ch_ctx;
struct pm_ctx_desc *pm_ctx;
struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
int err; int err;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
ch_ctx = &tsg->gr_ctx;
pm_ctx = &ch_ctx->pm_ctx;
if (enable) { if (enable) {
/*
* send command to enable HWPM only once - otherwise server
* will return an error due to using the same GPU VA twice.
*/
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
return 0;
p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
/* Allocate buffer if necessary */ /* Allocate buffer if necessary */
@@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
return -ENOMEM; return -ENOMEM;
pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
} }
} else } else {
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
return 0;
p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
}
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
msg.handle = vgpu_get_handle(g); msg.handle = vgpu_get_handle(g);
@@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
WARN_ON(err || msg.ret); WARN_ON(err || msg.ret);
err = err ? err : msg.ret;
if (!err)
pm_ctx->pm_mode = enable ?
ctxsw_prog_main_image_pm_mode_ctxsw_f() :
ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
return err ? err : msg.ret; return err;
} }
int vgpu_gr_clear_sm_error_state(struct gk20a *g, int vgpu_gr_clear_sm_error_state(struct gk20a *g,


@@ -29,6 +29,7 @@ struct dbg_session_gk20a;
void vgpu_gr_detect_sm_arch(struct gk20a *g); void vgpu_gr_detect_sm_arch(struct gk20a *g);
void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg);
void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg);
int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
struct channel_gk20a *c, u64 zcull_va, struct channel_gk20a *c, u64 zcull_va,


@@ -131,7 +131,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
.free_channel_ctx = vgpu_gr_free_channel_ctx,
.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
.get_zcull_info = vgpu_gr_get_zcull_info, .get_zcull_info = vgpu_gr_get_zcull_info,


@@ -21,7 +21,7 @@
int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
{ {
struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct ctx_header_desc *ctx = &c->ctx_header;
struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_cmd_msg msg = {};
struct tegra_vgpu_alloc_ctx_header_params *p = struct tegra_vgpu_alloc_ctx_header_params *p =
&msg.params.alloc_ctx_header; &msg.params.alloc_ctx_header;
@@ -52,7 +52,7 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c)
{ {
struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct ctx_header_desc *ctx = &c->ctx_header;
struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_cmd_msg msg = {};
struct tegra_vgpu_free_ctx_header_params *p = struct tegra_vgpu_free_ctx_header_params *p =
&msg.params.free_ctx_header; &msg.params.free_ctx_header;


@@ -79,12 +79,12 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info);
int vgpu_gr_nonstall_isr(struct gk20a *g, int vgpu_gr_nonstall_isr(struct gk20a *g,
struct tegra_vgpu_gr_nonstall_intr_info *info); struct tegra_vgpu_gr_nonstall_intr_info *info);
int vgpu_gr_alloc_gr_ctx(struct gk20a *g, int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags); u32 flags);
void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
void vgpu_gr_handle_sm_esr_event(struct gk20a *g, void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
struct tegra_vgpu_sm_esr_info *info); struct tegra_vgpu_sm_esr_info *info);
int vgpu_gr_init_ctx_state(struct gk20a *g); int vgpu_gr_init_ctx_state(struct gk20a *g);
@@ -141,7 +141,7 @@ static inline int vgpu_gr_isr(struct gk20a *g,
return 0; return 0;
} }
static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags) u32 flags)
@@ -149,7 +149,7 @@ static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
return -ENOSYS; return -ENOSYS;
} }
static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx) struct nvgpu_gr_ctx *gr_ctx)
{ {
} }
static inline int vgpu_gr_init_ctx_state(struct gk20a *g) static inline int vgpu_gr_init_ctx_state(struct gk20a *g)


@@ -259,7 +259,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
ch->g->ops.fifo.disable_channel(ch); ch->g->ops.fifo.disable_channel(ch);
if (channel_preempt && ch->ch_ctx.gr_ctx) if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch))
ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); ch->g->ops.fifo.preempt_channel(ch->g, ch->chid);
gk20a_channel_abort_clean_up(ch); gk20a_channel_abort_clean_up(ch);
@@ -421,8 +421,8 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
g->ops.fecs_trace.unbind_channel(g, ch); g->ops.fecs_trace.unbind_channel(g, ch);
#endif #endif
/* release channel ctx */ if(g->ops.fifo.free_channel_ctx_header)
g->ops.gr.free_channel_ctx(ch, was_tsg); g->ops.fifo.free_channel_ctx_header(ch);
gk20a_gr_flush_channel_tlb(gr); gk20a_gr_flush_channel_tlb(gr);


@@ -31,7 +31,6 @@
#include <nvgpu/atomic.h> #include <nvgpu/atomic.h>
struct gk20a; struct gk20a;
struct gr_gk20a;
struct dbg_session_gk20a; struct dbg_session_gk20a;
struct gk20a_fence; struct gk20a_fence;
struct fifo_profile_gk20a; struct fifo_profile_gk20a;
@@ -50,10 +49,6 @@ struct fifo_profile_gk20a;
#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1) #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1)
#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2)
/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1)
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2)
struct notification { struct notification {
struct { struct {
u32 nanoseconds[2]; u32 nanoseconds[2];
@@ -63,19 +58,6 @@ struct notification {
u16 status; u16 status;
}; };
/* contexts associated with a channel */
struct channel_ctx_gk20a {
struct gr_ctx_desc *gr_ctx;
struct patch_desc patch_ctx;
struct zcull_ctx_desc zcull_ctx;
struct pm_ctx_desc pm_ctx;
u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
bool global_ctx_buffer_mapped;
struct ctx_header_desc ctx_header;
};
struct channel_gk20a_job { struct channel_gk20a_job {
struct nvgpu_mapped_buf **mapped_buffers; struct nvgpu_mapped_buf **mapped_buffers;
int num_mapped_buffers; int num_mapped_buffers;
@@ -190,7 +172,6 @@ struct channel_gk20a {
int chid; int chid;
bool wdt_enabled; bool wdt_enabled;
nvgpu_atomic_t bound; nvgpu_atomic_t bound;
bool first_init;
bool vpr; bool vpr;
bool deterministic; bool deterministic;
/* deterministic, but explicitly idle and submits disallowed */ /* deterministic, but explicitly idle and submits disallowed */
@@ -210,8 +191,6 @@ struct channel_gk20a {
struct gpfifo_desc gpfifo; struct gpfifo_desc gpfifo;
struct channel_ctx_gk20a ch_ctx;
struct nvgpu_mem inst_block; struct nvgpu_mem inst_block;
u64 userd_iova; u64 userd_iova;
@@ -262,6 +241,8 @@ struct channel_gk20a {
struct channel_t19x t19x; struct channel_t19x t19x;
#endif #endif
struct ctx_header_desc ctx_header;
/* Any operating system specific data. */ /* Any operating system specific data. */
void *os_priv; void *os_priv;
}; };


@@ -625,9 +625,10 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
u32 lo; u32 lo;
u32 hi; u32 hi;
u64 pa; u64 pa;
struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *ch_ctx;
struct gk20a_fecs_trace *trace = g->fecs_trace; struct gk20a_fecs_trace *trace = g->fecs_trace;
struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; struct nvgpu_mem *mem;
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
pid_t pid; pid_t pid;
u32 aperture; u32 aperture;
@@ -637,6 +638,13 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
ch->chid, context_ptr, ch->chid, context_ptr,
nvgpu_inst_block_addr(g, &ch->inst_block)); nvgpu_inst_block_addr(g, &ch->inst_block));
tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
ch_ctx = &tsg->gr_ctx;
mem = &ch_ctx->mem;
if (!trace) if (!trace)
return -ENOMEM; return -ENOMEM;


@@ -187,16 +187,16 @@ struct gpu_ops {
void (*cb_size_default)(struct gk20a *g); void (*cb_size_default)(struct gk20a *g);
int (*calc_global_ctx_buffer_size)(struct gk20a *g); int (*calc_global_ctx_buffer_size)(struct gk20a *g);
void (*commit_global_attrib_cb)(struct gk20a *g, void (*commit_global_attrib_cb)(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, bool patch); u64 addr, bool patch);
void (*commit_global_bundle_cb)(struct gk20a *g, void (*commit_global_bundle_cb)(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u64 size, bool patch); u64 addr, u64 size, bool patch);
int (*commit_global_cb_manager)(struct gk20a *g, int (*commit_global_cb_manager)(struct gk20a *g,
struct channel_gk20a *ch, struct channel_gk20a *ch,
bool patch); bool patch);
void (*commit_global_pagepool)(struct gk20a *g, void (*commit_global_pagepool)(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u32 size, bool patch); u64 addr, u32 size, bool patch);
void (*init_gpc_mmu)(struct gk20a *g); void (*init_gpc_mmu)(struct gk20a *g);
int (*handle_sw_method)(struct gk20a *g, u32 addr, int (*handle_sw_method)(struct gk20a *g, u32 addr,
@@ -230,7 +230,6 @@ struct gpu_ops {
int (*load_ctxsw_ucode)(struct gk20a *g); int (*load_ctxsw_ucode)(struct gk20a *g);
u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
void (*free_channel_ctx)(struct channel_gk20a *c, bool is_tsg);
int (*alloc_obj_ctx)(struct channel_gk20a *c, int (*alloc_obj_ctx)(struct channel_gk20a *c,
u32 class_num, u32 flags); u32 class_num, u32 flags);
int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
@@ -285,13 +284,12 @@ struct gpu_ops {
u32 (*pagepool_default_size)(struct gk20a *g); u32 (*pagepool_default_size)(struct gk20a *g);
int (*init_ctx_state)(struct gk20a *g); int (*init_ctx_state)(struct gk20a *g);
int (*alloc_gr_ctx)(struct gk20a *g, int (*alloc_gr_ctx)(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 padding); u32 class, u32 padding);
void (*free_gr_ctx)(struct gk20a *g, void (*free_gr_ctx)(struct gk20a *g,
struct vm_gk20a *vm, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
struct gr_ctx_desc *gr_ctx);
void (*update_ctxsw_preemption_mode)(struct gk20a *g, void (*update_ctxsw_preemption_mode)(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_gk20a *c,
struct nvgpu_mem *mem); struct nvgpu_mem *mem);
int (*update_smpc_ctxsw_mode)(struct gk20a *g, int (*update_smpc_ctxsw_mode)(struct gk20a *g,
struct channel_gk20a *c, struct channel_gk20a *c,
@@ -384,14 +382,14 @@ struct gpu_ops {
int (*get_preemption_mode_flags)(struct gk20a *g, int (*get_preemption_mode_flags)(struct gk20a *g,
struct nvgpu_preemption_modes_rec *preemption_modes_rec); struct nvgpu_preemption_modes_rec *preemption_modes_rec);
int (*set_ctxsw_preemption_mode)(struct gk20a *g, int (*set_ctxsw_preemption_mode)(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode); u32 compute_preempt_mode);
int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
void (*update_boosted_ctx)(struct gk20a *g, void (*update_boosted_ctx)(struct gk20a *g,
struct nvgpu_mem *mem, struct nvgpu_mem *mem,
struct gr_ctx_desc *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
int (*init_sm_id_table)(struct gk20a *g); int (*init_sm_id_table)(struct gk20a *g);
int (*load_smid_config)(struct gk20a *g); int (*load_smid_config)(struct gk20a *g);
void (*program_sm_id_numbering)(struct gk20a *g, void (*program_sm_id_numbering)(struct gk20a *g,
@@ -440,7 +438,7 @@ struct gpu_ops {
u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g); u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g);
u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g); u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g);
void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm, void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
} gr; } gr;
struct { struct {
void (*init_hw)(struct gk20a *g); void (*init_hw)(struct gk20a *g);


File diff suppressed because it is too large


@@ -28,7 +28,6 @@
#include "gr_t19x.h" #include "gr_t19x.h"
#endif #endif
#include "tsg_gk20a.h"
#include "gr_ctx_gk20a.h" #include "gr_ctx_gk20a.h"
#include "mm_gk20a.h" #include "mm_gk20a.h"
@@ -48,6 +47,10 @@
#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */
/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1)
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2)
/* /*
* allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries
* of address and data pairs * of address and data pairs
@@ -64,6 +67,7 @@
#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1) #define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1)
#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2) #define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2)
struct tsg_gk20a;
struct channel_gk20a; struct channel_gk20a;
struct nvgpu_warpstate; struct nvgpu_warpstate;
@@ -433,7 +437,12 @@ struct gr_gk20a {
void gk20a_fecs_dump_falcon_stats(struct gk20a *g); void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
struct gr_ctx_desc { struct ctx_header_desc {
struct nvgpu_mem mem;
};
/* contexts associated with a TSG */
struct nvgpu_gr_ctx {
struct nvgpu_mem mem; struct nvgpu_mem mem;
u32 graphics_preempt_mode; u32 graphics_preempt_mode;
@@ -452,10 +461,16 @@ struct gr_ctx_desc {
u64 virt_ctx; u64 virt_ctx;
#endif #endif
bool golden_img_loaded; bool golden_img_loaded;
};
struct ctx_header_desc { struct patch_desc patch_ctx;
struct nvgpu_mem mem; struct zcull_ctx_desc zcull_ctx;
struct pm_ctx_desc pm_ctx;
u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
bool global_ctx_buffer_mapped;
u32 tsgid;
}; };
struct gk20a_ctxsw_ucode_segment { struct gk20a_ctxsw_ucode_segment {
@@ -552,7 +567,6 @@ int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);
int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg);
int gk20a_gr_isr(struct gk20a *g); int gk20a_gr_isr(struct gk20a *g);
int gk20a_gr_nonstall_isr(struct gk20a *g); int gk20a_gr_nonstall_isr(struct gk20a *g);
@@ -633,17 +647,17 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
struct channel_gk20a *c, struct channel_gk20a *c,
bool enable_hwpm_ctxsw); bool enable_hwpm_ctxsw);
struct channel_ctx_gk20a; struct nvgpu_gr_ctx;
void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
u32 addr, u32 data, bool patch); u32 addr, u32 data, bool patch);
int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
bool update_patch_count); bool update_patch_count);
void gr_gk20a_ctx_patch_write_end(struct gk20a *g, void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
bool update_patch_count); bool update_patch_count);
void gr_gk20a_commit_global_pagepool(struct gk20a *g, void gr_gk20a_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u32 size, bool patch); u64 addr, u32 size, bool patch);
void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
void gr_gk20a_enable_hww_exceptions(struct gk20a *g); void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
@@ -694,10 +708,10 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
struct fecs_method_op_gk20a op); struct fecs_method_op_gk20a op);
int gr_gk20a_alloc_gr_ctx(struct gk20a *g, int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 padding); u32 class, u32 padding);
void gr_gk20a_free_gr_ctx(struct gk20a *g, void gr_gk20a_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
int gr_gk20a_halt_pipe(struct gk20a *g); int gr_gk20a_halt_pipe(struct gk20a *g);
#if defined(CONFIG_GK20A_CYCLE_STATS) #if defined(CONFIG_GK20A_CYCLE_STATS)


@@ -280,7 +280,6 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g)
tsg->num_active_channels = 0; tsg->num_active_channels = 0;
nvgpu_ref_init(&tsg->refcount); nvgpu_ref_init(&tsg->refcount);
tsg->tsg_gr_ctx = NULL;
tsg->vm = NULL; tsg->vm = NULL;
tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
tsg->timeslice_us = 0; tsg->timeslice_us = 0;
@@ -319,10 +318,8 @@ void gk20a_tsg_release(struct nvgpu_ref *ref)
if (g->ops.fifo.tsg_release) if (g->ops.fifo.tsg_release)
g->ops.fifo.tsg_release(tsg); g->ops.fifo.tsg_release(tsg);
if (tsg->tsg_gr_ctx) { if (nvgpu_mem_is_valid(&tsg->gr_ctx.mem))
gr_gk20a_free_tsg_gr_ctx(tsg); gr_gk20a_free_tsg_gr_ctx(tsg);
tsg->tsg_gr_ctx = NULL;
}
if (g->ops.fifo.deinit_eng_method_buffers) if (g->ops.fifo.deinit_eng_method_buffers)
g->ops.fifo.deinit_eng_method_buffers(g, tsg); g->ops.fifo.deinit_eng_method_buffers(g, tsg);


@@ -26,6 +26,8 @@
#include <nvgpu/kref.h> #include <nvgpu/kref.h>
#include <nvgpu/rwsem.h> #include <nvgpu/rwsem.h>
#include "gr_gk20a.h"
#ifdef CONFIG_TEGRA_19x_GPU #ifdef CONFIG_TEGRA_19x_GPU
#include "tsg_t19x.h" #include "tsg_t19x.h"
#endif #endif
@@ -56,8 +58,6 @@ struct tsg_gk20a {
unsigned int timeslice_timeout; unsigned int timeslice_timeout;
unsigned int timeslice_scale; unsigned int timeslice_scale;
struct gr_ctx_desc *tsg_gr_ctx;
struct vm_gk20a *vm; struct vm_gk20a *vm;
u32 interleave_level; u32 interleave_level;
@@ -71,6 +71,8 @@ struct tsg_gk20a {
#ifdef CONFIG_TEGRA_19x_GPU #ifdef CONFIG_TEGRA_19x_GPU
struct tsg_t19x t19x; struct tsg_t19x t19x;
#endif #endif
struct nvgpu_gr_ctx gr_ctx;
}; };
int gk20a_enable_tsg(struct tsg_gk20a *tsg); int gk20a_enable_tsg(struct tsg_gk20a *tsg);


@@ -124,7 +124,7 @@ int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
} }
void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, bool patch) u64 addr, bool patch)
{ {
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
@@ -141,7 +141,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
} }
void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u64 size, bool patch) u64 addr, u64 size, bool patch)
{ {
u32 data; u32 data;
@@ -180,7 +180,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch) struct channel_gk20a *c, bool patch)
{ {
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *ch_ctx;
u32 attrib_offset_in_chunk = 0; u32 attrib_offset_in_chunk = 0;
u32 alpha_offset_in_chunk = 0; u32 alpha_offset_in_chunk = 0;
u32 pd_ab_max_output; u32 pd_ab_max_output;
@@ -193,6 +194,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return -EINVAL;
ch_ctx = &tsg->gr_ctx;
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
@@ -257,7 +264,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
} }
void gr_gm20b_commit_global_pagepool(struct gk20a *g, void gr_gm20b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u32 size, bool patch) u64 addr, u32 size, bool patch)
{ {
gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch);
@@ -845,7 +852,7 @@ u32 gr_gm20b_pagepool_default_size(struct gk20a *g)
} }
int gr_gm20b_alloc_gr_ctx(struct gk20a *g, int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags) u32 flags)
{ {
@@ -858,7 +865,7 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
return err; return err;
if (class == MAXWELL_COMPUTE_B) if (class == MAXWELL_COMPUTE_B)
(*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; gr_ctx->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
gk20a_dbg_fn("done"); gk20a_dbg_fn("done");
@@ -866,15 +873,21 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
} }
void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_gk20a *c,
struct nvgpu_mem *mem) struct nvgpu_mem *mem)
{ {
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
u32 cta_preempt_option = u32 cta_preempt_option =
ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); ctxsw_prog_main_image_preemption_options_control_cta_enabled_f();
gk20a_dbg_fn(""); gk20a_dbg_fn("");
tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return;
gr_ctx = &tsg->gr_ctx;
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
gk20a_dbg_info("CTA: %x", cta_preempt_option); gk20a_dbg_info("CTA: %x", cta_preempt_option);
nvgpu_mem_wr(g, mem, nvgpu_mem_wr(g, mem,
@@ -1026,16 +1039,22 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
bool enable) bool enable)
{ {
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
struct nvgpu_mem *mem; struct nvgpu_mem *mem;
u32 v; u32 v;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return -EINVAL;
gr_ctx = &tsg->gr_ctx;
mem = &gr_ctx->mem;
if (!nvgpu_mem_is_valid(mem) || c->vpr)
return -EINVAL; return -EINVAL;
mem = &ch_ctx->gr_ctx->mem;
if (nvgpu_mem_begin(c->g, mem)) if (nvgpu_mem_begin(c->g, mem))
return -ENOMEM; return -ENOMEM;
@@ -1289,12 +1308,19 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
{ {
u32 gpc, tpc, offset; u32 gpc, tpc, offset;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *ch_ctx;
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
GPU_LIT_TPC_IN_GPC_STRIDE); GPU_LIT_TPC_IN_GPC_STRIDE);
int err = 0; int err = 0;
tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
ch_ctx = &tsg->gr_ctx;
nvgpu_mutex_acquire(&g->dbg_sessions_lock); nvgpu_mutex_acquire(&g->dbg_sessions_lock);
gr->sm_error_states[sm_id].hww_global_esr = gr->sm_error_states[sm_id].hww_global_esr =


@@ -46,7 +46,7 @@ enum {
#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, bool patch); u64 addr, bool patch);
int gr_gm20b_init_fs_state(struct gk20a *g); int gr_gm20b_init_fs_state(struct gk20a *g);
int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
@@ -57,12 +57,12 @@ void gr_gm20b_bundle_cb_defaults(struct gk20a *g);
void gr_gm20b_cb_size_default(struct gk20a *g); void gr_gm20b_cb_size_default(struct gk20a *g);
int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g);
void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u64 size, bool patch); u64 addr, u64 size, bool patch);
int gr_gm20b_commit_global_cb_manager(struct gk20a *g, int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch); struct channel_gk20a *c, bool patch);
void gr_gm20b_commit_global_pagepool(struct gk20a *g, void gr_gm20b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u32 size, bool patch); u64 addr, u32 size, bool patch);
int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data); u32 class_num, u32 offset, u32 data);
@@ -96,11 +96,11 @@ int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
void gr_gm20b_detect_sm_arch(struct gk20a *g); void gr_gm20b_detect_sm_arch(struct gk20a *g);
u32 gr_gm20b_pagepool_default_size(struct gk20a *g); u32 gr_gm20b_pagepool_default_size(struct gk20a *g);
int gr_gm20b_alloc_gr_ctx(struct gk20a *g, int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags); u32 flags);
void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_gk20a *c,
struct nvgpu_mem *mem); struct nvgpu_mem *mem);
int gr_gm20b_dump_gr_status_regs(struct gk20a *g, int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o); struct gk20a_debug_output *o);


@@ -226,7 +226,6 @@ static const struct gpu_ops gm20b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,


@@ -135,7 +135,7 @@ void gr_gp106_cb_size_default(struct gk20a *g)
} }
int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode) u32 compute_preempt_mode)


@@ -38,7 +38,7 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data); u32 class_num, u32 offset, u32 data);
void gr_gp106_cb_size_default(struct gk20a *g); void gr_gp106_cb_size_default(struct gk20a *g);
int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode); u32 compute_preempt_mode);


@@ -272,7 +272,6 @@ static const struct gpu_ops gp106_ops = {
.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,


@@ -389,9 +389,9 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
int gr_gp10b_commit_global_cb_manager(struct gk20a *g, int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch) struct channel_gk20a *c, bool patch)
{ {
struct tsg_gk20a *tsg;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; struct nvgpu_gr_ctx *gr_ctx;
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
u32 attrib_offset_in_chunk = 0; u32 attrib_offset_in_chunk = 0;
u32 alpha_offset_in_chunk = 0; u32 alpha_offset_in_chunk = 0;
u32 pd_ab_max_output; u32 pd_ab_max_output;
@@ -405,6 +405,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return -EINVAL;
gr_ctx = &tsg->gr_ctx;
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
attrib_size_in_chunk = gr->attrib_cb_gfxp_size; attrib_size_in_chunk = gr->attrib_cb_gfxp_size;
cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size; cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size;
@@ -413,9 +419,9 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
cb_attrib_cache_size_init = gr->attrib_cb_default_size; cb_attrib_cache_size_init = gr->attrib_cb_default_size;
} }
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(),
gr->attrib_cb_default_size, patch); gr->attrib_cb_default_size, patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(),
gr->alpha_cb_default_size, patch); gr->alpha_cb_default_size, patch);
pd_ab_max_output = (gr->alpha_cb_default_size * pd_ab_max_output = (gr->alpha_cb_default_size *
@@ -423,11 +429,11 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
gr_pd_ab_dist_cfg1_max_output_granularity_v(); gr_pd_ab_dist_cfg1_max_output_granularity_v();
if (g->gr.pd_max_batches) { if (g->gr.pd_max_batches) {
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(),
gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch); gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch);
} else { } else {
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(),
gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
} }
@@ -447,17 +453,17 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
cbm_cfg_size_steadystate = gr->attrib_cb_default_size * cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
gr->pes_tpc_count[ppc_index][gpc_index]; gr->pes_tpc_count[ppc_index][gpc_index];
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
ppc_in_gpc_stride * ppc_index, ppc_in_gpc_stride * ppc_index,
cbm_cfg_size_beta, patch); cbm_cfg_size_beta, patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
ppc_in_gpc_stride * ppc_index, ppc_in_gpc_stride * ppc_index,
attrib_offset_in_chunk, patch); attrib_offset_in_chunk, patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
ppc_in_gpc_stride * ppc_index, ppc_in_gpc_stride * ppc_index,
cbm_cfg_size_steadystate, cbm_cfg_size_steadystate,
@@ -466,12 +472,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
attrib_offset_in_chunk += attrib_size_in_chunk * attrib_offset_in_chunk += attrib_size_in_chunk *
gr->pes_tpc_count[ppc_index][gpc_index]; gr->pes_tpc_count[ppc_index][gpc_index];
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
ppc_in_gpc_stride * ppc_index, ppc_in_gpc_stride * ppc_index,
cbm_cfg_size_alpha, patch); cbm_cfg_size_alpha, patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
ppc_in_gpc_stride * ppc_index, ppc_in_gpc_stride * ppc_index,
alpha_offset_in_chunk, patch); alpha_offset_in_chunk, patch);
@@ -479,7 +485,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
alpha_offset_in_chunk += gr->alpha_cb_size * alpha_offset_in_chunk += gr->alpha_cb_size *
gr->pes_tpc_count[ppc_index][gpc_index]; gr->pes_tpc_count[ppc_index][gpc_index];
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
patch); patch);
@@ -490,20 +496,20 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
} }
void gr_gp10b_commit_global_pagepool(struct gk20a *g, void gr_gp10b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *gr_ctx,
u64 addr, u32 size, bool patch) u64 addr, u32 size, bool patch)
{ {
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
gr_scc_pagepool_base_addr_39_8_f(addr), patch); gr_scc_pagepool_base_addr_39_8_f(addr), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(),
gr_scc_pagepool_total_pages_f(size) | gr_scc_pagepool_total_pages_f(size) |
gr_scc_pagepool_valid_true_f(), patch); gr_scc_pagepool_valid_true_f(), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(),
gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(),
gr_gpcs_gcc_pagepool_total_pages_f(size), patch); gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
} }
@@ -947,7 +953,7 @@ fail_free:
} }
int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode) u32 compute_preempt_mode)
@@ -1071,7 +1077,7 @@ fail:
} }
int gr_gp10b_alloc_gr_ctx(struct gk20a *g, int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags) u32 flags)
{ {
@@ -1085,7 +1091,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
if (err) if (err)
return err; return err;
(*gr_ctx)->ctx_id_valid = false; gr_ctx->ctx_id_valid = false;
if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP)
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
@@ -1094,7 +1100,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
if (graphics_preempt_mode || compute_preempt_mode) { if (graphics_preempt_mode || compute_preempt_mode) {
if (g->ops.gr.set_ctxsw_preemption_mode) { if (g->ops.gr.set_ctxsw_preemption_mode) {
err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm, err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm,
class, graphics_preempt_mode, compute_preempt_mode); class, graphics_preempt_mode, compute_preempt_mode);
if (err) { if (err) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed"); nvgpu_err(g, "set_ctxsw_preemption_mode failed");
@@ -1109,14 +1115,13 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
return 0; return 0;
fail_free_gk20a_ctx: fail_free_gk20a_ctx:
gr_gk20a_free_gr_ctx(g, vm, *gr_ctx); gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
*gr_ctx = NULL;
return err; return err;
} }
void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx) struct nvgpu_gr_ctx *gr_ctx)
{ {
struct nvgpu_mem *mem = &gr_ctx->mem; struct nvgpu_mem *mem = &gr_ctx->mem;
@@ -1168,13 +1173,13 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
} }
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_gk20a *c,
struct nvgpu_mem *mem) struct nvgpu_mem *mem)
{ {
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; struct tsg_gk20a *tsg;
struct ctx_header_desc *ctx = &ch_ctx->ctx_header; struct nvgpu_gr_ctx *gr_ctx;
struct ctx_header_desc *ctx = &c->ctx_header;
struct nvgpu_mem *ctxheader = &ctx->mem; struct nvgpu_mem *ctxheader = &ctx->mem;
u32 gfxp_preempt_option = u32 gfxp_preempt_option =
ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
u32 cilp_preempt_option = u32 cilp_preempt_option =
@@ -1185,6 +1190,12 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return;
gr_ctx = &tsg->gr_ctx;
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
nvgpu_mem_wr(g, mem, nvgpu_mem_wr(g, mem,
@@ -1220,7 +1231,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
gr_ctx->preempt_ctxsw_buffer.gpu_va); gr_ctx->preempt_ctxsw_buffer.gpu_va);
} }
err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
if (err) { if (err) {
nvgpu_err(g, "can't map patch context"); nvgpu_err(g, "can't map patch context");
goto out; goto out;
@@ -1232,7 +1243,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
gk20a_dbg_info("attrib cb addr : 0x%016x", addr); gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
gr_scc_pagepool_base_addr_39_8_align_bits_v()) | gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1243,7 +1254,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
if (size == g->ops.gr.pagepool_default_size(g)) if (size == g->ops.gr.pagepool_default_size(g))
size = gr_scc_pagepool_total_pages_hwmax_v(); size = gr_scc_pagepool_total_pages_hwmax_v();
g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1252,28 +1263,28 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
size = gr_ctx->spill_ctxsw_buffer.size / size = gr_ctx->spill_ctxsw_buffer.size /
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_r(),
gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
true); true);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_r(),
gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
true); true);
cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_r(),
gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
cbes_reserve), cbes_reserve),
true); true);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
cbes_reserve), cbes_reserve),
true); true);
gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
} }
out: out:
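Note how all of the GfxP buffer commits above go through the same bracket: gr_gk20a_ctx_patch_write_begin() maps the patch context, a series of gr_gk20a_ctx_patch_write() calls append register/value updates, and gr_gk20a_ctx_patch_write_end() closes it, now keyed on the TSG-owned nvgpu_gr_ctx instead of the old channel_ctx_gk20a. The toy sketch below shows only that bracketing discipline; the helper names are shortened and the buffer is a plain array, not the real GPU patch buffer.

#include <stdio.h>

/* Toy stand-in for the patch context: the real one is a GPU buffer that
 * gr_gk20a_ctx_patch_write() appends register/value updates to. */
struct nvgpu_gr_ctx {
	unsigned int patch_regs[8];
	unsigned int patch_vals[8];
	unsigned int patch_count;
	int mapped;
};

static int ctx_patch_write_begin(struct nvgpu_gr_ctx *gr_ctx)
{
	gr_ctx->mapped = 1;		/* the real helper maps the patch buffer */
	return 0;
}

static void ctx_patch_write(struct nvgpu_gr_ctx *gr_ctx,
			    unsigned int reg, unsigned int val)
{
	if (!gr_ctx->mapped || gr_ctx->patch_count >= 8)
		return;
	gr_ctx->patch_regs[gr_ctx->patch_count] = reg;
	gr_ctx->patch_vals[gr_ctx->patch_count] = val;
	gr_ctx->patch_count++;
}

static void ctx_patch_write_end(struct nvgpu_gr_ctx *gr_ctx)
{
	gr_ctx->mapped = 0;		/* the real helper unmaps the buffer */
}

int main(void)
{
	struct nvgpu_gr_ctx gr_ctx = { .patch_count = 0, .mapped = 0 };

	/* Bracket every group of patch writes with begin/end, as the
	 * spill, pagepool and beta-CB writes above do. */
	if (ctx_patch_write_begin(&gr_ctx))
		return 1;
	ctx_patch_write(&gr_ctx, 0x1234, 0xabcd);	/* hypothetical reg/value */
	ctx_patch_write(&gr_ctx, 0x5678, 0xef01);
	ctx_patch_write_end(&gr_ctx);

	printf("patched %u registers\n", gr_ctx.patch_count);
	return 0;
}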
@@ -1478,10 +1489,9 @@ int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
} }
void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *gr_ctx,
u64 addr, bool patch) u64 addr, bool patch)
{ {
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
int attrBufferSize; int attrBufferSize;
if (gr_ctx->preempt_ctxsw_buffer.gpu_va) if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1491,37 +1501,37 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
} }
void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *gr_ctx,
u64 addr, u64 size, bool patch) u64 addr, u64 size, bool patch)
{ {
u32 data; u32 data;
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(),
gr_scc_bundle_cb_size_div_256b_f(size) | gr_scc_bundle_cb_size_div_256b_f(size) |
gr_scc_bundle_cb_size_valid_true_f(), patch); gr_scc_bundle_cb_size_valid_true_f(), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
@@ -1535,7 +1545,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
g->gr.bundle_cb_token_limit, data); g->gr.bundle_cb_token_limit, data);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
gr_pd_ab_dist_cfg2_state_limit_f(data), patch); gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
} }
@@ -1706,14 +1716,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
struct channel_gk20a *fault_ch) struct channel_gk20a *fault_ch)
{ {
int ret; int ret;
struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
struct tsg_gk20a *tsg; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
if (!gr_ctx) tsg = tsg_gk20a_from_ch(fault_ch);
if (!tsg)
return -EINVAL; return -EINVAL;
gr_ctx = &tsg->gr_ctx;
if (gr_ctx->cilp_preempt_pending) { if (gr_ctx->cilp_preempt_pending) {
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP is already pending for chid %d", "CILP is already pending for chid %d",
@@ -1783,13 +1796,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
struct channel_gk20a *fault_ch) struct channel_gk20a *fault_ch)
{ {
struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
if (!gr_ctx) tsg = tsg_gk20a_from_ch(fault_ch);
if (!tsg)
return -EINVAL; return -EINVAL;
gr_ctx = &tsg->gr_ctx;
/* The ucode is self-clearing, so all we need to do here is /* The ucode is self-clearing, so all we need to do here is
to clear cilp_preempt_pending. */ to clear cilp_preempt_pending. */
if (!gr_ctx->cilp_preempt_pending) { if (!gr_ctx->cilp_preempt_pending) {
@@ -1820,13 +1837,19 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
struct tsg_gk20a *tsg;
*early_exit = false; *early_exit = false;
*ignore_debugger = false; *ignore_debugger = false;
if (fault_ch) if (fault_ch) {
cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == tsg = tsg_gk20a_from_ch(fault_ch);
if (!tsg)
return -EINVAL;
cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
NVGPU_PREEMPTION_MODE_COMPUTE_CILP); NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
}
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
gpc, tpc, global_esr); gpc, tpc, global_esr);
@@ -1911,8 +1934,9 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
{ {
struct gr_ctx_desc *gr_ctx; struct nvgpu_gr_ctx *gr_ctx;
struct channel_gk20a *ch; struct channel_gk20a *ch;
struct tsg_gk20a *tsg;
int chid; int chid;
int ret = -EINVAL; int ret = -EINVAL;
@@ -1922,7 +1946,11 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
if (!ch) if (!ch)
return ret; return ret;
gr_ctx = ch->ch_ctx.gr_ctx; tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
gr_ctx = &tsg->gr_ctx;
if (gr_ctx->cilp_preempt_pending) { if (gr_ctx->cilp_preempt_pending) {
*__chid = chid; *__chid = chid;
@@ -2022,11 +2050,17 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
bool *cilp_preempt_pending) bool *cilp_preempt_pending)
{ {
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; struct tsg_gk20a *tsg;
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; struct nvgpu_gr_ctx *gr_ctx;
bool ctx_resident = false; bool ctx_resident = false;
int err = 0; int err = 0;
tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
gr_ctx = &tsg->gr_ctx;
*cilp_preempt_pending = false; *cilp_preempt_pending = false;
if (gk20a_is_channel_ctx_resident(ch)) { if (gk20a_is_channel_ctx_resident(ch)) {
@@ -2097,15 +2131,22 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
nvgpu_mutex_release(&g->dbg_sessions_lock); nvgpu_mutex_release(&g->dbg_sessions_lock);
if (cilp_preempt_pending_ch) { if (cilp_preempt_pending_ch) {
struct channel_ctx_gk20a *ch_ctx = struct tsg_gk20a *tsg;
&cilp_preempt_pending_ch->ch_ctx; struct nvgpu_gr_ctx *gr_ctx;
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
struct nvgpu_timeout timeout; struct nvgpu_timeout timeout;
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP preempt pending, waiting %lu msecs for preemption", "CILP preempt pending, waiting %lu msecs for preemption",
gk20a_get_gr_idle_timeout(g)); gk20a_get_gr_idle_timeout(g));
tsg = tsg_gk20a_from_ch(cilp_preempt_pending_ch);
if (!tsg) {
err = -EINVAL;
goto clean_up;
}
gr_ctx = &tsg->gr_ctx;
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER); NVGPU_TIMER_CPU_TIMER);
do { do {
@@ -2130,12 +2171,19 @@ clean_up:
int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
bool boost) bool boost)
{ {
struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_mem *mem = &gr_ctx->mem; struct nvgpu_mem *mem;
int err = 0; int err = 0;
tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
gr_ctx = &tsg->gr_ctx;
gr_ctx->boosted_ctx = boost; gr_ctx->boosted_ctx = boost;
mem = &gr_ctx->mem;
if (nvgpu_mem_begin(g, mem)) if (nvgpu_mem_begin(g, mem))
return -ENOMEM; return -ENOMEM;
@@ -2162,7 +2210,7 @@ unmap_ctx:
} }
void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
struct gr_ctx_desc *gr_ctx) { struct nvgpu_gr_ctx *gr_ctx) {
u32 v; u32 v;
v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f( v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
@@ -2174,13 +2222,12 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode) u32 compute_preempt_mode)
{ {
struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; struct nvgpu_gr_ctx *gr_ctx;
struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct tsg_gk20a *tsg; struct tsg_gk20a *tsg;
struct vm_gk20a *vm; struct vm_gk20a *vm;
struct nvgpu_mem *mem = &gr_ctx->mem; struct nvgpu_mem *mem;
struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header; struct ctx_header_desc *ctx = &ch->ctx_header;
struct nvgpu_mem *ctxheader = &ctx->mem; struct nvgpu_mem *ctxheader = &ctx->mem;
u32 class; u32 class;
int err = 0; int err = 0;
@@ -2189,12 +2236,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
if (!class) if (!class)
return -EINVAL; return -EINVAL;
if (gk20a_is_channel_marked_as_tsg(ch)) { tsg = tsg_gk20a_from_ch(ch);
tsg = &g->fifo.tsg[ch->tsgid]; if (!tsg)
vm = tsg->vm; return -EINVAL;
} else {
vm = ch->vm; vm = tsg->vm;
} gr_ctx = &tsg->gr_ctx;
mem = &gr_ctx->mem;
/* skip setting anything if both modes are already set */ /* skip setting anything if both modes are already set */
if (graphics_preempt_mode && if (graphics_preempt_mode &&
@@ -2241,15 +2289,15 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
if (g->ops.gr.update_ctxsw_preemption_mode) { if (g->ops.gr.update_ctxsw_preemption_mode) {
g->ops.gr.update_ctxsw_preemption_mode(ch->g, g->ops.gr.update_ctxsw_preemption_mode(ch->g,
ch_ctx, mem); ch, mem);
err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
if (err) { if (err) {
nvgpu_err(g, "can't map patch context"); nvgpu_err(g, "can't map patch context");
goto enable_ch; goto enable_ch;
} }
g->ops.gr.commit_global_cb_manager(g, ch, true); g->ops.gr.commit_global_cb_manager(g, ch, true);
gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
} }
enable_ch: enable_ch:
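The change that repeats in every function above is the ownership lookup: instead of dereferencing a per-channel ch->ch_ctx.gr_ctx pointer (and guarding against it being NULL), callers resolve the channel's TSG with tsg_gk20a_from_ch() and use the context embedded in it as &tsg->gr_ctx. The following is a minimal, self-contained sketch of that pattern only; the structure layouts, the tsg_from_ch() helper and set_compute_preempt_mode() are simplified stand-ins, not the real nvgpu definitions.

#include <stdio.h>

/* Simplified stand-ins for the nvgpu structures; the real definitions
 * carry far more state and live in the driver headers. */
struct nvgpu_gr_ctx {
	unsigned int graphics_preempt_mode;
	unsigned int compute_preempt_mode;
};

struct tsg_gk20a {
	struct nvgpu_gr_ctx gr_ctx;	/* context is now embedded in the TSG */
};

struct channel_gk20a {
	struct tsg_gk20a *tsg;		/* stand-in for the tsgid-based lookup */
};

/* Mirrors the shape of tsg_gk20a_from_ch(): NULL when the channel is
 * not bound to a TSG. */
static struct tsg_gk20a *tsg_from_ch(struct channel_gk20a *ch)
{
	return ch->tsg;
}

/* The recurring pattern from the diff: resolve the TSG, bail out if the
 * channel is unbound, then operate on &tsg->gr_ctx. */
static int set_compute_preempt_mode(struct channel_gk20a *ch, unsigned int mode)
{
	struct tsg_gk20a *tsg = tsg_from_ch(ch);
	struct nvgpu_gr_ctx *gr_ctx;

	if (!tsg)
		return -1;		/* -EINVAL in the driver */

	gr_ctx = &tsg->gr_ctx;
	gr_ctx->compute_preempt_mode = mode;
	return 0;
}

int main(void)
{
	struct tsg_gk20a tsg = { .gr_ctx = { 0, 0 } };
	struct channel_gk20a ch = { .tsg = &tsg };

	if (set_compute_preempt_mode(&ch, 2) == 0)
		printf("compute preempt mode: %u\n", tsg.gr_ctx.compute_preempt_mode);
	return 0;
}

Because every channel in a TSG now reaches the same embedded context, the NULL checks shift from "is the per-channel context allocated" to "is the channel bound to a TSG", which is why the -EINVAL early returns appear throughout the hunks above.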


@@ -29,9 +29,8 @@
struct gk20a; struct gk20a;
struct gr_gk20a_isr_data; struct gr_gk20a_isr_data;
struct channel_ctx_gk20a; struct nvgpu_gr_ctx;
struct zbc_entry; struct zbc_entry;
struct gr_ctx_desc;
struct nvgpu_preemption_modes_rec; struct nvgpu_preemption_modes_rec;
struct gk20a_debug_output; struct gk20a_debug_output;
@@ -75,7 +74,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
int gr_gp10b_commit_global_cb_manager(struct gk20a *g, int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
struct channel_gk20a *c, bool patch); struct channel_gk20a *c, bool patch);
void gr_gp10b_commit_global_pagepool(struct gk20a *g, void gr_gp10b_commit_global_pagepool(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u32 size, bool patch); u64 addr, u32 size, bool patch);
u32 gr_gp10b_get_gpcs_swdx_dss_zbc_c_format_reg(struct gk20a *g); u32 gr_gp10b_get_gpcs_swdx_dss_zbc_c_format_reg(struct gk20a *g);
u32 gr_gp10b_get_gpcs_swdx_dss_zbc_z_format_reg(struct gk20a *g); u32 gr_gp10b_get_gpcs_swdx_dss_zbc_z_format_reg(struct gk20a *g);
@@ -93,28 +92,28 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data); void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
int gr_gp10b_init_ctx_state(struct gk20a *g); int gr_gp10b_init_ctx_state(struct gk20a *g);
int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode); u32 compute_preempt_mode);
int gr_gp10b_alloc_gr_ctx(struct gk20a *g, int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 class,
u32 flags); u32 flags);
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_gk20a *c,
struct nvgpu_mem *mem); struct nvgpu_mem *mem);
int gr_gp10b_dump_gr_status_regs(struct gk20a *g, int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o); struct gk20a_debug_output *o);
void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
struct gr_ctx_desc *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
u32 expect_delay); u32 expect_delay);
void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, bool patch); u64 addr, bool patch);
void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, u64 size, bool patch); u64 addr, u64 size, bool patch);
int gr_gp10b_load_smid_config(struct gk20a *g); int gr_gp10b_load_smid_config(struct gk20a *g);
void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
@@ -133,7 +132,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
bool boost); bool boost);
void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
struct gr_ctx_desc *gr_ctx); struct nvgpu_gr_ctx *gr_ctx);
int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode); u32 compute_preempt_mode);


@@ -236,7 +236,6 @@ static const struct gpu_ops gp10b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,


@@ -305,7 +305,6 @@ static const struct gpu_ops gv100_ops = {
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,


@@ -1373,7 +1373,7 @@ fail_free:
} }
int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode) u32 compute_preempt_mode)
@@ -1497,13 +1497,13 @@ fail:
} }
void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_gk20a *c,
struct nvgpu_mem *mem) struct nvgpu_mem *mem)
{ {
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; struct tsg_gk20a *tsg;
struct ctx_header_desc *ctx = &ch_ctx->ctx_header; struct nvgpu_gr_ctx *gr_ctx;
struct ctx_header_desc *ctx = &c->ctx_header;
struct nvgpu_mem *ctxheader = &ctx->mem; struct nvgpu_mem *ctxheader = &ctx->mem;
u32 gfxp_preempt_option = u32 gfxp_preempt_option =
ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
u32 cilp_preempt_option = u32 cilp_preempt_option =
@@ -1514,6 +1514,12 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
gk20a_dbg_fn(""); gk20a_dbg_fn("");
tsg = tsg_gk20a_from_ch(c);
if (!tsg)
return;
gr_ctx = &tsg->gr_ctx;
if (gr_ctx->graphics_preempt_mode == if (gr_ctx->graphics_preempt_mode ==
NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
@@ -1552,7 +1558,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
gr_ctx->preempt_ctxsw_buffer.gpu_va); gr_ctx->preempt_ctxsw_buffer.gpu_va);
} }
err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
if (err) { if (err) {
nvgpu_err(g, "can't map patch context"); nvgpu_err(g, "can't map patch context");
goto out; goto out;
@@ -1564,7 +1570,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
gk20a_dbg_info("attrib cb addr : 0x%016x", addr); gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
gr_scc_pagepool_base_addr_39_8_align_bits_v()) | gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1575,7 +1581,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
if (size == g->ops.gr.pagepool_default_size(g)) if (size == g->ops.gr.pagepool_default_size(g))
size = gr_scc_pagepool_total_pages_hwmax_v(); size = gr_scc_pagepool_total_pages_hwmax_v();
g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1584,28 +1590,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
size = gr_ctx->spill_ctxsw_buffer.size / size = gr_ctx->spill_ctxsw_buffer.size /
gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_swdx_rm_spill_buffer_addr_r(), gr_gpc0_swdx_rm_spill_buffer_addr_r(),
gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
true); true);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpc0_swdx_rm_spill_buffer_size_r(), gr_gpc0_swdx_rm_spill_buffer_size_r(),
gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
true); true);
cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpcs_swdx_beta_cb_ctrl_r(), gr_gpcs_swdx_beta_cb_ctrl_r(),
gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
cbes_reserve), cbes_reserve),
true); true);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gk20a_ctx_patch_write(g, gr_ctx,
gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
cbes_reserve), cbes_reserve),
true); true);
gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
} }
out: out:
@@ -1902,10 +1908,9 @@ int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
} }
void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *gr_ctx,
u64 addr, bool patch) u64 addr, bool patch)
{ {
struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
int attrBufferSize; int attrBufferSize;
if (gr_ctx->preempt_ctxsw_buffer.gpu_va) if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1915,16 +1920,16 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
} }
@@ -2042,6 +2047,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
u32 offset = gk20a_gr_gpc_offset(g, gpc) + u32 offset = gk20a_gr_gpc_offset(g, gpc) +
gk20a_gr_tpc_offset(g, tpc) + gk20a_gr_tpc_offset(g, tpc) +
gv11b_gr_sm_offset(g, sm); gv11b_gr_sm_offset(g, sm);
struct tsg_gk20a *tsg;
*early_exit = false; *early_exit = false;
*ignore_debugger = false; *ignore_debugger = false;
@@ -2054,9 +2060,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
warp_esr, fault_ch); warp_esr, fault_ch);
if (fault_ch) if (fault_ch) {
cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == tsg = tsg_gk20a_from_ch(fault_ch);
if (!tsg)
return -EINVAL;
cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
NVGPU_PREEMPTION_MODE_COMPUTE_CILP); NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
}
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
"SM Exception received on gpc %d tpc %d sm %d = 0x%08x", "SM Exception received on gpc %d tpc %d sm %d = 0x%08x",
@@ -2509,7 +2520,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
if (err) if (err)
return err; return err;
ctx = &c->ch_ctx.ctx_header; ctx = &c->ctx_header;
addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
addr_hi = u64_hi32(ctx->mem.gpu_va); addr_hi = u64_hi32(ctx->mem.gpu_va);
@@ -2529,7 +2540,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
{ {
struct channel_ctx_gk20a *ch_ctx = NULL; struct nvgpu_gr_ctx *ch_ctx = NULL;
u32 pd_ab_dist_cfg0; u32 pd_ab_dist_cfg0;
u32 ds_debug; u32 ds_debug;
u32 mpc_vtg_debug; u32 mpc_vtg_debug;
@@ -2836,11 +2847,18 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
struct channel_gk20a *ch, u32 sm_id, struct channel_gk20a *ch, u32 sm_id,
struct nvgpu_gr_sm_error_state *sm_error_state) struct nvgpu_gr_sm_error_state *sm_error_state)
{ {
struct tsg_gk20a *tsg;
u32 gpc, tpc, sm, offset; u32 gpc, tpc, sm, offset;
struct gr_gk20a *gr = &g->gr; struct gr_gk20a *gr = &g->gr;
struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; struct nvgpu_gr_ctx *ch_ctx;
int err = 0; int err = 0;
tsg = tsg_gk20a_from_ch(ch);
if (!tsg)
return -EINVAL;
ch_ctx = &tsg->gr_ctx;
nvgpu_mutex_acquire(&g->dbg_sessions_lock); nvgpu_mutex_acquire(&g->dbg_sessions_lock);
gr->sm_error_states[sm_id].hww_global_esr = gr->sm_error_states[sm_id].hww_global_esr =

View File

@@ -41,9 +41,10 @@ struct zbc_s_table {
}; };
struct gk20a; struct gk20a;
struct gr_gk20a;
struct zbc_entry; struct zbc_entry;
struct zbc_query_params; struct zbc_query_params;
struct channel_ctx_gk20a; struct nvgpu_gr_ctx;
struct nvgpu_warpstate; struct nvgpu_warpstate;
struct nvgpu_gr_sm_error_state; struct nvgpu_gr_sm_error_state;
struct gr_ctx_desc; struct gr_ctx_desc;
@@ -128,7 +129,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
u32 expect_delay); u32 expect_delay);
void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct nvgpu_gr_ctx *ch_ctx,
u64 addr, bool patch); u64 addr, bool patch);
void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
void gr_gv11b_get_access_map(struct gk20a *g, void gr_gv11b_get_access_map(struct gk20a *g,
@@ -222,13 +223,13 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g); void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g);
int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
struct gr_ctx_desc *gr_ctx, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 graphics_preempt_mode,
u32 compute_preempt_mode); u32 compute_preempt_mode);
void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
struct channel_ctx_gk20a *ch_ctx, struct channel_gk20a *ch_ctx,
struct nvgpu_mem *mem); struct nvgpu_mem *mem);
#endif #endif


@@ -272,7 +272,6 @@ static const struct gpu_ops gv11b_ops = {
.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
.free_channel_ctx = gk20a_free_channel_ctx,
.alloc_obj_ctx = gk20a_alloc_obj_ctx, .alloc_obj_ctx = gk20a_alloc_obj_ctx,
.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
.get_zcull_info = gr_gk20a_get_zcull_info, .get_zcull_info = gr_gk20a_get_zcull_info,


@@ -43,7 +43,7 @@ static void gv11b_subctx_commit_pdb(struct channel_gk20a *c,
void gv11b_free_subctx_header(struct channel_gk20a *c) void gv11b_free_subctx_header(struct channel_gk20a *c)
{ {
struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct ctx_header_desc *ctx = &c->ctx_header;
struct gk20a *g = c->g; struct gk20a *g = c->g;
nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header");
@@ -57,13 +57,13 @@ void gv11b_free_subctx_header(struct channel_gk20a *c)
int gv11b_alloc_subctx_header(struct channel_gk20a *c) int gv11b_alloc_subctx_header(struct channel_gk20a *c)
{ {
struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct ctx_header_desc *ctx = &c->ctx_header;
struct gk20a *g = c->g; struct gk20a *g = c->g;
int ret = 0; int ret = 0;
nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
if (ctx->mem.gpu_va == 0) { if (!nvgpu_mem_is_valid(&ctx->mem)) {
ret = nvgpu_dma_alloc_flags_sys(g, ret = nvgpu_dma_alloc_flags_sys(g,
0, /* No Special flags */ 0, /* No Special flags */
ctxsw_prog_fecs_header_v(), ctxsw_prog_fecs_header_v(),
@@ -111,20 +111,50 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
{ {
struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct ctx_header_desc *ctx = &c->ctx_header;
struct nvgpu_mem *gr_mem; struct nvgpu_mem *gr_mem;
struct gk20a *g = c->g; struct gk20a *g = c->g;
int ret = 0; int ret = 0;
u32 addr_lo, addr_hi; u32 addr_lo, addr_hi;
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
addr_lo = u64_lo32(gpu_va); tsg = tsg_gk20a_from_ch(c);
addr_hi = u64_hi32(gpu_va); if (!tsg)
return -EINVAL;
gr_ctx = &tsg->gr_ctx;
gr_mem = &ctx->mem; gr_mem = &ctx->mem;
g->ops.mm.l2_flush(g, true); g->ops.mm.l2_flush(g, true);
if (nvgpu_mem_begin(g, gr_mem)) if (nvgpu_mem_begin(g, gr_mem))
return -ENOMEM; return -ENOMEM;
/* set priv access map */
addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
nvgpu_mem_wr(g, gr_mem,
ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
addr_lo);
nvgpu_mem_wr(g, gr_mem,
ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
addr_hi);
addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
nvgpu_mem_wr(g, gr_mem,
ctxsw_prog_main_image_patch_adr_lo_o(),
addr_lo);
nvgpu_mem_wr(g, gr_mem,
ctxsw_prog_main_image_patch_adr_hi_o(),
addr_hi);
g->ops.gr.write_pm_ptr(g, gr_mem, gr_ctx->pm_ctx.mem.gpu_va);
g->ops.gr.write_zcull_ptr(g, gr_mem, gr_ctx->zcull_ctx.gpu_va);
addr_lo = u64_lo32(gpu_va);
addr_hi = u64_hi32(gpu_va);
nvgpu_mem_wr(g, gr_mem, nvgpu_mem_wr(g, gr_mem,
ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
nvgpu_mem_wr(g, gr_mem, nvgpu_mem_wr(g, gr_mem,