linux-nvgpu/drivers/gpu/nvgpu/common/gr/ctx.c
Sagar Kamble f55fd5dc8c gpu: nvgpu: multiple address spaces support for subcontexts
This patch introduces the following relationships among various nvgpu
objects to support multiple address spaces with subcontexts. The
IOCTLs that establish each relationship are shown in parentheses.

nvgpu_tsg             1<---->n nvgpu_tsg_subctx (TSG_BIND_CHANNEL_EX)
nvgpu_tsg             1<---->n nvgpu_gr_ctx_mappings (ALLOC_OBJ_CTX)

nvgpu_tsg_subctx      1<---->1 nvgpu_gr_subctx (ALLOC_OBJ_CTX)
nvgpu_tsg_subctx      1<---->n nvgpu_channel (TSG_BIND_CHANNEL_EX)

nvgpu_gr_ctx_mappings 1<---->n nvgpu_gr_subctx (ALLOC_OBJ_CTX)
nvgpu_gr_ctx_mappings 1<---->1 vm_gk20a (ALLOC_OBJ_CTX)

On unbinding a channel, objects are deleted according to these
dependencies.
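
As a rough C sketch of these links (the nvgpu_tsg list heads below are
the ones asserted in ctx.c; the other field names are hypothetical,
not the actual nvgpu definitions):

	struct nvgpu_tsg {
		struct nvgpu_list_node ch_list;              /* bound channels */
		struct nvgpu_list_node subctx_list;          /* 1:n nvgpu_tsg_subctx */
		struct nvgpu_list_node gr_ctx_mappings_list; /* 1:n nvgpu_gr_ctx_mappings */
	};

	struct nvgpu_tsg_subctx {
		struct nvgpu_gr_subctx *gr_subctx; /* 1:1, from ALLOC_OBJ_CTX */
		struct nvgpu_list_node ch_list;    /* 1:n channels (TSG_BIND_CHANNEL_EX) */
	};

	struct nvgpu_gr_ctx_mappings {
		struct vm_gk20a *vm;               /* 1:1 address space */
	};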

Without subcontexts, gr_ctx buffer mappings are maintained in
struct nvgpu_gr_ctx. With subcontexts, they are maintained in
struct nvgpu_gr_subctx.

The preemption buffer (index NVGPU_GR_CTX_PREEMPT_CTXSW) and the PM
buffer (index NVGPU_GR_CTX_PM_CTX) are mapped in all subcontexts when
they are programmed through their respective IOCTLs.

Global GR context buffers are programmed only for VEID0. Based on
the channel object class, state is patched into the patch buffer on
every ALLOC_OBJ_CTX call, instead of only for the first channel as
before.

PM and preemption buffer programming is protected by the TSG
ctx_init_lock.

tsg->vm is now removed. The VM reference for gr_ctx buffer mappings
is managed through the gr_ctx or gr_subctx mappings object.

For vGPU, gr_subctx and mappings objects are created to reference
VMs for the gr_ctx lifetime.

The functions nvgpu_tsg_subctx_alloc_gr_subctx and
nvgpu_tsg_subctx_setup_subctx_header set up the subcontext header
for the native driver.

The function nvgpu_tsg_subctx_alloc_gr_subctx is also called from
vGPU to manage the gr_ctx mapping references.

free_subctx now happens at channel unbind, taking into account
references to the subcontext held by other channels. In the native
driver it unmaps the buffers; on vGPU it only releases the VM
reference.
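
Roughly, as a sketch in pseudo-C (helper and field names hypothetical;
the real flow lives in the TSG subcontext code):

	/* on channel unbind */
	nvgpu_list_del(&ch->subctx_entry);
	if (nvgpu_list_empty(&subctx->ch_list)) {
		/*
		 * Last channel gone: the native driver unmaps the
		 * gr_ctx buffers from the subcontext VM; vGPU only
		 * drops its VM reference.
		 */
		free_subctx(tsg, subctx);
	}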

Note that the TEGRA_VGPU_CMD_FREE_CTX_HEADER command is no longer
issued by vGPU, since this is now handled by the native driver.

Bug 3677982

Change-Id: Ia439b251ff452a49f8514498832e24d04db86d2f
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2718760
Reviewed-by: Scott Long <scottl@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2022-09-08 20:59:59 -07:00


/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/ctx_mappings.h>
#include <nvgpu/vm.h>
#include <nvgpu/io.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/dma.h>
#include <nvgpu/string.h>
#include <nvgpu/tsg_subctx.h>
#include <nvgpu/power_features/pg.h>
#include "common/gr/ctx_priv.h"
struct nvgpu_gr_ctx_desc *
nvgpu_gr_ctx_desc_alloc(struct gk20a *g)
{
struct nvgpu_gr_ctx_desc *desc = nvgpu_kzalloc(g, sizeof(*desc));
return desc;
}
void nvgpu_gr_ctx_desc_free(struct gk20a *g,
struct nvgpu_gr_ctx_desc *desc)
{
nvgpu_kfree(g, desc);
}
void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc,
u32 index, u32 size)
{
nvgpu_assert(index < NVGPU_GR_CTX_COUNT);
gr_ctx_desc->size[index] = size;
}
u32 nvgpu_gr_ctx_get_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc,
u32 index)
{
nvgpu_assert(index < NVGPU_GR_CTX_COUNT);
return gr_ctx_desc->size[index];
}
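/*
 * Illustrative usage of the descriptor API (not code from this file;
 * the caller context, error handling and size values are assumed):
 *
 *	struct nvgpu_gr_ctx_desc *desc = nvgpu_gr_ctx_desc_alloc(g);
 *
 *	if (desc == NULL)
 *		return -ENOMEM;
 *	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, golden_image_size);
 *	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, patch_ctx_size);
 *	...
 *	nvgpu_gr_ctx_desc_free(g, desc);
 */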
struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g)
{
return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx));
}
void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
nvgpu_kfree(g, gr_ctx);
}
void nvgpu_gr_ctx_free_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *ctx)
{
u32 i;
nvgpu_log(g, gpu_dbg_gr, " ");
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
if (nvgpu_mem_is_valid(&ctx->mem[i])) {
nvgpu_dma_free(g, &ctx->mem[i]);
}
}
nvgpu_log(g, gpu_dbg_gr, "done");
}
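/*
 * Allocate sysmem backing for each context buffer whose size has been
 * set in the descriptor. GFXP preemption buffers are skipped unless
 * graphics preemption mode is GFXP. Buffers that are already valid are
 * left untouched, so a repeated call after a partial setup is safe.
 */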
int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx_desc *desc,
struct nvgpu_gr_ctx *ctx)
{
int err = 0;
u32 i;
nvgpu_log(g, gpu_dbg_gr, " ");
if (desc->size[NVGPU_GR_CTX_CTX] == 0U) {
nvgpu_err(g, "context buffer size not set");
return -EINVAL;
}
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
#ifdef CONFIG_NVGPU_GFXP
/**
* Skip allocating the gfxp preemption buffers if GFXP mode is
* not set in the gr ctx.
*/
if ((i >= NVGPU_GR_CTX_PREEMPT_CTXSW) &&
(i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) &&
(nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) !=
NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP)) {
continue;
}
#endif
if (desc->size[i] != 0U && !nvgpu_mem_is_valid(&ctx->mem[i])) {
err = nvgpu_dma_alloc_sys(g, desc->size[i],
&ctx->mem[i]);
if (err != 0) {
nvgpu_err(g, "ctx buffer %u alloc failed", i);
nvgpu_gr_ctx_free_ctx_buffers(g, ctx);
return err;
}
nvgpu_log(g, gpu_dbg_gr, "ctx buffer %u allocated", i);
}
}
if (!nvgpu_gr_ctx_get_ctx_initialized(ctx)) {
ctx->ctx_id_valid = false;
}
nvgpu_log(g, gpu_dbg_gr, "done");
return err;
}
void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g,
struct nvgpu_gr_ctx *ctx)
{
u32 i;
nvgpu_log(g, gpu_dbg_gr, " ");
/**
* Map all ctx buffers as cacheable except GR CTX and
* PATCH CTX buffers.
*/
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
ctx->mapping_flags[i] = NVGPU_VM_MAP_CACHEABLE;
}
ctx->mapping_flags[NVGPU_GR_CTX_CTX] = 0U;
ctx->mapping_flags[NVGPU_GR_CTX_PATCH_CTX] = 0U;
nvgpu_log(g, gpu_dbg_gr, "done");
}
#ifdef CONFIG_NVGPU_GFXP
static void nvgpu_gr_ctx_free_ctx_preemption_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *ctx)
{
u32 i;
nvgpu_log_fn(g, " ");
for (i = NVGPU_GR_CTX_PREEMPT_CTXSW;
i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) {
if (nvgpu_mem_is_valid(&ctx->mem[i])) {
nvgpu_dma_free(g, &ctx->mem[i]);
}
}
nvgpu_log_fn(g, "done");
}
int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g,
struct nvgpu_gr_ctx_desc *desc,
struct nvgpu_gr_ctx *ctx)
{
int err = 0;
u32 i;
nvgpu_log(g, gpu_dbg_gr, " ");
/**
* Skip allocating the gfxp preemption buffers if GFXP mode is
* not set in the gr ctx.
*/
if (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) !=
NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
nvgpu_log(g, gpu_dbg_gr, "GFXP mode not set. Skip preemption "
"buffers allocation");
return 0;
}
for (i = NVGPU_GR_CTX_PREEMPT_CTXSW;
i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) {
if (desc->size[i] != 0U && !nvgpu_mem_is_valid(&ctx->mem[i])) {
err = nvgpu_dma_alloc_sys(g, desc->size[i],
&ctx->mem[i]);
if (err != 0) {
nvgpu_err(g, "ctx preemption buffer %u alloc failed", i);
nvgpu_gr_ctx_free_ctx_preemption_buffers(g, ctx);
return err;
}
nvgpu_log(g, gpu_dbg_gr, "ctx preemption buffer %u allocated", i);
}
}
nvgpu_log(g, gpu_dbg_gr, "done");
return err;
}
#endif
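/*
 * Release a TSG's GR context. With subcontexts enabled, all channels,
 * subcontexts and mappings must already be gone (torn down at channel
 * unbind); otherwise the single gr_ctx mappings object is unmapped and
 * freed here. Runs under the TSG ctx_init_lock.
 */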
void nvgpu_gr_ctx_free(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer)
{
struct nvgpu_tsg *tsg;
nvgpu_log(g, gpu_dbg_gr, " ");
if (gr_ctx != NULL) {
tsg = nvgpu_tsg_get_from_id(g, gr_ctx->tsgid);
nvgpu_mutex_acquire(&tsg->ctx_init_lock);
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
nvgpu_assert(nvgpu_list_empty(&tsg->ch_list));
nvgpu_assert(nvgpu_list_empty(&tsg->subctx_list));
nvgpu_assert(nvgpu_list_empty(&tsg->gr_ctx_mappings_list));
} else {
if (gr_ctx->mappings != NULL) {
nvgpu_gr_ctx_unmap_buffers(g,
gr_ctx, NULL, global_ctx_buffer,
gr_ctx->mappings);
nvgpu_gr_ctx_free_mappings(g, gr_ctx);
}
}
nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0);
nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx);
(void) memset(gr_ctx, 0, sizeof(*gr_ctx));
nvgpu_mutex_release(&tsg->ctx_init_lock);
}
nvgpu_log(g, gpu_dbg_gr, "done");
}
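/*
 * Return the mappings object for this TSG/channel, creating it on
 * first use. With subcontexts the lookup is per-subcontext (keyed by
 * the channel's VM); otherwise a single mappings object hangs off the
 * gr_ctx.
 */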
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g,
struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
{
struct nvgpu_gr_ctx_mappings *mappings = NULL;
struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx;
struct vm_gk20a *vm = ch->vm;
nvgpu_log(g, gpu_dbg_gr, " ");
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
return nvgpu_tsg_subctx_alloc_or_get_mappings(g, tsg, ch);
}
mappings = gr_ctx->mappings;
if (mappings != NULL) {
return mappings;
}
mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
if (mappings == NULL) {
nvgpu_err(g, "failed to allocate gr_ctx mappings");
return mappings;
}
gr_ctx->mappings = mappings;
nvgpu_log(g, gpu_dbg_gr, "done");
return mappings;
}
void nvgpu_gr_ctx_free_mappings(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx)
{
nvgpu_log(g, gpu_dbg_gr, " ");
if (gr_ctx->mappings == NULL) {
return;
}
nvgpu_gr_ctx_mappings_free(g, gr_ctx->mappings);
gr_ctx->mappings = NULL;
nvgpu_log(g, gpu_dbg_gr, "done");
}
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg,
struct nvgpu_channel *ch)
{
struct gk20a *g = tsg->g;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
return nvgpu_gr_ctx_mappings_get_subctx_mappings(g, tsg, ch->vm);
}
return tsg->gr_ctx->mappings;
}
void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx,
u32 data_count)
{
gr_ctx->patch_ctx.data_count = data_count;
}
struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx,
u32 index)
{
nvgpu_assert(index < NVGPU_GR_CTX_COUNT);
return &gr_ctx->mem[index];
}
u32 nvgpu_gr_ctx_get_ctx_mapping_flags(struct nvgpu_gr_ctx *gr_ctx, u32 index)
{
nvgpu_assert(index < NVGPU_GR_CTX_COUNT);
return gr_ctx->mapping_flags[index];
}
#ifdef CONFIG_NVGPU_SM_DIVERSITY
void nvgpu_gr_ctx_set_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx,
u32 sm_diversity_config)
{
gr_ctx->sm_diversity_config = sm_diversity_config;
}
u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->sm_diversity_config;
}
#endif
/* Load a saved fresh copy of the golden image into the channel gr_ctx */
void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_mappings *mappings,
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
bool cde)
{
struct nvgpu_mem *mem;
#ifdef CONFIG_NVGPU_DEBUGGER
u64 virt_addr = 0;
#endif
(void)cde;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
nvgpu_gr_global_ctx_load_local_golden_image(g,
local_golden_image, mem);
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data(g, mem);
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
if ((g->ops.gr.ctxsw_prog.set_cde_enabled != NULL) && cde) {
g->ops.gr.ctxsw_prog.set_cde_enabled(g, mem);
}
#endif
#ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP
/* set priv access map */
g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem,
g->allow_all);
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem,
nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA));
#endif
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
/* disable verif features */
g->ops.gr.ctxsw_prog.disable_verif_features(g, mem);
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
if (g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies !=
NULL) {
g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies(g,
mem, nvgpu_safe_cast_bool_to_u32(gr_ctx->boosted_ctx));
}
#endif
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "write patch count = %d",
gr_ctx->patch_ctx.data_count);
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
gr_ctx->patch_ctx.data_count);
g->ops.gr.ctxsw_prog.set_patch_addr(g, mem,
nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
NVGPU_GR_CTX_PATCH_CTX));
#ifdef CONFIG_NVGPU_DEBUGGER
/* PM ctxt switch is off by default */
gr_ctx->pm_ctx.pm_mode =
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
virt_addr = 0;
g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode);
g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, virt_addr);
#endif
nvgpu_log(g, gpu_dbg_gr, "done");
}
/*
* Context state can be written directly, or "patched" at times. So that code
* can be used in either situation it is written using a series of
* _ctx_patch_write(..., patch) statements. However any necessary map overhead
* should be minimized; thus, bundle the sequence of these writes together, and
* set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end.
*/
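/*
 * Illustrative sequence (hypothetical caller):
 *
 *	nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false);
 *	nvgpu_gr_ctx_patch_write(g, gr_ctx, addr, data, true);
 *	nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false);
 */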
void nvgpu_gr_ctx_patch_write_begin(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
bool update_patch_count)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
if (update_patch_count) {
/* reset patch count if ucode has already processed it */
gr_ctx->patch_ctx.data_count =
g->ops.gr.ctxsw_prog.get_patch_count(g, mem);
nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
gr_ctx->patch_ctx.data_count);
}
}
void nvgpu_gr_ctx_patch_write_end(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
bool update_patch_count)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
/* Write context count to context image if it is mapped */
if (update_patch_count) {
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
gr_ctx->patch_ctx.data_count);
nvgpu_log(g, gpu_dbg_info, "write patch count %d",
gr_ctx->patch_ctx.data_count);
}
}
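/*
 * Each patch entry occupies two 32-bit slots: the register address
 * followed by the data word. data_count indexes entries, so the slot
 * offset is data_count * PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY, bounded
 * by the patch buffer size. With patch == false the register is
 * instead written directly.
 */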
void nvgpu_gr_ctx_patch_write(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u32 addr, u32 data, bool patch)
{
if (patch) {
u32 patch_slot;
u64 patch_slot_max;
struct nvgpu_mem *patch_ctx_mem;
if (gr_ctx == NULL) {
nvgpu_err(g,
"failed to access gr_ctx[NULL] but patch true");
return;
}
patch_ctx_mem = &gr_ctx->mem[NVGPU_GR_CTX_PATCH_CTX];
patch_slot =
nvgpu_safe_mult_u32(gr_ctx->patch_ctx.data_count,
PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);
patch_slot_max =
nvgpu_safe_sub_u64(
PATCH_CTX_ENTRIES_FROM_SIZE(
patch_ctx_mem->size),
PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);
if (patch_slot > patch_slot_max) {
nvgpu_err(g, "failed to access patch_slot %d",
patch_slot);
return;
}
nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot, addr);
nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot + 1ULL, data);
gr_ctx->patch_ctx.data_count = nvgpu_safe_add_u32(
gr_ctx->patch_ctx.data_count, 1U);
nvgpu_log(g, gpu_dbg_info,
"patch addr = 0x%x data = 0x%x data_count %d",
addr, data, gr_ctx->patch_ctx.data_count);
} else {
nvgpu_writel(g, addr, data);
}
}
void nvgpu_gr_ctx_init_compute_preemption_mode(struct nvgpu_gr_ctx *gr_ctx,
u32 compute_preempt_mode)
{
gr_ctx->compute_preempt_mode = compute_preempt_mode;
}
u32 nvgpu_gr_ctx_get_compute_preemption_mode(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->compute_preempt_mode;
}
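/*
 * A requested preemption mode is valid only if it is non-zero,
 * advertised in the supported-modes mask, and not a downgrade from the
 * current mode. GFXP graphics preemption combined with CILP compute
 * preemption is rejected as an invalid pairing.
 */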
bool nvgpu_gr_ctx_check_valid_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u32 graphics_preempt_mode, u32 compute_preempt_mode)
{
u32 supported_graphics_preempt_mode = 0U;
u32 supported_compute_preempt_mode = 0U;
if ((graphics_preempt_mode == 0U) && (compute_preempt_mode == 0U)) {
return false;
}
g->ops.gr.init.get_supported_preemption_modes(
&supported_graphics_preempt_mode,
&supported_compute_preempt_mode);
if (graphics_preempt_mode != 0U) {
if ((graphics_preempt_mode & supported_graphics_preempt_mode) == 0U) {
return false;
}
/* Do not allow lower preemption modes than current ones */
if (graphics_preempt_mode < gr_ctx->graphics_preempt_mode) {
return false;
}
}
if (compute_preempt_mode != 0U) {
if ((compute_preempt_mode & supported_compute_preempt_mode) == 0U) {
return false;
}
/* Do not allow lower preemption modes than current ones */
if (compute_preempt_mode < gr_ctx->compute_preempt_mode) {
return false;
}
}
#if defined(CONFIG_NVGPU_CILP) && defined(CONFIG_NVGPU_GFXP)
/* Invalid combination */
if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
(compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP)) {
return false;
}
#endif
return true;
}
void nvgpu_gr_ctx_set_preemption_modes(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
#ifdef CONFIG_NVGPU_GFXP
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem);
}
#endif
#ifdef CONFIG_NVGPU_CILP
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem);
}
#endif
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem);
}
}
void nvgpu_gr_ctx_set_tsgid(struct nvgpu_gr_ctx *gr_ctx, u32 tsgid)
{
gr_ctx->tsgid = tsgid;
}
u32 nvgpu_gr_ctx_get_tsgid(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->tsgid;
}
void nvgpu_gr_ctx_mark_ctx_initialized(struct nvgpu_gr_ctx *gr_ctx)
{
gr_ctx->ctx_initialized = true;
}
bool nvgpu_gr_ctx_get_ctx_initialized(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->ctx_initialized;
}
#ifdef CONFIG_NVGPU_GRAPHICS
void nvgpu_gr_ctx_init_graphics_preemption_mode(struct nvgpu_gr_ctx *gr_ctx,
u32 graphics_preempt_mode)
{
gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
}
u32 nvgpu_gr_ctx_get_graphics_preemption_mode(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->graphics_preempt_mode;
}
void nvgpu_gr_ctx_set_zcull_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
u32 mode, u64 gpu_va)
{
struct zcull_ctx_desc *zcull_ctx = &gr_ctx->zcull_ctx;
(void)g;
zcull_ctx->ctx_sw_mode = mode;
zcull_ctx->gpu_va = gpu_va;
}
u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->zcull_ctx.gpu_va;
}
int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
nvgpu_log(g, gpu_dbg_gr, " ");
g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, mem);
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem, 0);
return 0;
}
int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
bool set_zcull_ptr)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
nvgpu_log_fn(g, " ");
if (gr_ctx->zcull_ctx.gpu_va == 0ULL &&
g->ops.gr.ctxsw_prog.is_zcull_mode_separate_buffer(
gr_ctx->zcull_ctx.ctx_sw_mode)) {
return -EINVAL;
}
g->ops.gr.ctxsw_prog.set_zcull(g, mem, gr_ctx->zcull_ctx.ctx_sw_mode);
if (set_zcull_ptr) {
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem,
gr_ctx->zcull_ctx.gpu_va);
}
return 0;
}
#endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_GFXP
void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings;
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
NVGPU_GR_CTX_PREEMPT_CTXSW);
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem,
preempt_ctxsw_gpu_va);
if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) {
g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g,
mem, preempt_ctxsw_gpu_va);
}
}
bool nvgpu_gr_ctx_desc_force_preemption_gfxp(struct nvgpu_gr_ctx_desc *gr_ctx_desc)
{
return gr_ctx_desc->force_preemption_gfxp;
}
#endif /* CONFIG_NVGPU_GFXP */
#ifdef CONFIG_NVGPU_CILP
bool nvgpu_gr_ctx_desc_force_preemption_cilp(struct nvgpu_gr_ctx_desc *gr_ctx_desc)
{
return gr_ctx_desc->force_preemption_cilp;
}
bool nvgpu_gr_ctx_get_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->cilp_preempt_pending;
}
void nvgpu_gr_ctx_set_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx,
bool cilp_preempt_pending)
{
gr_ctx->cilp_preempt_pending = cilp_preempt_pending;
}
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
u32 tmp;
tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, mem);
if (tmp == 0U) {
gr_ctx->patch_ctx.data_count = 0;
}
}
void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings;
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
gr_ctx->patch_ctx.data_count);
g->ops.gr.ctxsw_prog.set_patch_addr(g, mem,
nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
NVGPU_GR_CTX_PATCH_CTX));
}
static int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_ctx_desc *gr_ctx_desc)
{
int err;
err = nvgpu_dma_alloc_sys(g, gr_ctx_desc->size[NVGPU_GR_CTX_PM_CTX],
&gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]);
if (err != 0) {
nvgpu_err(g,
"failed to allocate pm ctx buffer");
return err;
}
return 0;
}
static void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
if (nvgpu_mem_is_valid(&gr_ctx->mem[NVGPU_GR_CTX_PM_CTX])) {
nvgpu_dma_free(g, &gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]);
}
(void)g;
}
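/*
 * Allocate the PM context buffer, sized from the HWPM map, and map it
 * into every subcontext VM (subcontext case) or into the gr_ctx's
 * single mappings object. Once the buffer is mapped, a second call is
 * a no-op.
 */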
int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g,
struct nvgpu_tsg *tsg,
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_hwpm_map *hwpm_map)
{
struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx;
struct nvgpu_gr_ctx_mappings *mappings;
int ret;
if (gr_ctx->pm_ctx.mapped) {
return 0;
}
nvgpu_gr_ctx_set_size(gr_ctx_desc,
NVGPU_GR_CTX_PM_CTX,
nvgpu_gr_hwpm_map_get_size(hwpm_map));
ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, gr_ctx_desc);
if (ret != 0) {
nvgpu_err(g,
"failed to allocate pm ctxt buffer");
return ret;
}
/*
* Commit NVGPU_GR_CTX_PM_CTX gpu va for all subcontexts
* when subcontexts are enabled.
*/
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
ret = nvgpu_gr_ctx_mappings_map_buffer_all_subctx(tsg,
NVGPU_GR_CTX_PM_CTX);
} else {
mappings = nvgpu_gr_ctx_get_mappings(tsg, NULL);
if (mappings == NULL) {
nvgpu_err(g, "gr_ctx mappings struct not allocated");
nvgpu_gr_ctx_free_pm_ctx(g, gr_ctx);
return -ENOMEM;
}
ret = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx,
NVGPU_GR_CTX_PM_CTX, mappings);
}
if (ret != 0) {
nvgpu_err(g, "gr_ctx pm_ctx buffer map failed %d", ret);
nvgpu_gr_ctx_free_pm_ctx(g, gr_ctx);
return ret;
}
return 0;
}
void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode)
{
gr_ctx->pm_ctx.pm_mode = pm_mode;
}
u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->pm_ctx.pm_mode;
}
u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
if (!gr_ctx->ctx_id_valid) {
gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g,
mem);
gr_ctx->ctx_id_valid = true;
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", gr_ctx->ctx_id);
return gr_ctx->ctx_id;
}
u32 nvgpu_gr_ctx_read_ctx_id(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->ctx_id;
}
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
void nvgpu_gr_ctx_set_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx, bool boost)
{
gr_ctx->boosted_ctx = boost;
}
bool nvgpu_gr_ctx_get_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->boosted_ctx;
}
#endif
#ifdef CONFIG_DEBUG_FS
bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
struct nvgpu_gr_ctx_desc *gr_ctx_desc)
{
return gr_ctx_desc->dump_ctxsw_stats_on_channel_close;
}
#endif
int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
bool enable)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
if (!nvgpu_mem_is_valid(mem)) {
nvgpu_err(g, "no graphics context allocated");
return -EFAULT;
}
g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, mem, enable);
return 0;
}
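/*
 * Validate the requested HWPM ctxsw mode and record it in pm_ctx.
 * *skip_update is set when the context is already in the requested
 * mode; *set_pm_ctx_gpu_va tells the caller whether the PM buffer GPU
 * VA must be programmed (CTXSW and STREAM_OUT modes) or cleared
 * (NO_CTXSW).
 */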
int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u32 mode, bool *set_pm_ctx_gpu_va, bool *skip_update)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx;
int ret = 0;
*set_pm_ctx_gpu_va = false;
*skip_update = false;
if (!nvgpu_mem_is_valid(mem)) {
nvgpu_err(g, "no graphics context allocated");
return -EFAULT;
}
if ((mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
(g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw == NULL)) {
nvgpu_err(g,
"Mode-E hwpm context switch mode is not supported");
return -EINVAL;
}
switch (mode) {
case NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW:
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) {
*skip_update = true;
return 0;
}
pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw();
*set_pm_ctx_gpu_va = true;
break;
case NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW:
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
*skip_update = true;
return 0;
}
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
*set_pm_ctx_gpu_va = false;
break;
case NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
*skip_update = true;
return 0;
}
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw();
*set_pm_ctx_gpu_va = true;
break;
default:
nvgpu_err(g, "invalid hwpm context switch mode");
ret = -EINVAL;
break;
}
return ret;
}
void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode);
}
void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
bool set_pm_ctx_gpu_va)
{
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
u64 pm_ctx_gpu_va = 0ULL;
if (set_pm_ctx_gpu_va) {
pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(
gr_ctx->mappings,
NVGPU_GR_CTX_PM_CTX);
}
g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, pm_ctx_gpu_va);
}
void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped)
{
ctx->pm_ctx.mapped = mapped;
}
#endif /* CONFIG_NVGPU_DEBUGGER */
bool nvgpu_gr_obj_ctx_global_ctx_buffers_patched(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->global_ctx_buffers_patched;
}
void nvgpu_gr_obj_ctx_set_global_ctx_buffers_patched(
struct nvgpu_gr_ctx *gr_ctx, bool patched)
{
gr_ctx->global_ctx_buffers_patched = patched;
}
bool nvgpu_gr_obj_ctx_preempt_buffers_patched(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->preempt_buffers_patched;
}
void nvgpu_gr_obj_ctx_set_preempt_buffers_patched(
struct nvgpu_gr_ctx *gr_ctx, bool patched)
{
gr_ctx->preempt_buffers_patched = patched;
}
bool nvgpu_gr_obj_ctx_default_compute_regs_patched(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->default_compute_regs_patched;
}
void nvgpu_gr_obj_ctx_set_default_compute_regs_patched(
struct nvgpu_gr_ctx *gr_ctx, bool patched)
{
gr_ctx->default_compute_regs_patched = patched;
}
bool nvgpu_gr_obj_ctx_default_gfx_regs_patched(struct nvgpu_gr_ctx *gr_ctx)
{
return gr_ctx->default_gfx_regs_patched;
}
void nvgpu_gr_obj_ctx_set_default_gfx_regs_patched(
struct nvgpu_gr_ctx *gr_ctx, bool patched)
{
gr_ctx->default_gfx_regs_patched = patched;
}