mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
In order to maintain separate mappings of GR TSG and global context
buffers for different subcontexts, we need to separate the memory
struct and the mapping struct for the buffers. This patch moves
the mappings of all GR ctx buffers to new structure
nvgpu_gr_ctx_mappings.
This will be instantiated per subcontext in the upcoming patches.
Summary of changes:
1. Various context buffers were allocated and mapped separately.
All TSG context buffers are now stored in gr_ctx->mem[] array
since allocation and mapping is unified for them.
2. Mapping/unmapping and querying the GPU VA of the context
buffers is now handled in ctx_mappings unit. Structure
nvgpu_gr_ctx_mappings in nvgpu_gr_ctx holds the maps.
On ALLOC_OBJ_CTX this struct is instantiated and deleted
on free_gr_ctx.
3. Introduce mapping flags for TSG and global context buffers.
This is to map different buffers with different caching
attribute. Map all buffers as cacheable except
PRIV_ACCESS_MAP, RTV_CIRCULAR_BUFFER, FECS_TRACE, GR CTX
and PATCH ctx buffers. Map all buffers as privileged.
4. Wherever VM or GPU VA is passed in the obj_ctx allocation
functions, they are now replaced by nvgpu_gr_ctx_mappings.
5. free_gr_ctx API need not accept the VM as mappings struct
will hold the VM. mappings struct will be kept in gr_ctx.
6. Move preemption buffers allocation logic out of
nvgpu_gr_obj_ctx_set_graphics_preemption_mode.
7. set_preemption_mode and gr_gk20a_update_hwpm_ctxsw_mode
functions need update to ensure buffers are allocated
and mapped.
8. Keep the unit tests and documentation updated.
With these changes there is clear segregation of allocation and
mapping of GR context buffers. This will simplify further change
to add multiple address spaces support. With multiple address
spaces in a TSG, subcontexts created after first subcontext
just need to map the buffers.
Bug 3677982
Change-Id: I3cd5f1311dd85aad1cf547da8fa45293fb7a7cb3
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2712222
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
914 lines
23 KiB
C
914 lines
23 KiB
C
/*
|
|
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <nvgpu/gk20a.h>
|
|
#include <nvgpu/static_analysis.h>
|
|
#include <nvgpu/gr/global_ctx.h>
|
|
#include <nvgpu/gr/ctx.h>
|
|
#include <nvgpu/gr/ctx_mappings.h>
|
|
#include <nvgpu/vm.h>
|
|
#include <nvgpu/io.h>
|
|
#include <nvgpu/gmmu.h>
|
|
#include <nvgpu/dma.h>
|
|
#include <nvgpu/string.h>
|
|
|
|
#include <nvgpu/power_features/pg.h>
|
|
#include "common/gr/ctx_priv.h"
|
|
|
|
struct nvgpu_gr_ctx_desc *
|
|
nvgpu_gr_ctx_desc_alloc(struct gk20a *g)
|
|
{
|
|
struct nvgpu_gr_ctx_desc *desc = nvgpu_kzalloc(g, sizeof(*desc));
|
|
return desc;
|
|
}
|
|
|
|
/*
 * Release a GR context descriptor through nvgpu_kfree().
 *
 * @g:    GPU driver struct passed to the allocator.
 * @desc: descriptor to release.
 */
void nvgpu_gr_ctx_desc_free(struct gk20a *g, struct nvgpu_gr_ctx_desc *desc)
{
	nvgpu_kfree(g, desc);
}
|
|
|
|
/*
 * Record the size of one context buffer in the descriptor.
 *
 * @gr_ctx_desc: descriptor holding the per-buffer size table.
 * @index:       buffer index; asserted to be below NVGPU_GR_CTX_COUNT.
 * @size:        size value to store for that buffer.
 */
void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc,
	u32 index, u32 size)
{
	/* Reject out-of-range indices before touching the table. */
	nvgpu_assert(index < NVGPU_GR_CTX_COUNT);

	gr_ctx_desc->size[index] = size;
}
|
|
|
|
/*
 * Look up the recorded size of one context buffer.
 *
 * @gr_ctx_desc: descriptor holding the per-buffer size table.
 * @index:       buffer index; asserted to be below NVGPU_GR_CTX_COUNT.
 *
 * Returns the size stored for @index.
 */
u32 nvgpu_gr_ctx_get_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc,
	u32 index)
{
	u32 buf_size;

	nvgpu_assert(index < NVGPU_GR_CTX_COUNT);

	buf_size = gr_ctx_desc->size[index];
	return buf_size;
}
|
|
|
|
struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g)
|
|
{
|
|
return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx));
|
|
}
|
|
|
|
/*
 * Release a struct nvgpu_gr_ctx through nvgpu_kfree().
 *
 * @g:      GPU driver struct passed to the allocator.
 * @gr_ctx: context struct to release.
 */
void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
{
	nvgpu_kfree(g, gr_ctx);
}
|
|
|
|
void nvgpu_gr_ctx_free_ctx_buffers(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *ctx)
|
|
{
|
|
u32 i;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
|
|
if (nvgpu_mem_is_valid(&ctx->mem[i])) {
|
|
nvgpu_dma_free(g, &ctx->mem[i]);
|
|
}
|
|
}
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
}
|
|
|
|
int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g,
|
|
struct nvgpu_gr_ctx_desc *desc,
|
|
struct nvgpu_gr_ctx *ctx)
|
|
{
|
|
int err = 0;
|
|
u32 i;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
if (desc->size[NVGPU_GR_CTX_CTX] == 0U) {
|
|
nvgpu_err(g, "context buffer size not set");
|
|
return -EINVAL;
|
|
}
|
|
|
|
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
|
|
|
|
#ifdef CONFIG_NVGPU_GFXP
|
|
/**
|
|
* Skip allocating the gfxp preemption buffers if GFXP mode is
|
|
* not set in the gr ctx.
|
|
*/
|
|
if ((i >= NVGPU_GR_CTX_PREEMPT_CTXSW) &&
|
|
(i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) &&
|
|
(nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) !=
|
|
NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP)) {
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
if (desc->size[i] != 0U) {
|
|
nvgpu_assert(!nvgpu_mem_is_valid(&ctx->mem[i]));
|
|
|
|
err = nvgpu_dma_alloc_sys(g, desc->size[i],
|
|
&ctx->mem[i]);
|
|
if (err != 0) {
|
|
nvgpu_err(g, "ctx buffer %u alloc failed", i);
|
|
nvgpu_gr_ctx_free_ctx_buffers(g, ctx);
|
|
return err;
|
|
}
|
|
}
|
|
}
|
|
|
|
ctx->ctx_id_valid = false;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
|
|
return err;
|
|
}
|
|
|
|
void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *ctx)
|
|
{
|
|
u32 i;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
/**
|
|
* Map all ctx buffers as cacheable except GR CTX and
|
|
* PATCH CTX buffers.
|
|
*/
|
|
for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) {
|
|
ctx->mapping_flags[i] = NVGPU_VM_MAP_CACHEABLE;
|
|
}
|
|
|
|
ctx->mapping_flags[NVGPU_GR_CTX_CTX] = 0U;
|
|
ctx->mapping_flags[NVGPU_GR_CTX_PATCH_CTX] = 0U;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_GFXP
|
|
static void nvgpu_gr_ctx_free_ctx_preemption_buffers(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *ctx)
|
|
{
|
|
u32 i;
|
|
|
|
nvgpu_log_fn(g, " ");
|
|
|
|
for (i = NVGPU_GR_CTX_PREEMPT_CTXSW;
|
|
i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) {
|
|
if (nvgpu_mem_is_valid(&ctx->mem[i])) {
|
|
nvgpu_dma_free(g, &ctx->mem[i]);
|
|
}
|
|
}
|
|
|
|
nvgpu_log_fn(g, "done");
|
|
}
|
|
|
|
int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g,
|
|
struct nvgpu_gr_ctx_desc *desc,
|
|
struct nvgpu_gr_ctx *ctx)
|
|
{
|
|
int err = 0;
|
|
u32 i;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
/**
|
|
* Skip allocating the gfxp preemption buffers if GFXP mode is
|
|
* not set in the gr ctx.
|
|
*/
|
|
if (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) !=
|
|
NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
|
|
nvgpu_log(g, gpu_dbg_gr, "GFXP mode not set. Skip preemption "
|
|
"buffers allocation");
|
|
return 0;
|
|
}
|
|
|
|
for (i = NVGPU_GR_CTX_PREEMPT_CTXSW;
|
|
i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) {
|
|
|
|
if (desc->size[i] != 0U && !nvgpu_mem_is_valid(&ctx->mem[i])) {
|
|
err = nvgpu_dma_alloc_sys(g, desc->size[i],
|
|
&ctx->mem[i]);
|
|
if (err != 0) {
|
|
nvgpu_err(g, "ctx preemption buffer %u alloc failed", i);
|
|
nvgpu_gr_ctx_free_ctx_preemption_buffers(g, ctx);
|
|
return err;
|
|
}
|
|
}
|
|
}
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
|
|
return err;
|
|
}
|
|
#endif
|
|
|
|
void nvgpu_gr_ctx_free(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx,
|
|
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer)
|
|
{
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
if ((gr_ctx != NULL) && (gr_ctx->mappings != NULL)) {
|
|
nvgpu_gr_ctx_unmap_buffers(g,
|
|
gr_ctx, global_ctx_buffer, gr_ctx->mappings);
|
|
|
|
nvgpu_gr_ctx_free_mappings(g, gr_ctx);
|
|
|
|
nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0);
|
|
|
|
nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx);
|
|
|
|
(void) memset(gr_ctx, 0, sizeof(*gr_ctx));
|
|
}
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
}
|
|
|
|
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g,
|
|
struct nvgpu_tsg *tsg, struct vm_gk20a *vm)
|
|
{
|
|
struct nvgpu_gr_ctx_mappings *mappings = NULL;
|
|
struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
mappings = gr_ctx->mappings;
|
|
if (mappings != NULL) {
|
|
return mappings;
|
|
}
|
|
|
|
mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
|
|
if (mappings == NULL) {
|
|
nvgpu_err(g, "failed to allocate gr_ctx mappings");
|
|
return mappings;
|
|
}
|
|
|
|
gr_ctx->mappings = mappings;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
|
|
return mappings;
|
|
}
|
|
|
|
void nvgpu_gr_ctx_free_mappings(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
if (gr_ctx->mappings == NULL) {
|
|
return;
|
|
}
|
|
|
|
nvgpu_gr_ctx_mappings_free(g, gr_ctx->mappings);
|
|
gr_ctx->mappings = NULL;
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
}
|
|
|
|
struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg)
|
|
{
|
|
struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx;
|
|
|
|
return gr_ctx->mappings;
|
|
}
|
|
|
|
void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx,
|
|
u32 data_count)
|
|
{
|
|
gr_ctx->patch_ctx.data_count = data_count;
|
|
}
|
|
|
|
struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx,
|
|
u32 index)
|
|
{
|
|
nvgpu_assert(index < NVGPU_GR_CTX_COUNT);
|
|
return &gr_ctx->mem[index];
|
|
}
|
|
|
|
/*
 * Return the mapping flags recorded for one context buffer.
 *
 * @gr_ctx: GR context holding the mapping_flags[] table.
 * @index:  buffer index; asserted to be below NVGPU_GR_CTX_COUNT.
 */
u32 nvgpu_gr_ctx_get_ctx_mapping_flags(struct nvgpu_gr_ctx *gr_ctx, u32 index)
{
	u32 flags;

	nvgpu_assert(index < NVGPU_GR_CTX_COUNT);

	flags = gr_ctx->mapping_flags[index];
	return flags;
}
|
|
|
|
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
|
void nvgpu_gr_ctx_set_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx,
|
|
u32 sm_diversity_config)
|
|
{
|
|
gr_ctx->sm_diversity_config = sm_diversity_config;
|
|
}
|
|
|
|
u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->sm_diversity_config;
|
|
}
|
|
#endif
|
|
|
|
/* load saved fresh copy of golden image into channel gr_ctx */
|
|
void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx,
|
|
struct nvgpu_gr_ctx_mappings *mappings,
|
|
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
|
|
bool cde)
|
|
{
|
|
struct nvgpu_mem *mem;
|
|
#ifdef CONFIG_NVGPU_DEBUGGER
|
|
u64 virt_addr = 0;
|
|
#endif
|
|
|
|
(void)cde;
|
|
|
|
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
|
|
|
mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
nvgpu_gr_global_ctx_load_local_golden_image(g,
|
|
local_golden_image, mem);
|
|
|
|
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
|
|
g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data(g, mem);
|
|
#endif
|
|
|
|
#ifdef CONFIG_NVGPU_DEBUGGER
|
|
if ((g->ops.gr.ctxsw_prog.set_cde_enabled != NULL) && cde) {
|
|
g->ops.gr.ctxsw_prog.set_cde_enabled(g, mem);
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP
|
|
/* set priv access map */
|
|
g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem,
|
|
g->allow_all);
|
|
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem,
|
|
nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
|
|
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA));
|
|
#endif
|
|
|
|
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
|
|
/* disable verif features */
|
|
g->ops.gr.ctxsw_prog.disable_verif_features(g, mem);
|
|
#endif
|
|
|
|
#ifdef CONFIG_NVGPU_DEBUGGER
|
|
if (g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies !=
|
|
NULL) {
|
|
g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies(g,
|
|
mem, nvgpu_safe_cast_bool_to_u32(gr_ctx->boosted_ctx));
|
|
}
|
|
#endif
|
|
|
|
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "write patch count = %d",
|
|
gr_ctx->patch_ctx.data_count);
|
|
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
|
|
gr_ctx->patch_ctx.data_count);
|
|
g->ops.gr.ctxsw_prog.set_patch_addr(g, mem,
|
|
nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
|
|
NVGPU_GR_CTX_PATCH_CTX));
|
|
|
|
#ifdef CONFIG_NVGPU_DEBUGGER
|
|
/* PM ctxt switch is off by default */
|
|
gr_ctx->pm_ctx.pm_mode =
|
|
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
|
|
virt_addr = 0;
|
|
|
|
g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode);
|
|
g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, virt_addr);
|
|
#endif
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, "done");
|
|
}
|
|
|
|
/*
|
|
* Context state can be written directly, or "patched" at times. So that code
|
|
* can be used in either situation it is written using a series of
|
|
* _ctx_patch_write(..., patch) statements. However any necessary map overhead
|
|
* should be minimized; thus, bundle the sequence of these writes together, and
|
|
* set them up and close with _ctx_patch_write_begin/_ctx_patch_write_end.
|
|
*/
|
|
void nvgpu_gr_ctx_patch_write_begin(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx,
|
|
bool update_patch_count)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
if (update_patch_count) {
|
|
/* reset patch count if ucode has already processed it */
|
|
gr_ctx->patch_ctx.data_count =
|
|
g->ops.gr.ctxsw_prog.get_patch_count(g, mem);
|
|
nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
|
|
gr_ctx->patch_ctx.data_count);
|
|
}
|
|
}
|
|
|
|
void nvgpu_gr_ctx_patch_write_end(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx,
|
|
bool update_patch_count)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
/* Write context count to context image if it is mapped */
|
|
if (update_patch_count) {
|
|
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
|
|
gr_ctx->patch_ctx.data_count);
|
|
nvgpu_log(g, gpu_dbg_info, "write patch count %d",
|
|
gr_ctx->patch_ctx.data_count);
|
|
}
|
|
}
|
|
|
|
/*
 * Write a register value either directly or as a patch buffer entry.
 *
 * @g:      GPU driver struct.
 * @gr_ctx: GR context owning the patch buffer; only dereferenced when
 *          @patch is true.
 * @addr:   register address.
 * @data:   value to write.
 * @patch:  false: write @data to @addr with nvgpu_writel().
 *          true: append an (addr, data) pair to the patch context buffer
 *          and increment patch_ctx.data_count.
 *
 * In patch mode the write is dropped, with an error log, when @gr_ctx is
 * NULL or when the next slot would fall beyond the patch buffer.
 */
void nvgpu_gr_ctx_patch_write(struct gk20a *g,
	struct nvgpu_gr_ctx *gr_ctx,
	u32 addr, u32 data, bool patch)
{
	struct nvgpu_mem *patch_ctx_mem;
	u64 patch_slot_max;
	u32 patch_slot;

	if (!patch) {
		nvgpu_writel(g, addr, data);
		return;
	}

	if (gr_ctx == NULL) {
		nvgpu_err(g,
			"failed to access gr_ctx[NULL] but patch true");
		return;
	}

	patch_ctx_mem = &gr_ctx->mem[NVGPU_GR_CTX_PATCH_CTX];

	/* Index of the first slot of the next free entry. */
	patch_slot = nvgpu_safe_mult_u32(gr_ctx->patch_ctx.data_count,
			PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);

	/* Highest slot index at which a whole entry still fits. */
	patch_slot_max = nvgpu_safe_sub_u64(
			PATCH_CTX_ENTRIES_FROM_SIZE(patch_ctx_mem->size),
			PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);

	if (patch_slot > patch_slot_max) {
		/* patch_slot is unsigned, so it is logged with %u. */
		nvgpu_err(g, "failed to access patch_slot %u",
			patch_slot);
		return;
	}

	/* An entry is the address followed by the data. */
	nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot, addr);
	nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot + 1ULL, data);

	gr_ctx->patch_ctx.data_count = nvgpu_safe_add_u32(
			gr_ctx->patch_ctx.data_count, 1U);

	nvgpu_log(g, gpu_dbg_info,
		"patch addr = 0x%x data = 0x%x data_count %u",
		addr, data, gr_ctx->patch_ctx.data_count);
}
|
|
|
|
void nvgpu_gr_ctx_init_compute_preemption_mode(struct nvgpu_gr_ctx *gr_ctx,
|
|
u32 compute_preempt_mode)
|
|
{
|
|
gr_ctx->compute_preempt_mode = compute_preempt_mode;
|
|
}
|
|
|
|
u32 nvgpu_gr_ctx_get_compute_preemption_mode(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->compute_preempt_mode;
|
|
}
|
|
|
|
/*
 * Check whether a requested pair of preemption modes may be applied to
 * @gr_ctx.
 *
 * @g:                     GPU driver struct.
 * @gr_ctx:                GR context holding the current modes.
 * @graphics_preempt_mode: requested graphics mode, 0 for "not requested".
 * @compute_preempt_mode:  requested compute mode, 0 for "not requested".
 *
 * Returns false when:
 *  - neither mode is requested,
 *  - a requested mode has no bit in common with the supported modes,
 *  - a requested mode is numerically lower than the current one, or
 *  - (with CILP and GFXP both configured) GFXP and CILP are requested
 *    together.
 * Returns true otherwise.
 */
bool nvgpu_gr_ctx_check_valid_preemption_mode(struct gk20a *g,
	struct nvgpu_gr_ctx *gr_ctx,
	u32 graphics_preempt_mode, u32 compute_preempt_mode)
{
	u32 gfx_supported = 0U;
	u32 compute_supported = 0U;
	const bool gfx_requested = (graphics_preempt_mode != 0U);
	const bool compute_requested = (compute_preempt_mode != 0U);

	if (!gfx_requested && !compute_requested) {
		return false;
	}

	g->ops.gr.init.get_supported__preemption_modes(
			&gfx_supported, &compute_supported);

	/* Unsupported, or lower than the current graphics mode. */
	if (gfx_requested &&
	    (((graphics_preempt_mode & gfx_supported) == 0U) ||
	     (graphics_preempt_mode < gr_ctx->graphics_preempt_mode))) {
		return false;
	}

	/* Unsupported, or lower than the current compute mode. */
	if (compute_requested &&
	    (((compute_preempt_mode & compute_supported) == 0U) ||
	     (compute_preempt_mode < gr_ctx->compute_preempt_mode))) {
		return false;
	}

#if defined(CONFIG_NVGPU_CILP) && defined(CONFIG_NVGPU_GFXP)
	/* Invalid combination */
	if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
	    (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP)) {
		return false;
	}
#endif

	return true;
}
|
|
|
|
void nvgpu_gr_ctx_set_preemption_modes(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
#ifdef CONFIG_NVGPU_GFXP
|
|
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
|
|
g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem);
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_NVGPU_CILP
|
|
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
|
|
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem);
|
|
}
|
|
#endif
|
|
|
|
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
|
|
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem);
|
|
}
|
|
}
|
|
|
|
/*
 * Store the TSG id in @gr_ctx.
 *
 * @gr_ctx: GR context to update.
 * @tsgid:  id value to store.
 */
void nvgpu_gr_ctx_set_tsgid(struct nvgpu_gr_ctx *gr_ctx, u32 tsgid)
{
	gr_ctx->tsgid = tsgid;
}
|
|
|
|
u32 nvgpu_gr_ctx_get_tsgid(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->tsgid;
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_GRAPHICS
|
|
void nvgpu_gr_ctx_init_graphics_preemption_mode(struct nvgpu_gr_ctx *gr_ctx,
|
|
u32 graphics_preempt_mode)
|
|
{
|
|
gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
|
|
}
|
|
|
|
u32 nvgpu_gr_ctx_get_graphics_preemption_mode(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->graphics_preempt_mode;
|
|
}
|
|
|
|
void nvgpu_gr_ctx_set_zcull_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
|
|
u32 mode, u64 gpu_va)
|
|
{
|
|
struct zcull_ctx_desc *zcull_ctx = &gr_ctx->zcull_ctx;
|
|
|
|
(void)g;
|
|
|
|
zcull_ctx->ctx_sw_mode = mode;
|
|
zcull_ctx->gpu_va = gpu_va;
|
|
}
|
|
|
|
u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->zcull_ctx.gpu_va;
|
|
}
|
|
|
|
int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
nvgpu_log(g, gpu_dbg_gr, " ");
|
|
|
|
g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, mem);
|
|
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
|
|
bool set_zcull_ptr)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
nvgpu_log_fn(g, " ");
|
|
|
|
if (gr_ctx->zcull_ctx.gpu_va == 0ULL &&
|
|
g->ops.gr.ctxsw_prog.is_zcull_mode_separate_buffer(
|
|
gr_ctx->zcull_ctx.ctx_sw_mode)) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
g->ops.gr.ctxsw_prog.set_zcull(g, mem, gr_ctx->zcull_ctx.ctx_sw_mode);
|
|
|
|
if (set_zcull_ptr) {
|
|
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem,
|
|
gr_ctx->zcull_ctx.gpu_va);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif /* CONFIG_NVGPU_GRAPHICS */
|
|
|
|
#ifdef CONFIG_NVGPU_GFXP
|
|
void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx,
|
|
struct nvgpu_gr_ctx_mappings *mappings)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
|
|
NVGPU_GR_CTX_PREEMPT_CTXSW);
|
|
|
|
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem,
|
|
preempt_ctxsw_gpu_va);
|
|
|
|
if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) {
|
|
g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g,
|
|
mem, preempt_ctxsw_gpu_va);
|
|
}
|
|
}
|
|
|
|
bool nvgpu_gr_ctx_desc_force_preemption_gfxp(struct nvgpu_gr_ctx_desc *gr_ctx_desc)
|
|
{
|
|
return gr_ctx_desc->force_preemption_gfxp;
|
|
}
|
|
#endif /* CONFIG_NVGPU_GFXP */
|
|
|
|
#ifdef CONFIG_NVGPU_CILP
|
|
bool nvgpu_gr_ctx_desc_force_preemption_cilp(struct nvgpu_gr_ctx_desc *gr_ctx_desc)
|
|
{
|
|
return gr_ctx_desc->force_preemption_cilp;
|
|
}
|
|
|
|
bool nvgpu_gr_ctx_get_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->cilp_preempt_pending;
|
|
}
|
|
|
|
void nvgpu_gr_ctx_set_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx,
|
|
bool cilp_preempt_pending)
|
|
{
|
|
gr_ctx->cilp_preempt_pending = cilp_preempt_pending;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_NVGPU_DEBUGGER
|
|
void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
u32 tmp;
|
|
|
|
tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, mem);
|
|
if (tmp == 0U) {
|
|
gr_ctx->patch_ctx.data_count = 0;
|
|
}
|
|
}
|
|
|
|
void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings;
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
|
|
gr_ctx->patch_ctx.data_count);
|
|
|
|
g->ops.gr.ctxsw_prog.set_patch_addr(g, mem,
|
|
nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
|
|
NVGPU_GR_CTX_PATCH_CTX));
|
|
}
|
|
|
|
static int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx,
|
|
struct nvgpu_gr_ctx_desc *gr_ctx_desc)
|
|
{
|
|
int err;
|
|
|
|
err = nvgpu_dma_alloc_sys(g, gr_ctx_desc->size[NVGPU_GR_CTX_PM_CTX],
|
|
&gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]);
|
|
if (err != 0) {
|
|
nvgpu_err(g,
|
|
"failed to allocate pm ctx buffer");
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
if (nvgpu_mem_is_valid(&gr_ctx->mem[NVGPU_GR_CTX_PM_CTX])) {
|
|
nvgpu_dma_free(g, &gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]);
|
|
}
|
|
|
|
(void)g;
|
|
}
|
|
|
|
int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g,
|
|
struct nvgpu_tsg *tsg,
|
|
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
|
|
struct nvgpu_gr_hwpm_map *hwpm_map)
|
|
{
|
|
struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx;
|
|
struct nvgpu_gr_ctx_mappings *mappings;
|
|
int ret;
|
|
|
|
if (gr_ctx->pm_ctx.mapped) {
|
|
return 0;
|
|
}
|
|
|
|
mappings = nvgpu_gr_ctx_get_mappings(tsg);
|
|
if (mappings == NULL) {
|
|
nvgpu_err(g, "gr_ctx mappings struct not allocated");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
nvgpu_gr_ctx_set_size(gr_ctx_desc,
|
|
NVGPU_GR_CTX_PM_CTX,
|
|
nvgpu_gr_hwpm_map_get_size(hwpm_map));
|
|
|
|
ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, gr_ctx_desc);
|
|
if (ret != 0) {
|
|
nvgpu_err(g,
|
|
"failed to allocate pm ctxt buffer");
|
|
return ret;
|
|
}
|
|
|
|
ret = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx,
|
|
NVGPU_GR_CTX_PM_CTX, mappings);
|
|
if (ret != 0) {
|
|
nvgpu_err(g, "gr_ctx pm_ctx buffer map failed %d", ret);
|
|
nvgpu_gr_ctx_free_pm_ctx(g, gr_ctx);
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Store the PM mode in @gr_ctx.
 *
 * @gr_ctx:  GR context to update.
 * @pm_mode: value stored in pm_ctx.pm_mode.
 */
void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode)
{
	gr_ctx->pm_ctx.pm_mode = pm_mode;
}
|
|
|
|
u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->pm_ctx.pm_mode;
|
|
}
|
|
|
|
u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
if (!gr_ctx->ctx_id_valid) {
|
|
gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g,
|
|
mem);
|
|
gr_ctx->ctx_id_valid = true;
|
|
}
|
|
|
|
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", gr_ctx->ctx_id);
|
|
|
|
return gr_ctx->ctx_id;
|
|
}
|
|
|
|
u32 nvgpu_gr_ctx_read_ctx_id(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->ctx_id;
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
|
|
/*
 * Store the boosted_ctx flag in @gr_ctx.
 *
 * @gr_ctx: GR context to update.
 * @boost:  flag value to store.
 */
void nvgpu_gr_ctx_set_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx, bool boost)
{
	gr_ctx->boosted_ctx = boost;
}
|
|
|
|
bool nvgpu_gr_ctx_get_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
return gr_ctx->boosted_ctx;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_DEBUG_FS
|
|
bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
|
|
struct nvgpu_gr_ctx_desc *gr_ctx_desc)
|
|
{
|
|
return gr_ctx_desc->dump_ctxsw_stats_on_channel_close;
|
|
}
|
|
#endif
|
|
|
|
int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
|
|
bool enable)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
if (!nvgpu_mem_is_valid(mem)) {
|
|
nvgpu_err(g, "no graphics context allocated");
|
|
return -EFAULT;
|
|
}
|
|
|
|
g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, mem, enable);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g,
|
|
struct nvgpu_gr_ctx *gr_ctx,
|
|
u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update)
|
|
{
|
|
struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings;
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx;
|
|
int ret = 0;
|
|
|
|
*skip_update = false;
|
|
|
|
if (!nvgpu_mem_is_valid(mem)) {
|
|
nvgpu_err(g, "no graphics context allocated");
|
|
return -EFAULT;
|
|
}
|
|
|
|
if ((mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
|
|
(g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw == NULL)) {
|
|
nvgpu_err(g,
|
|
"Mode-E hwpm context switch mode is not supported");
|
|
return -EINVAL;
|
|
}
|
|
|
|
switch (mode) {
|
|
case NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW:
|
|
if (pm_ctx->pm_mode ==
|
|
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) {
|
|
*skip_update = true;
|
|
return 0;
|
|
}
|
|
pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw();
|
|
*pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
|
|
NVGPU_GR_CTX_PM_CTX);
|
|
break;
|
|
case NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW:
|
|
if (pm_ctx->pm_mode ==
|
|
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
|
|
*skip_update = true;
|
|
return 0;
|
|
}
|
|
pm_ctx->pm_mode =
|
|
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
|
|
*pm_ctx_gpu_va = 0;
|
|
break;
|
|
case NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
|
|
if (pm_ctx->pm_mode ==
|
|
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
|
|
*skip_update = true;
|
|
return 0;
|
|
}
|
|
pm_ctx->pm_mode =
|
|
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw();
|
|
*pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
|
|
NVGPU_GR_CTX_PM_CTX);
|
|
break;
|
|
default:
|
|
nvgpu_err(g, "invalid hwpm context switch mode");
|
|
ret = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode);
|
|
}
|
|
|
|
void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
|
|
u64 pm_ctx_gpu_va)
|
|
{
|
|
struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX];
|
|
|
|
g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, pm_ctx_gpu_va);
|
|
}
|
|
|
|
/*
 * Store the pm_ctx.mapped flag in @ctx.
 *
 * @ctx:    GR context to update.
 * @mapped: flag value to store.
 */
void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped)
{
	ctx->pm_ctx.mapped = mapped;
}
|
|
#endif /* CONFIG_NVGPU_DEBUGGER */
|