gpu: nvgpu: add common.gr.obj_ctx unit

Add a new unit, common.gr.obj_ctx, which allocates and initializes a GR
context. This unit also takes care of creating the global golden image
used to initialize every context.

Add private header obj_ctx_priv.h that defines struct
nvgpu_gr_obj_ctx_golden_image

Add public header obj_ctx.h that exposes functions supported by new unit

This unit now exposes the following API to allocate and initialize a context:
nvgpu_gr_obj_ctx_alloc()

Remove below functions from gk20a/gr_gk20a.c and move them to new unit
with below renames

gr_gk20a_fecs_ctx_bind_channel() -> nvgpu_gr_obj_ctx_bind_channel()
gr_gk20a_fecs_ctx_image_save() -> nvgpu_gr_obj_ctx_image_save()
gk20a_init_sw_bundle() -> nvgpu_gr_obj_ctx_alloc_sw_bundle()
gr_gk20a_alloc_gr_ctx() -> nvgpu_gr_obj_ctx_gr_ctx_alloc()
gr_gk20a_init_golden_ctx_image() ->
		nvgpu_gr_obj_ctx_alloc_golden_ctx_image()

Use new APIs in gk20a_alloc_obj_ctx() to allocate context

For now this unit includes <nvgpu/gr/gr.h> and some h/w headers,
but they will be removed in follow-up patches.

Jira NVGPU-1887

Change-Id: Ib95ec1c19c5b74810f85c2feed8fdd63889d3d22
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2087662
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Vinod Gopalakrishnakurup <vinodg@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-04-01 18:22:07 +05:30
committed by mobile promotions
parent 1819c36562
commit c33827e122
8 changed files with 608 additions and 366 deletions

View File

@@ -77,6 +77,7 @@ nvgpu-y += \
common/gr/zbc.o \
common/gr/gr_setup.o \
common/gr/hwpm_map.o \
common/gr/obj_ctx.o \
common/netlist/netlist.o \
common/netlist/netlist_sim.o \
common/netlist/netlist_gm20b.o \

View File

@@ -117,6 +117,7 @@ srcs += common/sim.c \
common/gr/zbc.c \
common/gr/gr_setup.c \
common/gr/hwpm_map.c \
common/gr/obj_ctx.c \
common/netlist/netlist.c \
common/netlist/netlist_sim.c \
common/netlist/netlist_gm20b.c \

View File

@@ -0,0 +1,460 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/log.h>
#include <nvgpu/io.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/power_features/cg.h>
#include "obj_ctx_priv.h"
/*
* TODO: needed for nvgpu_gr_init_fs_state() and introduces cyclic dependency
* with common.gr.gr unit. Remove this in follow up
*/
#include <nvgpu/gr/gr.h>
/*
* TODO: remove these when nvgpu_gr_obj_ctx_bind_channel() and
* nvgpu_gr_obj_ctx_image_save() are moved to appropriate units
*/
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
/*
 * Load the software bundle lists from the netlist while pipe mode override
 * is enabled.
 *
 * The plain bundle list is always loaded; the VEID and 64-bit bundle lists
 * are loaded only when the corresponding HAL op is provided. Pipe mode
 * override is always switched off again before returning. GR idle is waited
 * for only when every load succeeded.
 *
 * Returns 0 on success, otherwise the error reported by the failing load or
 * by the final wait-for-idle.
 */
static int nvgpu_gr_obj_ctx_alloc_sw_bundle(struct gk20a *g)
{
	int ret;

	/* enable pipe mode override */
	g->ops.gr.init.pipe_mode_override(g, true);

	/* load bundle init */
	ret = g->ops.gr.init.load_sw_bundle_init(g,
			&g->netlist_vars->sw_bundle_init);

	if ((ret == 0) && (g->ops.gr.init.load_sw_veid_bundle != NULL)) {
		ret = g->ops.gr.init.load_sw_veid_bundle(g,
				&g->netlist_vars->sw_veid_bundle_init);
	}

	if ((ret == 0) && (g->ops.gr.init.load_sw_bundle64 != NULL)) {
		ret = g->ops.gr.init.load_sw_bundle64(g,
				&g->netlist_vars->sw_bundle64_init);
	}

	/* disable pipe mode override, on success and on failure alike */
	g->ops.gr.init.pipe_mode_override(g, false);

	if (ret != 0) {
		/* in case of error skip waiting for GR idle */
		return ret;
	}

	return g->ops.gr.init.wait_idle(g);
}
static int nvgpu_gr_obj_ctx_bind_channel(struct gk20a *g,
struct nvgpu_mem *inst_block)
{
u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block)
>> ram_in_base_shift_v());
u32 data = fecs_current_ctx_data(g, inst_block);
int ret;
nvgpu_log_info(g, "bind inst ptr 0x%08x", inst_base_ptr);
ret = g->ops.gr.falcon.submit_fecs_method_op(g,
(struct fecs_method_op_gk20a) {
.method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
.method.data = data,
.mailbox = { .id = 0, .data = 0,
.clr = 0x30,
.ret = NULL,
.ok = 0x10,
.fail = 0x20, },
.cond.ok = GR_IS_UCODE_OP_AND,
.cond.fail = GR_IS_UCODE_OP_AND}, true);
if (ret != 0) {
nvgpu_err(g,
"bind channel instance failed");
}
return ret;
}
static int nvgpu_gr_obj_ctx_image_save(struct gk20a *g,
struct nvgpu_mem *inst_block)
{
int ret;
nvgpu_log_fn(g, " ");
ret = g->ops.gr.falcon.submit_fecs_method_op(g,
(struct fecs_method_op_gk20a) {
.method.addr = gr_fecs_method_push_adr_wfi_golden_save_v(),
.method.data = fecs_current_ctx_data(g, inst_block),
.mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
.ok = 1, .fail = 2,
},
.cond.ok = GR_IS_UCODE_OP_AND,
.cond.fail = GR_IS_UCODE_OP_AND,
}, true);
if (ret != 0) {
nvgpu_err(g, "save context image failed");
}
return ret;
}
/*
 * Initialize the global golden context image from a fresh gr_ctx and save a
 * CPU-side copy in golden_image->local_golden_image.
 *
 * @g:            GPU driver struct.
 * @golden_image: golden image state; its ctx_mutex serializes this function
 *                and its ready flag makes repeated calls a no-op.
 * @gr_ctx:       context whose memory is used as the source of the copy.
 * @inst_block:   instance block bound to FECS for the init sequence.
 *
 * Returns 0 when the golden image is (or already was) ready, otherwise the
 * error from the failing step or -ENOMEM when the local copy cannot be
 * created. On failure golden_image->ready stays false.
 */
int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
	struct nvgpu_gr_ctx *gr_ctx,
	struct nvgpu_mem *inst_block)
{
	u32 i;
	struct nvgpu_mem *gr_mem;
	int err = 0;
	struct netlist_aiv_list *sw_ctx_load = &g->netlist_vars->sw_ctx_load;
	struct netlist_av_list *sw_method_init = &g->netlist_vars->sw_method_init;

	nvgpu_log_fn(g, " ");

	gr_mem = &gr_ctx->mem;

	/*
	 * golden ctx is global to all channels. Although only the first
	 * channel initializes golden image, driver needs to prevent multiple
	 * channels from initializing golden ctx at the same time
	 */
	nvgpu_mutex_acquire(&golden_image->ctx_mutex);

	if (golden_image->ready) {
		goto clean_up;
	}

	err = g->ops.gr.init.fe_pwr_mode_force_on(g, true);
	if (err != 0) {
		goto clean_up;
	}

	g->ops.gr.init.override_context_reset(g);

	err = g->ops.gr.init.fe_pwr_mode_force_on(g, false);
	if (err != 0) {
		goto clean_up;
	}

	err = nvgpu_gr_obj_ctx_bind_channel(g, inst_block);
	if (err != 0) {
		goto clean_up;
	}

	/*
	 * NOTE(review): this result has never been checked; it is overwritten
	 * by the calls below. Kept as-is - confirm whether a timeout here
	 * should abort the sequence.
	 */
	err = g->ops.gr.init.wait_idle(g);

	/* load ctx init */
	for (i = 0U; i < sw_ctx_load->count; i++) {
		nvgpu_writel(g, sw_ctx_load->l[i].addr,
			sw_ctx_load->l[i].value);
	}

	if (g->ops.gr.init.preemption_state != NULL) {
		err = g->ops.gr.init.preemption_state(g,
			g->gr.gfxp_wfi_timeout_count,
			g->gr.gfxp_wfi_timeout_unit_usec);
		if (err != 0) {
			goto clean_up;
		}
	}

	nvgpu_cg_blcg_gr_load_enable(g);

	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto clean_up;
	}

	/* disable fe_go_idle */
	g->ops.gr.init.fe_go_idle_timeout(g, false);

	/*
	 * From here until restore_fe_go_idle every failure must go through
	 * the restore label so the timeout is never left disabled.
	 */
	err = g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, false);
	if (err != 0) {
		goto restore_fe_go_idle;
	}

	/* override a few ctx state registers */
	g->ops.gr.init.commit_global_timeslice(g);

	/* floorsweep anything left */
	err = nvgpu_gr_init_fs_state(g);
	if (err != 0) {
		goto restore_fe_go_idle;
	}

	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto restore_fe_go_idle;
	}

	/* success and failure both continue into the restore below */
	err = nvgpu_gr_obj_ctx_alloc_sw_bundle(g);

restore_fe_go_idle:
	/* restore fe_go_idle */
	g->ops.gr.init.fe_go_idle_timeout(g, true);

	if (err != 0) {
		goto clean_up;
	}

	/* propagate an idle timeout instead of returning success */
	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto clean_up;
	}

	/* load method init */
	g->ops.gr.init.load_method_init(g, sw_method_init);

	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto clean_up;
	}

	err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
	if (err != 0) {
		goto clean_up;
	}

	/* do not snapshot gr_mem if FECS failed to save the image */
	err = nvgpu_gr_obj_ctx_image_save(g, inst_block);
	if (err != 0) {
		goto clean_up;
	}

	golden_image->local_golden_image =
		nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem,
			g->gr.ctx_vars.golden_image_size);
	if (golden_image->local_golden_image == NULL) {
		err = -ENOMEM;
		goto clean_up;
	}

	golden_image->ready = true;
	g->gr.ctx_vars.golden_image_initialized = true;

	g->ops.gr.falcon.set_current_ctx_invalid(g);

clean_up:
	if (err != 0) {
		nvgpu_err(g, "fail");
	} else {
		nvgpu_log_fn(g, "done");
	}

	nvgpu_mutex_release(&golden_image->ctx_mutex);
	return err;
}
/*
 * Size the main GR context buffer from the golden image size recorded in
 * g->gr.golden_image, then allocate the context in the given VM.
 *
 * Returns the result of nvgpu_gr_ctx_alloc().
 */
static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g,
	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
{
	u32 image_size;

	nvgpu_log_fn(g, " ");

	image_size = nvgpu_gr_obj_ctx_get_golden_image_size(
			g->gr.golden_image);

	nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, NVGPU_GR_CTX_CTX,
		image_size);

	return nvgpu_gr_ctx_alloc(g, gr_ctx, g->gr.gr_ctx_desc, vm);
}
/*
 * Allocate and initialize a GR context.
 *
 * Steps, in order: allocate the gr ctx buffer, allocate the patch buffer if
 * it is not valid yet, initialize the ctxsw preemption mode, map and commit
 * the global context buffers, commit the gr ctx buffer to the channel,
 * create the golden image if needed, load the golden image into the context
 * and finally update the preemption mode when the HAL provides that op.
 *
 * @g:                 GPU driver struct.
 * @golden_image:      golden image state shared by all contexts.
 * @global_ctx_buffer: global context buffer descriptor to map into @vm.
 * @gr_ctx:            context to allocate and initialize.
 * @subctx:            passed through to update_ctxsw_preemption_mode().
 * @c:                 channel passed to the commit_inst() HAL op.
 * @vm:                address space used for the allocations and mappings.
 * @inst_block:        instance block used for golden image creation.
 * @class_num, @flags: passed through to init_ctxsw_preemption_mode().
 * @cde:               passed through to the golden image load.
 * @vpr:               passed through to the global buffer mapping.
 *
 * Returns 0 on success, otherwise the error of the failing step.
 */
int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
	struct nvgpu_gr_ctx *gr_ctx,
	struct nvgpu_gr_subctx *subctx,
	struct channel_gk20a *c,
	struct vm_gk20a *vm,
	struct nvgpu_mem *inst_block,
	u32 class_num, u32 flags,
	bool cde, bool vpr)
{
	int err = 0;

	nvgpu_log_fn(g, " ");

	err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, gr_ctx, vm);
	if (err != 0) {
		nvgpu_err(g,
			"fail to allocate TSG gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
		gr_ctx->patch_ctx.data_count = 0;

		nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
			NVGPU_GR_CTX_PATCH_CTX,
			g->ops.gr.get_patch_slots(g) *
				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);

		err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx,
			g->gr.gr_ctx_desc, vm);
		if (err != 0) {
			nvgpu_err(g,
				"fail to allocate patch buffer");
			goto out;
		}
	}

	g->ops.gr.init_ctxsw_preemption_mode(g, gr_ctx, vm, class_num, flags);

	/* map global buffer to channel gpu_va and commit */
	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
		global_ctx_buffer, vm, vpr);
	if (err != 0) {
		nvgpu_err(g,
			"fail to map global ctx buffer");
		goto out;
	}

	/* a failed commit must not be silently treated as success */
	err = g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, true);
	if (err != 0) {
		nvgpu_err(g,
			"fail to commit global ctx buffer");
		goto out;
	}

	/* commit gr ctx buffer */
	err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
	if (err != 0) {
		nvgpu_err(g,
			"fail to commit gr ctx buffer");
		goto out;
	}

	/* init golden image, ELPG enabled after this is done */
	err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, gr_ctx,
		inst_block);
	if (err != 0) {
		nvgpu_err(g,
			"fail to init golden ctx image");
		goto out;
	}

	/*
	 * load golden image
	 *
	 * NOTE(review): the result of this call (if it returns one) was never
	 * stored, so the error check that used to follow it could not fire
	 * and has been dropped. Confirm the return type of
	 * nvgpu_gr_ctx_load_golden_ctx_image() and propagate its error here
	 * if it has one.
	 */
	nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx,
		golden_image->local_golden_image, cde);

	if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
		g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx,
			subctx);
	}

	nvgpu_log_fn(g, "done");
	return 0;

out:
	/*
	 * 1. gr_ctx, patch_ctx and global ctx buffer mapping
	 * can be reused so no need to release them.
	 * 2. golden image init and load is a one time thing so if
	 * they pass, no need to undo.
	 */
	nvgpu_err(g, "fail");
	return err;
}
/*
 * Record the golden image size in the golden image state.
 * The caller must pass a valid golden_image pointer.
 */
void nvgpu_gr_obj_ctx_set_golden_image_size(
	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
	size_t size)
{
	struct nvgpu_gr_obj_ctx_golden_image *image = golden_image;

	image->size = size;
}
/*
 * Return the golden image size previously recorded in the golden image
 * state. The caller must pass a valid golden_image pointer.
 */
size_t nvgpu_gr_obj_ctx_get_golden_image_size(
	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	const size_t image_size = golden_image->size;

	return image_size;
}
/*
 * Return the pointer that nvgpu_gr_global_ctx_get_local_golden_image_ptr()
 * yields for the local golden image held in the golden image state.
 */
u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	struct nvgpu_gr_global_ctx_local_golden_image *local_image =
		golden_image->local_golden_image;

	return nvgpu_gr_global_ctx_get_local_golden_image_ptr(local_image);
}
/*
 * Allocate a zero-initialized golden image state, record the golden image
 * size in it, initialize its mutex and hand it back through
 * *gr_golden_image.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails, in which case
 * *gr_golden_image is left untouched.
 */
int nvgpu_gr_obj_ctx_init(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size)
{
	struct nvgpu_gr_obj_ctx_golden_image *image;

	image = nvgpu_kzalloc(g, sizeof(*image));
	if (image != NULL) {
		/* same store the size setter performs */
		image->size = size;

		nvgpu_mutex_init(&image->ctx_mutex);

		*gr_golden_image = image;
		return 0;
	}

	return -ENOMEM;
}
/*
 * Release the golden image state created by nvgpu_gr_obj_ctx_init().
 *
 * Frees the local golden image copy when one exists, then frees the state
 * itself. A NULL golden_image is accepted and ignored so that teardown paths
 * can call this unconditionally, as the local golden image teardown this
 * function replaces was NULL-tolerant.
 *
 * The caller's pointer is not cleared here; callers that may run teardown
 * more than once must reset their own copy to NULL after this call.
 *
 * NOTE(review): ctx_mutex is initialized in nvgpu_gr_obj_ctx_init() but is
 * not destroyed here - confirm whether a mutex destroy call is required.
 */
void nvgpu_gr_obj_ctx_deinit(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	if (golden_image == NULL) {
		return;
	}

	if (golden_image->local_golden_image != NULL) {
		nvgpu_gr_global_ctx_deinit_local_golden_image(g,
			golden_image->local_golden_image);
		golden_image->local_golden_image = NULL;
	}

	golden_image->ready = false;
	nvgpu_kfree(g, golden_image);
}

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_OBJ_CTX_PRIV_H
#define NVGPU_GR_OBJ_CTX_PRIV_H
#include <nvgpu/types.h>
#include <nvgpu/lock.h>
struct nvgpu_gr_global_ctx_local_golden_image;
/*
 * State of the global golden context image owned by the common.gr.obj_ctx
 * unit. Allocated by nvgpu_gr_obj_ctx_init() and freed by
 * nvgpu_gr_obj_ctx_deinit().
 */
struct nvgpu_gr_obj_ctx_golden_image {
/* set to true once the local golden image copy has been created */
bool ready;
/* held for the whole of golden image creation to serialize it */
struct nvgpu_mutex ctx_mutex;
/* golden image size, accessed through the set/get size helpers */
size_t size;
/* CPU-side copy of the golden image; NULL until it has been created */
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
};
#endif /* NVGPU_GR_OBJ_CTX_PRIV_H */

View File

@@ -52,6 +52,7 @@
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/fecs_trace.h>
@@ -302,7 +303,7 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
return 0;
}
static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
{
u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
ram_in_base_shift_v();
@@ -315,36 +316,6 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
gr_fecs_current_ctx_valid_f(1);
}
int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
struct channel_gk20a *c)
{
u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block)
>> ram_in_base_shift_v());
u32 data = fecs_current_ctx_data(g, &c->inst_block);
int ret;
nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x",
c->chid, inst_base_ptr);
ret = g->ops.gr.falcon.submit_fecs_method_op(g,
(struct fecs_method_op_gk20a) {
.method.addr = gr_fecs_method_push_adr_bind_pointer_v(),
.method.data = data,
.mailbox = { .id = 0, .data = 0,
.clr = 0x30,
.ret = NULL,
.ok = 0x10,
.fail = 0x20, },
.cond.ok = GR_IS_UCODE_OP_AND,
.cond.fail = GR_IS_UCODE_OP_AND}, true);
if (ret != 0) {
nvgpu_err(g,
"bind channel instance failed");
}
return ret;
}
int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch)
{
@@ -401,219 +372,6 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
return 0;
}
int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
{
struct gk20a *g = c->g;
int ret;
nvgpu_log_fn(g, " ");
ret = g->ops.gr.falcon.submit_fecs_method_op(g,
(struct fecs_method_op_gk20a) {
.method.addr = save_type,
.method.data = fecs_current_ctx_data(g, &c->inst_block),
.mailbox = {.id = 0, .data = 0, .clr = 3, .ret = NULL,
.ok = 1, .fail = 2,
},
.cond.ok = GR_IS_UCODE_OP_AND,
.cond.fail = GR_IS_UCODE_OP_AND,
}, true);
if (ret != 0) {
nvgpu_err(g, "save context image failed");
}
return ret;
}
int gk20a_init_sw_bundle(struct gk20a *g)
{
struct netlist_av_list *sw_bundle_init =
&g->netlist_vars->sw_bundle_init;
struct netlist_av_list *sw_veid_bundle_init =
&g->netlist_vars->sw_veid_bundle_init;
struct netlist_av64_list *sw_bundle64_init =
&g->netlist_vars->sw_bundle64_init;
int err = 0;
/* enable pipe mode override */
g->ops.gr.init.pipe_mode_override(g, true);
/* load bundle init */
err = g->ops.gr.init.load_sw_bundle_init(g, sw_bundle_init);
if (err != 0) {
goto error;
}
if (g->ops.gr.init.load_sw_veid_bundle != NULL) {
err = g->ops.gr.init.load_sw_veid_bundle(g,
sw_veid_bundle_init);
if (err != 0) {
goto error;
}
}
if (g->ops.gr.init.load_sw_bundle64 != NULL) {
err = g->ops.gr.init.load_sw_bundle64(g, sw_bundle64_init);
if (err != 0) {
goto error;
}
}
/* disable pipe mode override */
g->ops.gr.init.pipe_mode_override(g, false);
err = g->ops.gr.init.wait_idle(g);
return err;
error:
/* in case of error skip waiting for GR idle - just restore state */
g->ops.gr.init.pipe_mode_override(g, false);
return err;
}
/* init global golden image from a fresh gr_ctx in channel ctx.
save a copy in local_golden_image in ctx_vars */
int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
struct channel_gk20a *c,
struct nvgpu_gr_ctx *gr_ctx)
{
struct gr_gk20a *gr = &g->gr;
u32 i;
struct nvgpu_mem *gr_mem;
int err = 0;
struct netlist_aiv_list *sw_ctx_load = &g->netlist_vars->sw_ctx_load;
struct netlist_av_list *sw_method_init = &g->netlist_vars->sw_method_init;
nvgpu_log_fn(g, " ");
gr_mem = &gr_ctx->mem;
/* golden ctx is global to all channels. Although only the first
channel initializes golden image, driver needs to prevent multiple
channels from initializing golden ctx at the same time */
nvgpu_mutex_acquire(&gr->ctx_mutex);
if (gr->ctx_vars.golden_image_initialized) {
goto clean_up;
}
err = g->ops.gr.init.fe_pwr_mode_force_on(g, true);
if (err != 0) {
goto clean_up;
}
g->ops.gr.init.override_context_reset(g);
err = g->ops.gr.init.fe_pwr_mode_force_on(g, false);
if (err != 0) {
goto clean_up;
}
err = gr_gk20a_fecs_ctx_bind_channel(g, c);
if (err != 0) {
goto clean_up;
}
err = g->ops.gr.init.wait_idle(g);
/* load ctx init */
for (i = 0; i < sw_ctx_load->count; i++) {
gk20a_writel(g, sw_ctx_load->l[i].addr,
sw_ctx_load->l[i].value);
}
if (g->ops.gr.init.preemption_state != NULL) {
err = g->ops.gr.init.preemption_state(g,
gr->gfxp_wfi_timeout_count,
gr->gfxp_wfi_timeout_unit_usec);
if (err != 0) {
goto clean_up;
}
}
nvgpu_cg_blcg_gr_load_enable(g);
err = g->ops.gr.init.wait_idle(g);
if (err != 0) {
goto clean_up;
}
/* disable fe_go_idle */
g->ops.gr.init.fe_go_idle_timeout(g, false);
err = g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, false);
if (err != 0) {
goto clean_up;
}
/* override a few ctx state registers */
g->ops.gr.init.commit_global_timeslice(g);
/* floorsweep anything left */
err = nvgpu_gr_init_fs_state(g);
if (err != 0) {
goto clean_up;
}
err = g->ops.gr.init.wait_idle(g);
if (err != 0) {
goto restore_fe_go_idle;
}
err = gk20a_init_sw_bundle(g);
if (err != 0) {
goto clean_up;
}
restore_fe_go_idle:
/* restore fe_go_idle */
g->ops.gr.init.fe_go_idle_timeout(g, true);
if ((err != 0) || (g->ops.gr.init.wait_idle(g) != 0)) {
goto clean_up;
}
/* load method init */
g->ops.gr.init.load_method_init(g, sw_method_init);
err = g->ops.gr.init.wait_idle(g);
if (err != 0) {
goto clean_up;
}
err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
if (err != 0) {
goto clean_up;
}
gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
gr->local_golden_image =
nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem,
gr->ctx_vars.golden_image_size);
if (gr->local_golden_image == NULL) {
err = -ENOMEM;
goto clean_up;
}
gr->ctx_vars.golden_image_initialized = true;
g->ops.gr.falcon.set_current_ctx_invalid(g);
clean_up:
if (err != 0) {
nvgpu_err(g, "fail");
} else {
nvgpu_log_fn(g, "done");
}
nvgpu_mutex_release(&gr->ctx_mutex);
return err;
}
int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
struct channel_gk20a *c,
bool enable_smpc_ctxsw)
@@ -851,25 +609,6 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
return 0;
}
static int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
{
struct gr_gk20a *gr = &g->gr;
int err = 0;
nvgpu_log_fn(g, " ");
nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, NVGPU_GR_CTX_CTX,
gr->ctx_vars.golden_image_size);
err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr->gr_ctx_desc, vm);
if (err != 0) {
return err;
}
return 0;
}
void gr_gk20a_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
{
@@ -936,88 +675,20 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
tsg->vm = c->vm;
nvgpu_vm_get(tsg->vm);
err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm);
err = nvgpu_gr_obj_ctx_alloc(g, g->gr.golden_image,
g->gr.global_ctx_buffer, gr_ctx, c->subctx, c,
tsg->vm, &c->inst_block, class_num, flags,
c->cde, c->vpr);
if (err != 0) {
nvgpu_err(g,
"fail to allocate TSG gr ctx buffer");
"failed to allocate gr ctx buffer");
nvgpu_vm_put(tsg->vm);
tsg->vm = NULL;
goto out;
}
gr_ctx->tsgid = tsg->tsgid;
/* allocate patch buffer */
if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
gr_ctx->patch_ctx.data_count = 0;
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_PATCH_CTX,
g->ops.gr.get_patch_slots(g) *
PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY);
err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx,
g->gr.gr_ctx_desc, c->vm);
if (err != 0) {
nvgpu_err(g,
"fail to allocate patch buffer");
goto out;
}
}
g->ops.gr.init_ctxsw_preemption_mode(g, gr_ctx, tsg->vm,
class_num, flags);
/* map global buffer to channel gpu_va and commit */
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
g->gr.global_ctx_buffer, tsg->vm, c->vpr);
if (err != 0) {
nvgpu_err(g,
"fail to map global ctx buffer");
goto out;
}
g->ops.gr.commit_global_ctx_buffers(g, gr_ctx, true);
/* commit gr ctx buffer */
err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
if (err != 0) {
nvgpu_err(g,
"fail to commit gr ctx buffer");
goto out;
}
/* init golden image, ELPG enabled after this is done */
err = gr_gk20a_init_golden_ctx_image(g, c, gr_ctx);
if (err != 0) {
nvgpu_err(g,
"fail to init golden ctx image");
goto out;
}
/* load golden image */
nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx,
g->gr.local_golden_image, c->cde);
if (err != 0) {
nvgpu_err(g,
"fail to load golden ctx image");
goto out;
}
if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx,
c->subctx);
}
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
c->subctx, gr_ctx, tsg->tgid, 0);
if (err != 0) {
nvgpu_warn(g,
"fail to bind channel for ctxsw trace");
}
}
#endif
} else {
/* commit gr ctx buffer */
err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
@@ -1026,18 +697,19 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
"fail to commit gr ctx buffer");
goto out;
}
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
c->subctx, gr_ctx, tsg->tgid, 0);
if (err != 0) {
nvgpu_warn(g,
"fail to bind channel for ctxsw trace");
}
}
#endif
}
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
c->subctx, gr_ctx, tsg->tgid, 0);
if (err != 0) {
nvgpu_warn(g,
"fail to bind channel for ctxsw trace");
}
}
#endif
nvgpu_log_fn(g, "done");
return 0;
out:
@@ -1069,18 +741,13 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
nvgpu_netlist_deinit_ctx_vars(g);
if (gr->local_golden_image != NULL) {
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
gr->local_golden_image);
gr->local_golden_image = NULL;
gr->ctx_vars.golden_image_initialized = false;
}
nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map);
nvgpu_ecc_remove_support(g);
nvgpu_gr_zbc_deinit(g, gr->zbc);
nvgpu_gr_zcull_deinit(g, gr->zcull);
nvgpu_gr_obj_ctx_deinit(g, gr->golden_image);
gr->ctx_vars.golden_image_initialized = false;
}
static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
@@ -1363,6 +1030,12 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
}
#endif
err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image,
g->gr.ctx_vars.golden_image_size);
if (err != 0) {
goto clean_up;
}
err = gr_gk20a_init_gr_config(g, gr);
if (err != 0) {
goto clean_up;
@@ -3162,8 +2835,8 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
err = gr_gk20a_find_priv_offset_in_buffer(g,
priv_registers[i],
is_quad, quad,
nvgpu_gr_global_ctx_get_local_golden_image_ptr(
g->gr.local_golden_image),
nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
g->gr.golden_image),
g->gr.ctx_vars.golden_image_size,
&priv_offset);
if (err != 0) {

View File

@@ -166,7 +166,8 @@ struct gr_gk20a {
bool gfxp_wfi_timeout_unit_usec;
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer;
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
struct nvgpu_gr_obj_ctx_golden_image *golden_image;
struct nvgpu_gr_ctx_desc *gr_ctx_desc;
@@ -428,10 +429,9 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch);
int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
struct channel_gk20a *c);
u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block);
int gk20a_init_sw_bundle(struct gk20a *g);
int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
struct gr_gk20a_isr_data *isr_data);
int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,

View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_OBJ_CTX_H
#define NVGPU_GR_OBJ_CTX_H
#include <nvgpu/types.h>
#include <nvgpu/lock.h>
struct gk20a;
struct nvgpu_gr_ctx;
struct nvgpu_gr_subctx;
struct vm_gk20a;
struct nvgpu_gr_global_ctx_buffer_desc;
struct nvgpu_mem;
struct channel_gk20a;
struct nvgpu_gr_obj_ctx_golden_image;
/*
 * Create the global golden context image from gr_ctx, using inst_block for
 * the FECS bind/save methods. Serialized by the golden image mutex; returns
 * 0 right away when the golden image is already marked ready. Returns 0 on
 * success and a non-zero error otherwise.
 */
int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_mem *inst_block);
/*
 * Allocate and initialize a GR context: allocates the context and patch
 * buffers, maps and commits the global context buffers, commits the context
 * to channel c, creates the golden image if needed and loads it into the
 * context. Returns 0 on success and a non-zero error otherwise.
 */
int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_subctx *subctx,
struct channel_gk20a *c,
struct vm_gk20a *vm,
struct nvgpu_mem *inst_block,
u32 class_num, u32 flags,
bool cde, bool vpr);
/* Record the golden image size in the golden image state. */
void nvgpu_gr_obj_ctx_set_golden_image_size(
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
size_t size);
/* Return the golden image size recorded in the golden image state. */
size_t nvgpu_gr_obj_ctx_get_golden_image_size(
struct nvgpu_gr_obj_ctx_golden_image *golden_image);
/*
 * Return the data pointer of the local golden image copy, as provided by
 * nvgpu_gr_global_ctx_get_local_golden_image_ptr().
 */
u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
struct nvgpu_gr_obj_ctx_golden_image *golden_image);
/*
 * Allocate the golden image state, store size in it and return it through
 * *gr_golden_image. Returns 0 on success or -ENOMEM.
 */
int nvgpu_gr_obj_ctx_init(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size);
/*
 * Free the local golden image copy (if any) and the golden image state
 * itself. The pointer must not be used after this call.
 */
void nvgpu_gr_obj_ctx_deinit(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image);
#endif /* NVGPU_GR_OBJ_CTX_H */

View File

@@ -24,6 +24,7 @@
#include <nvgpu/string.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/power_features/cg.h>
#include <nvgpu/power_features/pg.h>
@@ -884,13 +885,11 @@ static ssize_t tpc_fs_mask_store(struct device *dev,
g->ops.gr.set_gpc_tpc_mask(g, 0);
if (g->gr.local_golden_image != NULL) {
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
g->gr.local_golden_image);
g->gr.local_golden_image = NULL;
g->gr.ctx_vars.golden_image_initialized = false;
}
nvgpu_gr_obj_ctx_deinit(g, g->gr.golden_image);
g->gr.ctx_vars.golden_image_initialized = false;
g->gr.ctx_vars.golden_image_size = 0;
nvgpu_gr_config_deinit(g, g->gr.config);
/* Cause next poweron to reinit just gr */
g->gr.sw_ready = false;