gpu: nvgpu: add common.gr.obj_ctx apis to initialize/set preemption mode

The following HALs are used to initialize and set preemption modes:
g->ops.gr.init_ctxsw_preemption_mode()
g->ops.gr.set_ctxsw_preemption_mode()
g->ops.gr.update_ctxsw_preemption_mode()

They are all h/w independent, except for GFXP/CILP preemption support,
which is present only on gp10b+ chips

Add a characteristics flag NVGPU_SUPPORT_PREEMPTION_GFXP for these
preemption modes and set this flag for gp10b+ chips
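
For reference, a condensed sketch of how the flag is wired up, paraphrasing
the hunks below (gp10b_init_hal() is one of the per-chip HAL init functions
touched by this change; the common-code check lives in
nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode()):

  /* per-chip HAL init, e.g. gp10b_init_hal(): GFXP/CILP is supported */
  nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);

  /* common code: without the flag, only CTA compute preemption is set up */
  if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) {
      if (g->ops.gr.is_valid_compute_class(g, class)) {
          nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
              NVGPU_PREEMPTION_MODE_COMPUTE_CTA);
      }
      return 0;
  }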

Use this flag and unify all of the above HALs into the following common
functions (a condensed call-flow sketch follows this list)
nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode()
nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode()
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode()
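
A condensed sketch of the resulting call flow, paraphrasing the
nvgpu_gr_obj_ctx_alloc() and gr_gp10b_set_preemption_mode() hunks below
(error handling and the channel disable/enable around the runtime path are
omitted):

  /* object context allocation */
  nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, gr_ctx, vm, class_num, flags);
  /* ... map global context buffers, commit, load golden image ... */
  nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr_ctx, c->subctx);

  /* runtime preemption mode change on gp10b+ */
  err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
          graphics_preempt_mode, compute_preempt_mode);
  if (err == 0) {
      nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr_ctx, ch->subctx);
  }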

vGPU-specific code directly calls the following vGPU-specific APIs
(see the sketch after this list)
vgpu_gr_init_ctxsw_preemption_mode()
vgpu_gr_set_ctxsw_preemption_mode()
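
For example, in vgpu_gr_alloc_obj_ctx() (hunk below) the former HAL call is
replaced with a direct call, and both functions become static within the
vGPU gr code:

  /* was: g->ops.gr.init_ctxsw_preemption_mode(g, gr_ctx, c->vm, class_num, flags) */
  vgpu_gr_init_ctxsw_preemption_mode(g, gr_ctx, c->vm, class_num, flags);
  /* ...which calls vgpu_gr_set_ctxsw_preemption_mode() directly when a
   * GFXP/CILP mode is requested */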

g->ops.gr.update_ctxsw_preemption_mode() is not needed for vGPU since
it is handled by vserver

The above g->ops.gr.*_ctxsw_preemption_mode() HALs are no longer required,
hence delete them

Jira NVGPU-1887

Change-Id: I9b3164bcf01e5e3c27e52369c9364e0ee23a9662
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2088507
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Deepak Nibade authored on 2019-04-02 19:19:50 +05:30; committed by mobile promotions
parent 4ad98e87ad
commit 45e1207223
18 changed files with 279 additions and 338 deletions

View File

@@ -24,10 +24,12 @@
#include <nvgpu/log.h>
#include <nvgpu/io.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/power_features/cg.h>
#include <nvgpu/channel.h>
#include "obj_ctx_priv.h"
@@ -44,6 +46,215 @@
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 flags)
{
int err;
u32 graphics_preempt_mode = 0;
u32 compute_preempt_mode = 0;
nvgpu_log_fn(g, " ");
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) {
if (g->ops.gr.is_valid_compute_class(g, class)) {
nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
NVGPU_PREEMPTION_MODE_COMPUTE_CTA);
}
return 0;
}
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) {
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
}
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) {
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
}
if ((graphics_preempt_mode != 0U) || (compute_preempt_mode != 0U)) {
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr_ctx, vm,
class, graphics_preempt_mode, compute_preempt_mode);
if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
}
nvgpu_log_fn(g, "done");
return 0;
}
int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 compute_preempt_mode)
{
int err = 0;
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) {
return 0;
}
if (g->ops.gr.is_valid_gfx_class(g, class) &&
g->gr.ctx_vars.force_preemption_gfxp) {
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
}
if (g->ops.gr.is_valid_compute_class(g, class) &&
g->gr.ctx_vars.force_preemption_cilp) {
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
}
/* check for invalid combinations */
if (nvgpu_gr_ctx_check_valid_preemption_mode(gr_ctx,
graphics_preempt_mode, compute_preempt_mode) == false) {
return -EINVAL;
}
/* set preemption modes */
switch (graphics_preempt_mode) {
case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
{
u32 rtv_cb_size;
u32 spill_size = g->ops.gr.init.get_ctx_spill_size(g);
u32 pagepool_size = g->ops.gr.init.get_ctx_pagepool_size(g);
u32 betacb_size = g->ops.gr.init.get_ctx_betacb_size(g);
u32 attrib_cb_size =
g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size,
nvgpu_gr_config_get_tpc_count(g->gr.config),
nvgpu_gr_config_get_max_tpc_count(g->gr.config));
nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
nvgpu_log_info(g, "gfxp context pagepool_size=%d", pagepool_size);
nvgpu_log_info(g, "gfxp context attrib_cb_size=%d",
attrib_cb_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_PREEMPT_CTXSW,
g->gr.ctx_vars.preempt_image_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_SPILL_CTXSW, spill_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size);
if (g->ops.gr.init.get_gfxp_rtv_cb_size != NULL) {
rtv_cb_size = g->ops.gr.init.get_gfxp_rtv_cb_size(g);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size);
}
err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx,
g->gr.gr_ctx_desc, vm);
if (err != 0) {
nvgpu_err(g, "cannot allocate ctxsw buffers");
goto fail;
}
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
graphics_preempt_mode);
break;
}
case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
graphics_preempt_mode);
break;
default:
break;
}
if (g->ops.gr.is_valid_compute_class(g, class) ||
g->ops.gr.is_valid_gfx_class(g, class)) {
switch (compute_preempt_mode) {
case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
compute_preempt_mode);
break;
default:
break;
}
}
return 0;
fail:
return err;
}
void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx)
{
int err;
u64 addr;
u32 size;
nvgpu_log_fn(g, " ");
nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx);
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) {
return;
}
if (!nvgpu_mem_is_valid(&gr_ctx->preempt_ctxsw_buffer)) {
return;
}
if (subctx != NULL) {
nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx,
gr_ctx);
} else {
nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx);
}
err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
if (err != 0) {
nvgpu_err(g, "can't map patch context");
goto out;
}
addr = gr_ctx->betacb_ctxsw_buffer.gpu_va;
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
nvgpu_gr_config_get_tpc_count(g->gr.config),
nvgpu_gr_config_get_max_tpc_count(g->gr.config), addr,
true);
addr = gr_ctx->pagepool_ctxsw_buffer.gpu_va;
nvgpu_assert(gr_ctx->pagepool_ctxsw_buffer.size <= U32_MAX);
size = (u32)gr_ctx->pagepool_ctxsw_buffer.size;
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
true, false);
addr = gr_ctx->spill_ctxsw_buffer.gpu_va;
nvgpu_assert(gr_ctx->spill_ctxsw_buffer.size <= U32_MAX);
size = (u32)gr_ctx->spill_ctxsw_buffer.size;
g->ops.gr.init.commit_ctxsw_spill(g, gr_ctx, addr, size, true);
g->ops.gr.init.commit_cbes_reserve(g, gr_ctx, true);
if (g->ops.gr.init.gfxp_wfi_timeout != NULL) {
g->ops.gr.init.gfxp_wfi_timeout(g, gr_ctx,
g->gr.gfxp_wfi_timeout_count, true);
}
if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true);
}
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
out:
nvgpu_log_fn(g, "done");
}
int nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch)
{
@@ -407,7 +618,8 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
}
}
g->ops.gr.init_ctxsw_preemption_mode(g, gr_ctx, vm, class_num, flags);
nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, gr_ctx, vm, class_num,
flags);
/* map global buffer to channel gpu_va and commit */
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
@@ -449,10 +661,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
goto out;
}
if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx,
subctx);
}
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr_ctx, c->subctx);
nvgpu_log_fn(g, "done");
return 0;

View File

@@ -140,10 +140,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.detect_sm_arch = vgpu_gr_detect_sm_arch,
.init_ctx_state = vgpu_gr_init_ctx_state,
.free_gr_ctx = vgpu_gr_free_gr_ctx,
.init_ctxsw_preemption_mode =
vgpu_gr_init_ctxsw_preemption_mode,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -188,8 +184,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.pre_process_sm_exception = NULL,
.set_bes_crop_debug3 = NULL,
.set_bes_crop_debug4 = NULL,
.set_ctxsw_preemption_mode =
vgpu_gr_set_ctxsw_preemption_mode,
.init_gfxp_wfi_timeout_count =
gr_gp10b_init_gfxp_wfi_timeout_count,
.get_max_gfxp_wfi_timeout_count =

View File

@@ -52,6 +52,17 @@
#include "common/vgpu/perf/cyclestats_snapshot_vgpu.h"
#include "common/gr/zcull_priv.h"
static int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode,
u32 compute_preempt_mode);
static int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm,
u32 class,
u32 flags);
void vgpu_gr_detect_sm_arch(struct gk20a *g)
{
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
@@ -250,7 +261,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
goto out;
}
g->ops.gr.init_ctxsw_preemption_mode(g, gr_ctx,
vgpu_gr_init_ctxsw_preemption_mode(g, gr_ctx,
c->vm,
class_num,
flags);
@@ -1209,7 +1220,7 @@ void vgpu_gr_init_cyclestats(struct gk20a *g)
#endif
}
int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g,
static int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm,
u32 class,
@@ -1239,16 +1250,12 @@ int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g,
}
if (graphics_preempt_mode || compute_preempt_mode) {
if (g->ops.gr.set_ctxsw_preemption_mode) {
err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm,
class, graphics_preempt_mode, compute_preempt_mode);
if (err) {
nvgpu_err(g,
"set_ctxsw_preemption_mode failed");
return err;
}
} else {
return -ENOSYS;
err = vgpu_gr_set_ctxsw_preemption_mode(g, gr_ctx, vm,
class, graphics_preempt_mode, compute_preempt_mode);
if (err) {
nvgpu_err(g,
"set_ctxsw_preemption_mode failed");
return err;
}
}
@@ -1256,7 +1263,7 @@ int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g,
return 0;
}
int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g,
static int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode,
@@ -1435,16 +1442,12 @@ int vgpu_gr_set_preemption_mode(struct channel_gk20a *ch,
return 0;
}
if (g->ops.gr.set_ctxsw_preemption_mode) {
err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
graphics_preempt_mode,
compute_preempt_mode);
if (err) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
} else {
err = -ENOSYS;
err = vgpu_gr_set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
graphics_preempt_mode,
compute_preempt_mode);
if (err) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
return err;

View File

@@ -80,16 +80,6 @@ int vgpu_gr_init_sm_id_table(struct nvgpu_gr_config *gr_config);
int vgpu_gr_init_fs_state(struct gk20a *g);
int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable);
void vgpu_gr_init_cyclestats(struct gk20a *g);
int vgpu_gr_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm,
u32 class,
u32 flags);
int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode,
u32 compute_preempt_mode);
int vgpu_gr_set_preemption_mode(struct channel_gk20a *ch,
u32 graphics_preempt_mode,
u32 compute_preempt_mode);

View File

@@ -40,5 +40,6 @@ void vgpu_gv11b_init_gpu_characteristics(struct gk20a *g)
nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_USERMODE_SUBMIT, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
}

View File

@@ -162,10 +162,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.detect_sm_arch = vgpu_gr_detect_sm_arch,
.init_ctx_state = vgpu_gr_init_ctx_state,
.free_gr_ctx = vgpu_gr_free_gr_ctx,
.init_ctxsw_preemption_mode =
vgpu_gr_init_ctxsw_preemption_mode,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
@@ -210,7 +206,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.pre_process_sm_exception = NULL,
.set_bes_crop_debug3 = NULL,
.set_bes_crop_debug4 = NULL,
.set_ctxsw_preemption_mode = vgpu_gr_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
.get_egpc_base = gv11b_gr_get_egpc_base,

View File

@@ -284,6 +284,8 @@ void vgpu_init_gpu_characteristics(struct gk20a *g)
gk20a_init_gpu_characteristics(g);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
/* features vgpu does not support */
nvgpu_set_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH, false);
nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, false);

View File

@@ -390,32 +390,6 @@ void gr_gm20b_detect_sm_arch(struct gk20a *g)
gr_gpc0_tpc0_sm_arch_warp_count_v(v);
}
int gr_gm20b_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 flags)
{
nvgpu_log_fn(g, " ");
if (class == MAXWELL_COMPUTE_B) {
nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
NVGPU_PREEMPTION_MODE_COMPUTE_CTA);
}
nvgpu_log_fn(g, "done");
return 0;
}
void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx)
{
nvgpu_log_fn(g, " ");
nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx);
nvgpu_log_fn(g, "done");
}
int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o)
{

View File

@@ -69,11 +69,6 @@ void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
void gr_gm20b_detect_sm_arch(struct gk20a *g);
int gr_gm20b_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 flags);
void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx);
int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,

View File

@@ -267,10 +267,6 @@ static const struct gpu_ops gm20b_ops = {
.detect_sm_arch = gr_gm20b_detect_sm_arch,
.init_ctx_state = gr_gk20a_init_ctx_state,
.free_gr_ctx = gr_gk20a_free_gr_ctx,
.init_ctxsw_preemption_mode =
gr_gm20b_init_ctxsw_preemption_mode,
.update_ctxsw_preemption_mode =
gr_gm20b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gm20b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -1090,6 +1086,7 @@ int gm20b_init_hal(struct gk20a *g)
}
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, false);
nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
g->pmu_lsf_pmu_wpr_init_done = 0;

View File

@@ -41,6 +41,7 @@
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
@@ -546,203 +547,6 @@ int gr_gp10b_init_ctx_state(struct gk20a *g)
return 0;
}
int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode,
u32 compute_preempt_mode)
{
int err = 0;
if (g->ops.gr.is_valid_gfx_class(g, class) &&
g->gr.ctx_vars.force_preemption_gfxp) {
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
}
if (g->ops.gr.is_valid_compute_class(g, class) &&
g->gr.ctx_vars.force_preemption_cilp) {
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
}
/* check for invalid combinations */
if (nvgpu_gr_ctx_check_valid_preemption_mode(gr_ctx,
graphics_preempt_mode, compute_preempt_mode) == false) {
return -EINVAL;
}
/* set preemption modes */
switch (graphics_preempt_mode) {
case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
{
u32 rtv_cb_size;
u32 spill_size = g->ops.gr.init.get_ctx_spill_size(g);
u32 pagepool_size = g->ops.gr.init.get_ctx_pagepool_size(g);
u32 betacb_size = g->ops.gr.init.get_ctx_betacb_size(g);
u32 attrib_cb_size =
g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size,
nvgpu_gr_config_get_tpc_count(g->gr.config),
nvgpu_gr_config_get_max_tpc_count(g->gr.config));
nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
nvgpu_log_info(g, "gfxp context pagepool_size=%d", pagepool_size);
nvgpu_log_info(g, "gfxp context attrib_cb_size=%d",
attrib_cb_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_PREEMPT_CTXSW,
g->gr.ctx_vars.preempt_image_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_SPILL_CTXSW, spill_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size);
if (g->ops.gr.init.get_gfxp_rtv_cb_size != NULL) {
rtv_cb_size = g->ops.gr.init.get_gfxp_rtv_cb_size(g);
nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc,
NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size);
}
err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx,
g->gr.gr_ctx_desc, vm);
if (err != 0) {
nvgpu_err(g, "cannot allocate ctxsw buffers");
goto fail;
}
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
graphics_preempt_mode);
break;
}
case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
graphics_preempt_mode);
break;
default:
break;
}
if (g->ops.gr.is_valid_compute_class(g, class) ||
g->ops.gr.is_valid_gfx_class(g, class)) {
switch (compute_preempt_mode) {
case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
compute_preempt_mode);
break;
default:
break;
}
}
return 0;
fail:
return err;
}
int gr_gp10b_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 flags)
{
int err;
u32 graphics_preempt_mode = 0;
u32 compute_preempt_mode = 0;
nvgpu_log_fn(g, " ");
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) {
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
}
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) {
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
}
if ((graphics_preempt_mode != 0U) || (compute_preempt_mode != 0U)) {
if (g->ops.gr.set_ctxsw_preemption_mode != NULL) {
err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm,
class, graphics_preempt_mode, compute_preempt_mode);
if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
} else {
return -EINVAL;
}
}
nvgpu_log_fn(g, "done");
return 0;
}
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx)
{
int err;
nvgpu_log_fn(g, " ");
nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx);
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
u64 addr;
u32 size;
if (subctx != NULL) {
nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx,
gr_ctx);
} else {
nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx);
}
err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
if (err != 0) {
nvgpu_err(g, "can't map patch context");
goto out;
}
addr = gr_ctx->betacb_ctxsw_buffer.gpu_va;
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
nvgpu_gr_config_get_tpc_count(g->gr.config),
nvgpu_gr_config_get_max_tpc_count(g->gr.config), addr,
true);
addr = gr_ctx->pagepool_ctxsw_buffer.gpu_va;
nvgpu_assert(gr_ctx->pagepool_ctxsw_buffer.size <= U32_MAX);
size = (u32)gr_ctx->pagepool_ctxsw_buffer.size;
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
true, false);
addr = gr_ctx->spill_ctxsw_buffer.gpu_va;
nvgpu_assert(gr_ctx->spill_ctxsw_buffer.size <= U32_MAX);
size = (u32)gr_ctx->spill_ctxsw_buffer.size;
g->ops.gr.init.commit_ctxsw_spill(g, gr_ctx, addr, size, true);
g->ops.gr.init.commit_cbes_reserve(g, gr_ctx, true);
if (g->ops.gr.init.gfxp_wfi_timeout != NULL) {
g->ops.gr.init.gfxp_wfi_timeout(g, gr_ctx,
g->gr.gfxp_wfi_timeout_count, true);
}
if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true);
}
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
}
out:
nvgpu_log_fn(g, "done");
}
int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o)
{
@@ -1486,21 +1290,18 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
return 0;
}
if (g->ops.gr.set_ctxsw_preemption_mode != NULL) {
nvgpu_log(g, gpu_dbg_sched, "chid=%d tsgid=%d pid=%d "
"graphics_preempt=%d compute_preempt=%d",
ch->chid,
ch->tsgid,
ch->tgid,
graphics_preempt_mode,
compute_preempt_mode);
err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
graphics_preempt_mode, compute_preempt_mode);
if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
nvgpu_log(g, gpu_dbg_sched, "chid=%d tsgid=%d pid=%d "
"graphics_preempt=%d compute_preempt=%d",
ch->chid,
ch->tsgid,
ch->tgid,
graphics_preempt_mode,
compute_preempt_mode);
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
graphics_preempt_mode, compute_preempt_mode);
if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
err = gk20a_disable_channel_tsg(g, ch);
@@ -1513,19 +1314,16 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
goto enable_ch;
}
if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
g->ops.gr.update_ctxsw_preemption_mode(ch->g, gr_ctx,
ch->subctx);
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(ch->g, gr_ctx, ch->subctx);
err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
if (err != 0) {
nvgpu_err(g, "can't map patch context");
goto enable_ch;
}
g->ops.gr.init.commit_global_cb_manager(g, g->gr.config, gr_ctx,
true);
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
if (err != 0) {
nvgpu_err(g, "can't map patch context");
goto enable_ch;
}
g->ops.gr.init.commit_global_cb_manager(g, g->gr.config, gr_ctx,
true);
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
enable_ch:
gk20a_enable_channel_tsg(g, ch);

View File

@@ -77,16 +77,6 @@ int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
int gr_gp10b_init_ctx_state(struct gk20a *g);
int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode,
u32 compute_preempt_mode);
int gr_gp10b_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class, u32 flags);
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx);
int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);

View File

@@ -295,10 +295,6 @@ static const struct gpu_ops gp10b_ops = {
.detect_sm_arch = gr_gm20b_detect_sm_arch,
.init_ctx_state = gr_gp10b_init_ctx_state,
.free_gr_ctx = gr_gk20a_free_gr_ctx,
.init_ctxsw_preemption_mode =
gr_gp10b_init_ctxsw_preemption_mode,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -342,7 +338,6 @@ static const struct gpu_ops gp10b_ops = {
.set_preemption_mode = gr_gp10b_set_preemption_mode,
.pre_process_sm_exception = gr_gp10b_pre_process_sm_exception,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
.init_ecc = gp10b_ecc_init,
.init_gfxp_wfi_timeout_count =
gr_gp10b_init_gfxp_wfi_timeout_count,
@@ -1179,6 +1174,7 @@ int gp10b_init_hal(struct gk20a *g)
}
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
g->pmu_lsf_pmu_wpr_init_done = false;

View File

@@ -404,10 +404,6 @@ static const struct gpu_ops gv100_ops = {
.detect_sm_arch = gr_gv11b_detect_sm_arch,
.init_ctx_state = gr_gp10b_init_ctx_state,
.free_gr_ctx = gr_gk20a_free_gr_ctx,
.init_ctxsw_preemption_mode =
gr_gp10b_init_ctxsw_preemption_mode,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -455,7 +451,6 @@ static const struct gpu_ops gv100_ops = {
.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
.get_egpc_base = gv11b_gr_get_egpc_base,
@@ -1454,6 +1449,7 @@ int gv100_init_hal(struct gk20a *g)
nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
/*

View File

@@ -356,10 +356,6 @@ static const struct gpu_ops gv11b_ops = {
.init_ctx_state = gr_gp10b_init_ctx_state,
.free_gr_ctx = gr_gk20a_free_gr_ctx,
.powergate_tpc = gr_gv11b_powergate_tpc,
.init_ctxsw_preemption_mode =
gr_gp10b_init_ctxsw_preemption_mode,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -408,7 +404,6 @@ static const struct gpu_ops gv11b_ops = {
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.init_ecc = gv11b_ecc_init,
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
.get_egpc_base = gv11b_gr_get_egpc_base,
@@ -1332,6 +1327,7 @@ int gv11b_init_hal(struct gk20a *g)
nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
/*

View File

@@ -191,10 +191,13 @@ struct gk20a;
/* GSP VM support */
#define NVGPU_SUPPORT_GSP_VM 74
/* GFXP preemption support */
#define NVGPU_SUPPORT_PREEMPTION_GFXP 75
/*
* Must be greater than the largest bit offset in the above list.
*/
#define NVGPU_MAX_ENABLED_BITS 75U
#define NVGPU_MAX_ENABLED_BITS 76U
/**
* nvgpu_is_enabled - Check if the passed flag is enabled.

View File

@@ -35,6 +35,12 @@ struct nvgpu_mem;
struct channel_gk20a;
struct nvgpu_gr_obj_ctx_golden_image;
int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class,
u32 graphics_preempt_mode, u32 compute_preempt_mode);
void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx);
int nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch);

View File

@@ -425,10 +425,6 @@ static const struct gpu_ops tu104_ops = {
.detect_sm_arch = gr_gv11b_detect_sm_arch,
.init_ctx_state = gr_gp10b_init_ctx_state,
.free_gr_ctx = gr_gk20a_free_gr_ctx,
.init_ctxsw_preemption_mode =
gr_gp10b_init_ctxsw_preemption_mode,
.update_ctxsw_preemption_mode =
gr_gp10b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
@@ -477,7 +473,6 @@ static const struct gpu_ops tu104_ops = {
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.init_ecc = tu104_ecc_init,
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
.get_egpc_base = gv11b_gr_get_egpc_base,
@@ -1499,6 +1494,7 @@ int tu104_init_hal(struct gk20a *g)
nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_RTOS, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);