diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 98fe3301e..5f4fb6791 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -81,7 +81,7 @@ enum /* global_ctx_buffer */ {
 	ATTRIBUTE_VPR		= 5,
 	GOLDEN_CTX		= 6,
 	PRIV_ACCESS_MAP		= 7,
-	/* #8 is reserved */
+	RTV_CIRCULAR_BUFFER	= 8,
 	FECS_TRACE_BUFFER	= 9,
 	NR_GLOBAL_CTX_BUF	= 10
 };
@@ -93,7 +93,7 @@ enum /*global_ctx_buffer_va */ {
 	ATTRIBUTE_VA		= 2,
 	GOLDEN_CTX_VA		= 3,
 	PRIV_ACCESS_MAP_VA	= 4,
-	/* #5 is reserved */
+	RTV_CIRCULAR_BUFFER_VA	= 5,
 	FECS_TRACE_BUFFER_VA	= 6,
 	NR_GLOBAL_CTX_BUF_VA	= 7
 };
@@ -403,6 +403,7 @@ struct nvgpu_gr_ctx {
 	struct nvgpu_mem spill_ctxsw_buffer;
 	struct nvgpu_mem betacb_ctxsw_buffer;
 	struct nvgpu_mem pagepool_ctxsw_buffer;
+	struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer;
 	u32 ctx_id;
 	bool ctx_id_valid;
 	bool cilp_preempt_pending;
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 9fb2ab46d..3ff567165 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -333,6 +333,8 @@ static const struct gpu_ops gm20b_ops = {
 		.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
 		.get_fecs_ctx_state_store_major_rev_id =
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
+		.alloc_gfxp_rtv_cb = NULL,
+		.commit_gfxp_rtv_cb = NULL,
 	},
 	.fb = {
 		.init_hw = gm20b_fb_init_hw,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index c414f0fdb..412649f08 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -410,6 +410,8 @@ static const struct gpu_ops gp106_ops = {
 		.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
 		.get_fecs_ctx_state_store_major_rev_id =
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
+		.alloc_gfxp_rtv_cb = NULL,
+		.commit_gfxp_rtv_cb = NULL,
 	},
 	.fb = {
 		.init_hw = gm20b_fb_init_hw,
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index fb6513d43..ffefb32b7 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1036,6 +1036,14 @@ int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
 			goto fail_free_betacb;
 		}
 
+		if (g->ops.gr.alloc_gfxp_rtv_cb != NULL) {
+			err = g->ops.gr.alloc_gfxp_rtv_cb(g, gr_ctx, vm);
+			if (err != 0) {
+				nvgpu_err(g, "cannot allocate gfxp rtv_cb");
+				goto fail_free_pagepool;
+			}
+		}
+
 		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
 		break;
 		}
@@ -1063,6 +1071,8 @@ int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
 
 	return 0;
 
+fail_free_pagepool:
+	nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
 fail_free_betacb:
 	nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
 fail_free_spill:
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index f2e0ed5d4..3f0ad1659 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -371,6 +371,8 @@ static const struct gpu_ops gp10b_ops = {
 		.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
 		.get_fecs_ctx_state_store_major_rev_id =
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
+		.alloc_gfxp_rtv_cb = NULL,
+		.commit_gfxp_rtv_cb = NULL,
 	},
 	.fb = {
 		.init_hw = gm20b_fb_init_hw,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 60da8dd6b..52ab46328 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -491,6 +491,8 @@ static const struct gpu_ops gv100_ops = {
 		.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
 		.get_fecs_ctx_state_store_major_rev_id =
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
+		.alloc_gfxp_rtv_cb = NULL,
+		.commit_gfxp_rtv_cb = NULL,
 	},
 	.fb = {
 		.init_hw = gv11b_fb_init_hw,
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index a0b373867..f6cc2117b 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1771,6 +1771,10 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
 				g->gr.gfxp_wfi_timeout_count,
 				true);
 
+		if (g->ops.gr.commit_gfxp_rtv_cb != NULL) {
+			g->ops.gr.commit_gfxp_rtv_cb(g, gr_ctx, true);
+		}
+
 		gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
 	}
 
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 2e5e4f9a7..8c80f59f2 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -453,6 +453,8 @@ static const struct gpu_ops gv11b_ops = {
 		.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
 		.get_fecs_ctx_state_store_major_rev_id =
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
+		.alloc_gfxp_rtv_cb = NULL,
+		.commit_gfxp_rtv_cb = NULL,
 	},
 	.fb = {
 		.init_hw = gv11b_fb_init_hw,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 1c2651d97..c9d0839d4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -535,6 +535,10 @@ struct gpu_ops {
 		void (*set_debug_mode)(struct gk20a *g, bool enable);
 		void (*dump_gr_falcon_stats)(struct gk20a *g);
 		u32 (*get_fecs_ctx_state_store_major_rev_id)(struct gk20a *g);
+		int (*alloc_gfxp_rtv_cb)(struct gk20a *g,
+			struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm);
+		void (*commit_gfxp_rtv_cb)(struct gk20a *g,
+			struct nvgpu_gr_ctx *gr_ctx, bool patch);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h
index 9fb9b7b93..83fa99889 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_gr_tu104.h
@@ -2302,6 +2302,14 @@ static inline u32 gr_scc_rm_rtv_cb_size_div_256b_f(u32 v)
 {
 	return (v & 0x7fffU) << 0U;
 }
+static inline u32 gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v(void)
+{
+	return 0x00000100U;
+}
+static inline u32 gr_scc_rm_rtv_cb_size_div_256b_init_f(void)
+{
+	return 0x0U;
+}
 static inline u32 gr_scc_rm_rtv_cb_size_div_256b_default_f(void)
 {
 	return 0x800U;
@@ -2310,6 +2318,10 @@ static inline u32 gr_scc_rm_rtv_cb_size_div_256b_db_adder_f(void)
 {
 	return 0x0U;
 }
+static inline u32 gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f(void)
+{
+	return 0x20U;
+}
 static inline u32 gr_gpcs_gcc_rm_rtv_cb_base_r(void)
 {
 	return 0x00419034U;
diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c
index ca2adafcc..59354d00f 100644
--- a/drivers/gpu/nvgpu/tu104/gr_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c
@@ -200,7 +200,7 @@ clean_up:
 
 static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g,
 	struct nvgpu_gr_ctx *gr_ctx,
-	u64 addr, u32 size, bool patch)
+	u64 addr, u32 size, u32 gfxp_add_size, bool patch)
 {
 	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(),
 		gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
@@ -209,7 +209,8 @@ static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g,
 	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(),
 		gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
 	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(),
-		gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(0), patch);
+		gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxp_add_size),
+		patch);
 }
 
 int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
@@ -218,6 +219,9 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
 	int err;
 	u64 addr;
 	u32 size;
+	u32 gfxp_add_size = 0;
+
+	nvgpu_log_fn(g, " ");
 
 	err = gr_gk20a_commit_global_ctx_buffers(g, gr_ctx, patch);
 	if (err != 0) {
@@ -239,7 +243,8 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
 	size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
 		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f());
 
-	gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, size, patch);
+	gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, size,
+			gfxp_add_size, patch);
 
 	if (patch) {
 		gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
@@ -248,6 +253,68 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
 	return 0;
 }
 
+/*
+ * Allocate the GFXP RTV circular buffer for a graphics context.
+ *
+ * The size is the default RTV CB size plus the DB and GFXP adders,
+ * multiplied by the byte granularity value.
+ * Returns whatever gr_gp10b_alloc_buffer() returns.
+ */
+int gr_tu104_alloc_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
+{
+	int err;
+	u32 rtv_cb_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_cb_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) *
+		gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v();
+
+	err = gr_gp10b_alloc_buffer(vm,
+			rtv_cb_size,
+			&gr_ctx->gfxp_rtvcb_ctxsw_buffer);
+
+	return err;
+}
+
+/*
+ * Commit the GFXP RTV circular buffer of a graphics context.
+ *
+ * Passes the buffer address (GPU VA shifted right by the addr_39_8 align
+ * bits), the total size and the GFXP adder, both in register units, to
+ * gr_tu104_commit_rtv_circular_buffer() together with the patch flag.
+ */
+void gr_tu104_commit_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, bool patch)
+{
+	u64 addr;
+	u32 rtv_cb_size;
+	u32 gfxp_add_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_cb_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f());
+	gfxp_add_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();
+
+	/* GFXP RTV circular buffer */
+	addr = (u64)(u64_lo32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) >>
+			gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()) |
+		(u64)(u64_hi32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) <<
+			(32U - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()));
+
+	gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr,
+		rtv_cb_size,
+		gfxp_add_size,
+		patch);
+}
+
 void gr_tu104_bundle_cb_defaults(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
@@ -270,6 +337,10 @@ void gr_tu104_cb_size_default(struct gk20a *g)
 	}
 	gr->alpha_cb_default_size =
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+	gr->attrib_cb_gfxp_default_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
+	gr->attrib_cb_gfxp_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
 }
 
 int gr_tu104_get_preemption_mode_flags(struct gk20a *g,
@@ -290,6 +361,22 @@ int gr_tu104_get_preemption_mode_flags(struct gk20a *g,
 	return 0;
 }
 
+/*
+ * Free a graphics context: when gr_ctx is non-NULL, unmap and free its GFXP
+ * RTV circular buffer first, then call gr_gk20a_free_gr_ctx().
+ */
+void gr_tu104_free_gr_ctx(struct gk20a *g,
+	struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (gr_ctx != NULL) {
+		nvgpu_dma_unmap_free(vm, &gr_ctx->gfxp_rtvcb_ctxsw_buffer);
+	}
+
+	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
+}
+
 void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.h b/drivers/gpu/nvgpu/tu104/gr_tu104.h
index 39c0f0db2..5ac378b56 100644
--- a/drivers/gpu/nvgpu/tu104/gr_tu104.h
+++ b/drivers/gpu/nvgpu/tu104/gr_tu104.h
@@ -27,6 +27,8 @@
 
 struct gk20a;
 struct nvgpu_preemption_modes_rec;
+struct nvgpu_gr_ctx;
+struct vm_gk20a;
 
 enum {
 	TURING_CHANNEL_GPFIFO_A = 0xC46F,
@@ -55,10 +57,6 @@ enum {
 #define NVC597_SET_SM_DISP_CTRL		0x10c8U
 #define NVC597_SET_SHADER_CUT_COLLECTOR	0x10d0U
 
-/* TODO: merge these into global context buffer list in gr_gk20a.h */
-#define RTV_CIRCULAR_BUFFER		8
-#define RTV_CIRCULAR_BUFFER_VA		5
-
 bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num);
 bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num);
 bool gr_tu104_is_valid_compute_class(struct gk20a *g, u32 class_num);
@@ -88,6 +86,15 @@ int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g,
 int gr_tu104_handle_sw_method(struct gk20a *g, u32 addr,
 	u32 class_num, u32 offset, u32 data);
 
+int gr_tu104_alloc_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm);
+void gr_tu104_commit_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, bool patch);
+
+void gr_tu104_free_gr_ctx(struct gk20a *g,
+	struct vm_gk20a *vm,
+	struct nvgpu_gr_ctx *gr_ctx);
+
 void gr_tu104_init_sm_dsm_reg_info(void);
 void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 	u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 95c403032..a06a92c6d 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -387,9 +387,9 @@ static const struct gpu_ops tu104_ops = {
 		.pagepool_default_size = gr_gv11b_pagepool_default_size,
 		.init_ctx_state = gr_gp10b_init_ctx_state,
 		.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
-		.free_gr_ctx = gr_gk20a_free_gr_ctx,
+		.free_gr_ctx = gr_tu104_free_gr_ctx,
 		.update_ctxsw_preemption_mode =
-			gr_gp10b_update_ctxsw_preemption_mode,
+			gr_gv11b_update_ctxsw_preemption_mode,
 		.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
 		.update_pc_sampling = gr_gm20b_update_pc_sampling,
 		.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
@@ -458,11 +458,13 @@ static const struct gpu_ops tu104_ops = {
 		.set_czf_bypass = NULL,
 		.pre_process_sm_exception = gr_gv11b_pre_process_sm_exception,
 		.set_preemption_buffer_va = gr_gv11b_set_preemption_buffer_va,
-		.init_preemption_state = NULL,
+		.init_preemption_state = gr_gv11b_init_preemption_state,
 		.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
 		.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
 		.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
 		.init_ecc = tu104_ecc_init,
+		.alloc_gfxp_rtv_cb = gr_tu104_alloc_gfxp_rtv_cb,
+		.commit_gfxp_rtv_cb = gr_tu104_commit_gfxp_rtv_cb,
 		.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
 		.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
 		.egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
@@ -485,6 +487,12 @@ static const struct gpu_ops tu104_ops = {
 		.handle_tpc_sm_ecc_exception =
 			gr_gv11b_handle_tpc_sm_ecc_exception,
 		.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
+		.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
+		.init_gfxp_wfi_timeout_count =
+			gr_gv11b_init_gfxp_wfi_timeout_count,
+		.get_max_gfxp_wfi_timeout_count =
+			gr_gv11b_get_max_gfxp_wfi_timeout_count,
+		.dump_ctxsw_stats = gr_gp10b_dump_ctxsw_stats,
 		.fecs_host_int_enable = gr_gv11b_fecs_host_int_enable,
 		.handle_ssync_hww = gr_gv11b_handle_ssync_hww,
 		.handle_notify_pending = gk20a_gr_handle_notify_pending,