diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 4ea56d8ef..c059e464f 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -13,6 +13,7 @@ nvgpu-y += \ $(nvgpu-t19x)/gv11b/ce_gv11b.o \ $(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \ $(nvgpu-t19x)/gv11b/pmu_gv11b.o \ - $(nvgpu-t19x)/gv11b/therm_gv11b.o + $(nvgpu-t19x)/gv11b/therm_gv11b.o \ + $(nvgpu-t19x)/gv11b/subctx_gv11b.o nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index bc413a9ee..b9276e094 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -19,6 +19,8 @@ #include "gp10b/fifo_gp10b.h" #include "hw_pbdma_gv11b.h" #include "fifo_gv11b.h" +#include "subctx_gv11b.h" +#include "gr_gv11b.h" #include "hw_fifo_gv11b.h" #include "hw_ram_gv11b.h" #include "hw_ccsr_gv11b.h" @@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c) } +static void channel_gv11b_unbind(struct channel_gk20a *ch) +{ + gk20a_dbg_fn(""); + + gv11b_free_subctx_header(ch); + + channel_gk20a_unbind(ch); + +} static u32 gv11b_fifo_get_num_fifos(struct gk20a *g) { @@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops) gops->fifo.userd_gp_get = gv11b_userd_gp_get; gops->fifo.userd_gp_put = gv11b_userd_gp_put; gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc; + gops->fifo.unbind_channel = channel_gv11b_unbind; } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7f5b8d3f1..bdb96329d 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -13,6 +13,7 @@ * more details. 
*/ +#include #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ #include #include @@ -24,12 +25,16 @@ #include "gm20b/gr_gm20b.h" #include "gv11b/gr_gv11b.h" +#include "gv11b/mm_gv11b.h" +#include "gv11b/subctx_gv11b.h" #include "hw_gr_gv11b.h" #include "hw_fifo_gv11b.h" #include "hw_proj_gv11b.h" #include "hw_ctxsw_prog_gv11b.h" #include "hw_mc_gv11b.h" #include "hw_gr_gv11b.h" +#include "hw_ram_gv11b.h" +#include "hw_pbdma_gv11b.h" #include #include @@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) return 0; } - static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) { struct av_list_gk20a *sw_veid_bundle_init = @@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g) for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); - kfree(tpc_sm_id); return 0; } +static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) +{ + u32 addr_lo; + u32 addr_hi; + struct ctx_header_desc *ctx; + + gk20a_dbg_fn(""); + + gv11b_alloc_subctx_header(c); + + gv11b_update_subctx_header(c, gpu_va); + + ctx = &c->ch_ctx.ctx_header; + addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); + addr_hi = u64_hi32(ctx->mem.gpu_va); + + /* point this address to engine_wfi_ptr */ + gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(), + ram_in_engine_cs_wfi_v() | + ram_in_engine_wfi_target_f( + ram_in_engine_wfi_target_sys_mem_ncoh_v()) | + ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) | + ram_in_engine_wfi_ptr_lo_f(addr_lo)); + + gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(), + ram_in_engine_wfi_ptr_hi_f(addr_hi)); + + return 0; +} + + + static int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, bool patch) { @@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g, void gv11b_init_gr(struct gpu_ops *gops) { gp10b_init_gr(gops); + 
gops->gr.init_preemption_state = NULL; gops->gr.init_fs_state = gr_gv11b_init_fs_state; gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; gops->gr.is_valid_class = gr_gv11b_is_valid_class; @@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops) gops->gr.load_smid_config = gr_gv11b_load_smid_config; gops->gr.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering; + gops->gr.commit_inst = gr_gv11b_commit_inst; + } diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c new file mode 100644 index 000000000..3acc53f6e --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c @@ -0,0 +1,147 @@ +/* + * Volta GPU series Subcontext + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. 
+ */ + +#include "gk20a/gk20a.h" +#include "gk20a/semaphore_gk20a.h" +#include "gv11b/subctx_gv11b.h" +#include "gv11b/hw_ram_gv11b.h" +#include "gv11b/hw_ctxsw_prog_gv11b.h" + +static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, + struct mem_desc *inst_block); + +void gv11b_free_subctx_header(struct channel_gk20a *c) +{ + struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct gk20a *g = c->g; + + gk20a_dbg_fn(""); + + if (ctx->mem.gpu_va) { + gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va, + ctx->mem.size, gk20a_mem_flag_none); + + gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem); + } +} + +int gv11b_alloc_subctx_header(struct channel_gk20a *c) +{ + struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct gk20a *g = c->g; + int ret = 0; + + gk20a_dbg_fn(""); + + if (ctx->mem.gpu_va == 0) { + ret = gk20a_gmmu_alloc_attr_sys(g, + DMA_ATTR_NO_KERNEL_MAPPING, + ctxsw_prog_fecs_header_v(), + &ctx->mem); + if (ret) { + gk20a_err(dev_from_gk20a(g), + "failed to allocate sub ctx header"); + return ret; + } + ctx->mem.gpu_va = gk20a_gmmu_map(c->vm, + &ctx->mem.sgt, + ctx->mem.size, + NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, + gk20a_mem_flag_none, true, + ctx->mem.aperture); + if (!ctx->mem.gpu_va) { + gk20a_err(dev_from_gk20a(g), + "failed to map ctx header"); + gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, + &ctx->mem); + return -ENOMEM; + } + /* Now clear the buffer */ + if (gk20a_mem_begin(g, &ctx->mem)) + return -ENOMEM; + + gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size); + gk20a_mem_end(g, &ctx->mem); + + gv11b_init_subcontext_pdb(c, &c->inst_block); + + } + return ret; +} + +static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, + struct mem_desc *inst_block) +{ + struct gk20a *g = c->g; + struct vm_gk20a *vm; + u64 pdb_addr, pdb_addr_lo, pdb_addr_hi; + u32 format_word; + u32 lo, hi; + + gk20a_dbg_fn(""); + /* load main pdb as veid0 pdb also */ + vm = c->vm; + pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); + 
pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); + pdb_addr_hi = u64_hi32(pdb_addr); + format_word = ram_in_sc_page_dir_base_target_f( + ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | + ram_in_sc_page_dir_base_vol_f( + ram_in_sc_page_dir_base_vol_true_v(), 0) | + ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | + ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | + ram_in_sc_use_ver2_pt_format_f(1, 0) | + ram_in_sc_big_page_size_f(1, 0) | + ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); + lo = ram_in_sc_page_dir_base_vol_0_w(); + hi = ram_in_sc_page_dir_base_hi_0_w(); + gk20a_mem_wr32(g, inst_block, lo, format_word); + gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi); + + /* mark subcontext0 address space as valid */ + /* TODO fix proper hw register definitions */ + gk20a_mem_wr32(g, inst_block, 166, 0x1); + gk20a_mem_wr32(g, inst_block, 167, 0); + gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(), + ram_in_engine_wfi_veid_f(0)); + +} + +int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) +{ + struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; + struct mem_desc *gr_mem; + struct gk20a *g = c->g; + int ret = 0; + u32 addr_lo, addr_hi; + + addr_lo = u64_lo32(gpu_va); + addr_hi = u64_hi32(gpu_va); + + gr_mem = &ctx->mem; + g->ops.mm.l2_flush(g, true); + if (gk20a_mem_begin(g, gr_mem)) + return -ENOMEM; + + gk20a_mem_wr(g, gr_mem, + ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); + gk20a_mem_wr(g, gr_mem, + ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo); + gk20a_mem_end(g, gr_mem); + return ret; +} diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h new file mode 100644 index 000000000..357cd2540 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h @@ -0,0 +1,27 @@ +/* + * + * Volta GPU series Subcontext + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. + */ +#ifndef __SUBCONTEXT_GV11B_H__ +#define __SUBCONTEXT_GV11B_H__ + +int gv11b_alloc_subctx_header(struct channel_gk20a *c); + +void gv11b_free_subctx_header(struct channel_gk20a *c); + +int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); +#endif /* __SUBCONTEXT_GV11B_H__ */