/* * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include "hal/fifo/ramfc_ga10b.h" #include int ga10b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, u32 gpfifo_entries, u64 pbdma_acquire_timeout, u32 flags) { struct gk20a *g = ch->g; struct nvgpu_mem *mem = &ch->inst_block; u32 data; u32 engine_id = 0U; u32 eng_intr_mask = 0U; u32 eng_intr_vector = 0U; u32 eng_bitmask = 0U; bool replayable = false; (void)flags; nvgpu_log_fn(g, " "); /* * ga10b can have max 3 engines on a runlist and only * runlist 0 has more than 1 engine(gr0, grcopy0 and grcopy1). * Since grcopy0 and grcopy1 can't schedule work directly, it * is always safe to assume that first active engine on runlist * will trigger pbdma intr notify. * TODO: Add helper function to get active engine mask for * runlist - NVGPU-5219 */ eng_bitmask = ch->runlist->eng_bitmask; engine_id = nvgpu_safe_sub_u32( nvgpu_safe_cast_u64_to_u32(nvgpu_ffs(eng_bitmask)), 1U); nvgpu_memset(g, mem, 0U, 0U, ram_fc_size_val_v()); #ifdef CONFIG_NVGPU_REPLAYABLE_FAULT if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { replayable = true; } #endif nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, replayable, nvgpu_channel_get_max_subctx_count(ch)); nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), g->ops.pbdma.get_gp_base(gpfifo_base)); nvgpu_mem_wr32(g, mem, ram_fc_gp_base_hi_w(), g->ops.pbdma.get_gp_base_hi(gpfifo_base, gpfifo_entries)); nvgpu_mem_wr32(g, mem, ram_fc_signature_w(), ch->g->ops.pbdma.get_signature(ch->g)); nvgpu_mem_wr32(g, mem, ram_fc_pb_header_w(), g->ops.pbdma.get_fc_pb_header()); nvgpu_mem_wr32(g, mem, ram_fc_subdevice_w(), g->ops.pbdma.get_fc_subdevice()); nvgpu_mem_wr32(g, mem, ram_fc_target_w(), g->ops.pbdma.get_fc_target( nvgpu_engine_get_active_eng_info(g, engine_id))); nvgpu_mem_wr32(g, mem, ram_fc_acquire_w(), g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); data = nvgpu_mem_rd32(g, mem, ram_fc_set_channel_info_w()); data = data | (g->ops.pbdma.set_channel_info_veid(ch->subctx_id) | g->ops.pbdma.set_channel_info_chid(ch->chid)); nvgpu_mem_wr32(g, mem, ram_fc_set_channel_info_w(), data); nvgpu_mem_wr32(g, mem, ram_in_engine_wfi_veid_w(), ram_in_engine_wfi_veid_f(ch->subctx_id)); /* get engine interrupt vector */ eng_intr_mask = nvgpu_engine_act_interrupt_mask(g, engine_id); eng_intr_vector = nvgpu_safe_sub_u32( nvgpu_safe_cast_u64_to_u32(nvgpu_ffs(eng_intr_mask)), 1U); /* * engine_intr_vector can be value between 0 and 255. * For example, engine_intr_vector x translates to subtree x/64, * leaf (x % 64)/32 and leaf entry interrupt bit(x % 64)%32. * ga10b engine_intr_vectors are 0,1,2,3,4,5. They map to * subtree_0 and leaf_0(Engine non-stall interrupts) interrupt * bits. */ data = g->ops.pbdma.set_intr_notify(eng_intr_vector); nvgpu_mem_wr32(g, mem, ram_fc_intr_notify_w(), data); if (ch->is_privileged_channel) { /* Set privilege level for channel */ nvgpu_mem_wr32(g, mem, ram_fc_config_w(), g->ops.pbdma.get_config_auth_level_privileged()); /* Enable HCE priv mode for phys mode transfer */ nvgpu_mem_wr32(g, mem, ram_fc_hce_ctrl_w(), g->ops.pbdma.get_ctrl_hce_priv_mode_yes()); } /* Enable userd writeback */ data = nvgpu_mem_rd32(g, mem, ram_fc_config_w()); data = g->ops.pbdma.config_userd_writeback_enable(data); nvgpu_mem_wr32(g, mem, ram_fc_config_w(), data); return 0; } void ga10b_ramfc_capture_ram_dump(struct gk20a *g, struct nvgpu_channel *ch, struct nvgpu_channel_dump_info *info) { struct nvgpu_mem *mem = &ch->inst_block; info->inst.pb_top_level_get = nvgpu_mem_rd32_pair(g, mem, ram_fc_pb_top_level_get_w(), ram_fc_pb_top_level_get_hi_w()); info->inst.pb_put = nvgpu_mem_rd32_pair(g, mem, ram_fc_pb_put_w(), ram_fc_pb_put_hi_w()); info->inst.pb_get = nvgpu_mem_rd32_pair(g, mem, ram_fc_pb_get_w(), ram_fc_pb_get_hi_w()); info->inst.pb_header = nvgpu_mem_rd32(g, mem, ram_fc_pb_header_w()); info->inst.pb_count = nvgpu_mem_rd32(g, mem, ram_fc_pb_count_w()); info->inst.sem_addr = nvgpu_mem_rd32_pair(g, mem, ram_fc_sem_addr_lo_w(), ram_fc_sem_addr_hi_w()); info->inst.sem_payload = nvgpu_mem_rd32_pair(g, mem, ram_fc_sem_payload_lo_w(), ram_fc_sem_payload_hi_w()); info->inst.sem_execute = nvgpu_mem_rd32(g, mem, ram_fc_sem_execute_w()); }