/*
 * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/* Header names below are inferred from the nvgpu APIs used in this file. */
#include <nvgpu/barrier.h>
#include <nvgpu/bug.h>
#include <nvgpu/channel.h>
#include <nvgpu/channel_sync.h>
#include <nvgpu/channel_sync_syncpt.h>
#include <nvgpu/fence.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/log.h>
#include <nvgpu/ltc.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/profile.h>
#include <nvgpu/rwsem.h>

/*
 * We might need two extra gpfifo entries per submit - one for the pre fence
 * and one for the post fence.
 */
#define EXTRA_GPFIFO_ENTRIES 2U

static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c,
		struct nvgpu_channel_fence *fence,
		struct priv_cmd_entry **wait_cmd, bool flag_sync_fence)
{
	/*
	 * A single input sync fd may contain multiple fences. The
	 * preallocated priv cmdbuf space allows exactly one per submit in
	 * the worst case. Require at most one wait so that deterministic
	 * submits stay consistent; if there are more and no space is left,
	 * we'll return -EAGAIN in nondeterministic mode.
	 */
	u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ? 1U : 0U;
	int err;

	if (flag_sync_fence) {
		nvgpu_assert(fence->id <= (u32)INT_MAX);
		err = nvgpu_channel_sync_wait_fence_fd(c->sync,
				(int)fence->id, wait_cmd, max_wait_cmds);
	} else {
		struct nvgpu_channel_sync_syncpt *sync_syncpt;

		sync_syncpt = nvgpu_channel_sync_to_syncpt(c->sync);
		if (sync_syncpt != NULL) {
			err = nvgpu_channel_sync_wait_syncpt(sync_syncpt,
					fence->id, fence->value, wait_cmd);
		} else {
			err = -EINVAL;
		}
	}

	return err;
}

static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
		struct priv_cmd_entry **incr_cmd,
		struct nvgpu_fence_type **post_fence, bool flag_fence_get,
		bool need_wfi, bool need_sync_fence)
{
	int err;

	*post_fence = nvgpu_fence_alloc(c);
	if (*post_fence == NULL) {
		return -ENOMEM;
	}

	if (flag_fence_get) {
		err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd,
				*post_fence, need_wfi, need_sync_fence);
	} else {
		err = nvgpu_channel_sync_incr(c->sync, incr_cmd,
				*post_fence, need_sync_fence);
	}

	if (err != 0) {
		nvgpu_fence_put(*post_fence);
		*post_fence = NULL;
	}

	return err;
}
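/*
 * For a job-tracked submit, the gpfifo segment written by this file looks
 * like:
 *
 *   [ wait priv cmdbuf (optional) ][ user gpfifo entries ][ incr priv cmdbuf ]
 *
 * which is why EXTRA_GPFIFO_ENTRIES is 2: at most one kernel entry before and
 * one after the user's own entries.
 */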
/*
 * Handle the submit synchronization - pre-fences and post-fences.
 */
static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_channel_job *job,
		u32 flags)
{
	struct gk20a *g = c->g;
	bool need_sync_fence;
	bool new_sync_created = false;
	int err = 0;
	bool need_wfi = (flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI) == 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;

	if (g->aggressive_sync_destroy_thresh != 0U) {
		nvgpu_mutex_acquire(&c->sync_lock);
		if (c->sync == NULL) {
			c->sync = nvgpu_channel_sync_create(c);
			if (c->sync == NULL) {
				err = -ENOMEM;
				goto clean_up_unlock;
			}
			new_sync_created = true;
		}
		nvgpu_channel_sync_get_ref(c->sync);
	}

	if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {
		err = g->ops.channel.set_syncpt(c);
		if (err != 0) {
			goto clean_up_put_sync;
		}
	}

	/*
	 * Optionally insert a syncpt/semaphore wait in the beginning of the
	 * gpfifo submission when requested by the user.
	 */
	if (flag_fence_wait) {
		err = nvgpu_submit_create_wait_cmd(c, fence, &job->wait_cmd,
				flag_sync_fence);
		if (err != 0) {
			goto clean_up_put_sync;
		}
	}

	need_sync_fence = flag_fence_get && flag_sync_fence;

	/*
	 * Always generate an increment at the end of a GPFIFO submission.
	 * When we do job tracking, post fences are needed for various
	 * reasons even if not requested by the user.
	 */
	err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd,
			&job->post_fence, flag_fence_get, need_wfi,
			need_sync_fence);
	if (err != 0) {
		goto clean_up_wait_cmd;
	}

	if (g->aggressive_sync_destroy_thresh != 0U) {
		nvgpu_mutex_release(&c->sync_lock);
	}
	return 0;

clean_up_wait_cmd:
	if (job->wait_cmd != NULL) {
		nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
	}
	job->wait_cmd = NULL;
clean_up_put_sync:
	if (g->aggressive_sync_destroy_thresh != 0U) {
		if (nvgpu_channel_sync_put_ref_and_check(c->sync) &&
				g->aggressive_sync_destroy) {
			nvgpu_channel_sync_destroy(c->sync);
		}
	}
clean_up_unlock:
	if (g->aggressive_sync_destroy_thresh != 0U) {
		nvgpu_mutex_release(&c->sync_lock);
	}
	return err;
}

static void nvgpu_submit_append_priv_cmdbuf(struct nvgpu_channel *c,
		struct priv_cmd_entry *cmd)
{
	struct gk20a *g = c->g;
	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
	struct nvgpu_gpfifo_entry gpfifo_entry;
	u64 gva;
	u32 size;

	nvgpu_priv_cmdbuf_finish(g, cmd, &gva, &size);
	g->ops.pbdma.format_gpfifo_entry(g, &gpfifo_entry, gva, size);

	nvgpu_mem_wr_n(g, gpfifo_mem,
			c->gpfifo.put * (u32)sizeof(gpfifo_entry),
			&gpfifo_entry, (u32)sizeof(gpfifo_entry));

	c->gpfifo.put = (c->gpfifo.put + 1U) & (c->gpfifo.entry_num - 1U);
}

static int nvgpu_submit_append_gpfifo_user_direct(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries)
{
	struct gk20a *g = c->g;
	struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
	u32 gpfifo_size = c->gpfifo.entry_num;
	u32 len = num_entries;
	u32 start = c->gpfifo.put;
	u32 end = start + len; /* exclusive */
	int err;

	nvgpu_speculation_barrier();
	if (end > gpfifo_size) {
		/* wrap-around */
		u32 length0 = gpfifo_size - start;
		u32 length1 = len - length0;

		err = g->os_channel.copy_user_gpfifo(
				&gpfifo_cpu[start], userdata,
				0, length0);
		if (err != 0) {
			return err;
		}

		err = g->os_channel.copy_user_gpfifo(
				gpfifo_cpu, userdata,
				length0, length1);
		if (err != 0) {
			return err;
		}
	} else {
		err = g->os_channel.copy_user_gpfifo(
				&gpfifo_cpu[start], userdata,
				0, len);
		if (err != 0) {
			return err;
		}
	}

	return 0;
}
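/*
 * Worked example of the wrap-around split used by the append helpers above
 * and below, assuming entry_num = 512 (a power of two), put = 510 and
 * num_entries = 5:
 *
 *   length0 = 512 - 510 = 2 entries are copied to slots 510..511, and
 *   length1 = 5 - 2 = 3 entries are copied to slots 0..2.
 *
 * nvgpu_submit_append_gpfifo_common() does the same split, but with all
 * offsets and lengths scaled to bytes for nvgpu_mem_wr_n().
 */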
static void nvgpu_submit_append_gpfifo_common(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *src, u32 num_entries)
{
	struct gk20a *g = c->g;
	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
	/* in bytes */
	u32 gpfifo_size =
		c->gpfifo.entry_num * (u32)sizeof(struct nvgpu_gpfifo_entry);
	u32 len = num_entries * (u32)sizeof(struct nvgpu_gpfifo_entry);
	u32 start = c->gpfifo.put * (u32)sizeof(struct nvgpu_gpfifo_entry);
	u32 end = start + len; /* exclusive */

	if (end > gpfifo_size) {
		/* wrap-around */
		u32 length0 = gpfifo_size - start;
		u32 length1 = len - length0;
		/* length0 is in bytes, so advance the source by bytes too */
		struct nvgpu_gpfifo_entry *src2 =
			(struct nvgpu_gpfifo_entry *)((u8 *)src + length0);

		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
		nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
	} else {
		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
	}
}

/*
 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
 * splitting into two memcpys to handle wrap-around.
 */
static int nvgpu_submit_append_gpfifo(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *kern_gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries)
{
	int err;

	if ((kern_gpfifo == NULL)
#ifdef CONFIG_NVGPU_DGPU
			&& (c->gpfifo.pipe == NULL)
#endif
	   ) {
		/*
		 * This path (from userspace to sysmem) is special in order
		 * to avoid two copies unnecessarily (from user to pipe, then
		 * from pipe to gpu sysmem buffer).
		 */
		err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
				num_entries);
		if (err != 0) {
			return err;
		}
	}
#ifdef CONFIG_NVGPU_DGPU
	else if (kern_gpfifo == NULL) {
		/* from userspace to vidmem, use the common path */
		err = c->g->os_channel.copy_user_gpfifo(c->gpfifo.pipe,
				userdata, 0, num_entries);
		if (err != 0) {
			return err;
		}

		nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
				num_entries);
	}
#endif
	else {
		/*
		 * from kernel to either sysmem or vidmem; copy_user_gpfifo
		 * is not needed, so use the common path
		 */
		nvgpu_submit_append_gpfifo_common(c, kern_gpfifo,
				num_entries);
	}

	trace_write_pushbuffers(c, num_entries);

	c->gpfifo.put = (c->gpfifo.put + num_entries) &
		(c->gpfifo.entry_num - 1U);

	return 0;
}
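/*
 * Worked example of the error-path put rollback below, assuming
 * entry_num = 512, and put = 3 after appending one wait cmd, five user
 * entries and one incr cmd (i.e. the pre-submit put was 508):
 *
 *   (3 + (512 - (1 + 5))) & 511 = 509  -> undoes the incr + user entries
 *   (509 + (512 - 1)) & 511     = 508  -> undoes the wait cmd
 *
 * The additions of entry_num keep the intermediate values non-negative;
 * the mask makes the result identical to plain modular subtraction.
 */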
static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_profile *profile,
		bool need_deferred_cleanup)
{
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	struct nvgpu_channel_job *job = NULL;
	int err;

	err = nvgpu_channel_alloc_job(c, &job);
	if (err != 0) {
		return err;
	}

	err = nvgpu_submit_prepare_syncs(c, fence, job, flags);
	if (err != 0) {
		goto clean_up_job;
	}

	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);

	/*
	 * wait_cmd can be unset even if flag_fence_wait exists; the
	 * Android sync framework, for example, can provide entirely empty
	 * fences that act like trivially expired waits.
	 */
	if (job->wait_cmd != NULL) {
		nvgpu_submit_append_priv_cmdbuf(c, job->wait_cmd);
	}

	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries);
	if (err != 0) {
		goto clean_up_gpfifo_wait;
	}

	nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd);

	err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
	if (err != 0) {
		goto clean_up_gpfifo_incr;
	}

	nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup);

	if (fence_out != NULL) {
		*fence_out = nvgpu_fence_get(job->post_fence);
	}

	return 0;

clean_up_gpfifo_incr:
	/*
	 * Undo the incr priv cmdbuf and the user entries:
	 * new gp.put =
	 *   (gp.put - (1 + num_entries)) & (gp.entry_num - 1) =
	 *   (gp.put + (gp.entry_num - (1 + num_entries))) & (gp.entry_num - 1)
	 * The added entry_num does not affect the result but avoids wrapping
	 * below zero for MISRA, although it would be well defined.
	 */
	c->gpfifo.put = (nvgpu_safe_add_u32(c->gpfifo.put,
			nvgpu_safe_sub_u32(c->gpfifo.entry_num,
				nvgpu_safe_add_u32(1U, num_entries)))) &
		nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
clean_up_gpfifo_wait:
	if (job->wait_cmd != NULL) {
		/*
		 * Undo the wait priv cmdbuf entry:
		 * gp.put =
		 *   (gp.put - 1) & (gp.entry_num - 1) =
		 *   (gp.put + (gp.entry_num - 1)) & (gp.entry_num - 1)
		 * Same as above with the gp.entry_num on the left side.
		 */
		c->gpfifo.put = nvgpu_safe_add_u32(c->gpfifo.put,
				nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) &
			nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
	}
	nvgpu_fence_put(job->post_fence);
	nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd);
	if (job->wait_cmd != NULL) {
		nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
	}
clean_up_job:
	nvgpu_channel_free_job(c, job);
	return err;
}

static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_profile *profile)
{
	int err;

	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);

	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries);
	if (err != 0) {
		return err;
	}

	if (fence_out != NULL) {
		*fence_out = NULL;
	}

	return 0;
}

static int check_gpfifo_capacity(struct nvgpu_channel *c, u32 required)
{
	/*
	 * Make sure we have enough space for the gpfifo entries. Check the
	 * cached values first and then read from HW. If there is no space,
	 * return -EAGAIN and let userspace decide whether to retry the
	 * request.
	 */
	if (nvgpu_channel_get_gpfifo_free_count(c) < required) {
		if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) <
				required) {
			return -EAGAIN;
		}
	}

	return 0;
}
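/*
 * Note that the free-space check accounts for num_entries plus
 * EXTRA_GPFIFO_ENTRIES, so both optional priv cmdbuf entries are guaranteed
 * to fit before anything is written. GP_PUT is updated only once, at the end
 * of nvgpu_do_submit() after all entries have been written, so the HW never
 * sees a partially written submit.
 */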
static int nvgpu_do_submit(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_profile *profile,
		bool need_job_tracking,
		bool need_deferred_cleanup)
{
	struct gk20a *g = c->g;
	int err;

#ifdef CONFIG_NVGPU_TRACE
	trace_gk20a_channel_submit_gpfifo(g->name,
			c->chid,
			num_entries,
			flags,
			fence ? fence->id : 0,
			fence ? fence->value : 0);
#endif

	nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
			c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	err = check_gpfifo_capacity(c, num_entries + EXTRA_GPFIFO_ENTRIES);
	if (err != 0) {
		return err;
	}

	if (need_job_tracking) {
		err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo, userdata,
				num_entries, flags, fence, fence_out,
				profile, need_deferred_cleanup);
	} else {
		err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
				userdata, num_entries, fence_out, profile);
	}

	if (err != 0) {
		return err;
	}

	nvgpu_profile_snapshot(profile, PROFILE_APPEND);

	g->ops.userd.gp_put(g, c);

	return 0;
}
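/*
 * The deterministic path below avoids anything that could add jitter or
 * block for an unbounded time: no dynamic job/sync allocation, no buffer
 * refcounting, no sync-framework post fences and no watchdog. It holds
 * g->deterministic_busy for reading across the HW access so that a
 * concurrent do_idle/railgate (which presumably takes it for writing)
 * cannot pull power while the submit is in flight.
 */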
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_profile *profile)
{
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
	struct gk20a *g = c->g;
	bool need_job_tracking;
	int err = 0;

	nvgpu_assert(nvgpu_channel_is_deterministic(c));

	/* sync framework on post fences would not be deterministic */
	if (flag_fence_get && flag_sync_fence) {
		return -EINVAL;
	}

	/* this would be O(n) */
	if (!skip_buffer_refcounting) {
		return -EINVAL;
	}

#ifdef CONFIG_NVGPU_CHANNEL_WDT
	/* the watchdog needs periodic job cleanup */
	if (nvgpu_channel_wdt_enabled(c->wdt)) {
		return -EINVAL;
	}
#endif

	/*
	 * Job tracking is necessary on deterministic channels if and only if
	 * pre- or post-fence functionality is needed. If not, a fast submit
	 * can be done (i.e. only need to write out userspace GPFIFO entries
	 * and update GP_PUT).
	 */
	need_job_tracking = flag_fence_wait || flag_fence_get;

	if (need_job_tracking) {
		/* nvgpu_semaphore is dynamically allocated, not pooled */
		if (!nvgpu_has_syncpoints(g)) {
			return -EINVAL;
		}

		/* dynamic job allocation wouldn't be deterministic */
		if (!nvgpu_channel_is_prealloc_enabled(c)) {
			return -EINVAL;
		}

		/* dynamic sync allocation wouldn't be deterministic */
		if (g->aggressive_sync_destroy_thresh != 0U) {
			return -EINVAL;
		}

		/*
		 * (Try to) clean up a single job, if available. Each job
		 * requires the same amount of metadata, so this is enough
		 * for the job list, fence pool, and private command buffers
		 * that this submit will need.
		 *
		 * This submit might still need more gpfifo space than what
		 * the previous one has used. The job metadata doesn't look
		 * at it though - the hw GP_GET pointer can be much further
		 * away than our metadata pointers; gpfifo space is "freed"
		 * by the HW.
		 */
		nvgpu_channel_clean_up_jobs(c, true);
	}

	/* Grab access to HW to deal with do_idle */
	nvgpu_rwsem_down_read(&g->deterministic_busy);

	if (c->deterministic_railgate_allowed) {
		/*
		 * Nope - this channel has dropped its own power ref. As
		 * deterministic submits don't hold power on per each
		 * submitted job like normal ones do, the GPU might railgate
		 * any time now and thus submit is disallowed.
		 */
		err = -EINVAL;
		goto clean_up;
	}

	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
			fence_out, profile, need_job_tracking, false);
	if (err != 0) {
		goto clean_up;
	}

	/* No hw access beyond this point */
	nvgpu_rwsem_up_read(&g->deterministic_busy);

	return 0;

clean_up:
	nvgpu_log_fn(g, "fail %d", err);
	nvgpu_rwsem_up_read(&g->deterministic_busy);
	return err;
}
#endif

static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_profile *profile)
{
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	struct gk20a *g = c->g;
	bool need_job_tracking;
	int err = 0;

	nvgpu_assert(!nvgpu_channel_is_deterministic(c));

	/*
	 * Job tracking is necessary for any of the following conditions on
	 * non-deterministic channels:
	 * - pre- or post-fence functionality
	 * - GPU rail-gating
	 * - VPR resize enabled
	 * - buffer refcounting
	 * - channel watchdog
	 *
	 * If none of the conditions are met, then job tracking is not
	 * required and a fast submit can be done (i.e. only need to write
	 * out userspace GPFIFO entries and update GP_PUT).
	 */
	need_job_tracking = (flag_fence_wait ||
			flag_fence_get ||
			nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
			nvgpu_is_vpr_resize_enabled() ||
			!skip_buffer_refcounting);
#ifdef CONFIG_NVGPU_CHANNEL_WDT
	need_job_tracking = need_job_tracking ||
		nvgpu_channel_wdt_enabled(c->wdt);
#endif

	if (need_job_tracking) {
		/*
		 * Get a power ref because this isn't a deterministic
		 * channel that holds them during the channel lifetime.
		 * This one is released by nvgpu_channel_clean_up_jobs,
		 * via syncpt or sema interrupt, whichever is used.
		 */
		err = gk20a_busy(g);
		if (err != 0) {
			nvgpu_err(g,
				"failed to host gk20a to submit gpfifo");
			nvgpu_print_current(g, NULL, NVGPU_ERROR);
			return err;
		}
	}

	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
			fence_out, profile, need_job_tracking, true);
	if (err != 0) {
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_log_fn(g, "fail %d", err);
	/* drop the power ref only if one was taken above */
	if (need_job_tracking) {
		gk20a_idle(g);
	}
	return err;
}

static int check_submit_allowed(struct nvgpu_channel *c)
{
	struct gk20a *g = c->g;

	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
		return -ENODEV;
	}

	if (nvgpu_channel_check_unserviceable(c)) {
		return -ETIMEDOUT;
	}

	if (c->usermode_submit_enabled) {
		return -EINVAL;
	}

	if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) {
		return -ENOMEM;
	}

	/* an address space needs to have been bound at this point. */
	if (!nvgpu_channel_as_bound(c)) {
		nvgpu_err(g,
			"not bound to an address space at time of gpfifo"
			" submission.");
		return -EINVAL;
	}

	return 0;
}
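/*
 * Example of the capacity precondition checked below: with
 * c->gpfifo.entry_num = 128, the HW can consume at most 127 entries at a
 * time, and 2 of those are reserved for the wait/incr priv cmdbufs, so a
 * single submit may carry at most 125 user entries.
 */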
static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				struct nvgpu_gpfifo_userdata userdata,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct nvgpu_fence_type **fence_out,
				struct nvgpu_profile *profile)
{
	struct gk20a *g = c->g;
	int err;

	err = check_submit_allowed(c);
	if (err != 0) {
		return err;
	}

	/*
	 * Fifo not large enough for the request: return an error
	 * immediately. The kernel can insert gpfifo entries before and
	 * after the user gpfifo, so account for those extra entries on top
	 * of the user request. Also, HW with a fifo of size N can accept
	 * only N-1 entries.
	 */
	if (c->gpfifo.entry_num - 1U < num_entries + EXTRA_GPFIFO_ENTRIES) {
		nvgpu_err(g, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	nvgpu_profile_snapshot(profile, PROFILE_ENTRY);

	/* update debug settings */
	nvgpu_ltc_sync_enabled(g);

	nvgpu_log_info(g, "channel %d", c->chid);

#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
	if (c->deterministic) {
		err = nvgpu_submit_deterministic(c, gpfifo, userdata,
				num_entries, flags, fence, fence_out,
				profile);
	} else
#endif
	{
		err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
				num_entries, flags, fence, fence_out,
				profile);
	}

	if (err != 0) {
		return err;
	}

#ifdef CONFIG_NVGPU_TRACE
	if (fence_out != NULL && *fence_out != NULL) {
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags,
				(*fence_out)->syncpt_id,
				(*fence_out)->syncpt_value);
	} else {
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags, 0, 0);
	}
#endif

	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
			c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	nvgpu_profile_snapshot(profile, PROFILE_END);

	nvgpu_log_fn(g, "done");
	return err;
}

int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
				struct nvgpu_gpfifo_userdata userdata,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct nvgpu_fence_type **fence_out,
				struct nvgpu_profile *profile)
{
	return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
			flags, fence, fence_out, profile);
}

int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct nvgpu_fence_type **fence_out)
{
	struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };

	return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
			flags, fence, fence_out, NULL);
}