/* * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include struct priv_cmd_queue { struct nvgpu_mem mem; u32 size; /* num of entries in words */ u32 put; /* put for priv cmd queue */ u32 get; /* get for priv cmd queue */ }; /* allocate private cmd buffer queue. used for inserting commands before/after user submitted buffers. */ int nvgpu_alloc_priv_cmdbuf_queue(struct nvgpu_channel *ch, u32 num_in_flight) { struct gk20a *g = ch->g; struct vm_gk20a *ch_vm = ch->vm; struct priv_cmd_queue *q; u64 size, tmp_size; int err = 0; u32 wait_size, incr_size; /* * sema size is at least as much as syncpt size, but semas may not be * enabled in the build. If neither semas nor syncpts are enabled, priv * cmdbufs and as such kernel mode submits with job tracking won't be * supported. */ #ifdef CONFIG_NVGPU_SW_SEMAPHORE wait_size = g->ops.sync.sema.get_wait_cmd_size(); incr_size = g->ops.sync.sema.get_incr_cmd_size(); #else wait_size = g->ops.sync.syncpt.get_wait_cmd_size(); incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true); #endif /* * Compute the amount of priv_cmdbuf space we need. In general the * worst case is the kernel inserts both a semaphore pre-fence and * post-fence. Any sync-pt fences will take less memory so we can * ignore them unless they're the only supported type. * * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b, * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be * 10 words: all the same as an ACQ plus a non-stalling intr which is * another 2 words. In reality these numbers vary by chip but we'll use * 8 and 10 as examples. * * We have two cases to consider: the first is we base the size of the * queue on the gpfifo count. Here we multiply by a factor of 1/3 * because at most a third of the GPFIFO entries can be used for * user-submitted jobs; another third goes to wait entries, and the * final third to incr entries. There will be one pair of acq and incr * commands for each job. * * gpfifo entry num * (1 / 3) * (8 + 10) * 4 bytes * * If instead num_in_flight is specified then we will use that to size * the queue instead of a third of the gpfifo entry count. The worst * case is still both sync commands (one ACQ and one INCR) per submit so * we have a queue size of: * * num_in_flight * (8 + 10) * 4 bytes */ if (num_in_flight == 0U) { /* round down to ensure space for all priv cmds */ num_in_flight = ch->gpfifo.entry_num / 3; } size = num_in_flight * (wait_size + incr_size) * sizeof(u32); tmp_size = PAGE_ALIGN(roundup_pow_of_two(size)); nvgpu_assert(tmp_size <= U32_MAX); size = (u32)tmp_size; q = nvgpu_kzalloc(g, sizeof(*q)); err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem); if (err != 0) { nvgpu_err(g, "%s: memory allocation failed", __func__); goto err_free_buf; } tmp_size = q->mem.size / sizeof(u32); nvgpu_assert(tmp_size <= U32_MAX); q->size = (u32)tmp_size; ch->priv_cmd_q = q; return 0; err_free_buf: nvgpu_kfree(g, q); return err; } void nvgpu_free_priv_cmdbuf_queue(struct nvgpu_channel *ch) { struct vm_gk20a *ch_vm = ch->vm; struct priv_cmd_queue *q = ch->priv_cmd_q; if (q == NULL) { return; } nvgpu_dma_unmap_free(ch_vm, &q->mem); nvgpu_kfree(ch->g, q); ch->priv_cmd_q = NULL; } /* allocate a cmd buffer with given size. size is number of u32 entries */ int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, struct priv_cmd_entry *e) { struct priv_cmd_queue *q = c->priv_cmd_q; u32 free_count; u32 size = orig_size; nvgpu_log_fn(c->g, "size %d", orig_size); if (e == NULL) { nvgpu_err(c->g, "ch %d: priv cmd entry is null", c->chid); return -EINVAL; } /* if free space in the end is less than requested, increase the size * to make the real allocated space start from beginning. */ if (q->put + size > q->size) { size = orig_size + (q->size - q->put); } nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d", c->chid, q->get, q->put); free_count = (q->size - (q->put - q->get) - 1U) % q->size; if (size > free_count) { return -EAGAIN; } e->fill_off = 0; e->size = orig_size; e->mem = &q->mem; /* if we have increased size to skip free space in the end, set put to beginning of cmd buffer (0) + size */ if (size != orig_size) { e->off = 0; e->gva = q->mem.gpu_va; q->put = orig_size; } else { e->off = q->put; e->gva = q->mem.gpu_va + q->put * sizeof(u32); q->put = (q->put + orig_size) & (q->size - 1U); } /* we already handled q->put + size > q->size so BUG_ON this */ BUG_ON(q->put > q->size); /* * commit the previous writes before making the entry valid. * see the corresponding nvgpu_smp_rmb() in * nvgpu_channel_update_priv_cmd_q_and_free_entry(). */ nvgpu_smp_wmb(); e->valid = true; nvgpu_log_fn(c->g, "done"); return 0; } /* * Don't call this to free an explicit cmd entry. * It doesn't update priv_cmd_queue get/put. */ void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, struct priv_cmd_entry *e) { if (nvgpu_channel_is_prealloc_enabled(c)) { (void) memset(e, 0, sizeof(struct priv_cmd_entry)); } else { nvgpu_kfree(c->g, e); } } void nvgpu_channel_update_priv_cmd_q_and_free_entry( struct nvgpu_channel *ch, struct priv_cmd_entry *e) { struct priv_cmd_queue *q = ch->priv_cmd_q; struct gk20a *g = ch->g; if (e == NULL) { return; } if (e->valid) { /* read the entry's valid flag before reading its contents */ nvgpu_smp_rmb(); if ((q->get != e->off) && e->off != 0U) { nvgpu_err(g, "requests out-of-order, ch=%d", ch->chid); } q->get = e->off + e->size; } nvgpu_channel_free_priv_cmd_entry(ch, e); } void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e, u32 *data, u32 entries) { nvgpu_assert(e->fill_off + entries <= e->size); nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32), data, entries * sizeof(u32)); e->fill_off += entries; } void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e, u32 entries) { nvgpu_assert(e->fill_off + entries <= e->size); nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32), 0, entries * sizeof(u32)); e->fill_off += entries; } void nvgpu_priv_cmdbuf_finish(struct gk20a *g, struct priv_cmd_entry *e, u64 *gva, u32 *size) { /* * The size is written to the pushbuf entry, so make sure this buffer * is complete at this point. The responsibility of the channel sync is * to be consistent in allocation and usage, and the matching size and * add gops (e.g., get_wait_cmd_size, add_wait_cmd) help there. */ nvgpu_assert(e->fill_off == e->size); #ifdef CONFIG_NVGPU_TRACE if (e->mem->aperture == APERTURE_SYSMEM) { trace_gk20a_push_cmdbuf(g->name, 0, e->size, 0, (u32 *)e->mem->cpu_va + e->off); } #endif *gva = e->gva; *size = e->size; }