diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index 1e22ff38e..5fdbacb4d 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -348,6 +348,8 @@ fifo: submit: safe: yes sources: [ common/fifo/submit.c, + common/fifo/priv_cmdbuf.c, + include/nvgpu/priv_cmdbuf.h, include/nvgpu/profile.h ] deps: [ ] runlist: diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 7bc58db9f..fdca1590d 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -518,6 +518,7 @@ nvgpu-y += \ common/fifo/channel.o \ common/fifo/pbdma.o \ common/fifo/submit.o \ + common/fifo/priv_cmdbuf.o \ common/fifo/tsg.o \ common/fifo/runlist.o \ common/fifo/engine_status.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index c4d6586ba..0970a9816 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -387,6 +387,7 @@ endif ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1) srcs += common/fifo/submit.c \ + common/fifo/priv_cmdbuf.c \ common/sync/channel_sync.c \ common/sync/channel_sync_syncpt.c endif diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index aec1426dd..b2cb0fa85 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -60,12 +60,12 @@ #ifdef CONFIG_NVGPU_DEBUGGER #include #endif +#include static void free_channel(struct nvgpu_fifo *f, struct nvgpu_channel *ch); static void channel_dump_ref_actions(struct nvgpu_channel *ch); #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT -static void channel_free_priv_cmd_q(struct nvgpu_channel *ch); static void channel_free_prealloc_resources(struct nvgpu_channel *c); static void channel_joblist_add(struct nvgpu_channel *c, struct nvgpu_channel_job *job); @@ -238,164 +238,6 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch) nvgpu_mutex_release(&ch->sync_lock); } -/* allocate private cmd buffer. 
- used for inserting commands before/after user submitted buffers. */ -static int channel_alloc_priv_cmdbuf(struct nvgpu_channel *ch, - u32 num_in_flight) -{ - struct gk20a *g = ch->g; - struct vm_gk20a *ch_vm = ch->vm; - struct priv_cmd_queue *q = &ch->priv_cmd_q; - u64 size, tmp_size; - int err = 0; - bool gpfifo_based = false; - - if (num_in_flight == 0U) { - num_in_flight = ch->gpfifo.entry_num; - gpfifo_based = true; - } - - /* - * Compute the amount of priv_cmdbuf space we need. In general the worst - * case is the kernel inserts both a semaphore pre-fence and post-fence. - * Any sync-pt fences will take less memory so we can ignore them for - * now. - * - * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b, - * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10 - * words: all the same as an ACQ plus a non-stalling intr which is - * another 2 words. - * - * We have two cases to consider: the first is we base the size of the - * priv_cmd_buf on the gpfifo count. Here we multiply by a factor of - * 2/3rds because only at most 2/3rds of the GPFIFO can be used for - * sync commands: - * - * nr_gpfifos * (2 / 3) * (8 + 10) * 4 bytes - * - * If instead num_in_flight is specified then we will use that to size - * the priv_cmd_buf. 
The worst case is two sync commands (one ACQ and - * one INCR) per submit so we have a priv_cmd_buf size of: - * - * num_in_flight * (8 + 10) * 4 bytes - */ - size = num_in_flight * 18UL * sizeof(u32); - if (gpfifo_based) { - size = 2U * size / 3U; - } - - tmp_size = PAGE_ALIGN(roundup_pow_of_two(size)); - nvgpu_assert(tmp_size <= U32_MAX); - size = (u32)tmp_size; - - err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem); - if (err != 0) { - nvgpu_err(g, "%s: memory allocation failed", __func__); - goto clean_up; - } - - tmp_size = q->mem.size / sizeof(u32); - nvgpu_assert(tmp_size <= U32_MAX); - q->size = (u32)tmp_size; - - return 0; - -clean_up: - channel_free_priv_cmd_q(ch); - return err; -} - -static void channel_free_priv_cmd_q(struct nvgpu_channel *ch) -{ - struct vm_gk20a *ch_vm = ch->vm; - struct priv_cmd_queue *q = &ch->priv_cmd_q; - - if (q->size == 0U) { - return; - } - - nvgpu_dma_unmap_free(ch_vm, &q->mem); - - (void) memset(q, 0, sizeof(struct priv_cmd_queue)); -} - -/* allocate a cmd buffer with given size. size is number of u32 entries */ -int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, - struct priv_cmd_entry *e) -{ - struct priv_cmd_queue *q = &c->priv_cmd_q; - u32 free_count; - u32 size = orig_size; - - nvgpu_log_fn(c->g, "size %d", orig_size); - - if (e == NULL) { - nvgpu_err(c->g, - "ch %d: priv cmd entry is null", - c->chid); - return -EINVAL; - } - - /* if free space in the end is less than requested, increase the size - * to make the real allocated space start from beginning. 
*/ - if (q->put + size > q->size) { - size = orig_size + (q->size - q->put); - } - - nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d", - c->chid, q->get, q->put); - - free_count = (q->size - (q->put - q->get) - 1U) % q->size; - - if (size > free_count) { - return -EAGAIN; - } - - e->size = orig_size; - e->mem = &q->mem; - - /* if we have increased size to skip free space in the end, set put - to beginning of cmd buffer (0) + size */ - if (size != orig_size) { - e->off = 0; - e->gva = q->mem.gpu_va; - q->put = orig_size; - } else { - e->off = q->put; - e->gva = q->mem.gpu_va + q->put * sizeof(u32); - q->put = (q->put + orig_size) & (q->size - 1U); - } - - /* we already handled q->put + size > q->size so BUG_ON this */ - BUG_ON(q->put > q->size); - - /* - * commit the previous writes before making the entry valid. - * see the corresponding nvgpu_smp_rmb() in - * nvgpu_channel_update_priv_cmd_q_and_free_entry(). - */ - nvgpu_smp_wmb(); - - e->valid = true; - nvgpu_log_fn(c->g, "done"); - - return 0; -} - -/* - * Don't call this to free an explicit cmd entry. - * It doesn't update priv_cmd_queue get/put. 
- */ -void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, - struct priv_cmd_entry *e) -{ - if (nvgpu_channel_is_prealloc_enabled(c)) { - (void) memset(e, 0, sizeof(struct priv_cmd_entry)); - } else { - nvgpu_kfree(c->g, e); - } -} - int nvgpu_channel_alloc_job(struct nvgpu_channel *c, struct nvgpu_channel_job **job_out) { @@ -1213,29 +1055,6 @@ static void channel_worker_enqueue(struct nvgpu_channel *ch) } } -void nvgpu_channel_update_priv_cmd_q_and_free_entry( - struct nvgpu_channel *ch, struct priv_cmd_entry *e) -{ - struct priv_cmd_queue *q = &ch->priv_cmd_q; - struct gk20a *g = ch->g; - - if (e == NULL) { - return; - } - - if (e->valid) { - /* read the entry's valid flag before reading its contents */ - nvgpu_smp_rmb(); - if ((q->get != e->off) && e->off != 0U) { - nvgpu_err(g, "requests out-of-order, ch=%d", - ch->chid); - } - q->get = e->off + e->size; - } - - nvgpu_channel_free_priv_cmd_entry(ch, e); -} - int nvgpu_channel_add_job(struct nvgpu_channel *c, struct nvgpu_channel_job *job, bool skip_buffer_refcounting) diff --git a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c new file mode 100644 index 000000000..cdcf2ef55 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Allocate the private cmd buffer backing ch->priv_cmd_q via nvgpu_dma_alloc_map_sys(). + It is used for inserting commands before/after user submitted buffers. Returns 0 or the allocation error. */ +int channel_alloc_priv_cmdbuf(struct nvgpu_channel *ch, + u32 num_in_flight) +{ + struct gk20a *g = ch->g; + struct vm_gk20a *ch_vm = ch->vm; + struct priv_cmd_queue *q = &ch->priv_cmd_q; + u64 size, tmp_size; + int err = 0; + bool gpfifo_based = false; + + if (num_in_flight == 0U) { /* no explicit count: size from the gpfifo entry count */ + num_in_flight = ch->gpfifo.entry_num; + gpfifo_based = true; + } + + /* + * Compute the amount of priv_cmdbuf space we need. In general the worst + * case is the kernel inserts both a semaphore pre-fence and post-fence. + * Any sync-pt fences will take less memory so we can ignore them for + * now. + * + * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b, + * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10 + * words: all the same as an ACQ plus a non-stalling intr which is + * another 2 words. + * + * We have two cases to consider: the first is we base the size of the + * priv_cmd_buf on the gpfifo count. Here we multiply by a factor of + * 2/3rds because only at most 2/3rds of the GPFIFO can be used for + * sync commands: + * + * nr_gpfifos * (2 / 3) * (8 + 10) * 4 bytes + * + * If instead num_in_flight is specified then we will use that to size + * the priv_cmd_buf. 
The worst case is two sync commands (one ACQ and + * one INCR) per submit so we have a priv_cmd_buf size of: + * + * num_in_flight * (8 + 10) * 4 bytes + */ + size = num_in_flight * 18UL * sizeof(u32); /* 18 = 8 (ACQ) + 10 (INCR) words */ + if (gpfifo_based) { + size = 2U * size / 3U; + } + + tmp_size = PAGE_ALIGN(roundup_pow_of_two(size)); /* NOTE(review): the mask-based put wrap in nvgpu_channel_alloc_priv_cmdbuf() presumably relies on this power-of-two size - confirm */ + nvgpu_assert(tmp_size <= U32_MAX); + size = (u32)tmp_size; + + err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem); + if (err != 0) { + nvgpu_err(g, "%s: memory allocation failed", __func__); + goto clean_up; + } + + tmp_size = q->mem.size / sizeof(u32); /* q->size is counted in u32 entries */ + nvgpu_assert(tmp_size <= U32_MAX); + q->size = (u32)tmp_size; + + return 0; + +clean_up: + channel_free_priv_cmd_q(ch); /* NOTE(review): returns early while q->size == 0U, so this looks like a no-op on this path - confirm */ + return err; +} + +void channel_free_priv_cmd_q(struct nvgpu_channel *ch) /* unmap and free the queue memory, then zero the queue state */ +{ + struct vm_gk20a *ch_vm = ch->vm; + struct priv_cmd_queue *q = &ch->priv_cmd_q; + + if (q->size == 0U) { /* size is only set after a successful allocation and is zeroed below */ + return; + } + + nvgpu_dma_unmap_free(ch_vm, &q->mem); + + (void) memset(q, 0, sizeof(struct priv_cmd_queue)); +} + +/* allocate a cmd buffer with given size. size is number of u32 entries. Returns 0, -EINVAL if e is NULL, or -EAGAIN if the queue lacks free space. */ +int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, + struct priv_cmd_entry *e) +{ + struct priv_cmd_queue *q = &c->priv_cmd_q; + u32 free_count; + u32 size = orig_size; + + nvgpu_log_fn(c->g, "size %d", orig_size); /* NOTE(review): orig_size is u32 but is printed with %d */ + + if (e == NULL) { + nvgpu_err(c->g, + "ch %d: priv cmd entry is null", + c->chid); + return -EINVAL; + } + + /* if free space in the end is less than requested, increase the size + * to make the real allocated space start from beginning. 
*/ + if (q->put + size > q->size) { + size = orig_size + (q->size - q->put); /* the skipped tail words are charged to this request */ + } + + nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d", + c->chid, q->get, q->put); + + free_count = (q->size - (q->put - q->get) - 1U) % q->size; /* the - 1U keeps one word unused, so the queue is never filled completely */ + + if (size > free_count) { + return -EAGAIN; + } + + e->size = orig_size; + e->mem = &q->mem; + + /* if we have increased size to skip free space in the end, set put + to beginning of cmd buffer (0) + size */ + if (size != orig_size) { + e->off = 0; + e->gva = q->mem.gpu_va; + q->put = orig_size; + } else { + e->off = q->put; + e->gva = q->mem.gpu_va + q->put * sizeof(u32); /* put is in words; scaled by sizeof(u32) for the address */ + q->put = (q->put + orig_size) & (q->size - 1U); /* wrap by masking; NOTE(review): assumes q->size is a power of two - confirm */ + } + + /* we already handled q->put + size > q->size so BUG_ON this */ + BUG_ON(q->put > q->size); + + /* + * commit the previous writes before making the entry valid. + * see the corresponding nvgpu_smp_rmb() in + * nvgpu_channel_update_priv_cmd_q_and_free_entry(). + */ + nvgpu_smp_wmb(); + + e->valid = true; + nvgpu_log_fn(c->g, "done"); + + return 0; +} + +/* + * Don't call this to free an explicit cmd entry. + * It doesn't update priv_cmd_queue get/put. 
+ */ +void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, + struct priv_cmd_entry *e) +{ + if (nvgpu_channel_is_prealloc_enabled(c)) { + (void) memset(e, 0, sizeof(struct priv_cmd_entry)); /* cleared in place instead of freed; presumably the entry lives in preallocated storage - confirm */ + } else { + nvgpu_kfree(c->g, e); + } +} + +void nvgpu_channel_update_priv_cmd_q_and_free_entry( + struct nvgpu_channel *ch, struct priv_cmd_entry *e) +{ /* advance the queue get offset past a valid entry, then free the entry; a NULL entry is ignored */ + struct priv_cmd_queue *q = &ch->priv_cmd_q; + struct gk20a *g = ch->g; + + if (e == NULL) { + return; + } + + if (e->valid) { + /* read the entry's valid flag before reading its contents */ + nvgpu_smp_rmb(); + if ((q->get != e->off) && e->off != 0U) { /* off == 0 is exempt: an entry that wrapped to the buffer start does not begin at get */ + nvgpu_err(g, "requests out-of-order, ch=%d", + ch->chid); + } + q->get = e->off + e->size; /* both off and size are in u32 entries */ + } + + nvgpu_channel_free_priv_cmd_entry(ch, e); +} diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 1c47aa5f6..cdaa0af20 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -26,8 +26,10 @@ #include #include #include +#include #include #include +#include #include #include #include diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c index 64510767f..ee2314844 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "channel_sync_priv.h" diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c index adcdd6026..5d0e68222 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c index 7670bf119..5c3da8e4e 100644 --- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c 
+++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sema_cmdbuf_gk20a.h" diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c index cd6393f62..f6cc5b1a0 100644 --- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c +++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sema_cmdbuf_gv11b.h" diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c index 90e9e8633..34f65fc6d 100644 --- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c +++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "syncpt_cmdbuf_gk20a.h" diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c index 0fe1b70a4..69cfc0198 100644 --- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c +++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 0840bcdb7..cdef0544a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -46,6 +46,7 @@ struct nvgpu_gpfifo_userdata; struct nvgpu_gr_subctx; struct nvgpu_gr_ctx; struct nvgpu_debug_context; +struct priv_cmd_entry; /** * S/W defined invalid channel identifier. 
@@ -257,15 +258,6 @@ struct priv_cmd_queue { u32 get; /* get for priv cmd queue */ }; -struct priv_cmd_entry { - bool valid; - struct nvgpu_mem *mem; - u32 off; /* offset in mem, in u32 entries */ - u64 gva; - u32 get; /* start of entry in queue */ - u32 size; /* in words */ -}; - struct nvgpu_channel_job { struct nvgpu_mapped_buf **mapped_buffers; u32 num_mapped_buffers; @@ -619,10 +611,6 @@ nvgpu_channel_from_worker_item(struct nvgpu_list_node *node) return (struct nvgpu_channel *) ((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item)); }; -int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, - struct priv_cmd_entry *e); -void nvgpu_channel_update_priv_cmd_q_and_free_entry( - struct nvgpu_channel *ch, struct priv_cmd_entry *e); int nvgpu_channel_worker_init(struct gk20a *g); void nvgpu_channel_worker_deinit(struct gk20a *g); void nvgpu_channel_update(struct nvgpu_channel *c); @@ -636,8 +624,6 @@ u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch); int nvgpu_channel_add_job(struct nvgpu_channel *c, struct nvgpu_channel_job *job, bool skip_buffer_refcounting); -void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, - struct priv_cmd_entry *e); void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, bool clean_all); int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, diff --git a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h new file mode 100644 index 000000000..2c98c8482 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PRIV_CMDBUF_H +#define NVGPU_PRIV_CMDBUF_H + +#include + +struct gk20a; /* NOTE(review): not referenced by any declaration in this header */ +struct nvgpu_mem; +struct nvgpu_channel; + +struct priv_cmd_entry { /* one allocation carved out of a channel's priv cmd queue */ + bool valid; /* set last by nvgpu_channel_alloc_priv_cmdbuf(), after a write barrier */ + struct nvgpu_mem *mem; /* points at the queue's backing memory (&q->mem) */ + u32 off; /* offset in mem, in u32 entries */ + u64 gva; /* GPU virtual address of the entry: mem gpu_va plus off scaled by sizeof(u32) */ + u32 get; /* start of entry in queue; NOTE(review): not touched by the functions in priv_cmdbuf.c - confirm it is still used */ + u32 size; /* in words */ +}; + +int channel_alloc_priv_cmdbuf(struct nvgpu_channel *ch, u32 num_in_flight); /* NOTE(review): exported without the nvgpu_ prefix the functions below use */ +void channel_free_priv_cmd_q(struct nvgpu_channel *ch); /* NOTE(review): same naming remark as above */ + +int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, + struct priv_cmd_entry *e); /* orig_size is in u32 entries; returns 0, -EINVAL or -EAGAIN */ +void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, + struct priv_cmd_entry *e); /* releases the entry only; does not update the queue get/put */ +void nvgpu_channel_update_priv_cmd_q_and_free_entry(struct nvgpu_channel *ch, + struct priv_cmd_entry *e); /* advances the queue get offset for a valid entry, then frees it; NULL e is ignored */ + +#endif /* NVGPU_PRIV_CMDBUF_H */