Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)

Commit: Open source GPL/LGPL release
drivers/gpu/nvgpu/common/semaphore/semaphore.c (new file, 175 lines)
@@ -0,0 +1,175 @@
/*
 * Nvgpu Semaphores
 *
 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/sizes.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/semaphore.h>

#include "semaphore_priv.h"

/*
 * Allocate a semaphore value object from an underlying hw counter.
 *
 * Since semaphores are ref-counted there's no explicit free for external code
 * to use. When the ref-count hits 0 the internal free will happen.
 */
struct nvgpu_semaphore *nvgpu_semaphore_alloc(
		struct nvgpu_hw_semaphore *hw_sema)
{
	struct nvgpu_semaphore_pool *pool = hw_sema->location.pool;
	struct gk20a *g = pool->sema_sea->gk20a;
	struct nvgpu_semaphore *s;

	s = nvgpu_kzalloc(g, sizeof(*s));
	if (s == NULL) {
		return NULL;
	}

	nvgpu_ref_init(&s->ref);
	s->g = g;
	s->location = hw_sema->location;
	nvgpu_atomic_set(&s->value, 0);

	/*
	 * Take a ref on the pool so that we can keep this pool alive for
	 * as long as this semaphore is alive.
	 */
	nvgpu_semaphore_pool_get(pool);

	gpu_sema_dbg(g, "Allocated semaphore (c=%d)", hw_sema->chid);

	return s;
}
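
/*
 * Lifecycle sketch (illustrative only; "ch" and its "hw_sema" field are
 * assumed names, not taken from this file): a caller allocates a semaphore
 * against a channel's HW counter, hands it out as a fence, and drops its
 * reference when done - the last nvgpu_semaphore_put() triggers the free.
 *
 *	struct nvgpu_semaphore *s = nvgpu_semaphore_alloc(ch->hw_sema);
 *
 *	if (s == NULL)
 *		return -ENOMEM;
 *	... use s as a fence ...
 *	nvgpu_semaphore_put(s);
 */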

static struct nvgpu_semaphore *nvgpu_semaphore_from_ref(struct nvgpu_ref *ref)
{
	return (struct nvgpu_semaphore *)
		((uintptr_t)ref - offsetof(struct nvgpu_semaphore, ref));
}

static void nvgpu_semaphore_free(struct nvgpu_ref *ref)
{
	struct nvgpu_semaphore *s = nvgpu_semaphore_from_ref(ref);

	nvgpu_semaphore_pool_put(s->location.pool);

	nvgpu_kfree(s->g, s);
}

void nvgpu_semaphore_put(struct nvgpu_semaphore *s)
{
	nvgpu_ref_put(&s->ref, nvgpu_semaphore_free);
}

void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
{
	nvgpu_ref_get(&s->ref);
}

/*
 * Return the address of a specific semaphore.
 *
 * Don't call this on a semaphore you don't own - the VA returned will make no
 * sense in your specific channel's VM.
 */
u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
{
	return nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
		s->location.offset;
}

/*
 * Get the global RO address for the semaphore. Can be called on any semaphore
 * regardless of whether you own it.
 */
u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
{
	return nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
		s->location.offset;
}

/*
 * Read the underlying value from a semaphore.
 */
u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
{
	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
			s->location.offset);
}

u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
{
	return (u32)nvgpu_atomic_read(&s->value);
}

bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
{
	u32 sema_val = nvgpu_semaphore_read(s);
	u32 wait_payload = nvgpu_semaphore_get_value(s);

	return nvgpu_semaphore_value_released(wait_payload, sema_val);
}

bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
{
	return !nvgpu_semaphore_is_released(s);
}
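
/*
 * Polling sketch (illustrative only; the sleep call and policy are
 * assumptions, not taken from this file): a CPU-side waiter can poll until
 * the GPU has written a value at or past the threshold stored in the sema.
 *
 *	while (!nvgpu_semaphore_is_released(s)) {
 *		nvgpu_usleep_range(50, 100);
 *	}
 */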

bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s)
{
	return s->ready_to_wait;
}

/*
 * Update the nvgpu-tracked shadow of the value in "hw_sema" and store in "s"
 * the threshold value that represents the increment the caller must write in
 * a pushbuf. The same nvgpu_semaphore will also represent an output fence;
 * when nvgpu_semaphore_is_released(s) == true, the gpu is done with this
 * increment.
 */
void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
		struct nvgpu_hw_semaphore *hw_sema)
{
	/* One submission increments the next value by one. */
	int next = nvgpu_hw_semaphore_read_next(hw_sema) + 1;

	/* "s" should be an uninitialized sema. */
	WARN_ON(s->ready_to_wait);

	nvgpu_atomic_set(&s->value, next);
	s->ready_to_wait = true;

	gpu_sema_verbose_dbg(s->g, "PREP sema for c=%d (%u)",
			hw_sema->chid, next);
}
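
/*
 * Submit-side sketch (the ordering here is inferred from this file and
 * semaphore_hw.c, not copied from nvgpu's actual submit path): each job
 * prepares its fence at next_value + 1, emits GPU methods that will release
 * that value, then bumps the tracked counter for the next job.
 *
 *	nvgpu_semaphore_prepare(s, hw_sema);
 *	... emit pushbuf entries releasing the semaphore at its threshold ...
 *	(void)nvgpu_hw_semaphore_update_next(hw_sema);
 */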

u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s)
{
	return nvgpu_semaphore_pool_get_page_idx(s->location.pool);
}
drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c (new file, 168 lines)
@@ -0,0 +1,168 @@
/*
 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/vm.h>
#include <nvgpu/mm.h>
#include <nvgpu/bug.h>
#include <nvgpu/semaphore.h>

#include "semaphore_priv.h"

int nvgpu_hw_semaphore_init(struct vm_gk20a *vm, u32 chid,
		struct nvgpu_hw_semaphore **new_sema)
{
	struct nvgpu_semaphore_pool *p = vm->sema_pool;
	struct nvgpu_hw_semaphore *hw_sema;
	struct gk20a *g = vm->mm->g;
	int current_value;
	int hw_sema_idx;
	int ret = 0;

	nvgpu_assert(p != NULL);

	nvgpu_mutex_acquire(&p->pool_lock);

	/* Find an available HW semaphore. */
	hw_sema_idx = semaphore_bitmap_alloc(p->semas_alloced,
			NVGPU_CPU_PAGE_SIZE / SEMAPHORE_SIZE);
	if (hw_sema_idx < 0) {
		ret = hw_sema_idx;
		goto fail;
	}

	hw_sema = nvgpu_kzalloc(g, sizeof(struct nvgpu_hw_semaphore));
	if (hw_sema == NULL) {
		ret = -ENOMEM;
		goto fail_free_idx;
	}

	hw_sema->chid = chid;
	hw_sema->location.pool = p;
	hw_sema->location.offset = SEMAPHORE_SIZE * (u32)hw_sema_idx;
	current_value = (int)nvgpu_mem_rd(g, &p->rw_mem,
			hw_sema->location.offset);
	nvgpu_atomic_set(&hw_sema->next_value, current_value);

	nvgpu_mutex_release(&p->pool_lock);

	*new_sema = hw_sema;
	return 0;

fail_free_idx:
	nvgpu_clear_bit((u32)hw_sema_idx, p->semas_alloced);
fail:
	nvgpu_mutex_release(&p->pool_lock);
	return ret;
}
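
/*
 * Setup sketch (illustrative only; "ch" and its fields are assumed names):
 * a channel that syncs via semaphores grabs one 16-byte HW semaphore slot
 * from its VM's pool at init time.
 *
 *	err = nvgpu_hw_semaphore_init(ch->vm, ch->chid, &ch->hw_sema);
 *	if (err != 0)
 *		return err;
 */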

/*
 * Free the semaphore index used by the channel.
 */
void nvgpu_hw_semaphore_free(struct nvgpu_hw_semaphore *hw_sema)
{
	struct nvgpu_semaphore_pool *p = hw_sema->location.pool;
	int idx = (int)(hw_sema->location.offset / SEMAPHORE_SIZE);
	struct gk20a *g;

	/* Check the pool before dereferencing it for "g". */
	nvgpu_assert(p != NULL);
	g = p->sema_sea->gk20a;

	nvgpu_mutex_acquire(&p->pool_lock);

	nvgpu_clear_bit((u32)idx, p->semas_alloced);

	nvgpu_kfree(g, hw_sema);

	nvgpu_mutex_release(&p->pool_lock);
}

u64 nvgpu_hw_semaphore_addr(struct nvgpu_hw_semaphore *hw_sema)
{
	return nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
		hw_sema->location.offset;
}

u32 nvgpu_hw_semaphore_read(struct nvgpu_hw_semaphore *hw_sema)
{
	struct nvgpu_semaphore_pool *pool = hw_sema->location.pool;
	struct gk20a *g = pool->sema_sea->gk20a;

	return nvgpu_mem_rd(g, &pool->rw_mem, hw_sema->location.offset);
}

/*
 * Fast-forward the hw sema to its tracked max value.
 *
 * Return true if the sema wasn't at the max value and needed updating, false
 * otherwise.
 */
bool nvgpu_hw_semaphore_reset(struct nvgpu_hw_semaphore *hw_sema)
{
	struct nvgpu_semaphore_pool *pool = hw_sema->location.pool;
	struct gk20a *g = pool->sema_sea->gk20a;
	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
	u32 current_val = nvgpu_hw_semaphore_read(hw_sema);

	/*
	 * If the semaphore has already reached the value we would write then
	 * this is really just a NO-OP. However, the sema value shouldn't be
	 * more than what we expect to be the max.
	 */
	bool is_released = nvgpu_semaphore_value_released(threshold + 1U,
			current_val);

	nvgpu_assert(!is_released);

	if (is_released) {
		return false;
	}

	if (current_val == threshold) {
		return false;
	}

	nvgpu_mem_wr(g, &pool->rw_mem, hw_sema->location.offset, threshold);

	gpu_sema_verbose_dbg(g, "(c=%d) RESET %u -> %u",
			hw_sema->chid, current_val, threshold);

	return true;
}
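
/*
 * Worked example for the reset above: if next_value is 5 but the memory
 * value reads back as 3 (two increments never landed, e.g. because the
 * channel died - the recovery scenario is an assumption), the function
 * writes 5 and returns true. If the value already reads 5, it returns
 * false and nothing is written.
 */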

int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema)
{
	return nvgpu_atomic_read(&hw_sema->next_value);
}

int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema)
{
	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
	struct nvgpu_semaphore_pool *p = hw_sema->location.pool;
	struct gk20a *g = p->sema_sea->gk20a;

	gpu_sema_verbose_dbg(g, "INCR sema for c=%d (%u)",
			hw_sema->chid, next);
	return next;
}
drivers/gpu/nvgpu/common/semaphore/semaphore_pool.c (new file, 256 lines)
@@ -0,0 +1,256 @@
/*
 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/log.h>
#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/vm.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/bug.h>
#include <nvgpu/semaphore.h>

#include "semaphore_priv.h"

#define pool_to_gk20a(p) ((p)->sema_sea->gk20a)

/*
 * Allocate a pool from the sea.
 */
int nvgpu_semaphore_pool_alloc(struct nvgpu_semaphore_sea *sea,
		struct nvgpu_semaphore_pool **pool)
{
	struct nvgpu_semaphore_pool *p;
	unsigned long page_idx;
	int ret;

	p = nvgpu_kzalloc(sea->gk20a, sizeof(*p));
	if (p == NULL) {
		return -ENOMEM;
	}

	nvgpu_semaphore_sea_lock(sea);

	nvgpu_mutex_init(&p->pool_lock);

	ret = semaphore_bitmap_alloc(sea->pools_alloced,
			SEMAPHORE_POOL_COUNT);
	if (ret < 0) {
		goto fail;
	}

	page_idx = (unsigned long)ret;

	p->page_idx = page_idx;
	p->sema_sea = sea;
	nvgpu_init_list_node(&p->pool_list_entry);
	nvgpu_ref_init(&p->ref);

	sea->page_count++;
	nvgpu_list_add(&p->pool_list_entry, &sea->pool_list);
	nvgpu_semaphore_sea_unlock(sea);

	gpu_sema_dbg(sea->gk20a,
			"Allocated semaphore pool: page-idx=%llu", p->page_idx);

	*pool = p;
	return 0;

fail:
	nvgpu_mutex_destroy(&p->pool_lock);
	nvgpu_semaphore_sea_unlock(sea);
	nvgpu_kfree(sea->gk20a, p);
	gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!");
	return ret;
}

/*
 * Map a pool into the passed vm's address space. This handles both the fixed
 * global RO mapping and the non-fixed private RW mapping.
 */
int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
		struct vm_gk20a *vm)
{
	int err = 0;
	u64 addr;

	if (p->mapped) {
		return -EBUSY;
	}

	gpu_sema_dbg(pool_to_gk20a(p),
			"Mapping semaphore pool! (idx=%llu)", p->page_idx);

	/*
	 * Take the sea lock so that we don't race with a possible change to
	 * the nvgpu_mem in the sema sea.
	 */
	nvgpu_semaphore_sea_lock(p->sema_sea);

	addr = nvgpu_gmmu_map_fixed(vm, &p->sema_sea->sea_mem,
			p->sema_sea->gpu_va,
			p->sema_sea->map_size,
			0, gk20a_mem_flag_read_only, 0,
			p->sema_sea->sea_mem.aperture);
	if (addr == 0ULL) {
		err = -ENOMEM;
		goto fail_unlock;
	}

	p->gpu_va_ro = addr;
	p->mapped = true;

	gpu_sema_dbg(pool_to_gk20a(p),
			"  %llu: GPU read-only VA = 0x%llx",
			p->page_idx, p->gpu_va_ro);

	/*
	 * Now the RW mapping. This is a bit more complicated. We make a
	 * nvgpu_mem describing a page of the bigger RO space and then map
	 * that. Unlike above this does not need to be a fixed address.
	 */
	err = nvgpu_mem_create_from_mem(vm->mm->g,
			&p->rw_mem, &p->sema_sea->sea_mem,
			p->page_idx, 1UL);
	if (err != 0) {
		goto fail_unmap;
	}

	addr = nvgpu_gmmu_map(vm, &p->rw_mem, SZ_4K, 0,
			gk20a_mem_flag_none, 0,
			p->rw_mem.aperture);

	if (addr == 0ULL) {
		err = -ENOMEM;
		goto fail_free_submem;
	}

	p->gpu_va = addr;

	nvgpu_semaphore_sea_unlock(p->sema_sea);

	gpu_sema_dbg(pool_to_gk20a(p),
			"  %llu: GPU read-write VA = 0x%llx",
			p->page_idx, p->gpu_va);
	gpu_sema_dbg(pool_to_gk20a(p),
			"  %llu: CPU VA = 0x%p",
			p->page_idx, p->rw_mem.cpu_va);

	return 0;

fail_free_submem:
	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
fail_unmap:
	nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro);
	gpu_sema_dbg(pool_to_gk20a(p),
			"  %llu: Failed to map semaphore pool!", p->page_idx);
fail_unlock:
	nvgpu_semaphore_sea_unlock(p->sema_sea);
	return err;
}

/*
 * Unmap a semaphore_pool.
 */
void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
		struct vm_gk20a *vm)
{
	nvgpu_semaphore_sea_lock(p->sema_sea);

	nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro);
	nvgpu_gmmu_unmap(vm, &p->rw_mem, p->gpu_va);
	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);

	p->gpu_va = 0;
	p->gpu_va_ro = 0;
	p->mapped = false;

	nvgpu_semaphore_sea_unlock(p->sema_sea);

	gpu_sema_dbg(pool_to_gk20a(p),
			"Unmapped semaphore pool! (idx=%llu)", p->page_idx);
}

static struct nvgpu_semaphore_pool *
nvgpu_semaphore_pool_from_ref(struct nvgpu_ref *ref)
{
	return (struct nvgpu_semaphore_pool *)
		((uintptr_t)ref - offsetof(struct nvgpu_semaphore_pool, ref));
}

/*
 * Completely free a semaphore_pool. You should make sure this pool is not
 * mapped otherwise there's going to be a memory leak.
 */
static void nvgpu_semaphore_pool_free(struct nvgpu_ref *ref)
{
	struct nvgpu_semaphore_pool *p = nvgpu_semaphore_pool_from_ref(ref);
	struct nvgpu_semaphore_sea *s = p->sema_sea;

	/* Freeing a mapped pool is a bad idea. */
	WARN_ON((p->mapped) ||
		(p->gpu_va != 0ULL) ||
		(p->gpu_va_ro != 0ULL));

	nvgpu_semaphore_sea_lock(s);
	nvgpu_list_del(&p->pool_list_entry);
	nvgpu_clear_bit((u32)p->page_idx, s->pools_alloced);
	s->page_count--;
	nvgpu_semaphore_sea_unlock(s);

	nvgpu_mutex_destroy(&p->pool_lock);

	gpu_sema_dbg(pool_to_gk20a(p),
			"Freed semaphore pool! (idx=%llu)", p->page_idx);
	nvgpu_kfree(p->sema_sea->gk20a, p);
}

void nvgpu_semaphore_pool_get(struct nvgpu_semaphore_pool *p)
{
	nvgpu_ref_get(&p->ref);
}

void nvgpu_semaphore_pool_put(struct nvgpu_semaphore_pool *p)
{
	nvgpu_ref_put(&p->ref, nvgpu_semaphore_pool_free);
}

/*
 * Get the address for a semaphore_pool - if global is true then return the
 * global RO address instead of the RW address owned by the semaphore's VM.
 */
u64 nvgpu_semaphore_pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global)
{
	if (!global) {
		return p->gpu_va;
	}

	return p->gpu_va_ro + (NVGPU_CPU_PAGE_SIZE * p->page_idx);
}
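
/*
 * Address layout example (4 KiB pages assumed for illustration): the RO
 * mapping spans the whole sea, so pool N starts at gpu_va_ro + N * 4096,
 * while the RW mapping covers only this pool's own page. A semaphore at
 * byte offset 32 in pool 3 therefore lives at gpu_va_ro + 3 * 4096 + 32
 * for any channel (RO) and at gpu_va + 32 for the owning VM (RW).
 */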

/*
 * Return the index into the sea bitmap.
 */
u64 nvgpu_semaphore_pool_get_page_idx(struct nvgpu_semaphore_pool *p)
{
	return p->page_idx;
}
drivers/gpu/nvgpu/common/semaphore/semaphore_priv.h (new file, 188 lines)
@@ -0,0 +1,188 @@
/*
 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_SEMAPHORE_PRIV_H
#define NVGPU_SEMAPHORE_PRIV_H

#include <nvgpu/errno.h>
#include <nvgpu/types.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <nvgpu/atomic.h>
#include <nvgpu/kref.h>
#include <nvgpu/nvgpu_mem.h>

struct gk20a;

/*
 * The number of channels that can get a sema from a VM's pool is determined
 * by the pool size (one page) divided by this sema size.
 */
#define SEMAPHORE_SIZE		16U
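/*
 * Worked arithmetic (assuming 4 KiB CPU pages): 4096 / 16 = 256 HW
 * semaphores per pool, i.e. up to 256 channels can share one VM's pool.
 */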
/*
 * Max number of VMs that can be used is 512. This of course needs to be fixed
 * to be dynamic but still fast.
 */
#define SEMAPHORE_POOL_COUNT	512U

/*
 * A sea of semaphore pools. Each pool is owned by a single VM. Since multiple
 * channels can share a VM, each channel gets its own HW semaphore from the
 * pool. Channels then allocate regular semaphores - basically just a value
 * that signifies when a particular job is done.
 */
struct nvgpu_semaphore_sea {
	struct nvgpu_list_node pool_list;	/* List of pools in this sea. */
	struct gk20a *gk20a;

	size_t size;		/* Number of pages available. */
	u64 gpu_va;		/* GPU virtual address of sema sea. */
	u64 map_size;		/* Size of the mapping. */

	/*
	 * TODO:
	 * List of pages that we use to back the pools. The number of pages
	 * should grow dynamically since allocating 512 pages for all VMs at
	 * once would be a tremendous waste.
	 */
	int page_count;		/* Pages allocated to pools. */

	/*
	 * The read-only memory for the entire semaphore sea. Each semaphore
	 * pool needs a sub-nvgpu_mem that will be mapped as RW in its address
	 * space. This sea_mem cannot be freed until all semaphore_pools have
	 * been freed.
	 */
	struct nvgpu_mem sea_mem;

	/*
	 * Can't use a regular allocator here since the full range of pools is
	 * not always allocated. Instead just use a bitmap.
	 */
	DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT);

	struct nvgpu_mutex sea_lock;	/* Lock alloc/free calls. */
};

/*
 * A semaphore pool. Each address space will own exactly one of these.
 */
struct nvgpu_semaphore_pool {
	struct nvgpu_list_node pool_list_entry;	/* Node for list of pools. */
	u64 gpu_va;		/* GPU read-write access to the pool. */
	u64 gpu_va_ro;		/* GPU read-only access to the pool. */
	u64 page_idx;		/* Index into sea bitmap. */

	DECLARE_BITMAP(semas_alloced, NVGPU_CPU_PAGE_SIZE / SEMAPHORE_SIZE);

	struct nvgpu_semaphore_sea *sema_sea;	/* Sea that owns this pool. */

	struct nvgpu_mutex pool_lock;

	/*
	 * This is the address space's personal RW table. Other channels will
	 * ultimately map this page as RO. This is a sub-nvgpu_mem from the
	 * sea's mem.
	 */
	struct nvgpu_mem rw_mem;

	bool mapped;

	/*
	 * Sometimes a channel and its VM can be released before other channels
	 * are done waiting on it. This ref count ensures that the pool doesn't
	 * go away until all semaphores using this pool are cleaned up first.
	 */
	struct nvgpu_ref ref;
};

struct nvgpu_semaphore_loc {
	struct nvgpu_semaphore_pool *pool;	/* Pool that owns this sema. */
	u32 offset;		/* Byte offset into the pool. */
};

/*
 * Underlying semaphore data structure. This semaphore can be shared amongst
 * instances of nvgpu_semaphore via the location in its pool.
 */
struct nvgpu_hw_semaphore {
	struct nvgpu_semaphore_loc location;
	nvgpu_atomic_t next_value;	/* Next available value. */
	u32 chid;		/* Owner, for debugging. */
};

/*
 * A semaphore which the rest of the driver actually uses. This consists of a
 * reference to a real semaphore location and a value to wait for. This allows
 * one physical semaphore to be shared among an essentially infinite number of
 * submits.
 */
struct nvgpu_semaphore {
	struct gk20a *g;
	struct nvgpu_semaphore_loc location;

	nvgpu_atomic_t value;
	bool ready_to_wait;

	struct nvgpu_ref ref;
};

static inline int semaphore_bitmap_alloc(unsigned long *bitmap,
		unsigned long len)
{
	unsigned long idx = find_first_zero_bit(bitmap, len);

	if (idx == len) {
		return -ENOSPC;
	}

	nvgpu_set_bit((u32)idx, bitmap);

	return (int)idx;
}

/*
 * Check if "racer" is over "goal" with wraparound handling.
 */
static inline bool nvgpu_semaphore_value_released(u32 goal, u32 racer)
{
	/*
	 * Handle wraparound with the same heuristic as the hardware does:
	 * although the integer will eventually wrap around, consider a sema
	 * released against a threshold if its value has passed that threshold
	 * but has not wrapped over half of the u32 range over that threshold;
	 * such wrapping is unlikely to happen during a sema lifetime.
	 *
	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
	 * then it needs 0x80000000 increments to wrap again and signal.
	 *
	 * Unsigned arithmetic is used because it's well-defined. This is
	 * effectively the same as: signed_racer - signed_goal >= 0.
	 */
	return racer - goal < 0x80000000U;
}
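
/*
 * Worked examples (values chosen for illustration): with goal = 0xfffffff0
 * and racer = 0x00000010, racer - goal wraps to 0x20, which is less than
 * 0x80000000, so the sema counts as released even though racer < goal
 * numerically. Conversely, goal = 0x10 and racer = 0xfffffff0 gives
 * 0xffffffe0, which is not less than 0x80000000, so it is not released.
 */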

#endif /* NVGPU_SEMAPHORE_PRIV_H */
drivers/gpu/nvgpu/common/semaphore/semaphore_sea.c (new file, 142 lines)
@@ -0,0 +1,142 @@
/*
 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/log.h>
#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/semaphore.h>

#include "semaphore_priv.h"

void nvgpu_semaphore_sea_lock(struct nvgpu_semaphore_sea *s)
{
	gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock...");
	nvgpu_mutex_acquire(&s->sea_lock);
	gpu_sema_verbose_dbg(s->gk20a, "Sema lock acquired!");
}

void nvgpu_semaphore_sea_unlock(struct nvgpu_semaphore_sea *s)
{
	nvgpu_mutex_release(&s->sea_lock);
	gpu_sema_verbose_dbg(s->gk20a, "Released sema lock");
}

static int semaphore_sea_grow(struct nvgpu_semaphore_sea *sea)
{
	int ret = 0;
	struct gk20a *g = sea->gk20a;
	u32 i;

	nvgpu_semaphore_sea_lock(sea);

	ret = nvgpu_dma_alloc_sys(g,
			NVGPU_CPU_PAGE_SIZE * SEMAPHORE_POOL_COUNT,
			&sea->sea_mem);
	if (ret != 0) {
		goto out;
	}

	sea->size = SEMAPHORE_POOL_COUNT;
	sea->map_size = SEMAPHORE_POOL_COUNT * NVGPU_CPU_PAGE_SIZE;

	/*
	 * Start the semaphores at values that will soon overflow the 32-bit
	 * integer range. This way any buggy comparisons would start to fail
	 * sooner rather than later.
	 */
	for (i = 0U; i < NVGPU_CPU_PAGE_SIZE * SEMAPHORE_POOL_COUNT; i += 4U) {
		nvgpu_mem_wr(g, &sea->sea_mem, i, 0xfffffff0U);
	}

out:
	nvgpu_semaphore_sea_unlock(sea);
	return ret;
}
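
/*
 * Example of the intent above: a fresh semaphore starts at 0xfffffff0, so
 * it wraps past zero after only 16 increments. A comparison that ignores
 * wraparound (e.g. a plain "current >= threshold") would misbehave within
 * the first few jobs rather than after ~4 billion increments.
 */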

/*
 * Return the sema_sea pointer.
 */
struct nvgpu_semaphore_sea *nvgpu_semaphore_get_sea(struct gk20a *g)
{
	return g->sema_sea;
}

void nvgpu_semaphore_sea_allocate_gpu_va(struct nvgpu_semaphore_sea *s,
		struct nvgpu_allocator *a, u64 base, u64 len, u32 page_size)
{
	s->gpu_va = nvgpu_alloc_fixed(a, base, len, page_size);
}

u64 nvgpu_semaphore_sea_get_gpu_va(struct nvgpu_semaphore_sea *s)
{
	return s->gpu_va;
}

/*
 * Create the semaphore sea. Only create it once - subsequent calls to this
 * will return the originally created sea pointer.
 */
struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g)
{
	if (g->sema_sea != NULL) {
		return g->sema_sea;
	}

	g->sema_sea = nvgpu_kzalloc(g, sizeof(*g->sema_sea));
	if (g->sema_sea == NULL) {
		return NULL;
	}

	g->sema_sea->size = 0;
	g->sema_sea->page_count = 0;
	g->sema_sea->gk20a = g;
	nvgpu_init_list_node(&g->sema_sea->pool_list);
	nvgpu_mutex_init(&g->sema_sea->sea_lock);

	if (semaphore_sea_grow(g->sema_sea) != 0) {
		goto cleanup;
	}

	gpu_sema_dbg(g, "Created semaphore sea!");
	return g->sema_sea;

cleanup:
	nvgpu_mutex_destroy(&g->sema_sea->sea_lock);
	nvgpu_kfree(g, g->sema_sea);
	g->sema_sea = NULL;
	gpu_sema_dbg(g, "Failed to create semaphore sea!");
	return NULL;
}

void nvgpu_semaphore_sea_destroy(struct gk20a *g)
{
	if (g->sema_sea == NULL) {
		return;
	}

	nvgpu_dma_free(g, &g->sema_sea->sea_mem);
	nvgpu_mutex_destroy(&g->sema_sea->sea_lock);
	nvgpu_kfree(g, g->sema_sea);
	g->sema_sea = NULL;
}