gpu: nvgpu: create a wrapper over sync_fences

This patch constructs an abstraction to hide the sync_fence
functionality from the common code. struct nvgpu_os_fence acts as an
abstraction for struct sync_fence.

struct nvgpu_os_fence consists of an ops structure named nvgpu_os_fence_ops
which contains an API to do pushbuffer programming to generate wait
commands for the fence.

The current implementation of nvgpu only allows a wait on a
sync_fence that was generated using the same backend (i.e. either
nvhost syncpoints or semaphores). In this patch, a
generic API is introduced which decides the type of the underlying
implementation of the struct nvgpu_os_fence at runtime and runs the
corresponding wait implementation on it.

This patch changes the channel_sync_gk20a's semaphore specific
implementation to use the abstract API. A subsequent patch will make
the changes for the nvhost_syncpoint based implementations as well.

JIRA NVGPU-66

Change-Id: If6675bfde5885c3d15d2ca380bb6c7c0e240e734
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1667218
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Debarshi Dutta
2018-04-18 11:03:02 +05:30
committed by mobile promotions
parent 90b2f780d4
commit 4dfd6e43cf
7 changed files with 379 additions and 78 deletions

View File

@@ -160,7 +160,9 @@ nvgpu-$(CONFIG_TEGRA_GK20A) += \
common/linux/platform_gp10b_tegra.o \
common/linux/platform_gv11b_tegra.o
nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o \
common/linux/os_fence_android.o \
common/linux/os_fence_android_sema.o
nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \
common/linux/pci_usermode.o \

View File

@@ -0,0 +1,71 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include "gk20a/gk20a.h"
#include "../drivers/staging/android/sync.h"
inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s)
{
	/* The priv field carries the Android sync_fence for this os_fence. */
	return (struct sync_fence *)s->priv;
}
/* Reset all fields so the fence reads as uninitialized afterwards. */
static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence)
{
	fence->ops = NULL;
	fence->g = NULL;
	fence->priv = NULL;
}
/*
 * Bind the backend ops table to @fence_out and stash the sync_fence
 * as opaque private data.
 */
void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
	struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
	struct sync_fence *fence)
{
	fence_out->priv = (void *)fence;
	fence_out->ops = fops;
	fence_out->g = g;
}
/*
 * Drop our reference on the wrapped sync_fence and clear the wrapper
 * so it can no longer be used by mistake.
 */
void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
{
	sync_fence_put(nvgpu_get_sync_fence(s));
	nvgpu_os_fence_clear(s);
}
/*
 * Build an nvgpu_os_fence from a sync-fence file descriptor.
 * Currently only the semaphore backend is attempted; returns 0 on
 * success or a negative error code from the backend fdget.
 */
int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
	struct channel_gk20a *c, int fd)
{
	int err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);

	/*
	 * TODO: if the fence could not be obtained here and
	 * CONFIG_TEGRA_GK20A_NVHOST is enabled, fall back to the
	 * corresponding nvhost method to get a sync_fence.
	 */
	if (err)
		nvgpu_err(c->g, "error obtaining fence from fd %d", fd);

	return err;
}

View File

@@ -0,0 +1,107 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/errno.h>
#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include <nvgpu/semaphore.h>
#include "gk20a/sync_gk20a.h"
#include "gk20a/channel_sync_gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "../drivers/staging/android/sync.h"
/*
 * Program pushbuffer semaphore-wait commands for every fence point in
 * the wrapped sync_fence.
 *
 * @s             os_fence wrapping an Android sync_fence (sema backend)
 * @wait_cmd      priv cmdbuf entry that receives the wait commands
 * @c             channel the commands are allocated/emitted on
 * @max_wait_cmds upper bound on fence points; 0 means unlimited
 *
 * Returns 0 on success, -EINVAL if the fence has more points than
 * @max_wait_cmds allows, or the error from cmdbuf allocation.
 */
int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
	struct priv_cmd_entry *wait_cmd,
	struct channel_gk20a *c,
	int max_wait_cmds)
{
	int err;
	/* size of one semaphore wait command, in u32 words */
	const int wait_cmd_size = 8;
	int num_wait_cmds;
	int i;
	struct nvgpu_semaphore *sema;
	struct sync_fence *sync_fence = nvgpu_get_sync_fence(s);

	/* One wait command is emitted per underlying fence point. */
	num_wait_cmds = sync_fence->num_fences;
	if (num_wait_cmds == 0)
		return 0;

	if (max_wait_cmds && num_wait_cmds > max_wait_cmds)
		return -EINVAL;

	/* Reserve space for all wait commands in a single allocation. */
	err = gk20a_channel_alloc_priv_cmdbuf(c,
		wait_cmd_size * num_wait_cmds,
		wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		return err;
	}

	for (i = 0; i < num_wait_cmds; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);

		/*
		 * A NULL sema (expired point) is handled inside
		 * gk20a_channel_gen_sema_wait_cmd by zero-filling the slot.
		 */
		sema = gk20a_sync_pt_sema(pt);
		gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd,
			wait_cmd_size, i);
	}

	return 0;
}
/* nvgpu_os_fence backend ops for semaphore-backed Android sync_fences. */
static const struct nvgpu_os_fence_ops sema_ops = {
	.program_waits = nvgpu_os_fence_sema_wait_gen_cmd,
	.drop_ref = nvgpu_os_fence_android_drop_ref,
};
/*
 * Create a semaphore-backed nvgpu_os_fence on channel @c.
 *
 * Allocates a new Android sync_fence for @sema and initializes
 * @fence_out with the semaphore backend ops. Returns 0 on success or
 * -ENOMEM if the sync_fence could not be created.
 */
int nvgpu_os_fence_sema_create(
	struct nvgpu_os_fence *fence_out,
	struct channel_gk20a *c,
	struct nvgpu_semaphore *sema)
{
	struct sync_fence *fence;

	/*
	 * Cast the RO VA to u32 to match the %04x conversion: passing a
	 * wider integer through the varargs would be a format mismatch.
	 * The error path below already applies the same cast.
	 */
	fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x",
		(u32)nvgpu_semaphore_gpu_ro_va(sema));

	if (!fence) {
		nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x",
			(u32)nvgpu_semaphore_gpu_ro_va(sema));

		return -ENOMEM;
	}

	nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);

	return 0;
}
int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c, int fd)
{
struct sync_fence *fence = gk20a_sync_fence_fdget(fd);
if (!fence)
return -EINVAL;
nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);
return 0;
}

View File

@@ -29,6 +29,7 @@
#include <nvgpu/bug.h>
#include <nvgpu/list.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/os_fence.h>
#include "channel_sync_gk20a.h"
#include "gk20a.h"
@@ -472,6 +473,23 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
va, cmd->gva, cmd->mem->gpu_va, ob);
}
/*
 * Emit one semaphore wait command into slot @pos of @wait_cmd.
 *
 * @sema may be NULL for an already-expired fence point; in that case
 * the slot is zero-filled so it executes as a no-op. Otherwise a wait
 * command is generated and the reference on @sema is dropped.
 * @wait_cmd_size is the slot size in u32 words.
 */
void gk20a_channel_gen_sema_wait_cmd(struct channel_gk20a *c,
	struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd,
	u32 wait_cmd_size, int pos)
{
	if (!sema) {
		/* expired: zero-fill this slot so the GPU skips it */
		nvgpu_memset(c->g, wait_cmd->mem,
			(wait_cmd->off + pos * wait_cmd_size) * sizeof(u32),
			0, wait_cmd_size * sizeof(u32));
	} else {
		WARN_ON(!sema->incremented);
		add_sema_cmd(c->g, c, sema, wait_cmd,
			pos * wait_cmd_size, true, false);
		nvgpu_semaphore_put(sema);
	}
}
static int gk20a_channel_semaphore_wait_syncpt(
struct gk20a_channel_sync *s, u32 id,
u32 thresh, struct priv_cmd_entry *entry)
@@ -483,64 +501,6 @@ static int gk20a_channel_semaphore_wait_syncpt(
return -ENODEV;
}
#ifdef CONFIG_SYNC
/*
 * Legacy path: generate semaphore wait commands for every point of the
 * sync_fence behind @fd, directly on channel @c.
 *
 * Returns 0 on success (including an empty fence), -EINVAL for a bad
 * fd or too many fence points, or the cmdbuf allocation error. The
 * sync_fence reference taken by fdget is always released before
 * returning.
 */
static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd,
	struct priv_cmd_entry *wait_cmd, int max_wait_cmds)
{
	struct sync_fence *sync_fence;
	int err;
	/* size of one semaphore wait command, in u32 words */
	const int wait_cmd_size = 8;
	int num_wait_cmds;
	int i;

	sync_fence = gk20a_sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	num_wait_cmds = sync_fence->num_fences;
	if (num_wait_cmds == 0) {
		err = 0;
		goto put_fence;
	}

	if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) {
		err = -EINVAL;
		goto put_fence;
	}

	/* Reserve space for all wait commands in a single allocation. */
	err = gk20a_channel_alloc_priv_cmdbuf(c,
		wait_cmd_size * num_wait_cmds,
		wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		goto put_fence;
	}

	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		struct nvgpu_semaphore *sema;

		sema = gk20a_sync_pt_sema(pt);
		if (!sema) {
			/* expired: zero-fill this slot so it is a no-op */
			nvgpu_memset(c->g, wait_cmd->mem,
				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
				0, wait_cmd_size * sizeof(u32));
		} else {
			WARN_ON(!sema->incremented);
			add_sema_cmd(c->g, c, sema, wait_cmd,
				i * wait_cmd_size, true, false);
			nvgpu_semaphore_put(sema);
		}
	}

put_fence:
	sync_fence_put(sync_fence);
	return err;
}
#endif
static int gk20a_channel_semaphore_wait_fd(
struct gk20a_channel_sync *s, int fd,
struct priv_cmd_entry *entry, int max_wait_cmds)
@@ -548,13 +508,20 @@ static int gk20a_channel_semaphore_wait_fd(
struct gk20a_channel_semaphore *sema =
container_of(s, struct gk20a_channel_semaphore, ops);
struct channel_gk20a *c = sema->c;
#ifdef CONFIG_SYNC
return semaphore_wait_fd_native(c, fd, entry, max_wait_cmds);
#else
nvgpu_err(c->g,
"trying to use sync fds with CONFIG_SYNC disabled");
return -ENODEV;
#endif
struct nvgpu_os_fence os_fence = {0};
int err;
err = nvgpu_os_fence_fdget(&os_fence, c, fd);
if (err)
return err;
err = os_fence.ops->program_waits(&os_fence,
entry, c, max_wait_cmds);
os_fence.ops->drop_ref(&os_fence);
return err;
}
static int __gk20a_channel_semaphore_incr(
@@ -570,6 +537,7 @@ static int __gk20a_channel_semaphore_incr(
struct nvgpu_semaphore *semaphore;
int err = 0;
struct sync_fence *sync_fence = NULL;
struct nvgpu_os_fence os_fence = {0};
semaphore = nvgpu_semaphore_alloc(c);
if (!semaphore) {
@@ -589,18 +557,15 @@ static int __gk20a_channel_semaphore_incr(
/* Release the completion semaphore. */
add_sema_cmd(c->g, c, semaphore, incr_cmd, 0, false, wfi_cmd);
#ifdef CONFIG_SYNC
if (need_sync_fence) {
sync_fence = gk20a_sync_fence_create(c,
semaphore, "f-gk20a-0x%04x",
nvgpu_semaphore_gpu_ro_va(semaphore));
err = nvgpu_os_fence_sema_create(&os_fence, c,
semaphore);
if (!sync_fence) {
err = -ENOMEM;
if (err)
goto clean_up_sema;
}
sync_fence = (struct sync_fence *)os_fence.priv;
}
#endif
err = gk20a_fence_from_semaphore(fence,
semaphore,
@@ -608,10 +573,8 @@ static int __gk20a_channel_semaphore_incr(
sync_fence);
if (err) {
#ifdef CONFIG_SYNC
if (sync_fence)
sync_fence_put(sync_fence);
#endif
if (nvgpu_os_fence_is_initialized(&os_fence))
os_fence.ops->drop_ref(&os_fence);
goto clean_up_sema;
}

View File

@@ -32,6 +32,7 @@ struct priv_cmd_entry;
struct channel_gk20a;
struct gk20a_fence;
struct gk20a;
struct nvgpu_semaphore;
struct gk20a_channel_sync {
nvgpu_atomic_t refcount;
@@ -103,6 +104,10 @@ struct gk20a_channel_sync {
void (*destroy)(struct gk20a_channel_sync *s);
};
void gk20a_channel_gen_sema_wait_cmd(struct channel_gk20a *c,
struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd,
u32 wait_cmd_size, int pos);
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync,
bool set_safe_state);
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c,

View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __NVGPU_OS_FENCE_ANDROID_H__
#define __NVGPU_OS_FENCE_ANDROID_H__
struct gk20a;
struct nvgpu_os_fence;
struct sync_fence;
struct channel_gk20a;
struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s);
void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s);
int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c, int fd);
void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
struct sync_fence *fence);
#endif

View File

@@ -0,0 +1,111 @@
/*
* nvgpu os fence
*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __NVGPU_OS_FENCE__
#define __NVGPU_OS_FENCE__
struct nvgpu_semaphore;
struct channel_gk20a;
struct priv_cmd_entry;
/*
* struct nvgpu_os_fence adds an abstraction to the earlier Android Sync
* Framework, specifically the sync-fence mechanism and the newer DMA sync
* APIs from linux-4.9. This abstraction provides the high-level definition
* as well as APIs that can be used by other OSes in future to have their own
* alternatives for the sync-framework.
*/
struct nvgpu_os_fence;
/*
* struct nvgpu_os_fence depends on the following ops structure
*/
struct nvgpu_os_fence_ops {
/*
* This API is used to iterate through multiple fence points within the
* fence and program the pushbuffer method for wait command.
*/
int (*program_waits)(struct nvgpu_os_fence *s,
struct priv_cmd_entry *wait_cmd,
struct channel_gk20a *c,
int max_wait_cmds);
/*
* This should be the last operation on the OS fence. The
* OS fence acts as a place-holder for the underlying fence
* implementation e.g. sync_fences. For each construct/fdget call
* there needs to be a drop_ref call. This reduces a reference count
* for the underlying sync_fence.
*/
void (*drop_ref)(struct nvgpu_os_fence *s);
};
/*
* The priv structure here is used to contain the struct sync_fence
* for LINUX_VERSION <= 4.9 and dma_fence for LINUX_VERSION > 4.9
*/
struct nvgpu_os_fence {
void *priv;
struct gk20a *g;
const struct nvgpu_os_fence_ops *ops;
};
/*
* This API is used to validate the nvgpu_os_fence
*/
/* A fence with no ops table bound has not been initialized. */
static inline int nvgpu_os_fence_is_initialized(struct nvgpu_os_fence *fence)
{
	return fence->ops != NULL;
}
#ifdef CONFIG_SYNC
int nvgpu_os_fence_sema_create(
struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c,
struct nvgpu_semaphore *sema);
int nvgpu_os_fence_fdget(
struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c, int fd);
#else
/* CONFIG_SYNC disabled: semaphore-backed os_fence creation unsupported. */
static inline int nvgpu_os_fence_sema_create(
	struct nvgpu_os_fence *fence_out,
	struct channel_gk20a *c,
	struct nvgpu_semaphore *sema)
{
	return -ENOSYS;
}
/* CONFIG_SYNC disabled: no sync-fence fds exist to look up. */
static inline int nvgpu_os_fence_fdget(
	struct nvgpu_os_fence *fence_out,
	struct channel_gk20a *c, int fd)
{
	return -ENOSYS;
}
#endif /* CONFIG_SYNC */
#endif /* __NVGPU_OS_FENCE__ */