gpu: nvgpu: create new gr/fecs_trace unit

Create a new gr/fecs_trace unit with common/gr/fecs_trace/fecs_trace.c
as the common source file and include/nvgpu/gr/fecs_trace.h as the
common header file.
This unit will be shared between Linux and QNX.
Corresponding HAL files will be added under common/gr/fecs_trace/
as more functionality is moved to the new unit.

For now, move struct gk20a_fecs_trace_hash_ent to the new unit and
rename it as struct nvgpu_fecs_trace_context_entry.
Add a vmid field to this struct since it is required for QNX.

Remove the use of hashtables and simply use a linked list to simplify
the code. FECS tracing is not a performance-sensitive use case,
so the performance hit can be ignored.

Rename hash_lock mutex to list_lock

struct gk20a_fecs_trace and mutex list_lock are still declared in
gk20a/fecs_trace_gk20a.c, hence they cannot be used in new unit yet

Rename and update all gk20a_fecs_trace_hash_*() APIs to appropriate
nvgpu_gr_fecs_trace_*() APIs

Remove gk20a_fecs_trace_hash_dump() since it is not being used

Jira NVGPU-1880

Change-Id: I89c2715baa770dbbd864ea70ab43d83d98ba693c
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2022903
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-02-19 17:47:41 +05:30
committed by mobile promotions
parent decbbf3504
commit 556041f425
6 changed files with 197 additions and 127 deletions

View File

@@ -90,6 +90,7 @@ nvgpu-y += common/bus/bus_gk20a.o \
common/gr/subctx.o \
common/gr/config/gr_config.o \
common/gr/config/gr_config_gm20b.o \
common/gr/fecs_trace/fecs_trace.o \
common/netlist/netlist.o \
common/netlist/netlist_sim.o \
common/netlist/netlist_gm20b.o \

View File

@@ -135,6 +135,7 @@ srcs += common/sim.c \
common/gr/ctx.c \
common/gr/config/gr_config.c \
common/gr/config/gr_config_gm20b.c \
common/gr/fecs_trace/fecs_trace.c \
common/netlist/netlist.c \
common/netlist/netlist_sim.c \
common/netlist/netlist_gm20b.c \

View File

@@ -0,0 +1,107 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/list.h>
#include <nvgpu/log.h>
#include <nvgpu/gr/fecs_trace.h>
/*
 * Record a (context_ptr -> pid/vmid) mapping in the FECS trace context list.
 *
 * @g           - GPU device pointer (used for allocation and logging).
 * @context_ptr - FECS context pointer to track.
 * @pid         - owning process id to associate with the context.
 * @vmid        - VM id to associate with the context (needed on QNX).
 * @list        - head of the context list the entry is appended to.
 *
 * The caller is responsible for serializing access to @list (in the gk20a
 * unit this is done with the list_lock mutex).
 *
 * Returns 0 on success, -ENOMEM if the entry could not be allocated.
 */
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
	pid_t pid, u32 vmid, struct nvgpu_list_node *list)
{
	struct nvgpu_fecs_trace_context_entry *entry;

	/*
	 * Hashtables were replaced by a plain list in this unit, so log
	 * this as a context entry, not a "hash entry".
	 */
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding context entry context_ptr=%x -> pid=%d, vmid=%d",
		context_ptr, pid, vmid);

	entry = nvgpu_kzalloc(g, sizeof(*entry));
	if (unlikely(!entry)) {
		nvgpu_err(g,
			"can't alloc new entry for context_ptr=%x pid=%d vmid=%d",
			context_ptr, pid, vmid);
		return -ENOMEM;
	}

	nvgpu_init_list_node(&entry->entry);
	entry->context_ptr = context_ptr;
	entry->pid = pid;
	entry->vmid = vmid;

	nvgpu_list_add_tail(&entry->entry, list);

	return 0;
}
/*
 * Remove and free the first list entry matching @context_ptr, if any.
 * Uses the _safe list walk since the matching node is deleted in place.
 * Caller must serialize access to @list.
 */
void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list)
{
	struct nvgpu_fecs_trace_context_entry *ce;
	struct nvgpu_fecs_trace_context_entry *ce_tmp;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing entry context_ptr=%x", context_ptr);

	nvgpu_list_for_each_entry_safe(ce, ce_tmp, list,
			nvgpu_fecs_trace_context_entry, entry) {
		if (ce->context_ptr != context_ptr) {
			continue;
		}

		nvgpu_list_del(&ce->entry);
		nvgpu_log(g, gpu_dbg_ctxsw,
			"freed entry=%p context_ptr=%x", ce,
			ce->context_ptr);
		nvgpu_kfree(g, ce);
		break;
	}
}
void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
struct nvgpu_list_node *list)
{
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
nvgpu_list_for_each_entry_safe(entry, tmp, list,
nvgpu_fecs_trace_context_entry, entry) {
nvgpu_list_del(&entry->entry);
nvgpu_kfree(g, entry);
}
}
/*
 * Look up the pid/vmid recorded for @context_ptr in @list.
 * On a match, *pid and *vmid are set from the entry and the function
 * returns early. If no entry matches, *pid is set to 0 and *vmid to
 * 0xffffffffU as "not found" sentinels. Caller must serialize access
 * to @list.
 */
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list, pid_t *pid, u32 *vmid)
{
	struct nvgpu_fecs_trace_context_entry *ce;

	nvgpu_list_for_each_entry(ce, list, nvgpu_fecs_trace_context_entry,
			entry) {
		if (ce->context_ptr != context_ptr) {
			continue;
		}

		nvgpu_log(g, gpu_dbg_ctxsw,
			"found context_ptr=%x -> pid=%d, vmid=%d",
			ce->context_ptr, ce->pid, ce->vmid);
		*pid = ce->pid;
		*vmid = ce->vmid;
		return;
	}

	/* Context is not tracked: report sentinel values. */
	*pid = 0;
	*vmid = 0xffffffffU;
}

View File

@@ -24,7 +24,6 @@
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/thread.h>
#include <nvgpu/barrier.h>
@@ -39,6 +38,7 @@
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/fecs_trace.h>
#include "fecs_trace_gk20a.h"
#include "gr_gk20a.h"
@@ -48,16 +48,9 @@
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
struct gk20a_fecs_trace_hash_ent {
u32 context_ptr;
pid_t pid;
struct hlist_node node;
};
struct gk20a_fecs_trace {
DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
struct nvgpu_mutex hash_lock;
struct nvgpu_list_node context_list;
struct nvgpu_mutex list_lock;
struct nvgpu_mutex poll_lock;
struct nvgpu_thread poll_task;
bool init;
@@ -119,111 +112,6 @@ static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}
/*
 * Debug helper: log every (context_ptr -> pid) mapping in the pid hash
 * table. Holds hash_lock for the whole walk. (Removed by this commit;
 * the commit message notes it was unused.)
 */
void gk20a_fecs_trace_hash_dump(struct gk20a *g)
{
u32 bkt;
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");
nvgpu_mutex_acquire(&trace->hash_lock);
/* bkt is the current bucket index supplied by hash_for_each */
hash_for_each(trace->pid_hash_table, bkt, ent, node)
{
nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
ent, bkt, ent->context_ptr, ent->pid);
}
nvgpu_mutex_release(&trace->hash_lock);
}
/*
 * Insert a (context_ptr -> pid) mapping into the pid hash table, keyed
 * by context_ptr. Returns 0 on success, -ENOMEM if the entry cannot be
 * allocated. (Replaced in this commit by nvgpu_gr_fecs_trace_add_context.)
 */
static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
struct gk20a_fecs_trace_hash_ent *he;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);
he = nvgpu_kzalloc(g, sizeof(*he));
if (unlikely(!he)) {
nvgpu_warn(g,
"can't alloc new hash entry for context_ptr=%x pid=%d",
context_ptr, pid);
return -ENOMEM;
}
he->context_ptr = context_ptr;
he->pid = pid;
/* hash_lock protects pid_hash_table against concurrent updates */
nvgpu_mutex_acquire(&trace->hash_lock);
hash_add(trace->pid_hash_table, &he->node, context_ptr);
nvgpu_mutex_release(&trace->hash_lock);
return 0;
}
/*
 * Remove and free the hash entry matching context_ptr, if present.
 * Uses the _safe bucket walk since the node is deleted inside the loop.
 * (Replaced in this commit by nvgpu_gr_fecs_trace_remove_context.)
 */
static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
struct hlist_node *tmp;
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"freeing hash entry context_ptr=%x", context_ptr);
nvgpu_mutex_acquire(&trace->hash_lock);
/* walk only the bucket context_ptr hashes to */
hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
context_ptr) {
if (ent->context_ptr == context_ptr) {
hash_del(&ent->node);
nvgpu_log(g, gpu_dbg_ctxsw,
"freed hash entry=%p context_ptr=%x", ent,
ent->context_ptr);
nvgpu_kfree(g, ent);
break;
}
}
nvgpu_mutex_release(&trace->hash_lock);
}
/*
 * Empty the entire pid hash table, freeing every entry in every bucket.
 * (Replaced in this commit by nvgpu_gr_fecs_trace_remove_contexts.)
 */
static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
{
u32 bkt;
struct hlist_node *tmp;
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
nvgpu_mutex_acquire(&trace->hash_lock);
hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
hash_del(&ent->node);
nvgpu_kfree(g, ent);
}
nvgpu_mutex_release(&trace->hash_lock);
}
/*
 * Look up the pid recorded for context_ptr in the pid hash table.
 * Returns the pid on a match, 0 if the context is not tracked.
 * (Replaced in this commit by nvgpu_gr_fecs_trace_find_pid, which also
 * reports a vmid.)
 */
static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
pid_t pid = 0;
nvgpu_mutex_acquire(&trace->hash_lock);
hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
if (ent->context_ptr == context_ptr) {
nvgpu_log(g, gpu_dbg_ctxsw,
"found context_ptr=%x -> pid=%d",
ent->context_ptr, ent->pid);
pid = ent->pid;
break;
}
}
nvgpu_mutex_release(&trace->hash_lock);
return pid;
}
/*
* Converts HW entry format to userspace-facing format and pushes it to the
* queue.
@@ -235,6 +123,7 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
struct gk20a_fecs_trace *trace = g->fecs_trace;
pid_t cur_pid;
pid_t new_pid;
u32 cur_vmid, new_vmid;
int count = 0;
/* for now, only one VM */
@@ -262,8 +151,12 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
*/
r->magic_hi = 0;
cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr, &trace->context_list,
&cur_pid, &cur_vmid);
nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr, &trace->context_list,
&new_pid, &new_vmid);
nvgpu_mutex_release(&trace->list_lock);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
@@ -418,12 +311,12 @@ int gk20a_fecs_trace_init(struct gk20a *g)
err = nvgpu_mutex_init(&trace->poll_lock);
if (err != 0)
goto clean;
err = nvgpu_mutex_init(&trace->hash_lock);
err = nvgpu_mutex_init(&trace->list_lock);
if (err != 0)
goto clean_poll_lock;
BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
hash_init(trace->pid_hash_table);
nvgpu_init_list_node(&trace->context_list);
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
@@ -453,6 +346,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
u32 aperture_mask;
struct tsg_gk20a *tsg;
int ret;
tsg = tsg_gk20a_from_ch(ch);
if (tsg == NULL) {
@@ -504,16 +398,20 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
/* pid (process identifier) in user space, corresponds to tgid (thread
* group id) in kernel space.
*/
gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);
nvgpu_mutex_acquire(&trace->list_lock);
ret = nvgpu_gr_fecs_trace_add_context(g, context_ptr, tsg->tgid, 0,
&trace->context_list);
nvgpu_mutex_release(&trace->list_lock);
return 0;
return ret;
}
int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
{
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
struct gk20a_fecs_trace *trace = g->fecs_trace;
if (g->fecs_trace) {
if (trace) {
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"ch=%p context_ptr=%x", ch, context_ptr);
@@ -522,7 +420,11 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
g->ops.fecs_trace.flush(g);
gk20a_fecs_trace_poll(g);
}
gk20a_fecs_trace_hash_del(g, context_ptr);
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_gr_fecs_trace_remove_context(g, context_ptr,
&trace->context_list);
nvgpu_mutex_release(&trace->list_lock);
}
return 0;
}
@@ -546,9 +448,12 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
return 0;
nvgpu_thread_stop(&trace->poll_task);
gk20a_fecs_trace_free_hash_table(g);
nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_gr_fecs_trace_remove_contexts(g, &trace->context_list);
nvgpu_mutex_release(&trace->list_lock);
nvgpu_mutex_destroy(&g->fecs_trace->list_lock);
nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
nvgpu_kfree(g, g->fecs_trace);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,7 +30,6 @@ struct gk20a;
* increasing this constant should help (it drives Linux' internal buffer size).
*/
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
#define GK20A_FECS_TRACE_PTIMER_SHIFT 5

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_FECS_TRACE_H
#define NVGPU_GR_FECS_TRACE_H

#include <nvgpu/types.h>
#include <nvgpu/list.h>

struct gk20a;

/*
 * One tracked FECS context: maps a FECS context pointer to the owning
 * process (pid) and, on virtualized platforms such as QNX, a vmid.
 * Entries are linked into a per-trace context list via @entry.
 */
struct nvgpu_fecs_trace_context_entry {
	u32 context_ptr;

	pid_t pid;
	u32 vmid;

	struct nvgpu_list_node entry;
};

/*
 * Recover the containing entry from its embedded list node; required by
 * the nvgpu list iteration macros.
 */
static inline struct nvgpu_fecs_trace_context_entry *
nvgpu_fecs_trace_context_entry_from_entry(struct nvgpu_list_node *node)
{
	return (struct nvgpu_fecs_trace_context_entry *)
		((uintptr_t)node -
		offsetof(struct nvgpu_fecs_trace_context_entry, entry));
}
/* Note: removed the stray ';' after the brace above — an empty
 * file-scope declaration is not valid ISO C. */

/* Callers of all list APIs below must serialize access to @list. */
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
	pid_t pid, u32 vmid, struct nvgpu_list_node *list);
void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list);
void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
	struct nvgpu_list_node *list);
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list, pid_t *pid, u32 *vmid);

#endif /* NVGPU_GR_FECS_TRACE_H */