gpu: nvgpu: create new gr/fecs_trace unit

Create a new gr/fecs_trace unit with common/gr/fecs_trace/fecs_trace.c as the common source file and include/nvgpu/gr/fecs_trace.h as the common header file. This unit will be common between Linux and QNX. Corresponding HAL files will be added under common/gr/fecs_trace/ as more functionality is moved to the new unit. For now, move struct gk20a_fecs_trace_hash_ent to the new unit and rename it to struct nvgpu_fecs_trace_context_entry. Add a vmid field to this struct since it is required for QNX. Remove the use of hashtables and simply use a linked list to simplify the code; FECS tracing is not a performance-sensitive use case, so the perf hit can be ignored. Rename the hash_lock mutex to list_lock. struct gk20a_fecs_trace and the list_lock mutex are still declared in gk20a/fecs_trace_gk20a.c, hence they cannot be used in the new unit yet. Rename and update all gk20a_fecs_trace_hash_*() APIs to the appropriate nvgpu_gr_fecs_trace_*() APIs. Remove gk20a_fecs_trace_hash_dump() since it is not being used. Jira NVGPU-1880 Change-Id: I89c2715baa770dbbd864ea70ab43d83d98ba693c Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2022903 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-25 02:52:51 +03:00 · 2019-02-19 17:47:41 +05:30
parent decbbf3504
commit 556041f425
6 changed files with 197 additions and 127 deletions
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -24,7 +24,6 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/bug.h>
-#include <nvgpu/hashtable.h>
 #include <nvgpu/circ_buf.h>
 #include <nvgpu/thread.h>
 #include <nvgpu/barrier.h>
@@ -39,6 +38,7 @@
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/gr/subctx.h>
 #include <nvgpu/gr/ctx.h>
+#include <nvgpu/gr/fecs_trace.h>

 #include "fecs_trace_gk20a.h"
 #include "gr_gk20a.h"
@@ -48,16 +48,9 @@

 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

-struct gk20a_fecs_trace_hash_ent {
-	u32 context_ptr;
-	pid_t pid;
-	struct hlist_node node;
-};
-
 struct gk20a_fecs_trace {
-
-	DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
-	struct nvgpu_mutex hash_lock;
+	struct nvgpu_list_node context_list;
+	struct nvgpu_mutex list_lock;
 	struct nvgpu_mutex poll_lock;
 	struct nvgpu_thread poll_task;
 	bool init;
@@ -119,111 +112,6 @@ static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
 			(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
 }

-void gk20a_fecs_trace_hash_dump(struct gk20a *g)
-{
-	u32 bkt;
-	struct gk20a_fecs_trace_hash_ent *ent;
-	struct gk20a_fecs_trace *trace = g->fecs_trace;
-
-	nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");
-
-	nvgpu_mutex_acquire(&trace->hash_lock);
-	hash_for_each(trace->pid_hash_table, bkt, ent, node)
-	{
-		nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
-			ent, bkt, ent->context_ptr, ent->pid);
-
-	}
-	nvgpu_mutex_release(&trace->hash_lock);
-}
-
-static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
-{
-	struct gk20a_fecs_trace_hash_ent *he;
-	struct gk20a_fecs_trace *trace = g->fecs_trace;
-
-	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
-		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);
-
-	he = nvgpu_kzalloc(g, sizeof(*he));
-	if (unlikely(!he)) {
-		nvgpu_warn(g,
-			"can't alloc new hash entry for context_ptr=%x pid=%d",
-			context_ptr, pid);
-		return -ENOMEM;
-	}
-
-	he->context_ptr = context_ptr;
-	he->pid = pid;
-	nvgpu_mutex_acquire(&trace->hash_lock);
-	hash_add(trace->pid_hash_table, &he->node, context_ptr);
-	nvgpu_mutex_release(&trace->hash_lock);
-	return 0;
-}
-
-static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
-{
-	struct hlist_node *tmp;
-	struct gk20a_fecs_trace_hash_ent *ent;
-	struct gk20a_fecs_trace *trace = g->fecs_trace;
-
-	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
-		"freeing hash entry context_ptr=%x", context_ptr);
-
-	nvgpu_mutex_acquire(&trace->hash_lock);
-	hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
-		context_ptr) {
-		if (ent->context_ptr == context_ptr) {
-			hash_del(&ent->node);
-			nvgpu_log(g, gpu_dbg_ctxsw,
-				"freed hash entry=%p context_ptr=%x", ent,
-				ent->context_ptr);
-			nvgpu_kfree(g, ent);
-			break;
-		}
-	}
-	nvgpu_mutex_release(&trace->hash_lock);
-}
-
-static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
-{
-	u32 bkt;
-	struct hlist_node *tmp;
-	struct gk20a_fecs_trace_hash_ent *ent;
-	struct gk20a_fecs_trace *trace = g->fecs_trace;
-
-	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
-
-	nvgpu_mutex_acquire(&trace->hash_lock);
-	hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
-		hash_del(&ent->node);
-		nvgpu_kfree(g, ent);
-	}
-	nvgpu_mutex_release(&trace->hash_lock);
-
-}
-
-static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
-{
-	struct gk20a_fecs_trace_hash_ent *ent;
-	struct gk20a_fecs_trace *trace = g->fecs_trace;
-	pid_t pid = 0;
-
-	nvgpu_mutex_acquire(&trace->hash_lock);
-	hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
-		if (ent->context_ptr == context_ptr) {
-			nvgpu_log(g, gpu_dbg_ctxsw,
-				"found context_ptr=%x -> pid=%d",
-				ent->context_ptr, ent->pid);
-			pid = ent->pid;
-			break;
-		}
-	}
-	nvgpu_mutex_release(&trace->hash_lock);
-
-	return pid;
-}
-
 /*
 * Converts HW entry format to userspace-facing format and pushes it to the
 * queue.
@@ -235,6 +123,7 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
 	struct gk20a_fecs_trace *trace = g->fecs_trace;
 	pid_t cur_pid;
 	pid_t new_pid;
+	u32 cur_vmid, new_vmid;
 	int count = 0;

 	/* for now, only one VM */
@@ -262,8 +151,12 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
 	 */
 	r->magic_hi = 0;

-	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
-	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);
+	nvgpu_mutex_acquire(&trace->list_lock);
+	nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr, &trace->context_list,
+		&cur_pid, &cur_vmid);
+	nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr, &trace->context_list,
+		&new_pid, &new_vmid);
+	nvgpu_mutex_release(&trace->list_lock);

 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
 		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
@@ -418,12 +311,12 @@ int gk20a_fecs_trace_init(struct gk20a *g)
 	err = nvgpu_mutex_init(&trace->poll_lock);
 	if (err != 0)
 		goto clean;
-	err = nvgpu_mutex_init(&trace->hash_lock);
+	err = nvgpu_mutex_init(&trace->list_lock);
 	if (err != 0)
 		goto clean_poll_lock;

 	BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
-	hash_init(trace->pid_hash_table);
+	nvgpu_init_list_node(&trace->context_list);

 	nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);

@@ -453,6 +346,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
 	u32 aperture_mask;
 	struct tsg_gk20a *tsg;
+	int ret;

 	tsg = tsg_gk20a_from_ch(ch);
 	if (tsg == NULL) {
@@ -504,16 +398,20 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	/* pid (process identifier) in user space, corresponds to tgid (thread
 	 * group id) in kernel space.
 	 */
-	gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);
+	nvgpu_mutex_acquire(&trace->list_lock);
+	ret = nvgpu_gr_fecs_trace_add_context(g, context_ptr, tsg->tgid, 0,
+		&trace->context_list);
+	nvgpu_mutex_release(&trace->list_lock);

-	return 0;
+	return ret;
 }

 int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
 {
 	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
+	struct gk20a_fecs_trace *trace = g->fecs_trace;

-	if (g->fecs_trace) {
+	if (trace) {
 		nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
 			"ch=%p context_ptr=%x", ch, context_ptr);

@@ -522,7 +420,11 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
 				g->ops.fecs_trace.flush(g);
 			gk20a_fecs_trace_poll(g);
 		}
-		gk20a_fecs_trace_hash_del(g, context_ptr);
+
+		nvgpu_mutex_acquire(&trace->list_lock);
+		nvgpu_gr_fecs_trace_remove_context(g, context_ptr,
+			&trace->context_list);
+		nvgpu_mutex_release(&trace->list_lock);
 	}
 	return 0;
 }
@@ -546,9 +448,12 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
 		return 0;

 	nvgpu_thread_stop(&trace->poll_task);
-	gk20a_fecs_trace_free_hash_table(g);

-	nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
+	nvgpu_mutex_acquire(&trace->list_lock);
+	nvgpu_gr_fecs_trace_remove_contexts(g, &trace->context_list);
+	nvgpu_mutex_release(&trace->list_lock);
+
+	nvgpu_mutex_destroy(&g->fecs_trace->list_lock);
 	nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);

 	nvgpu_kfree(g, g->fecs_trace);