gpu: nvgpu: Add a generic profiler

Add a generic profiler based on the channel kickoff profiler. This
provides a mechanism that lets engineers (more) easily profile
arbitrary software paths within nvgpu.
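
As a quick sketch of the intended flow: nvgpu_swprofile_open(),
nvgpu_swprofile_print_ranges() and nvgpu_swprofile_close() below are
the calls this change actually makes from the FIFO debugfs code; the
profiler type name, the snapshot call and the sample indices are
assumptions for illustration only.

/*
 * Illustrative sketch, not part of this change. Only open(), close()
 * and print_ranges() are taken from this patch; the snapshot call and
 * the SAMPLE_* indices are hypothetical.
 */
#include <nvgpu/swprofile.h>

static void profile_some_path(struct gk20a *g,
			      struct nvgpu_swprofiler *p,
			      struct nvgpu_debug_context *o)
{
	/* Allocate sample buffers and enable profiling ("enable" node). */
	if (nvgpu_swprofile_open(g, p) != 0)
		return;

	/* Hypothetical: timestamp interesting points along the path. */
	/* nvgpu_swprofile_snapshot(p, SAMPLE_BEGIN); */
	/* ... arbitrary software path being profiled ... */
	/* nvgpu_swprofile_snapshot(p, SAMPLE_END); */

	/* Dump percentile ranges, as the "stats" node now does. */
	nvgpu_swprofile_print_ranges(g, p, o);

	/* Drop the reference taken by open. */
	nvgpu_swprofile_close(p);
}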

Usage of this profiler is still primarily through debugfs; this patch
wires it up to the existing FIFO profiling nodes. Next up is a generic
debugfs interface for this profiler in the Linux code.
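
For example, the debugfs flow might look roughly like the userspace
sketch below; the debugfs mount point and the per-GPU directory name
are assumptions and vary by platform:

/* Enable profiling, run some work, then read back the stats node. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[512];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/debug/gpu.0/profile/enable", O_WRONLY);
	if (fd < 0)
		return 1;
	(void) write(fd, "1", 1);	/* non-zero value enables profiling */
	close(fd);

	/* ... submit work so the profiled path records samples ... */

	fd = open("/sys/kernel/debug/gpu.0/profile/stats", O_RDONLY);
	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		(void) fwrite(buf, 1, (size_t) n, stdout);
	close(fd);
	return 0;
}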

The end goal for this is to profile the recovery code and generate
interesting statistics.

JIRA NVGPU-5606

Signed-off-by: Alex Waterman <alexw@nvidia.com>
Change-Id: I99783ec7e5143855845bde4e98760ff43350456d
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2355319
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Alex Waterman <alexw@nvidia.com>
Date:   2020-06-01 19:56:37 -05:00
Parent: 59eb714c48
Commit: 70ce67df2d
18 changed files with 579 additions and 292 deletions

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved.
+ * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -9,7 +9,6 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
- *
  */
 
 #include "debug_fifo.h"
@@ -24,7 +23,9 @@
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/engines.h>
 #include <nvgpu/runlist.h>
-#include <nvgpu/profile.h>
+#include <nvgpu/swprofile.h>
+#include <nvgpu/fifo/swprofile.h>
 
-void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);
@@ -147,41 +148,12 @@ static int gk20a_fifo_profile_enable(void *data, u64 val)
 	struct gk20a *g = (struct gk20a *) data;
 	struct nvgpu_fifo *f = &g->fifo;
 
-	nvgpu_mutex_acquire(&f->profile.lock);
 	if (val == 0) {
-		if (f->profile.enabled) {
-			f->profile.enabled = false;
-			nvgpu_ref_put(&f->profile.ref,
-				__gk20a_fifo_profile_free);
-		}
-	} else {
-		if (!f->profile.enabled) {
-			/* not kref init as it can have a running condition if
-			 * we enable/disable/enable while kickoff is happening
-			 */
-			if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
-				f->profile.data = nvgpu_vzalloc(g,
-					FIFO_PROFILING_ENTRIES *
-					sizeof(struct nvgpu_profile));
-				f->profile.sorted = nvgpu_vzalloc(g,
-					FIFO_PROFILING_ENTRIES *
-					sizeof(u64));
-				if (!(f->profile.data && f->profile.sorted)) {
-					nvgpu_vfree(g, f->profile.data);
-					nvgpu_vfree(g, f->profile.sorted);
-					nvgpu_mutex_release(&f->profile.lock);
-					return -ENOMEM;
-				}
-				nvgpu_ref_init(&f->profile.ref);
-			}
-			atomic_set(&f->profile.get.atomic_var, 0);
-			f->profile.enabled = true;
-		}
+		nvgpu_swprofile_close(&f->kickoff_profiler);
+		return 0;
 	}
-	nvgpu_mutex_release(&f->profile.lock);
-	return 0;
+
+	return nvgpu_swprofile_open(g, &f->kickoff_profiler);
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(
@@ -191,96 +163,20 @@ DEFINE_SIMPLE_ATTRIBUTE(
 	"%llu\n"
 );
 
-static int __profile_cmp(const void *a, const void *b)
+static void gk20a_fifo_write_to_seqfile_no_nl(void *ctx, const char *str)
 {
-	return *((unsigned long long *) a) - *((unsigned long long *) b);
-}
-
-/*
- * This uses about 800b in the stack, but the function using it is not part
- * of a callstack where much memory is being used, so it is fine
- */
-#define PERCENTILE_WIDTH 5
-#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
-
-static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
-		u64 *percentiles, u32 index_end, u32 index_start)
-{
-	unsigned int nelem = 0;
-	unsigned int index;
-	struct nvgpu_profile *profile;
-
-	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
-		profile = &g->fifo.profile.data[index];
-
-		if (profile->timestamp[index_end] >
-				profile->timestamp[index_start]) {
-			/* This is a valid element */
-			g->fifo.profile.sorted[nelem] =
-					profile->timestamp[index_end] -
-					profile->timestamp[index_start];
-			nelem++;
-		}
-	}
-
-	/* sort it */
-	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
-		__profile_cmp, NULL);
-
-	/* build ranges */
-	for (index = 0; index < PERCENTILE_RANGES; index++) {
-		percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
-			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
-				nelem)/100 - 1];
-	}
-
-	return nelem;
+	seq_printf((struct seq_file *)ctx, str);
 }
 
 static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
 {
 	struct gk20a *g = s->private;
-	unsigned int get, nelem, index;
-	/*
-	 * 800B in the stack, but function is declared statically and only
-	 * called from debugfs handler
-	 */
-	u64 percentiles_ioctl[PERCENTILE_RANGES];
-	u64 percentiles_kickoff[PERCENTILE_RANGES];
-	u64 percentiles_jobtracking[PERCENTILE_RANGES];
-	u64 percentiles_append[PERCENTILE_RANGES];
-	u64 percentiles_userd[PERCENTILE_RANGES];
-
-	if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
-		seq_printf(s, "Profiling disabled\n");
-		return 0;
-	}
-
-	get = atomic_read(&g->fifo.profile.get.atomic_var);
-
-	__gk20a_fifo_create_stats(g, percentiles_ioctl,
-			PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_kickoff,
-			PROFILE_END, PROFILE_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
-			PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_append,
-			PROFILE_APPEND, PROFILE_JOB_TRACKING);
-	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
-			PROFILE_END, PROFILE_APPEND);
-
-	seq_printf(s, "Number of kickoffs: %d\n", nelem);
-	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
-	for (index = 0; index < PERCENTILE_RANGES; index++)
-		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
-			PERCENTILE_WIDTH * (index+1),
-			percentiles_ioctl[index],
-			percentiles_kickoff[index],
-			percentiles_append[index],
-			percentiles_jobtracking[index],
-			percentiles_userd[index]);
-
-	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+	struct nvgpu_debug_context o = {
+		.fn = gk20a_fifo_write_to_seqfile_no_nl,
+		.ctx = s,
+	};
+
+	nvgpu_swprofile_print_ranges(g, &g->fifo.kickoff_profiler, &o);
 
 	return 0;
 }
@@ -297,7 +193,6 @@ static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
 	.release = single_release,
 };
 
-
 void gk20a_fifo_debugfs_init(struct gk20a *g)
 {
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
@@ -318,11 +213,6 @@ void gk20a_fifo_debugfs_init(struct gk20a *g)
 	if (IS_ERR_OR_NULL(profile_root))
 		return;
 
-	nvgpu_mutex_init(&g->fifo.profile.lock);
-	g->fifo.profile.enabled = false;
-	atomic_set(&g->fifo.profile.get.atomic_var, 0);
-	atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);
-
 	debugfs_create_file("enable", 0600, profile_root, g,
 			&gk20a_fifo_profile_enable_debugfs_fops);
@@ -330,54 +220,3 @@ void gk20a_fifo_debugfs_init(struct gk20a *g)
 			&gk20a_fifo_profile_stats_debugfs_fops);
 }
-
-void nvgpu_profile_snapshot(struct nvgpu_profile *profile, int idx)
-{
-	if (profile)
-		profile->timestamp[idx] = nvgpu_current_time_ns();
-}
-
-void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
-{
-	struct nvgpu_fifo *f = container_of(ref, struct nvgpu_fifo,
-			profile.ref);
-
-	nvgpu_vfree(f->g, f->profile.data);
-	nvgpu_vfree(f->g, f->profile.sorted);
-}
-
-/* Get the next element in the ring buffer of profile entries
- * and grab a reference to the structure
- */
-struct nvgpu_profile *nvgpu_profile_acquire(struct gk20a *g)
-{
-	struct nvgpu_fifo *f = &g->fifo;
-	struct nvgpu_profile *profile;
-	unsigned int index;
-
-	/* If kref is zero, profiling is not enabled */
-	if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
-		return NULL;
-
-	index = atomic_inc_return(&f->profile.get.atomic_var);
-	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
-
-	return profile;
-}
-
-/* Free the reference to the structure. This allows deferred cleanups */
-void nvgpu_profile_release(struct gk20a *g,
-		struct nvgpu_profile *profile)
-{
-	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
-}
-
-void gk20a_fifo_debugfs_deinit(struct gk20a *g)
-{
-	struct nvgpu_fifo *f = &g->fifo;
-
-	nvgpu_mutex_acquire(&f->profile.lock);
-	if (f->profile.enabled) {
-		f->profile.enabled = false;
-		nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
-	}
-	nvgpu_mutex_release(&f->profile.lock);
-}