gpu: nvgpu: Add a generic profiler

Add a generic profiler based on the channel kickoff profiler. This aims to provide a mechanism to allow engineers to (more) easily profile arbitrary software paths within nvgpu. Usage of this profiler is still primarily through debugfs. Next up is a generic debugfs interface for this profiler in the Linux code. The end goal for this is to profile the recovery code and generate interesting statistics. JIRA NVGPU-5606 Signed-off-by: Alex Waterman <alexw@nvidia.com> Change-Id: I99783ec7e5143855845bde4e98760ff43350456d Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2355319 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 18:16:01 +03:00 · 2020-06-01 19:56:37 -05:00
parent 59eb714c48
commit 70ce67df2d
18 changed files with 579 additions and 292 deletions
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -34,11 +34,13 @@
 #include <nvgpu/priv_cmdbuf.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/fence.h>
-#include <nvgpu/profile.h>
+#include <nvgpu/swprofile.h>
 #include <nvgpu/vpr.h>
 #include <nvgpu/trace.h>
 #include <nvgpu/nvhost.h>

+#include <nvgpu/fifo/swprofile.h>
+
 /*
 * We might need two extra gpfifo entries per submit - one for pre fence and
 * one for post fence.
@@ -340,7 +342,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile,
+		struct nvgpu_swprofiler *profiler,
 		bool need_deferred_cleanup)
 {
 	bool skip_buffer_refcounting = (flags &
@@ -358,7 +360,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
 		goto clean_up_job;
 	}

-	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);

 	/*
 	 * wait_cmd can be unset even if flag_fence_wait exists; the
@@ -432,11 +434,11 @@ static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
 		struct nvgpu_gpfifo_userdata userdata,
 		u32 num_entries,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile)
+		struct nvgpu_swprofiler *profiler)
 {
 	int err;

-	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);

 	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
 			num_entries);
@@ -475,7 +477,7 @@ static int nvgpu_do_submit(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile,
+		struct nvgpu_swprofiler *profiler,
 		bool need_job_tracking,
 		bool need_deferred_cleanup)
 {
@@ -502,17 +504,17 @@ static int nvgpu_do_submit(struct nvgpu_channel *c,
 	if (need_job_tracking) {
 		err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
 				userdata, num_entries, flags, fence,
-				fence_out, profile, need_deferred_cleanup);
+				fence_out, profiler, need_deferred_cleanup);
 	} else {
 		err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
-				userdata, num_entries, fence_out, profile);
+				userdata, num_entries, fence_out, profiler);
 	}

 	if (err != 0) {
 		return err;
 	}

-	nvgpu_profile_snapshot(profile, PROFILE_APPEND);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND);

 	g->ops.userd.gp_put(g, c);

@@ -527,7 +529,7 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
 				struct nvgpu_fence_type **fence_out,
-				struct nvgpu_profile *profile)
+				struct nvgpu_swprofiler *profiler)
 {
 	bool skip_buffer_refcounting = (flags &
 			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
@@ -608,7 +610,7 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
 	}

 	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
-			fence_out, profile, need_job_tracking, false);
+			fence_out, profiler, need_job_tracking, false);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -633,7 +635,7 @@ static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
 				struct nvgpu_fence_type **fence_out,
-				struct nvgpu_profile *profile)
+				struct nvgpu_swprofiler *profiler)
 {
 	bool skip_buffer_refcounting = (flags &
 			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
@@ -682,7 +684,7 @@ static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
 	}

 	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
-			fence_out, profile, need_job_tracking, true);
+			fence_out, profiler, need_job_tracking, true);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -734,7 +736,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
 				struct nvgpu_fence_type **fence_out,
-				struct nvgpu_profile *profile)
+				struct nvgpu_swprofiler *profiler)
 {
 	struct gk20a *g = c->g;
 	int err;
@@ -755,7 +757,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 		return -ENOMEM;
 	}

-	nvgpu_profile_snapshot(profile, PROFILE_ENTRY);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY);

 	/* update debug settings */
 	nvgpu_ltc_sync_enabled(g);
@@ -765,12 +767,12 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 #ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
 	if (c->deterministic) {
 		err = nvgpu_submit_deterministic(c, gpfifo, userdata,
-				num_entries, flags, fence, fence_out, profile);
+				num_entries, flags, fence, fence_out, profiler);
 	} else
 #endif
 	{
 		err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
-				num_entries, flags, fence, fence_out, profile);
+				num_entries, flags, fence, fence_out, profiler);
 	}

 	if (err != 0) {
@@ -793,7 +795,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
 		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

-	nvgpu_profile_snapshot(profile, PROFILE_END);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END);

 	nvgpu_log_fn(g, "done");
 	return err;
@@ -805,10 +807,10 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
 				struct nvgpu_fence_type **fence_out,
-				struct nvgpu_profile *profile)
+				struct nvgpu_swprofiler *profiler)
 {
 	return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
-			flags, fence, fence_out, profile);
+			flags, fence, fence_out, profiler);
 }

 int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,