gpu: nvgpu: Add a generic profiler

Add a generic profiler based on the channel kickoff profiler. This aims to provide a mechanism to allow engineers to (more) easily profile arbitrary software paths within nvgpu. Usage of this profiler is still primarily through debugfs. Next up is a generic debugfs interface for this profiler in the Linux code. The end goal for this is to profile the recovery code and generate interesting statistics. JIRA NVGPU-5606 Signed-off-by: Alex Waterman <alexw@nvidia.com> Change-Id: I99783ec7e5143855845bde4e98760ff43350456d Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2355319 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 18:16:01 +03:00 · 2020-06-01 19:56:37 -05:00
parent 59eb714c48
commit 70ce67df2d
18 changed files with 579 additions and 292 deletions
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -40,7 +40,7 @@
 struct gk20a;
 struct dbg_session_gk20a;
 struct nvgpu_fence_type;
-struct nvgpu_profile;
+struct nvgpu_swprofiler;
 struct nvgpu_channel_sync;
 struct nvgpu_gpfifo_userdata;
 struct nvgpu_gr_subctx;
@@ -576,7 +576,7 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
 				struct nvgpu_fence_type **fence_out,
-				struct nvgpu_profile *profile);
+				struct nvgpu_swprofiler *profiler);

 int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
 				struct nvgpu_gpfifo_entry *gpfifo,
--- a/drivers/gpu/nvgpu/include/nvgpu/fifo.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/fifo.h
@@ -197,6 +197,8 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/list.h>
+#include <nvgpu/swprofile.h>
+
 /**
 * H/w defined value for Channel ID type
 */
@@ -231,6 +233,7 @@ struct nvgpu_engine_info;
 struct nvgpu_runlist_info;
 struct nvgpu_channel;
 struct nvgpu_tsg;
+struct nvgpu_swprofiler;

 struct nvgpu_fifo {
 	/** Pointer to GPU driver struct. */
@@ -297,16 +300,8 @@ struct nvgpu_fifo {
 	/** Number of active runlists. */
 	u32 num_runlists;

-#ifdef CONFIG_DEBUG_FS
-	struct {
-		struct nvgpu_profile *data;
-		nvgpu_atomic_t get;
-		bool enabled;
-		u64 *sorted;
-		struct nvgpu_ref ref;
-		struct nvgpu_mutex lock;
-	} profile;
-#endif
+	struct nvgpu_swprofiler kickoff_profiler;
+
 #ifdef CONFIG_NVGPU_USERD
 	struct nvgpu_mutex userd_mutex;
 	struct nvgpu_mem *userd_slabs;
--- a/drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_FIFO_PROFILE_H
+#define NVGPU_FIFO_PROFILE_H
+
+/*
+ * Define these here, not in the C file so that they are closer to the other
+ * macro definitions below. The two lists must be in sync.
+ */
+#define NVGPU_FIFO_KICKOFF_PROFILE_EVENTS	\
+	"ioctl_entry",				\
+	"entry",				\
+	"job_tracking",				\
+	"append",				\
+	"end",					\
+	"ioctl_exit",				\
+	NULL					\
+
+/*
+ * The kickoff profile events; these are used to index into the profile's sample
+ * array.
+ */
+#define PROF_KICKOFF_IOCTL_ENTRY		0U
+#define PROF_KICKOFF_ENTRY			1U
+#define PROF_KICKOFF_JOB_TRACKING		2U
+#define PROF_KICKOFF_APPEND			3U
+#define PROF_KICKOFF_END			4U
+#define PROF_KICKOFF_IOCTL_EXIT			5U
+
+#endif
--- a/drivers/gpu/nvgpu/include/nvgpu/profile.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/profile.h
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2011-2019, NVIDIA CORPORATION.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef NVGPU_PROFILE_H
-#define NVGPU_PROFILE_H
-
-/*
- * Number of entries in the kickoff latency buffer, used to calculate
- * the profiling and histogram. This number is calculated to be statistically
- * significative on a histogram on a 5% step
- */
-#ifdef CONFIG_DEBUG_FS
-#define FIFO_PROFILING_ENTRIES	16384U
-#endif
-
-enum {
-	PROFILE_IOCTL_ENTRY = 0U,
-	PROFILE_ENTRY,
-	PROFILE_JOB_TRACKING,
-	PROFILE_APPEND,
-	PROFILE_END,
-	PROFILE_IOCTL_EXIT,
-	PROFILE_MAX
-};
-
-struct nvgpu_profile {
-	u64 timestamp[PROFILE_MAX];
-};
-
-#ifdef CONFIG_DEBUG_FS
-struct nvgpu_profile *nvgpu_profile_acquire(struct gk20a *g);
-void nvgpu_profile_release(struct gk20a *g,
-	struct nvgpu_profile *profile);
-void nvgpu_profile_snapshot(struct nvgpu_profile *profile, int idx);
-#else
-static inline struct nvgpu_profile *
-nvgpu_profile_acquire(struct gk20a *g)
-{
-	return NULL;
-}
-static inline void nvgpu_profile_release(struct gk20a *g,
-	struct nvgpu_profile *profile)
-{
-}
-static inline void nvgpu_profile_snapshot(
-		struct nvgpu_profile *profile, int idx)
-{
-}
-#endif
-
-#endif /* NVGPU_PROFILE_H */
--- a/drivers/gpu/nvgpu/include/nvgpu/swprofile.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/swprofile.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_PROFILE_H
+#define NVGPU_PROFILE_H
+
+#include <nvgpu/lock.h>
+#include <nvgpu/types.h>
+#include <nvgpu/kref.h>
+
+struct nvgpu_debug_context;
+
+/*
+ * Number of entries in the kickoff latency buffer used to calculate the
+ * profiling and histogram. This number is calculated to be statistically
+ * significant on a histogram on a 5% step.
+ */
+#define PROFILE_ENTRIES		16384U
+
+struct nvgpu_swprofiler {
+	struct nvgpu_mutex    lock;
+
+	/**
+	 * The number of sample components (subsamples) that make up a single
+	 * sample for this profiler.
+	 */
+	u32                   psample_len;
+
+	/**
+	 * Sample array: this is essentially a matrix where rows correspond to
+	 * a given sample and columns correspond to a type of sample. Number of
+	 * samples is always %PROFILE_ENTRIES. This 1d array is accessed with
+	 * row-major indexing.
+	 */
+	u64                  *samples;
+
+	/**
+	 * Index of the next sample (row) to write. Will be wrapped at
+	 * %PROFILE_ENTRIES.
+	 */
+	u32                   sample_index;
+
+	/**
+	 * Column names used for printing the histogram. This is NULL terminated
+	 * so that the profiler can infer the number of subsamples in a
+	 * psample.
+	 */
+	const char          **col_names;
+
+	struct nvgpu_ref      ref;
+
+	/**
+	 * Necessary since we won't have access to a gk20a struct to vfree()
+	 * against when this profiler is freed via an nvgpu_ref.
+	 */
+	struct gk20a         *g;
+};
+
+/**
+ * @brief Create a profiler with the passed column names.
+ *
+ * @param[in] g          The GPU that owns this profiler.
+ * @param[in] p          Pointer to a profiler object to initialize.
+ * @param[in] col_names  %NULL terminated list of column names.
+ *
+ * The sample array length is determined by the NULL terminated %col_names
+ * array. This will not allocate the underlying data; that's controlled by
+ * the open and close functions:
+ *
+ *    nvgpu_swprofile_open()
+ *    nvgpu_swprofile_close()
+ *
+ * Once nvgpu_swprofile_initialize() is called all of the below functions
+ * may also be called. All of the sampling related functions will become
+ * no-ops if the SW profiler is not opened.
+ */
+void nvgpu_swprofile_initialize(struct gk20a *g,
+				struct nvgpu_swprofiler *p,
+				const char **col_names);
+
+/**
+ * @brief Open a profiler for use.
+ *
+ * @param[in] g   The GPU that owns this profiler.
+ * @param[in] p   The profiler to open.
+ *
+ * This function prepares a SW profiler object for actual profiling. Necessary
+ * data structures are allocated and subsequent snapshots will be captured.
+ *
+ * SW profiler objects are reference counted: for each open call made, a
+ * corresponding close call must also be made.
+ *
+ * @return Returns 0 on success, otherwise a negative error code.
+ */
+int nvgpu_swprofile_open(struct gk20a *g, struct nvgpu_swprofiler *p);
+
+/**
+ * @brief Close a profiler.
+ *
+ * @param[in] p  The profiler to close.
+ *
+ * Close call corresponding to nvgpu_swprofile_open().
+ */
+void nvgpu_swprofile_close(struct nvgpu_swprofiler *p);
+
+/**
+ * @brief Begin a series of timestamp samples.
+ *
+ * @param[in] p  The profiler to start sampling with.
+ *
+ * Each iteration through a given SW sequence requires one call to this
+ * function. It essentially just increments (with wraparound) an internal
+ * tracker which points to the sample space in the internal sample array.
+ * Typical usage is to call nvgpu_swprofile_begin_sample() and then a
+ * sequence of calls to nvgpu_swprofile_snapshot().
+ *
+ * Once done with the sequence being profiled nothing needs to happen. When
+ * the next iteration of the sequence is executed this function should be
+ * called again.
+ */
+void nvgpu_swprofile_begin_sample(struct nvgpu_swprofiler *p);
+
+/**
+ * @brief Capture a timestamp sample.
+ *
+ * @param[in] p    The profiler to sample with.
+ * @param[in] idx  The index to the subsample to capture.
+ *
+ * This captures a subsample. Any given run through a SW sequence that is
+ * being profiled will result in one or more subsamples which together make
+ * up a sample.
+ */
+void nvgpu_swprofile_snapshot(struct nvgpu_swprofiler *p, u32 idx);
+
+/**
+ * @brief Print percentile ranges for a SW profiler.
+ *
+ * @param[in] g   The GPU that owns this profiler.
+ * @param[in] p   The profiler to print.
+ * @param[in] o   A debug context object used for printing.
+ *
+ * Print a percentile table for all columns of sub-samples. This gives a
+ * good overview of the collected data.
+ */
+void nvgpu_swprofile_print_ranges(struct gk20a *g,
+				  struct nvgpu_swprofiler *p,
+				  struct nvgpu_debug_context *o);
+
+#endif /* NVGPU_PROFILE_H */