mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 18:16:01 +03:00
gpu: nvgpu: Add a generic profiler
Add a generic profiler based on the channel kickoff profiler. This aims to provide a mechanism to allow engineers to (more) easily profile arbitrary software paths within nvgpu. Usage of this profiler is still primarily through debugfs. Next up is a generic debugfs interface for this profiler in the Linux code. The end goal for this is to profile the recovery code and generate interesting statistics. JIRA NVGPU-5606 Signed-off-by: Alex Waterman <alexw@nvidia.com> Change-Id: I99783ec7e5143855845bde4e98760ff43350456d Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2355319 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
@@ -40,7 +40,7 @@
|
||||
struct gk20a;
|
||||
struct dbg_session_gk20a;
|
||||
struct nvgpu_fence_type;
|
||||
struct nvgpu_profile;
|
||||
struct nvgpu_swprofiler;
|
||||
struct nvgpu_channel_sync;
|
||||
struct nvgpu_gpfifo_userdata;
|
||||
struct nvgpu_gr_subctx;
|
||||
@@ -576,7 +576,7 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
|
||||
u32 flags,
|
||||
struct nvgpu_channel_fence *fence,
|
||||
struct nvgpu_fence_type **fence_out,
|
||||
struct nvgpu_profile *profile);
|
||||
struct nvgpu_swprofiler *profiler);
|
||||
|
||||
int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
|
||||
struct nvgpu_gpfifo_entry *gpfifo,
|
||||
|
||||
@@ -197,6 +197,8 @@
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/kref.h>
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/swprofile.h>
|
||||
|
||||
/**
|
||||
* H/w defined value for Channel ID type
|
||||
*/
|
||||
@@ -231,6 +233,7 @@ struct nvgpu_engine_info;
|
||||
struct nvgpu_runlist_info;
|
||||
struct nvgpu_channel;
|
||||
struct nvgpu_tsg;
|
||||
struct nvgpu_swprofiler;
|
||||
|
||||
struct nvgpu_fifo {
|
||||
/** Pointer to GPU driver struct. */
|
||||
@@ -297,16 +300,8 @@ struct nvgpu_fifo {
|
||||
/** Number of active runlists. */
|
||||
u32 num_runlists;
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct {
|
||||
struct nvgpu_profile *data;
|
||||
nvgpu_atomic_t get;
|
||||
bool enabled;
|
||||
u64 *sorted;
|
||||
struct nvgpu_ref ref;
|
||||
struct nvgpu_mutex lock;
|
||||
} profile;
|
||||
#endif
|
||||
struct nvgpu_swprofiler kickoff_profiler;
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
struct nvgpu_mutex userd_mutex;
|
||||
struct nvgpu_mem *userd_slabs;
|
||||
|
||||
50
drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h
Normal file
50
drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_FIFO_PROFILE_H
|
||||
#define NVGPU_FIFO_PROFILE_H
|
||||
|
||||
/*
|
||||
* Define these here, not in the C file so that they are closer to the other
|
||||
* macro definitions below. The two lists must be in sync.
|
||||
*/
|
||||
#define NVGPU_FIFO_KICKOFF_PROFILE_EVENTS \
|
||||
"ioctl_entry", \
|
||||
"entry", \
|
||||
"job_tracking", \
|
||||
"append", \
|
||||
"end", \
|
||||
"ioctl_exit", \
|
||||
NULL \
|
||||
|
||||
/*
|
||||
* The kickoff profile events; these are used to index into the profile's sample
|
||||
* array.
|
||||
*/
|
||||
#define PROF_KICKOFF_IOCTL_ENTRY 0U
|
||||
#define PROF_KICKOFF_ENTRY 1U
|
||||
#define PROF_KICKOFF_JOB_TRACKING 2U
|
||||
#define PROF_KICKOFF_APPEND 3U
|
||||
#define PROF_KICKOFF_END 4U
|
||||
#define PROF_KICKOFF_IOCTL_EXIT 5U
|
||||
|
||||
#endif
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_PROFILE_H
|
||||
#define NVGPU_PROFILE_H
|
||||
|
||||
/*
|
||||
* Number of entries in the kickoff latency buffer, used to calculate
|
||||
* the profiling and histogram. This number is calculated to be statistically
|
||||
* significative on a histogram on a 5% step
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
#define FIFO_PROFILING_ENTRIES 16384U
|
||||
#endif
|
||||
|
||||
enum {
|
||||
PROFILE_IOCTL_ENTRY = 0U,
|
||||
PROFILE_ENTRY,
|
||||
PROFILE_JOB_TRACKING,
|
||||
PROFILE_APPEND,
|
||||
PROFILE_END,
|
||||
PROFILE_IOCTL_EXIT,
|
||||
PROFILE_MAX
|
||||
};
|
||||
|
||||
struct nvgpu_profile {
|
||||
u64 timestamp[PROFILE_MAX];
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct nvgpu_profile *nvgpu_profile_acquire(struct gk20a *g);
|
||||
void nvgpu_profile_release(struct gk20a *g,
|
||||
struct nvgpu_profile *profile);
|
||||
void nvgpu_profile_snapshot(struct nvgpu_profile *profile, int idx);
|
||||
#else
|
||||
static inline struct nvgpu_profile *
|
||||
nvgpu_profile_acquire(struct gk20a *g)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void nvgpu_profile_release(struct gk20a *g,
|
||||
struct nvgpu_profile *profile)
|
||||
{
|
||||
}
|
||||
static inline void nvgpu_profile_snapshot(
|
||||
struct nvgpu_profile *profile, int idx)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* NVGPU_PROFILE_H */
|
||||
168
drivers/gpu/nvgpu/include/nvgpu/swprofile.h
Normal file
168
drivers/gpu/nvgpu/include/nvgpu/swprofile.h
Normal file
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_PROFILE_H
|
||||
#define NVGPU_PROFILE_H
|
||||
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/kref.h>
|
||||
|
||||
struct nvgpu_debug_context;
|
||||
|
||||
/*
|
||||
* Number of entries in the kickoff latency buffer used to calculate the
|
||||
* profiling and histogram. This number is calculated to be statistically
|
||||
* significant on a histogram on a 5% step.
|
||||
*/
|
||||
#define PROFILE_ENTRIES 16384U
|
||||
|
||||
struct nvgpu_swprofiler {
|
||||
struct nvgpu_mutex lock;
|
||||
|
||||
/**
|
||||
* The number of sample components that make up a sample for this
|
||||
* profiler.
|
||||
*/
|
||||
u32 psample_len;
|
||||
|
||||
/**
|
||||
* Sample array: this is essentially a matrix where rows correspond to
|
||||
* a given sample and columns correspond to a type of sample. Number of
|
||||
* samples is always %PROFILE_ENTRIES. This 1d array is accessed with
|
||||
* row-major indexing.
|
||||
*/
|
||||
u64 *samples;
|
||||
|
||||
/**
|
||||
* Index of the next sample to write in the sample array. Will be wrapped at
|
||||
* %PROFILE_ENTRIES.
|
||||
*/
|
||||
u32 sample_index;
|
||||
|
||||
/**
|
||||
* Column names used for printing the histogram. This is NULL terminated
|
||||
* so that the profiler can infer the number of subsamples in a
|
||||
* psample.
|
||||
*/
|
||||
const char **col_names;
|
||||
|
||||
struct nvgpu_ref ref;
|
||||
|
||||
/**
|
||||
* Necessary since we won't have access to a gk20a struct to vfree()
|
||||
* against when this profiler is freed via an nvgpu_ref.
|
||||
*/
|
||||
struct gk20a *g;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Create a profiler with the passed column names.
|
||||
*
|
||||
* @param[in] g The GPU that owns this profiler.
|
||||
* @param[in] p Pointer to a profiler object to initialize.
|
||||
* @param[in] col_names %NULL terminated list of column names.
|
||||
*
|
||||
* The sample array length is determined by the NULL terminated %col_names
|
||||
* array. This will not allocate the underlying data; that's controlled by
|
||||
* the open and close functions:
|
||||
*
|
||||
* nvgpu_swprofile_open()
|
||||
* nvgpu_swprofile_close()
|
||||
*
|
||||
* Once nvgpu_swprofile_initialize() is called all of the below functions
|
||||
* may also be called. All of the sampling related functions will become
|
||||
* no-ops if the SW profiler is not opened.
|
||||
*/
|
||||
void nvgpu_swprofile_initialize(struct gk20a *g,
|
||||
struct nvgpu_swprofiler *p,
|
||||
const char **col_names);
|
||||
|
||||
/**
|
||||
* @brief Open a profiler for use.
|
||||
*
|
||||
* @param[in] g The GPU that owns this profiler.
|
||||
* @param[in] p The profiler to open.
|
||||
*
|
||||
* This function prepares a SW profiler object for actual profiling. Necessary
|
||||
* data structures are allocated and subsequent snapshots will be captured.
|
||||
*
|
||||
* SW profiler objects are reference counted: for each open call made, a
|
||||
* corresponding close call must also be made.
|
||||
*
|
||||
* @return Returns 0 on success, otherwise a negative error code.
|
||||
*/
|
||||
int nvgpu_swprofile_open(struct gk20a *g, struct nvgpu_swprofiler *p);
|
||||
|
||||
/**
|
||||
* @brief Close a profiler.
|
||||
*
|
||||
* @param[in] p The profiler to close.
|
||||
*
|
||||
* Close call corresponding to nvgpu_swprofile_open().
|
||||
*/
|
||||
void nvgpu_swprofile_close(struct nvgpu_swprofiler *p);
|
||||
|
||||
/**
|
||||
* @brief Begin a series of timestamp samples.
|
||||
*
|
||||
* @param[in] p The profiler to start sampling with.
|
||||
*
|
||||
* Each iteration through a given SW sequence requires one call to this
|
||||
* function. It essentially just increments (with wraparound) an internal
|
||||
* tracker which points to the sample space in the internal sample array.
|
||||
* Typical usage is to call nvgpu_swprofile_begin_sample() and then a
|
||||
* sequence of calls to nvgpu_swprofile_snapshot().
|
||||
*
|
||||
* Once done with the sequence being profiled nothing needs to happen. When
|
||||
* the next iteration of the sequence is executed this function should be
|
||||
* called again.
|
||||
*/
|
||||
void nvgpu_swprofile_begin_sample(struct nvgpu_swprofiler *p);
|
||||
|
||||
/**
|
||||
* @brief Capture a timestamp sample.
|
||||
*
|
||||
* @param[in] p The profiler to sample with.
|
||||
* @param[in] idx The index to the subsample to capture.
|
||||
*
|
||||
* This captures a subsample. Any given run through a SW sequence that is
|
||||
* being profiled will result in one or more subsamples which together make
|
||||
* up a sample.
|
||||
*/
|
||||
void nvgpu_swprofile_snapshot(struct nvgpu_swprofiler *p, u32 idx);
|
||||
|
||||
/**
|
||||
* @brief Print percentile ranges for a SW profiler.
|
||||
*
|
||||
* @param[in] g The GPU that owns this profiler.
|
||||
* @param[in] p The profiler to print.
|
||||
* @param[in] o A debug context object used for printing.
|
||||
*
|
||||
* Print a percentile table for all columns of sub-samples. This gives a
|
||||
* good overview of the collected data.
|
||||
*/
|
||||
void nvgpu_swprofile_print_ranges(struct gk20a *g,
|
||||
struct nvgpu_swprofiler *p,
|
||||
struct nvgpu_debug_context *o);
|
||||
|
||||
#endif /* NVGPU_PROFILE_H */
|
||||
Reference in New Issue
Block a user