From 70ce67df2dba25bf118cbc81b645149b386ec75c Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 1 Jun 2020 19:56:37 -0500 Subject: [PATCH] gpu: nvgpu: Add a generic profiler Add a generic profiler based on the channel kickoff profiler. This aims to provide a mechanism to allow engineers to (more) easily profile arbitrary software paths within nvgpu. Usage of this profiler is still primarily through debugfs. Next up is a generic debugfs interface for this profiler in the Linux code. The end goal for this is to profile the recovery code and generate interesting statistics. JIRA NVGPU-5606 Signed-off-by: Alex Waterman Change-Id: I99783ec7e5143855845bde4e98760ff43350456d Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2355319 Tested-by: mobile promotions Reviewed-by: mobile promotions --- arch/nvgpu-common.yaml | 10 +- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.sources | 1 + drivers/gpu/nvgpu/common/fifo/fifo.c | 9 + drivers/gpu/nvgpu/common/fifo/submit.c | 42 +-- drivers/gpu/nvgpu/common/swdebug/profile.c | 284 ++++++++++++++++++ drivers/gpu/nvgpu/include/nvgpu/channel.h | 4 +- drivers/gpu/nvgpu/include/nvgpu/fifo.h | 15 +- .../gpu/nvgpu/include/nvgpu/fifo/swprofile.h | 50 +++ drivers/gpu/nvgpu/include/nvgpu/profile.h | 70 ----- drivers/gpu/nvgpu/include/nvgpu/swprofile.h | 168 +++++++++++ drivers/gpu/nvgpu/os/linux/channel.h | 2 +- drivers/gpu/nvgpu/os/linux/debug.c | 2 - drivers/gpu/nvgpu/os/linux/debug_fifo.c | 189 +----------- drivers/gpu/nvgpu/os/linux/debug_fifo.h | 3 +- drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 17 +- libs/dgpu/libnvgpu-drv-dgpu_safe.export | 2 + libs/igpu/libnvgpu-drv-igpu_safe.export | 2 + 18 files changed, 579 insertions(+), 292 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/swdebug/profile.c create mode 100644 drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h delete mode 100644 drivers/gpu/nvgpu/include/nvgpu/profile.h create mode 100644 drivers/gpu/nvgpu/include/nvgpu/swprofile.h diff --git 
a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index 6db08f30b..3f57422bc 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -357,8 +357,7 @@ fifo: common/fifo/priv_cmdbuf.c, common/fifo/job.c, include/nvgpu/priv_cmdbuf.h, - include/nvgpu/job.h, - include/nvgpu/profile.h ] + include/nvgpu/job.h ] deps: [ ] runlist: safe: yes @@ -993,6 +992,13 @@ power_features: sources: [ common/power_features/pg/pg.c, include/nvgpu/power_features/pg.h ] +swdebug: + owner: Alex W + safe: no + sources: [ common/swdebug/profile.c, + include/nvgpu/swprofile.h, + include/nvgpu/fifo/swprofile.h ] + ## ## HAL units. Currently they are under common but this needs to change. ## We are moving these to a top level directory. diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 35718b529..63c2778b0 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -182,6 +182,7 @@ nvgpu-y += \ common/utils/rbtree.o \ common/utils/string.o \ common/utils/worker.o \ + common/swdebug/profile.o \ common/ptimer/ptimer.o \ common/perf/perfbuf.o \ common/therm/therm.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 47ca68365..447c1d660 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -95,6 +95,7 @@ srcs += common/device.c \ common/utils/rbtree.c \ common/utils/string.c \ common/utils/worker.c \ + common/swdebug/profile.c \ common/init/nvgpu_init.c \ common/mm/allocators/nvgpu_allocator.c \ common/mm/allocators/bitmap_allocator.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/fifo.c b/drivers/gpu/nvgpu/common/fifo/fifo.c index ed83b9db8..c8241c13f 100644 --- a/drivers/gpu/nvgpu/common/fifo/fifo.c +++ b/drivers/gpu/nvgpu/common/fifo/fifo.c @@ -35,6 +35,12 @@ #include #include #include +#include +#include + +static const char *nvgpu_fifo_kickoff_profile_events[] = { + NVGPU_FIFO_KICKOFF_PROFILE_EVENTS, +}; void nvgpu_fifo_cleanup_sw_common(struct gk20a *g) { @@ -93,6 
+99,9 @@ int nvgpu_fifo_setup_sw_common(struct gk20a *g) nvgpu_mutex_init(&f->deferred_reset_mutex); #endif + nvgpu_swprofile_initialize(g, &f->kickoff_profiler, + nvgpu_fifo_kickoff_profile_events); + err = nvgpu_channel_setup_sw(g); if (err != 0) { nvgpu_err(g, "failed to init channel support"); diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 9e288937e..fe315df29 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -34,11 +34,13 @@ #include #include #include -#include +#include #include #include #include +#include + /* * We might need two extra gpfifo entries per submit - one for pre fence and * one for post fence. @@ -340,7 +342,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, u32 flags, struct nvgpu_channel_fence *fence, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile, + struct nvgpu_swprofiler *profiler, bool need_deferred_cleanup) { bool skip_buffer_refcounting = (flags & @@ -358,7 +360,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, goto clean_up_job; } - nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING); + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING); /* * wait_cmd can be unset even if flag_fence_wait exists; the @@ -432,11 +434,11 @@ static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c, struct nvgpu_gpfifo_userdata userdata, u32 num_entries, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile) + struct nvgpu_swprofiler *profiler) { int err; - nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING); + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING); err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries); @@ -475,7 +477,7 @@ static int nvgpu_do_submit(struct nvgpu_channel *c, u32 flags, struct nvgpu_channel_fence *fence, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile, + struct 
nvgpu_swprofiler *profiler, bool need_job_tracking, bool need_deferred_cleanup) { @@ -502,17 +504,17 @@ static int nvgpu_do_submit(struct nvgpu_channel *c, if (need_job_tracking) { err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo, userdata, num_entries, flags, fence, - fence_out, profile, need_deferred_cleanup); + fence_out, profiler, need_deferred_cleanup); } else { err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo, - userdata, num_entries, fence_out, profile); + userdata, num_entries, fence_out, profiler); } if (err != 0) { return err; } - nvgpu_profile_snapshot(profile, PROFILE_APPEND); + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND); g->ops.userd.gp_put(g, c); @@ -527,7 +529,7 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c, u32 flags, struct nvgpu_channel_fence *fence, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile) + struct nvgpu_swprofiler *profiler) { bool skip_buffer_refcounting = (flags & NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U; @@ -608,7 +610,7 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c, } err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence, - fence_out, profile, need_job_tracking, false); + fence_out, profiler, need_job_tracking, false); if (err != 0) { goto clean_up; } @@ -633,7 +635,7 @@ static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c, u32 flags, struct nvgpu_channel_fence *fence, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile) + struct nvgpu_swprofiler *profiler) { bool skip_buffer_refcounting = (flags & NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U; @@ -682,7 +684,7 @@ static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c, } err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence, - fence_out, profile, need_job_tracking, true); + fence_out, profiler, need_job_tracking, true); if (err != 0) { goto clean_up; } @@ -734,7 +736,7 @@ static int nvgpu_submit_channel_gpfifo(struct 
nvgpu_channel *c, u32 flags, struct nvgpu_channel_fence *fence, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile) + struct nvgpu_swprofiler *profiler) { struct gk20a *g = c->g; int err; @@ -755,7 +757,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, return -ENOMEM; } - nvgpu_profile_snapshot(profile, PROFILE_ENTRY); + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY); /* update debug settings */ nvgpu_ltc_sync_enabled(g); @@ -765,12 +767,12 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, #ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS if (c->deterministic) { err = nvgpu_submit_deterministic(c, gpfifo, userdata, - num_entries, flags, fence, fence_out, profile); + num_entries, flags, fence, fence_out, profiler); } else #endif { err = nvgpu_submit_nondeterministic(c, gpfifo, userdata, - num_entries, flags, fence, fence_out, profile); + num_entries, flags, fence, fence_out, profiler); } if (err != 0) { @@ -793,7 +795,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, nvgpu_log_info(g, "post-submit put %d, get %d, size %d", c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); - nvgpu_profile_snapshot(profile, PROFILE_END); + nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END); nvgpu_log_fn(g, "done"); return err; @@ -805,10 +807,10 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, u32 flags, struct nvgpu_channel_fence *fence, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile) + struct nvgpu_swprofiler *profiler) { return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries, - flags, fence, fence_out, profile); + flags, fence, fence_out, profiler); } int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c, diff --git a/drivers/gpu/nvgpu/common/swdebug/profile.c b/drivers/gpu/nvgpu/common/swdebug/profile.c new file mode 100644 index 000000000..3bab64696 --- /dev/null +++ b/drivers/gpu/nvgpu/common/swdebug/profile.c @@ -0,0 +1,284 @@ +/* + * Copyright 
(c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * A simple profiler, capable of generating histograms. + */ + +/* + * The sample array is a 1d array comprised of repeating rows of data. To + * index the array as though it were a row-major matrix, we need to do some + * simple math. + */ +static inline u32 matrix_to_linear_index(struct nvgpu_swprofiler *p, + u32 row, u32 col) +{ + return (row * p->psample_len) + col; +} + +void nvgpu_swprofile_initialize(struct gk20a *g, + struct nvgpu_swprofiler *p, + const char *col_names[]) +{ + if (p->col_names != NULL) { + /* + * Profiler is already initialized. 
+ */ + return; + } + + nvgpu_mutex_init(&p->lock); + p->g = g; + + p->col_names = col_names; + + p->psample_len = 0U; + while (col_names[p->psample_len] != NULL) { + p->psample_len++; + } +} + +int nvgpu_swprofile_open(struct gk20a *g, struct nvgpu_swprofiler *p) +{ + int ret = 0; + + nvgpu_mutex_acquire(&p->lock); + + /* + * If this profiler is already opened, just take a ref and return. + */ + if (p->samples != NULL) { + nvgpu_ref_get(&p->ref); + goto done; + } + + p->samples = nvgpu_vzalloc(g, + PROFILE_ENTRIES * p->psample_len * + sizeof(*p->samples)); + if (p->samples == NULL) { + ret = -ENOMEM; + goto done; + } + + /* + * Otherwise allocate the necessary data structures, etc. + */ + nvgpu_ref_init(&p->ref); + +done: + nvgpu_mutex_release(&p->lock); + return ret; +} + +static void nvgpu_swprofile_free(struct nvgpu_ref *ref) +{ + struct nvgpu_swprofiler *p = container_of(ref, struct nvgpu_swprofiler, ref); + + nvgpu_vfree(p->g, p->samples); + p->samples = NULL; +} + +void nvgpu_swprofile_close(struct nvgpu_swprofiler *p) +{ + nvgpu_ref_put(&p->ref, nvgpu_swprofile_free); +} + +/* + * Note: this does _not_ lock the profiler. This is a conscious choice. If we + * do lock the profiler then there's the possibility that you get bad data due + * to the snapshot blocking on some other user printing the contents of the + * profiler. + * + * Instead, this way, it's possible that someone printing the data in the + * profiler gets a sample that's a mix of old and new. That's not great, but + * IMO better than a completely bogus sample. + * + * Also it's really quite unlikely for this race to happen in practice as the + * print function is executed as a result of a debugfs call. + */ +void nvgpu_swprofile_snapshot(struct nvgpu_swprofiler *p, u32 idx) +{ + u32 index; + + /* + * Handle two cases: the first allows calling code to simply skip + * any profiling by passing in a NULL profiler; see the CDE code + * for this. The second case is if a profiler is not "opened". 
+ */ + if (p == NULL || p->samples == NULL) { + return; + } + + /* + * p->sample_index is the current row, aka sample, we are writing to. + * idx is the column - i.e the sub-sample. + */ + index = matrix_to_linear_index(p, p->sample_index, idx); + + p->samples[index] = nvgpu_current_time_ns(); +} + +void nvgpu_swprofile_begin_sample(struct nvgpu_swprofiler *p) +{ + nvgpu_mutex_acquire(&p->lock); + p->sample_index++; + + /* Handle wrap. */ + if (p->sample_index >= PROFILE_ENTRIES) { + p->sample_index = 0U; + } + nvgpu_mutex_release(&p->lock); +} + +static int profile_cmp(const void *a, const void *b) +{ + return *((const u64 *) a) - *((const u64 *) b); +} + +#define PERCENTILE_WIDTH 5 +#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) + +static u32 nvgpu_swprofile_build_ranges(struct nvgpu_swprofiler *p, + u64 *storage, + u64 *percentiles, + u32 index_end, + u32 index_start) +{ + u32 i; + u32 nelem = 0U; + + /* + * Iterate through a column and build a temporary slice array of samples + * so that we can sort them without corrupting the current data. + * + * Note that we have to first convert the row/column indexes into linear + * indexes to access the underlying sample array. + */ + for (i = 0; i < PROFILE_ENTRIES; i++) { + u32 linear_idx_start = matrix_to_linear_index(p, i, index_start); + u32 linear_idx_end = matrix_to_linear_index(p, i, index_end); + + if (p->samples[linear_idx_end] <= + p->samples[linear_idx_start]) { + /* This is an invalid element */ + continue; + } + + storage[nelem] = p->samples[linear_idx_end] - + p->samples[linear_idx_start]; + nelem++; + } + + /* sort it */ + sort(storage, nelem, sizeof(u64), profile_cmp, NULL); + + /* build ranges */ + for (i = 0; i < PERCENTILE_RANGES; i++) { + percentiles[i] = nelem < PERCENTILE_RANGES ? 0 : + storage[(PERCENTILE_WIDTH * (i + 1) * nelem)/100 - 1]; + } + + return nelem; +} + +/* + * Print a list of percentiles spaced by 5%. Note that the debug_context needs + * to be special here. 
_Most_ print functions in NvGPU automatically add a new + * line to the end of each print statement. This function _specifically_ + * requires that your debug print function does _NOT_ do this. + */ +void nvgpu_swprofile_print_ranges(struct gk20a *g, + struct nvgpu_swprofiler *p, + struct nvgpu_debug_context *o) +{ + u32 nelem = 0U, i, j; + u64 *sorted_data = NULL; + u64 *percentiles = NULL; + + nvgpu_mutex_acquire(&p->lock); + + if (p->samples == NULL) { + gk20a_debug_output(o, "Profiler not enabled.\n"); + goto done; + } + + sorted_data = nvgpu_vzalloc(g, + PROFILE_ENTRIES * p->psample_len * + sizeof(u64)); + percentiles = nvgpu_vzalloc(g, + PERCENTILE_RANGES * p->psample_len * + sizeof(u64)); + if (!sorted_data || !percentiles) { + nvgpu_err(g, "vzalloc: OOM!"); + goto done; + } + + /* + * Loop over each column; sort the column's data and then build + * percentile ranges based on that sorted data. + */ + for (i = 0U; i < p->psample_len; i++) { + nelem = nvgpu_swprofile_build_ranges(p, + &sorted_data[i * PROFILE_ENTRIES], + &percentiles[i * PERCENTILE_RANGES], + i, 0U); + } + + gk20a_debug_output(o, "Samples: %u\n", nelem); + gk20a_debug_output(o, "%6s", "Perc"); + for (i = 0U; i < p->psample_len; i++) { + gk20a_debug_output(o, " %15s", p->col_names[i]); + } + gk20a_debug_output(o, "\n"); + gk20a_debug_output(o, "%6s", "----"); + for (i = 0U; i < p->psample_len; i++) { + gk20a_debug_output(o, " %15s", "---------------"); + } + gk20a_debug_output(o, "\n"); + + /* + * percentiles is another matrix, but this time it's using column major indexing. 
+ */ + for (i = 0U; i < PERCENTILE_RANGES; i++) { + gk20a_debug_output(o, "%3upc ", PERCENTILE_WIDTH * (i + 1)); + for (j = 0U; j < p->psample_len; j++) { + gk20a_debug_output(o, " %15llu", + percentiles[(j * PERCENTILE_RANGES) + i]); + } + gk20a_debug_output(o, "\n"); + } + gk20a_debug_output(o, "\n"); + +done: + nvgpu_vfree(g, sorted_data); + nvgpu_vfree(g, percentiles); + nvgpu_mutex_release(&p->lock); +} diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 7c2a39cc6..fca50b5ff 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -40,7 +40,7 @@ struct gk20a; struct dbg_session_gk20a; struct nvgpu_fence_type; -struct nvgpu_profile; +struct nvgpu_swprofiler; struct nvgpu_channel_sync; struct nvgpu_gpfifo_userdata; struct nvgpu_gr_subctx; @@ -576,7 +576,7 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, u32 flags, struct nvgpu_channel_fence *fence, struct nvgpu_fence_type **fence_out, - struct nvgpu_profile *profile); + struct nvgpu_swprofiler *profiler); int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c, struct nvgpu_gpfifo_entry *gpfifo, diff --git a/drivers/gpu/nvgpu/include/nvgpu/fifo.h b/drivers/gpu/nvgpu/include/nvgpu/fifo.h index a74dc8a26..0b9048edf 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/fifo.h +++ b/drivers/gpu/nvgpu/include/nvgpu/fifo.h @@ -197,6 +197,8 @@ #include #include #include +#include + /** * H/w defined value for Channel ID type */ @@ -231,6 +233,7 @@ struct nvgpu_engine_info; struct nvgpu_runlist_info; struct nvgpu_channel; struct nvgpu_tsg; +struct nvgpu_swprofiler; struct nvgpu_fifo { /** Pointer to GPU driver struct. */ @@ -297,16 +300,8 @@ struct nvgpu_fifo { /** Number of active runlists. 
*/ u32 num_runlists; -#ifdef CONFIG_DEBUG_FS - struct { - struct nvgpu_profile *data; - nvgpu_atomic_t get; - bool enabled; - u64 *sorted; - struct nvgpu_ref ref; - struct nvgpu_mutex lock; - } profile; -#endif + struct nvgpu_swprofiler kickoff_profiler; + #ifdef CONFIG_NVGPU_USERD struct nvgpu_mutex userd_mutex; struct nvgpu_mem *userd_slabs; diff --git a/drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h b/drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h new file mode 100644 index 000000000..57169aef9 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_FIFO_PROFILE_H +#define NVGPU_FIFO_PROFILE_H + +/* + * Define these here, not in the C file so that they are closer to the other + * macro definitions below. The two lists must be in sync. 
+ */ +#define NVGPU_FIFO_KICKOFF_PROFILE_EVENTS \ + "ioctl_entry", \ + "entry", \ + "job_tracking", \ + "append", \ + "end", \ + "ioctl_exit", \ + NULL \ + +/* + * The kickoff profile events; these are used to index into the profile's sample + * array. + */ +#define PROF_KICKOFF_IOCTL_ENTRY 0U +#define PROF_KICKOFF_ENTRY 1U +#define PROF_KICKOFF_JOB_TRACKING 2U +#define PROF_KICKOFF_APPEND 3U +#define PROF_KICKOFF_END 4U +#define PROF_KICKOFF_IOCTL_EXIT 5U + +#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/profile.h b/drivers/gpu/nvgpu/include/nvgpu/profile.h deleted file mode 100644 index dc54e7ffe..000000000 --- a/drivers/gpu/nvgpu/include/nvgpu/profile.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef NVGPU_PROFILE_H -#define NVGPU_PROFILE_H - -/* - * Number of entries in the kickoff latency buffer, used to calculate - * the profiling and histogram. This number is calculated to be statistically - * significative on a histogram on a 5% step - */ -#ifdef CONFIG_DEBUG_FS -#define FIFO_PROFILING_ENTRIES 16384U -#endif - -enum { - PROFILE_IOCTL_ENTRY = 0U, - PROFILE_ENTRY, - PROFILE_JOB_TRACKING, - PROFILE_APPEND, - PROFILE_END, - PROFILE_IOCTL_EXIT, - PROFILE_MAX -}; - -struct nvgpu_profile { - u64 timestamp[PROFILE_MAX]; -}; - -#ifdef CONFIG_DEBUG_FS -struct nvgpu_profile *nvgpu_profile_acquire(struct gk20a *g); -void nvgpu_profile_release(struct gk20a *g, - struct nvgpu_profile *profile); -void nvgpu_profile_snapshot(struct nvgpu_profile *profile, int idx); -#else -static inline struct nvgpu_profile * -nvgpu_profile_acquire(struct gk20a *g) -{ - return NULL; -} -static inline void nvgpu_profile_release(struct gk20a *g, - struct nvgpu_profile *profile) -{ -} -static inline void nvgpu_profile_snapshot( - struct nvgpu_profile *profile, int idx) -{ -} -#endif - -#endif /* NVGPU_PROFILE_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/swprofile.h b/drivers/gpu/nvgpu/include/nvgpu/swprofile.h new file mode 100644 index 000000000..94bcf28e4 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/swprofile.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PROFILE_H +#define NVGPU_PROFILE_H + +#include +#include +#include + +struct nvgpu_debug_context; + +/* + * Number of entries in the kickoff latency buffer used to calculate the + * profiling and histogram. This number is calculated to be statistically + * significant on a histogram on a 5% step. + */ +#define PROFILE_ENTRIES 16384U + +struct nvgpu_swprofiler { + struct nvgpu_mutex lock; + + /** + * The number of sample components that make up a sample for this + * profiler. + */ + u32 psample_len; + + /** + * Sample array: this is essentially a matrix where rows correspond to + * a given sample and columns correspond to a type of sample. Number of + * samples is always %PROFILE_ENTRIES. This 1d array is accessed with + * row-major indexing. + */ + u64 *samples; + + /** + * Index of the sample row currently being written. Will be wrapped at + * %PROFILE_ENTRIES. 
+ */ + u32 sample_index; + + /** + * Column names used for printing the histogram. This is NULL terminated + * so that the profiler can infer the number of subsamples in a + * psample. + */ + const char **col_names; + + struct nvgpu_ref ref; + + /** + * Necessary since we won't have access to a gk20a struct to vfree() + * against when this profiler is freed via an nvgpu_ref. + */ + struct gk20a *g; +}; + +/** + * @brief Create a profiler with the passed column names. + * + * @param[in] g The GPU that owns this profiler. + * @param[in] p Pointer to a profiler object to initialize. + * @param[in] col_names %NULL terminated list of column names. + * + * The sample array length is determined by the NULL terminated %col_names + * array. This will not allocate the underlying data; that's controlled by + * the open and close functions: + * + * nvgpu_swprofile_open() + * nvgpu_swprofile_close() + * + * Once nvgpu_swprofile_initialize() is called all of the below functions + * may also be called. All of the sampling related functions will become + * no-ops if the SW profiler is not opened. + */ +void nvgpu_swprofile_initialize(struct gk20a *g, + struct nvgpu_swprofiler *p, + const char **col_names); + +/** + * @brief Open a profiler for use. + * + * @param[in] g The GPU that owns this profiler. + * @param[in] p The profiler to open. + * + * This function prepares a SW profiler object for actual profiling. Necessary + * data structures are allocated and subsequent snapshots will be captured. + * + * SW profiler objects are reference counted: for each open call made, a + * corresponding close call must also be made. + * + * @return Returns 0 on success, otherwise a negative error code. + */ +int nvgpu_swprofile_open(struct gk20a *g, struct nvgpu_swprofiler *p); + +/** + * @brief Close a profiler. + * + * @param[in] p The profiler to close. + * + * Close call corresponding to nvgpu_swprofile_open(). 
+ */ +void nvgpu_swprofile_close(struct nvgpu_swprofiler *p); + +/** + * @brief Begin a series of timestamp samples. + * + * @param[in] p The profiler to start sampling with. + * + * Each iteration through a given SW sequence requires one call to this + * function. It essentially just increments (with wraparound) an internal + * tracker which points to the sample space in the internal sample array. + * Typical usage is to call nvgpu_swprofile_begin_sample() and then a + * sequence of calls to nvgpu_swprofile_snapshot(). + * + * Once done with the sequence being profiled nothing needs to happen. When + * the next iteration of the sequence is executed this function should be + * called again. + */ +void nvgpu_swprofile_begin_sample(struct nvgpu_swprofiler *p); + +/** + * @brief Capture a timestamp sample. + * + * @param[in] p The profiler to sample with. + * @param[in] idx The index to the subsample to capture. + * + * This captures a subsample. Any given run through a SW sequence that is + * being profiled will result in one or more subsamples which together make + * up a sample. + */ +void nvgpu_swprofile_snapshot(struct nvgpu_swprofiler *p, u32 idx); + +/** + * @brief Print percentile ranges for a SW profiler. + * + * @param[in] g The GPU that owns this profiler. + * @param[in] p The profiler to print. + * @param[in] o A debug context object used for printing. + * + * Print a percentile table for all columns of sub-samples. This gives a + * good overview of the collected data. 
+ */ +void nvgpu_swprofile_print_ranges(struct gk20a *g, + struct nvgpu_swprofiler *p, + struct nvgpu_debug_context *o); + +#endif /* NVGPU_PROFILE_H */ diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h index d7262c3fe..b9d27abc9 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.h +++ b/drivers/gpu/nvgpu/os/linux/channel.h @@ -26,7 +26,7 @@ struct nvgpu_gpfifo; struct nvgpu_submit_gpfifo_args; struct nvgpu_channel_fence; struct nvgpu_fence_type; -struct nvgpu_profile; +struct nvgpu_swprofile; struct nvgpu_os_linux; struct sync_fence; diff --git a/drivers/gpu/nvgpu/os/linux/debug.c b/drivers/gpu/nvgpu/os/linux/debug.c index de7bd2ced..e4d99bc49 100644 --- a/drivers/gpu/nvgpu/os/linux/debug.c +++ b/drivers/gpu/nvgpu/os/linux/debug.c @@ -442,8 +442,6 @@ void gk20a_debug_deinit(struct gk20a *g) if (!l->debugfs) return; - gk20a_fifo_debugfs_deinit(g); - debugfs_remove_recursive(l->debugfs); debugfs_remove(l->debugfs_alias); } diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.c b/drivers/gpu/nvgpu/os/linux/debug_fifo.c index d2752c8c1..4239861bb 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_fifo.c +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved. + * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -9,7 +9,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * */ #include "debug_fifo.h" @@ -24,7 +23,9 @@ #include #include #include -#include +#include + +#include void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); @@ -147,41 +148,12 @@ static int gk20a_fifo_profile_enable(void *data, u64 val) struct gk20a *g = (struct gk20a *) data; struct nvgpu_fifo *f = &g->fifo; - - nvgpu_mutex_acquire(&f->profile.lock); if (val == 0) { - if (f->profile.enabled) { - f->profile.enabled = false; - nvgpu_ref_put(&f->profile.ref, - __gk20a_fifo_profile_free); - } + nvgpu_swprofile_close(&f->kickoff_profiler); + return 0; } else { - if (!f->profile.enabled) { - /* not kref init as it can have a running condition if - * we enable/disable/enable while kickoff is happening - */ - if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) { - f->profile.data = nvgpu_vzalloc(g, - FIFO_PROFILING_ENTRIES * - sizeof(struct nvgpu_profile)); - f->profile.sorted = nvgpu_vzalloc(g, - FIFO_PROFILING_ENTRIES * - sizeof(u64)); - if (!(f->profile.data && f->profile.sorted)) { - nvgpu_vfree(g, f->profile.data); - nvgpu_vfree(g, f->profile.sorted); - nvgpu_mutex_release(&f->profile.lock); - return -ENOMEM; - } - nvgpu_ref_init(&f->profile.ref); - } - atomic_set(&f->profile.get.atomic_var, 0); - f->profile.enabled = true; - } + return nvgpu_swprofile_open(g, &f->kickoff_profiler); } - nvgpu_mutex_release(&f->profile.lock); - - return 0; } DEFINE_SIMPLE_ATTRIBUTE( @@ -191,96 +163,20 @@ DEFINE_SIMPLE_ATTRIBUTE( "%llu\n" ); -static int __profile_cmp(const void *a, const void *b) +static void gk20a_fifo_write_to_seqfile_no_nl(void *ctx, const char *str) { - return *((unsigned long long *) a) - *((unsigned long long *) b); -} - -/* - * This uses about 800b in the stack, but the function using it is not part - * of a callstack where much memory is being used, so it is fine - */ -#define PERCENTILE_WIDTH 5 -#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) - -static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, - u64 *percentiles, u32 index_end, u32 
index_start) -{ - unsigned int nelem = 0; - unsigned int index; - struct nvgpu_profile *profile; - - for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { - profile = &g->fifo.profile.data[index]; - - if (profile->timestamp[index_end] > - profile->timestamp[index_start]) { - /* This is a valid element */ - g->fifo.profile.sorted[nelem] = - profile->timestamp[index_end] - - profile->timestamp[index_start]; - nelem++; - } - } - - /* sort it */ - sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), - __profile_cmp, NULL); - - /* build ranges */ - for (index = 0; index < PERCENTILE_RANGES; index++) { - percentiles[index] = nelem < PERCENTILE_RANGES ? 0 : - g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * - nelem)/100 - 1]; - } - return nelem; + seq_puts((struct seq_file *)ctx, str); /* str is data, not a format */ } static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) { struct gk20a *g = s->private; - unsigned int get, nelem, index; - /* - * 800B in the stack, but function is declared statically and only - * called from debugfs handler - */ - u64 percentiles_ioctl[PERCENTILE_RANGES]; - u64 percentiles_kickoff[PERCENTILE_RANGES]; - u64 percentiles_jobtracking[PERCENTILE_RANGES]; - u64 percentiles_append[PERCENTILE_RANGES]; - u64 percentiles_userd[PERCENTILE_RANGES]; + struct nvgpu_debug_context o = { + .fn = gk20a_fifo_write_to_seqfile_no_nl, + .ctx = s, + }; - if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { - seq_printf(s, "Profiling disabled\n"); - return 0; - } - - get = atomic_read(&g->fifo.profile.get.atomic_var); - - __gk20a_fifo_create_stats(g, percentiles_ioctl, - PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_kickoff, - PROFILE_END, PROFILE_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_jobtracking, - PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_append, - PROFILE_APPEND, PROFILE_JOB_TRACKING); - nelem = __gk20a_fifo_create_stats(g, percentiles_userd, -
PROFILE_END, PROFILE_APPEND); - - seq_printf(s, "Number of kickoffs: %d\n", nelem); - seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); - - for (index = 0; index < PERCENTILE_RANGES; index++) - seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", - PERCENTILE_WIDTH * (index+1), - percentiles_ioctl[index], - percentiles_kickoff[index], - percentiles_append[index], - percentiles_jobtracking[index], - percentiles_userd[index]); - - nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); + nvgpu_swprofile_print_ranges(g, &g->fifo.kickoff_profiler, &o); return 0; } @@ -297,7 +193,6 @@ static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { .release = single_release, }; - void gk20a_fifo_debugfs_init(struct gk20a *g) { struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); @@ -318,11 +213,6 @@ void gk20a_fifo_debugfs_init(struct gk20a *g) if (IS_ERR_OR_NULL(profile_root)) return; - nvgpu_mutex_init(&g->fifo.profile.lock); - g->fifo.profile.enabled = false; - atomic_set(&g->fifo.profile.get.atomic_var, 0); - atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); - debugfs_create_file("enable", 0600, profile_root, g, &gk20a_fifo_profile_enable_debugfs_fops); @@ -330,54 +220,3 @@ void gk20a_fifo_debugfs_init(struct gk20a *g) &gk20a_fifo_profile_stats_debugfs_fops); } - -void nvgpu_profile_snapshot(struct nvgpu_profile *profile, int idx) -{ - if (profile) - profile->timestamp[idx] = nvgpu_current_time_ns(); -} - -void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) -{ - struct nvgpu_fifo *f = container_of(ref, struct nvgpu_fifo, - profile.ref); - nvgpu_vfree(f->g, f->profile.data); - nvgpu_vfree(f->g, f->profile.sorted); -} - -/* Get the next element in the ring buffer of profile entries - * and grab a reference to the structure - */ -struct nvgpu_profile *nvgpu_profile_acquire(struct gk20a *g) -{ - struct nvgpu_fifo *f = &g->fifo; - struct nvgpu_profile *profile; - unsigned int index; 
- - /* If kref is zero, profiling is not enabled */ - if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) - return NULL; - index = atomic_inc_return(&f->profile.get.atomic_var); - profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; - - return profile; -} - -/* Free the reference to the structure. This allows deferred cleanups */ -void nvgpu_profile_release(struct gk20a *g, - struct nvgpu_profile *profile) -{ - nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); -} - -void gk20a_fifo_debugfs_deinit(struct gk20a *g) -{ - struct nvgpu_fifo *f = &g->fifo; - - nvgpu_mutex_acquire(&f->profile.lock); - if (f->profile.enabled) { - f->profile.enabled = false; - nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); - } - nvgpu_mutex_release(&f->profile.lock); -} diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.h b/drivers/gpu/nvgpu/os/linux/debug_fifo.h index 46ac853e6..0c02aa42d 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_fifo.h +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -17,6 +17,5 @@ struct gk20a; void gk20a_fifo_debugfs_init(struct gk20a *g); -void gk20a_fifo_debugfs_deinit(struct gk20a *g); #endif /* __NVGPU_DEBUG_FIFO_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index 5d66cc08e..c8288fb47 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -46,9 +46,11 @@ #include #include #include -#include +#include #include +#include + #include "platform_gk20a.h" #include "ioctl_channel.h" #include "channel.h" @@ -792,10 +794,11 @@ static int gk20a_ioctl_channel_submit_gpfifo( { struct nvgpu_channel_fence fence; struct nvgpu_fence_type *fence_out; - struct nvgpu_profile *profile = NULL; u32 submit_flags = 0; int fd = -1; struct gk20a *g = ch->g; + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_swprofiler *kickoff_profiler = &f->kickoff_profiler; struct nvgpu_gpfifo_userdata userdata; bool flag_fence_wait = (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U; @@ -807,8 +810,8 @@ static int gk20a_ioctl_channel_submit_gpfifo( int ret = 0; nvgpu_log_fn(g, " "); - profile = nvgpu_profile_acquire(ch->g); - nvgpu_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); + nvgpu_swprofile_begin_sample(kickoff_profiler); + nvgpu_swprofile_snapshot(kickoff_profiler, PROF_KICKOFF_IOCTL_ENTRY); if (nvgpu_channel_check_unserviceable(ch)) { return -ETIMEDOUT; @@ -846,7 +849,7 @@ static int gk20a_ioctl_channel_submit_gpfifo( ret = nvgpu_submit_channel_gpfifo_user(ch, userdata, args->num_entries, - submit_flags, &fence, &fence_out, profile); + submit_flags, &fence, &fence_out, kickoff_profiler); if (ret) { if (fd != -1) @@ -869,9 +872,7 @@ static int gk20a_ioctl_channel_submit_gpfifo( } nvgpu_fence_put(fence_out); - nvgpu_profile_snapshot(profile, PROFILE_IOCTL_EXIT); - if (profile) - 
nvgpu_profile_release(ch->g, profile); + nvgpu_swprofile_snapshot(kickoff_profiler, PROF_KICKOFF_IOCTL_EXIT); clean_up: return ret; diff --git a/libs/dgpu/libnvgpu-drv-dgpu_safe.export b/libs/dgpu/libnvgpu-drv-dgpu_safe.export index 77af1078f..320cb676c 100644 --- a/libs/dgpu/libnvgpu-drv-dgpu_safe.export +++ b/libs/dgpu/libnvgpu-drv-dgpu_safe.export @@ -725,6 +725,8 @@ nvgpu_spinlock_release nvgpu_strnadd_u32 nvgpu_sw_quiesce nvgpu_sw_quiesce_remove_support +nvgpu_swprofile_initialize +nvgpu_swprofile_snapshot nvgpu_thread_create nvgpu_thread_create_priority nvgpu_thread_get_fault_injection diff --git a/libs/igpu/libnvgpu-drv-igpu_safe.export b/libs/igpu/libnvgpu-drv-igpu_safe.export index 8f782f4d3..bc70c3833 100644 --- a/libs/igpu/libnvgpu-drv-igpu_safe.export +++ b/libs/igpu/libnvgpu-drv-igpu_safe.export @@ -740,6 +740,8 @@ nvgpu_spinlock_release nvgpu_strnadd_u32 nvgpu_sw_quiesce nvgpu_sw_quiesce_remove_support +nvgpu_swprofile_initialize +nvgpu_swprofile_snapshot nvgpu_thread_create nvgpu_thread_create_priority nvgpu_thread_get_fault_injection