Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Add a generic profiler
Add a generic profiler based on the channel kickoff profiler. This aims
to provide a mechanism to allow engineers to (more) easily profile
arbitrary software paths within nvgpu.

Usage of this profiler is still primarily through debugfs. Next up is a
generic debugfs interface for this profiler in the Linux code. The end
goal for this is to profile the recovery code and generate interesting
statistics.

JIRA NVGPU-5606

Signed-off-by: Alex Waterman <alexw@nvidia.com>
Change-Id: I99783ec7e5143855845bde4e98760ff43350456d
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2355319
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
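For orientation, here is a minimal usage sketch of the new swprofile API introduced by this change. The unit name, field name, and event names below are hypothetical placeholders; the real wiring for the kickoff path is in the fifo, submit, and ioctl hunks further down.

    /* Hypothetical example of profiling an arbitrary SW path with the new API. */
    #include <nvgpu/swprofile.h>

    /* NULL-terminated column names; the NULL lets the profiler infer the
     * number of sub-samples per sample. */
    static const char *my_profile_events[] = {
        "start", "step_one", "end", NULL,
    };

    #define MY_PROF_START    0U
    #define MY_PROF_STEP_ONE 1U
    #define MY_PROF_END      2U

    void my_unit_setup(struct gk20a *g, struct nvgpu_swprofiler *p)
    {
        /* Cheap: records column names only, does not allocate the sample buffer. */
        nvgpu_swprofile_initialize(g, p, my_profile_events);
    }

    int my_unit_enable_profiling(struct gk20a *g, struct nvgpu_swprofiler *p)
    {
        /* Allocates the sample matrix; snapshots remain no-ops until this succeeds. */
        return nvgpu_swprofile_open(g, p);
    }

    void my_profiled_path(struct nvgpu_swprofiler *p)
    {
        nvgpu_swprofile_begin_sample(p);               /* advance to a new sample row */
        nvgpu_swprofile_snapshot(p, MY_PROF_START);    /* timestamp each column */
        /* ... work being profiled ... */
        nvgpu_swprofile_snapshot(p, MY_PROF_STEP_ONE);
        /* ... more work ... */
        nvgpu_swprofile_snapshot(p, MY_PROF_END);
    }

The collected data can later be dumped as a percentile table with nvgpu_swprofile_print_ranges() and released with nvgpu_swprofile_close(), as the debugfs code in this change does for the kickoff profiler.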
@@ -357,8 +357,7 @@ fifo:
             common/fifo/priv_cmdbuf.c,
             common/fifo/job.c,
             include/nvgpu/priv_cmdbuf.h,
-            include/nvgpu/job.h,
-            include/nvgpu/profile.h ]
+            include/nvgpu/job.h ]
   deps: [ ]
 runlist:
   safe: yes
@@ -993,6 +992,13 @@ power_features:
   sources: [ common/power_features/pg/pg.c,
              include/nvgpu/power_features/pg.h ]
 
+swdebug:
+  owner: Alex W
+  safe: no
+  sources: [ common/swdebug/profile.c,
+             include/nvgpu/swprofile.h,
+             include/nvgpu/fifo/swprofile.h ]
+
 ##
 ## HAL units. Currently they are under common but this needs to change.
 ## We are moving these to a top level directory.
@@ -182,6 +182,7 @@ nvgpu-y += \
 	common/utils/rbtree.o \
 	common/utils/string.o \
 	common/utils/worker.o \
+	common/swdebug/profile.o \
 	common/ptimer/ptimer.o \
 	common/perf/perfbuf.o \
 	common/therm/therm.o \
@@ -95,6 +95,7 @@ srcs += common/device.c \
 	common/utils/rbtree.c \
 	common/utils/string.c \
 	common/utils/worker.c \
+	common/swdebug/profile.c \
 	common/init/nvgpu_init.c \
 	common/mm/allocators/nvgpu_allocator.c \
 	common/mm/allocators/bitmap_allocator.c \
@@ -35,6 +35,12 @@
 #include <nvgpu/vm_area.h>
 #include <nvgpu/nvgpu_err.h>
 #include <nvgpu/mc.h>
+#include <nvgpu/swprofile.h>
+#include <nvgpu/fifo/swprofile.h>
+
+static const char *nvgpu_fifo_kickoff_profile_events[] = {
+	NVGPU_FIFO_KICKOFF_PROFILE_EVENTS,
+};
 
 void nvgpu_fifo_cleanup_sw_common(struct gk20a *g)
 {
@@ -93,6 +99,9 @@ int nvgpu_fifo_setup_sw_common(struct gk20a *g)
 	nvgpu_mutex_init(&f->deferred_reset_mutex);
 #endif
 
+	nvgpu_swprofile_initialize(g, &f->kickoff_profiler,
+				   nvgpu_fifo_kickoff_profile_events);
+
 	err = nvgpu_channel_setup_sw(g);
 	if (err != 0) {
 		nvgpu_err(g, "failed to init channel support");
@@ -34,11 +34,13 @@
 #include <nvgpu/priv_cmdbuf.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/fence.h>
-#include <nvgpu/profile.h>
+#include <nvgpu/swprofile.h>
 #include <nvgpu/vpr.h>
 #include <nvgpu/trace.h>
 #include <nvgpu/nvhost.h>
 
+#include <nvgpu/fifo/swprofile.h>
+
 /*
  * We might need two extra gpfifo entries per submit - one for pre fence and
  * one for post fence.
@@ -340,7 +342,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile,
+		struct nvgpu_swprofiler *profiler,
 		bool need_deferred_cleanup)
 {
 	bool skip_buffer_refcounting = (flags &
@@ -358,7 +360,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
 		goto clean_up_job;
 	}
 
-	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
 
 	/*
 	 * wait_cmd can be unset even if flag_fence_wait exists; the
@@ -432,11 +434,11 @@ static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
 		struct nvgpu_gpfifo_userdata userdata,
 		u32 num_entries,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile)
+		struct nvgpu_swprofiler *profiler)
 {
 	int err;
 
-	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
 
 	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
 			num_entries);
@@ -475,7 +477,7 @@ static int nvgpu_do_submit(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile,
+		struct nvgpu_swprofiler *profiler,
 		bool need_job_tracking,
 		bool need_deferred_cleanup)
 {
@@ -502,17 +504,17 @@ static int nvgpu_do_submit(struct nvgpu_channel *c,
 	if (need_job_tracking) {
 		err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
 				userdata, num_entries, flags, fence,
-				fence_out, profile, need_deferred_cleanup);
+				fence_out, profiler, need_deferred_cleanup);
 	} else {
 		err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
-				userdata, num_entries, fence_out, profile);
+				userdata, num_entries, fence_out, profiler);
 	}
 
 	if (err != 0) {
 		return err;
 	}
 
-	nvgpu_profile_snapshot(profile, PROFILE_APPEND);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND);
 
 	g->ops.userd.gp_put(g, c);
 
@@ -527,7 +529,7 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile)
+		struct nvgpu_swprofiler *profiler)
 {
 	bool skip_buffer_refcounting = (flags &
 		NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
@@ -608,7 +610,7 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
 	}
 
 	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
-			fence_out, profile, need_job_tracking, false);
+			fence_out, profiler, need_job_tracking, false);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -633,7 +635,7 @@ static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile)
+		struct nvgpu_swprofiler *profiler)
 {
 	bool skip_buffer_refcounting = (flags &
 		NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
@@ -682,7 +684,7 @@ static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
 	}
 
 	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
-			fence_out, profile, need_job_tracking, true);
+			fence_out, profiler, need_job_tracking, true);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -734,7 +736,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile)
+		struct nvgpu_swprofiler *profiler)
 {
 	struct gk20a *g = c->g;
 	int err;
@@ -755,7 +757,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 		return -ENOMEM;
 	}
 
-	nvgpu_profile_snapshot(profile, PROFILE_ENTRY);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY);
 
 	/* update debug settings */
 	nvgpu_ltc_sync_enabled(g);
@@ -765,12 +767,12 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 #ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
 	if (c->deterministic) {
 		err = nvgpu_submit_deterministic(c, gpfifo, userdata,
-				num_entries, flags, fence, fence_out, profile);
+				num_entries, flags, fence, fence_out, profiler);
 	} else
 #endif
 	{
 		err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
-				num_entries, flags, fence, fence_out, profile);
+				num_entries, flags, fence, fence_out, profiler);
 	}
 
 	if (err != 0) {
@@ -793,7 +795,7 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
 		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
 
-	nvgpu_profile_snapshot(profile, PROFILE_END);
+	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END);
 
 	nvgpu_log_fn(g, "done");
 	return err;
@@ -805,10 +807,10 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile)
+		struct nvgpu_swprofiler *profiler)
 {
 	return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
-			flags, fence, fence_out, profile);
+			flags, fence, fence_out, profiler);
 }
 
 int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
drivers/gpu/nvgpu/common/swdebug/profile.c (new file, 284 lines)
@@ -0,0 +1,284 @@
/*
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/swprofile.h>
#include <nvgpu/lock.h>
#include <nvgpu/kref.h>
#include <nvgpu/debug.h>
#include <nvgpu/kmem.h>
#include <nvgpu/timers.h>
#include <nvgpu/sort.h>
#include <nvgpu/log.h>

/*
 * A simple profiler, capable of generating histograms.
 */

/*
 * The sample array is a 1d array comprised of repeating rows of data. To
 * index the array as though it were a row-major matrix, we need to do some
 * simple math.
 */
static inline u32 matrix_to_linear_index(struct nvgpu_swprofiler *p,
                                         u32 row, u32 col)
{
    return (row * p->psample_len) + col;
}

void nvgpu_swprofile_initialize(struct gk20a *g,
                                struct nvgpu_swprofiler *p,
                                const char *col_names[])
{
    if (p->col_names != NULL) {
        /*
         * Profiler is already initialized.
         */
        return;
    }

    nvgpu_mutex_init(&p->lock);
    p->g = g;

    p->col_names = col_names;

    p->psample_len = 0U;
    while (col_names[p->psample_len] != NULL) {
        p->psample_len++;
    }
}

int nvgpu_swprofile_open(struct gk20a *g, struct nvgpu_swprofiler *p)
{
    int ret = 0;

    nvgpu_mutex_acquire(&p->lock);

    /*
     * If this profiler is already opened, just take a ref and return.
     */
    if (p->samples != NULL) {
        nvgpu_ref_get(&p->ref);
        goto done;
    }

    p->samples = nvgpu_vzalloc(g,
                               PROFILE_ENTRIES * p->psample_len *
                               sizeof(*p->samples));
    if (p->samples == NULL) {
        ret = -ENOMEM;
        goto done;
    }

    /*
     * Otherwise allocate the necessary data structures, etc.
     */
    nvgpu_ref_init(&p->ref);

done:
    nvgpu_mutex_release(&p->lock);
    return ret;
}

static void nvgpu_swprofile_free(struct nvgpu_ref *ref)
{
    struct nvgpu_swprofiler *p = container_of(ref, struct nvgpu_swprofiler, ref);

    nvgpu_vfree(p->g, p->samples);
    p->samples = NULL;
}

void nvgpu_swprofile_close(struct nvgpu_swprofiler *p)
{
    nvgpu_ref_put(&p->ref, nvgpu_swprofile_free);
}

/*
 * Note: this does _not_ lock the profiler. This is a conscious choice. If we
 * do lock the profiler then there's the possibility that you get bad data due
 * to the snapshot blocking on some other user printing the contents of the
 * profiler.
 *
 * Instead, this way, it's possible that someone printing the data in the
 * profiler gets a sample that's a mix of old and new. That's not great, but
 * IMO better than a completely bogus sample.
 *
 * Also it's really quite unlikely for this race to happen in practice as the
 * print function is executed as a result of a debugfs call.
 */
void nvgpu_swprofile_snapshot(struct nvgpu_swprofiler *p, u32 idx)
{
    u32 index;

    /*
     * Handle two cases: the first allows calling code to simply skip
     * any profiling by passing in a NULL profiler; see the CDE code
     * for this. The second case is if a profiler is not "opened".
     */
    if (p == NULL || p->samples == NULL) {
        return;
    }

    /*
     * p->sample_index is the current row, aka sample, we are writing to.
     * idx is the column - i.e. the sub-sample.
     */
    index = matrix_to_linear_index(p, p->sample_index, idx);

    p->samples[index] = nvgpu_current_time_ns();
}

void nvgpu_swprofile_begin_sample(struct nvgpu_swprofiler *p)
{
    nvgpu_mutex_acquire(&p->lock);
    p->sample_index++;

    /* Handle wrap. */
    if (p->sample_index >= PROFILE_ENTRIES) {
        p->sample_index = 0U;
    }
    nvgpu_mutex_release(&p->lock);
}

static int profile_cmp(const void *a, const void *b)
{
    return *((const u64 *) a) - *((const u64 *) b);
}

#define PERCENTILE_WIDTH    5
#define PERCENTILE_RANGES   (100/PERCENTILE_WIDTH)

static u32 nvgpu_swprofile_build_ranges(struct nvgpu_swprofiler *p,
                                        u64 *storage,
                                        u64 *percentiles,
                                        u32 index_end,
                                        u32 index_start)
{
    u32 i;
    u32 nelem = 0U;

    /*
     * Iterate through a column and build a temporary slice array of samples
     * so that we can sort them without corrupting the current data.
     *
     * Note that we have to first convert the row/column indexes into linear
     * indexes to access the underlying sample array.
     */
    for (i = 0; i < PROFILE_ENTRIES; i++) {
        u32 linear_idx_start = matrix_to_linear_index(p, i, index_start);
        u32 linear_idx_end   = matrix_to_linear_index(p, i, index_end);

        if (p->samples[linear_idx_end] <=
            p->samples[linear_idx_start]) {
            /* This is an invalid element */
            continue;
        }

        storage[nelem] = p->samples[linear_idx_end] -
                         p->samples[linear_idx_start];
        nelem++;
    }

    /* sort it */
    sort(storage, nelem, sizeof(u64), profile_cmp, NULL);

    /* build ranges */
    for (i = 0; i < PERCENTILE_RANGES; i++) {
        percentiles[i] = nelem < PERCENTILE_RANGES ? 0 :
            storage[(PERCENTILE_WIDTH * (i + 1) * nelem)/100 - 1];
    }

    return nelem;
}

/*
 * Print a list of percentiles spaced by 5%. Note that the debug_context needs
 * to be special here. _Most_ print functions in NvGPU automatically add a new
 * line to the end of each print statement. This function _specifically_
 * requires that your debug print function does _NOT_ do this.
 */
void nvgpu_swprofile_print_ranges(struct gk20a *g,
                                  struct nvgpu_swprofiler *p,
                                  struct nvgpu_debug_context *o)
{
    u32 nelem = 0U, i, j;
    u64 *sorted_data = NULL;
    u64 *percentiles = NULL;

    nvgpu_mutex_acquire(&p->lock);

    if (p->samples == NULL) {
        gk20a_debug_output(o, "Profiler not enabled.\n");
        goto done;
    }

    sorted_data = nvgpu_vzalloc(g,
                                PROFILE_ENTRIES * p->psample_len *
                                sizeof(u64));
    percentiles = nvgpu_vzalloc(g,
                                PERCENTILE_RANGES * p->psample_len *
                                sizeof(u64));
    if (!sorted_data || !percentiles) {
        nvgpu_err(g, "vzalloc: OOM!");
        goto done;
    }

    /*
     * Loop over each column; sort the column's data and then build
     * percentile ranges based on that sorted data.
     */
    for (i = 0U; i < p->psample_len; i++) {
        nelem = nvgpu_swprofile_build_ranges(p,
                                             &sorted_data[i * PROFILE_ENTRIES],
                                             &percentiles[i * PERCENTILE_RANGES],
                                             i, 0U);
    }

    gk20a_debug_output(o, "Samples: %u\n", nelem);
    gk20a_debug_output(o, "%6s", "Perc");
    for (i = 0U; i < p->psample_len; i++) {
        gk20a_debug_output(o, " %15s", p->col_names[i]);
    }
    gk20a_debug_output(o, "\n");
    gk20a_debug_output(o, "%6s", "----");
    for (i = 0U; i < p->psample_len; i++) {
        gk20a_debug_output(o, " %15s", "---------------");
    }
    gk20a_debug_output(o, "\n");

    /*
     * percentiles is another matrix, but this time it's using column major
     * indexing.
     */
    for (i = 0U; i < PERCENTILE_RANGES; i++) {
        gk20a_debug_output(o, "%3upc ", PERCENTILE_WIDTH * (i + 1));
        for (j = 0U; j < p->psample_len; j++) {
            gk20a_debug_output(o, " %15llu",
                               percentiles[(j * PERCENTILE_RANGES) + i]);
        }
        gk20a_debug_output(o, "\n");
    }
    gk20a_debug_output(o, "\n");

done:
    nvgpu_vfree(g, sorted_data);
    nvgpu_vfree(g, percentiles);
    nvgpu_mutex_release(&p->lock);
}
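A quick worked example of the percentile indexing above (the numbers are illustrative, not from the change): with nelem = 1000 valid deltas sorted ascending, bucket i = 0 reads storage[(5 * 1 * 1000)/100 - 1] = storage[49], i.e. the 5th-percentile latency, and the last bucket (i = 19) reads storage[(5 * 20 * 1000)/100 - 1] = storage[999], the maximum observed delta. When fewer than PERCENTILE_RANGES (20) valid samples exist, every bucket reports 0.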
@@ -40,7 +40,7 @@
 struct gk20a;
 struct dbg_session_gk20a;
 struct nvgpu_fence_type;
-struct nvgpu_profile;
+struct nvgpu_swprofiler;
 struct nvgpu_channel_sync;
 struct nvgpu_gpfifo_userdata;
 struct nvgpu_gr_subctx;
@@ -576,7 +576,7 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
 		u32 flags,
 		struct nvgpu_channel_fence *fence,
 		struct nvgpu_fence_type **fence_out,
-		struct nvgpu_profile *profile);
+		struct nvgpu_swprofiler *profiler);
 
 int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
 		struct nvgpu_gpfifo_entry *gpfifo,
@@ -197,6 +197,8 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/list.h>
+#include <nvgpu/swprofile.h>
+
 /**
  * H/w defined value for Channel ID type
  */
@@ -231,6 +233,7 @@ struct nvgpu_engine_info;
 struct nvgpu_runlist_info;
 struct nvgpu_channel;
 struct nvgpu_tsg;
+struct nvgpu_swprofiler;
 
 struct nvgpu_fifo {
 	/** Pointer to GPU driver struct. */
@@ -297,16 +300,8 @@ struct nvgpu_fifo {
 	/** Number of active runlists. */
 	u32 num_runlists;
 
-#ifdef CONFIG_DEBUG_FS
-	struct {
-		struct nvgpu_profile *data;
-		nvgpu_atomic_t get;
-		bool enabled;
-		u64 *sorted;
-		struct nvgpu_ref ref;
-		struct nvgpu_mutex lock;
-	} profile;
-#endif
+	struct nvgpu_swprofiler kickoff_profiler;
+
 #ifdef CONFIG_NVGPU_USERD
 	struct nvgpu_mutex userd_mutex;
 	struct nvgpu_mem *userd_slabs;
drivers/gpu/nvgpu/include/nvgpu/fifo/swprofile.h (new file, 50 lines)
@@ -0,0 +1,50 @@
/*
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_FIFO_PROFILE_H
#define NVGPU_FIFO_PROFILE_H

/*
 * Define these here, not in the C file so that they are closer to the other
 * macro definitions below. The two lists must be in sync.
 */
#define NVGPU_FIFO_KICKOFF_PROFILE_EVENTS \
    "ioctl_entry",   \
    "entry",         \
    "job_tracking",  \
    "append",        \
    "end",           \
    "ioctl_exit",    \
    NULL             \

/*
 * The kickoff profile events; these are used to index into the profile's
 * sample array.
 */
#define PROF_KICKOFF_IOCTL_ENTRY   0U
#define PROF_KICKOFF_ENTRY         1U
#define PROF_KICKOFF_JOB_TRACKING  2U
#define PROF_KICKOFF_APPEND        3U
#define PROF_KICKOFF_END           4U
#define PROF_KICKOFF_IOCTL_EXIT    5U

#endif
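Since the string list and the PROF_KICKOFF_* indices above must be kept in lockstep by hand, extending the profiler means touching both. A hypothetical addition (not part of this change) would look like this:

    /* Hypothetical: adding a "userd" event requires a new column name and a
     * matching index, with later indices shifted to stay in sync. */
    #define NVGPU_FIFO_KICKOFF_PROFILE_EVENTS \
        "ioctl_entry",   \
        "entry",         \
        "job_tracking",  \
        "append",        \
        "end",           \
        "userd",         \
        "ioctl_exit",    \
        NULL             \

    #define PROF_KICKOFF_USERD       5U   /* new index */
    #define PROF_KICKOFF_IOCTL_EXIT  6U   /* shifted */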
include/nvgpu/profile.h (deleted file, 70 lines)
@@ -1,70 +0,0 @@
/*
 * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_PROFILE_H
#define NVGPU_PROFILE_H

/*
 * Number of entries in the kickoff latency buffer, used to calculate
 * the profiling and histogram. This number is calculated to be statistically
 * significative on a histogram on a 5% step
 */
#ifdef CONFIG_DEBUG_FS
#define FIFO_PROFILING_ENTRIES  16384U
#endif

enum {
    PROFILE_IOCTL_ENTRY = 0U,
    PROFILE_ENTRY,
    PROFILE_JOB_TRACKING,
    PROFILE_APPEND,
    PROFILE_END,
    PROFILE_IOCTL_EXIT,
    PROFILE_MAX
};

struct nvgpu_profile {
    u64 timestamp[PROFILE_MAX];
};

#ifdef CONFIG_DEBUG_FS
struct nvgpu_profile *nvgpu_profile_acquire(struct gk20a *g);
void nvgpu_profile_release(struct gk20a *g,
                           struct nvgpu_profile *profile);
void nvgpu_profile_snapshot(struct nvgpu_profile *profile, int idx);
#else
static inline struct nvgpu_profile *
nvgpu_profile_acquire(struct gk20a *g)
{
    return NULL;
}
static inline void nvgpu_profile_release(struct gk20a *g,
                                         struct nvgpu_profile *profile)
{
}
static inline void nvgpu_profile_snapshot(
        struct nvgpu_profile *profile, int idx)
{
}
#endif

#endif /* NVGPU_PROFILE_H */
drivers/gpu/nvgpu/include/nvgpu/swprofile.h (new file, 168 lines)
@@ -0,0 +1,168 @@
/*
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_PROFILE_H
#define NVGPU_PROFILE_H

#include <nvgpu/lock.h>
#include <nvgpu/types.h>
#include <nvgpu/kref.h>

struct nvgpu_debug_context;

/*
 * Number of entries in the kickoff latency buffer used to calculate the
 * profiling and histogram. This number is calculated to be statistically
 * significant on a histogram on a 5% step.
 */
#define PROFILE_ENTRIES 16384U

struct nvgpu_swprofiler {
    struct nvgpu_mutex lock;

    /**
     * The number of sample components that make up a sample for this
     * profiler.
     */
    u32 psample_len;

    /**
     * Sample array: this is essentially a matrix where rows correspond to
     * a given sample and columns correspond to a type of sample. Number of
     * samples is always %PROFILE_ENTRIES. This 1d array is accessed with
     * row-major indexing.
     */
    u64 *samples;

    /**
     * Pointer to next sample array to write. Will be wrapped at
     * %PROFILE_ENTRIES.
     */
    u32 sample_index;

    /**
     * Column names used for printing the histogram. This is NULL terminated
     * so that the profiler can infer the number of subsamples in a
     * psample.
     */
    const char **col_names;

    struct nvgpu_ref ref;

    /**
     * Necessary since we won't have access to a gk20a struct to vfree()
     * against when this profiler is freed via an nvgpu_ref.
     */
    struct gk20a *g;
};

/**
 * @brief Create a profiler with the passed column names.
 *
 * @param[in] g          The GPU that owns this profiler.
 * @param[in] p          Pointer to a profiler object to initialize.
 * @param[in] col_names  %NULL terminated list of column names.
 *
 * The sample array length is determined by the NULL terminated %col_names
 * array. This will not allocate the underlying data; that's controlled by
 * the open and close functions:
 *
 *   nvgpu_swprofile_open()
 *   nvgpu_swprofile_close()
 *
 * Once nvgpu_swprofile_initialize() is called all of the below functions
 * may also be called. All of the sampling related functions will become
 * no-ops if the SW profiler is not opened.
 */
void nvgpu_swprofile_initialize(struct gk20a *g,
                                struct nvgpu_swprofiler *p,
                                const char **col_names);

/**
 * @brief Open a profiler for use.
 *
 * @param[in] g  The GPU that owns this profiler.
 * @param[in] p  The profiler to open.
 *
 * This function prepares a SW profiler object for actual profiling. Necessary
 * data structures are allocated and subsequent snapshots will be captured.
 *
 * SW profiler objects are reference counted: for each open call made, a
 * corresponding close call must also be made.
 *
 * @return Returns 0 on success, otherwise a negative error code.
 */
int nvgpu_swprofile_open(struct gk20a *g, struct nvgpu_swprofiler *p);

/**
 * @brief Close a profiler.
 *
 * @param[in] p  The profiler to close.
 *
 * Close call corresponding to nvgpu_swprofile_open().
 */
void nvgpu_swprofile_close(struct nvgpu_swprofiler *p);

/**
 * @brief Begin a series of timestamp samples.
 *
 * @param[in] p  The profiler to start sampling with.
 *
 * Each iteration through a given SW sequence requires one call to this
 * function. It essentially just increments (with wraparound) an internal
 * tracker which points to the sample space in the internal sample array.
 * Typical usage is to call nvgpu_swprofile_begin_sample() and then a
 * sequence of calls to nvgpu_swprofile_snapshot().
 *
 * Once done with the sequence being profiled nothing needs to happen. When
 * the next iteration of the sequence is executed this function should be
 * called again.
 */
void nvgpu_swprofile_begin_sample(struct nvgpu_swprofiler *p);

/**
 * @brief Capture a timestamp sample.
 *
 * @param[in] p    The profiler to sample with.
 * @param[in] idx  The index of the subsample to capture.
 *
 * This captures a subsample. Any given run through a SW sequence that is
 * being profiled will result in one or more subsamples which together make
 * up a sample.
 */
void nvgpu_swprofile_snapshot(struct nvgpu_swprofiler *p, u32 idx);

/**
 * @brief Print percentile ranges for a SW profiler.
 *
 * @param[in] g  The GPU that owns this profiler.
 * @param[in] p  The profiler to print.
 * @param[in] o  A debug context object used for printing.
 *
 * Print a percentile table for all columns of sub-samples. This gives a
 * good overview of the collected data.
 */
void nvgpu_swprofile_print_ranges(struct gk20a *g,
                                  struct nvgpu_swprofiler *p,
                                  struct nvgpu_debug_context *o);

#endif /* NVGPU_PROFILE_H */
@@ -26,7 +26,7 @@ struct nvgpu_gpfifo;
 struct nvgpu_submit_gpfifo_args;
 struct nvgpu_channel_fence;
 struct nvgpu_fence_type;
-struct nvgpu_profile;
+struct nvgpu_swprofile;
 struct nvgpu_os_linux;
 
 struct sync_fence;
@@ -442,8 +442,6 @@ void gk20a_debug_deinit(struct gk20a *g)
 	if (!l->debugfs)
 		return;
 
-	gk20a_fifo_debugfs_deinit(g);
-
 	debugfs_remove_recursive(l->debugfs);
 	debugfs_remove(l->debugfs_alias);
 }
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved.
+ * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -9,7 +9,6 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
- *
  */
 
 #include "debug_fifo.h"
@@ -24,7 +23,9 @@
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/engines.h>
 #include <nvgpu/runlist.h>
-#include <nvgpu/profile.h>
+#include <nvgpu/swprofile.h>
 
+#include <nvgpu/fifo/swprofile.h>
+
 void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);
 
@@ -147,41 +148,12 @@ static int gk20a_fifo_profile_enable(void *data, u64 val)
 	struct gk20a *g = (struct gk20a *) data;
 	struct nvgpu_fifo *f = &g->fifo;
 
 
-	nvgpu_mutex_acquire(&f->profile.lock);
 	if (val == 0) {
-		if (f->profile.enabled) {
-			f->profile.enabled = false;
-			nvgpu_ref_put(&f->profile.ref,
-				__gk20a_fifo_profile_free);
-		}
+		nvgpu_swprofile_close(&f->kickoff_profiler);
+		return 0;
 	} else {
-		if (!f->profile.enabled) {
-			/* not kref init as it can have a running condition if
-			 * we enable/disable/enable while kickoff is happening
-			 */
-			if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
-				f->profile.data = nvgpu_vzalloc(g,
-					FIFO_PROFILING_ENTRIES *
-					sizeof(struct nvgpu_profile));
-				f->profile.sorted = nvgpu_vzalloc(g,
-					FIFO_PROFILING_ENTRIES *
-					sizeof(u64));
-				if (!(f->profile.data && f->profile.sorted)) {
-					nvgpu_vfree(g, f->profile.data);
-					nvgpu_vfree(g, f->profile.sorted);
-					nvgpu_mutex_release(&f->profile.lock);
-					return -ENOMEM;
-				}
-				nvgpu_ref_init(&f->profile.ref);
-			}
-			atomic_set(&f->profile.get.atomic_var, 0);
-			f->profile.enabled = true;
-		}
+		return nvgpu_swprofile_open(g, &f->kickoff_profiler);
 	}
-	nvgpu_mutex_release(&f->profile.lock);
 
-	return 0;
 }
@@ -191,96 +163,20 @@ DEFINE_SIMPLE_ATTRIBUTE(
 	"%llu\n"
 );
 
-static int __profile_cmp(const void *a, const void *b)
+static void gk20a_fifo_write_to_seqfile_no_nl(void *ctx, const char *str)
 {
-	return *((unsigned long long *) a) - *((unsigned long long *) b);
+	seq_printf((struct seq_file *)ctx, str);
 }
 
-/*
- * This uses about 800b in the stack, but the function using it is not part
- * of a callstack where much memory is being used, so it is fine
- */
-#define PERCENTILE_WIDTH	5
-#define PERCENTILE_RANGES	(100/PERCENTILE_WIDTH)
-
-static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
-		u64 *percentiles, u32 index_end, u32 index_start)
-{
-	unsigned int nelem = 0;
-	unsigned int index;
-	struct nvgpu_profile *profile;
-
-	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
-		profile = &g->fifo.profile.data[index];
-
-		if (profile->timestamp[index_end] >
-				profile->timestamp[index_start]) {
-			/* This is a valid element */
-			g->fifo.profile.sorted[nelem] =
-					profile->timestamp[index_end] -
-					profile->timestamp[index_start];
-			nelem++;
-		}
-	}
-
-	/* sort it */
-	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
-		__profile_cmp, NULL);
-
-	/* build ranges */
-	for (index = 0; index < PERCENTILE_RANGES; index++) {
-		percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
-			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
-						nelem)/100 - 1];
-	}
-	return nelem;
-}
-
 static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
 {
 	struct gk20a *g = s->private;
-	unsigned int get, nelem, index;
-	/*
-	 * 800B in the stack, but function is declared statically and only
-	 * called from debugfs handler
-	 */
-	u64 percentiles_ioctl[PERCENTILE_RANGES];
-	u64 percentiles_kickoff[PERCENTILE_RANGES];
-	u64 percentiles_jobtracking[PERCENTILE_RANGES];
-	u64 percentiles_append[PERCENTILE_RANGES];
-	u64 percentiles_userd[PERCENTILE_RANGES];
-
-	if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
-		seq_printf(s, "Profiling disabled\n");
-		return 0;
-	}
-
-	get = atomic_read(&g->fifo.profile.get.atomic_var);
-
-	__gk20a_fifo_create_stats(g, percentiles_ioctl,
-		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_kickoff,
-		PROFILE_END, PROFILE_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
-		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_append,
-		PROFILE_APPEND, PROFILE_JOB_TRACKING);
-	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
-		PROFILE_END, PROFILE_APPEND);
-
-	seq_printf(s, "Number of kickoffs: %d\n", nelem);
-	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
-
-	for (index = 0; index < PERCENTILE_RANGES; index++)
-		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
-			PERCENTILE_WIDTH * (index+1),
-			percentiles_ioctl[index],
-			percentiles_kickoff[index],
-			percentiles_append[index],
-			percentiles_jobtracking[index],
-			percentiles_userd[index]);
-
-	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+	struct nvgpu_debug_context o = {
+		.fn = gk20a_fifo_write_to_seqfile_no_nl,
+		.ctx = s,
+	};
 
+	nvgpu_swprofile_print_ranges(g, &g->fifo.kickoff_profiler, &o);
+
 	return 0;
 }
@@ -297,7 +193,6 @@ static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
 	.release = single_release,
 };
 
-
 void gk20a_fifo_debugfs_init(struct gk20a *g)
 {
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
@@ -318,11 +213,6 @@ void gk20a_fifo_debugfs_init(struct gk20a *g)
 	if (IS_ERR_OR_NULL(profile_root))
 		return;
 
-	nvgpu_mutex_init(&g->fifo.profile.lock);
-	g->fifo.profile.enabled = false;
-	atomic_set(&g->fifo.profile.get.atomic_var, 0);
-	atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);
-
 	debugfs_create_file("enable", 0600, profile_root, g,
 			&gk20a_fifo_profile_enable_debugfs_fops);
 
@@ -330,54 +220,3 @@ void gk20a_fifo_debugfs_init(struct gk20a *g)
 			&gk20a_fifo_profile_stats_debugfs_fops);
 
 }
-
-void nvgpu_profile_snapshot(struct nvgpu_profile *profile, int idx)
-{
-	if (profile)
-		profile->timestamp[idx] = nvgpu_current_time_ns();
-}
-
-void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
-{
-	struct nvgpu_fifo *f = container_of(ref, struct nvgpu_fifo,
-			profile.ref);
-	nvgpu_vfree(f->g, f->profile.data);
-	nvgpu_vfree(f->g, f->profile.sorted);
-}
-
-/* Get the next element in the ring buffer of profile entries
- * and grab a reference to the structure
- */
-struct nvgpu_profile *nvgpu_profile_acquire(struct gk20a *g)
-{
-	struct nvgpu_fifo *f = &g->fifo;
-	struct nvgpu_profile *profile;
-	unsigned int index;
-
-	/* If kref is zero, profiling is not enabled */
-	if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
-		return NULL;
-	index = atomic_inc_return(&f->profile.get.atomic_var);
-	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
-
-	return profile;
-}
-
-/* Free the reference to the structure. This allows deferred cleanups */
-void nvgpu_profile_release(struct gk20a *g,
-			   struct nvgpu_profile *profile)
-{
-	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
-}
-
-void gk20a_fifo_debugfs_deinit(struct gk20a *g)
-{
-	struct nvgpu_fifo *f = &g->fifo;
-
-	nvgpu_mutex_acquire(&f->profile.lock);
-	if (f->profile.enabled) {
-		f->profile.enabled = false;
-		nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
-	}
-	nvgpu_mutex_release(&f->profile.lock);
-}
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -17,6 +17,5 @@
 
 struct gk20a;
 void gk20a_fifo_debugfs_init(struct gk20a *g);
-void gk20a_fifo_debugfs_deinit(struct gk20a *g);
 
 #endif /* __NVGPU_DEBUG_FIFO_H__ */
@@ -46,9 +46,11 @@
 #include <nvgpu/gr/obj_ctx.h>
 #include <nvgpu/fence.h>
 #include <nvgpu/preempt.h>
-#include <nvgpu/profile.h>
+#include <nvgpu/swprofile.h>
 #include <nvgpu/nvgpu_init.h>
 
+#include <nvgpu/fifo/swprofile.h>
+
 #include "platform_gk20a.h"
 #include "ioctl_channel.h"
 #include "channel.h"
@@ -792,10 +794,11 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 {
 	struct nvgpu_channel_fence fence;
 	struct nvgpu_fence_type *fence_out;
-	struct nvgpu_profile *profile = NULL;
 	u32 submit_flags = 0;
 	int fd = -1;
 	struct gk20a *g = ch->g;
+	struct nvgpu_fifo *f = &g->fifo;
+	struct nvgpu_swprofiler *kickoff_profiler = &f->kickoff_profiler;
 	struct nvgpu_gpfifo_userdata userdata;
 	bool flag_fence_wait = (args->flags &
 		NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U;
@@ -807,8 +810,8 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	int ret = 0;
 	nvgpu_log_fn(g, " ");
 
-	profile = nvgpu_profile_acquire(ch->g);
-	nvgpu_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
+	nvgpu_swprofile_begin_sample(kickoff_profiler);
+	nvgpu_swprofile_snapshot(kickoff_profiler, PROF_KICKOFF_IOCTL_ENTRY);
 
 	if (nvgpu_channel_check_unserviceable(ch)) {
 		return -ETIMEDOUT;
@@ -846,7 +849,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 
 	ret = nvgpu_submit_channel_gpfifo_user(ch,
 			userdata, args->num_entries,
-			submit_flags, &fence, &fence_out, profile);
+			submit_flags, &fence, &fence_out, kickoff_profiler);
 
 	if (ret) {
 		if (fd != -1)
@@ -869,9 +872,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	}
 	nvgpu_fence_put(fence_out);
 
-	nvgpu_profile_snapshot(profile, PROFILE_IOCTL_EXIT);
-	if (profile)
-		nvgpu_profile_release(ch->g, profile);
+	nvgpu_swprofile_snapshot(kickoff_profiler, PROF_KICKOFF_IOCTL_EXIT);
 
 clean_up:
 	return ret;
|||||||
@@ -725,6 +725,8 @@ nvgpu_spinlock_release
|
|||||||
nvgpu_strnadd_u32
|
nvgpu_strnadd_u32
|
||||||
nvgpu_sw_quiesce
|
nvgpu_sw_quiesce
|
||||||
nvgpu_sw_quiesce_remove_support
|
nvgpu_sw_quiesce_remove_support
|
||||||
|
nvgpu_swprofile_initialize
|
||||||
|
nvgpu_swprofile_snapshot
|
||||||
nvgpu_thread_create
|
nvgpu_thread_create
|
||||||
nvgpu_thread_create_priority
|
nvgpu_thread_create_priority
|
||||||
nvgpu_thread_get_fault_injection
|
nvgpu_thread_get_fault_injection
|
||||||
|
|||||||
@@ -740,6 +740,8 @@ nvgpu_spinlock_release
|
|||||||
nvgpu_strnadd_u32
|
nvgpu_strnadd_u32
|
||||||
nvgpu_sw_quiesce
|
nvgpu_sw_quiesce
|
||||||
nvgpu_sw_quiesce_remove_support
|
nvgpu_sw_quiesce_remove_support
|
||||||
|
nvgpu_swprofile_initialize
|
||||||
|
nvgpu_swprofile_snapshot
|
||||||
nvgpu_thread_create
|
nvgpu_thread_create
|
||||||
nvgpu_thread_create_priority
|
nvgpu_thread_create_priority
|
||||||
nvgpu_thread_get_fault_injection
|
nvgpu_thread_get_fault_injection
|
||||||
|
|||||||