mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: in-kernel kickoff profiling
Add a debugfs interface to profile the kickoff ioctl. It provides the probability distribution and separates the information into the time spent in: the full ioctl, the kickoff function, job tracking, and pushbuffer copies. JIRA: EVLR-1003 Change-Id: I9888b114c3fbced61b1cf134c79f7a8afce15f56 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: http://git-master/r/1308997 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
b9991767cc
commit
b9feba6efc
@@ -727,7 +727,8 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
|
return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
|
||||||
num_entries, flags, fence, fence_out, true);
|
num_entries, flags, fence, fence_out, true,
|
||||||
|
NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
|
static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
|
||||||
|
|||||||
@@ -653,7 +653,7 @@ int gk20a_ce_execute_ops(struct device *dev,
|
|||||||
|
|
||||||
ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
|
ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
|
||||||
1, submit_flags, &fence,
|
1, submit_flags, &fence,
|
||||||
&ce_cmd_buf_fence_out, false);
|
&ce_cmd_buf_fence_out, false, NULL);
|
||||||
|
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
memcpy((void *)(cmd_buf_cpu_va + fence_index),
|
memcpy((void *)(cmd_buf_cpu_va + fence_index),
|
||||||
|
|||||||
@@ -2987,7 +2987,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
u32 flags,
|
u32 flags,
|
||||||
struct nvgpu_fence *fence,
|
struct nvgpu_fence *fence,
|
||||||
struct gk20a_fence **fence_out,
|
struct gk20a_fence **fence_out,
|
||||||
bool force_need_sync_fence)
|
bool force_need_sync_fence,
|
||||||
|
struct fifo_profile_gk20a *profile)
|
||||||
{
|
{
|
||||||
struct gk20a *g = c->g;
|
struct gk20a *g = c->g;
|
||||||
struct device *d = dev_from_gk20a(g);
|
struct device *d = dev_from_gk20a(g);
|
||||||
@@ -3036,6 +3037,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (profile)
|
||||||
|
profile->timestamp[PROFILE_ENTRY] = sched_clock();
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_FS
|
#ifdef CONFIG_DEBUG_FS
|
||||||
/* update debug settings */
|
/* update debug settings */
|
||||||
if (g->ops.ltc.sync_debugfs)
|
if (g->ops.ltc.sync_debugfs)
|
||||||
@@ -3162,6 +3166,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
goto clean_up_job;
|
goto clean_up_job;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (profile)
|
||||||
|
profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock();
|
||||||
|
|
||||||
if (wait_cmd)
|
if (wait_cmd)
|
||||||
gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
|
gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
|
||||||
|
|
||||||
@@ -3184,6 +3191,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
if (need_job_tracking)
|
if (need_job_tracking)
|
||||||
/* TODO! Check for errors... */
|
/* TODO! Check for errors... */
|
||||||
gk20a_channel_add_job(c, job, skip_buffer_refcounting);
|
gk20a_channel_add_job(c, job, skip_buffer_refcounting);
|
||||||
|
if (profile)
|
||||||
|
profile->timestamp[PROFILE_APPEND] = sched_clock();
|
||||||
|
|
||||||
g->ops.fifo.userd_gp_put(g, c);
|
g->ops.fifo.userd_gp_put(g, c);
|
||||||
|
|
||||||
@@ -3197,6 +3206,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
gk20a_dbg_info("post-submit put %d, get %d, size %d",
|
gk20a_dbg_info("post-submit put %d, get %d, size %d",
|
||||||
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
|
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
|
||||||
|
|
||||||
|
if (profile)
|
||||||
|
profile->timestamp[PROFILE_END] = sched_clock();
|
||||||
gk20a_dbg_fn("done");
|
gk20a_dbg_fn("done");
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
@@ -3789,15 +3800,22 @@ static int gk20a_ioctl_channel_submit_gpfifo(
|
|||||||
struct nvgpu_submit_gpfifo_args *args)
|
struct nvgpu_submit_gpfifo_args *args)
|
||||||
{
|
{
|
||||||
struct gk20a_fence *fence_out;
|
struct gk20a_fence *fence_out;
|
||||||
|
struct fifo_profile_gk20a *profile = NULL;
|
||||||
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
gk20a_dbg_fn("");
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
profile = gk20a_fifo_profile_acquire(ch->g);
|
||||||
|
|
||||||
|
if (profile)
|
||||||
|
profile->timestamp[PROFILE_IOCTL_ENTRY] = sched_clock();
|
||||||
|
#endif
|
||||||
if (ch->has_timedout)
|
if (ch->has_timedout)
|
||||||
return -ETIMEDOUT;
|
return -ETIMEDOUT;
|
||||||
|
|
||||||
ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
|
ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
|
||||||
args->flags, &args->fence,
|
args->flags, &args->fence,
|
||||||
&fence_out, false);
|
&fence_out, false, profile);
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
goto clean_up;
|
goto clean_up;
|
||||||
@@ -3816,7 +3834,12 @@ static int gk20a_ioctl_channel_submit_gpfifo(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
gk20a_fence_put(fence_out);
|
gk20a_fence_put(fence_out);
|
||||||
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
if (profile) {
|
||||||
|
profile->timestamp[PROFILE_IOCTL_EXIT] = sched_clock();
|
||||||
|
gk20a_fifo_profile_release(ch->g, profile);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
clean_up:
|
clean_up:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ struct gk20a;
|
|||||||
struct gr_gk20a;
|
struct gr_gk20a;
|
||||||
struct dbg_session_gk20a;
|
struct dbg_session_gk20a;
|
||||||
struct gk20a_fence;
|
struct gk20a_fence;
|
||||||
|
struct fifo_profile_gk20a;
|
||||||
|
|
||||||
#include "channel_sync_gk20a.h"
|
#include "channel_sync_gk20a.h"
|
||||||
|
|
||||||
@@ -344,7 +345,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
u32 flags,
|
u32 flags,
|
||||||
struct nvgpu_fence *fence,
|
struct nvgpu_fence *fence,
|
||||||
struct gk20a_fence **fence_out,
|
struct gk20a_fence **fence_out,
|
||||||
bool force_need_sync_fence);
|
bool force_need_sync_fence,
|
||||||
|
struct fifo_profile_gk20a *profile);
|
||||||
|
|
||||||
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
|
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
|
||||||
struct nvgpu_alloc_gpfifo_ex_args *args);
|
struct nvgpu_alloc_gpfifo_ex_args *args);
|
||||||
|
|||||||
@@ -23,6 +23,7 @@
|
|||||||
#include <trace/events/gk20a.h>
|
#include <trace/events/gk20a.h>
|
||||||
#include <linux/dma-mapping.h>
|
#include <linux/dma-mapping.h>
|
||||||
#include <linux/nvhost.h>
|
#include <linux/nvhost.h>
|
||||||
|
#include <linux/sort.h>
|
||||||
|
|
||||||
#include <nvgpu/timers.h>
|
#include <nvgpu/timers.h>
|
||||||
#include <nvgpu/semaphore.h>
|
#include <nvgpu/semaphore.h>
|
||||||
@@ -46,6 +47,10 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
|
|||||||
bool wait_for_finish);
|
bool wait_for_finish);
|
||||||
static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
|
static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
|
||||||
|
|
||||||
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
static void __gk20a_fifo_profile_free(struct kref *ref);
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
|
u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
|
||||||
u32 engine_id[], u32 engine_id_sz,
|
u32 engine_id[], u32 engine_id_sz,
|
||||||
u32 engine_enum)
|
u32 engine_enum)
|
||||||
@@ -532,6 +537,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
|
|||||||
f->engine_info = NULL;
|
f->engine_info = NULL;
|
||||||
kfree(f->active_engines_list);
|
kfree(f->active_engines_list);
|
||||||
f->active_engines_list = NULL;
|
f->active_engines_list = NULL;
|
||||||
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
nvgpu_mutex_acquire(&f->profile.lock);
|
||||||
|
if (f->profile.enabled) {
|
||||||
|
f->profile.enabled = false;
|
||||||
|
kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
|
||||||
|
}
|
||||||
|
nvgpu_mutex_release(&f->profile.lock);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reads info from hardware and fills in pbmda exception info record */
|
/* reads info from hardware and fills in pbmda exception info record */
|
||||||
@@ -3203,6 +3216,32 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_FS
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
|
||||||
|
/* Get the next element in the ring buffer of profile entries
|
||||||
|
* and grab a reference to the structure
|
||||||
|
*/
|
||||||
|
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct fifo_gk20a *f = &g->fifo;
|
||||||
|
struct fifo_profile_gk20a *profile;
|
||||||
|
unsigned int index;
|
||||||
|
|
||||||
|
/* If kref is zero, profiling is not enabled */
|
||||||
|
if (!kref_get_unless_zero(&f->profile.ref))
|
||||||
|
return NULL;
|
||||||
|
index = atomic_inc_return(&f->profile.get);
|
||||||
|
profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
|
||||||
|
|
||||||
|
return profile;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free the reference to the structure. This allows deferred cleanups */
|
||||||
|
void gk20a_fifo_profile_release(struct gk20a *g,
|
||||||
|
struct fifo_profile_gk20a *profile)
|
||||||
|
{
|
||||||
|
kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
|
||||||
|
}
|
||||||
|
|
||||||
static void *gk20a_fifo_sched_debugfs_seq_start(
|
static void *gk20a_fifo_sched_debugfs_seq_start(
|
||||||
struct seq_file *s, loff_t *pos)
|
struct seq_file *s, loff_t *pos)
|
||||||
{
|
{
|
||||||
@@ -3316,6 +3355,168 @@ static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
|
|||||||
.release = seq_release
|
.release = seq_release
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void __gk20a_fifo_profile_free(struct kref *ref)
|
||||||
|
{
|
||||||
|
struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
|
||||||
|
profile.ref);
|
||||||
|
vfree(f->profile.data);
|
||||||
|
vfree(f->profile.sorted);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * debugfs "enable" write handler: turns kickoff profiling on or off.
 *
 * @data: the struct gk20a the debugfs file was created for
 * @val:  0 disables profiling, any other value enables it
 *
 * Disabling drops the reference held on behalf of the enabled state; the
 * buffers are freed by __gk20a_fifo_profile_free() when the last reference
 * is dropped. Enabling allocates the buffers unless a reference is still
 * outstanding, in which case the existing buffers are reused.
 *
 * Returns 0 on success or -ENOMEM if the buffers cannot be allocated.
 */
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;
	int err = 0;

	nvgpu_mutex_acquire(&f->profile.lock);

	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
		}
		goto out;
	}

	/* Already enabled: nothing to do */
	if (f->profile.enabled)
		goto out;

	/*
	 * Do not unconditionally kref_init(): after an
	 * enable/disable/enable sequence performed while a kickoff is in
	 * flight, that kickoff may still hold a reference. If so, just take
	 * another reference and keep using the existing buffers.
	 */
	if (!kref_get_unless_zero(&f->profile.ref)) {
		f->profile.data = vzalloc(FIFO_PROFILING_ENTRIES *
					  sizeof(*f->profile.data));
		f->profile.sorted = vzalloc(FIFO_PROFILING_ENTRIES *
					    sizeof(*f->profile.sorted));
		if (!f->profile.data || !f->profile.sorted) {
			vfree(f->profile.data);
			vfree(f->profile.sorted);
			/* Do not leave dangling pointers behind */
			f->profile.data = NULL;
			f->profile.sorted = NULL;
			err = -ENOMEM;
			goto out;
		}
		kref_init(&f->profile.ref);
	}

	atomic_set(&f->profile.get, 0);
	f->profile.enabled = true;

out:
	nvgpu_mutex_release(&f->profile.lock);

	return err;
}
|
||||||
|
|
||||||
|
DEFINE_SIMPLE_ATTRIBUTE(
|
||||||
|
gk20a_fifo_profile_enable_debugfs_fops,
|
||||||
|
NULL,
|
||||||
|
gk20a_fifo_profile_enable,
|
||||||
|
"%llu\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
/*
 * sort() comparator for the 64-bit latency samples.
 *
 * Returns a negative value, zero or a positive value when *a is
 * respectively smaller than, equal to or greater than *b.
 *
 * The operands are compared rather than subtracted: a 64-bit difference
 * truncated to int can come out as zero or with the wrong sign when the
 * two samples differ by more than INT_MAX, which would corrupt the sort
 * order.
 */
static int __profile_cmp(const void *a, const void *b)
{
	const unsigned long long lhs = *(const unsigned long long *)a;
	const unsigned long long rhs = *(const unsigned long long *)b;

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;

	return 0;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This uses about 800b in the stack, but the function using it is not part
|
||||||
|
* of a callstack where much memory is being used, so it is fine
|
||||||
|
*/
|
||||||
|
#define PERCENTILE_WIDTH 5
|
||||||
|
#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
|
||||||
|
|
||||||
|
static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
|
||||||
|
u64 *percentiles, u32 index_end, u32 index_start)
|
||||||
|
{
|
||||||
|
unsigned int nelem = 0;
|
||||||
|
unsigned int index;
|
||||||
|
struct fifo_profile_gk20a *profile;
|
||||||
|
|
||||||
|
for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
|
||||||
|
profile = &g->fifo.profile.data[index];
|
||||||
|
|
||||||
|
if (profile->timestamp[index_end] >
|
||||||
|
profile->timestamp[index_start]) {
|
||||||
|
/* This is a valid element */
|
||||||
|
g->fifo.profile.sorted[nelem] =
|
||||||
|
profile->timestamp[index_end] -
|
||||||
|
profile->timestamp[index_start];
|
||||||
|
nelem++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* sort it */
|
||||||
|
sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
|
||||||
|
__profile_cmp, NULL);
|
||||||
|
|
||||||
|
/* build ranges */
|
||||||
|
for (index = 0; index < PERCENTILE_RANGES; index++)
|
||||||
|
percentiles[index] =
|
||||||
|
g->fifo.profile.sorted[(PERCENTILE_WIDTH * index *
|
||||||
|
nelem)/100];
|
||||||
|
return nelem;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
|
||||||
|
{
|
||||||
|
struct gk20a *g = s->private;
|
||||||
|
unsigned int get, nelem, index;
|
||||||
|
/*
|
||||||
|
* 800B in the stack, but function is declared statically and only
|
||||||
|
* called from debugfs handler
|
||||||
|
*/
|
||||||
|
u64 percentiles_ioctl[PERCENTILE_RANGES];
|
||||||
|
u64 percentiles_kickoff[PERCENTILE_RANGES];
|
||||||
|
u64 percentiles_jobtracking[PERCENTILE_RANGES];
|
||||||
|
u64 percentiles_append[PERCENTILE_RANGES];
|
||||||
|
u64 percentiles_userd[PERCENTILE_RANGES];
|
||||||
|
|
||||||
|
if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
|
||||||
|
seq_printf(s, "Profiling disabled\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
get = atomic_read(&g->fifo.profile.get);
|
||||||
|
|
||||||
|
__gk20a_fifo_create_stats(g, percentiles_ioctl,
|
||||||
|
PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
|
||||||
|
__gk20a_fifo_create_stats(g, percentiles_kickoff,
|
||||||
|
PROFILE_END, PROFILE_ENTRY);
|
||||||
|
__gk20a_fifo_create_stats(g, percentiles_jobtracking,
|
||||||
|
PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
|
||||||
|
__gk20a_fifo_create_stats(g, percentiles_append,
|
||||||
|
PROFILE_APPEND, PROFILE_JOB_TRACKING);
|
||||||
|
nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
|
||||||
|
PROFILE_END, PROFILE_APPEND);
|
||||||
|
|
||||||
|
seq_printf(s, "Number of kickoffs: %d\n", nelem);
|
||||||
|
seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
|
||||||
|
|
||||||
|
for (index = 0; index < PERCENTILE_RANGES; index++)
|
||||||
|
seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
|
||||||
|
PERCENTILE_WIDTH * (index+1),
|
||||||
|
percentiles_ioctl[index],
|
||||||
|
percentiles_kickoff[index],
|
||||||
|
percentiles_append[index],
|
||||||
|
percentiles_jobtracking[index],
|
||||||
|
percentiles_userd[index]);
|
||||||
|
|
||||||
|
kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
|
||||||
|
.open = gk20a_fifo_profile_stats_open,
|
||||||
|
.read = seq_read,
|
||||||
|
.llseek = seq_lseek,
|
||||||
|
.release = single_release,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
void gk20a_fifo_debugfs_init(struct device *dev)
|
void gk20a_fifo_debugfs_init(struct device *dev)
|
||||||
{
|
{
|
||||||
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
struct gk20a_platform *platform = dev_get_drvdata(dev);
|
||||||
@@ -3323,6 +3524,8 @@ void gk20a_fifo_debugfs_init(struct device *dev)
|
|||||||
|
|
||||||
struct dentry *gpu_root = platform->debugfs;
|
struct dentry *gpu_root = platform->debugfs;
|
||||||
struct dentry *fifo_root;
|
struct dentry *fifo_root;
|
||||||
|
struct dentry *profile_root;
|
||||||
|
|
||||||
|
|
||||||
fifo_root = debugfs_create_dir("fifo", gpu_root);
|
fifo_root = debugfs_create_dir("fifo", gpu_root);
|
||||||
if (IS_ERR_OR_NULL(fifo_root))
|
if (IS_ERR_OR_NULL(fifo_root))
|
||||||
@@ -3333,6 +3536,21 @@ void gk20a_fifo_debugfs_init(struct device *dev)
|
|||||||
debugfs_create_file("sched", 0600, fifo_root, g,
|
debugfs_create_file("sched", 0600, fifo_root, g,
|
||||||
&gk20a_fifo_sched_debugfs_fops);
|
&gk20a_fifo_sched_debugfs_fops);
|
||||||
|
|
||||||
|
profile_root = debugfs_create_dir("profile", fifo_root);
|
||||||
|
if (IS_ERR_OR_NULL(profile_root))
|
||||||
|
return;
|
||||||
|
|
||||||
|
nvgpu_mutex_init(&g->fifo.profile.lock);
|
||||||
|
g->fifo.profile.enabled = false;
|
||||||
|
atomic_set(&g->fifo.profile.get, 0);
|
||||||
|
atomic_set(&g->fifo.profile.ref.refcount, 0);
|
||||||
|
|
||||||
|
debugfs_create_file("enable", 0600, profile_root, g,
|
||||||
|
&gk20a_fifo_profile_enable_debugfs_fops);
|
||||||
|
|
||||||
|
debugfs_create_file("stats", 0600, profile_root, g,
|
||||||
|
&gk20a_fifo_profile_stats_debugfs_fops);
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_DEBUG_FS */
|
#endif /* CONFIG_DEBUG_FS */
|
||||||
|
|
||||||
|
|||||||
@@ -30,6 +30,15 @@
|
|||||||
#define FIFO_INVAL_CHANNEL_ID ((u32)~0)
|
#define FIFO_INVAL_CHANNEL_ID ((u32)~0)
|
||||||
#define FIFO_INVAL_TSG_ID ((u32)~0)
|
#define FIFO_INVAL_TSG_ID ((u32)~0)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of entries in the kickoff latency buffer, used to calculate
|
||||||
|
* the profiling and histogram. This number is calculated to be statistically
|
||||||
|
* significant for a histogram with a 5% step
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
#define FIFO_PROFILING_ENTRIES 16384
|
||||||
|
#endif
|
||||||
|
|
||||||
/* generally corresponds to the "pbdma" engine */
|
/* generally corresponds to the "pbdma" engine */
|
||||||
|
|
||||||
struct fifo_runlist_info_gk20a {
|
struct fifo_runlist_info_gk20a {
|
||||||
@@ -99,6 +108,20 @@ struct fifo_engine_info_gk20a {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Indices into fifo_profile_gk20a.timestamp[], in the order in which the
 * timestamps are recorded along the submit path.
 */
enum {
	/* start of the submit ioctl handler */
	PROFILE_IOCTL_ENTRY = 0,
	/* gk20a_submit_channel_gpfifo(), after the initial checks */
	PROFILE_ENTRY,
	/* recorded just before the wait command is appended */
	PROFILE_JOB_TRACKING,
	/* recorded after the job has been added, before the GP_PUT update */
	PROFILE_APPEND,
	/* end of gk20a_submit_channel_gpfifo() */
	PROFILE_END,
	/* end of the submit ioctl handler (success path) */
	PROFILE_IOCTL_EXIT,
	/* number of timestamp slots; not a timestamp itself */
	PROFILE_MAX
};
|
||||||
|
|
||||||
|
struct fifo_profile_gk20a {
|
||||||
|
u64 timestamp[PROFILE_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
struct fifo_gk20a {
|
struct fifo_gk20a {
|
||||||
struct gk20a *g;
|
struct gk20a *g;
|
||||||
unsigned int num_channels;
|
unsigned int num_channels;
|
||||||
@@ -115,7 +138,16 @@ struct fifo_gk20a {
|
|||||||
|
|
||||||
struct fifo_runlist_info_gk20a *runlist_info;
|
struct fifo_runlist_info_gk20a *runlist_info;
|
||||||
u32 max_runlists;
|
u32 max_runlists;
|
||||||
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
struct {
|
||||||
|
struct fifo_profile_gk20a *data;
|
||||||
|
atomic_t get;
|
||||||
|
bool enabled;
|
||||||
|
u64 *sorted;
|
||||||
|
struct kref ref;
|
||||||
|
struct nvgpu_mutex lock;
|
||||||
|
} profile;
|
||||||
|
#endif
|
||||||
struct mem_desc userd;
|
struct mem_desc userd;
|
||||||
u32 userd_entry_size;
|
u32 userd_entry_size;
|
||||||
|
|
||||||
@@ -275,5 +307,10 @@ void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist);
|
|||||||
u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
|
u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
|
||||||
void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
|
void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
|
||||||
bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid);
|
bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid);
|
||||||
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g);
|
||||||
|
void gk20a_fifo_profile_release(struct gk20a *g,
|
||||||
|
struct fifo_profile_gk20a *profile);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /*__GR_GK20A_H__*/
|
#endif /*__GR_GK20A_H__*/
|
||||||
|
|||||||
Reference in New Issue
Block a user