gpu: nvgpu: hal for timestamps correlation

In order to perform timestamps correlation for FECS
traces, we need to collect CPU / GPU timestamp
samples. In the virtualization case, it is possible for
a guest to get GPU timestamps by using read_ptimer.
However, if the CPU timestamp is read on guest side,
and the GPU timestamp is read on vm-server side,
then it introduces some latency that will create an
artificial offset for GPU timestamps (~2 us on
average). For better CPU / GPU timestamp correlation,
a command was added to collect all timestamps on the
vm-server side.

Bug 1900475

Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1472447
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Aparna Das <aparnad@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Thomas Fleury
2017-04-27 11:28:27 -07:00
committed by mobile promotions
parent 70f507eec7
commit 56f56b5cd9
9 changed files with 160 additions and 50 deletions

View File

@@ -643,58 +643,17 @@ static inline u64 get_cpu_timestamp_timeofday(void)
return timeval_to_jiffies(&tv);
}
static inline int get_timestamps_zipper(struct gk20a *g,
u64 (*get_cpu_timestamp)(void),
struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
int gk20a_get_timestamps_zipper(struct gk20a *g,
u32 source_id, u32 count,
struct nvgpu_cpu_time_correlation_sample *samples)
{
int err = 0;
unsigned int i = 0;
u32 gpu_timestamp_hi_new = 0;
u32 gpu_timestamp_hi_old = 0;
if (gk20a_busy(g)) {
gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
err = -EINVAL;
goto end;
}
/* get zipper reads of gpu and cpu counter values */
gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
for (i = 0; i < args->count; i++) {
u32 gpu_timestamp_lo = 0;
u32 gpu_timestamp_hi = 0;
gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
args->samples[i].cpu_timestamp = get_cpu_timestamp();
rmb(); /* maintain zipper read order */
gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
/* pick the appropriate gpu counter hi bits */
gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
gpu_timestamp_hi_old : gpu_timestamp_hi_new;
args->samples[i].gpu_timestamp =
((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
gpu_timestamp_hi_old = gpu_timestamp_hi_new;
}
end:
gk20a_idle(g);
return err;
}
static int nvgpu_gpu_get_cpu_time_correlation_info(
struct gk20a *g,
struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
{
int err = 0;
u64 (*get_cpu_timestamp)(void) = NULL;
if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT)
return -EINVAL;
switch (args->source_id) {
switch (source_id) {
case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
get_cpu_timestamp = get_cpu_timestamp_tsc;
break;
@@ -709,7 +668,67 @@ static int nvgpu_gpu_get_cpu_time_correlation_info(
return -EINVAL;
}
err = get_timestamps_zipper(g, get_cpu_timestamp, args);
if (gk20a_busy(g)) {
gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
err = -EINVAL;
goto end;
}
/* get zipper reads of gpu and cpu counter values */
gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
for (i = 0; i < count; i++) {
u32 gpu_timestamp_lo = 0;
u32 gpu_timestamp_hi = 0;
gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
samples[i].cpu_timestamp = get_cpu_timestamp();
rmb(); /* maintain zipper read order */
gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
/* pick the appropriate gpu counter hi bits */
gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
gpu_timestamp_hi_old : gpu_timestamp_hi_new;
samples[i].gpu_timestamp =
((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
gpu_timestamp_hi_old = gpu_timestamp_hi_new;
}
end:
gk20a_idle(g);
return err;
}
/*
 * IOCTL handler: collect paired CPU/GPU timestamp samples for clock
 * correlation and copy them into the user-visible args structure.
 *
 * The actual sampling is delegated to the per-chip/per-virtualization HAL
 * op g->ops.bus.get_timestamps_zipper, so that in the vGPU case all
 * samples can be taken on the vm-server side.
 *
 * Returns 0 on success, -EINVAL if args->count is out of range, -ENOMEM
 * on allocation failure, or the error from the HAL op.
 */
static int nvgpu_gpu_get_cpu_time_correlation_info(
	struct gk20a *g,
	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
{
	struct nvgpu_cpu_time_correlation_sample *samples;
	int err;
	u32 i;

	if (args->count >= NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT) {
		return -EINVAL;
	}

	/*
	 * kcalloc() checks count * size for overflow and zero-initializes,
	 * unlike an open-coded kzalloc(count * size).
	 */
	samples = kcalloc(args->count, sizeof(*samples), GFP_KERNEL);
	if (!samples) {
		return -ENOMEM;
	}

	err = g->ops.bus.get_timestamps_zipper(g,
			args->source_id, args->count, samples);
	if (!err) {
		/* copy the kernel-side samples out to the ioctl args */
		for (i = 0; i < args->count; i++) {
			args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
			args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
		}
	}

	kfree(samples);

	return err;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -16,8 +16,20 @@
#ifndef CTRL_GK20A_H
#define CTRL_GK20A_H
#include <linux/fs.h>
#include <linux/file.h>
/* One CPU/GPU timestamp pair taken back-to-back for clock correlation. */
struct nvgpu_cpu_time_correlation_sample {
u64 cpu_timestamp; /* CPU clock source value (e.g. TSC) */
u64 gpu_timestamp; /* raw GPU PTIMER counter value */
};
int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
int gk20a_get_timestamps_zipper(struct gk20a *g,
u32 source_id, u32 count,
struct nvgpu_cpu_time_correlation_sample *samples);
#endif /* CTRL_GK20A_H */

View File

@@ -45,6 +45,7 @@ struct dbg_profiler_object_data;
#include "as_gk20a.h"
#include "clk_gk20a.h"
#include "ce2_gk20a.h"
#include "ctrl_gk20a.h"
#include "fifo_gk20a.h"
#include "tsg_gk20a.h"
#include "gr_gk20a.h"
@@ -774,6 +775,12 @@ struct gpu_ops {
size_t scatter_buffer_size);
} cde;
struct {
int (*get_timestamps_zipper)(struct gk20a *g,
u32 source_id, u32 count,
struct nvgpu_cpu_time_correlation_sample *);
} bus;
int (*get_litter_value)(struct gk20a *g, int value);
int (*chip_init_gpu_characteristics)(struct gk20a *g);
int (*read_ptimer)(struct gk20a *g, u64 *value);

View File

@@ -3,7 +3,7 @@
*
* GK20A Tegra HAL interface.
*
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
#include "hal_gk20a.h"
#include "ltc_gk20a.h"
#include "fb_gk20a.h"
#include "ctrl_gk20a.h"
#include "gk20a.h"
#include "gk20a_gating_reglist.h"
#include "channel_gk20a.h"
@@ -170,6 +171,7 @@ int gk20a_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
gops->get_litter_value = gk20a_get_litter_value;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = KEPLER_C;

View File

@@ -1,7 +1,7 @@
/*
* GM20B Graphics
*
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,7 @@
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "gk20a/ctrl_gk20a.h"
#include "ltc_gm20b.h"
#include "ce2_gm20b.h"
@@ -244,6 +245,7 @@ int gm20b_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
gops->get_litter_value = gm20b_get_litter_value;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = MAXWELL_B;

View File

@@ -21,6 +21,7 @@
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "gk20a/ctrl_gk20a.h"
#include "gp10b/gr_gp10b.h"
#include "gp10b/fecs_trace_gp10b.h"
@@ -252,6 +253,7 @@ int gp106_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gp106_init_gpu_characteristics;
gops->gr_ctx.use_dma_for_fw_bootstrap = true;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = PASCAL_B;

View File

@@ -1,7 +1,7 @@
/*
* GP10B Tegra HAL interface
*
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -22,6 +22,7 @@
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "gk20a/ctrl_gk20a.h"
#include "gp10b/gr_gp10b.h"
#include "gp10b/fecs_trace_gp10b.h"
@@ -257,6 +258,7 @@ int gp10b_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
gops->get_litter_value = gp10b_get_litter_value;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = PASCAL_A;

View File

@@ -356,6 +356,51 @@ static int vgpu_read_ptimer(struct gk20a *g, u64 *value)
return err;
}
/*
 * vGPU implementation of the bus.get_timestamps_zipper HAL op.
 *
 * Forwards the request to the vm-server so that both CPU and GPU
 * timestamps are sampled on the server side, avoiding the guest/server
 * round-trip latency that would otherwise skew the correlation.
 *
 * Returns 0 on success, -EINVAL for an unsupported count or clock
 * source, or the RPC error code on failure.
 */
int vgpu_get_timestamps_zipper(struct gk20a *g,
		u32 source_id, u32 count,
		struct nvgpu_cpu_time_correlation_sample *samples)
{
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_get_timestamps_zipper_params *params =
		&msg.params.get_timestamps_zipper;
	u32 idx;
	int err;

	gk20a_dbg_fn("");

	/* the RPC message only carries a fixed number of sample slots */
	if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
		gk20a_err(dev_from_gk20a(g),
			"count %u overflow", count);
		return -EINVAL;
	}

	/* only the TSC clock source is supported by the vm-server */
	if (source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) {
		gk20a_err(dev_from_gk20a(g),
			"source_id %u not supported", source_id);
		return -EINVAL;
	}

	msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
	msg.handle = vgpu_get_handle(g);
	params->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
	params->count = count;

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (!err)
		err = msg.ret;
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"vgpu get timestamps zipper failed, err=%d", err);
		return err;
	}

	/* copy the server-side samples into the caller's buffer */
	for (idx = 0; idx < count; idx++) {
		samples[idx].cpu_timestamp = params->samples[idx].cpu_timestamp;
		samples[idx].gpu_timestamp = params->samples[idx].gpu_timestamp;
	}

	return 0;
}
void vgpu_init_hal_common(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
@@ -373,6 +418,7 @@ void vgpu_init_hal_common(struct gk20a *g)
#endif
gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
gops->read_ptimer = vgpu_read_ptimer;
gops->bus.get_timestamps_zipper = vgpu_get_timestamps_zipper;
}
static int vgpu_init_hal(struct gk20a *g)

View File

@@ -106,6 +106,7 @@ enum {
TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE = 70,
TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE = 71,
TEGRA_VGPU_CMD_PROF_MGT = 72,
TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74,
};
struct tegra_vgpu_connect_params {
@@ -405,6 +406,22 @@ struct tegra_vgpu_read_ptimer_params {
u64 time;
};
#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT 16
#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC 1
/* RPC payload for TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER: the vm-server
 * fills samples[0..count-1] with back-to-back CPU/GPU timestamp pairs. */
struct tegra_vgpu_get_timestamps_zipper_params {
/* timestamp pairs */
struct {
/* CPU clock source (e.g. TSC) timestamp value */
u64 cpu_timestamp;
/* raw GPU counter (PTIMER) value */
u64 gpu_timestamp;
} samples[TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT];
/* number of pairs to read */
u32 count;
/* cpu clock source id */
u32 source_id;
};
struct tegra_vgpu_set_powergate_params {
u32 mode;
};
@@ -541,6 +558,7 @@ struct tegra_vgpu_cmd_msg {
struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
struct tegra_vgpu_get_gpu_freq_table_params get_gpu_freq_table;
struct tegra_vgpu_prof_mgt_params prof_management;
struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper;
char padding[192];
} params;
};