gpu: nvgpu: hal for timestamp correlation

In order to perform timestamp correlation for FECS
traces, we need to collect pairs of CPU / GPU timestamp
samples. In the virtualization case, a guest can read
GPU timestamps using read_ptimer. However, if the CPU
timestamp is read on the guest side while the GPU
timestamp is read on the vm-server side, the latency
between the two reads introduces an artificial offset
in the GPU timestamps (~2 us on average). For better
CPU / GPU timestamp correlation, add a command to
collect both timestamps on the vm-server side.

Bug 1900475

Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1472447
(cherry picked from commit 56f56b5cd9)
Reviewed-on: http://git-master/r/1489183
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
7 changed files with 154 additions and 60 deletions
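
To make the correlation step concrete: once the zipper returns pairs of
CPU / GPU timestamps, a trace tool can fit a linear model
cpu ~= a * gpu + b over the pairs. The slope absorbs the unit difference
between the CPU clock source and PTIMER; the intercept is the constant
offset that reading both timestamps on the same side shrinks. Below is a
minimal userspace sketch, illustrative only and not part of this change:
struct sample mirrors nvgpu_cpu_time_correlation_sample from the diff,
and fit_correlation is a hypothetical helper name.

#include <stdint.h>

/* mirrors struct nvgpu_cpu_time_correlation_sample (see diff below) */
struct sample {
	uint64_t cpu_timestamp;
	uint64_t gpu_timestamp;
};

/*
 * Least-squares fit of cpu ~= a * gpu + b over n >= 2 sample pairs
 * (with at least two distinct gpu_timestamp values).
 */
static void fit_correlation(const struct sample *s, unsigned int n,
			    double *a, double *b)
{
	double sx = 0.0, sy = 0.0, sxx = 0.0, sxy = 0.0;
	unsigned int i;

	for (i = 0; i < n; i++) {
		/* use the first pair as origin so large raw counter
		 * values do not exhaust the 53-bit double mantissa */
		double x = (double)(s[i].gpu_timestamp - s[0].gpu_timestamp);
		double y = (double)(s[i].cpu_timestamp - s[0].cpu_timestamp);

		sx += x;
		sy += y;
		sxx += x * x;
		sxy += x * y;
	}

	*a = (n * sxy - sx * sy) / (n * sxx - sx * sx);
	*b = (sy - *a * sx) / n;
}

A tool would then map a GPU timestamp t to CPU time as
s[0].cpu_timestamp + a * (t - s[0].gpu_timestamp) + b.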


@@ -533,76 +533,34 @@ static int gk20a_ctrl_get_buffer_info(
 		&args->out.id, &args->out.length);
 }
 
-static inline u64 get_cpu_timestamp_tsc(void)
-{
-	return ((u64) get_cycles());
-}
-
-static inline u64 get_cpu_timestamp_jiffies(void)
-{
-	return (get_jiffies_64() - INITIAL_JIFFIES);
-}
-
-static inline u64 get_cpu_timestamp_timeofday(void)
-{
-	struct timeval tv;
-
-	do_gettimeofday(&tv);
-	return timeval_to_jiffies(&tv);
-}
-
-static inline int get_timestamps_zipper(struct gk20a *g,
-		u64 (*get_cpu_timestamp)(void),
-		struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
-{
-	int err = 0;
-	unsigned int i = 0;
-
-	if (gk20a_busy(g)) {
-		nvgpu_err(g, "GPU not powered on");
-		err = -EINVAL;
-		goto end;
-	}
-
-	for (i = 0; i < args->count; i++) {
-		err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp);
-		if (err)
-			return err;
-
-		args->samples[i].cpu_timestamp = get_cpu_timestamp();
-	}
-
-end:
-	gk20a_idle(g);
-	return err;
-}
-
 static int nvgpu_gpu_get_cpu_time_correlation_info(
 	struct gk20a *g,
 	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
 {
-	int err = 0;
-	u64 (*get_cpu_timestamp)(void) = NULL;
+	struct nvgpu_cpu_time_correlation_sample *samples;
+	int err;
+	u32 i;
 
 	if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT)
 		return -EINVAL;
 
-	switch (args->source_id) {
-	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
-		get_cpu_timestamp = get_cpu_timestamp_tsc;
-		break;
-	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES:
-		get_cpu_timestamp = get_cpu_timestamp_jiffies;
-		break;
-	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY:
-		get_cpu_timestamp = get_cpu_timestamp_timeofday;
-		break;
-	default:
-		nvgpu_err(g, "invalid cpu clock source id");
-		return -EINVAL;
+	samples = nvgpu_kzalloc(g, args->count *
+			sizeof(struct nvgpu_cpu_time_correlation_sample));
+	if (!samples) {
+		return -ENOMEM;
 	}
 
-	err = get_timestamps_zipper(g, get_cpu_timestamp, args);
+	err = g->ops.bus.get_timestamps_zipper(g,
+			args->source_id, args->count, samples);
+	if (!err) {
+		for (i = 0; i < args->count; i++) {
+			args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
+			args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
+		}
+	}
+
+	nvgpu_kfree(g, samples);
+
 	return err;
 }
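
For reference, the refactored ioctl path above can be exercised from
userspace roughly as follows. This is a sketch, not a tested client:
the NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO ioctl name, the
linux/nvgpu.h uapi header, and the /dev/nvhost-ctrl-gpu node are
assumptions; only args fields that appear in the diff are used.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header declaring the args struct */

int main(void)
{
	struct nvgpu_gpu_get_cpu_time_correlation_info_args args = {0};
	unsigned int i;
	int fd, err;

	/* assumed control node for the nvgpu ctrl ioctls */
	fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);
	if (fd < 0)
		return 1;

	args.count = NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT;
	args.source_id = NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC;

	/* ioctl name is an assumption based on the handler above */
	err = ioctl(fd, NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO, &args);
	if (!err) {
		for (i = 0; i < args.count; i++)
			printf("cpu=%llu gpu=%llu\n",
			       (unsigned long long)args.samples[i].cpu_timestamp,
			       (unsigned long long)args.samples[i].gpu_timestamp);
	}

	close(fd);
	return err ? 1 : 0;
}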


@@ -128,6 +128,66 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value)
 	return -EBUSY;
 }
 
+static inline u64 get_cpu_timestamp_tsc(void)
+{
+	return ((u64) get_cycles());
+}
+
+static inline u64 get_cpu_timestamp_jiffies(void)
+{
+	return (get_jiffies_64() - INITIAL_JIFFIES);
+}
+
+static inline u64 get_cpu_timestamp_timeofday(void)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	return timeval_to_jiffies(&tv);
+}
+
+int gk20a_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples)
+{
+	int err = 0;
+	unsigned int i = 0;
+	u64 (*get_cpu_timestamp)(void) = NULL;
+
+	switch (source_id) {
+	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
+		get_cpu_timestamp = get_cpu_timestamp_tsc;
+		break;
+	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES:
+		get_cpu_timestamp = get_cpu_timestamp_jiffies;
+		break;
+	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY:
+		get_cpu_timestamp = get_cpu_timestamp_timeofday;
+		break;
+	default:
+		nvgpu_err(g, "invalid cpu clock source id\n");
+		return -EINVAL;
+	}
+
+	if (gk20a_busy(g)) {
+		nvgpu_err(g, "GPU not powered on\n");
+		err = -EINVAL;
+		goto end;
+	}
+
+	for (i = 0; i < count; i++) {
+		err = g->ops.bus.read_ptimer(g, &samples[i].gpu_timestamp);
+		if (err)
+			return err;
+
+		samples[i].cpu_timestamp = get_cpu_timestamp();
+	}
+
+end:
+	gk20a_idle(g);
+	return err;
+}
+
 static int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
 {
 	u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
@@ -150,5 +210,6 @@ void gk20a_init_bus(struct gpu_ops *gops)
 	gops->bus.init_hw = gk20a_bus_init_hw;
 	gops->bus.isr = gk20a_bus_isr;
 	gops->bus.read_ptimer = gk20a_read_ptimer;
+	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	gops->bus.bar1_bind = gk20a_bus_bar1_bind;
 }


@@ -22,10 +22,19 @@ struct gk20a;
 struct gpu_ops;
 struct nvgpu_mem;
 
+struct nvgpu_cpu_time_correlation_sample {
+	u64 cpu_timestamp;
+	u64 gpu_timestamp;
+};
+
 void gk20a_init_bus(struct gpu_ops *gops);
 void gk20a_bus_isr(struct gk20a *g);
 int gk20a_read_ptimer(struct gk20a *g, u64 *value);
 void gk20a_bus_init_hw(struct gk20a *g);
+
+int gk20a_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples);
 
 #endif /* GK20A_H */


@@ -63,6 +63,7 @@ struct nvgpu_nvhost_dev;
 #include "priv_ring_gk20a.h"
 #include "therm_gk20a.h"
 #include "gm20b/acr_gm20b.h"
+#include "gk20a/bus_gk20a.h"
 #include "cde_gk20a.h"
 #include "sched_gk20a.h"
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
@@ -905,6 +906,9 @@ struct gpu_ops {
 		void (*init_hw)(struct gk20a *g);
 		void (*isr)(struct gk20a *g);
 		int (*read_ptimer)(struct gk20a *g, u64 *value);
+		int (*get_timestamps_zipper)(struct gk20a *g,
+			u32 source_id, u32 count,
+			struct nvgpu_cpu_time_correlation_sample *);
 		int (*bar1_bind)(struct gk20a *g, struct nvgpu_mem *bar1_inst);
 	} bus;


@@ -58,5 +58,6 @@ void gm20b_init_bus(struct gpu_ops *gops)
 	gops->bus.init_hw = gk20a_bus_init_hw;
 	gops->bus.isr = gk20a_bus_isr;
 	gops->bus.read_ptimer = gk20a_read_ptimer;
+	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	gops->bus.bar1_bind = gm20b_bus_bar1_bind;
 }


@@ -367,6 +367,48 @@ static int vgpu_read_ptimer(struct gk20a *g, u64 *value)
 	return err;
 }
 
+int vgpu_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples)
+{
+	struct tegra_vgpu_cmd_msg msg = {0};
+	struct tegra_vgpu_get_timestamps_zipper_params *p =
+			&msg.params.get_timestamps_zipper;
+	int err;
+	u32 i;
+
+	gk20a_dbg_fn("");
+
+	if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
+		nvgpu_err(g, "count %u overflow", count);
+		return -EINVAL;
+	}
+
+	if (source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) {
+		nvgpu_err(g, "source_id %u not supported", source_id);
+		return -EINVAL;
+	}
+
+	msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
+	msg.handle = vgpu_get_handle(g);
+	p->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
+	p->count = count;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	if (err) {
+		nvgpu_err(g, "vgpu get timestamps zipper failed, err=%d", err);
+		return err;
+	}
+
+	for (i = 0; i < count; i++) {
+		samples[i].cpu_timestamp = p->samples[i].cpu_timestamp;
+		samples[i].gpu_timestamp = p->samples[i].gpu_timestamp;
+	}
+
+	return err;
+}
+
 void vgpu_init_hal_common(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
@@ -384,6 +426,7 @@ void vgpu_init_hal_common(struct gk20a *g)
 #endif
 	gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
 	gops->bus.read_ptimer = vgpu_read_ptimer;
+	gops->bus.get_timestamps_zipper = vgpu_get_timestamps_zipper;
 }
 
 static int vgpu_init_hal(struct gk20a *g)


@@ -101,6 +101,7 @@ enum {
 	TEGRA_VGPU_CMD_RESUME_CONTEXTS = 67,
 	TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE = 68,
 	TEGRA_VGPU_CMD_PROF_MGT = 72,
+	TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74,
 };
 
 struct tegra_vgpu_connect_params {
@@ -389,6 +390,22 @@ struct tegra_vgpu_read_ptimer_params {
 	u64 time;
 };
 
+#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT	16
+#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC	1
+struct tegra_vgpu_get_timestamps_zipper_params {
+	/* timestamp pairs */
+	struct {
+		/* raw CPU clock source value */
+		u64 cpu_timestamp;
+		/* raw GPU counter (PTIMER) value */
+		u64 gpu_timestamp;
+	} samples[TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT];
+	/* number of pairs to read */
+	u32 count;
+	/* cpu clock source id */
+	u32 source_id;
+};
+
 struct tegra_vgpu_set_powergate_params {
 	u32 mode;
 };
@@ -518,6 +535,7 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_suspend_resume_contexts resume_contexts;
 		struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
 		struct tegra_vgpu_prof_mgt_params prof_management;
+		struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper;
 		char padding[192];
 	} params;
 };
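
The vm-server side of TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER is not part
of this patch. The kernel-style pseudocode below is a plausible sketch,
assuming hypothetical server_ctx and server_read_ptimer() names, to
show the point of the command: both timestamps of a pair are read back
to back on the server, so no guest/server RPC latency leaks into the
samples.

/*
 * Hypothetical vm-server handler sketch; server_ctx and
 * server_read_ptimer() are invented names, and the params
 * struct is the one defined above.
 */
static int handle_get_timestamps_zipper(struct server_ctx *ctx,
		struct tegra_vgpu_get_timestamps_zipper_params *p)
{
	u32 i;

	if (p->count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT ||
	    p->source_id != TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC)
		return -EINVAL;

	for (i = 0; i < p->count; i++) {
		/* GPU and CPU timestamps are read back to back on the
		 * server, so the pair carries no RPC latency */
		p->samples[i].gpu_timestamp = server_read_ptimer(ctx);
		p->samples[i].cpu_timestamp = get_cycles();	/* TSC */
	}

	return 0;
}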