gpu: nvgpu: hal for timestamps correlation

In order to perform timestamp correlation for FECS traces, we need to collect CPU / GPU timestamp samples. In the virtualization case, it is possible for a guest to get GPU timestamps by using read_ptimer. However, if the CPU timestamp is read on the guest side and the GPU timestamp is read on the vm-server side, this introduces latency that creates an artificial offset in the GPU timestamps (~2 us on average). For better CPU / GPU timestamp correlation, add a command to collect all timestamps on the vm-server side. Bug 1900475 Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: http://git-master/r/1472447 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Aparna Das <aparnad@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
2025-12-22 09:12:24 +03:00 · 2017-04-27 11:28:27 -07:00
parent 70f507eec7
commit 56f56b5cd9
9 changed files with 160 additions and 50 deletions
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -643,58 +643,17 @@ static inline u64 get_cpu_timestamp_timeofday(void)
 	return timeval_to_jiffies(&tv);
 }
-static inline int get_timestamps_zipper(struct gk20a *g,
+int gk20a_get_timestamps_zipper(struct gk20a *g,
-		u64 (*get_cpu_timestamp)(void),
+		u32 source_id, u32 count,
-		struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
+		struct nvgpu_cpu_time_correlation_sample *samples)
 {
 	int err = 0;
 	unsigned int i = 0;
 	u32 gpu_timestamp_hi_new = 0;
 	u32 gpu_timestamp_hi_old = 0;
 	if (gk20a_busy(g)) {
 		gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
 		err = -EINVAL;
 		goto end;
 	}
 	/* get zipper reads of gpu and cpu counter values */
 	gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
 	for (i = 0; i < args->count; i++) {
 		u32 gpu_timestamp_lo = 0;
 		u32 gpu_timestamp_hi = 0;
 		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
 		args->samples[i].cpu_timestamp = get_cpu_timestamp();
 		rmb(); /* maintain zipper read order */
 		gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
 		/* pick the appropriate gpu counter hi bits */
 		gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
 			gpu_timestamp_hi_old : gpu_timestamp_hi_new;
 		args->samples[i].gpu_timestamp =
 			((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
 		gpu_timestamp_hi_old = gpu_timestamp_hi_new;
 	}
 end:
 	gk20a_idle(g);
 	return err;
 }
 static int nvgpu_gpu_get_cpu_time_correlation_info(
 	struct gk20a *g,
 	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
 {
 	int err = 0;
 	u64 (*get_cpu_timestamp)(void) = NULL;
-	if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT)
+	switch (source_id) {
 		return -EINVAL;
 	switch (args->source_id) {
 	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
 		get_cpu_timestamp = get_cpu_timestamp_tsc;
 		break;
@@ -709,7 +668,67 @@ static int nvgpu_gpu_get_cpu_time_correlation_info(
 		return -EINVAL;
 	}
-	err = get_timestamps_zipper(g, get_cpu_timestamp, args);
+	if (gk20a_busy(g)) {
 		gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
 		err = -EINVAL;
 		goto end;
 	}
 	/* get zipper reads of gpu and cpu counter values */
 	gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
 	for (i = 0; i < count; i++) {
 		u32 gpu_timestamp_lo = 0;
 		u32 gpu_timestamp_hi = 0;
 		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
 		samples[i].cpu_timestamp = get_cpu_timestamp();
 		rmb(); /* maintain zipper read order */
 		gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
 		/* pick the appropriate gpu counter hi bits */
 		gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
 			gpu_timestamp_hi_old : gpu_timestamp_hi_new;
 		samples[i].gpu_timestamp =
 			((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
 		gpu_timestamp_hi_old = gpu_timestamp_hi_new;
 	}
 end:
 	gk20a_idle(g);
 	return err;
 }
 /*
  * Collect CPU/GPU timestamp pairs for the time correlation request in @args.
  *
  * The samples are gathered by the chip-specific (or virtualized)
  * bus.get_timestamps_zipper HAL into a temporary array and are copied into
  * args->samples only when the HAL reports success.
  *
  * Returns 0 on success, -EINVAL if args->count exceeds
  * NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT, -ENOMEM if the
  * temporary array cannot be allocated, or the error returned by the HAL.
  */
 static int nvgpu_gpu_get_cpu_time_correlation_info(
 	struct gk20a *g,
 	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
 {
 	struct nvgpu_cpu_time_correlation_sample *samples;
 	int err;
 	u32 i;
 	/*
 	 * MAX_COUNT itself is a valid request; only larger counts are
 	 * rejected, since they would index past the samples handled below.
 	 */
 	if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT) {
 		return -EINVAL;
 	}
 	samples = kzalloc(args->count * sizeof(*samples), GFP_KERNEL);
 	if (!samples) {
 		return -ENOMEM;
 	}
 	err = g->ops.bus.get_timestamps_zipper(g,
 			args->source_id, args->count, samples);
 	if (!err) {
 		/* copy field by field from the HAL sample array into args */
 		for (i = 0; i < args->count; i++) {
 			args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
 			args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
 		}
 	}
 	kfree(samples);
 	return err;
 }
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -16,8 +16,20 @@
 #ifndef CTRL_GK20A_H
 #define CTRL_GK20A_H
 #include <linux/fs.h>
 #include <linux/file.h>
 /*
  * One CPU/GPU timestamp pair, as filled in by the get_timestamps_zipper
  * implementations.
  */
 struct nvgpu_cpu_time_correlation_sample {
 	/* CPU time read from the clock source selected by source_id */
 	u64 cpu_timestamp;
 	/* 64-bit GPU timer value read alongside cpu_timestamp */
 	u64 gpu_timestamp;
 };
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
 int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 int gk20a_get_timestamps_zipper(struct gk20a *g,
 		u32 source_id, u32 count,
 		struct nvgpu_cpu_time_correlation_sample *samples);
 #endif /* CTRL_GK20A_H */
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -45,6 +45,7 @@ struct dbg_profiler_object_data;
 #include "as_gk20a.h"
 #include "clk_gk20a.h"
 #include "ce2_gk20a.h"
 #include "ctrl_gk20a.h"
 #include "fifo_gk20a.h"
 #include "tsg_gk20a.h"
 #include "gr_gk20a.h"
@@ -774,6 +775,12 @@ struct gpu_ops {
 					       size_t scatter_buffer_size);
 	} cde;
 	/* bus ops: collection of CPU/GPU timestamp pairs */
 	struct {
 		/*
 		 * Fill the given sample array with 'count' CPU/GPU timestamp
 		 * pairs, reading the CPU time from the clock source named by
 		 * 'source_id'. Returns 0 on success or a negative errno.
 		 */
 		int (*get_timestamps_zipper)(struct gk20a *g,
 			u32 source_id, u32 count,
 			struct nvgpu_cpu_time_correlation_sample *);
 	} bus;
 	int (*get_litter_value)(struct gk20a *g, int value);
 	int (*chip_init_gpu_characteristics)(struct gk20a *g);
 	int (*read_ptimer)(struct gk20a *g, u64 *value);
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -3,7 +3,7 @@
 *
 * GK20A Tegra HAL interface.
 *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
 #include "hal_gk20a.h"
 #include "ltc_gk20a.h"
 #include "fb_gk20a.h"
 #include "ctrl_gk20a.h"
 #include "gk20a.h"
 #include "gk20a_gating_reglist.h"
 #include "channel_gk20a.h"
@@ -170,6 +171,7 @@ int gk20a_init_hal(struct gk20a *g)
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
 	gops->get_litter_value = gk20a_get_litter_value;
 	gops->read_ptimer = gk20a_read_ptimer;
 	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = KEPLER_C;
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -1,7 +1,7 @@
 /*
 * GM20B Graphics
 *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "gk20a/css_gr_gk20a.h"
 #include "gk20a/ctrl_gk20a.h"
 #include "ltc_gm20b.h"
 #include "ce2_gm20b.h"
@@ -244,6 +245,7 @@ int gm20b_init_hal(struct gk20a *g)
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
 	gops->get_litter_value = gm20b_get_litter_value;
 	gops->read_ptimer = gk20a_read_ptimer;
 	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = MAXWELL_B;
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -21,6 +21,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "gk20a/css_gr_gk20a.h"
 #include "gk20a/ctrl_gk20a.h"
 #include "gp10b/gr_gp10b.h"
 #include "gp10b/fecs_trace_gp10b.h"
@@ -252,6 +253,7 @@ int gp106_init_hal(struct gk20a *g)
 	gops->chip_init_gpu_characteristics = gp106_init_gpu_characteristics;
 	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
 	gops->read_ptimer = gk20a_read_ptimer;
 	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_B;
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -1,7 +1,7 @@
 /*
 * GP10B Tegra HAL interface
 *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -22,6 +22,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "gk20a/css_gr_gk20a.h"
 #include "gk20a/ctrl_gk20a.h"
 #include "gp10b/gr_gp10b.h"
 #include "gp10b/fecs_trace_gp10b.h"
@@ -257,6 +258,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
 	gops->get_litter_value = gp10b_get_litter_value;
 	gops->read_ptimer = gk20a_read_ptimer;
 	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_A;
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -356,6 +356,51 @@ static int vgpu_read_ptimer(struct gk20a *g, u64 *value)
 	return err;
 }
 /*
  * Virtualized implementation of the get_timestamps_zipper HAL.
  *
  * Sends a TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER command so that both the CPU
  * and the GPU timestamps of every pair are collected on the vm-server side,
  * then copies the returned pairs into @samples.
  *
  * Only the TSC source id is accepted, and @count may not exceed
  * TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT; both violations yield -EINVAL.
  * Otherwise returns 0 on success, or the transport / server error code.
  */
 int vgpu_get_timestamps_zipper(struct gk20a *g,
 		u32 source_id, u32 count,
 		struct nvgpu_cpu_time_correlation_sample *samples)
 {
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_get_timestamps_zipper_params *params =
 			&msg.params.get_timestamps_zipper;
 	u32 idx;
 	int ret;
 	gk20a_dbg_fn("");
 	/* the reply message carries a fixed-size sample array */
 	if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
 		gk20a_err(dev_from_gk20a(g),
 			"count %u overflow", count);
 		return -EINVAL;
 	}
 	if (source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) {
 		gk20a_err(dev_from_gk20a(g),
 			"source_id %u not supported", source_id);
 		return -EINVAL;
 	}
 	/* build the request */
 	params->count = count;
 	params->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
 	msg.handle = vgpu_get_handle(g);
 	msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
 	/* a transport failure takes precedence over the server's status */
 	ret = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	if (!ret) {
 		ret = msg.ret;
 	}
 	if (ret) {
 		gk20a_err(dev_from_gk20a(g),
 			"vgpu get timestamps zipper failed, err=%d", ret);
 		return ret;
 	}
 	/* hand the pairs returned in the reply back to the caller */
 	for (idx = 0; idx < count; idx++) {
 		samples[idx].cpu_timestamp = params->samples[idx].cpu_timestamp;
 		samples[idx].gpu_timestamp = params->samples[idx].gpu_timestamp;
 	}
 	return 0;
 }
 void vgpu_init_hal_common(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
@@ -373,6 +418,7 @@ void vgpu_init_hal_common(struct gk20a *g)
 #endif
 	gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
 	gops->read_ptimer = vgpu_read_ptimer;
 	gops->bus.get_timestamps_zipper = vgpu_get_timestamps_zipper;
 }
 static int vgpu_init_hal(struct gk20a *g)
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -106,6 +106,7 @@ enum {
 	TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE = 70,
 	TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE = 71,
 	TEGRA_VGPU_CMD_PROF_MGT = 72,
 	TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74,
 };
 struct tegra_vgpu_connect_params {
@@ -405,6 +406,22 @@ struct tegra_vgpu_read_ptimer_params {
 	u64 time;
 };
 #define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT      16
 #define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC     1
 /*
  * Parameters of TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER: the requester sets
  * count and source_id, and reads the timestamp pairs back from samples.
  */
 struct tegra_vgpu_get_timestamps_zipper_params {
 	/* timestamp pairs */
 	struct {
 		/* cpu timestamp value */
 		u64 cpu_timestamp;
 		/* raw GPU counter (PTIMER) value */
 		u64 gpu_timestamp;
 	} samples[TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT];
 	/* number of pairs to read */
 	u32 count;
 	/* cpu clock source id */
 	u32 source_id;
 };
 struct tegra_vgpu_set_powergate_params {
 	u32 mode;
 };
@@ -541,6 +558,7 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
 		struct tegra_vgpu_get_gpu_freq_table_params get_gpu_freq_table;
 		struct tegra_vgpu_prof_mgt_params prof_management;
 		struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper;
 		char padding[192];
 	} params;
 };