gpu: nvgpu: hal for timestamps correlation

In order to perform timestamps correlation for FECS
traces, we need to collect CPU / GPU timestamp
samples. In the virtualization case, it is possible for
a guest to get GPU timestamps by using read_ptimer.
However, if the CPU timestamp is read on guest side,
and the GPU timestamp is read on vm-server side,
then it introduces some latency that will create an
artificial offset for GPU timestamps (~2 us on
average). For better CPU / GPU timestamp correlation,
a command was added to collect all timestamps on the
vm-server side.

Bug 1900475

Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1472447
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Aparna Das <aparnad@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Thomas Fleury
2017-04-27 11:28:27 -07:00
committed by mobile promotions
parent 70f507eec7
commit 56f56b5cd9
9 changed files with 160 additions and 50 deletions

View File

@@ -643,58 +643,17 @@ static inline u64 get_cpu_timestamp_timeofday(void)
return timeval_to_jiffies(&tv);
}
static inline int get_timestamps_zipper(struct gk20a *g,
u64 (*get_cpu_timestamp)(void),
struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
int gk20a_get_timestamps_zipper(struct gk20a *g,
u32 source_id, u32 count,
struct nvgpu_cpu_time_correlation_sample *samples)
{
int err = 0;
unsigned int i = 0;
u32 gpu_timestamp_hi_new = 0;
u32 gpu_timestamp_hi_old = 0;
if (gk20a_busy(g)) {
gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
err = -EINVAL;
goto end;
}
/* get zipper reads of gpu and cpu counter values */
gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
for (i = 0; i < args->count; i++) {
u32 gpu_timestamp_lo = 0;
u32 gpu_timestamp_hi = 0;
gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
args->samples[i].cpu_timestamp = get_cpu_timestamp();
rmb(); /* maintain zipper read order */
gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
/* pick the appropriate gpu counter hi bits */
gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
gpu_timestamp_hi_old : gpu_timestamp_hi_new;
args->samples[i].gpu_timestamp =
((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
gpu_timestamp_hi_old = gpu_timestamp_hi_new;
}
end:
gk20a_idle(g);
return err;
}
static int nvgpu_gpu_get_cpu_time_correlation_info(
struct gk20a *g,
struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
{
int err = 0;
u64 (*get_cpu_timestamp)(void) = NULL;
if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT)
return -EINVAL;
switch (args->source_id) {
switch (source_id) {
case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
get_cpu_timestamp = get_cpu_timestamp_tsc;
break;
@@ -709,7 +668,67 @@ static int nvgpu_gpu_get_cpu_time_correlation_info(
return -EINVAL;
}
err = get_timestamps_zipper(g, get_cpu_timestamp, args);
if (gk20a_busy(g)) {
gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
err = -EINVAL;
goto end;
}
/* get zipper reads of gpu and cpu counter values */
gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
for (i = 0; i < count; i++) {
u32 gpu_timestamp_lo = 0;
u32 gpu_timestamp_hi = 0;
gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
samples[i].cpu_timestamp = get_cpu_timestamp();
rmb(); /* maintain zipper read order */
gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
/* pick the appropriate gpu counter hi bits */
gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
gpu_timestamp_hi_old : gpu_timestamp_hi_new;
samples[i].gpu_timestamp =
((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
gpu_timestamp_hi_old = gpu_timestamp_hi_new;
}
end:
gk20a_idle(g);
return err;
}
/*
 * IOCTL handler: collect paired CPU/GPU timestamp samples for clock
 * correlation and copy them into the user-visible args structure.
 *
 * The actual sampling is delegated to the per-chip/per-virtualization HAL
 * op g->ops.bus.get_timestamps_zipper, so that in the vGPU case all
 * samples can be taken on the vm-server side.
 *
 * Returns 0 on success, -EINVAL if args->count is out of range, -ENOMEM
 * on allocation failure, or the error from the HAL op.
 */
static int nvgpu_gpu_get_cpu_time_correlation_info(
	struct gk20a *g,
	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
{
	struct nvgpu_cpu_time_correlation_sample *samples;
	int err;
	u32 i;

	if (args->count >= NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT) {
		return -EINVAL;
	}

	/*
	 * kcalloc() checks count * size for overflow and zero-initializes,
	 * unlike an open-coded kzalloc(count * size).
	 */
	samples = kcalloc(args->count, sizeof(*samples), GFP_KERNEL);
	if (!samples) {
		return -ENOMEM;
	}

	err = g->ops.bus.get_timestamps_zipper(g,
			args->source_id, args->count, samples);
	if (!err) {
		/* copy the kernel-side samples out to the ioctl args */
		for (i = 0; i < args->count; i++) {
			args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
			args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
		}
	}

	kfree(samples);

	return err;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -16,8 +16,20 @@
#ifndef CTRL_GK20A_H
#define CTRL_GK20A_H
#include <linux/fs.h>
#include <linux/file.h>
/* One CPU/GPU timestamp pair taken back-to-back for clock correlation. */
struct nvgpu_cpu_time_correlation_sample {
u64 cpu_timestamp; /* CPU clock source value (e.g. TSC) */
u64 gpu_timestamp; /* raw GPU PTIMER counter value */
};
int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
int gk20a_get_timestamps_zipper(struct gk20a *g,
u32 source_id, u32 count,
struct nvgpu_cpu_time_correlation_sample *samples);
#endif /* CTRL_GK20A_H */

View File

@@ -45,6 +45,7 @@ struct dbg_profiler_object_data;
#include "as_gk20a.h"
#include "clk_gk20a.h"
#include "ce2_gk20a.h"
#include "ctrl_gk20a.h"
#include "fifo_gk20a.h"
#include "tsg_gk20a.h"
#include "gr_gk20a.h"
@@ -774,6 +775,12 @@ struct gpu_ops {
size_t scatter_buffer_size);
} cde;
struct {
int (*get_timestamps_zipper)(struct gk20a *g,
u32 source_id, u32 count,
struct nvgpu_cpu_time_correlation_sample *);
} bus;
int (*get_litter_value)(struct gk20a *g, int value);
int (*chip_init_gpu_characteristics)(struct gk20a *g);
int (*read_ptimer)(struct gk20a *g, u64 *value);

View File

@@ -3,7 +3,7 @@
*
* GK20A Tegra HAL interface.
*
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
#include "hal_gk20a.h"
#include "ltc_gk20a.h"
#include "fb_gk20a.h"
#include "ctrl_gk20a.h"
#include "gk20a.h"
#include "gk20a_gating_reglist.h"
#include "channel_gk20a.h"
@@ -170,6 +171,7 @@ int gk20a_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
gops->get_litter_value = gk20a_get_litter_value;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = KEPLER_C;

View File

@@ -1,7 +1,7 @@
/*
* GM20B Graphics
*
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,7 @@
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "gk20a/ctrl_gk20a.h"
#include "ltc_gm20b.h"
#include "ce2_gm20b.h"
@@ -244,6 +245,7 @@ int gm20b_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
gops->get_litter_value = gm20b_get_litter_value;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = MAXWELL_B;

View File

@@ -21,6 +21,7 @@
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "gk20a/ctrl_gk20a.h"
#include "gp10b/gr_gp10b.h"
#include "gp10b/fecs_trace_gp10b.h"
@@ -252,6 +253,7 @@ int gp106_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gp106_init_gpu_characteristics;
gops->gr_ctx.use_dma_for_fw_bootstrap = true;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = PASCAL_B;

View File

@@ -1,7 +1,7 @@
/*
* GP10B Tegra HAL interface
*
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -22,6 +22,7 @@
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "gk20a/ctrl_gk20a.h"
#include "gp10b/gr_gp10b.h"
#include "gp10b/fecs_trace_gp10b.h"
@@ -257,6 +258,7 @@ int gp10b_init_hal(struct gk20a *g)
gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
gops->get_litter_value = gp10b_get_litter_value;
gops->read_ptimer = gk20a_read_ptimer;
gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
c->twod_class = FERMI_TWOD_A;
c->threed_class = PASCAL_A;

View File

@@ -356,6 +356,51 @@ static int vgpu_read_ptimer(struct gk20a *g, u64 *value)
return err;
}
/*
 * vGPU implementation of the bus.get_timestamps_zipper HAL op.
 *
 * Forwards the request to the vm-server so that both CPU and GPU
 * timestamps are sampled on the server side, avoiding the guest/server
 * round-trip latency that would otherwise skew the correlation.
 *
 * Returns 0 on success, -EINVAL for an unsupported count or clock
 * source, or the RPC error code on failure.
 */
int vgpu_get_timestamps_zipper(struct gk20a *g,
		u32 source_id, u32 count,
		struct nvgpu_cpu_time_correlation_sample *samples)
{
	struct tegra_vgpu_cmd_msg msg = {0};
	struct tegra_vgpu_get_timestamps_zipper_params *params =
		&msg.params.get_timestamps_zipper;
	u32 idx;
	int err;

	gk20a_dbg_fn("");

	/* the RPC message only carries a fixed number of sample slots */
	if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
		gk20a_err(dev_from_gk20a(g),
			"count %u overflow", count);
		return -EINVAL;
	}

	/* only the TSC clock source is supported by the vm-server */
	if (source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) {
		gk20a_err(dev_from_gk20a(g),
			"source_id %u not supported", source_id);
		return -EINVAL;
	}

	msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
	msg.handle = vgpu_get_handle(g);
	params->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
	params->count = count;

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	if (!err)
		err = msg.ret;
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"vgpu get timestamps zipper failed, err=%d", err);
		return err;
	}

	/* copy the server-side samples into the caller's buffer */
	for (idx = 0; idx < count; idx++) {
		samples[idx].cpu_timestamp = params->samples[idx].cpu_timestamp;
		samples[idx].gpu_timestamp = params->samples[idx].gpu_timestamp;
	}

	return 0;
}
void vgpu_init_hal_common(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
@@ -373,6 +418,7 @@ void vgpu_init_hal_common(struct gk20a *g)
#endif
gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
gops->read_ptimer = vgpu_read_ptimer;
gops->bus.get_timestamps_zipper = vgpu_get_timestamps_zipper;
}
static int vgpu_init_hal(struct gk20a *g)

View File

@@ -106,6 +106,7 @@ enum {
TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE = 70,
TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE = 71,
TEGRA_VGPU_CMD_PROF_MGT = 72,
TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74,
};
struct tegra_vgpu_connect_params {
@@ -405,6 +406,22 @@ struct tegra_vgpu_read_ptimer_params {
u64 time;
};
#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT 16
#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC 1
/* RPC payload for TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER: the vm-server
 * fills samples[0..count-1] with back-to-back CPU/GPU timestamp pairs. */
struct tegra_vgpu_get_timestamps_zipper_params {
/* timestamp pairs */
struct {
/* CPU clock source (e.g. TSC) timestamp value */
u64 cpu_timestamp;
/* raw GPU counter (PTIMER) value */
u64 gpu_timestamp;
} samples[TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT];
/* number of pairs to read */
u32 count;
/* cpu clock source id */
u32 source_id;
};
struct tegra_vgpu_set_powergate_params {
u32 mode;
};
@@ -541,6 +558,7 @@ struct tegra_vgpu_cmd_msg {
struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
struct tegra_vgpu_get_gpu_freq_table_params get_gpu_freq_table;
struct tegra_vgpu_prof_mgt_params prof_management;
struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper;
char padding[192];
} params;
};