gpu: nvgpu: hal for timestamp correlation

In order to perform timestamp correlation for FECS
traces, we need to collect pairs of CPU / GPU timestamp
samples. In the virtualization case, a guest can read
GPU timestamps using read_ptimer. However, if the CPU
timestamp is read on the guest side while the GPU
timestamp is read on the vm-server side, the latency
between the two reads introduces an artificial offset
in the GPU timestamps (~2 us on average). For better
CPU / GPU timestamp correlation, add a command to
collect both timestamps on the vm-server side.

Bug 1900475

Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1472447
(cherry picked from commit 56f56b5cd9)
Reviewed-on: http://git-master/r/1489183
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
7 changed files with 154 additions and 60 deletions
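
To make the correlation step concrete: once the zipper returns pairs of
CPU / GPU timestamps, a trace tool can fit a linear model
cpu ~= a * gpu + b over the pairs. The slope absorbs the unit difference
between the CPU clock source and PTIMER; the intercept is the constant
offset that reading both timestamps on the same side shrinks. Below is a
minimal userspace sketch, illustrative only and not part of this change:
struct sample mirrors nvgpu_cpu_time_correlation_sample from the diff,
and fit_correlation is a hypothetical helper name.

#include <stdint.h>

/* mirrors struct nvgpu_cpu_time_correlation_sample (see diff below) */
struct sample {
	uint64_t cpu_timestamp;
	uint64_t gpu_timestamp;
};

/*
 * Least-squares fit of cpu ~= a * gpu + b over n >= 2 sample pairs
 * (with at least two distinct gpu_timestamp values).
 */
static void fit_correlation(const struct sample *s, unsigned int n,
			    double *a, double *b)
{
	double sx = 0.0, sy = 0.0, sxx = 0.0, sxy = 0.0;
	unsigned int i;

	for (i = 0; i < n; i++) {
		/* use the first pair as origin so large raw counter
		 * values do not exhaust the 53-bit double mantissa */
		double x = (double)(s[i].gpu_timestamp - s[0].gpu_timestamp);
		double y = (double)(s[i].cpu_timestamp - s[0].cpu_timestamp);

		sx += x;
		sy += y;
		sxx += x * x;
		sxy += x * y;
	}

	*a = (n * sxy - sx * sy) / (n * sxx - sx * sx);
	*b = (sy - *a * sx) / n;
}

A tool would then map a GPU timestamp t to CPU time as
s[0].cpu_timestamp + a * (t - s[0].gpu_timestamp) + b.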


@@ -533,76 +533,34 @@ static int gk20a_ctrl_get_buffer_info(
 		&args->out.id, &args->out.length);
 }
 
-static inline u64 get_cpu_timestamp_tsc(void)
-{
-	return ((u64) get_cycles());
-}
-
-static inline u64 get_cpu_timestamp_jiffies(void)
-{
-	return (get_jiffies_64() - INITIAL_JIFFIES);
-}
-
-static inline u64 get_cpu_timestamp_timeofday(void)
-{
-	struct timeval tv;
-
-	do_gettimeofday(&tv);
-	return timeval_to_jiffies(&tv);
-}
-
-static inline int get_timestamps_zipper(struct gk20a *g,
-		u64 (*get_cpu_timestamp)(void),
-		struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
-{
-	int err = 0;
-	unsigned int i = 0;
-
-	if (gk20a_busy(g)) {
-		nvgpu_err(g, "GPU not powered on");
-		err = -EINVAL;
-		goto end;
-	}
-
-	for (i = 0; i < args->count; i++) {
-		err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp);
-		if (err)
-			return err;
-
-		args->samples[i].cpu_timestamp = get_cpu_timestamp();
-	}
-
-end:
-	gk20a_idle(g);
-	return err;
-}
-
 static int nvgpu_gpu_get_cpu_time_correlation_info(
 	struct gk20a *g,
 	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
 {
-	int err = 0;
-	u64 (*get_cpu_timestamp)(void) = NULL;
+	struct nvgpu_cpu_time_correlation_sample *samples;
+	int err;
+	u32 i;
 
 	if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT)
 		return -EINVAL;
 
-	switch (args->source_id) {
-	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
-		get_cpu_timestamp = get_cpu_timestamp_tsc;
-		break;
-	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES:
-		get_cpu_timestamp = get_cpu_timestamp_jiffies;
-		break;
-	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY:
-		get_cpu_timestamp = get_cpu_timestamp_timeofday;
-		break;
-	default:
-		nvgpu_err(g, "invalid cpu clock source id");
-		return -EINVAL;
+	samples = nvgpu_kzalloc(g, args->count *
+			sizeof(struct nvgpu_cpu_time_correlation_sample));
+	if (!samples) {
+		return -ENOMEM;
 	}
 
-	err = get_timestamps_zipper(g, get_cpu_timestamp, args);
+	err = g->ops.bus.get_timestamps_zipper(g,
+			args->source_id, args->count, samples);
+	if (!err) {
+		for (i = 0; i < args->count; i++) {
+			args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
+			args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
+		}
+	}
+
+	nvgpu_kfree(g, samples);
+
 	return err;
 }
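
For reference, the refactored ioctl path above can be exercised from
userspace roughly as follows. This is a sketch, not a tested client:
the NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO ioctl name, the
linux/nvgpu.h uapi header, and the /dev/nvhost-ctrl-gpu node are
assumptions; only args fields that appear in the diff are used.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header declaring the args struct */

int main(void)
{
	struct nvgpu_gpu_get_cpu_time_correlation_info_args args = {0};
	unsigned int i;
	int fd, err;

	/* assumed control node for the nvgpu ctrl ioctls */
	fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);
	if (fd < 0)
		return 1;

	args.count = NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT;
	args.source_id = NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC;

	/* ioctl name is an assumption based on the handler above */
	err = ioctl(fd, NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO, &args);
	if (!err) {
		for (i = 0; i < args.count; i++)
			printf("cpu=%llu gpu=%llu\n",
			       (unsigned long long)args.samples[i].cpu_timestamp,
			       (unsigned long long)args.samples[i].gpu_timestamp);
	}

	close(fd);
	return err ? 1 : 0;
}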


@@ -128,6 +128,66 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value)
 	return -EBUSY;
 }
 
+static inline u64 get_cpu_timestamp_tsc(void)
+{
+	return ((u64) get_cycles());
+}
+
+static inline u64 get_cpu_timestamp_jiffies(void)
+{
+	return (get_jiffies_64() - INITIAL_JIFFIES);
+}
+
+static inline u64 get_cpu_timestamp_timeofday(void)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	return timeval_to_jiffies(&tv);
+}
+
+int gk20a_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples)
+{
+	int err = 0;
+	unsigned int i = 0;
+	u64 (*get_cpu_timestamp)(void) = NULL;
+
+	switch (source_id) {
+	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
+		get_cpu_timestamp = get_cpu_timestamp_tsc;
+		break;
+	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES:
+		get_cpu_timestamp = get_cpu_timestamp_jiffies;
+		break;
+	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY:
+		get_cpu_timestamp = get_cpu_timestamp_timeofday;
+		break;
+	default:
+		nvgpu_err(g, "invalid cpu clock source id\n");
+		return -EINVAL;
+	}
+
+	if (gk20a_busy(g)) {
+		nvgpu_err(g, "GPU not powered on\n");
+		err = -EINVAL;
+		goto end;
+	}
+
+	for (i = 0; i < count; i++) {
+		err = g->ops.bus.read_ptimer(g, &samples[i].gpu_timestamp);
+		if (err)
+			return err;
+
+		samples[i].cpu_timestamp = get_cpu_timestamp();
+	}
+
+end:
+	gk20a_idle(g);
+	return err;
+}
+
 static int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
 {
 	u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
@@ -150,5 +210,6 @@ void gk20a_init_bus(struct gpu_ops *gops)
 	gops->bus.init_hw = gk20a_bus_init_hw;
 	gops->bus.isr = gk20a_bus_isr;
 	gops->bus.read_ptimer = gk20a_read_ptimer;
+	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	gops->bus.bar1_bind = gk20a_bus_bar1_bind;
 }


@@ -22,10 +22,19 @@ struct gk20a;
 struct gpu_ops;
 struct nvgpu_mem;
 
+struct nvgpu_cpu_time_correlation_sample {
+	u64 cpu_timestamp;
+	u64 gpu_timestamp;
+};
+
 void gk20a_init_bus(struct gpu_ops *gops);
 void gk20a_bus_isr(struct gk20a *g);
 int gk20a_read_ptimer(struct gk20a *g, u64 *value);
 void gk20a_bus_init_hw(struct gk20a *g);
+
+int gk20a_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples);
 
 #endif /* GK20A_H */


@@ -63,6 +63,7 @@ struct nvgpu_nvhost_dev;
 #include "priv_ring_gk20a.h"
 #include "therm_gk20a.h"
 #include "gm20b/acr_gm20b.h"
+#include "gk20a/bus_gk20a.h"
 #include "cde_gk20a.h"
 #include "sched_gk20a.h"
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
@@ -905,6 +906,9 @@ struct gpu_ops {
 		void (*init_hw)(struct gk20a *g);
 		void (*isr)(struct gk20a *g);
 		int (*read_ptimer)(struct gk20a *g, u64 *value);
+		int (*get_timestamps_zipper)(struct gk20a *g,
+			u32 source_id, u32 count,
+			struct nvgpu_cpu_time_correlation_sample *);
 		int (*bar1_bind)(struct gk20a *g, struct nvgpu_mem *bar1_inst);
 	} bus;


@@ -58,5 +58,6 @@ void gm20b_init_bus(struct gpu_ops *gops)
 	gops->bus.init_hw = gk20a_bus_init_hw;
 	gops->bus.isr = gk20a_bus_isr;
 	gops->bus.read_ptimer = gk20a_read_ptimer;
+	gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;
 	gops->bus.bar1_bind = gm20b_bus_bar1_bind;
 }


@@ -367,6 +367,48 @@ static int vgpu_read_ptimer(struct gk20a *g, u64 *value)
 	return err;
 }
 
+int vgpu_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples)
+{
+	struct tegra_vgpu_cmd_msg msg = {0};
+	struct tegra_vgpu_get_timestamps_zipper_params *p =
+			&msg.params.get_timestamps_zipper;
+	int err;
+	u32 i;
+
+	gk20a_dbg_fn("");
+
+	if (count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT) {
+		nvgpu_err(g, "count %u overflow", count);
+		return -EINVAL;
+	}
+
+	if (source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) {
+		nvgpu_err(g, "source_id %u not supported", source_id);
+		return -EINVAL;
+	}
+
+	msg.cmd = TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER;
+	msg.handle = vgpu_get_handle(g);
+	p->source_id = TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC;
+	p->count = count;
+
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	err = err ? err : msg.ret;
+	if (err) {
+		nvgpu_err(g, "vgpu get timestamps zipper failed, err=%d", err);
+		return err;
+	}
+
+	for (i = 0; i < count; i++) {
+		samples[i].cpu_timestamp = p->samples[i].cpu_timestamp;
+		samples[i].gpu_timestamp = p->samples[i].gpu_timestamp;
+	}
+
+	return err;
+}
+
 void vgpu_init_hal_common(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
@@ -384,6 +426,7 @@ void vgpu_init_hal_common(struct gk20a *g)
 #endif
 	gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
 	gops->bus.read_ptimer = vgpu_read_ptimer;
+	gops->bus.get_timestamps_zipper = vgpu_get_timestamps_zipper;
 }
 
 static int vgpu_init_hal(struct gk20a *g)


@@ -101,6 +101,7 @@ enum {
 	TEGRA_VGPU_CMD_RESUME_CONTEXTS = 67,
 	TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE = 68,
 	TEGRA_VGPU_CMD_PROF_MGT = 72,
+	TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER = 74,
 };
 
 struct tegra_vgpu_connect_params {
@@ -389,6 +390,22 @@ struct tegra_vgpu_read_ptimer_params {
 	u64 time;
 };
 
+#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT	16
+#define TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC	1
+struct tegra_vgpu_get_timestamps_zipper_params {
+	/* timestamp pairs */
+	struct {
+		/* raw CPU clock source value */
+		u64 cpu_timestamp;
+		/* raw GPU counter (PTIMER) value */
+		u64 gpu_timestamp;
+	} samples[TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT];
+	/* number of pairs to read */
+	u32 count;
+	/* cpu clock source id */
+	u32 source_id;
+};
+
 struct tegra_vgpu_set_powergate_params {
 	u32 mode;
 };
@@ -518,6 +535,7 @@ struct tegra_vgpu_cmd_msg {
 		struct tegra_vgpu_suspend_resume_contexts resume_contexts;
 		struct tegra_vgpu_clear_sm_error_state clear_sm_error_state;
 		struct tegra_vgpu_prof_mgt_params prof_management;
+		struct tegra_vgpu_get_timestamps_zipper_params get_timestamps_zipper;
 		char padding[192];
 	} params;
 };
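
The vm-server side of TEGRA_VGPU_CMD_GET_TIMESTAMPS_ZIPPER is not part
of this patch. The kernel-style pseudocode below is a plausible sketch,
assuming hypothetical server_ctx and server_read_ptimer() names, to
show the point of the command: both timestamps of a pair are read back
to back on the server, so no guest/server RPC latency leaks into the
samples.

/*
 * Hypothetical vm-server handler sketch; server_ctx and
 * server_read_ptimer() are invented names, and the params
 * struct is the one defined above.
 */
static int handle_get_timestamps_zipper(struct server_ctx *ctx,
		struct tegra_vgpu_get_timestamps_zipper_params *p)
{
	u32 i;

	if (p->count > TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_MAX_COUNT ||
	    p->source_id != TEGRA_VGPU_GET_TIMESTAMPS_ZIPPER_SRC_ID_TSC)
		return -EINVAL;

	for (i = 0; i < p->count; i++) {
		/* GPU and CPU timestamps are read back to back on the
		 * server, so the pair carries no RPC latency */
		p->samples[i].gpu_timestamp = server_read_ptimer(ctx);
		p->samples[i].cpu_timestamp = get_cycles();	/* TSC */
	}

	return 0;
}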