gpu: nvgpu: hal for timestamps correlation

In order to perform timestamps correlation for FECS traces, we need to collect GPU / GPU timestamps samples. In virtualization case, it is possible for a guest to get GPU timestamps by using read_ptimer. However, if the CPU timestamp is read on guest side, and the GPU timestamp is read on vm-server side, then it introduces some latency that will create an artificial offset for GPU timestamps (~2 us in average). For better CPU / GPU timestamps correlation, Added a command to collect all timestamps on vm-server side. Bug 1900475 Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6 Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: http://git-master/r/1472447 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Aparna Das <aparnad@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
2025-12-24 10:34:43 +03:00 · 2017-04-27 11:28:27 -07:00
parent 70f507eec7
commit 56f56b5cd9
9 changed files with 160 additions and 50 deletions
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -643,58 +643,17 @@ static inline u64 get_cpu_timestamp_timeofday(void)
 	return timeval_to_jiffies(&tv);
 }

-static inline int get_timestamps_zipper(struct gk20a *g,
-		u64 (*get_cpu_timestamp)(void),
-		struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
+int gk20a_get_timestamps_zipper(struct gk20a *g,
+		u32 source_id, u32 count,
+		struct nvgpu_cpu_time_correlation_sample *samples)
 {
 	int err = 0;
 	unsigned int i = 0;
 	u32 gpu_timestamp_hi_new = 0;
 	u32 gpu_timestamp_hi_old = 0;
-
-	if (gk20a_busy(g)) {
-		gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
-		err = -EINVAL;
-		goto end;
-	}
-
-	/* get zipper reads of gpu and cpu counter values */
-	gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
-	for (i = 0; i < args->count; i++) {
-		u32 gpu_timestamp_lo = 0;
-		u32 gpu_timestamp_hi = 0;
-
-		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
-		args->samples[i].cpu_timestamp = get_cpu_timestamp();
-		rmb(); /* maintain zipper read order */
-		gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
-
-		/* pick the appropriate gpu counter hi bits */
-		gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
-			gpu_timestamp_hi_old : gpu_timestamp_hi_new;
-
-		args->samples[i].gpu_timestamp =
-			((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
-
-		gpu_timestamp_hi_old = gpu_timestamp_hi_new;
-	}
-
-end:
-	gk20a_idle(g);
-	return err;
-}
-
-static int nvgpu_gpu_get_cpu_time_correlation_info(
-	struct gk20a *g,
-	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
-{
-	int err = 0;
 	u64 (*get_cpu_timestamp)(void) = NULL;

-	if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT)
-		return -EINVAL;
-
-	switch (args->source_id) {
+	switch (source_id) {
 	case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC:
 		get_cpu_timestamp = get_cpu_timestamp_tsc;
 		break;
@@ -709,7 +668,67 @@ static int nvgpu_gpu_get_cpu_time_correlation_info(
 		return -EINVAL;
 	}

-	err = get_timestamps_zipper(g, get_cpu_timestamp, args);
+	if (gk20a_busy(g)) {
+		gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
+		err = -EINVAL;
+		goto end;
+	}
+
+	/* get zipper reads of gpu and cpu counter values */
+	gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
+	for (i = 0; i < count; i++) {
+		u32 gpu_timestamp_lo = 0;
+		u32 gpu_timestamp_hi = 0;
+
+		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
+		samples[i].cpu_timestamp = get_cpu_timestamp();
+		rmb(); /* maintain zipper read order */
+		gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
+
+		/* pick the appropriate gpu counter hi bits */
+		gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
+			gpu_timestamp_hi_old : gpu_timestamp_hi_new;
+
+		samples[i].gpu_timestamp =
+			((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
+
+		gpu_timestamp_hi_old = gpu_timestamp_hi_new;
+	}
+
+end:
+	gk20a_idle(g);
+	return err;
+}
+
+static int nvgpu_gpu_get_cpu_time_correlation_info(
+	struct gk20a *g,
+	struct nvgpu_gpu_get_cpu_time_correlation_info_args *args)
+{
+	struct nvgpu_cpu_time_correlation_sample *samples;
+	int err;
+	u32 i;
+
+	if (args->count >= NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT) {
+		return -EINVAL;
+	}
+
+	samples = kzalloc(args->count *
+		sizeof(struct nvgpu_cpu_time_correlation_sample), GFP_KERNEL);
+	if (!samples) {
+		return -ENOMEM;
+	}
+
+	err = g->ops.bus.get_timestamps_zipper(g,
+			args->source_id, args->count, samples);
+	if (!err) {
+		for (i = 0; i < args->count; i++) {
+			args->samples[i].cpu_timestamp = samples[i].cpu_timestamp;
+			args->samples[i].gpu_timestamp = samples[i].gpu_timestamp;
+		}
+	}
+
+	kfree(samples);
+
 	return err;
 }