gpu: nvgpu: add support for FECS VA

- On t186, the ucode expects a physical address to be
  programmed for the FECS trace buffer.
- On t194, the ucode expects a GPU VA to be programmed
  for the FECS trace buffer. This patch adds the support
  to handle this on native Linux (sketched below).
- Increase the size of the FECS trace buffer, as a few
  entries were getting dropped due to overflow of the
  FECS trace buffer.
- Move FECS trace buffer handling into the global
  context buffer.
- Add an extra check for the update of the mailbox1
  register (Bug 200417403).
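
A condensed sketch of the new address selection in
gk20a_fecs_trace_bind_channel() (full change in the diff below); the
helper name fecs_trace_buffer_addr() is illustrative, not part of the
source:

    /* Sketch only: condenses the VA-vs-PA branch added by this patch.
     * The helper name is hypothetical; all other identifiers are taken
     * from the diff below. */
    static u64 fecs_trace_buffer_addr(struct gk20a *g,
                    struct nvgpu_gr_ctx *ch_ctx, u32 *aperture_mask)
    {
            struct nvgpu_mem *mem =
                    &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

            if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
                    /* t194: ucode consumes a GPU VA; no aperture bits. */
                    *aperture_mask = 0;
                    return ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
            }

            /* t186: ucode consumes a physical address plus aperture. */
            *aperture_mask = nvgpu_aperture_mask(g, mem,
                    ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
                    ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
                    ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
            return nvgpu_inst_block_addr(g, mem);
    }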

EVLR-2077

Change-Id: I7c3324ce9341976a1375e0afe6c53c424a053723
Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1536028
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Nirav Patel <nipatel@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Vaibhav Kachore <vkachore@nvidia.com>
Date:      2018-02-22 16:45:30 +05:30
Committer: Tejal Kudav
Parent:    97d697a848
Commit:    ca3215c6b2
8 changed files with 119 additions and 62 deletions

@@ -28,6 +28,7 @@
#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
@@ -51,7 +52,7 @@
* If HW circular buffer is getting too many "buffer full" conditions,
* increasing this constant should help (it drives Linux' internal buffer size).
*/
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6)
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
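
For scale, a back-of-envelope sizing sketch; the 128-byte record size
is an assumption for illustration (the real value comes from
ctxsw_prog_record_timestamp_record_size_in_bytes_v()):

    /* Assumed record size, for illustration only. */
    #define FECS_TRACE_RECORD_SIZE_GUESS 128u

    /* (1 << 6) * 128 = 8 KiB before this change;
     * (1 << 10) * 128 = 128 KiB after. */
    static inline size_t fecs_trace_ring_bytes(void)
    {
            return GK20A_FECS_TRACE_NUM_RECORDS * FECS_TRACE_RECORD_SIZE_GUESS;
    }
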
@@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent {
struct gk20a_fecs_trace {
struct nvgpu_mem trace_buf;
DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
struct nvgpu_mutex hash_lock;
struct nvgpu_mutex poll_lock;
@@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void)
}
static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
struct gk20a_fecs_trace *trace, int idx)
struct gk20a *g, int idx)
{
struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
return (struct gk20a_fecs_trace_record *)
((u8 *) trace->trace_buf.cpu_va
((u8 *) mem->cpu_va
+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}
@@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
struct gk20a_fecs_trace *trace = g->fecs_trace;
pid_t cur_pid;
pid_t new_pid;
int count = 0;
/* for now, only one VM */
const int vmid = 0;
struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
trace, index);
struct gk20a_fecs_trace_record *r =
gk20a_fecs_trace_get_record(g, index);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"consuming record trace=%p read=%d record=%p", trace, index, r);
@@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
continue;
gk20a_ctxsw_trace_write(g, &entry);
count++;
}
gk20a_ctxsw_trace_wake_up(g, vmid);
return 0;
return count;
}
int gk20a_fecs_trace_poll(struct gk20a *g)
@@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
g->ops.mm.fb_flush(g);
while (read != write) {
/* Ignore error code, as we want to consume all records */
(void)gk20a_fecs_trace_ring_read(g, read);
cnt = gk20a_fecs_trace_ring_read(g, read);
if (cnt <= 0)
break;
/* Get to next record. */
read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
}
/* ensure FECS records has been updated before incrementing read index */
nvgpu_smp_wmb();
nvgpu_wmb();
gk20a_fecs_trace_set_read_index(g, read);
done:
@@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
return 0;
}
static int gk20a_fecs_trace_alloc_ring(struct gk20a *g)
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
{
struct gk20a_fecs_trace *trace = g->fecs_trace;
return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS
* ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
&trace->trace_buf);
}
static void gk20a_fecs_trace_free_ring(struct gk20a *g)
{
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_dma_free(g, &trace->trace_buf);
return GK20A_FECS_TRACE_NUM_RECORDS
* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
}
#ifdef CONFIG_DEBUG_FS
@@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
{
loff_t *pos = (loff_t *) v;
struct gk20a *g = *(struct gk20a **)s->private;
struct gk20a_fecs_trace *trace = g->fecs_trace;
struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos);
struct gk20a_fecs_trace_record *r =
gk20a_fecs_trace_get_record(g, *pos);
int i;
const u32 invalid_tag =
ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
@@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
goto clean_poll_lock;
BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
err = gk20a_fecs_trace_alloc_ring(g);
if (err) {
nvgpu_warn(g, "failed to allocate FECS ring");
goto clean_hash_lock;
}
hash_init(trace->pid_hash_table);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
@@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
return 0;
clean_hash_lock:
nvgpu_mutex_destroy(&trace->hash_lock);
clean_poll_lock:
nvgpu_mutex_destroy(&trace->poll_lock);
clean:
@@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
u32 lo;
u32 hi;
u64 pa;
u64 addr;
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *ch_ctx;
struct gk20a_fecs_trace *trace = g->fecs_trace;
struct nvgpu_mem *mem;
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
pid_t pid;
u32 aperture;
u32 aperture_mask;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"chid=%d context_ptr=%x inst_block=%llx",
@@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
if (!trace)
return -ENOMEM;
pa = nvgpu_inst_block_addr(g, &trace->trace_buf);
if (!pa)
return -ENOMEM;
aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
aperture_mask = 0;
} else {
addr = nvgpu_inst_block_addr(g, mem);
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
aperture_mask = nvgpu_aperture_mask(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
}
if (!addr)
return -ENOMEM;
lo = u64_lo32(addr);
hi = u64_hi32(addr);
mem = &ch_ctx->mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
lo = u64_lo32(pa);
hi = u64_hi32(pa);
nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
lo, GK20A_FECS_TRACE_NUM_RECORDS);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
GK20A_FECS_TRACE_NUM_RECORDS));
nvgpu_mem_end(g, mem);
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
mem = &ch->ctx_header.mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
aperture);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
GK20A_FECS_TRACE_NUM_RECORDS));
aperture_mask);
nvgpu_mem_end(g, mem);
@@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
return 0;
nvgpu_thread_stop(&trace->poll_task);
gk20a_fecs_trace_free_ring(g);
gk20a_fecs_trace_free_hash_table(g);
nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -39,5 +39,6 @@ int gk20a_gr_max_entries(struct gk20a *g,
int gk20a_fecs_trace_enable(struct gk20a *g);
int gk20a_fecs_trace_disable(struct gk20a *g);
bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
#endif /* __FECS_TRACE_GK20A_H */

@@ -41,6 +41,7 @@
#include "gk20a.h"
#include "gr_gk20a.h"
#include "gk20a/fecs_trace_gk20a.h"
#include "gr_ctx_gk20a.h"
#include "gr_pri_gk20a.h"
#include "regops_gk20a.h"
@@ -2499,6 +2500,10 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
return ret;
}
g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
#ifdef CONFIG_GK20A_CTXSW_TRACE
g->gr.ctx_vars.fecs_trace_buffer_size =
gk20a_fecs_trace_buffer_size(g);
#endif
}
nvgpu_log_fn(g, "done");
@@ -2630,6 +2635,20 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
if (err)
goto clean_up;
#ifdef CONFIG_GK20A_CTXSW_TRACE
nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
gr->ctx_vars.fecs_trace_buffer_size);
err = nvgpu_dma_alloc_sys(g,
gr->ctx_vars.fecs_trace_buffer_size,
&gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem);
if (err)
goto clean_up;
gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy =
gk20a_gr_destroy_ctx_buffer;
#endif
nvgpu_log_fn(g, "done");
return 0;
@@ -2769,6 +2788,21 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
tsg->gr_ctx.global_ctx_buffer_mapped = true;
#ifdef CONFIG_GK20A_CTXSW_TRACE
/* FECS trace buffer */
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem;
gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
gk20a_mem_flag_none, true, mem->aperture);
if (!gpu_va)
goto clean_up;
g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size;
g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER;
}
#endif
return 0;
clean_up:
@@ -3050,6 +3084,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
"fail to commit gr ctx buffer");
goto out;
}
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (g->ops.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.fecs_trace.bind_channel(g, c);
if (err)
nvgpu_warn(g,
"fail to bind channel for ctxsw trace");
}
#endif
}
nvgpu_log_fn(g, "done");

@@ -79,6 +79,7 @@ enum /* global_ctx_buffer */ {
ATTRIBUTE_VPR = 5,
GOLDEN_CTX = 6,
PRIV_ACCESS_MAP = 7,
FECS_TRACE_BUFFER = 8,
NR_GLOBAL_CTX_BUF = 9
};
@@ -89,6 +90,7 @@ enum /*global_ctx_buffer_va */ {
ATTRIBUTE_VA = 2,
GOLDEN_CTX_VA = 3,
PRIV_ACCESS_MAP_VA = 4,
FECS_TRACE_BUFFER_VA = 5,
NR_GLOBAL_CTX_BUF_VA = 6
};
@@ -290,6 +292,8 @@ struct gr_gk20a {
u32 priv_access_map_size;
u32 fecs_trace_buffer_size;
struct gr_ucode_gk20a ucode;
struct av_list_gk20a sw_bundle_init;

@@ -834,6 +834,7 @@ int gp106_init_hal(struct gk20a *g)
__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true);
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
/* Read fuses to check if gpu needs to boot in secure/non-secure mode */
if (gops->fuse.check_priv_security(g))

@@ -732,6 +732,7 @@ int gp10b_init_hal(struct gk20a *g)
__nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
/* Read fuses to check if gpu needs to boot in secure/non-secure mode */
if (gops->fuse.check_priv_security(g))

@@ -586,20 +586,20 @@ static const struct gpu_ops gv11b_ops = {
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {
.alloc_user_buffer = NULL,
.free_user_buffer = NULL,
.mmap_user_buffer = NULL,
.init = NULL,
.deinit = NULL,
.enable = NULL,
.disable = NULL,
.is_enabled = NULL,
.reset = NULL,
.alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc,
.free_user_buffer = gk20a_ctxsw_dev_ring_free,
.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
.init = gk20a_fecs_trace_init,
.deinit = gk20a_fecs_trace_deinit,
.enable = gk20a_fecs_trace_enable,
.disable = gk20a_fecs_trace_disable,
.is_enabled = gk20a_fecs_trace_is_enabled,
.reset = gk20a_fecs_trace_reset,
.flush = NULL,
.poll = NULL,
.bind_channel = NULL,
.unbind_channel = NULL,
.max_entries = NULL,
.poll = gk20a_fecs_trace_poll,
.bind_channel = gk20a_fecs_trace_bind_channel,
.unbind_channel = gk20a_fecs_trace_unbind_channel,
.max_entries = gk20a_gr_max_entries,
},
#endif /* CONFIG_GK20A_CTXSW_TRACE */
.mm = {
@@ -843,6 +843,7 @@ int gv11b_init_hal(struct gk20a *g)
}
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true);
g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);

@@ -34,6 +34,7 @@ struct gk20a;
#define NVGPU_IS_FMODEL 1
#define NVGPU_DRIVER_IS_DYING 2
#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3
#define NVGPU_FECS_TRACE_VA 4
/*
* ECC flags