gpu: nvgpu: define error_notifiers in common code

All the Linux-specific error_notifier codes are defined in the Linux-specific header file <uapi/linux/nvgpu.h> and used throughout the common driver code.
But since they are defined in a Linux-specific file, all uses of those error_notifiers need to move into Linux-specific code only.
Hence define new error_notifiers in include/nvgpu/error_notifier.h and use them in the common code.
Add new API nvgpu_error_notifier_to_channel_notifier() to convert a common error_notifier of the form NVGPU_ERR_NOTIFIER_* to the Linux-specific error notifier of the form NVGPU_CHANNEL_*.
Any future addition to the error notifiers requires an update to both forms of error notifiers.
Move all error notifier related metadata from channel_gk20a (common code) to the Linux-specific structure nvgpu_channel_linux. Update all accesses to this data to go through the new structure instead of channel_gk20a.
Move and rename the below APIs to a Linux-specific file and declare them in error_notifier.h: nvgpu_set_error_notifier_locked(), nvgpu_set_error_notifier(), nvgpu_is_error_notifier_set().
Add the below new API and use it in fifo_vgpu.c: nvgpu_set_error_notifier_if_empty().
Include <nvgpu/error_notifier.h> wherever the new error_notifier codes are used.
NVGPU-426
Change-Id: Iaa5bfc150e6e9ec17d797d445c2d6407afe9f4bd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1593361
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 09:57:08 +03:00 · 2017-11-06 05:44:23 -08:00
parent a0cea295e7
commit c6b9177cff
10 changed files with 285 additions and 138 deletions
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -17,6 +17,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/ltc.h>
+#include <nvgpu/error_notifier.h>

 /*
 * This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -37,6 +38,124 @@
 #include <trace/events/gk20a.h>
 #include <uapi/linux/nvgpu.h>

+/*
+ * API to convert error_notifiers in common code and of the form
+ * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
+ * space and of the form NVGPU_CHANNEL_*
+ *
+ * A value that matches none of the cases below is reported with pr_warn()
+ * and returned unchanged.
+ */
+static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
+{
+	switch (error_notifier) {
+	case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
+		return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
+	case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
+		return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
+	case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
+		return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
+	case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
+		return NVGPU_CHANNEL_GR_EXCEPTION;
+	case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
+		return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
+	case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
+		return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
+	case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
+		return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
+	case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
+		return NVGPU_CHANNEL_PBDMA_ERROR;
+	case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
+		return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
+	case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
+		return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
+	case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
+		return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
+	}
+
+	pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
+
+	return error_notifier;
+}
+
+/**
+ * nvgpu_set_error_notifier_locked()
+ * Should be called with the channel's error notifier mutex held, i.e.
+ * error_notifier.mutex of the nvgpu_channel_linux reached via ch->os_priv
+ *
+ * error should be of the form NVGPU_ERR_NOTIFIER_*; it is converted to the
+ * NVGPU_CHANNEL_* form before being written to the notification.
+ *
+ * Does nothing when no error notifier buffer (dmabuf) is registered.
+ */
+void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	error = nvgpu_error_notifier_to_channel_notifier(error);
+
+	if (priv->error_notifier.dmabuf) {
+		struct nvgpu_notification *notification =
+			priv->error_notifier.notification;
+		struct timespec time_data;
+		u64 nsec;
+
+		getnstimeofday(&time_data);
+		nsec = ((u64)time_data.tv_sec) * 1000000000u +
+				(u64)time_data.tv_nsec;
+		/* 64-bit timestamp is stored as two 32-bit words, low word first */
+		notification->time_stamp.nanoseconds[0] =
+				(u32)nsec;
+		notification->time_stamp.nanoseconds[1] =
+				(u32)(nsec >> 32);
+		notification->info32 = error;
+		/* 0xffff is the value nvgpu_set_error_notifier_if_empty() tests for */
+		notification->status = 0xffff;
+
+		/* error is a u32, so print it with %u rather than %d */
+		nvgpu_err(ch->g,
+		    "error notifier set to %u for ch %d", error, ch->chid);
+	}
+}
+
+/*
+ * Acquire the channel's error notifier mutex, set the error notifier through
+ * nvgpu_set_error_notifier_locked() and release the mutex again.
+ *
+ * error should be of the form NVGPU_ERR_NOTIFIER_*
+ */
+void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	nvgpu_set_error_notifier_locked(ch, error);
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
+}
+
+void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	if (priv->error_notifier.dmabuf) {
+		struct nvgpu_notification *notification =
+			priv->error_notifier.notification;
+
+		/*
+		 * Don't overwrite error flag if it is already set.
+		 * status == 0xffff is the value written by
+		 * nvgpu_set_error_notifier_locked() when it sets a notifier,
+		 * so any other status means nothing has been reported yet.
+		 * error is passed through as-is and should therefore be of
+		 * the form NVGPU_ERR_NOTIFIER_*.
+		 */
+		if (notification->status != 0xffff)
+			nvgpu_set_error_notifier_locked(ch, error);
+	}
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
+}
+
+/*
+ * Check whether the channel's notification currently carries the given error.
+ *
+ * error_notifier should be of the form NVGPU_ERR_NOTIFIER_*; it is converted
+ * to the NVGPU_CHANNEL_* form before the comparison.
+ *
+ * Returns true when an error notifier buffer is registered and its info32
+ * field equals the converted code, false otherwise. Only info32 is compared;
+ * notification->status is not consulted.
+ */
+bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier)
+{
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+	bool notifier_set = false;
+
+	error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	if (priv->error_notifier.dmabuf) {
+		struct nvgpu_notification *notification =
+			priv->error_notifier.notification;
+		u32 err = notification->info32;
+
+		if (err == error_notifier)
+			notifier_set = true;
+	}
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
+
+	return notifier_set;
+}
+
 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
 {
 	struct nvgpu_channel_completion_cb *completion_cb =
@@ -128,6 +247,7 @@ static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
 static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 {
 	struct nvgpu_channel_linux *priv;
+	int err;

 	priv = nvgpu_kzalloc(g, sizeof(*priv));
 	if (!priv)
@@ -136,6 +256,12 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 	ch->os_priv = priv;
 	priv->ch = ch;

+	err = nvgpu_mutex_init(&priv->error_notifier.mutex);
+	if (err) {
+		nvgpu_kfree(g, priv);
+		return err;
+	}
+
 	nvgpu_channel_work_completion_init(ch);

 	return 0;
@@ -143,7 +269,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)

 static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
 {
-	nvgpu_kfree(g, ch->os_priv);
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_destroy(&priv->error_notifier.mutex);
+	nvgpu_kfree(g, priv);
 }

 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
--- a/drivers/gpu/nvgpu/common/linux/channel.h
+++ b/drivers/gpu/nvgpu/common/linux/channel.h
@@ -42,10 +42,20 @@ struct nvgpu_channel_completion_cb {
 	struct work_struct work;
 };

+struct nvgpu_error_notifier {
+	struct dma_buf *dmabuf;	/* notifier buffer; NULL when none is registered */
+	void *vaddr;		/* mapping of dmabuf, passed to dma_buf_vunmap() on free */
+
+	struct nvgpu_notification *notification;	/* record at vaddr + user offset */
+
+	struct nvgpu_mutex mutex;	/* taken around dmabuf/notification accesses */
+};
+
 struct nvgpu_channel_linux {
 	struct channel_gk20a *ch;	/* channel this private data belongs to */

 	struct nvgpu_channel_completion_cb completion_cb;
+	struct nvgpu_error_notifier error_notifier;	/* per-channel error notifier state */
 };

 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -30,6 +30,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/error_notifier.h>

 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
@@ -227,15 +228,17 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,

 static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
 {
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	if (ch->error_notifier_ref) {
-		dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
-		dma_buf_put(ch->error_notifier_ref);
-		ch->error_notifier_ref = NULL;
-		ch->error_notifier = NULL;
-		ch->error_notifier_va = NULL;
+	struct nvgpu_channel_linux *priv = ch->os_priv;
+
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	if (priv->error_notifier.dmabuf) {
+		dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
+		dma_buf_put(priv->error_notifier.dmabuf);
+		priv->error_notifier.dmabuf = NULL;
+		priv->error_notifier.notification = NULL;
+		priv->error_notifier.vaddr = NULL;
 	}
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
+	nvgpu_mutex_release(&priv->error_notifier.mutex);
 }

 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
@@ -244,6 +247,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 	struct dma_buf *dmabuf;
 	void *va;
 	u64 end = args->offset + sizeof(struct nvgpu_notification);
+	struct nvgpu_channel_linux *priv = ch->os_priv;

 	if (!args->mem) {
 		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
@@ -273,14 +277,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 		return -ENOMEM;
 	}

-	ch->error_notifier = va + args->offset;
-	ch->error_notifier_va = va;
-	memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
+	priv->error_notifier.notification = va + args->offset;
+	priv->error_notifier.vaddr = va;
+	memset(priv->error_notifier.notification, 0,
+		sizeof(struct nvgpu_notification));

 	/* set channel notifiers pointer */
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	ch->error_notifier_ref = dmabuf;
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
+	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
+	priv->error_notifier.dmabuf = dmabuf;
+	nvgpu_mutex_release(&priv->error_notifier.mutex);

 	return 0;
 }
@@ -1361,7 +1366,7 @@ long gk20a_channel_ioctl(struct file *filp,
 			break;
 		}
 		err = ch->g->ops.fifo.force_reset_ch(ch,
-				NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true);
+				NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
 		gk20a_idle(ch->g);
 		break;
 	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:
--- a/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/fifo_vgpu.c
@@ -25,10 +25,13 @@
 #include <nvgpu/atomic.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/error_notifier.h>

 #include "vgpu.h"
 #include "fifo_vgpu.h"

+#include "common/linux/channel.h"
+
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ram_gk20a.h>

@@ -691,7 +694,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,

 		list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
 			if (gk20a_channel_get(ch_tsg)) {
-				gk20a_set_error_notifier(ch_tsg, err_code);
+				nvgpu_set_error_notifier(ch_tsg, err_code);
 				ch_tsg->has_timedout = true;
 				gk20a_channel_put(ch_tsg);
 			}
@@ -699,7 +702,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,

 		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 	} else {
-		gk20a_set_error_notifier(ch, err_code);
+		nvgpu_set_error_notifier(ch, err_code);
 		ch->has_timedout = true;
 	}

@@ -716,19 +719,14 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
 		struct channel_gk20a *ch)
 {
-	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
-	if (ch->error_notifier_ref) {
-		if (ch->error_notifier->status == 0xffff) {
-			/* If error code is already set, this mmu fault
-			 * was triggered as part of recovery from other
-			 * error condition.
-			 * Don't overwrite error flag. */
-		} else {
-			gk20a_set_error_notifier_locked(ch,
-				NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
-		}
-	}
-	nvgpu_mutex_release(&ch->error_notifier_mutex);
+	/*
+	 * If error code is already set, this mmu fault
+	 * was triggered as part of recovery from other
+	 * error condition.
+	 * Don't overwrite error flag.
+	 */
+	nvgpu_set_error_notifier_if_empty(ch,
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);

 	/* mark channel as faulted */
 	ch->has_timedout = true;
@@ -778,11 +776,11 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)

 	switch (info->type) {
 	case TEGRA_VGPU_FIFO_INTR_PBDMA:
-		gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR);
+		nvgpu_set_error_notifier(ch, NVGPU_ERR_NOTIFIER_PBDMA_ERROR);
 		break;
 	case TEGRA_VGPU_FIFO_INTR_CTXSW_TIMEOUT:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 		break;
 	case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
 		vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
@@ -20,6 +20,7 @@

 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/error_notifier.h>

 #include "vgpu.h"
 #include "gr_vgpu.h"
@@ -941,31 +942,31 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
 		nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq);
 		break;
 	case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
-		gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
+		nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT);
 		break;
 	case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
 	case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
 		break;
 	case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_FECS_ERROR:
 		break;
 	case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
-		gk20a_set_error_notifier(ch,
-					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+					NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
-		gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_EXCEPTION:
-		gk20a_set_error_notifier(ch,
-				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
+		nvgpu_set_error_notifier(ch,
+				NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
 		break;
 	case TEGRA_VGPU_GR_INTR_SM_EXCEPTION:
 		gk20a_dbg_gpu_post_events(ch);