From 38bcba3190a7a8a1f59a858b926364cb3dd8e1fa Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Tue, 26 Nov 2019 10:32:20 -0500 Subject: [PATCH] gpu: nvgpu: unit: add coverage for tsg Add/improve coverage for the following functions: - nvgpu_tsg_open - nvgpu_tsg_release - nvgpu_tsg_bind - nvgpu_tsg_unbind - nvgpu_tsg_mark_error - nvgpu_tsg_set_ctx_mmu_error - nvgpu_tsg_reset_faulted_eng_pbdma Update list of required tests in JSON file. Jira NVGPU-4387 Change-Id: Ic389c91d8cf98ba5dca312a4a3a96e0c6d1c6b97 Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/2248161 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/libnvgpu-drv_safe.export | 5 + userspace/units/fifo/tsg/nvgpu-tsg.c | 543 +++++++++++++++------ userspace/units/fifo/tsg/nvgpu-tsg.h | 114 ++++- 3 files changed, 496 insertions(+), 166 deletions(-) diff --git a/drivers/gpu/nvgpu/libnvgpu-drv_safe.export b/drivers/gpu/nvgpu/libnvgpu-drv_safe.export index 4748a9112..60f736890 100644 --- a/drivers/gpu/nvgpu/libnvgpu-drv_safe.export +++ b/drivers/gpu/nvgpu/libnvgpu-drv_safe.export @@ -339,6 +339,7 @@ nvgpu_channel_open_new nvgpu_channel_put__func nvgpu_channel_setup_bind nvgpu_channel_refch_from_inst_ptr +nvgpu_channel_set_unserviceable nvgpu_channel_setup_sw nvgpu_channel_sync_create nvgpu_channel_sync_destroy @@ -545,8 +546,12 @@ nvgpu_tsg_bind_channel nvgpu_tsg_check_and_get_from_id nvgpu_tsg_cleanup_sw nvgpu_tsg_default_timeslice_us +nvgpu_tsg_mark_error nvgpu_tsg_open nvgpu_tsg_release +nvgpu_tsg_reset_faulted_eng_pbdma +nvgpu_tsg_set_ctx_mmu_error +nvgpu_tsg_set_error_notifier nvgpu_tsg_setup_sw nvgpu_tsg_unbind_channel nvgpu_tsg_unbind_channel_check_hw_state diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.c b/userspace/units/fifo/tsg/nvgpu-tsg.c index b80a58448..b8ca1706a 100644 --- a/userspace/units/fifo/tsg/nvgpu-tsg.c +++ b/userspace/units/fifo/tsg/nvgpu-tsg.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include "common/gr/ctx_priv.h" #include +#include #include "hal/fifo/tsg_gk20a.h" @@ -92,80 +94,37 @@ static void subtest_setup(u32 branches) } } -static int branches_strn(char *dst, size_t size, - const char *labels[], u32 branches) -{ - int i; - char *p = dst; - int len; - - for (i = 0; i < 32; i++) { - if (branches & BIT(i)) { - len = snprintf(p, size, "%s ", labels[i]); - size -= len; - p += len; - } - } - - return (p - dst); -} - -/* not MT-safe */ -static char *branches_str(u32 branches, const char *labels[]) -{ - static char buf[256]; - - memset(buf, 0, sizeof(buf)); - branches_strn(buf, sizeof(buf), labels, branches); - return buf; -} - -/* - * If taken, some branches are final, e.g. the function exits. - * There is no need to test subsequent branches combinations, - * if one final branch is taken. - * - * We want to skip the subtest if: - * - it has at least one final branch - * - it is supposed to test some branches after this final branch - * - * Parameters: - * branches bitmask of branches to be taken for one subtest - * final_branches bitmask of final branches - * - * Note: the assumption is that branches are numbered in their - * order of appearance in the function to be tested. - */ -static bool pruned(u32 branches, u32 final_branches) -{ - u32 match = branches & final_branches; - int bit; - - /* Does the subtest have one final branch ? */ - if (match == 0U) { - return false; - } - bit = nvgpu_ffs(match) - 1; - - /* - * Skip the test if it attempts to test some branches - * after this final branch. 
- */ - return (branches > BIT(bit)); -} +#define pruned test_fifo_subtest_pruned +#define branches_str test_fifo_flags_str #define F_TSG_OPEN_ACQUIRE_CH_FAIL BIT(0) #define F_TSG_OPEN_SM_FAIL BIT(1) #define F_TSG_OPEN_ALLOC_SM_FAIL BIT(2) #define F_TSG_OPEN_ALLOC_SM_KZALLOC_FAIL BIT(3) -#define F_TSG_OPEN_LAST BIT(4) +#define F_TSG_OPEN_ALLOC_GR_FAIL BIT(4) +#define F_TSG_OPEN_NO_INIT_BUF BIT(5) +#define F_TSG_OPEN_INIT_BUF_FAIL BIT(6) +#define F_TSG_OPEN_NO_OPEN_HAL BIT(7) +#define F_TSG_OPEN_OPEN_HAL_FAIL BIT(8) +#define F_TSG_OPEN_LAST BIT(9) -static const char *f_tsg_open[] = { - "acquire_ch_fail", - "sm_fail", - "alloc_sm_fail", - "alloc_sm_kzalloc_fail", -}; +static int stub_tsg_init_eng_method_buffers(struct gk20a *g, + struct nvgpu_tsg *tsg) +{ + if (unit_ctx.branches & F_TSG_OPEN_INIT_BUF_FAIL) { + return -ENOMEM; + } + return 0; +} + + +static int stub_tsg_open(struct nvgpu_tsg *tsg) +{ + if (unit_ctx.branches & F_TSG_OPEN_OPEN_HAL_FAIL) { + return -EINVAL; + } + return 0; +} static u32 stub_gr_init_get_no_of_sm_0(struct gk20a *g) { @@ -186,9 +145,23 @@ int test_tsg_open(struct unit_module *m, u32 fail = F_TSG_OPEN_ACQUIRE_CH_FAIL | F_TSG_OPEN_SM_FAIL | F_TSG_OPEN_ALLOC_SM_FAIL | - F_TSG_OPEN_ALLOC_SM_KZALLOC_FAIL; + F_TSG_OPEN_ALLOC_SM_KZALLOC_FAIL | + F_TSG_OPEN_ALLOC_GR_FAIL | + F_TSG_OPEN_INIT_BUF_FAIL | + F_TSG_OPEN_OPEN_HAL_FAIL; u32 prune = fail; u32 tsgid; + const char *labels[] = { + "acquire_ch_fail", + "sm_fail", + "alloc_sm_fail", + "alloc_sm_kzalloc_fail", + "alloc_gr_fail", + "no_init_buf", + "init_buf_fail", + "no_open_hal", + "open_hal_fail" + }; kmem_fi = nvgpu_kmem_get_fault_injection(); @@ -196,11 +169,11 @@ int test_tsg_open(struct unit_module *m, if (pruned(branches, prune)) { unit_verbose(m, "%s branches=%s (pruned)\n", __func__, - branches_str(branches, f_tsg_open)); + branches_str(branches, labels)); continue; } unit_verbose(m, "%s branches=%s\n", __func__, - branches_str(branches, f_tsg_open)); + branches_str(branches, labels)); subtest_setup(branches); /* find next tsg (if acquire succeeds) */ @@ -226,9 +199,23 @@ int test_tsg_open(struct unit_module *m, branches & F_TSG_OPEN_ALLOC_SM_FAIL ? (void*)1 : NULL; - nvgpu_posix_enable_fault_injection(kmem_fi, - branches & F_TSG_OPEN_ALLOC_SM_KZALLOC_FAIL ? - true : false, 0); + g->ops.tsg.init_eng_method_buffers = + branches & F_TSG_OPEN_NO_INIT_BUF ? + NULL : stub_tsg_init_eng_method_buffers; + + g->ops.tsg.open = + branches & F_TSG_OPEN_NO_OPEN_HAL ? 
+ NULL : stub_tsg_open; + + nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); + + if (branches & F_TSG_OPEN_ALLOC_SM_KZALLOC_FAIL) { + nvgpu_posix_enable_fault_injection(kmem_fi, true, 0); + } + + if (branches & F_TSG_OPEN_ALLOC_GR_FAIL) { + nvgpu_posix_enable_fault_injection(kmem_fi, true, 1); + } tsg = nvgpu_tsg_open(g, getpid()); @@ -248,7 +235,7 @@ int test_tsg_open(struct unit_module *m, done: if (ret != UNIT_SUCCESS) { unit_err(m, "%s branches=%s\n", __func__, - branches_str(branches, f_tsg_open)); + branches_str(branches, labels)); } nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); if (tsg != NULL) { @@ -263,8 +250,9 @@ done: #define F_TSG_BIND_CHANNEL_RL_MISMATCH BIT(1) #define F_TSG_BIND_CHANNEL_ACTIVE BIT(2) #define F_TSG_BIND_CHANNEL_BIND_HAL BIT(3) -#define F_TSG_BIND_CHANNEL_ENG_METHOD_BUFFER BIT(3) -#define F_TSG_BIND_CHANNEL_LAST BIT(4) +#define F_TSG_BIND_CHANNEL_BIND_HAL_ERR BIT(4) +#define F_TSG_BIND_CHANNEL_ENG_METHOD_BUFFER BIT(5) +#define F_TSG_BIND_CHANNEL_LAST BIT(6) static const char *f_tsg_bind[] = { "ch_bound", @@ -274,6 +262,16 @@ static const char *f_tsg_bind[] = { "eng_method_buffer", }; +static int stub_tsg_bind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + if (unit_ctx.branches & F_TSG_BIND_CHANNEL_BIND_HAL_ERR) { + return -EINVAL; + } + + return 0; +} + int test_tsg_bind_channel(struct unit_module *m, struct gk20a *g, void *args) { @@ -288,9 +286,11 @@ int test_tsg_bind_channel(struct unit_module *m, u32 branches = 0U; int ret = UNIT_FAIL; int err; - u32 prune = F_TSG_BIND_CHANNEL_CH_BOUND | + u32 fail = F_TSG_BIND_CHANNEL_CH_BOUND | F_TSG_BIND_CHANNEL_RL_MISMATCH | - F_TSG_BIND_CHANNEL_ACTIVE; + F_TSG_BIND_CHANNEL_ACTIVE | + F_TSG_BIND_CHANNEL_BIND_HAL_ERR; + u32 prune = fail; tsg = nvgpu_tsg_open(g, getpid()); assert(tsg != NULL); @@ -334,9 +334,12 @@ int test_tsg_bind_channel(struct unit_module *m, nvgpu_clear_bit(ch->chid, runlist->active_channels); } - g->ops.tsg.bind_channel = - branches & F_TSG_BIND_CHANNEL_BIND_HAL ? - gops.tsg.bind_channel : NULL; + if ((branches & F_TSG_BIND_CHANNEL_BIND_HAL) || + (branches & F_TSG_BIND_CHANNEL_BIND_HAL_ERR)) { + g->ops.tsg.bind_channel = stub_tsg_bind_channel; + } else { + g->ops.tsg.bind_channel = NULL; + } g->ops.tsg.bind_channel_eng_method_buffers = branches & F_TSG_BIND_CHANNEL_ENG_METHOD_BUFFER ? 
@@ -347,9 +350,7 @@ int test_tsg_bind_channel(struct unit_module *m, err = nvgpu_tsg_bind_channel(tsg, ch); - if (branches & (F_TSG_BIND_CHANNEL_CH_BOUND| - F_TSG_BIND_CHANNEL_RL_MISMATCH| - F_TSG_BIND_CHANNEL_ACTIVE)) { + if (branches & fail) { assert(err != 0); } else { assert(err == 0); @@ -381,27 +382,15 @@ done: return ret; } -#define F_TSG_UNBIND_CHANNEL_UNSERVICEABLE BIT(0) -#define F_TSG_UNBIND_CHANNEL_PREEMPT_TSG_FAIL BIT(1) -#define F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL BIT(2) -#define F_TSG_UNBIND_CHANNEL_RUNLIST_UPDATE_FAIL BIT(3) -#define F_TSG_UNBIND_CHANNEL_UNBIND_HAL BIT(4) -#define F_TSG_UNBIND_CHANNEL_ABORT_RUNLIST_UPDATE_FAIL BIT(5) -#define F_TSG_UNBIND_CHANNEL_LAST BIT(6) - -#define F_TSG_UNBIND_CHANNEL_COMMON_FAIL_MASK \ - (F_TSG_UNBIND_CHANNEL_PREEMPT_TSG_FAIL| \ - F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL| \ - F_TSG_UNBIND_CHANNEL_RUNLIST_UPDATE_FAIL) - -static const char *f_tsg_unbind[] = { - "ch_timedout", - "preempt_tsg_fail", - "check_hw_state_fail", - "runlist_update_fail", - "unbind_hal", - "abort_runlist_update_fail", -}; +#define F_TSG_UNBIND_CHANNEL_ABORT_RUNLIST_UPDATE_FAIL BIT(0) +#define F_TSG_UNBIND_CHANNEL_UNSERVICEABLE BIT(1) +#define F_TSG_UNBIND_CHANNEL_PREEMPT_TSG_FAIL BIT(2) +#define F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE BIT(3) +#define F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL BIT(4) +#define F_TSG_UNBIND_CHANNEL_RUNLIST_UPDATE_FAIL BIT(5) +#define F_TSG_UNBIND_CHANNEL_UNBIND_HAL BIT(6) +#define F_TSG_UNBIND_CHANNEL_UNBIND_HAL_FAIL BIT(7) +#define F_TSG_UNBIND_CHANNEL_LAST BIT(8) static int stub_fifo_preempt_tsg_EINVAL( struct gk20a *g, struct nvgpu_tsg *tsg) @@ -409,16 +398,20 @@ static int stub_fifo_preempt_tsg_EINVAL( return -EINVAL; } -static int stub_tsg_unbind_channel_check_hw_state_EINVAL( +static int stub_tsg_unbind_channel_check_hw_state( struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) { - return -EINVAL; + if (unit_ctx.branches & F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL) { + return -EINVAL; + } + + return 0; } static int stub_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) { - if (ch->tsgid != tsg->tsgid) { + if (unit_ctx.branches & F_TSG_UNBIND_CHANNEL_UNBIND_HAL_FAIL) { return -EINVAL; } @@ -441,30 +434,19 @@ static int stub_runlist_update_for_channel_EINVAL( return 0; } -static bool unbind_pruned(u32 branches) +static bool unbind_pruned(u32 branches, u32 fail) { u32 branches_init = branches; - if (branches & F_TSG_UNBIND_CHANNEL_PREEMPT_TSG_FAIL) { - branches &= ~F_TSG_UNBIND_CHANNEL_COMMON_FAIL_MASK; - } - - if (branches & F_TSG_UNBIND_CHANNEL_UNSERVICEABLE) { - branches &= ~F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL; - } - - if (branches & F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL) { - branches &= ~F_TSG_UNBIND_CHANNEL_RUNLIST_UPDATE_FAIL; - } - - if (!(branches & F_TSG_UNBIND_CHANNEL_COMMON_FAIL_MASK)) { + if ((branches & fail) == 0) { branches &= ~F_TSG_UNBIND_CHANNEL_ABORT_RUNLIST_UPDATE_FAIL; } if (branches < branches_init) { return true; } - return false; + + return pruned(branches, fail); } int test_tsg_unbind_channel(struct unit_module *m, @@ -477,17 +459,32 @@ int test_tsg_unbind_channel(struct unit_module *m, u32 branches = 0U; int ret = UNIT_FAIL; int err; + const char *labels[] = { + "abort_runlist_update_fail", + "unserviceable", + "preempt_tsg_fail", + "check_hw_state", + "check_hw_state_fail", + "runlist_update_fail", + "unbind_hal", + "unbind_hal_fail", + }; + u32 fail = + F_TSG_UNBIND_CHANNEL_PREEMPT_TSG_FAIL | + F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL | + F_TSG_UNBIND_CHANNEL_RUNLIST_UPDATE_FAIL | 
+ F_TSG_UNBIND_CHANNEL_UNBIND_HAL_FAIL; - for (branches = 0U; branches < F_TSG_BIND_CHANNEL_LAST; branches++) { + for (branches = 0U; branches < F_TSG_UNBIND_CHANNEL_LAST; branches++) { - if (unbind_pruned(branches)) { + if (unbind_pruned(branches, fail)) { unit_verbose(m, "%s branches=%s (pruned)\n", __func__, - branches_str(branches, f_tsg_unbind)); + branches_str(branches, labels)); continue; } subtest_setup(branches); unit_verbose(m, "%s branches=%s\n", __func__, - branches_str(branches, f_tsg_unbind)); + branches_str(branches, labels)); /* * tsg unbind tears down TSG in case of failure: @@ -517,10 +514,13 @@ int test_tsg_unbind_channel(struct unit_module *m, stub_fifo_preempt_tsg_EINVAL : gops.fifo.preempt_tsg; - g->ops.tsg.unbind_channel_check_hw_state = - branches & F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL ? - stub_tsg_unbind_channel_check_hw_state_EINVAL : - gops.tsg.unbind_channel_check_hw_state; + if ((branches & F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE) || + (branches & F_TSG_UNBIND_CHANNEL_CHECK_HW_STATE_FAIL)) { + g->ops.tsg.unbind_channel_check_hw_state = + stub_tsg_unbind_channel_check_hw_state; + } else { + g->ops.tsg.unbind_channel_check_hw_state = NULL; + } g->ops.runlist.update_for_channel = branches & F_TSG_UNBIND_CHANNEL_RUNLIST_UPDATE_FAIL ? @@ -533,14 +533,18 @@ int test_tsg_unbind_channel(struct unit_module *m, stub_runlist_update_for_channel_EINVAL; } - g->ops.tsg.unbind_channel = - branches & F_TSG_UNBIND_CHANNEL_UNBIND_HAL ? - stub_tsg_unbind_channel : NULL; + if ((branches & F_TSG_UNBIND_CHANNEL_UNBIND_HAL) || + (branches & F_TSG_UNBIND_CHANNEL_UNBIND_HAL_FAIL)) { + g->ops.tsg.unbind_channel = stub_tsg_unbind_channel; + } else { + g->ops.tsg.unbind_channel = NULL; + } - (void) nvgpu_tsg_unbind_channel(tsg, chA); + err = nvgpu_tsg_unbind_channel(tsg, chA); - if (branches & F_TSG_UNBIND_CHANNEL_COMMON_FAIL_MASK) { + if (branches & fail) { /* check that TSG has been torn down */ + assert(err != 0); assert(chA->unserviceable); assert(chB->unserviceable); assert(chA->tsgid == NVGPU_INVALID_TSG_ID); @@ -566,7 +570,7 @@ int test_tsg_unbind_channel(struct unit_module *m, done: if (ret == UNIT_FAIL) { unit_err(m, "%s branches=%s\n", __func__, - branches_str(branches, f_tsg_unbind)); + branches_str(branches, labels)); } if (chA != NULL) { nvgpu_channel_close(chA); @@ -581,22 +585,19 @@ done: return ret; } -#define F_TSG_RELEASE_GR_CTX BIT(0) -#define F_TSG_RELEASE_MEM BIT(1) -#define F_TSG_RELEASE_VM BIT(2) -#define F_TSG_RELEASE_UNHOOK_EVENTS BIT(3) -#define F_TSG_RELEASE_ENG_BUFS BIT(4) -#define F_TSG_RELEASE_SM_ERR_STATES BIT(5) -#define F_TSG_RELEASE_LAST BIT(6) +#define F_TSG_RELEASE_NO_RELEASE_HAL BIT(0) +#define F_TSG_RELEASE_GR_CTX BIT(1) +#define F_TSG_RELEASE_MEM BIT(2) +#define F_TSG_RELEASE_VM BIT(3) +#define F_TSG_RELEASE_UNHOOK_EVENTS BIT(4) +#define F_TSG_RELEASE_ENG_BUFS BIT(5) +#define F_TSG_RELEASE_SM_ERR_STATES BIT(6) +#define F_TSG_RELEASE_LAST BIT(7) -static const char *f_tsg_release[] = { - "gr_ctx", - "mem", - "vm", - "unhook_events", - "eng_bufs", - "sm_err_states" -}; + +static void stub_tsg_release(struct nvgpu_tsg *tsg) +{ +} static void stub_tsg_deinit_eng_method_buffers(struct gk20a *g, struct nvgpu_tsg *tsg) @@ -626,24 +627,37 @@ int test_tsg_release(struct unit_module *m, struct nvgpu_mem mem; u32 free_gr_ctx_mask = F_TSG_RELEASE_GR_CTX|F_TSG_RELEASE_MEM|F_TSG_RELEASE_VM; + const char *labels[] = { + "no_release_hal", + "gr_ctx", + "mem", + "vm", + "unhook_events", + "eng_bufs", + "sm_err_states" + }; for (branches = 0U; branches < 
F_TSG_RELEASE_LAST; branches++) { if (!(branches & F_TSG_RELEASE_GR_CTX) && (branches & F_TSG_RELEASE_MEM)) { unit_verbose(m, "%s branches=%s (pruned)\n", __func__, - branches_str(branches, f_tsg_release)); + branches_str(branches, labels)); continue; } subtest_setup(branches); unit_verbose(m, "%s branches=%s\n", __func__, - branches_str(branches, f_tsg_release)); + branches_str(branches, labels)); tsg = nvgpu_tsg_open(g, getpid()); assert(tsg != NULL); assert(tsg->gr_ctx != NULL); assert(tsg->gr_ctx->mem.aperture == APERTURE_INVALID); + g->ops.tsg.release = + branches & F_TSG_RELEASE_NO_RELEASE_HAL ? + NULL : stub_tsg_release; + if (!(branches & F_TSG_RELEASE_GR_CTX)) { nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx); tsg->gr_ctx = NULL; @@ -721,7 +735,7 @@ int test_tsg_release(struct unit_module *m, done: if (ret != UNIT_SUCCESS) { unit_err(m, "%s branches=%s\n", __func__, - branches_str(branches, f_tsg_release)); + branches_str(branches, labels)); } g->ops = gops; return ret; @@ -1231,6 +1245,220 @@ done: return ret; } +#define F_TSG_MARK_ERROR_NO_CHANNEL BIT(0) +#define F_TSG_MARK_ERROR_UNSERVICEABLE BIT(1) +#define F_TSG_MARK_ERROR_VERBOSE BIT(2) +#define F_TSG_MARK_ERROR_LAST BIT(3) + +int test_tsg_mark_error(struct unit_module *m, + struct gk20a *g, void *args) +{ + struct gpu_ops gops = g->ops; + struct nvgpu_tsg *tsg = NULL; + struct nvgpu_channel *ch = NULL; + int ret = UNIT_FAIL; + bool verbose; + int err; + u32 branches; + static const char *labels[] = { + "no_channel", + "unserviceable", + "verbose", + }; + u32 prune = + F_TSG_MARK_ERROR_NO_CHANNEL | + F_TSG_MARK_ERROR_UNSERVICEABLE; + struct nvgpu_posix_channel ch_priv; + + for (branches = 0U; branches < F_TSG_MARK_ERROR_LAST; branches++) { + + if (pruned(branches, prune)) { + unit_verbose(m, "%s branches=%s (pruned)\n", __func__, + branches_str(branches, labels)); + continue; + } + subtest_setup(branches); + unit_verbose(m, "%s branches=%s\n", __func__, + branches_str(branches, labels)); + + tsg = nvgpu_tsg_open(g, getpid()); + assert(tsg != NULL); + + ch = nvgpu_channel_open_new(g, ~0U, false, getpid(), getpid()); + assert(ch != NULL); + + ch->os_priv = &ch_priv; + ch_priv.err_notifier.error = U32_MAX; + ch_priv.err_notifier.status = 0; + + if ((branches & F_TSG_MARK_ERROR_NO_CHANNEL) == 0) { + err = nvgpu_tsg_bind_channel(tsg, ch); + assert(err == 0); + } + + if (branches & F_TSG_MARK_ERROR_UNSERVICEABLE) { + nvgpu_channel_set_unserviceable(ch); + } + + ch->ctxsw_timeout_debug_dump = + branches & F_TSG_MARK_ERROR_VERBOSE ? 
+				true : false;
+
+		nvgpu_tsg_set_error_notifier(g, tsg,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
+
+		verbose = nvgpu_tsg_mark_error(g, tsg);
+
+		if ((branches & F_TSG_MARK_ERROR_NO_CHANNEL) ||
+				(branches & F_TSG_MARK_ERROR_UNSERVICEABLE)) {
+			assert(!verbose);
+		}
+
+		if (branches & F_TSG_MARK_ERROR_VERBOSE) {
+			assert(verbose);
+		} else {
+			assert(!verbose);
+		}
+
+		nvgpu_channel_close(ch);
+		ch = NULL;
+		nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
+		tsg = NULL;
+	}
+
+	ret = UNIT_SUCCESS;
+done:
+	if (ret != UNIT_SUCCESS) {
+		unit_err(m, "%s branches=%s\n", __func__,
+			branches_str(branches, labels));
+	}
+	if (ch != NULL) {
+		nvgpu_channel_close(ch);
+	}
+	if (tsg != NULL) {
+		nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
+	}
+	g->ops = gops;
+	return ret;
+}
+
+int test_tsg_set_ctx_mmu_error(struct unit_module *m,
+		struct gk20a *g, void *args)
+{
+	struct nvgpu_tsg *tsg = NULL;
+	struct nvgpu_channel *ch = NULL;
+	int ret = UNIT_FAIL;
+	struct nvgpu_posix_channel ch_priv;
+	int err;
+
+	tsg = nvgpu_tsg_open(g, getpid());
+	assert(tsg != NULL);
+
+	ch = nvgpu_channel_open_new(g, ~0U, false, getpid(), getpid());
+	assert(ch != NULL);
+
+	err = nvgpu_tsg_bind_channel(tsg, ch);
+	assert(err == 0);
+
+	ch->os_priv = &ch_priv;
+	ch_priv.err_notifier.error = U32_MAX;
+	ch_priv.err_notifier.status = 0;
+
+	nvgpu_tsg_set_ctx_mmu_error(g, tsg);
+
+	assert(ch_priv.err_notifier.error == NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
+	assert(ch_priv.err_notifier.status != 0);
+
+	ret = UNIT_SUCCESS;
+done:
+	if (ch != NULL) {
+		nvgpu_channel_close(ch);
+	}
+	if (tsg != NULL) {
+		nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
+	}
+	return ret;
+}
+
+#define F_TSG_RESET_FAULTED_NO_RESET_HAL	BIT(0)
+#define F_TSG_RESET_FAULTED_TSG_NULL		BIT(1)
+#define F_TSG_RESET_FAULTED_LAST		BIT(2)
+
+static void stub_channel_reset_faulted(struct gk20a *g, struct nvgpu_channel *ch,
+		bool eng, bool pbdma)
+{
+	stub[0].name = __func__;
+	stub[0].chid = ch->chid;
+}
+
+int test_tsg_reset_faulted_eng_pbdma(struct unit_module *m,
+		struct gk20a *g, void *args)
+{
+	struct nvgpu_tsg *tsg = NULL;
+	struct nvgpu_channel *ch = NULL;
+	int ret = UNIT_FAIL;
+	struct gpu_ops gops = g->ops;
+	int err;
+	u32 branches;
+	static const char *labels[] = {
+		"no_reset_hal",
+		"tsg_null"
+	};
+	u32 fail =
+		F_TSG_RESET_FAULTED_NO_RESET_HAL |
+		F_TSG_RESET_FAULTED_TSG_NULL;
+	u32 prune = fail;
+
+	tsg = nvgpu_tsg_open(g, getpid());
+	assert(tsg != NULL);
+
+	ch = nvgpu_channel_open_new(g, ~0U, false, getpid(), getpid());
+	assert(ch != NULL);
+
+	err = nvgpu_tsg_bind_channel(tsg, ch);
+	assert(err == 0);
+
+	for (branches = 0U; branches < F_TSG_RESET_FAULTED_LAST; branches++) {
+
+		if (pruned(branches, prune)) {
+			unit_verbose(m, "%s branches=%s (pruned)\n", __func__,
+				branches_str(branches, labels));
+			continue;
+		}
+		subtest_setup(branches);
+		unit_verbose(m, "%s branches=%s\n", __func__,
+			branches_str(branches, labels));
+
+		g->ops.channel.reset_faulted =
+			branches & F_TSG_RESET_FAULTED_NO_RESET_HAL ?
+ NULL : stub_channel_reset_faulted; + + if (branches & F_TSG_RESET_FAULTED_TSG_NULL) { + nvgpu_tsg_reset_faulted_eng_pbdma(g, NULL, true, true); + } else { + nvgpu_tsg_reset_faulted_eng_pbdma(g, tsg, true, true); + } + + if (branches & fail) { + assert(stub[0].chid != ch->chid); + } else { + assert(stub[0].chid == ch->chid); + } + } + + ret = UNIT_SUCCESS; +done: + if (ch != NULL) { + nvgpu_channel_close(ch); + } + if (tsg != NULL) { + nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); + } + g->ops = gops; + return ret; +} + + + struct unit_module_test nvgpu_tsg_tests[] = { UNIT_TEST(setup_sw, test_tsg_setup_sw, &unit_ctx, 0), UNIT_TEST(init_support, test_fifo_init_support, &unit_ctx, 0), @@ -1245,6 +1473,9 @@ struct unit_module_test nvgpu_tsg_tests[] = { test_tsg_unbind_channel_check_ctx_reload, &unit_ctx, 0), UNIT_TEST(enable_disable, test_tsg_enable, &unit_ctx, 0), UNIT_TEST(abort, test_tsg_abort, &unit_ctx, 0), + UNIT_TEST(mark_error, test_tsg_mark_error, &unit_ctx, 0), + UNIT_TEST(set_ctx_mmu_error, test_tsg_set_ctx_mmu_error, &unit_ctx, 0), + UNIT_TEST(reset_faulted_eng_pbdma, test_tsg_reset_faulted_eng_pbdma, &unit_ctx, 0), UNIT_TEST(remove_support, test_fifo_remove_support, &unit_ctx, 0), }; diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.h b/userspace/units/fifo/tsg/nvgpu-tsg.h index 793c1f1a1..82174a513 100644 --- a/userspace/units/fifo/tsg/nvgpu-tsg.h +++ b/userspace/units/fifo/tsg/nvgpu-tsg.h @@ -36,10 +36,12 @@ struct gk20a; /** * Test specification for: test_tsg_open * - * Description: Branch coverage for nvgpu_tsg_open. + * Description: Create TSG * * Test Type: Feature based * + * Targets: nvgpu_tsg_open + * * Input: test_fifo_init_support() run for this GPU * * Steps: @@ -65,10 +67,12 @@ int test_tsg_open(struct unit_module *m, /** * Test specification for: test_tsg_bind_channel * - * Description: Branch coverage for nvgpu_tsg_bind_channel. + * Description: Bind channel to TSG. * * Test Type: Feature based * + * Targets: nvgpu_tsg_bind_channel + * * Input: test_fifo_init_support() run for this GPU * * Steps: @@ -100,10 +104,12 @@ int test_tsg_bind_channel(struct unit_module *m, /** * Test specification for: test_tsg_unbind_channel * - * Description: Branch coverage for nvgpu_tsg_unbind_channel. + * Description: Unbind channel from TSG. * * Test Type: Feature based * + * Targets: nvgpu_tsg_unbind_channel + * * Input: test_fifo_init_support() run for this GPU * * Steps: @@ -143,10 +149,12 @@ int test_tsg_unbind_channel(struct unit_module *m, /** * Test specification for: test_tsg_release * - * Description: Branch coverage for nvgpu_tsg_release. + * Description: Release TSG. * * Test Type: Feature based * + * Targets: nvgpu_tsg_release + * * Input: test_fifo_init_support() run for this GPU * * Steps: @@ -176,10 +184,12 @@ int test_tsg_release(struct unit_module *m, /** * Test specification for: test_tsg_unbind_channel_check_hw_state * - * Description: Branch coverage for nvgpu_tsg_unbind_channel_check_hw_state. + * Description: Check HW state during TSG unbind channel. * * Test Type: Feature based * + * Targets: nvgpu_tsg_unbind_channel_check_hw_state + * * Input: test_fifo_init_support() run for this GPU * * Steps: @@ -200,10 +210,12 @@ int test_tsg_unbind_channel_check_hw_state(struct unit_module *m, /** * Test specification for: test_tsg_unbind_channel_check_ctx_reload * - * Description: Branch coverage for nvgpu_tsg_unbind_channel_check_ctx_reload. 
+ * Description: Check if channel reload is needed during TSG unbind
  *
  * Test Type: Feature based
  *
+ * Targets: nvgpu_tsg_unbind_channel_check_ctx_reload
+ *
  * Input: test_fifo_init_support() run for this GPU
  *
  * Steps:
@@ -225,10 +237,12 @@ int test_tsg_unbind_channel_check_ctx_reload(struct unit_module *m,
 /**
  * Test specification for: test_tsg_enable
  *
- * Description: Branch coverage for nvgpu_tsg_enable/disable.
+ * Description: Enable/disable TSG
  *
  * Test Type: Feature based
  *
+ * Targets: nvgpu_tsg_enable, nvgpu_tsg_disable
+ *
  * Input: test_fifo_init_support() run for this GPU
  *
  * Steps:
@@ -250,10 +264,12 @@ int test_tsg_enable(struct unit_module *m,
 /**
  * Test specification for: test_tsg_check_and_get_from_id
  *
- * Description: Branch coverage for test_tsg_check_and_get_from_id.
+ * Description: Get TSG context from id
  *
  * Test Type: Feature based
  *
+ * Targets: nvgpu_tsg_check_and_get_from_id
+ *
  * Input: test_fifo_init_support() run for this GPU
  *
  * Steps:
@@ -270,10 +286,12 @@ int test_tsg_check_and_get_from_id(struct unit_module *m,
 /**
  * Test specification for: test_tsg_abort
  *
- * Description: Branch coverage for nvgpu_tsg_abort.
+ * Description: Abort TSG
  *
  * Test Type: Feature based
  *
+ * Targets: nvgpu_tsg_abort
+ *
  * Input: test_fifo_init_support() run for this GPU
  *
  * Steps:
@@ -296,10 +314,12 @@ int test_tsg_abort(struct unit_module *m,
 /**
  * Test specification for: test_tsg_setup_sw
  *
- * Description: Branch coverage for nvgpu_tsg_setup_sw.
+ * Description: SW Initialization for TSGs
  *
  * Test Type: Feature based
  *
+ * Targets: nvgpu_tsg_setup_sw
+ *
  * Input: None
  *
  * Steps:
@@ -313,6 +333,80 @@ int test_tsg_abort(struct unit_module *m,
 int test_tsg_setup_sw(struct unit_module *m,
 		struct gk20a *g, void *args);
 
+/**
+ * Test specification for: test_tsg_mark_error
+ *
+ * Description: Mark all channels unserviceable in a TSG
+ *
+ * Test Type: Feature based
+ *
+ * Targets: nvgpu_tsg_mark_error
+ *
+ * Input: None
+ *
+ * Steps:
+ * - Check marginal cases:
+ *   - Mark error for TSG with no bound channel.
+ *   - Mark error for TSG with one unserviceable channel.
+ * - Check likely cases:
+ *   - Use one TSG with one bound channel.
+ *   - Set error notifier to NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT.
+ *   - Check that nvgpu_tsg_mark_error returns true (i.e. verbose), when
+ *     ch->ctxsw_timeout_debug_dump is true.
+ *   - Check that nvgpu_tsg_mark_error returns false otherwise.
+ *
+ * Output: Returns PASS if all branches gave expected results. FAIL otherwise.
+ */
+int test_tsg_mark_error(struct unit_module *m,
+		struct gk20a *g, void *args);
+
+/**
+ * Test specification for: test_tsg_set_ctx_mmu_error
+ *
+ * Description: Set MMU fault error notifier for TSG
+ *
+ * Test Type: Feature based
+ *
+ * Targets: nvgpu_tsg_set_ctx_mmu_error
+ *
+ * Input: None
+ *
+ * Steps:
+ * - Setup a TSG with one bound channel.
+ * - Initialize error notifier for channel.
+ * - Call nvgpu_tsg_set_ctx_mmu_error for TSG.
+ * - Check that channel's error notifier has been set to
+ *   NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT.
+ *
+ * Output: Returns PASS if all branches gave expected results. FAIL otherwise.
+ */ +int test_tsg_set_ctx_mmu_error(struct unit_module *m, + struct gk20a *g, void *args); + +/** + * Test specification for: test_tsg_reset_faulted_eng_pbdma + * + * Description: Reset faulted engine and/or PBDMAs for a TSG + * + * Test Type: Feature based + * + * Targets: nvgpu_tsg_reset_faulted_eng_pbdma + * + * Input: None + * + * Steps: + * - Check valid case: + * - Setup a TSG with one bound channel. + * - Call nvgpu_tsg_reset_faulted_eng_pbdma. + * - Check that g->ops.channel.reset_faulted was called for channel. + * - Check invalid cases: + * - Case where TSG pointer is NULL. + * - Case where g->ops.channel.reset_faulted is NULL. + * + * Output: Returns PASS if all branches gave expected results. FAIL otherwise. + */ +int test_tsg_reset_faulted_eng_pbdma(struct unit_module *m, + struct gk20a *g, void *args); /** * @} */
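Note on the test pattern used throughout this change: each test_tsg_* function walks a bitmask of F_* branch flags, installs stubs for the HALs selected by those flags, and skips ("prunes") flag combinations that sit behind a final, early-exit branch. The sketch below is a minimal, self-contained illustration of that pattern. It is modeled on the local pruned() and branches_str() helpers that this patch removes in favor of the shared test_fifo_subtest_pruned() and test_fifo_flags_str(); the F_EXAMPLE_* flags and example_* names are invented for illustration only and are not part of nvgpu.

/*
 * Illustrative sketch only: shows how each F_* bit selects one branch of the
 * function under test, and how combinations behind a "final" (early-exit)
 * branch are skipped. Mirrors the removed pruned()/branches_str() helpers.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BIT(i)			(1U << (i))

#define F_EXAMPLE_ALLOC_FAIL	BIT(0)	/* final branch: function returns early */
#define F_EXAMPLE_HAL_NULL	BIT(1)	/* non-final branch */
#define F_EXAMPLE_LAST		BIT(2)

/* Build a "flag1 flag2 ..." string for the set bits (cf. branches_str()). */
static const char *example_branches_str(uint32_t branches,
		const char *labels[], char *buf, size_t size)
{
	uint32_t i;

	buf[0] = '\0';
	for (i = 0U; i < 32U; i++) {
		if (branches & BIT(i)) {
			strncat(buf, labels[i], size - strlen(buf) - 1U);
			strncat(buf, " ", size - strlen(buf) - 1U);
		}
	}
	return buf;
}

/*
 * Skip a combination that sets a final branch plus any branch numbered after
 * it: those later branches could never be reached (cf. removed pruned()).
 */
static bool example_pruned(uint32_t branches, uint32_t final_branches)
{
	uint32_t match = branches & final_branches;
	uint32_t bit;

	if (match == 0U) {
		return false;
	}
	/* find the lowest set final branch */
	for (bit = 0U; (match & BIT(bit)) == 0U; bit++) {
		;
	}
	return branches > BIT(bit);
}

int main(void)
{
	const char *labels[] = { "alloc_fail", "hal_null" };
	uint32_t fail = F_EXAMPLE_ALLOC_FAIL;	/* final (early-exit) branches */
	uint32_t branches;
	char buf[64];

	for (branches = 0U; branches < F_EXAMPLE_LAST; branches++) {
		printf("branches=[%s]%s\n",
			example_branches_str(branches, labels, buf, sizeof(buf)),
			example_pruned(branches, fail) ? " (pruned)" : "");
	}
	return 0;
}

As the comment removed by this patch notes, the pruning check only works because the F_* bits are numbered in the order the corresponding branches appear in the function under test, so any bit above the lowest-set final branch refers to code that the early exit makes unreachable.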