gpu: nvgpu: remove code for ch not bound to tsg

- Remove handling for channels that are no longer bound to a tsg, since
  a channel can be referenceable while no longer being part of a tsg
- Use tsg_gk20a_from_ch() to get the pointer to the tsg for a given
  channel (see the sketch below this list)
- Clear unhandled gr interrupts even for a referenceable channel,
  instead of only logging them
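
A minimal sketch of the resulting lookup-and-check pattern, reconstructed
from the gk20a_enable_channel_tsg hunk at the top of the diff (the op and
helper names are taken from the diff; the exact placement of the returns
in the final code is an assumption):

    int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
    {
        struct tsg_gk20a *tsg;

        /* look up the tsg instead of indexing g->fifo.tsg[ch->tsgid] */
        tsg = tsg_gk20a_from_ch(ch);
        if (tsg != NULL) {
            g->ops.fifo.enable_tsg(tsg);
            return 0;
        } else {
            /* referenceable channel that is not bound to a tsg */
            return -EINVAL;
        }
    }

On the interrupt side, the ISR previously cleared a leftover PGRAPH
interrupt only when the channel was unreferenceable; per the gk20a_gr_isr
hunk below it now clears it in both cases, roughly:

    if (gr_intr != 0U) {
        /* clear unhandled interrupts */
        if (ch == NULL) {
            nvgpu_err(g, "unhandled gr intr 0x%08x for "
                "unreferenceable channel, clearing", gr_intr);
        } else {
            nvgpu_err(g, "unhandled gr intr 0x%08x for chid: %d",
                gr_intr, chid);
        }
        gk20a_writel(g, gr_intr_r(), gr_intr);
        gr_intr = 0;
    }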

Bug 2429295
JIRA NVGPU-1580

Change-Id: I9da43a2bc9a0282c793b9f301eaf8e8604f91d70
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1972492
(cherry picked from commit 013ca60edd in dev-kernel)
Reviewed-on: https://git-master.nvidia.com/r/2018262
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Debarshi Dutta <ddutta@nvidia.com>
Tested-by: Debarshi Dutta <ddutta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Authored by Seema Khowala on 2018-12-13 11:02:11 -08:00, committed by mobile promotions
parent d975bda398
commit c9d4df288d
6 changed files with 110 additions and 124 deletions


@@ -164,28 +164,26 @@ int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
{
struct tsg_gk20a *tsg;
if (gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
g->ops.fifo.enable_tsg(tsg);
return 0;
} else {
g->ops.fifo.enable_channel(ch);
return -EINVAL;
}
return 0;
}
int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
{
struct tsg_gk20a *tsg;
if (gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
g->ops.fifo.disable_tsg(tsg);
return 0;
} else {
g->ops.fifo.disable_channel(ch);
return -EINVAL;
}
return 0;
}
void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
@@ -238,19 +236,8 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
if (tsg != NULL) {
return gk20a_fifo_abort_tsg(ch->g, tsg, channel_preempt);
}
/* make sure new kickoffs are prevented */
gk20a_channel_set_timedout(ch);
ch->g->ops.fifo.disable_channel(ch);
if (channel_preempt) {
ch->g->ops.fifo.preempt_channel(ch->g, ch);
}
if (ch->g->ops.fifo.ch_abort_clean_up) {
ch->g->ops.fifo.ch_abort_clean_up(ch);
} else {
nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid);
}
}


@@ -457,9 +457,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
struct gk20a_fecs_trace *trace = g->fecs_trace;
struct nvgpu_mem *mem;
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
pid_t pid;
u32 aperture_mask;
tsg = tsg_gk20a_from_ch(ch);
if (tsg == NULL) {
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
return -EINVAL;
}
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"chid=%d context_ptr=%x inst_block=%llx",
ch->chid, context_ptr,
@@ -519,11 +524,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
/* pid (process identifier) in user space, corresponds to tgid (thread
* group id) in kernel space.
*/
if (gk20a_is_channel_marked_as_tsg(ch))
pid = tsg_gk20a_from_ch(ch)->tgid;
else
pid = ch->tgid;
gk20a_fecs_trace_hash_add(g, context_ptr, pid);
gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);
return 0;
}


@@ -1580,7 +1580,8 @@ void gk20a_fifo_abort_tsg(struct gk20a *g, struct tsg_gk20a *tsg, bool preempt)
int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
{
u32 engine_id, engines;
unsigned long engine_id, engines = 0U;
struct tsg_gk20a *tsg;
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
gr_gk20a_disable_ctxsw(g);
@@ -1589,12 +1590,14 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
goto clean_up;
}
if (gk20a_is_channel_marked_as_tsg(ch)) {
engines = gk20a_fifo_engines_on_id(g, ch->tsgid, true);
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
} else {
engines = gk20a_fifo_engines_on_id(g, ch->chid, false);
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
if (!engines) {
if (engines == 0U) {
goto clean_up;
}
@@ -1750,16 +1753,18 @@ static bool gk20a_fifo_handle_mmu_fault_locked(
} else if (type == fifo_engine_status_id_type_chid_v()) {
ch = &g->fifo.channel[id];
refch = gk20a_channel_get(ch);
if (refch != NULL) {
tsg = tsg_gk20a_from_ch(refch);
}
}
} else {
/* read channel based on instruction pointer */
ch = gk20a_refch_from_inst_ptr(g,
mmfault_info.inst_ptr);
refch = ch;
}
if (ch && gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
if (refch != NULL) {
tsg = tsg_gk20a_from_ch(refch);
}
}
/* check if engine reset should be deferred */
@@ -1786,16 +1791,10 @@ static bool gk20a_fifo_handle_mmu_fault_locked(
}
#ifdef CONFIG_GK20A_CTXSW_TRACE
/*
* For non fake mmu fault, both tsg and ch pointers
* could be valid. Check tsg first.
*/
if (tsg)
if (tsg) {
gk20a_ctxsw_trace_tsg_reset(g, tsg);
else if (ch)
gk20a_ctxsw_trace_channel_reset(g, ch);
}
#endif
/*
* Disable the channel/TSG from hw and increment syncpoints.
*/
@@ -1815,26 +1814,10 @@ static bool gk20a_fifo_handle_mmu_fault_locked(
if (refch) {
gk20a_channel_put(ch);
}
} else if (ch) {
if (refch) {
if (g->fifo.deferred_reset_pending) {
g->ops.fifo.disable_channel(ch);
} else {
if (!fake_fault) {
gk20a_fifo_set_ctx_mmu_error_ch(
g, refch);
}
verbose = gk20a_fifo_error_ch(g,
refch);
gk20a_channel_abort(ch, false);
}
gk20a_channel_put(ch);
} else {
nvgpu_err(g,
"mmu error in freed channel %d",
ch->chid);
}
} else if (refch != NULL) {
nvgpu_err(g, "mmu error in unbound channel %d",
ch->chid);
gk20a_channel_put(ch);
} else if (mmfault_info.inst_ptr ==
nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
nvgpu_err(g, "mmu fault from bar1");
@@ -2116,7 +2099,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
rc_type, NULL);
}
/* force reset channel and tsg (if it's part of one) */
/* force reset channel and tsg */
int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
u32 err_code, bool verbose)
{
@@ -2126,7 +2109,6 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
@@ -2142,9 +2124,7 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
gk20a_fifo_recover_tsg(g, tsg, verbose,
RC_TYPE_FORCE_RESET);
} else {
g->ops.fifo.set_error_notifier(ch, err_code);
gk20a_fifo_recover_ch(g, ch, verbose,
RC_TYPE_FORCE_RESET);
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
return 0;


@@ -3018,7 +3018,6 @@ static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
{
struct gk20a *g = c->g;
struct fifo_gk20a *f = &g->fifo;
struct nvgpu_gr_ctx *gr_ctx;
struct tsg_gk20a *tsg = NULL;
int err = 0;
@@ -3041,11 +3040,11 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
}
c->obj_class = class_num;
if (!gk20a_is_channel_marked_as_tsg(c)) {
tsg = tsg_gk20a_from_ch(c);
if (tsg == NULL) {
return -EINVAL;
}
tsg = &f->tsg[c->tsgid];
gr_ctx = &tsg->gr_ctx;
if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
@@ -5213,21 +5212,21 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g,
return;
}
if (gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
channel_gk20a, ch_entry) {
if (gk20a_channel_get(ch_tsg)) {
g->ops.fifo.set_error_notifier(ch_tsg,
error_notifier);
error_notifier);
gk20a_channel_put(ch_tsg);
}
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else {
g->ops.fifo.set_error_notifier(ch, error_notifier);
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
}
@@ -5394,12 +5393,21 @@ int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
struct gr_gk20a_isr_data *isr_data)
{
struct channel_gk20a *ch = isr_data->ch;
struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
struct tsg_gk20a *tsg;
g->ops.fifo.post_event_id(tsg,
NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN);
if (ch == NULL) {
return 0;
}
nvgpu_cond_broadcast(&ch->semaphore_wq);
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
g->ops.fifo.post_event_id(tsg,
NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN);
nvgpu_cond_broadcast(&ch->semaphore_wq);
} else {
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
return 0;
}
@@ -5434,7 +5442,12 @@ int gk20a_gr_handle_notify_pending(struct gk20a *g,
u32 buffer_size;
u32 offset;
bool exit;
#endif
if (ch == NULL || tsg_gk20a_from_ch(ch) == NULL) {
return 0;
}
#if defined(CONFIG_GK20A_CYCLE_STATS)
/* GL will never use payload 0 for cycle state */
if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
return 0;
@@ -5975,7 +5988,7 @@ int gk20a_gr_isr(struct gk20a *g)
u32 chid;
nvgpu_log_fn(g, " ");
nvgpu_log(g, gpu_dbg_intr, "pgraph intr %08x", gr_intr);
nvgpu_log(g, gpu_dbg_intr, "pgraph intr 0x%08x", gr_intr);
if (gr_intr == 0U) {
return 0;
@@ -6009,11 +6022,13 @@ int gk20a_gr_isr(struct gk20a *g)
chid = ch != NULL ? ch->chid : FIFO_INVAL_CHANNEL_ID;
if (ch == NULL) {
nvgpu_err(g, "ch id is INVALID 0xffffffff");
}
if ((ch != NULL) && gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
nvgpu_err(g, "pgraph intr: 0x%08x, chid: INVALID", gr_intr);
} else {
tsg = tsg_gk20a_from_ch(ch);
if (tsg == NULL) {
nvgpu_err(g, "pgraph intr: 0x%08x, chid: %d "
"not bound to tsg", gr_intr, chid);
}
}
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
@@ -6198,7 +6213,9 @@ int gk20a_gr_isr(struct gk20a *g)
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GPC exception pending");
fault_ch = isr_data.ch;
if (tsg != NULL) {
fault_ch = isr_data.ch;
}
/* fault_ch can be NULL */
/* check if any gpc has an exception */
@@ -6225,39 +6242,42 @@ int gk20a_gr_isr(struct gk20a *g)
}
if (need_reset) {
if (tsgid != NVGPU_INVALID_TSG_ID) {
if (tsg != NULL) {
gk20a_fifo_recover(g, gr_engine_id,
tsgid, true, true, true,
RC_TYPE_GR_FAULT);
} else if (ch) {
gk20a_fifo_recover(g, gr_engine_id,
ch->chid, false, true, true,
RC_TYPE_GR_FAULT);
} else {
if (ch != NULL) {
nvgpu_err(g, "chid: %d referenceable but not "
"bound to tsg", chid);
}
gk20a_fifo_recover(g, gr_engine_id,
0, false, false, true,
RC_TYPE_GR_FAULT);
}
}
if ((gr_intr != 0U) && (ch == NULL)) {
/* Clear interrupts for unused channel. This is
probably an interrupt during gk20a_free_channel() */
nvgpu_err(g,
"unhandled gr interrupt 0x%08x for unreferenceable channel, clearing",
gr_intr);
if (gr_intr != 0U) {
/* clear unhandled interrupts */
if (ch == NULL) {
/*
* This is probably an interrupt during
* gk20a_free_channel()
*/
nvgpu_err(g, "unhandled gr intr 0x%08x for "
"unreferenceable channel, clearing",
gr_intr);
} else {
nvgpu_err(g, "unhandled gr intr 0x%08x for chid: %d",
gr_intr, chid);
}
gk20a_writel(g, gr_intr_r(), gr_intr);
gr_intr = 0;
}
gk20a_writel(g, gr_gpfifo_ctl_r(),
grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
gr_gpfifo_ctl_semaphore_access_f(1));
if (gr_intr) {
nvgpu_err(g,
"unhandled gr interrupt 0x%08x", gr_intr);
}
/* Posting of BPT events should be the last thing in this function */
if ((global_esr != 0U) && (tsg != NULL)) {


@@ -1691,6 +1691,14 @@ void gr_gp10b_get_access_map(struct gk20a *g,
static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
{
int ret = 0;
struct tsg_gk20a *tsg;
tsg = tsg_gk20a_from_ch(fault_ch);
if (tsg == NULL) {
nvgpu_err(g, "CILP: chid: %d is not bound to tsg",
fault_ch->chid);
return -EINVAL;
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
@@ -1711,18 +1719,11 @@ static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP: tsgid: 0x%x", fault_ch->tsgid);
"CILP: tsgid: 0x%x", tsg->tsgid);
if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
gk20a_fifo_issue_preempt(g, tsg->tsgid, true);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP: preempted tsg");
} else {
gk20a_fifo_issue_preempt(g, fault_ch->chid, false);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP: preempted channel");
}
return ret;
}


@@ -641,9 +641,8 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_log_fn(g, " ");
if (gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
@@ -658,8 +657,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else {
g->ops.fifo.set_error_notifier(ch, err_code);
gk20a_channel_set_timedout(ch);
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FORCE_RESET;
@@ -698,9 +696,8 @@ static void vgpu_fifo_set_ctx_mmu_error_ch_tsg(struct gk20a *g,
struct tsg_gk20a *tsg = NULL;
struct channel_gk20a *ch_tsg = NULL;
if (gk20a_is_channel_marked_as_tsg(ch)) {
tsg = &g->fifo.tsg[ch->tsgid];
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
@@ -713,7 +710,7 @@ static void vgpu_fifo_set_ctx_mmu_error_ch_tsg(struct gk20a *g,
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else {
vgpu_fifo_set_ctx_mmu_error_ch(g, ch);
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
}