gpu: nvgpu: disallow binding more channels than MAX channels supported per TSG

There is a HW-specific limit on the number of channel entries that can
be added under each TSG entry in a runlist. Right now there is no SW
check to enforce this, so if the User binds more channels than
supported to the same TSG, invalid TSG formation error interrupts are
generated.

Fix this by adding appropriate checks, in the following steps:

- Add a new field ch_count to struct nvgpu_tsg to keep track of the
  number of channels bound to the TSG.
- Define a new HAL gops.runlist.get_max_channels_per_tsg() to retrieve
  the HW-specific maximum channel count per TSG.
- Implement the HAL for gk20a and gv11b chips, and assign the new HALs
  for all chips appropriately.
- Increment ch_count while binding a channel to the TSG and decrement
  it while unbinding.
- While binding a channel to a TSG, check if the current channel count
  already equals the max channel count. If yes, print an error and bail
  out (see the sketch after this list).
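
As a rough illustration, below is a minimal standalone C sketch of the
bind-time guard (simplified types, no locking; the real change,
including the ch_list_lock handling, is in the diff that follows):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct tsg {
        uint32_t tsgid;
        uint32_t ch_count;      /* channels currently bound to this TSG */
};

/* Stand-in for the new HAL op; on gv11b the real one returns
 * ram_rl_entry_tsg_length_max_v() == 0x80, i.e. 128 channels. */
static uint32_t get_max_channels_per_tsg(void)
{
        return 0x80U;
}

static int tsg_bind_channel(struct tsg *tsg)
{
        uint32_t max_ch = get_max_channels_per_tsg();

        /* Refuse the bind up front so the runlist never carries a TSG
         * entry longer than the HW supports. */
        if (tsg->ch_count == max_ch) {
                fprintf(stderr, "TSG %u: max channels (%u) already bound\n",
                        tsg->tsgid, max_ch);
                return -EINVAL;
        }
        tsg->ch_count++;
        return 0;
}

int main(void)
{
        struct tsg t = { .tsgid = 0U, .ch_count = 0U };
        int binds = 0;

        while (tsg_bind_channel(&t) == 0) {
                binds++;
        }
        printf("bound %d channels before hitting the limit\n", binds);
        return 0;
}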

Bug 200763991

Change-Id: Ic5f17a52e0fb171d1c020bf4f085f57cdb95f923
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2582095
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
commit 3c97f3b932 (parent 608decf1e6)
Author: Deepak Nibade
Date:   2021-08-24 13:40:13 +05:30
Committed by: mobile promotions
20 changed files with 65 additions and 6 deletions

@@ -96,6 +96,7 @@ static bool nvgpu_tsg_is_channel_active(struct gk20a *g,
int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
u32 max_ch_per_tsg;
int err = 0;
nvgpu_log_fn(g, "bind tsg:%u ch:%u\n", tsg->tsgid, ch->chid);
@@ -110,6 +111,17 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
return -EINVAL;
}
/* cannot bind more channels than MAX channels supported per TSG */
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
max_ch_per_tsg = g->ops.runlist.get_max_channels_per_tsg();
if (tsg->ch_count == max_ch_per_tsg) {
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
nvgpu_warn(g, "TSG %u trying to bind more than supported channels (%u)",
tsg->tsgid, max_ch_per_tsg);
return -EINVAL;
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
/* Use runqueue selector 1 for all ASYNC ids */
if (ch->subctx_id > CHANNEL_INFO_VEID0) {
ch->runqueue_sel = 1;
@@ -141,6 +153,7 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
nvgpu_list_add_tail(&ch->ch_entry, &tsg->ch_list);
tsg->ch_count = nvgpu_safe_add_u32(tsg->ch_count, 1U);
ch->tsgid = tsg->tsgid;
/* channel is serviceable after it is bound to tsg */
ch->unserviceable = false;
@@ -234,6 +247,7 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
/* Remove channel from TSG and re-enable rest of the channels */
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
nvgpu_list_del(&ch->ch_entry);
tsg->ch_count = nvgpu_safe_sub_u32(tsg->ch_count, 1U);
ch->tsgid = NVGPU_INVALID_TSG_ID;
/* another thread could have re-enabled the channel because it was
@@ -807,6 +821,7 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
tsg->tgid = pid;
tsg->g = g;
tsg->num_active_channels = 0U;
tsg->ch_count = 0U;
nvgpu_ref_init(&tsg->refcount);
tsg->vm = NULL;
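
A side note on the ch_count updates above: they go through nvgpu's
checked u32 helpers rather than a bare '+' or '-'. A minimal sketch of
the assumed semantics of such a helper (trap instead of silently
wrapping; this is a hypothetical stand-in, not the real
nvgpu_safe_add_u32() implementation):

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical stand-in for nvgpu_safe_add_u32(): return a + b, or
 * abort if the sum would wrap and corrupt the bound-channel count. */
static uint32_t safe_add_u32(uint32_t a, uint32_t b)
{
        if (b > UINT32_MAX - a) {
                abort();
        }
        return a + b;
}

int main(void)
{
        uint32_t count = 0U;

        count = safe_add_u32(count, 1U);   /* one channel bound */
        return (int)count - 1;             /* exits 0 */
}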

@@ -81,3 +81,8 @@ void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
runlist[0] = ram_rl_entry_chid_f(ch->chid);
runlist[1] = 0;
}
u32 gk20a_runlist_get_max_channels_per_tsg(void)
{
return ram_rl_entry_tsg_length_max_v();
}

@@ -34,5 +34,6 @@ u32 gk20a_runlist_max_timeslice(void);
void gk20a_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
u32 *runlist, u32 timeslice);
void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist);
u32 gk20a_runlist_get_max_channels_per_tsg(void);
#endif /* NVGPU_RUNLIST_RAM_GK20A_H */

@@ -33,5 +33,6 @@ u32 gv11b_runlist_max_timeslice(void);
void gv11b_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
u32 *runlist, u32 timeslice);
void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist);
u32 gv11b_runlist_get_max_channels_per_tsg(void);
#endif /* NVGPU_RUNLIST_RAM_GV11B_H */

@@ -118,3 +118,7 @@ void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
runlist[0], runlist[1], runlist[2], runlist[3]);
}
u32 gv11b_runlist_get_max_channels_per_tsg(void)
{
return ram_rl_entry_tsg_length_max_v();
}

@@ -1127,6 +1127,7 @@ static const struct gops_runlist ga100_ops_runlist = {
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd ga100_ops_userd = {

@@ -1117,6 +1117,7 @@ static const struct gops_runlist ga10b_ops_runlist = {
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd ga10b_ops_userd = {

@@ -698,6 +698,7 @@ static const struct gops_runlist gm20b_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gk20a_runlist_max_timeslice,
.get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd gm20b_ops_userd = {

@@ -788,6 +788,7 @@ static const struct gops_runlist gp10b_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gk20a_runlist_max_timeslice,
.get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd gp10b_ops_userd = {

@@ -965,6 +965,7 @@ static const struct gops_runlist gv11b_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd gv11b_ops_userd = {

@@ -1019,6 +1019,7 @@ static const struct gops_runlist tu104_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd tu104_ops_userd = {

@@ -700,6 +700,7 @@ static const struct gops_runlist vgpu_ga10b_ops_runlist = {
.hw_submit = NULL,
.wait_pending = NULL,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd vgpu_ga10b_ops_userd = {

@@ -668,6 +668,7 @@ static const struct gops_runlist vgpu_gv11b_ops_runlist = {
.wait_pending = NULL,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd vgpu_gv11b_ops_userd = {

@@ -68,6 +68,16 @@ struct gops_runlist {
int (*reload)(struct gk20a *g, struct nvgpu_runlist *rl,
bool add, bool wait_for_finish);
/**
* @brief Get maximum number of channels supported per TSG entry
* in runlist.
*
* @param none.
*
* @return maximum number of channels supported per TSG in runlist.
*/
u32 (*get_max_channels_per_tsg)(void);
/** @cond DOXYGEN_SHOULD_SKIP_THIS */
int (*update)(struct gk20a *g, struct nvgpu_runlist *rl,
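
For context, here is a compilable miniature of the dispatch pattern
this op plugs into: each chip family wires its own limit function into
the ops table, and common code stays chip-agnostic. Types and names
below are simplified stand-ins, not the real nvgpu structures:

#include <stdint.h>
#include <stdio.h>

struct gops_runlist_min {
        uint32_t (*get_max_channels_per_tsg)(void);
};

static uint32_t gk20a_max(void) { return 0x20U; }  /* 32, pre-Volta */
static uint32_t gv11b_max(void) { return 0x80U; }  /* 128, Volta on */

int main(void)
{
        struct gops_runlist_min gm20b = { gk20a_max };
        struct gops_runlist_min tu104 = { gv11b_max };

        printf("gm20b: %u channels/TSG, tu104: %u channels/TSG\n",
               gm20b.get_max_channels_per_tsg(),
               tu104.get_max_channels_per_tsg());
        return 0;
}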

@@ -169,6 +169,7 @@
#define ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U)
#define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U)
#define ram_rl_entry_tsg_length_max_v() (0x00000080U)
#define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U)
#define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U)
#define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU)

@@ -166,6 +166,7 @@
#define ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U)
#define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U)
#define ram_rl_entry_tsg_length_max_v() (0x00000080U)
#define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U)
#define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U)
#define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU)

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2012-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -157,4 +157,5 @@
#define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
#define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
#endif

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -160,4 +160,5 @@
#define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
#define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
#endif

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -175,4 +175,5 @@
#define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
#define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
#endif
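
The two header generations above encode the TSG length field
differently. Below is a small standalone self-check of those layouts,
with the macros copied from the hunks above so it compiles on its own:

#include <assert.h>
#include <stdint.h>

#define U32(v) ((uint32_t)(v))

/* Volta-style headers above: 8-bit length field at bit 0, max 0x80. */
#define gv11b_tsg_length_f(v)     ((U32(v) & 0xffU) << 0U)
#define gv11b_tsg_length_max_v()  (0x00000080U)

/* gk20a-style headers above: 6-bit length field at bit 26, max 0x20. */
#define gk20a_tsg_length_f(v)     ((U32(v) & 0x3fU) << 26U)
#define gk20a_tsg_length_max_v()  (0x00000020U)

int main(void)
{
        /* The maximum still fits each field's mask, i.e. 128 and 32
         * channels per TSG respectively. */
        assert(gv11b_tsg_length_f(gv11b_tsg_length_max_v()) == 0x80U);
        assert(gk20a_tsg_length_f(gk20a_tsg_length_max_v()) ==
               (0x20U << 26U));
        return 0;
}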

@@ -129,10 +129,20 @@ struct nvgpu_tsg {
* #ch_list.
*/
struct nvgpu_rwsem ch_list_lock;
/**
* Total number of channels that are bound to a TSG. This count is
* incremented when a channel is bound to TSG, and decremented when
* channel is unbound from TSG.
*/
u32 ch_count;
/**
* Total number of active channels that are bound to a TSG. This count
* is incremented when a channel bound to TSG is added into the runlist
* under the same TSG header. Count is decremented when channel bound
* to TSG is removed from the runlist. This count is specifically
* tracked for runlist construction of TSG entry.
*/
u32 num_active_channels;
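
Taken together, the two counters are intended to satisfy a simple
invariant (illustrative assertion form, assuming active channels are
always a subset of bound ones; this is not code from the change):

#include <assert.h>
#include <stdint.h>

struct tsg_counts {
        uint32_t ch_count;              /* channels bound to the TSG */
        uint32_t num_active_channels;   /* bound channels in the runlist */
};

static void check_invariant(const struct tsg_counts *t, uint32_t max_ch)
{
        /* Active channels are a subset of bound channels, and the
         * bound count can never pass the per-chip HW limit. */
        assert(t->num_active_channels <= t->ch_count);
        assert(t->ch_count <= max_ch);
}

int main(void)
{
        struct tsg_counts t = { .ch_count = 2U, .num_active_channels = 1U };

        check_invariant(&t, 0x80U);     /* 128-channel limit on gv11b */
        return 0;
}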