nvgpu: limit the number of channels that can be bound to a TSG

Summary of this patch:
  * struct nvgpu_tsg gains a ch_count field. It is zeroed in
    nvgpu_tsg_open_common(), incremented in nvgpu_tsg_bind_channel() and
    decremented in nvgpu_tsg_unbind_channel_common(), in each case next to
    the ch_list update made while ch_list_lock is held for writing.
  * A new HAL op, g->ops.runlist.get_max_channels_per_tsg(), returns
    ram_rl_entry_tsg_length_max_v(). The values added here are 0x20 in the
    gk20a/gm20b/gp10b hw_ram headers and 0x80 in the ga100/ga10b hw_ram
    headers.
  * gm20b and gp10b use gk20a_runlist_get_max_channels_per_tsg(); gv11b,
    tu104, ga100, ga10b and the vgpu gv11b/ga10b HALs use
    gv11b_runlist_get_max_channels_per_tsg().
  * nvgpu_tsg_bind_channel() returns -EINVAL (with a warning) when the TSG
    already holds the maximum number of channels.

Review notes:
  * The limit test is written as ">=" instead of "==". The test is made
    under the read side of ch_list_lock, which is released before the
    channel is added under the write side, so two concurrent binds can both
    pass it. With "==" an overshoot by one would disable the limit for the
    rest of the TSG's life; with ">=" further binds are still rejected.
  * The check and the increment are still not atomic. Closing that window
    needs the test to move into the write-locked section together with
    unwinding of the steps in between, which are outside the hunks below.
  * Indentation and blank context lines were reconstructed to match the
    hunk headers; the blob ids on the "index" lines are those of the
    original patch.

diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index e49ccae17..ddbec58cd 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -96,6 +96,7 @@ static bool nvgpu_tsg_is_channel_active(struct gk20a *g,
 int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
 {
 	struct gk20a *g = ch->g;
+	u32 max_ch_per_tsg;
 	int err = 0;
 
 	nvgpu_log_fn(g, "bind tsg:%u ch:%u\n", tsg->tsgid, ch->chid);
@@ -110,6 +111,17 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
 		return -EINVAL;
 	}
 
+	/* cannot bind more channels than MAX channels supported per TSG */
+	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
+	max_ch_per_tsg = g->ops.runlist.get_max_channels_per_tsg();
+	if (tsg->ch_count >= max_ch_per_tsg) {
+		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+		nvgpu_warn(g, "TSG %u trying to bind more than supported channels (%u)",
+			tsg->tsgid, max_ch_per_tsg);
+		return -EINVAL;
+	}
+	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+
 	/* Use runqueue selector 1 for all ASYNC ids */
 	if (ch->subctx_id > CHANNEL_INFO_VEID0) {
 		ch->runqueue_sel = 1;
@@ -141,6 +153,7 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
 
 	nvgpu_rwsem_down_write(&tsg->ch_list_lock);
 	nvgpu_list_add_tail(&ch->ch_entry, &tsg->ch_list);
+	tsg->ch_count = nvgpu_safe_add_u32(tsg->ch_count, 1U);
 	ch->tsgid = tsg->tsgid;
 	/* channel is serviceable after it is bound to tsg */
 	ch->unserviceable = false;
@@ -234,6 +247,7 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
 	/* Remove channel from TSG and re-enable rest of the channels */
 	nvgpu_rwsem_down_write(&tsg->ch_list_lock);
 	nvgpu_list_del(&ch->ch_entry);
+	tsg->ch_count = nvgpu_safe_sub_u32(tsg->ch_count, 1U);
 	ch->tsgid = NVGPU_INVALID_TSG_ID;
 
 	/* another thread could have re-enabled the channel because it was
@@ -807,6 +821,7 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
 	tsg->tgid = pid;
 	tsg->g = g;
 	tsg->num_active_channels = 0U;
+	tsg->ch_count = 0U;
 	nvgpu_ref_init(&tsg->refcount);
 
 	tsg->vm = NULL;
diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c
index 18b6784ab..a2f6e2fc4 100644
--- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c
@@ -81,3 +81,8 @@ void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
 	runlist[0] = ram_rl_entry_chid_f(ch->chid);
 	runlist[1] = 0;
 }
+
+u32 gk20a_runlist_get_max_channels_per_tsg(void)
+{
+	return ram_rl_entry_tsg_length_max_v();
+}
diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h
index fc31c4a18..6bb978d9d 100644
--- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h
@@ -34,5 +34,6 @@ u32 gk20a_runlist_max_timeslice(void);
 void gk20a_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
 		u32 *runlist, u32 timeslice);
 void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist);
+u32 gk20a_runlist_get_max_channels_per_tsg(void);
 
 #endif /* NVGPU_RUNLIST_RAM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h
index 881fedaa0..7d953096c 100644
--- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h
@@ -33,5 +33,6 @@ u32 gv11b_runlist_max_timeslice(void);
 void gv11b_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
 		u32 *runlist, u32 timeslice);
 void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist);
+u32 gv11b_runlist_get_max_channels_per_tsg(void);
 
 #endif /* NVGPU_RUNLIST_RAM_GV11B_H */
diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c
index 1873011f2..307856464 100644
--- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c
@@ -118,3 +118,7 @@ void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
 			runlist[0], runlist[1], runlist[2], runlist[3]);
 }
 
+u32 gv11b_runlist_get_max_channels_per_tsg(void)
+{
+	return ram_rl_entry_tsg_length_max_v();
+}
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
index b520cf981..ccb3189ce 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
@@ -1127,6 +1127,7 @@ static const struct gops_runlist ga100_ops_runlist = {
 	.init_enginfo = nvgpu_runlist_init_enginfo,
 	.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
 	.get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id,
+	.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd ga100_ops_userd = {
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
index eaa281207..b288c3278 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
@@ -1117,6 +1117,7 @@ static const struct gops_runlist ga10b_ops_runlist = {
 	.init_enginfo = nvgpu_runlist_init_enginfo,
 	.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
 	.get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id,
+	.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd ga10b_ops_userd = {
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index e4dd47d7d..6e23061a6 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -698,6 +698,7 @@ static const struct gops_runlist gm20b_ops_runlist = {
 	.write_state = gk20a_runlist_write_state,
 	.init_enginfo = nvgpu_runlist_init_enginfo,
 	.get_tsg_max_timeslice = gk20a_runlist_max_timeslice,
+	.get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd gm20b_ops_userd = {
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
index eca3dbf52..febb6ea46 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -788,6 +788,7 @@ static const struct gops_runlist gp10b_ops_runlist = {
 	.write_state = gk20a_runlist_write_state,
 	.init_enginfo = nvgpu_runlist_init_enginfo,
 	.get_tsg_max_timeslice = gk20a_runlist_max_timeslice,
+	.get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd gp10b_ops_userd = {
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index 212fe2498..c8e106a0a 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -965,6 +965,7 @@ static const struct gops_runlist gv11b_ops_runlist = {
 	.write_state = gk20a_runlist_write_state,
 	.init_enginfo = nvgpu_runlist_init_enginfo,
 	.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
+	.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd gv11b_ops_userd = {
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index f433402d8..ed5924362 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1019,6 +1019,7 @@ static const struct gops_runlist tu104_ops_runlist = {
 	.write_state = gk20a_runlist_write_state,
 	.init_enginfo = nvgpu_runlist_init_enginfo,
 	.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
+	.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd tu104_ops_userd = {
diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c
index 1514a890a..82d8e55d2 100644
--- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c
@@ -700,6 +700,7 @@ static const struct gops_runlist vgpu_ga10b_ops_runlist = {
 	.hw_submit = NULL,
 	.wait_pending = NULL,
 	.init_enginfo = nvgpu_runlist_init_enginfo,
+	.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd vgpu_ga10b_ops_userd = {
diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
index 18900048e..695a57ef1 100644
--- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
@@ -668,6 +668,7 @@ static const struct gops_runlist vgpu_gv11b_ops_runlist = {
 	.wait_pending = NULL,
 	.init_enginfo = nvgpu_runlist_init_enginfo,
 	.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
+	.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
 };
 
 static const struct gops_userd vgpu_gv11b_ops_userd = {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h
index 37edf14ce..f3c7ab169 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h
@@ -68,6 +68,16 @@ struct gops_runlist {
 	int (*reload)(struct gk20a *g, struct nvgpu_runlist *rl,
 			bool add, bool wait_for_finish);
 
+	/**
+	 * @brief Get maximum number of channels supported per TSG entry
+	 *        in runlist.
+	 *
+	 * @param none.
+	 *
+	 * @return maximum number of channels supported per TSG in runlist.
+	 */
+	u32 (*get_max_channels_per_tsg)(void);
+
 	/** @cond DOXYGEN_SHOULD_SKIP_THIS */
 
 	int (*update)(struct gk20a *g, struct nvgpu_runlist *rl,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h
index 62f2cde5c..956be1334 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h
@@ -169,6 +169,7 @@
 #define ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U)
 #define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U)
 #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U)
+#define ram_rl_entry_tsg_length_max_v() (0x00000080U)
 #define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U)
 #define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U)
 #define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h
index b558c0a95..0de328bbc 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h
@@ -166,6 +166,7 @@
 #define ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U)
 #define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U)
 #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U)
+#define ram_rl_entry_tsg_length_max_v() (0x00000080U)
 #define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U)
 #define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U)
 #define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h
index 773a9f10b..6181d204c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2012-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -157,4 +157,5 @@
 #define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
 #define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
 #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
+#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h
index c71ceeebe..b204cb305 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -160,4 +160,5 @@
 #define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
 #define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
 #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
+#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h
index 2fd944025..c16bcc13d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -175,4 +175,5 @@
 #define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
 #define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
 #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
+#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index ca003517d..f061ca44f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -129,10 +129,20 @@ struct nvgpu_tsg {
 	 * #ch_list.
 	 */
 	struct nvgpu_rwsem ch_list_lock;
+
 	/**
-	 * Total number of channels that are bound to a TSG. This can change
-	 * during run time whenever channels are bound to a TSG or unbound
-	 * from a TSG.
+	 * Total number of channels that are bound to a TSG. This count is
+	 * incremented when a channel is bound to TSG, and decremented when
+	 * channel is unbound from TSG.
+	 */
+	u32 ch_count;
+
+	/**
+	 * Total number of active channels that are bound to a TSG. This count
+	 * is incremented when a channel bound to TSG is added into the runlist
+	 * under the same TSG header. Count is decremented when channel bound
+	 * to TSG is removed from the runlist. This count is specifically
+	 * tracked for runlist construction of TSG entry.
 	 */
 	u32 num_active_channels;
 