From 3c97f3b932f573f7a3fa4c5bad43852d4990feed Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Tue, 24 Aug 2021 13:40:13 +0530 Subject: [PATCH] gpu: nvgpu: disallow binding more channels than MAX channels supported per TSG There is a HW-specific limit on the number of channel entries that can be added for each TSG entry in the runlist. Currently SW does not enforce this limit, so if a user binds more channels than supported to the same TSG, invalid TSG formation error interrupts are generated. Fix this by adding the appropriate checks, in the steps below: - Add a new field ch_count to struct nvgpu_tsg to keep track of the channels bound to the TSG. - Define a new HAL gops.runlist.get_max_channels_per_tsg() to retrieve the HW-specific maximum channel count per TSG. - Implement the HAL for gk20a and gv11b chips, and assign the new HALs for all chips appropriately. - Increment ch_count while binding a channel to the TSG and decrement it while unbinding. - While binding a channel to the TSG, check whether the current channel count is already equal to the max channel count. If so, print a warning and bail out with -EINVAL. 
Bug 200763991 Change-Id: Ic5f17a52e0fb171d1c020bf4f085f57cdb95f923 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2582095 Tested-by: mobile promotions Reviewed-by: Konsta Holtta Reviewed-by: svc_kernel_abi Reviewed-by: mobile promotions GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/fifo/tsg.c | 15 +++++++++++++++ drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c | 5 +++++ drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h | 1 + drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h | 1 + .../gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c | 4 ++++ drivers/gpu/nvgpu/hal/init/hal_ga100.c | 1 + drivers/gpu/nvgpu/hal/init/hal_ga10b.c | 1 + drivers/gpu/nvgpu/hal/init/hal_gm20b.c | 1 + drivers/gpu/nvgpu/hal/init/hal_gp10b.c | 1 + drivers/gpu/nvgpu/hal/init/hal_gv11b.c | 1 + drivers/gpu/nvgpu/hal/init/hal_tu104.c | 1 + drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c | 1 + drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c | 1 + drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h | 10 ++++++++++ .../nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h | 1 + .../nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h | 1 + .../nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h | 3 ++- .../nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h | 3 ++- .../nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h | 3 ++- drivers/gpu/nvgpu/include/nvgpu/tsg.h | 16 +++++++++++++--- 20 files changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index e49ccae17..ddbec58cd 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -96,6 +96,7 @@ static bool nvgpu_tsg_is_channel_active(struct gk20a *g, int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) { struct gk20a *g = ch->g; + u32 max_ch_per_tsg; int err = 0; nvgpu_log_fn(g, "bind tsg:%u ch:%u\n", tsg->tsgid, ch->chid); @@ -110,6 +111,17 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) 
return -EINVAL; } + /* cannot bind more channels than MAX channels supported per TSG */ + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + max_ch_per_tsg = g->ops.runlist.get_max_channels_per_tsg(); + if (tsg->ch_count == max_ch_per_tsg) { + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + nvgpu_warn(g, "TSG %u trying to bind more than supported channels (%u)", + tsg->tsgid, max_ch_per_tsg); + return -EINVAL; + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + /* Use runqueue selector 1 for all ASYNC ids */ if (ch->subctx_id > CHANNEL_INFO_VEID0) { ch->runqueue_sel = 1; @@ -141,6 +153,7 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_list_add_tail(&ch->ch_entry, &tsg->ch_list); + tsg->ch_count = nvgpu_safe_add_u32(tsg->ch_count, 1U); ch->tsgid = tsg->tsgid; /* channel is serviceable after it is bound to tsg */ ch->unserviceable = false; @@ -234,6 +247,7 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, /* Remove channel from TSG and re-enable rest of the channels */ nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_list_del(&ch->ch_entry); + tsg->ch_count = nvgpu_safe_sub_u32(tsg->ch_count, 1U); ch->tsgid = NVGPU_INVALID_TSG_ID; /* another thread could have re-enabled the channel because it was @@ -807,6 +821,7 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid) tsg->tgid = pid; tsg->g = g; tsg->num_active_channels = 0U; + tsg->ch_count = 0U; nvgpu_ref_init(&tsg->refcount); tsg->vm = NULL; diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c index 18b6784ab..a2f6e2fc4 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.c @@ -81,3 +81,8 @@ void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist) runlist[0] = ram_rl_entry_chid_f(ch->chid); runlist[1] = 0; } + +u32 gk20a_runlist_get_max_channels_per_tsg(void) +{ + return 
ram_rl_entry_tsg_length_max_v(); +} diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h index fc31c4a18..6bb978d9d 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gk20a.h @@ -34,5 +34,6 @@ u32 gk20a_runlist_max_timeslice(void); void gk20a_runlist_get_tsg_entry(struct nvgpu_tsg *tsg, u32 *runlist, u32 timeslice); void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist); +u32 gk20a_runlist_get_max_channels_per_tsg(void); #endif /* NVGPU_RUNLIST_RAM_GK20A_H */ diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h index 881fedaa0..7d953096c 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b.h @@ -33,5 +33,6 @@ u32 gv11b_runlist_max_timeslice(void); void gv11b_runlist_get_tsg_entry(struct nvgpu_tsg *tsg, u32 *runlist, u32 timeslice); void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist); +u32 gv11b_runlist_get_max_channels_per_tsg(void); #endif /* NVGPU_RUNLIST_RAM_GV11B_H */ diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c index 1873011f2..307856464 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_ram_gv11b_fusa.c @@ -118,3 +118,7 @@ void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist) runlist[0], runlist[1], runlist[2], runlist[3]); } +u32 gv11b_runlist_get_max_channels_per_tsg(void) +{ + return ram_rl_entry_tsg_length_max_v(); +} diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c index b520cf981..ccb3189ce 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c @@ -1127,6 +1127,7 @@ static const struct gops_runlist ga100_ops_runlist = { .init_enginfo = nvgpu_runlist_init_enginfo, 
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice, .get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id, + .get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg, }; static const struct gops_userd ga100_ops_userd = { diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index eaa281207..b288c3278 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -1117,6 +1117,7 @@ static const struct gops_runlist ga10b_ops_runlist = { .init_enginfo = nvgpu_runlist_init_enginfo, .get_tsg_max_timeslice = gv11b_runlist_max_timeslice, .get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id, + .get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg, }; static const struct gops_userd ga10b_ops_userd = { diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index e4dd47d7d..6e23061a6 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -698,6 +698,7 @@ static const struct gops_runlist gm20b_ops_runlist = { .write_state = gk20a_runlist_write_state, .init_enginfo = nvgpu_runlist_init_enginfo, .get_tsg_max_timeslice = gk20a_runlist_max_timeslice, + .get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg, }; static const struct gops_userd gm20b_ops_userd = { diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index eca3dbf52..febb6ea46 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -788,6 +788,7 @@ static const struct gops_runlist gp10b_ops_runlist = { .write_state = gk20a_runlist_write_state, .init_enginfo = nvgpu_runlist_init_enginfo, .get_tsg_max_timeslice = gk20a_runlist_max_timeslice, + .get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg, }; static const struct gops_userd gp10b_ops_userd = { diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c 
b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 212fe2498..c8e106a0a 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -965,6 +965,7 @@ static const struct gops_runlist gv11b_ops_runlist = { .write_state = gk20a_runlist_write_state, .init_enginfo = nvgpu_runlist_init_enginfo, .get_tsg_max_timeslice = gv11b_runlist_max_timeslice, + .get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg, }; static const struct gops_userd gv11b_ops_userd = { diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index f433402d8..ed5924362 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -1019,6 +1019,7 @@ static const struct gops_runlist tu104_ops_runlist = { .write_state = gk20a_runlist_write_state, .init_enginfo = nvgpu_runlist_init_enginfo, .get_tsg_max_timeslice = gv11b_runlist_max_timeslice, + .get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg, }; static const struct gops_userd tu104_ops_userd = { diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c index 1514a890a..82d8e55d2 100644 --- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c @@ -700,6 +700,7 @@ static const struct gops_runlist vgpu_ga10b_ops_runlist = { .hw_submit = NULL, .wait_pending = NULL, .init_enginfo = nvgpu_runlist_init_enginfo, + .get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg, }; static const struct gops_userd vgpu_ga10b_ops_userd = { diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c index 18900048e..695a57ef1 100644 --- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c @@ -668,6 +668,7 @@ static const struct gops_runlist vgpu_gv11b_ops_runlist = { .wait_pending = NULL, .init_enginfo = 
nvgpu_runlist_init_enginfo, .get_tsg_max_timeslice = gv11b_runlist_max_timeslice, + .get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg, }; static const struct gops_userd vgpu_gv11b_ops_userd = { diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h index 37edf14ce..f3c7ab169 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h @@ -68,6 +68,16 @@ struct gops_runlist { int (*reload)(struct gk20a *g, struct nvgpu_runlist *rl, bool add, bool wait_for_finish); + /** + * @brief Get maximum number of channels supported per TSG entry + * in runlist. + * + * @param none. + * + * @return maximum number of channels supported per TSG in runlist. + */ + u32 (*get_max_channels_per_tsg)(void); + /** @cond DOXYGEN_SHOULD_SKIP_THIS */ int (*update)(struct gk20a *g, struct nvgpu_runlist *rl, diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h index 62f2cde5c..956be1334 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga100/hw_ram_ga100.h @@ -169,6 +169,7 @@ #define ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U) #define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U) #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U) +#define ram_rl_entry_tsg_length_max_v() (0x00000080U) #define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U) #define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U) #define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU) diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h index b558c0a95..0de328bbc 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ram_ga10b.h @@ -166,6 +166,7 @@ #define 
ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U) #define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U) #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U) +#define ram_rl_entry_tsg_length_max_v() (0x00000080U) #define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U) #define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U) #define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU) diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h index 773a9f10b..6181d204c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_ram_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2012-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -157,4 +157,5 @@ #define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU) #define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U) #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U) +#define ram_rl_entry_tsg_length_max_v() (0x00000020U) #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h index c71ceeebe..b204cb305 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ram_gm20b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -160,4 +160,5 @@ #define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU) #define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U) #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U) +#define ram_rl_entry_tsg_length_max_v() (0x00000020U) #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h index 2fd944025..c16bcc13d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ram_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -175,4 +175,5 @@ #define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU) #define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U) #define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U) +#define ram_rl_entry_tsg_length_max_v() (0x00000020U) #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index ca003517d..f061ca44f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -129,10 +129,20 @@ struct nvgpu_tsg { * #ch_list. */ struct nvgpu_rwsem ch_list_lock; + /** - * Total number of channels that are bound to a TSG. This can change - * during run time whenever channels are bound to a TSG or unbound - * from a TSG. + * Total number of channels that are bound to a TSG. This count is + * incremented when a channel is bound to TSG, and decremented when + * channel is unbound from TSG. 
+ */ + u32 ch_count; + + /** + * Total number of active channels that are bound to a TSG. This count + * is incremented when a channel bound to TSG is added into the runlist + * under the same TSG header. Count is decremented when channel bound + * to TSG is removed from the runlist. This count is specifically + * tracked for runlist construction of TSG entry. */ u32 num_active_channels;