gpu: nvgpu: disallow binding more channels than MAX channels supported per TSG

There is a HW-specific limit on the number of channel entries that can
be added under each TSG entry in a runlist. Right now there is no SW
check to enforce this, so if the User binds more channels than
supported to the same TSG, invalid TSG formation error interrupts are
generated.

Fix this by adding appropriate checks, in the following steps:

- Add a new field ch_count to struct nvgpu_tsg to keep track of the
  number of channels bound to the TSG.
- Define a new HAL gops.runlist.get_max_channels_per_tsg() to retrieve
  the HW-specific maximum channel count per TSG.
- Implement the HAL for gk20a and gv11b chips, and assign the new HALs
  for all chips appropriately.
- Increment ch_count while binding a channel to the TSG and decrement
  it while unbinding.
- While binding a channel to a TSG, check if the current channel count
  already equals the max channel count. If yes, print an error and bail
  out (see the sketch after this list).
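
As a rough illustration, below is a minimal standalone C sketch of the
bind-time guard (simplified types, no locking; the real change,
including the ch_list_lock handling, is in the diff that follows):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct tsg {
        uint32_t tsgid;
        uint32_t ch_count;      /* channels currently bound to this TSG */
};

/* Stand-in for the new HAL op; on gv11b the real one returns
 * ram_rl_entry_tsg_length_max_v() == 0x80, i.e. 128 channels. */
static uint32_t get_max_channels_per_tsg(void)
{
        return 0x80U;
}

static int tsg_bind_channel(struct tsg *tsg)
{
        uint32_t max_ch = get_max_channels_per_tsg();

        /* Refuse the bind up front so the runlist never carries a TSG
         * entry longer than the HW supports. */
        if (tsg->ch_count == max_ch) {
                fprintf(stderr, "TSG %u: max channels (%u) already bound\n",
                        tsg->tsgid, max_ch);
                return -EINVAL;
        }
        tsg->ch_count++;
        return 0;
}

int main(void)
{
        struct tsg t = { .tsgid = 0U, .ch_count = 0U };
        int binds = 0;

        while (tsg_bind_channel(&t) == 0) {
                binds++;
        }
        printf("bound %d channels before hitting the limit\n", binds);
        return 0;
}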

Bug 200763991

Change-Id: Ic5f17a52e0fb171d1c020bf4f085f57cdb95f923
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2582095
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
commit 3c97f3b932 (parent 608decf1e6)
Author: Deepak Nibade
Date:   2021-08-24 13:40:13 +05:30
Committed by: mobile promotions
20 changed files with 65 additions and 6 deletions

@@ -96,6 +96,7 @@ static bool nvgpu_tsg_is_channel_active(struct gk20a *g,
int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
u32 max_ch_per_tsg;
int err = 0;
nvgpu_log_fn(g, "bind tsg:%u ch:%u\n", tsg->tsgid, ch->chid);
@@ -110,6 +111,17 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
return -EINVAL;
}
/* cannot bind more channels than MAX channels supported per TSG */
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
max_ch_per_tsg = g->ops.runlist.get_max_channels_per_tsg();
if (tsg->ch_count == max_ch_per_tsg) {
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
nvgpu_warn(g, "TSG %u trying to bind more than supported channels (%u)",
tsg->tsgid, max_ch_per_tsg);
return -EINVAL;
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
/* Use runqueue selector 1 for all ASYNC ids */
if (ch->subctx_id > CHANNEL_INFO_VEID0) {
ch->runqueue_sel = 1;
@@ -141,6 +153,7 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch)
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
nvgpu_list_add_tail(&ch->ch_entry, &tsg->ch_list);
tsg->ch_count = nvgpu_safe_add_u32(tsg->ch_count, 1U);
ch->tsgid = tsg->tsgid;
/* channel is serviceable after it is bound to tsg */
ch->unserviceable = false;
@@ -234,6 +247,7 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg,
/* Remove channel from TSG and re-enable rest of the channels */
nvgpu_rwsem_down_write(&tsg->ch_list_lock);
nvgpu_list_del(&ch->ch_entry);
tsg->ch_count = nvgpu_safe_sub_u32(tsg->ch_count, 1U);
ch->tsgid = NVGPU_INVALID_TSG_ID;
/* another thread could have re-enabled the channel because it was
@@ -807,6 +821,7 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid)
tsg->tgid = pid;
tsg->g = g;
tsg->num_active_channels = 0U;
tsg->ch_count = 0U;
nvgpu_ref_init(&tsg->refcount);
tsg->vm = NULL;
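
A side note on the ch_count updates above: they go through nvgpu's
checked u32 helpers rather than a bare '+' or '-'. A minimal sketch of
the assumed semantics of such a helper (trap instead of silently
wrapping; this is a hypothetical stand-in, not the real
nvgpu_safe_add_u32() implementation):

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical stand-in for nvgpu_safe_add_u32(): return a + b, or
 * abort if the sum would wrap and corrupt the bound-channel count. */
static uint32_t safe_add_u32(uint32_t a, uint32_t b)
{
        if (b > UINT32_MAX - a) {
                abort();
        }
        return a + b;
}

int main(void)
{
        uint32_t count = 0U;

        count = safe_add_u32(count, 1U);   /* one channel bound */
        return (int)count - 1;             /* exits 0 */
}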

@@ -81,3 +81,8 @@ void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
runlist[0] = ram_rl_entry_chid_f(ch->chid);
runlist[1] = 0;
}
u32 gk20a_runlist_get_max_channels_per_tsg(void)
{
return ram_rl_entry_tsg_length_max_v();
}

@@ -34,5 +34,6 @@ u32 gk20a_runlist_max_timeslice(void);
void gk20a_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
u32 *runlist, u32 timeslice);
void gk20a_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist);
u32 gk20a_runlist_get_max_channels_per_tsg(void);
#endif /* NVGPU_RUNLIST_RAM_GK20A_H */

@@ -33,5 +33,6 @@ u32 gv11b_runlist_max_timeslice(void);
void gv11b_runlist_get_tsg_entry(struct nvgpu_tsg *tsg,
u32 *runlist, u32 timeslice);
void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist);
u32 gv11b_runlist_get_max_channels_per_tsg(void);
#endif /* NVGPU_RUNLIST_RAM_GV11B_H */

@@ -118,3 +118,7 @@ void gv11b_runlist_get_ch_entry(struct nvgpu_channel *ch, u32 *runlist)
runlist[0], runlist[1], runlist[2], runlist[3]);
}
u32 gv11b_runlist_get_max_channels_per_tsg(void)
{
return ram_rl_entry_tsg_length_max_v();
}

@@ -1127,6 +1127,7 @@ static const struct gops_runlist ga100_ops_runlist = {
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd ga100_ops_userd = {

@@ -1117,6 +1117,7 @@ static const struct gops_runlist ga10b_ops_runlist = {
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_esched_fb_thread_id = ga10b_runlist_get_esched_fb_thread_id,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd ga10b_ops_userd = {

@@ -698,6 +698,7 @@ static const struct gops_runlist gm20b_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gk20a_runlist_max_timeslice,
.get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd gm20b_ops_userd = {

@@ -788,6 +788,7 @@ static const struct gops_runlist gp10b_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gk20a_runlist_max_timeslice,
.get_max_channels_per_tsg = gk20a_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd gp10b_ops_userd = {

@@ -965,6 +965,7 @@ static const struct gops_runlist gv11b_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd gv11b_ops_userd = {

@@ -1019,6 +1019,7 @@ static const struct gops_runlist tu104_ops_runlist = {
.write_state = gk20a_runlist_write_state,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd tu104_ops_userd = {

@@ -700,6 +700,7 @@ static const struct gops_runlist vgpu_ga10b_ops_runlist = {
.hw_submit = NULL,
.wait_pending = NULL,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd vgpu_ga10b_ops_userd = {

@@ -668,6 +668,7 @@ static const struct gops_runlist vgpu_gv11b_ops_runlist = {
.wait_pending = NULL,
.init_enginfo = nvgpu_runlist_init_enginfo,
.get_tsg_max_timeslice = gv11b_runlist_max_timeslice,
.get_max_channels_per_tsg = gv11b_runlist_get_max_channels_per_tsg,
};
static const struct gops_userd vgpu_gv11b_ops_userd = {

@@ -68,6 +68,16 @@ struct gops_runlist {
int (*reload)(struct gk20a *g, struct nvgpu_runlist *rl,
bool add, bool wait_for_finish);
/**
* @brief Get maximum number of channels supported per TSG entry
* in runlist.
*
* @param none.
*
* @return maximum number of channels supported per TSG in runlist.
*/
u32 (*get_max_channels_per_tsg)(void);
/** @cond DOXYGEN_SHOULD_SKIP_THIS */
int (*update)(struct gk20a *g, struct nvgpu_runlist *rl,
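
For context, here is a compilable miniature of the dispatch pattern
this op plugs into: each chip family wires its own limit function into
the ops table, and common code stays chip-agnostic. Types and names
below are simplified stand-ins, not the real nvgpu structures:

#include <stdint.h>
#include <stdio.h>

struct gops_runlist_min {
        uint32_t (*get_max_channels_per_tsg)(void);
};

static uint32_t gk20a_max(void) { return 0x20U; }  /* 32, pre-Volta */
static uint32_t gv11b_max(void) { return 0x80U; }  /* 128, Volta on */

int main(void)
{
        struct gops_runlist_min gm20b = { gk20a_max };
        struct gops_runlist_min tu104 = { gv11b_max };

        printf("gm20b: %u channels/TSG, tu104: %u channels/TSG\n",
               gm20b.get_max_channels_per_tsg(),
               tu104.get_max_channels_per_tsg());
        return 0;
}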

@@ -169,6 +169,7 @@
#define ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U)
#define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U)
#define ram_rl_entry_tsg_length_max_v() (0x00000080U)
#define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U)
#define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U)
#define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU)

@@ -166,6 +166,7 @@
#define ram_rl_entry_tsg_timeslice_scale_f(v) ((U32(v) & 0xfU) << 16U)
#define ram_rl_entry_tsg_timeslice_timeout_f(v) ((U32(v) & 0xffU) << 24U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0xffU) << 0U)
#define ram_rl_entry_tsg_length_max_v() (0x00000080U)
#define ram_rl_entry_tsg_tsgid_f(v) ((U32(v) & 0xfffU) << 0U)
#define ram_rl_entry_chan_userd_ptr_align_shift_v() (0x00000008U)
#define ram_rl_entry_chan_inst_ptr_align_shift_v() (0x0000000cU)

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2012-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -157,4 +157,5 @@
#define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
#define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
#endif

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -160,4 +160,5 @@
#define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
#define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
#endif

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -175,4 +175,5 @@
#define ram_rl_entry_timeslice_timeout_v(r) (((r) >> 18U) & 0xffU)
#define ram_rl_entry_timeslice_timeout_128_f() (0x2000000U)
#define ram_rl_entry_tsg_length_f(v) ((U32(v) & 0x3fU) << 26U)
#define ram_rl_entry_tsg_length_max_v() (0x00000020U)
#endif
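
The two header generations above encode the TSG length field
differently. Below is a small standalone self-check of those layouts,
with the macros copied from the hunks above so it compiles on its own:

#include <assert.h>
#include <stdint.h>

#define U32(v) ((uint32_t)(v))

/* Volta-style headers above: 8-bit length field at bit 0, max 0x80. */
#define gv11b_tsg_length_f(v)     ((U32(v) & 0xffU) << 0U)
#define gv11b_tsg_length_max_v()  (0x00000080U)

/* gk20a-style headers above: 6-bit length field at bit 26, max 0x20. */
#define gk20a_tsg_length_f(v)     ((U32(v) & 0x3fU) << 26U)
#define gk20a_tsg_length_max_v()  (0x00000020U)

int main(void)
{
        /* The maximum still fits each field's mask, i.e. 128 and 32
         * channels per TSG respectively. */
        assert(gv11b_tsg_length_f(gv11b_tsg_length_max_v()) == 0x80U);
        assert(gk20a_tsg_length_f(gk20a_tsg_length_max_v()) ==
               (0x20U << 26U));
        return 0;
}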

@@ -129,10 +129,20 @@ struct nvgpu_tsg {
* #ch_list.
*/
struct nvgpu_rwsem ch_list_lock;
/**
* Total number of channels that are bound to a TSG. This count is
* incremented when a channel is bound to TSG, and decremented when
* channel is unbound from TSG.
*/
u32 ch_count;
/**
* Total number of active channels that are bound to a TSG. This count
* is incremented when a channel bound to TSG is added into the runlist
* under the same TSG header. Count is decremented when channel bound
* to TSG is removed from the runlist. This count is specifically
* tracked for runlist construction of TSG entry.
*/
u32 num_active_channels;
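
Taken together, the two counters are intended to satisfy a simple
invariant (illustrative assertion form, assuming active channels are
always a subset of bound ones; this is not code from the change):

#include <assert.h>
#include <stdint.h>

struct tsg_counts {
        uint32_t ch_count;              /* channels bound to the TSG */
        uint32_t num_active_channels;   /* bound channels in the runlist */
};

static void check_invariant(const struct tsg_counts *t, uint32_t max_ch)
{
        /* Active channels are a subset of bound channels, and the
         * bound count can never pass the per-chip HW limit. */
        assert(t->num_active_channels <= t->ch_count);
        assert(t->ch_count <= max_ch);
}

int main(void)
{
        struct tsg_counts t = { .ch_count = 2U, .num_active_channels = 1U };

        check_invariant(&t, 0x80U);     /* 128-channel limit on gv11b */
        return 0;
}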