gpu: nvgpu: fix arbiter teardown on PCI

The driver does not properly tear down the arbiter when the PCI driver
is unloaded. This change makes sure that the arbiter workqueues are
drained before the driver is torn down.

bug 200277762
JIRA: EVLR-1023

Change-Id: If98fd00e27949ba1569dd26e2af02b75897231a7
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1320147
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
(cherry picked from commit 469308beca)
Reviewed-on: http://git-master/r/1324636
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Sumeet Gupta <sumeetg@nvidia.com>
This commit is contained in:
David Nieto
2017-02-13 11:22:59 -08:00
committed by mobile promotions
parent ce057d784d
commit 26f904e2b7
3 changed files with 62 additions and 25 deletions

View File

@@ -402,7 +402,8 @@ void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
struct nvgpu_clk_arb *arb = g->clk_arb;
nvgpu_clk_arb_set_global_alarm(g, alarm);
queue_work(arb->update_work_queue, &arb->update_fn_work);
if (arb->update_work_queue)
queue_work(arb->update_work_queue, &arb->update_fn_work);
}
static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
@@ -454,7 +455,30 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
/*
 * Tear down the arbiter attached to @g and release its memory.
 *
 * Pending work items are cancelled and both workqueues are destroyed
 * before any of the arbiter's buffers are freed. Does nothing when no
 * arbiter is attached (g->clk_arb == NULL), so calling it again after a
 * completed teardown is harmless.
 */
void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	int index;

	/* Nothing to tear down; avoids dereferencing a NULL arbiter below. */
	if (!arb)
		return;

	cancel_work_sync(&arb->vf_table_fn_work);
	destroy_workqueue(arb->vf_table_work_queue);
	arb->vf_table_work_queue = NULL;

	cancel_work_sync(&arb->update_fn_work);
	destroy_workqueue(arb->update_work_queue);
	arb->update_work_queue = NULL;

	kfree(arb->gpc2clk_f_points);
	kfree(arb->mclk_f_points);

	for (index = 0; index < 2; index++) {
		kfree(arb->vf_table_pool[index].gpc2clk_points);
		kfree(arb->vf_table_pool[index].mclk_points);
	}

	nvgpu_mutex_destroy(&arb->pstate_lock);
	kfree(arb);
	g->clk_arb = NULL;
}
static int nvgpu_clk_arb_install_fd(struct gk20a *g,
@@ -573,9 +597,11 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount)
gk20a_dbg_fn("");
nvgpu_spinlock_acquire(&arb->sessions_lock);
list_del_rcu(&session->link);
nvgpu_spinlock_release(&arb->sessions_lock);
if (arb) {
nvgpu_spinlock_acquire(&arb->sessions_lock);
list_del_rcu(&session->link);
nvgpu_spinlock_release(&arb->sessions_lock);
}
head = llist_del_all(&session->targets);
llist_for_each_entry_safe(dev, tmp, head, node) {
@@ -594,8 +620,8 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
session->zombie = true;
kref_put(&session->refcount, nvgpu_clk_arb_free_session);
queue_work(arb->update_work_queue, &arb->update_fn_work);
if (arb && arb->update_work_queue)
queue_work(arb->update_work_queue, &arb->update_fn_work);
}
int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -962,8 +988,8 @@ exit_vf_table:
if (status < 0)
nvgpu_clk_arb_set_global_alarm(g,
EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
queue_work(arb->update_work_queue, &arb->update_fn_work);
if (arb->update_work_queue)
queue_work(arb->update_work_queue, &arb->update_fn_work);
return status;
}
@@ -971,8 +997,8 @@ exit_vf_table:
void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
{
struct nvgpu_clk_arb *arb = g->clk_arb;
queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
if (arb->vf_table_work_queue)
queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
}
static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
@@ -989,8 +1015,9 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
"failed to cache VF table");
nvgpu_clk_arb_set_global_alarm(g,
EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
queue_work(arb->update_work_queue, &arb->update_fn_work);
if (arb->update_work_queue)
queue_work(arb->update_work_queue,
&arb->update_fn_work);
return;
}
@@ -1488,8 +1515,8 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
}
kref_get(&dev->refcount);
llist_add(&dev->node, &session->targets);
queue_work(arb->update_work_queue, &arb->update_fn_work);
if (arb->update_work_queue)
queue_work(arb->update_work_queue, &arb->update_fn_work);
fdput_fd:
fdput(fd);
@@ -1566,15 +1593,12 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_clk_session *session = dev->session;
struct nvgpu_clk_arb *arb;
arb = session->g->clk_arb;
gk20a_dbg_fn("");
kref_put(&session->refcount, nvgpu_clk_arb_free_session);
kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
return 0;
}
@@ -1589,15 +1613,17 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
gk20a_dbg_fn("");
nvgpu_spinlock_acquire(&arb->users_lock);
list_del_rcu(&dev->link);
nvgpu_spinlock_release(&arb->users_lock);
if (arb) {
nvgpu_spinlock_acquire(&arb->users_lock);
list_del_rcu(&dev->link);
nvgpu_spinlock_release(&arb->users_lock);
}
synchronize_rcu();
kref_put(&session->refcount, nvgpu_clk_arb_free_session);
nvgpu_clk_notification_queue_free(&dev->queue);
kfree(dev);
kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
return 0;
}

View File

@@ -39,6 +39,7 @@
struct gk20a_ctrl_priv {
struct device *dev;
struct gk20a *g;
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
struct nvgpu_clk_session *clk_session;
#endif
@@ -58,28 +59,33 @@ int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
priv = kzalloc(sizeof(struct gk20a_ctrl_priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
filp->private_data = priv;
priv->dev = g->dev;
/*
* We don't close the arbiter fds after driver teardown to support
* GPU_LOST events, so we store g here instead of dereferencing the
* dev structure on teardown
*/
priv->g = g;
if (!g->gr.sw_ready) {
err = gk20a_busy(g->dev);
if (err)
return err;
gk20a_idle(g->dev);
}
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
err = nvgpu_clk_arb_init_session(g, &priv->clk_session);
if (err)
return err;
#endif
return err;
}
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_ctrl_priv *priv = filp->private_data;
gk20a_dbg_fn("");
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
@@ -87,6 +93,7 @@ int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
nvgpu_clk_arb_release_session(gk20a_from_dev(priv->dev),
priv->clk_session);
#endif
kfree(priv);
return 0;

View File

@@ -460,6 +460,10 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
gk20a_wait_for_idle(&pdev->dev);
gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n");
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
nvgpu_clk_arb_cleanup_arbiter(g);
#endif
gk20a_user_deinit(g->dev, &nvgpu_pci_class);
gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b");