gpu: nvgpu: hold ch ref when getting ch from fd

Fix a race condition in gk20a_get_channel_from_file() that returns a
channel pointer from an fd: take a reference to the channel before
putting the file ref back. Now the caller is responsible of releasing
the channel reference eventually.

Also document why dbg_session_channel_data has to hold a ref to the
channel file instead of just the channel: that might deadlock if the fds
were closed in "wrong" order.

Change-Id: I8e91b809f5f7b1cb0c1487bd955ad6d643727a53
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549290
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Holtta
2017-08-31 13:01:26 +03:00
committed by mobile promotions
parent de0ce3e017
commit 17451138cf
6 changed files with 58 additions and 16 deletions

View File

@@ -42,14 +42,19 @@ static int gk20a_as_ioctl_bind_channel(
gk20a_dbg_fn("");
ch = gk20a_get_channel_from_file(args->channel_fd);
if (!ch || gk20a_channel_as_bound(ch))
if (!ch)
return -EINVAL;
if (gk20a_channel_as_bound(ch)) {
err = -EINVAL;
goto out;
}
/* this will set channel_gk20a->vm */
err = ch->g->ops.mm.vm_bind_channel(as_share, ch);
if (err)
return err;
out:
gk20a_channel_put(ch);
return err;
}

View File

@@ -260,8 +260,15 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
return 0;
}
/*
* This returns the channel with a reference. The caller must
* gk20a_channel_put() the ref back after use.
*
* NULL is returned if the channel was not found.
*/
struct channel_gk20a *gk20a_get_channel_from_file(int fd)
{
struct channel_gk20a *ch;
struct channel_priv *priv;
struct file *f = fget(fd);
@@ -274,8 +281,9 @@ struct channel_gk20a *gk20a_get_channel_from_file(int fd)
}
priv = (struct channel_priv *)f->private_data;
ch = gk20a_channel_get(priv->c);
fput(f);
return priv->c;
return ch;
}
int gk20a_channel_release(struct inode *inode, struct file *filp)

View File

@@ -354,6 +354,8 @@ static int nvgpu_gpu_ioctl_inval_icache(
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
err = g->ops.gr.inval_icache(g, ch);
nvgpu_mutex_release(&g->dbg_sessions_lock);
gk20a_channel_put(ch);
return err;
}
@@ -393,6 +395,7 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
err = -ENOSYS;
nvgpu_mutex_release(&g->dbg_sessions_lock);
gk20a_channel_put(ch);
return err;
}

View File

@@ -49,6 +49,8 @@ static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
return -EINVAL;
err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
gk20a_channel_put(ch);
return err;
}

View File

@@ -514,7 +514,8 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
if (!channel_found) {
gk20a_dbg_fn("channel not bounded, fd=%d\n", args->channel_fd);
return -EINVAL;
err = -EINVAL;
goto out;
}
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
@@ -523,6 +524,8 @@ static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
nvgpu_mutex_release(&dbg_s->ch_list_lock);
nvgpu_mutex_release(&g->dbg_sessions_lock);
out:
gk20a_channel_put(ch);
return err;
}
@@ -584,13 +587,16 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
struct channel_gk20a *ch;
struct dbg_session_channel_data *ch_data;
struct dbg_session_data *session_data;
int err = 0;
gk20a_dbg(gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
g->name, args->channel_fd);
/* even though get_file_channel is doing this it releases it as well */
/* by holding it here we'll keep it from disappearing while the
* debugger is in session */
/*
* Although gk20a_get_channel_from_file gives us a channel ref, need to
* hold a ref to the file during the session lifetime. See comment in
* struct dbg_session_channel_data.
*/
f = fget(args->channel_fd);
if (!f)
return -ENODEV;
@@ -598,8 +604,8 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
ch = gk20a_get_channel_from_file(args->channel_fd);
if (!ch) {
gk20a_dbg_fn("no channel found for fd");
fput(f);
return -EINVAL;
err = -EINVAL;
goto out_fput;
}
gk20a_dbg_fn("%s hwchid=%d", g->name, ch->chid);
@@ -609,8 +615,8 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
ch_data = nvgpu_kzalloc(g, sizeof(*ch_data));
if (!ch_data) {
fput(f);
return -ENOMEM;
err = -ENOMEM;
goto out_chput;
}
ch_data->ch_f = f;
ch_data->channel_fd = args->channel_fd;
@@ -619,9 +625,8 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
session_data = nvgpu_kzalloc(g, sizeof(*session_data));
if (!session_data) {
nvgpu_kfree(g, ch_data);
fput(f);
return -ENOMEM;
err = -ENOMEM;
goto out_kfree;
}
session_data->dbg_s = dbg_s;
nvgpu_init_list_node(&session_data->dbg_s_entry);
@@ -636,7 +641,19 @@ static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
nvgpu_mutex_release(&ch->dbg_s_lock);
nvgpu_mutex_release(&g->dbg_sessions_lock);
gk20a_channel_put(ch);
return 0;
out_kfree:
nvgpu_kfree(g, ch_data);
out_chput:
gk20a_channel_put(ch);
nvgpu_mutex_release(&ch->dbg_s_lock);
nvgpu_mutex_release(&g->dbg_sessions_lock);
out_fput:
fput(f);
return err;
}
static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,

View File

@@ -93,6 +93,13 @@ dbg_session_data_from_dbg_s_entry(struct nvgpu_list_node *node)
};
struct dbg_session_channel_data {
/*
* We have to keep a ref to the _file_, not the channel, because
* close(channel_fd) is synchronous and would deadlock if we had an
* open debug session fd holding a channel ref at that time. Holding a
* ref to the file makes close(channel_fd) just drop a kernel ref to
* the file; the channel will close when the last file ref is dropped.
*/
struct file *ch_f;
int channel_fd;
int chid;