diff --git a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
index 5515ce6e1..a54e6f785 100644
--- a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
+++ b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
@@ -344,6 +344,8 @@ nvgpu_nvs_domain_by_id_locked(struct gk20a *g, u64 domain_id)
 	struct nvgpu_nvs_scheduler *sched = g->scheduler;
 	struct nvs_domain *nvs_dom;
 
+	nvgpu_log(g, gpu_dbg_nvs, "lookup %llu", domain_id);
+
 	nvs_domain_for_each(sched->sched, nvs_dom) {
 		struct nvgpu_nvs_domain *nvgpu_dom = nvs_dom->priv;
 
@@ -360,6 +362,8 @@ nvgpu_nvs_domain_by_id(struct gk20a *g, u64 domain_id)
 {
 	struct nvgpu_nvs_domain *dom = NULL;
 
+	nvgpu_log(g, gpu_dbg_nvs, "lookup %llu", domain_id);
+
 	nvgpu_mutex_acquire(&g->sched_mutex);
 
 	dom = nvgpu_nvs_domain_by_id_locked(g, domain_id);
@@ -381,6 +385,8 @@ nvgpu_nvs_domain_by_name(struct gk20a *g, const char *name)
 	struct nvgpu_nvs_domain *dom = NULL;
 	struct nvgpu_nvs_scheduler *sched = g->scheduler;
 
+	nvgpu_log(g, gpu_dbg_nvs, "lookup %s", name);
+
 	nvgpu_mutex_acquire(&g->sched_mutex);
 
 	nvs_dom = nvs_domain_by_name(sched->sched, name);
@@ -396,11 +402,27 @@ unlock:
 	return dom;
 }
 
+/*
+ * Take an additional reference on a domain. The caller is expected to hold
+ * one already (a zero refcount triggers a warning). Runs under g->sched_mutex.
+ */
+void nvgpu_nvs_domain_get(struct gk20a *g, struct nvgpu_nvs_domain *dom)
+{
+	nvgpu_mutex_acquire(&g->sched_mutex);
+	WARN_ON(dom->ref == 0U);
+	dom->ref++;
+	nvgpu_log(g, gpu_dbg_nvs, "domain %s: ref++ = %u",
+		dom->parent->name, dom->ref);
+	nvgpu_mutex_release(&g->sched_mutex);
+}
+
 void nvgpu_nvs_domain_put(struct gk20a *g, struct nvgpu_nvs_domain *dom)
 {
 	nvgpu_mutex_acquire(&g->sched_mutex);
 	dom->ref--;
 	WARN_ON(dom->ref == 0U);
+	nvgpu_log(g, gpu_dbg_nvs, "domain %s: ref-- = %u",
+		dom->parent->name, dom->ref);
 	nvgpu_mutex_release(&g->sched_mutex);
 }
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvs.h b/drivers/gpu/nvgpu/include/nvgpu/nvs.h
index b141fe464..d44c9f106 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvs.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvs.h
@@ -38,6 +38,7 @@
 #define NVS_LOG_BUF_SIZE 128
 
 struct gk20a;
+struct nvgpu_nvs_domain_ioctl;
 
 /*
  * NvGPU KMD domain implementation details for nvsched.
@@ -65,6 +66,11 @@ struct nvgpu_nvs_domain {
 	 * ioctl layer and a TSG putting a ref does not result in domain deletion.
 	 */
 	u32 ref;
+
+	/*
+	 * Userspace API on the device nodes.
+	 */
+	struct nvgpu_nvs_domain_ioctl *ioctl;
 };
 
 struct nvgpu_nvs_worker {
@@ -95,6 +101,7 @@ struct nvgpu_nvs_domain *
 nvgpu_nvs_domain_by_id(struct gk20a *g, u64 domain_id);
 struct nvgpu_nvs_domain *
 nvgpu_nvs_domain_by_name(struct gk20a *g, const char *name);
+void nvgpu_nvs_domain_get(struct gk20a *g, struct nvgpu_nvs_domain *dom);
 void nvgpu_nvs_domain_put(struct gk20a *g, struct nvgpu_nvs_domain *dom);
 /*
  * Debug wrapper for NVS code.
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_nvs.c b/drivers/gpu/nvgpu/os/linux/ioctl_nvs.c
index 67ce5f1c2..e8bf28ba1 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_nvs.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_nvs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -13,6 +13,7 @@
 
 #include
 #include
+#include
 #include
 
 #include
@@ -20,12 +21,70 @@
 #include
 #include
 #include
+#include "os_linux.h"
 
 #include
 #include
 
 #include "ioctl.h"
 
+/*
+ * OS-specific layer to hold device node mapping for a domain.
+ */
+struct nvgpu_nvs_domain_ioctl {
+	struct gk20a *g;
+	struct nvgpu_nvs_domain *domain;
+	struct cdev *cdev;
+	struct device *dev;
+	struct nvgpu_class *class;
+	struct list_head list; /* entry in cdev_lookup_list */
+};
+
+/*
+ * This lock serializes domain removal and opening of domain device nodes.
+ */
+static DEFINE_MUTEX(cdev_lookup_mutex);
+/*
+ * A list of struct nvgpu_nvs_domain_ioctl objects.
+ */
+static LIST_HEAD(cdev_lookup_list);
+
+/*
+ * Priv data for an open domain device file.
+ *
+ * While a domain device is open, it holds a ref to the domain.
+ */
+struct nvgpu_nvs_domain_file_private {
+	struct gk20a *g;
+	struct nvgpu_nvs_domain *domain;
+};
+
+/*
+ * Find the domain ioctl object registered for device number dev.
+ *
+ * On a match a domain ref is taken while cdev_lookup_mutex is still held.
+ * Returns NULL if no domain device has this number.
+ */
+static struct nvgpu_nvs_domain_ioctl *nvgpu_nvs_lookup_cdev(dev_t dev)
+{
+	struct nvgpu_nvs_domain_ioctl *ioctl, *ret = NULL;
+
+	mutex_lock(&cdev_lookup_mutex);
+
+	list_for_each_entry(ioctl, &cdev_lookup_list, list) {
+		if (ioctl->cdev->dev == dev) {
+			/* put back in nvgpu_nvs_domain_dev_release */
+			nvgpu_nvs_domain_get(ioctl->g, ioctl->domain);
+			ret = ioctl;
+			goto out;
+		}
+	}
+
+out:
+	mutex_unlock(&cdev_lookup_mutex);
+	return ret;
+}
+
 int nvgpu_nvs_dev_open(struct inode *inode, struct file *filp)
 {
 	struct nvgpu_cdev *cdev;
@@ -48,6 +107,239 @@ int nvgpu_nvs_dev_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+/*
+ * Set up the per-file private data for an opened domain device node.
+ *
+ * Takes a reference on g via nvgpu_get(); it is dropped again here on
+ * failure, or in nvgpu_nvs_domain_dev_release() on success. The domain ref
+ * is owned by the caller. Returns 0, -ENODEV or -ENOMEM.
+ */
+static int nvgpu_nvs_domain_dev_do_open(struct gk20a *g,
+		struct nvgpu_nvs_domain *domain,
+		struct file *filp)
+{
+	struct nvgpu_nvs_domain_file_private *priv;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_nvs, "opening domain %s",
+		domain->parent->name);
+
+	g = nvgpu_get(g);
+	if (!g)
+		return -ENODEV;
+
+	priv = nvgpu_kzalloc(g, sizeof(*priv));
+	if (!priv) {
+		err = -ENOMEM;
+		goto put_ref;
+	}
+
+	priv->g = g;
+	priv->domain = domain;
+	filp->private_data = priv;
+
+	return 0;
+
+put_ref:
+	nvgpu_put(g);
+	return err;
+}
+
+/*
+ * open() handler for domain device nodes.
+ *
+ * Maps the inode's cdev to its domain; the lookup takes a domain ref that is
+ * kept while the file stays open and dropped here if the open fails.
+ * Returns -ENXIO if no domain is registered for this device number.
+ */
+static int nvgpu_nvs_domain_dev_open(struct inode *inode, struct file *filp)
+{
+	struct nvgpu_nvs_domain_ioctl *ioctl;
+	struct cdev *cdev = inode->i_cdev;
+	struct nvgpu_nvs_domain *domain;
+	struct gk20a *g;
+	int err;
+
+	ioctl = nvgpu_nvs_lookup_cdev(cdev->dev);
+	if (ioctl == NULL) {
+		return -ENXIO;
+	}
+
+	g = ioctl->g;
+	domain = ioctl->domain;
+
+	err = nvgpu_nvs_domain_dev_do_open(g, domain, filp);
+	if (err) {
+		nvgpu_nvs_domain_put(g, domain);
+	}
+
+	return err;
+}
+
+/*
+ * release() handler: drops the domain and GPU refs taken at open time and
+ * frees the private data.
+ */
+static int nvgpu_nvs_domain_dev_release(struct inode *inode, struct file *filp)
+{
+	struct nvgpu_nvs_domain_file_private *priv = filp->private_data;
+	struct nvgpu_nvs_domain *domain;
+	struct gk20a *g;
+
+	if (!priv)
+		return 0;
+
+	g = priv->g;
+	domain = priv->domain;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_nvs, "releasing domain %s",
+		domain->parent->name);
+
+	/* this was taken when the file was opened */
+	nvgpu_nvs_domain_put(g, domain);
+
+	nvgpu_kfree(g, priv);
+	nvgpu_put(g);
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+static const struct file_operations nvgpu_nvs_domain_ops = {
+	.owner = THIS_MODULE,
+	.open = nvgpu_nvs_domain_dev_open,
+	.release = nvgpu_nvs_domain_dev_release,
+};
+
+/*
+ * Resolve a file descriptor to the domain whose device node it has open.
+ *
+ * Returns NULL if fd is not an open domain device node. Otherwise a domain
+ * ref is taken on behalf of the caller before the file ref is dropped.
+ */
+struct nvgpu_nvs_domain *nvgpu_nvs_domain_get_from_file(int fd)
+{
+	struct nvgpu_nvs_domain_file_private *priv;
+	struct nvgpu_nvs_domain *domain;
+	struct file *f = fget(fd);
+
+	if (!f)
+		return NULL;
+
+	if (f->f_op != &nvgpu_nvs_domain_ops) {
+		fput(f);
+		return NULL;
+	}
+
+	priv = (struct nvgpu_nvs_domain_file_private *)f->private_data;
+	domain = priv->domain;
+
+	nvgpu_log(priv->g, gpu_dbg_fn | gpu_dbg_nvs, "domain %s",
+		domain->parent->name);
+	nvgpu_nvs_domain_get(priv->g, domain);
+	fput(f);
+
+	return domain;
+}
+
+/*
+ * Create the "nvsched-<name>" device node for a domain and add it to
+ * cdev_lookup_list. domain->ioctl must already be allocated.
+ *
+ * Called with cdev_lookup_mutex held. Returns 0 or a negative errno.
+ */
+static int create_domain_dev(struct gk20a *g,
+		struct nvgpu_nvs_domain *domain)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct nvs_domain *nvs_domain = domain->parent;
+	struct nvgpu_nvs_domain_ioctl *ioctl = domain->ioctl;
+	char name[sizeof("nvsched-") + ARRAY_SIZE(nvs_domain->name)];
+	struct nvgpu_class *class;
+	dev_t devno;
+	unsigned int minor;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_nvs, " ");
+
+	class = nvgpu_get_v2_user_class(g);
+	if (class == NULL) {
+		/* MIG? */
+		dev_err(dev, "unsupported GPU for scheduling");
+		return -ENOSYS;
+	}
+
+	minor = nvgpu_allocate_cdev_minor(g);
+	devno = MKDEV(MAJOR(l->cdev_region), minor);
+	err = register_chrdev_region(devno, 1, dev_name(dev));
+	if (err) {
+		dev_err(dev, "failed to allocate devno");
+		return err;
+	}
+
+	snprintf(name, sizeof(name), "nvsched-%s", nvs_domain->name);
+
+	ioctl->g = g;
+	ioctl->domain = domain;
+	INIT_LIST_HEAD(&ioctl->list);
+	ioctl->class = class;
+	ioctl->cdev = cdev_alloc();
+	if (ioctl->cdev == NULL) {
+		dev_err(dev, "failed to allocate cdev");
+		err = -ENOMEM;
+		goto unregister_region;
+	}
+	ioctl->cdev->ops = &nvgpu_nvs_domain_ops;
+
+	err = nvgpu_create_device(dev, devno, name,
+			ioctl->cdev, &ioctl->dev, class);
+	if (err) {
+		/*
+		 * NOTE(review): ioctl->cdev is not released on this path;
+		 * confirm whether nvgpu_create_device() cleans it up.
+		 */
+		goto unregister_region;
+	}
+
+	list_add_tail(&ioctl->list, &cdev_lookup_list);
+
+	return 0;
+
+unregister_region:
+	unregister_chrdev_region(devno, 1);
+	return err;
+}
+
+/*
+ * Tear down a domain's device node and drop it from cdev_lookup_list.
+ *
+ * Takes the ioctl object rather than the domain: the caller has already
+ * asked nvgpu_nvs_del_domain() to delete the domain by the time this runs,
+ * so the domain must not be dereferenced here. Called with
+ * cdev_lookup_mutex held.
+ */
+static void delete_domain_dev(struct gk20a *g,
+		struct nvgpu_nvs_domain_ioctl *ioctl)
+{
+	dev_t dev = ioctl->cdev->dev;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_nvs, " ");
+	/*
+	 * note: we're under the lookup lock, so no new open would succeed after this.
+	 *
+	 * nvgpu_nvs_domain_dev_open() might be waiting for the lock now. Open
+	 * cdevs remain accessible even after cdev deletion, but we won't get
+	 * here until all successfully opened devices have been closed because
+	 * they hold domain refs.
+	 */
+	list_del(&ioctl->list);
+
+	device_destroy(nvgpu_class_get_class(ioctl->class), dev);
+	cdev_del(ioctl->cdev);
+	unregister_chrdev_region(dev, 1);
+}
+
 static int nvgpu_nvs_ioctl_create_domain(
 		struct gk20a *g,
 		struct nvgpu_nvs_ioctl_create_domain *dom_args)
@@ -55,6 +347,8 @@ static int nvgpu_nvs_ioctl_create_domain(
 	struct nvgpu_nvs_domain *domain = NULL;
 	int err;
 
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_nvs, " ");
+
 	if (dom_args->reserved1 != 0) {
 		return -EINVAL;
 	}
@@ -88,12 +382,36 @@ static int nvgpu_nvs_ioctl_create_domain(
 
 	dom_args->domain_params.dom_id = domain->id;
 
+	domain->ioctl = nvgpu_kzalloc(g, sizeof(*domain->ioctl));
+	if (domain->ioctl == NULL) {
+		err = -ENOMEM;
+		goto del_domain;
+	}
+
+	mutex_lock(&cdev_lookup_mutex);
+	err = create_domain_dev(g, domain);
+	mutex_unlock(&cdev_lookup_mutex);
+	if (err != 0) {
+		goto free_ioctl;
+	}
+
 	return 0;
+free_ioctl:
+	nvgpu_kfree(g, domain->ioctl);
+del_domain:
+	nvgpu_nvs_del_domain(g, domain->id);
+	return err;
 }
 
 static int nvgpu_nvs_ioctl_remove_domain(struct gk20a *g,
 		struct nvgpu_nvs_ioctl_remove_domain *args)
 {
+	struct nvgpu_nvs_domain_ioctl *ioctl;
+	struct nvgpu_nvs_domain *domain;
+	int ret;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_nvs, " ");
+
 	if (args->reserved1 != 0) {
 		return -EINVAL;
 	}
@@ -102,10 +420,34 @@ static int nvgpu_nvs_ioctl_remove_domain(struct gk20a *g,
 		return -ENOSYS;
 	}
 
-	return nvgpu_nvs_del_domain(g, args->dom_id);
+	domain = nvgpu_nvs_domain_by_id(g, args->dom_id);
+	if (domain == NULL) {
+		nvgpu_err(g, "domain %llu does not exist!", args->dom_id);
+		return -ENOENT;
+	}
+
+	ioctl = domain->ioctl;
+
+	mutex_lock(&cdev_lookup_mutex);
+
+	nvgpu_nvs_domain_put(g, domain);
+	ret = nvgpu_nvs_del_domain(g, args->dom_id);
+
+	/*
+	 * note: the internal default domain lacks ->ioctl. Only the saved
+	 * ioctl pointer is used from here on; the domain must not be
+	 * dereferenced once its deletion has succeeded.
+	 */
+	if (ret == 0 && ioctl != NULL) {
+		delete_domain_dev(g, ioctl);
+		nvgpu_kfree(g, ioctl);
+	}
+
+	mutex_unlock(&cdev_lookup_mutex);
+
+	return ret;
 }
 
-static int nvgpu_nvs_ioctl_query_domains(
+static int nvgpu_nvs_ioctl_query_domains_locked(
 		struct gk20a *g,
 		void __user *user_arg,
 		struct nvgpu_nvs_ioctl_query_domains *args)
@@ -117,6 +460,8 @@ static int nvgpu_nvs_ioctl_query_domains(
 	struct nvgpu_nvs_ioctl_domain *args_domains =
 		(void __user *)(uintptr_t)args->domains;
 
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_nvs, " ");
+
 	if (args->reserved0 != 0) {
 		return -EINVAL;
 	}
@@ -130,7 +475,7 @@ static int nvgpu_nvs_ioctl_query_domains(
 		}
 
 		/* First call variant: return number of domains. */
-		args->nr = nvgpu_nvs_domain_count(g);
+		args->nr = nvs_domain_count(g->scheduler->sched);
 		if (copy_to_user(user_arg, args, sizeof(*args))) {
 			return -EFAULT;
 		}
@@ -149,8 +494,8 @@ static int nvgpu_nvs_ioctl_query_domains(
 
 		nvgpu_dom = nvs_dom->priv;
 
-		nvs_dbg(g, "Copying dom #%u [%s] (%llu)",
-			index, nvs_dom->name, nvgpu_dom->id);
+		nvs_dbg(g, "Copying dom #%u [%s] (%llu) (%u refs)",
+			index, nvs_dom->name, nvgpu_dom->id, nvgpu_dom->ref);
 
 		(void)memset(&dom, 0, sizeof(dom));
 
@@ -173,6 +518,22 @@ static int nvgpu_nvs_ioctl_query_domains(
 	return 0;
 }
 
+/*
+ * Locked wrapper: runs the domain query with g->sched_mutex held.
+ */
+static int nvgpu_nvs_ioctl_query_domains(
+		struct gk20a *g,
+		void __user *user_arg,
+		struct nvgpu_nvs_ioctl_query_domains *args)
+{
+	int err;
+
+	nvgpu_mutex_acquire(&g->sched_mutex);
+	err = nvgpu_nvs_ioctl_query_domains_locked(g, user_arg, args);
+	nvgpu_mutex_release(&g->sched_mutex);
+	return err;
+}
+
 long nvgpu_nvs_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	u8 buf[NVGPU_NVS_IOCTL_MAX_ARG_SIZE] = { 0 };
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_nvs.h b/drivers/gpu/nvgpu/os/linux/ioctl_nvs.h
index 9b21f5e0d..818fd9f15 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_nvs.h
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_nvs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -24,5 +24,6 @@ int nvgpu_nvs_dev_release(struct inode *inode, struct file *filp);
 long nvgpu_nvs_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 ssize_t nvgpu_nvs_dev_read(struct file *filp, char __user *buf,
 		size_t size, loff_t *off);
+struct nvgpu_nvs_domain *nvgpu_nvs_domain_get_from_file(int fd);
 
 #endif
diff --git a/include/uapi/linux/nvgpu-nvs.h b/include/uapi/linux/nvgpu-nvs.h
index 7f76ce999..75449bc4a 100644
--- a/include/uapi/linux/nvgpu-nvs.h
+++ b/include/uapi/linux/nvgpu-nvs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -87,6 +87,14 @@ struct nvgpu_nvs_ioctl_create_domain {
 	__u64 reserved1;
 };
 
+/**
+ * NVGPU_NVS_IOCTL_REMOVE_DOMAIN
+ *
+ * Remove a domain that has been previously created.
+ *
+ * The domain must be empty; it must have no TSGs bound to it. The domain's
+ * device node must not be open by anyone.
+ */
 struct nvgpu_nvs_ioctl_remove_domain {
 	/*
 	 * In: a domain_id to remove.