From be507aea5020b0524b75a89b11618c54347c5411 Mon Sep 17 00:00:00 2001 From: dt Date: Fri, 30 Apr 2021 06:13:50 +0000 Subject: [PATCH] gpu: nvgpu: MIG mode selection at runtime Add code to select the MIG mode and boot the GPU with the selected MIG config. To test MIG, after the system boots: 1. Write the chosen config id (x) to mig_mode_config: echo x > /sys/devices/gpu.0/mig_mode_config for igpu echo x > /sys/devices/./platform/14100000.pcie/pci0001:00/0001:00:00.0/0001:01:00.0/mig_mode_config for dgpu 2. Then run any nvgpu* tests or nvrm_gpu_info. If the mig_mode needs to be changed, note down the supported configs from "cat mig_mode_config_list" and reboot the system. 3. Repeat steps 1 and 2. Example output: "cat mig_mode_config" 2 "cat mig_mode_config_list" +++++++++ Config list Start ++++++++++ CONFIG_ID : 0 for CONFIG NAME : 2 GPU instances each with 4 GPCs CONFIG_ID : 1 for CONFIG NAME : 4 GPU instances each with 2 GPCs CONFIG_ID : 2 for CONFIG NAME : 7 GPU instances - 1 GPU instance with 2 GPCs + 6 GPU instances each with 1 GPC CONFIG_ID : 3 for CONFIG NAME : 5 GPU instances - 1 GPU instance with 4 GPCs + 4 GPU instances each with 1 GPC CONFIG_ID : 4 for CONFIG NAME : 4 GPU instances - 1 GPU instance with 2 GPCs + 2 GPU instances each with 1 GPC + 1 GPU instance with 4 GPCs CONFIG_ID : 5 for CONFIG NAME : 6 GPU instances - 2 GPU instances each with 2 GPCs + 4 GPU instances each with 1 GPC CONFIG_ID : 6 for CONFIG NAME : 5 GPU instances - 1 GPU instance with 2 GPCs + 2 GPU instances each with 1 GPC + 2 GPU instances with 2 GPCs CONFIG_ID : 7 for CONFIG NAME : 5 GPU instances - 2 GPU instances each with 2 GPCs + 1 GPC instance with 2 GPCs + 2 GPU instances with 1 GPC CONFIG_ID : 8 for CONFIG NAME : 5 GPU instances - 1 GPC instance with 2 GPCs + 2 GPU instances each with 1 GPC + 2 GPU instances each with 2 GPCs CONFIG_ID : 9 for CONFIG NAME : 1 GPU instance with 8 GPCs ++++++++++ Config list End +++++++++++ JIRA NVGPU-6633 Change-Id: I3e56f8c836e1ced8753a60f328da63916faa7696 Signed-off-by: dt 
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2522821 Tested-by: mobile promotions Reviewed-by: mobile promotions --- drivers/gpu/nvgpu/common/grmgr/grmgr.c | 1 - drivers/gpu/nvgpu/include/nvgpu/mig.h | 5 -- drivers/gpu/nvgpu/os/linux/ioctl.c | 107 +++++++------------------ drivers/gpu/nvgpu/os/linux/sysfs.c | 13 ++- 4 files changed, 37 insertions(+), 89 deletions(-) diff --git a/drivers/gpu/nvgpu/common/grmgr/grmgr.c b/drivers/gpu/nvgpu/common/grmgr/grmgr.c index a69e06020..70f6603f7 100644 --- a/drivers/gpu/nvgpu/common/grmgr/grmgr.c +++ b/drivers/gpu/nvgpu/common/grmgr/grmgr.c @@ -42,7 +42,6 @@ int nvgpu_init_gr_manager(struct gk20a *g) g->mig.gpc_count = g->ops.priv_ring.get_gpc_count(g); nvgpu_assert(g->mig.gpc_count > 0U); g->mig.num_gpu_instances = 1U; - g->mig.current_gpu_instance_config_id = 0U; g->mig.is_nongr_engine_sharable = false; gpu_instance->gpu_instance_id = 0U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/mig.h b/drivers/gpu/nvgpu/include/nvgpu/mig.h index d62dd10e1..6c560a9e7 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/mig.h +++ b/drivers/gpu/nvgpu/include/nvgpu/mig.h @@ -221,11 +221,6 @@ struct nvgpu_mig { struct nvgpu_mutex gr_syspipe_lock; /** Gpu instance configuration id. */ u32 current_gpu_instance_config_id; - /** - * current mig_instance_config. - */ - const struct nvgpu_mig_gpu_instance_config - *current_mig_gpu_instance_config; /** * Flag to indicate whether nonGR(CE) engine is sharable * between gr syspipes or not. 
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.c b/drivers/gpu/nvgpu/os/linux/ioctl.c index 8e73dd9c0..e72944c91 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl.c @@ -231,24 +231,6 @@ static char *nvgpu_pci_devnode_v2(struct device *dev, umode_t *mode) dev_name(dev)); } -static char *nvgpu_mig_phys_devnode(struct device *dev, umode_t *mode) -{ - struct nvgpu_cdev_class_priv_data *priv_data; - - if (mode) { - *mode = S_IRUSR | S_IWUSR; - } - - priv_data = dev_get_drvdata(dev); - - if (priv_data->pci) { - return kasprintf(GFP_KERNEL, "nvgpu/dgpu-%s/%s", - dev_name(dev->parent), dev_name(dev)); - } - - return kasprintf(GFP_KERNEL, "nvgpu/igpu0/%s", dev_name(dev)); -} - static char *nvgpu_mig_fgpu_devnode(struct device *dev, umode_t *mode) { struct nvgpu_cdev_class_priv_data *priv_data; @@ -388,60 +370,19 @@ struct nvgpu_mig_static_info { u32 minor_instance_id; }; -static const struct nvgpu_mig_static_info nvgpu_default_mig_static_info[] = -{ - { - .instance_type = NVGPU_MIG_TYPE_PHYSICAL, - }, - { - .instance_type = NVGPU_MIG_TYPE_MIG, - .major_instance_id = 0, - .minor_instance_id = 0, - }, - { - .instance_type = NVGPU_MIG_TYPE_MIG, - .major_instance_id = 0, - .minor_instance_id = 1, - }, -}; - -static const struct nvgpu_mig_static_info nvgpu_default_pci_mig_static_info[] = -{ - { - .instance_type = NVGPU_MIG_TYPE_PHYSICAL, - }, - { - .instance_type = NVGPU_MIG_TYPE_MIG, - .major_instance_id = 1, - .minor_instance_id = 0, - }, - { - .instance_type = NVGPU_MIG_TYPE_MIG, - .major_instance_id = 2, - .minor_instance_id = 4, - }, -}; - static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_classes) { u32 class_count = 0U; - const struct nvgpu_mig_static_info *info; struct nvgpu_class *class; u32 i; u32 num_instances; struct nvgpu_cdev_class_priv_data *priv_data; - if (g->pci_class != 0U) { - info = &nvgpu_default_pci_mig_static_info[0]; - num_instances = sizeof(nvgpu_default_pci_mig_static_info) / - 
sizeof(nvgpu_default_pci_mig_static_info[0]); - } else { - info = &nvgpu_default_mig_static_info[0]; - num_instances = sizeof(nvgpu_default_mig_static_info) / - sizeof(nvgpu_default_mig_static_info[0]); - } - - for (i = 0U; i < num_instances; i++) { + num_instances = g->mig.num_gpu_instances; + /* + * TODO: i=0 need to be added after ctrl node fixup. + */ + for (i = 1U; i < num_instances; i++) { priv_data = nvgpu_kzalloc(g, sizeof(*priv_data)); if (priv_data == NULL) { return -ENOMEM; @@ -456,20 +397,16 @@ static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_class kfree(priv_data); return -ENOMEM; } + class_count++; + class->class->devnode = nvgpu_mig_fgpu_devnode; + priv_data->major_instance_id = g->mig.gpu_instance[i].gpu_instance_id; + priv_data->minor_instance_id = g->mig.gpu_instance[i].gr_syspipe.gr_syspipe_id; + class->instance_type = NVGPU_MIG_TYPE_MIG; - if (info[i].instance_type == NVGPU_MIG_TYPE_PHYSICAL) { - class->class->devnode = nvgpu_mig_phys_devnode; - } else { - class->class->devnode = nvgpu_mig_fgpu_devnode; - } - - priv_data->local_instance_id = i; - priv_data->major_instance_id = info[i].major_instance_id; - priv_data->minor_instance_id = info[i].minor_instance_id; - priv_data->pci = (g->pci_class != 0U); class->priv_data = priv_data; - class->instance_type = info[i].instance_type; + priv_data->local_instance_id = i; + priv_data->pci = (g->pci_class != 0U); } *num_classes = class_count; @@ -568,7 +505,7 @@ static int nvgpu_prepare_dev_node_class_list(struct gk20a *g, u32 *num_classes, static bool check_valid_dev_node(struct gk20a *g, struct nvgpu_class *class, const struct nvgpu_dev_node *node) { - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if (nvgpu_grmgr_is_multi_gr_enabled(g)) { if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL) && !node->mig_physical_node) { return false; @@ -584,7 +521,7 @@ static bool check_valid_class(struct gk20a *g, struct nvgpu_class *class) return false; } - if (nvgpu_is_enabled(g, 
NVGPU_SUPPORT_MIG)) { + if (nvgpu_grmgr_is_multi_gr_enabled(g)) { if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL)) { return false; } @@ -681,7 +618,19 @@ int gk20a_user_init(struct device *dev) } num_cdevs = sizeof(dev_node_list) / sizeof(dev_node_list[0]); - total_cdevs = num_cdevs * num_classes; + if (nvgpu_grmgr_is_multi_gr_enabled(g)) { + /** + * As mig physical node needs the ctrl node only. + * We need to add total_cdevs + 1 when we enable ctrl node. + */ + total_cdevs = (num_cdevs - 1) * (num_classes - 1); + } else { + /* + * As the power node is already created, we need to + * reduced devs by by one. + */ + total_cdevs = (num_cdevs - 1) * num_classes; + } err = alloc_chrdev_region(&devno, 0, total_cdevs, dev_name(dev)); if (err) { @@ -742,7 +691,7 @@ u32 nvgpu_get_gpu_instance_id_from_cdev(struct gk20a *g, struct nvgpu_cdev *cdev { struct nvgpu_cdev_class_priv_data *priv_data; - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if (nvgpu_grmgr_is_multi_gr_enabled(g)) { priv_data = dev_get_drvdata(cdev->node); return priv_data->local_instance_id; } diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index 4157f10f9..2ff63bd2e 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -1086,15 +1086,20 @@ static ssize_t mig_mode_config_list_show(struct device *dev, u32 num_config = 0; struct gk20a *g = get_gk20a(dev); const struct nvgpu_mig_gpu_instance_config *mig_gpu_instance_config; - char *power_on_string = "MIG list will be displayed after gpu power" + const char *power_on_string = "MIG list will be displayed after gpu power" " on with default MIG mode \n Boot with config id zero\n" " Get the available configs \n" " Change the init script and reboot"; + const char *error_on_nullconfig = "MIG list can't be displayed"; - if (nvgpu_is_powered_on(g) && - (g->mig.current_mig_gpu_instance_config != NULL)) { + if (nvgpu_is_powered_on(g)) { mig_gpu_instance_config = - 
g->mig.current_mig_gpu_instance_config; + (g->ops.grmgr.get_mig_config_ptr != NULL) ? + g->ops.grmgr.get_mig_config_ptr(g) : NULL; + if (mig_gpu_instance_config == NULL) { + res += sprintf(&buf[res], "%s", error_on_nullconfig); + return res; + } } else { res += sprintf(&buf[res], "%s", power_on_string); return res;