From be507aea5020b0524b75a89b11618c54347c5411 Mon Sep 17 00:00:00 2001
From: dt
Date: Fri, 30 Apr 2021 06:13:50 +0000
Subject: [PATCH] gpu: nvgpu: MIG mode selection at runtime
Add code to select the MIG mode at runtime and boot
the GPU with the selected MIG config.
To test MIG, after the system boots:
1. Write the desired config id to mig_mode_config:
echo x > /sys/devices/gpu.0/mig_mode_config for igpu
echo x > /sys/devices/./platform/14100000.pcie/pci0001:00/0001:00:00.0/0001:01:00.0/mig_mode_config for dgpu
2. Then run any nvgpu* tests or nvrm_gpu_info.
If the MIG mode needs to be changed, note down the supported
configs with "cat mig_mode_config_list" and reboot the system.
3. Repeat steps 1 and 2.
example output:
"cat mig_mode_config" 2
"cat mig_mode_config_list"
+++++++++ Config list Start ++++++++++
CONFIG_ID : 0 for CONFIG NAME : 2 GPU instances each with 4 GPCs
CONFIG_ID : 1 for CONFIG NAME : 4 GPU instances each with 2 GPCs
CONFIG_ID : 2 for CONFIG NAME : 7 GPU instances - 1 GPU instance with 2
GPCs + 6 GPU instances each with 1 GPC
CONFIG_ID : 3 for CONFIG NAME : 5 GPU instances - 1 GPU instance with 4
GPCs + 4 GPU instances each with 1 GPC
CONFIG_ID : 4 for CONFIG NAME : 4 GPU instances - 1 GPU instance with 2
GPCs + 2 GPU instances each with 1 GPC + 1 GPU instance with 4 GPCs
CONFIG_ID : 5 for CONFIG NAME : 6 GPU instances - 2 GPU instances each
with 2 GPCs + 4 GPU instances each with 1 GPC
CONFIG_ID : 6 for CONFIG NAME : 5 GPU instances - 1 GPU instance with
2 GPCs + 2 GPU instances each with 1 GPC + 2 GPU instances with 2 GPCs
CONFIG_ID : 7 for CONFIG NAME : 5 GPU instances - 2 GPU instances each
with 2 GPCs + 1 GPC instance with 2 GPCs + 2 GPU instances with 1 GPC
CONFIG_ID : 8 for CONFIG NAME : 5 GPU instances - 1 GPC instance with 2
GPCs + 2 GPU instances each with 1 GPC + 2 GPU instances each with 2
GPCs
CONFIG_ID : 9 for CONFIG NAME : 1 GPU instance with 8 GPCs
++++++++++ Config list End +++++++++++
JIRA NVGPU-6633
Change-Id: I3e56f8c836e1ced8753a60f328da63916faa7696
Signed-off-by: dt
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2522821
Tested-by: mobile promotions
Reviewed-by: mobile promotions
---
drivers/gpu/nvgpu/common/grmgr/grmgr.c | 1 -
drivers/gpu/nvgpu/include/nvgpu/mig.h | 5 --
drivers/gpu/nvgpu/os/linux/ioctl.c | 107 +++++++------------------
drivers/gpu/nvgpu/os/linux/sysfs.c | 13 ++-
4 files changed, 37 insertions(+), 89 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/grmgr/grmgr.c b/drivers/gpu/nvgpu/common/grmgr/grmgr.c
index a69e06020..70f6603f7 100644
--- a/drivers/gpu/nvgpu/common/grmgr/grmgr.c
+++ b/drivers/gpu/nvgpu/common/grmgr/grmgr.c
@@ -42,7 +42,6 @@ int nvgpu_init_gr_manager(struct gk20a *g)
g->mig.gpc_count = g->ops.priv_ring.get_gpc_count(g);
nvgpu_assert(g->mig.gpc_count > 0U);
g->mig.num_gpu_instances = 1U;
- g->mig.current_gpu_instance_config_id = 0U;
g->mig.is_nongr_engine_sharable = false;
gpu_instance->gpu_instance_id = 0U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mig.h b/drivers/gpu/nvgpu/include/nvgpu/mig.h
index d62dd10e1..6c560a9e7 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/mig.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mig.h
@@ -221,11 +221,6 @@ struct nvgpu_mig {
struct nvgpu_mutex gr_syspipe_lock;
/** Gpu instance configuration id. */
u32 current_gpu_instance_config_id;
- /**
- * current mig_instance_config.
- */
- const struct nvgpu_mig_gpu_instance_config
- *current_mig_gpu_instance_config;
/**
* Flag to indicate whether nonGR(CE) engine is sharable
* between gr syspipes or not.
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.c b/drivers/gpu/nvgpu/os/linux/ioctl.c
index 8e73dd9c0..e72944c91 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl.c
@@ -231,24 +231,6 @@ static char *nvgpu_pci_devnode_v2(struct device *dev, umode_t *mode)
dev_name(dev));
}
-static char *nvgpu_mig_phys_devnode(struct device *dev, umode_t *mode)
-{
- struct nvgpu_cdev_class_priv_data *priv_data;
-
- if (mode) {
- *mode = S_IRUSR | S_IWUSR;
- }
-
- priv_data = dev_get_drvdata(dev);
-
- if (priv_data->pci) {
- return kasprintf(GFP_KERNEL, "nvgpu/dgpu-%s/%s",
- dev_name(dev->parent), dev_name(dev));
- }
-
- return kasprintf(GFP_KERNEL, "nvgpu/igpu0/%s", dev_name(dev));
-}
-
static char *nvgpu_mig_fgpu_devnode(struct device *dev, umode_t *mode)
{
struct nvgpu_cdev_class_priv_data *priv_data;
@@ -388,60 +370,19 @@ struct nvgpu_mig_static_info {
u32 minor_instance_id;
};
-static const struct nvgpu_mig_static_info nvgpu_default_mig_static_info[] =
-{
- {
- .instance_type = NVGPU_MIG_TYPE_PHYSICAL,
- },
- {
- .instance_type = NVGPU_MIG_TYPE_MIG,
- .major_instance_id = 0,
- .minor_instance_id = 0,
- },
- {
- .instance_type = NVGPU_MIG_TYPE_MIG,
- .major_instance_id = 0,
- .minor_instance_id = 1,
- },
-};
-
-static const struct nvgpu_mig_static_info nvgpu_default_pci_mig_static_info[] =
-{
- {
- .instance_type = NVGPU_MIG_TYPE_PHYSICAL,
- },
- {
- .instance_type = NVGPU_MIG_TYPE_MIG,
- .major_instance_id = 1,
- .minor_instance_id = 0,
- },
- {
- .instance_type = NVGPU_MIG_TYPE_MIG,
- .major_instance_id = 2,
- .minor_instance_id = 4,
- },
-};
-
static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_classes)
{
u32 class_count = 0U;
- const struct nvgpu_mig_static_info *info;
struct nvgpu_class *class;
u32 i;
u32 num_instances;
struct nvgpu_cdev_class_priv_data *priv_data;
- if (g->pci_class != 0U) {
- info = &nvgpu_default_pci_mig_static_info[0];
- num_instances = sizeof(nvgpu_default_pci_mig_static_info) /
- sizeof(nvgpu_default_pci_mig_static_info[0]);
- } else {
- info = &nvgpu_default_mig_static_info[0];
- num_instances = sizeof(nvgpu_default_mig_static_info) /
- sizeof(nvgpu_default_mig_static_info[0]);
- }
-
- for (i = 0U; i < num_instances; i++) {
+ num_instances = g->mig.num_gpu_instances;
+ /*
+ * TODO: i=0 needs to be added after ctrl node fixup.
+ */
+ for (i = 1U; i < num_instances; i++) {
priv_data = nvgpu_kzalloc(g, sizeof(*priv_data));
if (priv_data == NULL) {
return -ENOMEM;
@@ -456,20 +397,16 @@ static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_class
kfree(priv_data);
return -ENOMEM;
}
+
class_count++;
+ class->class->devnode = nvgpu_mig_fgpu_devnode;
+ priv_data->major_instance_id = g->mig.gpu_instance[i].gpu_instance_id;
+ priv_data->minor_instance_id = g->mig.gpu_instance[i].gr_syspipe.gr_syspipe_id;
+ class->instance_type = NVGPU_MIG_TYPE_MIG;
- if (info[i].instance_type == NVGPU_MIG_TYPE_PHYSICAL) {
- class->class->devnode = nvgpu_mig_phys_devnode;
- } else {
- class->class->devnode = nvgpu_mig_fgpu_devnode;
- }
-
- priv_data->local_instance_id = i;
- priv_data->major_instance_id = info[i].major_instance_id;
- priv_data->minor_instance_id = info[i].minor_instance_id;
- priv_data->pci = (g->pci_class != 0U);
class->priv_data = priv_data;
- class->instance_type = info[i].instance_type;
+ priv_data->local_instance_id = i;
+ priv_data->pci = (g->pci_class != 0U);
}
*num_classes = class_count;
@@ -568,7 +505,7 @@ static int nvgpu_prepare_dev_node_class_list(struct gk20a *g, u32 *num_classes,
static bool check_valid_dev_node(struct gk20a *g, struct nvgpu_class *class,
const struct nvgpu_dev_node *node)
{
- if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+ if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL) &&
!node->mig_physical_node) {
return false;
@@ -584,7 +521,7 @@ static bool check_valid_class(struct gk20a *g, struct nvgpu_class *class)
return false;
}
- if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+ if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL)) {
return false;
}
@@ -681,7 +618,19 @@ int gk20a_user_init(struct device *dev)
}
num_cdevs = sizeof(dev_node_list) / sizeof(dev_node_list[0]);
- total_cdevs = num_cdevs * num_classes;
+ if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
+ /**
+ * The MIG physical node needs only the ctrl node.
+ * We need to use total_cdevs + 1 once the ctrl node is enabled.
+ */
+ total_cdevs = (num_cdevs - 1) * (num_classes - 1);
+ } else {
+ /*
+ * As the power node is already created, we need to
+ * reduce the number of devs by one.
+ */
+ total_cdevs = (num_cdevs - 1) * num_classes;
+ }
err = alloc_chrdev_region(&devno, 0, total_cdevs, dev_name(dev));
if (err) {
@@ -742,7 +691,7 @@ u32 nvgpu_get_gpu_instance_id_from_cdev(struct gk20a *g, struct nvgpu_cdev *cdev
{
struct nvgpu_cdev_class_priv_data *priv_data;
- if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+ if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
priv_data = dev_get_drvdata(cdev->node);
return priv_data->local_instance_id;
}
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c
index 4157f10f9..2ff63bd2e 100644
--- a/drivers/gpu/nvgpu/os/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/os/linux/sysfs.c
@@ -1086,15 +1086,20 @@ static ssize_t mig_mode_config_list_show(struct device *dev,
u32 num_config = 0;
struct gk20a *g = get_gk20a(dev);
const struct nvgpu_mig_gpu_instance_config *mig_gpu_instance_config;
- char *power_on_string = "MIG list will be displayed after gpu power"
+ const char *power_on_string = "MIG list will be displayed after gpu power"
" on with default MIG mode \n Boot with config id zero\n"
" Get the available configs \n"
" Change the init script and reboot";
+ const char *error_on_nullconfig = "MIG list can't be displayed";
- if (nvgpu_is_powered_on(g) &&
- (g->mig.current_mig_gpu_instance_config != NULL)) {
+ if (nvgpu_is_powered_on(g)) {
mig_gpu_instance_config =
- g->mig.current_mig_gpu_instance_config;
+ (g->ops.grmgr.get_mig_config_ptr != NULL) ?
+ g->ops.grmgr.get_mig_config_ptr(g) : NULL;
+ if (mig_gpu_instance_config == NULL) {
+ res += sprintf(&buf[res], "%s", error_on_nullconfig);
+ return res;
+ }
} else {
res += sprintf(&buf[res], "%s", power_on_string);
return res;