mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: MIG mode selection at runtime
This adds code to select the MIG mode at runtime and boot
the GPU with the selected MIG config.
To test MIG, after the system boots:
1. Write mig_mode_config with
echo x > /sys/devices/gpu.0/mig_mode_config for igpu
echo x > /sys/devices/./platform/14100000.pcie/pci0001:00/0001:00:00.0/0001:01:00.0/ for dgpu
2. Then run any nvgpu* tests or nvrm_gpu_info.
If the mig_mode needs to be changed, note down the supported
configs from "cat mig_mode_config_list" and reboot the system.
3. Follow steps 1 and 2.
example output:
"cat mig_mode_config" 2
"cat mig_mode_config_list"
+++++++++ Config list Start ++++++++++
CONFIG_ID : 0 for CONFIG NAME : 2 GPU instances each with 4 GPCs
CONFIG_ID : 1 for CONFIG NAME : 4 GPU instances each with 2 GPCs
CONFIG_ID : 2 for CONFIG NAME : 7 GPU instances - 1 GPU instance with 2
GPCs + 6 GPU instances each with 1 GPC
CONFIG_ID : 3 for CONFIG NAME : 5 GPU instances - 1 GPU instance with 4
GPCs + 4 GPU instances each with 1 GPC
CONFIG_ID : 4 for CONFIG NAME : 4 GPU instances - 1 GPU instance with 2
GPCs + 2 GPU instances each with 1 GPC + 1 GPU instance with 4 GPCs
CONFIG_ID : 5 for CONFIG NAME : 6 GPU instances - 2 GPU instances each
with 2 GPCs + 4 GPU instances each with 1 GPC
CONFIG_ID : 6 for CONFIG NAME : 5 GPU instances - 1 GPU instance with
2 GPCs + 2 GPU instances each with 1 GPC + 2 GPU instances with 2 GPCs
CONFIG_ID : 7 for CONFIG NAME : 5 GPU instances - 2 GPU instances each
with 2 GPCs + 1 GPC instance with 2 GPCs + 2 GPU instances with 1 GPC
CONFIG_ID : 8 for CONFIG NAME : 5 GPU instances - 1 GPC instance with 2
GPCs + 2 GPU instances each with 1 GPC + 2 GPU instances each with 2
GPCs
CONFIG_ID : 9 for CONFIG NAME : 1 GPU instance with 8 GPCs
++++++++++ Config list End +++++++++++
JIRA NVGPU-6633
Change-Id: I3e56f8c836e1ced8753a60f328da63916faa7696
Signed-off-by: dt <dt@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2522821
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
@@ -42,7 +42,6 @@ int nvgpu_init_gr_manager(struct gk20a *g)
|
|||||||
g->mig.gpc_count = g->ops.priv_ring.get_gpc_count(g);
|
g->mig.gpc_count = g->ops.priv_ring.get_gpc_count(g);
|
||||||
nvgpu_assert(g->mig.gpc_count > 0U);
|
nvgpu_assert(g->mig.gpc_count > 0U);
|
||||||
g->mig.num_gpu_instances = 1U;
|
g->mig.num_gpu_instances = 1U;
|
||||||
g->mig.current_gpu_instance_config_id = 0U;
|
|
||||||
g->mig.is_nongr_engine_sharable = false;
|
g->mig.is_nongr_engine_sharable = false;
|
||||||
|
|
||||||
gpu_instance->gpu_instance_id = 0U;
|
gpu_instance->gpu_instance_id = 0U;
|
||||||
|
|||||||
@@ -221,11 +221,6 @@ struct nvgpu_mig {
|
|||||||
struct nvgpu_mutex gr_syspipe_lock;
|
struct nvgpu_mutex gr_syspipe_lock;
|
||||||
/** Gpu instance configuration id. */
|
/** Gpu instance configuration id. */
|
||||||
u32 current_gpu_instance_config_id;
|
u32 current_gpu_instance_config_id;
|
||||||
/**
|
|
||||||
* current mig_instance_config.
|
|
||||||
*/
|
|
||||||
const struct nvgpu_mig_gpu_instance_config
|
|
||||||
*current_mig_gpu_instance_config;
|
|
||||||
/**
|
/**
|
||||||
* Flag to indicate whether nonGR(CE) engine is sharable
|
* Flag to indicate whether nonGR(CE) engine is sharable
|
||||||
* between gr syspipes or not.
|
* between gr syspipes or not.
|
||||||
|
|||||||
@@ -231,24 +231,6 @@ static char *nvgpu_pci_devnode_v2(struct device *dev, umode_t *mode)
|
|||||||
dev_name(dev));
|
dev_name(dev));
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *nvgpu_mig_phys_devnode(struct device *dev, umode_t *mode)
|
|
||||||
{
|
|
||||||
struct nvgpu_cdev_class_priv_data *priv_data;
|
|
||||||
|
|
||||||
if (mode) {
|
|
||||||
*mode = S_IRUSR | S_IWUSR;
|
|
||||||
}
|
|
||||||
|
|
||||||
priv_data = dev_get_drvdata(dev);
|
|
||||||
|
|
||||||
if (priv_data->pci) {
|
|
||||||
return kasprintf(GFP_KERNEL, "nvgpu/dgpu-%s/%s",
|
|
||||||
dev_name(dev->parent), dev_name(dev));
|
|
||||||
}
|
|
||||||
|
|
||||||
return kasprintf(GFP_KERNEL, "nvgpu/igpu0/%s", dev_name(dev));
|
|
||||||
}
|
|
||||||
|
|
||||||
static char *nvgpu_mig_fgpu_devnode(struct device *dev, umode_t *mode)
|
static char *nvgpu_mig_fgpu_devnode(struct device *dev, umode_t *mode)
|
||||||
{
|
{
|
||||||
struct nvgpu_cdev_class_priv_data *priv_data;
|
struct nvgpu_cdev_class_priv_data *priv_data;
|
||||||
@@ -388,60 +370,19 @@ struct nvgpu_mig_static_info {
|
|||||||
u32 minor_instance_id;
|
u32 minor_instance_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct nvgpu_mig_static_info nvgpu_default_mig_static_info[] =
|
|
||||||
{
|
|
||||||
{
|
|
||||||
.instance_type = NVGPU_MIG_TYPE_PHYSICAL,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.instance_type = NVGPU_MIG_TYPE_MIG,
|
|
||||||
.major_instance_id = 0,
|
|
||||||
.minor_instance_id = 0,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.instance_type = NVGPU_MIG_TYPE_MIG,
|
|
||||||
.major_instance_id = 0,
|
|
||||||
.minor_instance_id = 1,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
static const struct nvgpu_mig_static_info nvgpu_default_pci_mig_static_info[] =
|
|
||||||
{
|
|
||||||
{
|
|
||||||
.instance_type = NVGPU_MIG_TYPE_PHYSICAL,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.instance_type = NVGPU_MIG_TYPE_MIG,
|
|
||||||
.major_instance_id = 1,
|
|
||||||
.minor_instance_id = 0,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.instance_type = NVGPU_MIG_TYPE_MIG,
|
|
||||||
.major_instance_id = 2,
|
|
||||||
.minor_instance_id = 4,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_classes)
|
static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_classes)
|
||||||
{
|
{
|
||||||
u32 class_count = 0U;
|
u32 class_count = 0U;
|
||||||
const struct nvgpu_mig_static_info *info;
|
|
||||||
struct nvgpu_class *class;
|
struct nvgpu_class *class;
|
||||||
u32 i;
|
u32 i;
|
||||||
u32 num_instances;
|
u32 num_instances;
|
||||||
struct nvgpu_cdev_class_priv_data *priv_data;
|
struct nvgpu_cdev_class_priv_data *priv_data;
|
||||||
|
|
||||||
if (g->pci_class != 0U) {
|
num_instances = g->mig.num_gpu_instances;
|
||||||
info = &nvgpu_default_pci_mig_static_info[0];
|
/*
|
||||||
num_instances = sizeof(nvgpu_default_pci_mig_static_info) /
|
* TODO: i=0 need to be added after ctrl node fixup.
|
||||||
sizeof(nvgpu_default_pci_mig_static_info[0]);
|
*/
|
||||||
} else {
|
for (i = 1U; i < num_instances; i++) {
|
||||||
info = &nvgpu_default_mig_static_info[0];
|
|
||||||
num_instances = sizeof(nvgpu_default_mig_static_info) /
|
|
||||||
sizeof(nvgpu_default_mig_static_info[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0U; i < num_instances; i++) {
|
|
||||||
priv_data = nvgpu_kzalloc(g, sizeof(*priv_data));
|
priv_data = nvgpu_kzalloc(g, sizeof(*priv_data));
|
||||||
if (priv_data == NULL) {
|
if (priv_data == NULL) {
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
@@ -456,20 +397,16 @@ static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_class
|
|||||||
kfree(priv_data);
|
kfree(priv_data);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
class_count++;
|
class_count++;
|
||||||
|
class->class->devnode = nvgpu_mig_fgpu_devnode;
|
||||||
|
priv_data->major_instance_id = g->mig.gpu_instance[i].gpu_instance_id;
|
||||||
|
priv_data->minor_instance_id = g->mig.gpu_instance[i].gr_syspipe.gr_syspipe_id;
|
||||||
|
class->instance_type = NVGPU_MIG_TYPE_MIG;
|
||||||
|
|
||||||
if (info[i].instance_type == NVGPU_MIG_TYPE_PHYSICAL) {
|
|
||||||
class->class->devnode = nvgpu_mig_phys_devnode;
|
|
||||||
} else {
|
|
||||||
class->class->devnode = nvgpu_mig_fgpu_devnode;
|
|
||||||
}
|
|
||||||
|
|
||||||
priv_data->local_instance_id = i;
|
|
||||||
priv_data->major_instance_id = info[i].major_instance_id;
|
|
||||||
priv_data->minor_instance_id = info[i].minor_instance_id;
|
|
||||||
priv_data->pci = (g->pci_class != 0U);
|
|
||||||
class->priv_data = priv_data;
|
class->priv_data = priv_data;
|
||||||
class->instance_type = info[i].instance_type;
|
priv_data->local_instance_id = i;
|
||||||
|
priv_data->pci = (g->pci_class != 0U);
|
||||||
}
|
}
|
||||||
|
|
||||||
*num_classes = class_count;
|
*num_classes = class_count;
|
||||||
@@ -568,7 +505,7 @@ static int nvgpu_prepare_dev_node_class_list(struct gk20a *g, u32 *num_classes,
|
|||||||
static bool check_valid_dev_node(struct gk20a *g, struct nvgpu_class *class,
|
static bool check_valid_dev_node(struct gk20a *g, struct nvgpu_class *class,
|
||||||
const struct nvgpu_dev_node *node)
|
const struct nvgpu_dev_node *node)
|
||||||
{
|
{
|
||||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
|
||||||
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL) &&
|
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL) &&
|
||||||
!node->mig_physical_node) {
|
!node->mig_physical_node) {
|
||||||
return false;
|
return false;
|
||||||
@@ -584,7 +521,7 @@ static bool check_valid_class(struct gk20a *g, struct nvgpu_class *class)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
|
||||||
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL)) {
|
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -681,7 +618,19 @@ int gk20a_user_init(struct device *dev)
|
|||||||
}
|
}
|
||||||
|
|
||||||
num_cdevs = sizeof(dev_node_list) / sizeof(dev_node_list[0]);
|
num_cdevs = sizeof(dev_node_list) / sizeof(dev_node_list[0]);
|
||||||
total_cdevs = num_cdevs * num_classes;
|
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
|
||||||
|
/**
|
||||||
|
* As mig physical node needs the ctrl node only.
|
||||||
|
* We need to add total_cdevs + 1 when we enable ctrl node.
|
||||||
|
*/
|
||||||
|
total_cdevs = (num_cdevs - 1) * (num_classes - 1);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* As the power node is already created, we need to
|
||||||
|
* reduced devs by by one.
|
||||||
|
*/
|
||||||
|
total_cdevs = (num_cdevs - 1) * num_classes;
|
||||||
|
}
|
||||||
|
|
||||||
err = alloc_chrdev_region(&devno, 0, total_cdevs, dev_name(dev));
|
err = alloc_chrdev_region(&devno, 0, total_cdevs, dev_name(dev));
|
||||||
if (err) {
|
if (err) {
|
||||||
@@ -742,7 +691,7 @@ u32 nvgpu_get_gpu_instance_id_from_cdev(struct gk20a *g, struct nvgpu_cdev *cdev
|
|||||||
{
|
{
|
||||||
struct nvgpu_cdev_class_priv_data *priv_data;
|
struct nvgpu_cdev_class_priv_data *priv_data;
|
||||||
|
|
||||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
|
||||||
priv_data = dev_get_drvdata(cdev->node);
|
priv_data = dev_get_drvdata(cdev->node);
|
||||||
return priv_data->local_instance_id;
|
return priv_data->local_instance_id;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1086,15 +1086,20 @@ static ssize_t mig_mode_config_list_show(struct device *dev,
|
|||||||
u32 num_config = 0;
|
u32 num_config = 0;
|
||||||
struct gk20a *g = get_gk20a(dev);
|
struct gk20a *g = get_gk20a(dev);
|
||||||
const struct nvgpu_mig_gpu_instance_config *mig_gpu_instance_config;
|
const struct nvgpu_mig_gpu_instance_config *mig_gpu_instance_config;
|
||||||
char *power_on_string = "MIG list will be displayed after gpu power"
|
const char *power_on_string = "MIG list will be displayed after gpu power"
|
||||||
" on with default MIG mode \n Boot with config id zero\n"
|
" on with default MIG mode \n Boot with config id zero\n"
|
||||||
" Get the available configs \n"
|
" Get the available configs \n"
|
||||||
" Change the init script and reboot";
|
" Change the init script and reboot";
|
||||||
|
const char *error_on_nullconfig = "MIG list can't be displayed";
|
||||||
|
|
||||||
if (nvgpu_is_powered_on(g) &&
|
if (nvgpu_is_powered_on(g)) {
|
||||||
(g->mig.current_mig_gpu_instance_config != NULL)) {
|
|
||||||
mig_gpu_instance_config =
|
mig_gpu_instance_config =
|
||||||
g->mig.current_mig_gpu_instance_config;
|
(g->ops.grmgr.get_mig_config_ptr != NULL) ?
|
||||||
|
g->ops.grmgr.get_mig_config_ptr(g) : NULL;
|
||||||
|
if (mig_gpu_instance_config == NULL) {
|
||||||
|
res += sprintf(&buf[res], "%s", error_on_nullconfig);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
res += sprintf(&buf[res], "%s", power_on_string);
|
res += sprintf(&buf[res], "%s", power_on_string);
|
||||||
return res;
|
return res;
|
||||||
|
|||||||
Reference in New Issue
Block a user