Mirror of git://nv-tegra.nvidia.com/linux-nv-oot.git (synced 2025-12-24 10:11:26 +03:00)
misc: mods: update MODS driver from Perforce
Bug 1949265

Change-Id: If7e55bcbf181d0b230a792ff0f557000482598df
Signed-off-by: Chris Dragan <kdragan@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1517878
GVS: Gerrit_Virtual_Submit
Reviewed-by: Laxman Dewangan <ldewangan@nvidia.com>

Committed by: Laxman Dewangan
Parent: 324b7be6e5
Commit: 9f0eb7f789
@@ -24,7 +24,7 @@
/* Driver version */
#define MODS_DRIVER_VERSION_MAJOR 3
#define MODS_DRIVER_VERSION_MINOR 73
#define MODS_DRIVER_VERSION_MINOR 75
#define MODS_DRIVER_VERSION ((MODS_DRIVER_VERSION_MAJOR << 8) | \
((MODS_DRIVER_VERSION_MINOR/10) << 4) | \
(MODS_DRIVER_VERSION_MINOR%10))
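For reference, the macro above packs the major number into the high byte and the minor number into two decimal nibbles, so the bump from 3.73 to 3.75 changes the packed value from 0x0373 to 0x0375. A minimal worked example (illustrative only, not part of the commit):

```c
#include <assert.h>

#define MODS_DRIVER_VERSION_MAJOR 3
#define MODS_DRIVER_VERSION_MINOR 75
#define MODS_DRIVER_VERSION ((MODS_DRIVER_VERSION_MAJOR << 8) | \
                             ((MODS_DRIVER_VERSION_MINOR/10) << 4) | \
                             (MODS_DRIVER_VERSION_MINOR%10))

int main(void)
{
	/* major 3 in the high byte, minor 75 split into nibbles 7 and 5 */
	assert(MODS_DRIVER_VERSION == 0x0375);
	return 0;
}
```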
@@ -957,6 +957,13 @@ struct MODS_GET_ATS_ADDRESS_RANGE {
__s32 numa_memory_node;
};

/* MODS_ESC_SET_NVLINK_SYSMEM_TRAINED */
struct MODS_SET_NVLINK_SYSMEM_TRAINED {
/* IN */
struct mods_pci_dev_2 pci_device;
__u8 trained;
};

#pragma pack(pop)

/* ************************************************************************* */
@@ -1184,5 +1191,8 @@ struct MODS_GET_ATS_ADDRESS_RANGE {
#define MODS_ESC_GET_ATS_ADDRESS_RANGE \
_IOWR(MODS_IOC_MAGIC, 101, \
struct MODS_GET_ATS_ADDRESS_RANGE)
#define MODS_ESC_SET_NVLINK_SYSMEM_TRAINED \
_IOW(MODS_IOC_MAGIC, 102, \
struct MODS_SET_NVLINK_SYSMEM_TRAINED)

#endif /* _MODS_H_ */
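A hedged user-space sketch of how a client could exercise the new ioctl. The ioctl number and structure come from the header changes above; the device node path "/dev/mods" and the mods_pci_dev_2 field names are assumptions based on the rest of this diff, not confirmed by it.

```c
/* Hypothetical caller for MODS_ESC_SET_NVLINK_SYSMEM_TRAINED.
 * "/dev/mods" is an assumed device node name.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "mods.h"

int main(void)
{
	struct MODS_SET_NVLINK_SYSMEM_TRAINED arg = {0};
	int fd = open("/dev/mods", O_RDWR);

	if (fd < 0) {
		perror("open /dev/mods");
		return 1;
	}

	arg.pci_device.domain   = 4;  /* example GPU location */
	arg.pci_device.bus      = 1;
	arg.pci_device.device   = 0;
	arg.pci_device.function = 0;
	arg.trained             = 1;  /* mark NvLink sysmem links as trained */

	if (ioctl(fd, MODS_ESC_SET_NVLINK_SYSMEM_TRAINED, &arg) != 0)
		perror("MODS_ESC_SET_NVLINK_SYSMEM_TRAINED");

	close(fd);
	return 0;
}
```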
@@ -30,6 +30,10 @@
#include "mods_config.h"
#include "mods.h"

#ifdef MODS_HAS_SET_MEMORY_HEADER
#include <asm/set_memory.h>
#endif

#ifndef true
#define true 1
#define false 0
@@ -63,8 +67,9 @@ struct mods_file_private_data {
struct list_head *mods_alloc_list;
struct list_head *mods_mapping_list;
struct list_head *mods_pci_res_map_list;
#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
struct list_head *mods_ppc_tce_bypass_list;
struct list_head *mods_nvlink_sysmem_trained_list;
#endif
wait_queue_head_t interrupt_event;
struct en_dev_entry *enabled_devices;
@@ -154,7 +159,7 @@ int mods_check_debug_level(int mask);
int mods_get_multi_instance(void);
void mods_set_multi_instance(int mi);

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
void mods_set_ppc_tce_bypass(int bypass);
int mods_get_ppc_tce_bypass(void);
@@ -164,6 +169,16 @@ struct PPC_TCE_BYPASS {
u64 dma_mask;
struct list_head list;
};

int mods_is_nvlink_sysmem_trained(struct file *fp,
struct pci_dev *dev);

/* NvLink Trained tracking */
struct NVL_TRAINED {
struct pci_dev *dev;
u8 trained;
struct list_head list;
};
#endif

#define IRQ_MAX (256+PCI_IRQ_MAX)
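The NVL_TRAINED entries live on the new per-file mods_nvlink_sysmem_trained_list. The implementation of mods_is_nvlink_sysmem_trained() declared above is not part of this excerpt; the following is a hypothetical sketch modeled on the mods_find_ppc_tce_bypass() list walk shown later in this diff.

```c
/* Hypothetical lookup, mirroring the PPC_TCE_BYPASS list walk shown further
 * down in this diff; not the actual implementation from the commit.
 */
int mods_is_nvlink_sysmem_trained(struct file *fp, struct pci_dev *dev)
{
	MODS_PRIV private_data = fp->private_data;
	struct list_head *head = private_data->mods_nvlink_sysmem_trained_list;
	struct list_head *iter;

	list_for_each(iter, head) {
		struct NVL_TRAINED *p =
			list_entry(iter, struct NVL_TRAINED, list);

		if (p->dev == dev)
			return p->trained;
	}

	/* No record for this device: treat its sysmem links as untrained */
	return false;
}
```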
@@ -359,11 +374,16 @@ const char *mods_get_prot_str(u32 mem_type);
int mods_unregister_all_alloc(struct file *fp);
struct MODS_MEM_INFO *mods_find_alloc(struct file *fp, u64 phys_addr);

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
/* ppc64 */
int mods_unregister_all_ppc_tce_bypass(struct file *fp);

int mods_unregister_all_nvlink_sysmem_trained(struct file *fp);
#endif

#ifdef CONFIG_PCI
int mods_enable_device(struct mods_file_private_data *priv,
struct pci_dev *pdev);
int mods_unregister_all_pci_res_mappings(struct file *fp);
#define MODS_UNREGISTER_PCI_MAP(fp) mods_unregister_all_pci_res_mappings(fp)
#else
@@ -397,18 +417,21 @@ int esc_mods_virtual_to_phys(struct file *fp,
int esc_mods_phys_to_virtual(struct file *fp,
struct MODS_PHYSICAL_TO_VIRTUAL *p);
int esc_mods_memory_barrier(struct file *fp);
#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
int esc_mods_set_ppc_tce_bypass(struct file *fp,
struct MODS_SET_PPC_TCE_BYPASS *p);
int esc_mods_get_ats_address_range(struct file *fp,
struct MODS_GET_ATS_ADDRESS_RANGE *p);
#endif

int esc_mods_dma_map_memory(struct file *fp,
struct MODS_DMA_MAP_MEMORY *p);
int esc_mods_dma_unmap_memory(struct file *fp,
struct MODS_DMA_MAP_MEMORY *p);

#if defined(CONFIG_PPC64)
/* ppc64 */
int esc_mods_set_ppc_tce_bypass(struct file *fp,
struct MODS_SET_PPC_TCE_BYPASS *p);
int esc_mods_get_ats_address_range(struct file *fp,
struct MODS_GET_ATS_ADDRESS_RANGE *p);
int esc_mods_set_nvlink_sysmem_trained(struct file *fp,
struct MODS_SET_NVLINK_SYSMEM_TRAINED *p);
#endif

/* acpi */
#ifdef CONFIG_ACPI
int esc_mods_eval_acpi_method(struct file *fp,
@@ -67,7 +67,7 @@ static struct nv_device *get_dev(void)
}

#ifdef CONFIG_PCI
static int mods_enable_device(struct mods_file_private_data *priv,
int mods_enable_device(struct mods_file_private_data *priv,
struct pci_dev *pdev)
{
int ret = -1;
@@ -111,7 +111,7 @@ struct pci_driver mods_pci_driver = {
static int debug = -0x80000000;
static int multi_instance = MODS_MULTI_INSTANCE_DEFAULT_VALUE;

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
static int ppc_tce_bypass = MODS_PPC_TCE_BYPASS_DEFAULT;

void mods_set_ppc_tce_bypass(int bypass)
@@ -246,7 +246,7 @@ module_param(multi_instance, int, 0644);
MODULE_PARM_DESC(multi_instance,
"allows more than one client to simultaneously open the driver");

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
module_param(ppc_tce_bypass, int, 0644);
MODULE_PARM_DESC(ppc_tce_bypass,
"PPC TCE bypass (0=sys default, 1=force bypass, 2=force non bypass)");
@@ -524,8 +524,9 @@ static int mods_krnl_open(struct inode *ip, struct file *fp)
struct list_head *mods_alloc_list;
struct list_head *mods_mapping_list;
struct list_head *mods_pci_res_map_list;
#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
struct list_head *mods_ppc_tce_bypass_list;
struct list_head *mods_nvlink_sysmem_trained_list;
#endif
struct mods_file_private_data *private_data;
int id = 0;
@@ -556,7 +557,7 @@ static int mods_krnl_open(struct inode *ip, struct file *fp)
return -ENOMEM;
}

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
mods_ppc_tce_bypass_list =
kmalloc(sizeof(struct list_head), GFP_KERNEL | __GFP_NORETRY);
if (unlikely(!mods_ppc_tce_bypass_list)) {
@@ -566,6 +567,17 @@ static int mods_krnl_open(struct inode *ip, struct file *fp)
LOG_EXT();
return -ENOMEM;
}

mods_nvlink_sysmem_trained_list =
kmalloc(sizeof(struct list_head), GFP_KERNEL | __GFP_NORETRY);
if (unlikely(!mods_nvlink_sysmem_trained_list)) {
kfree(mods_alloc_list);
kfree(mods_mapping_list);
kfree(mods_pci_res_map_list);
kfree(mods_ppc_tce_bypass_list);
LOG_EXT();
return -ENOMEM;
}
#endif

private_data = kmalloc(sizeof(*private_data),
@@ -574,8 +586,9 @@ static int mods_krnl_open(struct inode *ip, struct file *fp)
kfree(mods_alloc_list);
kfree(mods_mapping_list);
kfree(mods_pci_res_map_list);
#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
kfree(mods_ppc_tce_bypass_list);
kfree(mods_nvlink_sysmem_trained_list);
#endif
LOG_EXT();
return -ENOMEM;
@@ -587,8 +600,9 @@ static int mods_krnl_open(struct inode *ip, struct file *fp)
kfree(mods_alloc_list);
kfree(mods_mapping_list);
kfree(mods_pci_res_map_list);
#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
kfree(mods_ppc_tce_bypass_list);
kfree(mods_nvlink_sysmem_trained_list);
#endif
kfree(private_data);
LOG_EXT();
@@ -604,9 +618,12 @@ static int mods_krnl_open(struct inode *ip, struct file *fp)
private_data->mods_alloc_list = mods_alloc_list;
private_data->mods_mapping_list = mods_mapping_list;
private_data->mods_pci_res_map_list = mods_pci_res_map_list;
#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
INIT_LIST_HEAD(mods_ppc_tce_bypass_list);
INIT_LIST_HEAD(mods_nvlink_sysmem_trained_list);
private_data->mods_ppc_tce_bypass_list = mods_ppc_tce_bypass_list;
private_data->mods_nvlink_sysmem_trained_list
= mods_nvlink_sysmem_trained_list;
#endif
private_data->enabled_devices = 0;
private_data->mem_type.dma_addr = 0;
@@ -649,10 +666,14 @@ static int mods_krnl_close(struct inode *ip, struct file *fp)
if (ret)
mods_error_printk("failed to free pci mappings\n");

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
ret = mods_unregister_all_ppc_tce_bypass(fp);
if (ret)
mods_error_printk("failed to restore dma bypass\n");

ret = mods_unregister_all_nvlink_sysmem_trained(fp);
if (ret)
mods_error_printk("failed to free nvlink trained\n");
#endif

mods_disable_all_devices(private_data);
@@ -660,8 +681,9 @@ static int mods_krnl_close(struct inode *ip, struct file *fp)
kfree(private_data->mods_alloc_list);
kfree(private_data->mods_mapping_list);
kfree(private_data->mods_pci_res_map_list);
#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
kfree(private_data->mods_ppc_tce_bypass_list);
kfree(private_data->mods_nvlink_sysmem_trained_list);
#endif
kfree(private_data);

@@ -1187,7 +1209,7 @@ static long mods_krnl_ioctl(struct file *fp,
esc_mods_phys_to_virtual, MODS_PHYSICAL_TO_VIRTUAL);
break;

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
#if defined(CONFIG_PPC64)
case MODS_ESC_SET_PPC_TCE_BYPASS:
MODS_IOCTL(MODS_ESC_SET_PPC_TCE_BYPASS,
esc_mods_set_ppc_tce_bypass,
@@ -1199,6 +1221,11 @@ static long mods_krnl_ioctl(struct file *fp,
esc_mods_get_ats_address_range,
MODS_GET_ATS_ADDRESS_RANGE);
break;
case MODS_ESC_SET_NVLINK_SYSMEM_TRAINED:
MODS_IOCTL(MODS_ESC_SET_NVLINK_SYSMEM_TRAINED,
esc_mods_set_nvlink_sysmem_trained,
MODS_SET_NVLINK_SYSMEM_TRAINED);
break;
#endif

case MODS_ESC_DMA_MAP_MEMORY:
@@ -36,6 +36,9 @@ static int mods_post_alloc(struct MODS_PHYS_CHUNK *pt,
static void mods_pre_free(struct MODS_PHYS_CHUNK *pt,
struct MODS_MEM_INFO *p_mem_info);

static u64 mods_compress_nvlink_addr(struct pci_dev *dev, u64 addr);
static u64 mods_expand_nvlink_addr(struct pci_dev *dev, u64 addr47);

/****************************
* DMA MAP HELPER FUNCTIONS *
****************************/
@@ -47,6 +50,8 @@ static void mods_dma_unmap_page(struct MODS_DMA_MAP *p_dma_map,
if (!pm->pt)
return;

pm->map_addr = mods_expand_nvlink_addr(p_dma_map->dev, pm->map_addr);

pci_unmap_page(p_dma_map->dev,
pm->map_addr,
(1U<<pm->pt->order)*PAGE_SIZE,
@@ -155,6 +160,9 @@ static void mods_dma_map_pages(struct MODS_MEM_INFO *p_mem_info,
(1U << pt->order) * PAGE_SIZE,
DMA_BIDIRECTIONAL);

pm->map_addr = mods_compress_nvlink_addr(p_dma_map->dev,
pm->map_addr);

mods_debug_printk(DEBUG_MEM_DETAILED,
"%s : Mapped map_addr=0x%llx, dma_addr=0x%llx on dev %x:%x:%x.%x\n",
__func__,
@@ -742,6 +750,11 @@ int esc_mods_device_alloc_pages_2(struct file *fp,
p_mem_info->dev = dev;
#if defined(MODS_HAS_DEV_TO_NUMA_NODE)
p_mem_info->numa_node = dev_to_node(&dev->dev);
#endif
#if defined(MODS_HAS_PNV_PCI_GET_NPU_DEV)
if (!mods_is_nvlink_sysmem_trained(fp, dev) &&
pnv_pci_get_npu_dev(dev, 0))
p_mem_info->numa_node = 0;
#endif
mods_debug_printk(DEBUG_MEM_DETAILED,
"affinity %x:%x.%x node %d\n",
@@ -1178,349 +1191,6 @@ int esc_mods_memory_barrier(struct file *fp)
#endif
}

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
static struct PPC_TCE_BYPASS *mods_find_ppc_tce_bypass(struct file *fp,
struct pci_dev *dev)
{
MODS_PRIV private_data = fp->private_data;
struct list_head *plist_head;
struct list_head *plist_iter;
struct PPC_TCE_BYPASS *p_ppc_tce_bypass;

plist_head = private_data->mods_ppc_tce_bypass_list;

list_for_each(plist_iter, plist_head) {
p_ppc_tce_bypass = list_entry(plist_iter,
struct PPC_TCE_BYPASS,
list);
if (dev == p_ppc_tce_bypass->dev)
return p_ppc_tce_bypass;
}

/* The device has never had its dma mask changed */
return NULL;
}

static int mods_register_ppc_tce_bypass(struct file *fp,
struct pci_dev *dev,
u64 original_mask)
{
MODS_PRIV private_data = fp->private_data;
struct PPC_TCE_BYPASS *p_ppc_tce_bypass;

/* only register the first time in order to restore the true actual dma
 * mask
 */
if (mods_find_ppc_tce_bypass(fp, dev) != NULL) {
mods_debug_printk(DEBUG_MEM,
"TCE bypass already registered on dev %x:%x:%x.%x\n",
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
return OK;
}

if (unlikely(mutex_lock_interruptible(&private_data->mtx)))
return -EINTR;

p_ppc_tce_bypass = kmalloc(sizeof(struct PPC_TCE_BYPASS),
GFP_KERNEL | __GFP_NORETRY);
if (unlikely(!p_ppc_tce_bypass)) {
mods_error_printk("failed to allocate TCE bypass struct\n");
LOG_EXT();
return -ENOMEM;
}

p_ppc_tce_bypass->dev = dev;
p_ppc_tce_bypass->dma_mask = original_mask;

list_add(&p_ppc_tce_bypass->list,
private_data->mods_ppc_tce_bypass_list);

mods_debug_printk(DEBUG_MEM,
"Registered TCE bypass on dev %x:%x:%x.%x\n",
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
mutex_unlock(&private_data->mtx);
return OK;
}

static int mods_unregister_ppc_tce_bypass(struct file *fp, struct pci_dev *dev)
{
struct PPC_TCE_BYPASS *p_ppc_tce_bypass;
MODS_PRIV private_data = fp->private_data;
struct list_head *head = private_data->mods_ppc_tce_bypass_list;
struct list_head *iter;

LOG_ENT();

if (unlikely(mutex_lock_interruptible(&private_data->mtx)))
return -EINTR;

list_for_each(iter, head) {
p_ppc_tce_bypass =
list_entry(iter, struct PPC_TCE_BYPASS, list);

if (p_ppc_tce_bypass->dev == dev) {
int ret = 0;

list_del(iter);

mutex_unlock(&private_data->mtx);

ret = pci_set_dma_mask(p_ppc_tce_bypass->dev,
p_ppc_tce_bypass->dma_mask);
dma_set_coherent_mask(&p_ppc_tce_bypass->dev->dev,
dev->dma_mask);
mods_debug_printk(DEBUG_MEM,
"Restored dma_mask on dev %x:%x:%x.%x to %llx\n",
pci_domain_nr(p_ppc_tce_bypass->dev->bus),
p_ppc_tce_bypass->dev->bus->number,
PCI_SLOT(p_ppc_tce_bypass->dev->devfn),
PCI_FUNC(p_ppc_tce_bypass->dev->devfn),
p_ppc_tce_bypass->dma_mask);

kfree(p_ppc_tce_bypass);

LOG_EXT();
return ret;
}
}

mutex_unlock(&private_data->mtx);

mods_error_printk(
"Failed to unregister TCE bypass on dev %x:%x:%x.%x\n",
pci_domain_nr(dev->bus),
dev->bus->number,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
LOG_EXT();

return -EINVAL;

}

int mods_unregister_all_ppc_tce_bypass(struct file *fp)
{
MODS_PRIV private_data = fp->private_data;
struct list_head *head = private_data->mods_ppc_tce_bypass_list;
struct list_head *iter;
struct list_head *tmp;

list_for_each_safe(iter, tmp, head) {
struct PPC_TCE_BYPASS *p_ppc_tce_bypass;
int ret;

p_ppc_tce_bypass =
list_entry(iter, struct PPC_TCE_BYPASS, list);
ret = mods_unregister_ppc_tce_bypass(fp, p_ppc_tce_bypass->dev);
if (ret)
return ret;
}

return OK;
}

int esc_mods_set_ppc_tce_bypass(struct file *fp,
struct MODS_SET_PPC_TCE_BYPASS *p)
{
int ret = OK;
dma_addr_t dma_addr;
unsigned int devfn = PCI_DEVFN(p->pci_device.device,
p->pci_device.function);
struct pci_dev *dev = MODS_PCI_GET_SLOT(p->pci_device.domain,
p->pci_device.bus,
devfn);
u64 original_dma_mask;
u32 bypass_mode = p->mode;
u32 cur_bypass_mode = MODS_PPC_TCE_BYPASS_OFF;
u64 dma_mask = DMA_BIT_MASK(64);

LOG_ENT();

if (!dev) {
mods_error_printk(
"PCI device not found %x:%x:%x.%x\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
LOG_EXT();
return -EINVAL;
}

original_dma_mask = dev->dma_mask;

if (bypass_mode == MODS_PPC_TCE_BYPASS_DEFAULT)
bypass_mode = mods_get_ppc_tce_bypass();

if (original_dma_mask == DMA_BIT_MASK(64))
cur_bypass_mode = MODS_PPC_TCE_BYPASS_ON;

/*
 * Linux on IBM POWER8 offers 2 different DMA set-ups, sometimes
 * referred to as "windows".
 *
 * The "default window" provides a 2GB region of PCI address space
 * located below the 32-bit line. The IOMMU is used to provide a
 * "rich" mapping--any page in system memory can be mapped at an
 * arbitrary address within this window. The mappings are dynamic
 * and pass in and out of being as pci_map*()/pci_unmap*() calls
 * are made.
 *
 * Dynamic DMA Windows (sometimes "Huge DDW", also PPC TCE Bypass "ON")
 * provides a linear
 * mapping of the system's entire physical address space at some
 * fixed offset above the 59-bit line. IOMMU is still used, and
 * pci_map*()/pci_unmap*() are still required, but mappings are
 * static. They're effectively set up in advance, and any given
 * system page will always map to the same PCI bus address. I.e.
 * physical 0x00000000xxxxxxxx => PCI 0x08000000xxxxxxxx
 *
 * Linux on POWER8 will only provide the DDW-style full linear
 * mapping when the driver claims support for 64-bit DMA addressing
 * (a pre-requisite because the PCI addresses used in this case will
 * be near the top of the 64-bit range). The linear mapping
 * is not available in all system configurations.
 *
 * Detect whether the linear mapping is present by claiming
 * 64-bit support and then mapping physical page 0. For historical
 * reasons, Linux on POWER8 will never map a page to PCI address 0x0.
 * In the "default window" case page 0 will be mapped to some
 * non-zero address below the 32-bit line. In the
 * DDW/linear-mapping case, it will be mapped to address 0 plus
 * some high-order offset.
 *
 * If the linear mapping is present and sane then return the offset
 * as the starting address for all DMA mappings.
 */
if ((bypass_mode != MODS_PPC_TCE_BYPASS_DEFAULT) &&
(cur_bypass_mode != bypass_mode)) {
/* Set DMA mask appropriately here */
if (bypass_mode == MODS_PPC_TCE_BYPASS_OFF)
dma_mask = p->device_dma_mask;

if (pci_set_dma_mask(dev, dma_mask) != 0) {
mods_error_printk(
"pci_set_dma_mask failed on dev %x:%x:%x.%x\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
LOG_EXT();
return -EINVAL;
}
}

dma_addr = pci_map_single(dev, NULL, 1, DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(dev, dma_addr)) {
pci_set_dma_mask(dev, original_dma_mask);
mods_error_printk(
"pci_map_single failed on dev %x:%x:%x.%x\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
LOG_EXT();
return -EINVAL;
}
pci_unmap_single(dev, dma_addr, 1, DMA_BIDIRECTIONAL);

if (bypass_mode == MODS_PPC_TCE_BYPASS_ON) {
bool bBypassFailed = false;

/*
 * From IBM: "For IODA2, native DMA bypass or KVM TCE-based
 * implementation of full 64-bit DMA support will establish a
 * window in address-space with the high 14 bits being constant
 * and the bottom up-to-50 bits varying with the mapping."
 *
 * Unfortunately, we don't have any good interfaces or
 * definitions from the kernel to get information about the DMA
 * offset assigned by OS. However, we have been told that the
 * offset will be defined by the top 14 bits of the address,
 * and bits 40-49 will not vary for any DMA mappings until 1TB
 * of system memory is surpassed; this limitation is essential
 * for us to function properly since our current GPUs only
 * support 40 physical address bits. We are in a fragile place
 * where we need to tell the OS that we're capable of 64-bit
 * addressing, while relying on the assumption that the top 24
 * bits will not vary in this case.
 *
 * The way we try to compute the window, then, is mask the trial
 * mapping against the DMA capabilities of the device. That way,
 * devices with greater addressing capabilities will only take
 * the bits it needs to define the window.
 */
if ((dma_addr & DMA_BIT_MASK(32)) != 0) {
/*
 * Huge DDW not available - page 0 mapped to non-zero
 * address below the 32-bit line.
 */
mods_warning_printk(
"Enabling PPC TCE bypass mode failed due to platform on device %x:%x:%x.%x\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
bBypassFailed = true;
} else if ((dma_addr & original_dma_mask) != 0) {
/*
 * The physical window straddles our addressing limit
 * boundary, e.g., for an adapter that can address up to
 * 1TB, the window crosses the 40-bit limit so that the
 * lower end of the range has different bits 63:40 than
 * the higher end of the range. We can only handle a
 * single, static value for bits 63:40, so we must fall
 * back here.
 */
mods_warning_printk(
"Enabling PPC TCE bypass mode failed due to memory size on device %x:%x:%x.%x\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
bBypassFailed = true;
}
if (bBypassFailed)
pci_set_dma_mask(dev, original_dma_mask);
}

mods_debug_printk(DEBUG_MEM,
"%s ppc tce bypass on device %x:%x:%x.%x with dma mask 0x%llx\n",
(dev->dma_mask == DMA_BIT_MASK(64)) ? "Enabled" : "Disabled",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function,
dev->dma_mask);

p->dma_base_address = dma_addr & ~(p->device_dma_mask);

mods_debug_printk(DEBUG_MEM,
"dma base address 0x%0llx on device %x:%x:%x.%x\n",
p->dma_base_address,
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);

/* Update the coherent mask to match */
dma_set_coherent_mask(&dev->dev, dev->dma_mask);

if (original_dma_mask != dev->dma_mask)
ret = mods_register_ppc_tce_bypass(fp, dev, original_dma_mask);

LOG_EXT();
return ret;
}
#endif

int esc_mods_dma_map_memory(struct file *fp,
struct MODS_DMA_MAP_MEMORY *p)
{
@@ -1806,3 +1476,56 @@ static void mods_pre_free(struct MODS_PHYS_CHUNK *pt,
kunmap(pt->p_page + i);
}
}

/*
 * Starting on Power9 systems, DMA addresses for NVLink are no longer
 * the same as used over PCIE.
 *
 * Power9 supports a 56-bit Real Address. This address range is compressed
 * when accessed over NvLink to allow the GPU to access all of memory using
 * its 47-bit Physical address.
 *
 * If there is an NPU device present on the system, it implies that NvLink
 * sysmem links are present and we need to apply the required address
 * conversion for NvLink within the driver. This is intended to be temporary
 * to ease the transition to kernel APIs to handle NvLink DMA mappings
 * via the NPU device.
 *
 * Note, a deviation from the documented compression scheme is that the
 * upper address bits (i.e. bit 56-63) instead of being set to zero are
 * preserved during NvLink address compression so the original PCIE DMA
 * address can be reconstructed on expansion. These bits can be safely
 * ignored on NvLink since they are truncated by the GPU.
 */
static u64 mods_compress_nvlink_addr(struct pci_dev *dev, u64 addr)
{
u64 addr47 = addr;

/* Note, one key difference from the documented compression scheme
 * is that BIT59 used for TCE bypass mode on PCIe is preserved during
 * NVLink address compression to allow for the resulting DMA address to
 * be used transparently on PCIe.
 */
#if defined(MODS_HAS_PNV_PCI_GET_NPU_DEV)
if (pnv_pci_get_npu_dev(dev, 0)) {
addr47 = addr & (1LLU << 59);
addr47 |= ((addr >> 45) & 0x3) << 43;
addr47 |= ((addr >> 49) & 0x3) << 45;
addr47 |= addr & ((1LLU << 43) - 1);
}
#endif

return addr47;
}

static u64 mods_expand_nvlink_addr(struct pci_dev *dev, u64 addr47)
{
u64 addr = addr47;

#if defined(MODS_HAS_PNV_PCI_GET_NPU_DEV)
if (pnv_pci_get_npu_dev(dev, 0)) {
addr = addr47 & ((1LLU << 43) - 1);
addr |= (addr47 & (3ULL << 43)) << 2;
addr |= (addr47 & (3ULL << 45)) << 4;
addr |= addr47 & ~((1ULL << 56) - 1);
}
#endif

return addr;
}
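To make the bit shuffling in the new helpers concrete, here is a small self-contained user-space sketch (illustrative only, not driver code) that applies the same transforms to a sample TCE-bypass style address and checks that expansion reverses compression; the helper names simply mirror the functions added above, minus the pci_dev/NPU check.

```c
#include <assert.h>
#include <stdint.h>

static uint64_t compress47(uint64_t addr)
{
	uint64_t addr47 = addr & (1ULL << 59);        /* keep TCE bypass bit  */

	addr47 |= ((addr >> 45) & 0x3) << 43;         /* bits 46:45 -> 44:43  */
	addr47 |= ((addr >> 49) & 0x3) << 45;         /* bits 50:49 -> 46:45  */
	addr47 |= addr & ((1ULL << 43) - 1);          /* low 43 bits verbatim */
	return addr47;
}

static uint64_t expand47(uint64_t addr47)
{
	uint64_t addr = addr47 & ((1ULL << 43) - 1);  /* low 43 bits verbatim */

	addr |= (addr47 & (3ULL << 43)) << 2;         /* bits 44:43 -> 46:45  */
	addr |= (addr47 & (3ULL << 45)) << 4;         /* bits 46:45 -> 50:49  */
	addr |= addr47 & ~((1ULL << 56) - 1);         /* bits 63:56 preserved */
	return addr;
}

int main(void)
{
	/* Example: bypass bit 59 set, bits in the 50:49 and 46:45 groups, low offset */
	uint64_t pcie = (1ULL << 59) | (1ULL << 50) | (1ULL << 45) | 0x1000;
	uint64_t nvlink = compress47(pcie);

	/* Ignoring the preserved bits 63:56, the result fits in 47 bits */
	assert((nvlink & ((1ULL << 56) - 1)) < (1ULL << 47));

	/* Expansion reconstructs the original PCIe DMA address */
	assert(expand47(nvlink) == pcie);
	return 0;
}
```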
@@ -107,6 +107,7 @@ int mods_unregister_all_pci_res_mappings(struct file *fp)
int esc_mods_find_pci_dev_2(struct file *pfile,
struct MODS_FIND_PCI_DEVICE_2 *p)
{
MODS_PRIV private_data = pfile->private_data;
struct pci_dev *dev;
int index = 0;
@@ -124,6 +125,16 @@ int esc_mods_find_pci_dev_2(struct file *pfile,
p->pci_device.bus = dev->bus->number;
p->pci_device.device = PCI_SLOT(dev->devfn);
p->pci_device.function = PCI_FUNC(dev->devfn);
/* Enable device on the PCI bus */
if (mods_enable_device(private_data, dev)) {
mods_error_printk(
"unable to enable dev %04x:%02x:%02x.%x\n",
(unsigned int)p->pci_device.domain,
(unsigned int)p->pci_device.bus,
(unsigned int)p->pci_device.device,
(unsigned int)p->pci_device.function);
return -EINVAL;
}
return OK;
}
dev = pci_get_device(p->vendor_id, p->device_id, dev);
@@ -136,6 +147,7 @@ int esc_mods_find_pci_dev_2(struct file *pfile,
int esc_mods_find_pci_dev(struct file *pfile,
struct MODS_FIND_PCI_DEVICE *p)
{
MODS_PRIV private_data = pfile->private_data;
struct pci_dev *dev;
int index = 0;
@@ -152,6 +164,15 @@ int esc_mods_find_pci_dev(struct file *pfile,
p->bus_number = dev->bus->number;
p->device_number = PCI_SLOT(dev->devfn);
p->function_number = PCI_FUNC(dev->devfn);
/* Enable device on the PCI bus */
if (mods_enable_device(private_data, dev)) {
mods_error_printk(
"unable to enable dev %02x:%02x.%x\n",
(unsigned int)p->bus_number,
(unsigned int)p->device_number,
(unsigned int)p->function_number);
return -EINVAL;
}
return OK;
}
/* Only return devices in the first domain, but don't assume
@@ -168,6 +189,7 @@ int esc_mods_find_pci_dev(struct file *pfile,
int esc_mods_find_pci_class_code_2(struct file *pfile,
struct MODS_FIND_PCI_CLASS_CODE_2 *p)
{
MODS_PRIV private_data = pfile->private_data;
struct pci_dev *dev;
int index = 0;
@@ -182,6 +204,16 @@ int esc_mods_find_pci_class_code_2(struct file *pfile,
p->pci_device.bus = dev->bus->number;
p->pci_device.device = PCI_SLOT(dev->devfn);
p->pci_device.function = PCI_FUNC(dev->devfn);
/* Enable device on the PCI bus */
if (mods_enable_device(private_data, dev)) {
mods_error_printk(
"unable to enable dev %04x:%02x:%02x.%x\n",
(unsigned int)p->pci_device.domain,
(unsigned int)p->pci_device.bus,
(unsigned int)p->pci_device.device,
(unsigned int)p->pci_device.function);
return -EINVAL;
}
return OK;
}
dev = pci_get_class(p->class_code, dev);
@@ -194,6 +226,7 @@ int esc_mods_find_pci_class_code_2(struct file *pfile,
int esc_mods_find_pci_class_code(struct file *pfile,
struct MODS_FIND_PCI_CLASS_CODE *p)
{
MODS_PRIV private_data = pfile->private_data;
struct pci_dev *dev;
int index = 0;
@@ -207,6 +240,15 @@ int esc_mods_find_pci_class_code(struct file *pfile,
p->bus_number = dev->bus->number;
p->device_number = PCI_SLOT(dev->devfn);
p->function_number = PCI_FUNC(dev->devfn);
/* Enable device on the PCI bus */
if (mods_enable_device(private_data, dev)) {
mods_error_printk(
"unable to enable dev %02x:%02x.%x\n",
(unsigned int)p->bus_number,
(unsigned int)p->device_number,
(unsigned int)p->function_number);
return -EINVAL;
}
return OK;
}
/* Only return devices in the first domain, but don't assume
@@ -817,109 +859,3 @@ int esc_mods_pci_unmap_resource(struct file *fp,
return OK;
#endif
}

#if defined(MODS_HAS_SET_PPC_TCE_BYPASS)
int esc_mods_get_ats_address_range(struct file *fp,
struct MODS_GET_ATS_ADDRESS_RANGE *p)
{
unsigned int devfn;
struct pci_dev *dev;
struct pci_dev *npu_dev;
struct device_node *mem_node = NULL;
const __u32 *val32;
const __u64 *val64;
int len;
int ret = -EINVAL;

LOG_ENT();

mods_debug_printk(DEBUG_PCICFG,
"get ats addr, dev %04x:%x:%02x:%x, npu index %d\n",
(int)p->pci_device.domain,
(int)p->pci_device.bus,
(int)p->pci_device.device,
(int)p->pci_device.function,
(int)p->npu_index);

devfn = PCI_DEVFN(p->pci_device.device, p->pci_device.function);
dev = MODS_PCI_GET_SLOT(p->pci_device.domain, p->pci_device.bus, devfn);
if (dev == NULL) {
mods_error_printk("PCI device %04x:%x:%02x.%x not found\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
goto exit;
}

npu_dev = pnv_pci_get_npu_dev(dev, p->npu_index);
if (npu_dev == NULL) {
mods_error_printk("NPU device for %04x:%x:%02x.%x not found\n",
p->pci_device.domain,
p->pci_device.bus,
p->pci_device.device,
p->pci_device.function);
goto exit;
}

p->npu_device.domain = pci_domain_nr(npu_dev->bus);
p->npu_device.bus = npu_dev->bus->number;
p->npu_device.device = PCI_SLOT(npu_dev->devfn);
p->npu_device.function = PCI_FUNC(npu_dev->devfn);

mods_debug_printk(DEBUG_PCICFG,
"Found NPU device %04x:%x:%02x.%x\n",
p->npu_device.domain,
p->npu_device.bus,
p->npu_device.device,
p->npu_device.function);

val32 = (const __u32 *)of_get_property(npu_dev->dev.of_node,
"memory-region",
&len);
if (!val32 || len < 4) {
mods_error_printk("Property memory-region for NPU not found\n");
goto exit;
}

mem_node = of_find_node_by_phandle(be32_to_cpu(*val32));
if (!mem_node) {
mods_error_printk("Node memory-region for NPU not found\n");
goto exit;
}

p->numa_memory_node = of_node_to_nid(mem_node);
if (p->numa_memory_node == NUMA_NO_NODE) {
mods_error_printk("NUMA node for NPU not found\n");
goto exit;
}

val64 = (const __u64 *)of_get_property(npu_dev->dev.of_node,
"ibm,device-tgt-addr",
&len);
if (!val64 || len < 8) {
mods_error_printk(
"Property ibm,device-tgt-addr for NPU not found\n");
goto exit;
}

p->phys_addr = be64_to_cpu(*val64);

val64 = (const __u64 *)of_get_property(mem_node, "reg", &len);
if (!val64 || len < 16) {
mods_error_printk("Property reg for memory region not found\n");
goto exit;
}

p->guest_addr = be64_to_cpu(val64[0]);
p->aperture_size = be64_to_cpu(val64[1]);

ret = OK;

exit:
if (mem_node)
of_node_put(mem_node);
LOG_EXT();
return ret;
}
#endif