[dpdk-dev] [PATCH V20 1/4] bus/pci: introduce device hot unplug handle

Guo, Jia jia.guo at intel.com
Thu May 3 05:05:29 CEST 2018



On 4/20/2018 6:32 PM, Ananyev, Konstantin wrote:
> Hi Jeff,
>
>> As of device hot unplug, we need some preparatory measures so that we will
>> not encounter memory fault after device be plug out of the system,
>> and also let we could recover the running data path but not been break.
>> This patch allows the buses to handle device hot unplug event.
>> The patch only enable the ops in pci bus, when handle device hot unplug
>> event, remap a dummy memory to avoid bus read/write error.
>> Other buses could accordingly implement this ops specific by themselves.
>>
>> Signed-off-by: Jeff Guo <jia.guo at intel.com>
>> ---
>> v20->19:
>> clean the code
>> ---
>>   drivers/bus/pci/pci_common.c            | 67 +++++++++++++++++++++++++++++++++
>>   drivers/bus/pci/pci_common_uio.c        | 32 ++++++++++++++++
>>   drivers/bus/pci/private.h               | 12 ++++++
>>   lib/librte_eal/common/include/rte_bus.h | 16 ++++++++
>>   4 files changed, 127 insertions(+)
>>
>> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
>> index 2a00f36..709eaf3 100644
>> --- a/drivers/bus/pci/pci_common.c
>> +++ b/drivers/bus/pci/pci_common.c
>> @@ -474,6 +474,72 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
>>   }
>>
>>   static int
>> +pci_handle_hot_unplug(struct rte_device *dev, void *failure_addr)
>> +{
>> +	struct rte_pci_device *pdev = NULL;
>> +	int ret = 0, i, isfound = 0;
>> +
>> +	if (failure_addr != NULL) {
>> +		FOREACH_DEVICE_ON_PCIBUS(pdev) {
>> +			for (i = 0; i != sizeof(pdev->mem_resource) /
>> +				sizeof(pdev->mem_resource[0]); i++) {
> You can do i != RTE_DIM(pdev->mem_resource) here.
sure.
>> +				if ((uint64_t)failure_addr >=
>> +				    (uint64_t)pdev->mem_resource[i].addr &&
>> +				    (uint64_t)failure_addr <=
>> +				    (uint64_t)pdev->mem_resource[i].addr +
>> +				    pdev->mem_resource[i].len) {
>
> I think it should be failure_addr < addr + len
i think you are right.
>> +					RTE_LOG(ERR, EAL, "Failure address "
>> +						"%16.16"PRIx64" is belong to "
>> +						"resource of device %s!\n",
>> +						(uint64_t)failure_addr,
>> +						pdev->device.name);
>> +					isfound = 1;
>> +					break;
>> +				}
>> +			}
>> +			if (isfound)
>> +				break;
>
> Might be it is a good thing to put the code that searches for address into a separate function.
good idea.
>> +		}
>> +	} else if (dev != NULL) {
>> +		pdev = RTE_DEV_TO_PCI(dev);
>> +	} else {
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (!pdev)
>> +		return -1;
>> +
>> +	/* remap resources for devices */
>> +	switch (pdev->kdrv) {
>> +	case RTE_KDRV_VFIO:
>> +#ifdef VFIO_PRESENT
>> +		/* TODO */
>> +#endif
> Should set ret =-1 as not implemented now.
ok.
>> +		break;
>> +	case RTE_KDRV_IGB_UIO:
>> +	case RTE_KDRV_UIO_GENERIC:
>> +		if (rte_eal_using_phys_addrs()) {
>> +			/* map resources for devices that use uio */
>> +			ret = pci_uio_remap_resource(pdev);
>> +		}
>> +		break;
>> +	case RTE_KDRV_NIC_UIO:
>> +		ret = pci_uio_remap_resource(pdev);
>> +		break;
>> +	default:
>> +		RTE_LOG(DEBUG, EAL,
>> +			"  Not managed by a supported kernel driver, skipped\n");
>> +		ret = -1;
>> +		break;
>> +	}
>> +
>> +	if (ret != 0)
>> +		RTE_LOG(ERR, EAL, "failed to handle hot unplug of %s",
>> +			pdev->name);
>> +	return ret;
>> +}
>> +
>> +static int
>>   pci_plug(struct rte_device *dev)
>>   {
>>   	return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
>> @@ -503,6 +569,7 @@ struct rte_pci_bus rte_pci_bus = {
>>   		.unplug = pci_unplug,
>>   		.parse = pci_parse,
>>   		.get_iommu_class = rte_pci_get_iommu_class,
>> +		.handle_hot_unplug = pci_handle_hot_unplug,
>>   	},
>>   	.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
>>   	.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
>> diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
>> index 54bc20b..ba2c458 100644
>> --- a/drivers/bus/pci/pci_common_uio.c
>> +++ b/drivers/bus/pci/pci_common_uio.c
>> @@ -146,6 +146,38 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
>>   	}
>>   }
>>
>> +/* remap the PCI resource of a PCI device in anonymous virtual memory */
>> +int
>> +pci_uio_remap_resource(struct rte_pci_device *dev)
>> +{
>> +	int i;
>> +	void *map_address;
>> +
>> +	if (dev == NULL)
>> +		return -1;
>> +
>> +	/* Remap all BARs */
>> +	for (i = 0; i != PCI_MAX_RESOURCE; i++) {
>> +		/* skip empty BAR */
>> +		if (dev->mem_resource[i].phys_addr == 0)
>> +			continue;
>> +		pci_unmap_resource(dev->mem_resource[i].addr,
>> +				(size_t)dev->mem_resource[i].len);
>> +		map_address = pci_map_resource(
>> +				dev->mem_resource[i].addr, -1, 0,
>> +				(size_t)dev->mem_resource[i].len,
>> +				MAP_ANONYMOUS | MAP_FIXED);
> Instead of using mumap/mmap() can we use mremap() here?
> Might be a bit safer approach.
because of mremap not have the can not map an anonymous memory, so that 
is not fit for this case, and i check and found that  MAP_FIXED could 
overlap the part of the existing mapping, no need to use unmap at first 
before remap.
>> +		if (map_address == MAP_FAILED) {
>> +			RTE_LOG(ERR, EAL,
>> +				"Cannot remap resource for device %s\n",
>> +				dev->name);
>> +			return -1;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   static struct mapped_pci_resource *
>>   pci_uio_find_resource(struct rte_pci_device *dev)
>>   {
>> diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
>> index 88fa587..cc1668c 100644
>> --- a/drivers/bus/pci/private.h
>> +++ b/drivers/bus/pci/private.h
>> @@ -173,6 +173,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev,
>>   		struct mapped_pci_resource *uio_res);
>>
>>   /**
>> + * remap the pci uio resource.
>> + *
>> + * @param dev
>> + *   Point to the struct rte pci device.
>> + * @return
>> + *   - On success, zero.
>> + *   - On failure, a negative value.
>> + */
>> +int
>> +pci_uio_remap_resource(struct rte_pci_device *dev);
>> +
>> +/**
>>    * Map device memory to uio resource
>>    *
>>    * This function is private to EAL.
>> diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
>> index 6fb0834..d2c5778 100644
>> --- a/lib/librte_eal/common/include/rte_bus.h
>> +++ b/lib/librte_eal/common/include/rte_bus.h
>> @@ -168,6 +168,20 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
>>   typedef int (*rte_bus_parse_t)(const char *name, void *addr);
>>
>>   /**
>> + * Implementation specific hot unplug handler function which is responsible
>> + * for handle the failure when hot unplug the device, guaranty the system
>> + * would not crash in the case.
>> + * @param dev
>> + *	Pointer of the device structure.
>> + *
>> + * @return
>> + *	0 on success.
>> + *	!0 on error.
>> + */
>> +typedef int (*rte_bus_handle_hot_unplug_t)(struct rte_device *dev,
>> +						void *dev_addr);
>> +
>> +/**
>>    * Bus scan policies
>>    */
>>   enum rte_bus_scan_mode {
>> @@ -209,6 +223,8 @@ struct rte_bus {
>>   	rte_bus_plug_t plug;         /**< Probe single device for drivers */
>>   	rte_bus_unplug_t unplug;     /**< Remove single device from driver */
>>   	rte_bus_parse_t parse;       /**< Parse a device name */
>> +	rte_bus_handle_hot_unplug_t handle_hot_unplug; /**< handle hot unplug
>> +							device event */
>>   	struct rte_bus_conf conf;    /**< Bus configuration */
>>   	rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
>>   };
>> --
>> 2.7.4



More information about the dev mailing list