[dpdk-dev] [PATCH V20 1/4] bus/pci: introduce device hot unplug handle

Ananyev, Konstantin konstantin.ananyev at intel.com
Fri Apr 20 12:32:01 CEST 2018


Hi Jeff,

> As of device hot unplug, we need some preparatory measures so that we will
> not encounter memory fault after device be plug out of the system,
> and also let we could recover the running data path but not been break.
> This patch allows the buses to handle device hot unplug event.
> The patch only enable the ops in pci bus, when handle device hot unplug
> event, remap a dummy memory to avoid bus read/write error.
> Other buses could accordingly implement this ops specific by themselves.
> 
> Signed-off-by: Jeff Guo <jia.guo at intel.com>
> ---
> v20->19:
> clean the code
> ---
>  drivers/bus/pci/pci_common.c            | 67 +++++++++++++++++++++++++++++++++
>  drivers/bus/pci/pci_common_uio.c        | 32 ++++++++++++++++
>  drivers/bus/pci/private.h               | 12 ++++++
>  lib/librte_eal/common/include/rte_bus.h | 16 ++++++++
>  4 files changed, 127 insertions(+)
> 
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index 2a00f36..709eaf3 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -474,6 +474,72 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
>  }
> 
>  static int
> +pci_handle_hot_unplug(struct rte_device *dev, void *failure_addr)
> +{
> +	struct rte_pci_device *pdev = NULL;
> +	int ret = 0, i, isfound = 0;
> +
> +	if (failure_addr != NULL) {
> +		FOREACH_DEVICE_ON_PCIBUS(pdev) {
> +			for (i = 0; i != sizeof(pdev->mem_resource) /
> +				sizeof(pdev->mem_resource[0]); i++) {

You can do i != RTE_DIM(pdev->mem_resource) here.

> +				if ((uint64_t)failure_addr >=
> +				    (uint64_t)pdev->mem_resource[i].addr &&
> +				    (uint64_t)failure_addr <=
> +				    (uint64_t)pdev->mem_resource[i].addr +
> +				    pdev->mem_resource[i].len) {


I think the upper bound should be exclusive, i.e. failure_addr < addr + len.

> +					RTE_LOG(ERR, EAL, "Failure address "
> +						"%16.16"PRIx64" is belong to "
> +						"resource of device %s!\n",
> +						(uint64_t)failure_addr,
> +						pdev->device.name);
> +					isfound = 1;
> +					break;
> +				}
> +			}
> +			if (isfound)
> +				break;


It might be a good idea to move the code that searches for the address into a separate function.

> +		}
> +	} else if (dev != NULL) {
> +		pdev = RTE_DEV_TO_PCI(dev);
> +	} else {
> +		return -EINVAL;
> +	}
> +
> +	if (!pdev)
> +		return -1;
> +
> +	/* remap resources for devices */
> +	switch (pdev->kdrv) {
> +	case RTE_KDRV_VFIO:
> +#ifdef VFIO_PRESENT
> +		/* TODO */
> +#endif

This should set ret = -1, as it is not implemented yet.

> +		break;
> +	case RTE_KDRV_IGB_UIO:
> +	case RTE_KDRV_UIO_GENERIC:
> +		if (rte_eal_using_phys_addrs()) {
> +			/* map resources for devices that use uio */
> +			ret = pci_uio_remap_resource(pdev);
> +		}
> +		break;
> +	case RTE_KDRV_NIC_UIO:
> +		ret = pci_uio_remap_resource(pdev);
> +		break;
> +	default:
> +		RTE_LOG(DEBUG, EAL,
> +			"  Not managed by a supported kernel driver, skipped\n");
> +		ret = -1;
> +		break;
> +	}
> +
> +	if (ret != 0)
> +		RTE_LOG(ERR, EAL, "failed to handle hot unplug of %s",
> +			pdev->name);
> +	return ret;
> +}
> +
> +static int
>  pci_plug(struct rte_device *dev)
>  {
>  	return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
> @@ -503,6 +569,7 @@ struct rte_pci_bus rte_pci_bus = {
>  		.unplug = pci_unplug,
>  		.parse = pci_parse,
>  		.get_iommu_class = rte_pci_get_iommu_class,
> +		.handle_hot_unplug = pci_handle_hot_unplug,
>  	},
>  	.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
>  	.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
> diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
> index 54bc20b..ba2c458 100644
> --- a/drivers/bus/pci/pci_common_uio.c
> +++ b/drivers/bus/pci/pci_common_uio.c
> @@ -146,6 +146,38 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
>  	}
>  }
> 
> +/* remap the PCI resource of a PCI device in anonymous virtual memory */
> +int
> +pci_uio_remap_resource(struct rte_pci_device *dev)
> +{
> +	int i;
> +	void *map_address;
> +
> +	if (dev == NULL)
> +		return -1;
> +
> +	/* Remap all BARs */
> +	for (i = 0; i != PCI_MAX_RESOURCE; i++) {
> +		/* skip empty BAR */
> +		if (dev->mem_resource[i].phys_addr == 0)
> +			continue;
> +		pci_unmap_resource(dev->mem_resource[i].addr,
> +				(size_t)dev->mem_resource[i].len);
> +		map_address = pci_map_resource(
> +				dev->mem_resource[i].addr, -1, 0,
> +				(size_t)dev->mem_resource[i].len,
> +				MAP_ANONYMOUS | MAP_FIXED);

Instead of using munmap()/mmap(), can we use mremap() here?
That might be a slightly safer approach.

> +		if (map_address == MAP_FAILED) {
> +			RTE_LOG(ERR, EAL,
> +				"Cannot remap resource for device %s\n",
> +				dev->name);
> +			return -1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static struct mapped_pci_resource *
>  pci_uio_find_resource(struct rte_pci_device *dev)
>  {
> diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
> index 88fa587..cc1668c 100644
> --- a/drivers/bus/pci/private.h
> +++ b/drivers/bus/pci/private.h
> @@ -173,6 +173,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev,
>  		struct mapped_pci_resource *uio_res);
> 
>  /**
> + * remap the pci uio resource.
> + *
> + * @param dev
> + *   Point to the struct rte pci device.
> + * @return
> + *   - On success, zero.
> + *   - On failure, a negative value.
> + */
> +int
> +pci_uio_remap_resource(struct rte_pci_device *dev);
> +
> +/**
>   * Map device memory to uio resource
>   *
>   * This function is private to EAL.
> diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
> index 6fb0834..d2c5778 100644
> --- a/lib/librte_eal/common/include/rte_bus.h
> +++ b/lib/librte_eal/common/include/rte_bus.h
> @@ -168,6 +168,20 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
>  typedef int (*rte_bus_parse_t)(const char *name, void *addr);
> 
>  /**
> + * Implementation specific hot unplug handler function which is responsible
> + * for handle the failure when hot unplug the device, guaranty the system
> + * would not crash in the case.
> + * @param dev
> + *	Pointer of the device structure.
> + *
> + * @return
> + *	0 on success.
> + *	!0 on error.
> + */
> +typedef int (*rte_bus_handle_hot_unplug_t)(struct rte_device *dev,
> +						void *dev_addr);
> +
> +/**
>   * Bus scan policies
>   */
>  enum rte_bus_scan_mode {
> @@ -209,6 +223,8 @@ struct rte_bus {
>  	rte_bus_plug_t plug;         /**< Probe single device for drivers */
>  	rte_bus_unplug_t unplug;     /**< Remove single device from driver */
>  	rte_bus_parse_t parse;       /**< Parse a device name */
> +	rte_bus_handle_hot_unplug_t handle_hot_unplug; /**< handle hot unplug
> +							device event */
>  	struct rte_bus_conf conf;    /**< Bus configuration */
>  	rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
>  };
> --
> 2.7.4



More information about the dev mailing list