[dpdk-dev] [PATCH v2 4/4] vfio: enable vfio hotplug by req notifier handler

Jeff Guo jia.guo at intel.com
Tue Oct 2 07:42:57 CEST 2018


thanks , andrew.

On 10/1/2018 5:47 PM, Andrew Rybchenko wrote:
> On 9/30/18 5:16 PM, Jeff Guo wrote:
>> When device is be hot-unplugged, the vfio kernel module will sent req
>> notifier to request user space to release the allocated resources at
>> first. After that, vfio kernel module will detect the device disappear,
>> and then delete the device in kernel.
>>
>> This patch aim to add req notifier processing to enable hotplug for vfio.
>> By enable the req notifier monitoring and register the notifier callback,
>> when device be hot-unplugged, the hot-unplug handler will be called to
>> process hotplug for vfio.
>>
>> Signed-off-by: Jeff Guo<jia.guo at intel.com>
>> ---
>> v2->v1:
>> refine some code logic.
>> ---
>>   drivers/bus/pci/linux/pci_vfio.c | 95 ++++++++++++++++++++++++++++++++++++++++
>>   drivers/bus/pci/pci_common.c     | 10 +++++
>>   2 files changed, 105 insertions(+)
>>
>> diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
>> index 686386d..c780860 100644
>> --- a/drivers/bus/pci/linux/pci_vfio.c
>> +++ b/drivers/bus/pci/linux/pci_vfio.c
>> @@ -17,6 +17,8 @@
>>   #include <rte_eal_memconfig.h>
>>   #include <rte_malloc.h>
>>   #include <rte_vfio.h>
>> +#include <rte_eal.h>
>> +#include <rte_bus.h>
>>   
>>   #include "eal_filesystem.h"
>>   
>> @@ -277,6 +279,89 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
>>   	return -1;
>>   }
>>   
>> +static void
>> +pci_vfio_req_handler(void *param)
>> +{
>> +	struct rte_bus *bus;
>> +	int ret;
>> +	struct rte_device *device = (struct rte_device *)param;
>> +
>> +	bus = rte_bus_find_by_device(device);
>> +	if (bus == NULL) {
>> +		RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
>> +			device->name);
>> +		return;
>> +	}
>> +
>> +	/**
>
> Why is doxygen style comment used here?
>

my fault, thanks.


>> +	 * vfio kernel module request user space to release allocated
>> +	 * resources before device be deleted in kernel, so it can directly
>> +	 * call the vfio bus hot-unplug handler to process it.
>> +	 */
>> +	ret = bus->hot_unplug_handler(device);
>> +	if (ret)
>> +		RTE_LOG(ERR, EAL, "Can not handle hot-unplug for "
>> +			"device (%s)\n", device->name);
>
> Consider to avoid format string split to simplify search using grep.
>

ok, i think i could try more to make it not split.


>> +}
>> +
>> +/* enable notifier (only enable req now) */
>> +static int
>> +pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
>> +{
>> +	int ret;
>> +	int fd = -1;
>> +
>> +	/* set up an eventfd for req notifier */
>> +	fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> +	if (fd < 0) {
>> +		RTE_LOG(ERR, EAL, "Cannot set up eventfd, "
>> +				"error %i (%s)\n", errno, strerror(errno));
>
> Consider to avoid format string split to simplify search using grep.
>

ok.


>> +		return -1;
>> +	}
>> +
>> +	dev->req_notifier_handler.fd = fd;
>> +	dev->req_notifier_handler.type = RTE_INTR_HANDLE_VFIO_REQ;
>> +	dev->req_notifier_handler.vfio_dev_fd = vfio_dev_fd;
>> +	ret = rte_intr_callback_register(&dev->req_notifier_handler,
>> +					 pci_vfio_req_handler,
>> +					 (void *)&dev->device);
>> +	if (ret) {
>> +		RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n");
>
> I think we should close(fd) here.
>

you are right here.


>> +		return -1;
>> +	}
>> +
>> +	ret = rte_intr_enable(&dev->req_notifier_handler);
>> +	if (ret) {
>> +		RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n");
>
> I think we should unregister notifier and close(fd) here.
>

ok.


>> +		return -1;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +/*disable notifier (only disable req now) */
>
> Space is missing before disable.
>

thanks.


>> +static int
>> +pci_vfio_disable_notifier(struct rte_pci_device *dev)
>> +{
>> +	int ret;
>> +
>> +	ret = rte_intr_disable(&dev->req_notifier_handler);
>> +	if (ret) {
>> +		RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
>
> I'd like to understand correct way handle errors here. Should we
> terminate here or continue and unregister handler and close FD anyway?
>

I think terminate and directly return anyway should be ok, just show the 
disable failure is make sense.


>> +		return -1;
>> +	}
>> +
>> +	ret = rte_intr_callback_unregister(&dev->req_notifier_handler,
>> +					   pci_vfio_req_handler,
>> +					   (void *)&dev->device);
>> +	if (ret) {
>> +		RTE_LOG(ERR, EAL,
>> +			 "fail to unregister req notifier handler.\n");
>> +		return -1;
>> +	}
>
> Shoudn't we close eventfd?
>

It should be, i think.


>> +	return 0;
>> +}
>> +
>>   static int
>>   pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
>>   {
>> @@ -430,6 +515,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
>>   	struct pci_map *maps;
>>   
>>   	dev->intr_handle.fd = -1;
>> +	dev->req_notifier_handler.fd = -1;
>>   
>>   	/* store PCI address string */
>>   	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
>> @@ -521,6 +607,11 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
>>   		goto err_vfio_res;
>>   	}
>>   
>> +	if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
>> +		RTE_LOG(ERR, EAL, "Error setting up notifier!\n");
>> +		return -1;
>
> I think we should do goto to make required cleanup.
>

you are definitely right.


>> +	}
>> +
>>   	TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
>>   
>>   	return 0;
>> @@ -546,6 +637,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
>>   	struct pci_map *maps;
>>   
>>   	dev->intr_handle.fd = -1;
>> +	dev->req_notifier_handler.fd = -1;
>>   
>>   	/* store PCI address string */
>>   	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
>> @@ -586,6 +678,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
>>   
>>   	/* we need save vfio_dev_fd, so it can be used during release */
>>   	dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
>> +	dev->req_notifier_handler.vfio_dev_fd = vfio_dev_fd;
>>   
>>   	return 0;
>>   err_vfio_dev_fd:
>> @@ -658,6 +751,8 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
>>   	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
>>   			loc->domain, loc->bus, loc->devid, loc->function);
>>   
>> +	pci_vfio_disable_notifier(dev);
>> +
>
> Is it OK to ignore disable failure here? Why? It would be good to see
> explanations in comments. Does it leak eventfd now?
>

It might be leak eventfd, but anyway it should not ignore the check here.


>>   	if (close(dev->intr_handle.fd) < 0) {
>>   		RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
>>   			pci_addr);
>> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
>> index f313fe9..2a8e5e9 100644
>> --- a/drivers/bus/pci/pci_common.c
>> +++ b/drivers/bus/pci/pci_common.c
>> @@ -446,6 +446,16 @@ pci_hot_unplug_handler(struct rte_device *dev)
>>   		return -1;
>>   
>>   	switch (pdev->kdrv) {
>> +	case RTE_KDRV_VFIO:
>> +		/**
>
> Why is doxygen style comment is used here?
>

ok.


>> +		 * vfio kernel module guaranty the pci device would not be
>> +		 * deleted until the user space release the resource, so no
>> +		 * need to remap BARs resource here, just directly notify
>> +		 * the req event to the user space to handle it.
>> +		 */
>> +		rte_dev_event_callback_process(dev->name,
>> +					       RTE_DEV_EVENT_REMOVE);
>> +		break;
>>   	case RTE_KDRV_IGB_UIO:
>>   	case RTE_KDRV_UIO_GENERIC:
>>   	case RTE_KDRV_NIC_UIO:
>


More information about the dev mailing list