[dpdk-dev] [PATCH V20 2/4] eal: add failure handler mechanism for hot plug

Ananyev, Konstantin konstantin.ananyev at intel.com
Fri Apr 20 13:14:49 CEST 2018



> This patch introduces a failure handler mechanism to handle device
> hot unplug event. When device be hot plug out, the device resource
> become invalid, if this resource is still be unexpected read/write,
> system will crash. This patch let eal help application to handle
> this fault, when sigbus error occur, check the failure address and
> accordingly remap the invalid memory for the corresponding device,
> that could guaranty the application not to be shut down when hot plug.
> 
> Signed-off-by: Jeff Guo <jia.guo at intel.com>
> ---
> v20->v19:
> refine the logic of remapping for multiple device.
> ---
>  doc/guides/rel_notes/release_18_05.rst  |   6 ++
>  lib/librte_eal/common/include/rte_dev.h |  11 +++
>  lib/librte_eal/linuxapp/eal/eal_dev.c   | 124 +++++++++++++++++++++++++++++++-
>  lib/librte_eal/rte_eal_version.map      |   1 +
>  4 files changed, 141 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
> index a018ef5..a4ea9af 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -70,6 +70,12 @@ New Features
> 
>    Linux uevent is supported as backend of this device event notification framework.
> 
> +* **Added hot plug failure handler.**
> +
> +  Added a failure handler machenism to handle hot unplug device.
> +
> +  * ``rte_dev_handle_hot_unplug`` for handle hot unplug device failure.
> +
> 
>  API Changes
>  -----------
> diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
> index 0955e9a..9933131 100644
> --- a/lib/librte_eal/common/include/rte_dev.h
> +++ b/lib/librte_eal/common/include/rte_dev.h
> @@ -360,4 +360,15 @@ rte_dev_event_monitor_start(void);
>  int __rte_experimental
>  rte_dev_event_monitor_stop(void);
> 
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * It can be used to handle the device signal bus error. when signal bus error
> + * occur, the handler would check the failure address to find the corresponding
> + * device and remap the memory resource of the device, that would guaranty
> + * the system not crash when the device be hot unplug.
> + */
> +void __rte_experimental
> +rte_dev_handle_hot_unplug(void);
>  #endif /* _RTE_DEV_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
> index 9478a39..33e7026 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> @@ -4,6 +4,8 @@
> 
>  #include <string.h>
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <signal.h>
>  #include <sys/socket.h>
>  #include <linux/netlink.h>
> 
> @@ -13,12 +15,16 @@
>  #include <rte_malloc.h>
>  #include <rte_interrupts.h>
>  #include <rte_alarm.h>
> +#include <rte_bus.h>
> +#include <rte_eal.h>
> 
>  #include "eal_private.h"
> 
>  static struct rte_intr_handle intr_handle = {.fd = -1 };
>  static bool monitor_started;
> 
> +extern struct rte_bus_list rte_bus_list;
> +
>  #define EAL_UEV_MSG_LEN 4096
>  #define EAL_UEV_MSG_ELEM_LEN 128
> 
> @@ -33,6 +39,68 @@ enum eal_dev_event_subsystem {
>  };
> 
>  static int
> +dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
> +{
> +	struct rte_bus *bus;
> +	int ret = 0;
> +
> +	if (!dev && !dev_addr) {
> +		return -EINVAL;
> +	} else if (dev) {
> +		bus = rte_bus_find_by_device_name(dev->name);
> +		if (bus->handle_hot_unplug) {
> +			/**
> +			 * call bus ops to handle hot unplug.
> +			 */
> +			ret = bus->handle_hot_unplug(dev, dev_addr);
> +			if (ret) {
> +				RTE_LOG(ERR, EAL,
> +					"It cannot handle hot unplug "
> +					"for device (%s) "
> +					"on the bus.\n ",
> +					dev->name);
> +			}
> +		}


You would retrun 0 if bus->handle_hot_unplug == NULL.
Is that intended?
Shouldn't be I think.

> +	} else {
> +		TAILQ_FOREACH(bus, &rte_bus_list, next) {
> +			if (bus->handle_hot_unplug) {
> +				/**
> +				 * call bus ops to handle hot unplug.
> +				 */
> +				ret = bus->handle_hot_unplug(dev, dev_addr);
> +				if (ret) {
> +					RTE_LOG(ERR, EAL,
> +						"It cannot handle hot unplug "
> +						"for the device "
> +						"on the bus.\n ");
> +				}

So how we would know what happened here:
That address doesn't belong to that bus or unplug_handler failed?
Should we separate search and unplug ops?
Another question - shouldn't we break out of loop if bus->handle_hot_unplug()
returns 0?
Otherwise you can return error value even when unplug handled worked correctly.


> +			}
> +		}
> +	}
> +	return ret;
> +}
> +
> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> +				void *ctx __rte_unused)
> +{
> +	int ret;
> +
> +	RTE_LOG(ERR, EAL, "SIGBUS error, fault address:%p\n", info->si_addr);
> +	ret = dev_uev_failure_process(NULL, info->si_addr);

As now you can try to mmap/munmap same address from two or more different threads
you probably need some synchronization here.
Something simple as spinlock seems to be enough here. 
We might have one per device or might be even a global one would be ok here.

> +	if (!ret)
> +		RTE_LOG(DEBUG, EAL,
> +			"SIGBUS error is because of hot unplug!\n");
> +}
> +
> +static int cmp_dev_name(const struct rte_device *dev,
> +	const void *_name)
> +{
> +	const char *name = _name;
> +
> +	return strcmp(dev->name, name);
> +}

Is it really worth a separate function?

> +
> +static int
>  dev_uev_socket_fd_create(void)
>  {
>  	struct sockaddr_nl addr;
> @@ -146,6 +214,9 @@ dev_uev_handler(__rte_unused void *param)
>  	struct rte_dev_event uevent;
>  	int ret;
>  	char buf[EAL_UEV_MSG_LEN];
> +	struct rte_bus *bus;
> +	struct rte_device *dev;
> +	const char *busname;
> 
>  	memset(&uevent, 0, sizeof(struct rte_dev_event));
>  	memset(buf, 0, EAL_UEV_MSG_LEN);
> @@ -170,8 +241,41 @@ dev_uev_handler(__rte_unused void *param)
>  	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
>  		uevent.devname, uevent.type, uevent.subsystem);
> 
> -	if (uevent.devname)
> +	switch (uevent.subsystem) {
> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
> +		busname = "pci";
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (uevent.devname) {
> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
> +			bus = rte_bus_find_by_name(busname);
> +			if (bus == NULL) {
> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			dev = bus->find_device(NULL, cmp_dev_name,
> +					       uevent.devname);
> +			if (dev == NULL) {
> +				RTE_LOG(ERR, EAL,
> +					"Cannot find unplugged device (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			ret = dev_uev_failure_process(dev, NULL);
> +			if (ret) {
> +				RTE_LOG(ERR, EAL, "Driver cannot remap the "
> +					"device (%s)\n",
> +					dev->name);
> +				return;
> +			}
> +		}
>  		dev_callback_process(uevent.devname, uevent.type);
> +	}
>  }
> 
>  int __rte_experimental
> @@ -216,8 +320,26 @@ rte_dev_event_monitor_stop(void)
>  		return ret;
>  	}
> 
> +	/* recover sigbus. */
> +	sigaction(SIGBUS, NULL, NULL);
> +

Probably better to restore previous action.

>  	close(intr_handle.fd);
>  	intr_handle.fd = -1;
>  	monitor_started = false;
> +
>  	return 0;
>  }
> +
> +void __rte_experimental
> +rte_dev_handle_hot_unplug(void)
> +{
> +	struct sigaction act;
> +
> +	/* set sigbus handler for hotplug. */
> +	memset(&act, 0x00, sizeof(struct sigaction));
> +	act.sa_sigaction = sigbus_handler;
> +	sigemptyset(&act.sa_mask);
> +	sigaddset(&act.sa_mask, SIGBUS);
> +	act.sa_flags = SA_SIGINFO;
> +	sigaction(SIGBUS, &act, NULL);
> +}
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index d02d80b..39a0213 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -217,6 +217,7 @@ EXPERIMENTAL {
>  	rte_dev_event_callback_unregister;
>  	rte_dev_event_monitor_start;
>  	rte_dev_event_monitor_stop;
> +	rte_dev_handle_hot_unplug;
>  	rte_eal_cleanup;
>  	rte_eal_devargs_insert;
>  	rte_eal_devargs_parse;
> --
> 2.7.4



More information about the dev mailing list