[dpdk-dev] [PATCH v3 2/2] net/tap: add queues when attaching from secondary process

Wiles, Keith keith.wiles at intel.com
Thu Sep 27 15:04:27 CEST 2018



> On Sep 27, 2018, at 6:19 AM, Raslan Darawsheh <rasland at mellanox.com> wrote:
> 
> In the case the device is created by the primary process,
> the secondary must request some file descriptors to attach the queues.
> The file descriptors are shared via IPC Unix socket.
> 
> Thanks to the IPC synchronization, the secondary process
> is now able to do Rx/Tx on a TAP created by the primary process.
> 
> Signed-off-by: Raslan Darawsheh <rasland at mellanox.com>
> Signed-off-by: Thomas Monjalon <thomas at monjalon.net>
> 
> ---
> v2:
>   - translate file descriptors via IPC API
>   - add documentation
> v3:
>   - rabse the commit
>   - use private static array for fd's to be local for each process
> 
> ---
> ---
> doc/guides/nics/tap.rst                |  16 ++++
> doc/guides/rel_notes/release_18_11.rst |   4 +
> drivers/net/tap/Makefile               |   1 +
> drivers/net/tap/rte_eth_tap.c          | 133 +++++++++++++++++++++++++++++++++
> 4 files changed, 154 insertions(+)
> 
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index 2714868..d1f3e1c 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -152,6 +152,22 @@ Distribute IPv4 TCP packets using RSS to a given MAC address over queues 0-3::
>    testpmd> flow create 0 priority 4 ingress pattern eth dst is 0a:0b:0c:0d:0e:0f \
>             / ipv4 / tcp / end actions rss queues 0 1 2 3 end / end
> 
> +Multi-process sharing
> +---------------------
> +
> +It is possible to attach an existing TAP device in a secondary process,
> +by declaring it as a vdev with the same name as in the primary process,
> +and without any parameter.
> +
> +The port attached in a secondary process will give access to the
> +statistics and the queues.
> +Therefore it can be used for monitoring or Rx/Tx processing.
> +
> +The IPC synchronization of Rx/Tx queues is currently limited:
> +
> +  - Only 8 queues
> +  - Synchronized on probing, but not on later port update
> +
> Example
> -------
> 
> diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
> index 8c4bb54..a9dda5b 100644
> --- a/doc/guides/rel_notes/release_18_11.rst
> +++ b/doc/guides/rel_notes/release_18_11.rst
> @@ -67,6 +67,10 @@ New Features
>   SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
>   vdev_netvsc, tap, and failsafe drivers combination.
> 
> +* **Added TAP Rx/Tx queues sharing with a secondary process.**
> +
> +  A secondary process can attach a TAP device created in the primary process,
> +  probe the queues, and process Rx/Tx in a secondary process.
> 
> API Changes
> -----------
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> index 3243365..7748283 100644
> --- a/drivers/net/tap/Makefile
> +++ b/drivers/net/tap/Makefile
> @@ -22,6 +22,7 @@ CFLAGS += -O3
> CFLAGS += -I$(SRCDIR)
> CFLAGS += -I.
> CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
> LDLIBS += -lrte_bus_vdev -lrte_gso
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 8cc4552..8d276eb 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -16,6 +16,8 @@
> #include <rte_debug.h>
> #include <rte_ip.h>
> #include <rte_string_fns.h>
> +#include <rte_ethdev.h>
> +#include <rte_errno.h>
> 
> #include <assert.h>
> #include <sys/types.h>
> @@ -62,6 +64,9 @@
> #define TAP_GSO_MBUFS_NUM \
> 	(TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
> 
> +/* IPC key for queue fds sync */
> +#define TAP_MP_KEY "tap_mp_sync_queues"
> +
> static struct rte_vdev_driver pmd_tap_drv;
> static struct rte_vdev_driver pmd_tun_drv;
> static struct pmd_process_private *process_private;
> @@ -101,6 +106,17 @@ enum ioctl_mode {
> 	REMOTE_ONLY,
> };
> 
> +/* Message header to synchronize queues via IPC */
> +struct ipc_queues {
> +	char port_name[RTE_DEV_NAME_MAX_LEN];
> +	int rxq_count;
> +	int txq_count;
> +	/*
> +	 * The file descriptors are in the dedicated part
> +	 * of the Unix message to be translated by the kernel.
> +	 */
> +};
> +
> static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
> 
> /**
> @@ -1980,6 +1996,100 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
> 	return ret;
> }
> 
> +/* Request queue file descriptors from secondary to primary. */
> +static int
> +tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> +{
> +	int ret;
> +	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> +	struct rte_mp_msg request, *reply;
> +	struct rte_mp_reply replies;
> +	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
> +	struct ipc_queues *reply_param;
> +	int queue, fd_iterator;
> +
> +	/* Prepare the request */
> +	strcpy(request.name, TAP_MP_KEY);
> +	strcpy(request_param->port_name, port_name);

Should we not be using the strlcpy() functions here.
> +	request.len_param = sizeof(*request_param);
> +	/* Send request and receive reply */
> +	ret = rte_mp_request_sync(&request, &replies, &timeout);
> +	if (ret < 0) {
> +		TAP_LOG(ERR, "Failed to request queues from primary: %d",
> +			rte_errno);
> +		return -1;
> +	}
> +	/* FIXME: handle replies.nb_received > 1 */
> +	reply = &replies.msgs[0];
> +	reply_param = (struct ipc_queues *)reply->param;
> +	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
> +
> +	/* Attach the queues from received file descriptors */
> +
> +	dev->data->nb_rx_queues = reply_param->rxq_count;
> +	dev->data->nb_tx_queues = reply_param->txq_count;

Do we really need a rxq_count and txq_count as they are always the same it seems? Just a comment not a request to change it.
> +	fd_iterator = 0;
> +	for (queue = 0; queue < reply_param->rxq_count; queue++)
> +		process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
> +	for (queue = 0; queue < reply_param->txq_count; queue++)
> +		process_private->txq_fds[queue] = reply->fds[fd_iterator++];
> +
> +

Extra blank line here, needs to be removed.
> +	return 0;
> +}
> +
> +/* Send the queue file descriptors from the primary process to secondary. */
> +static int
> +tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> +{
> +	struct rte_eth_dev *dev;
> +	struct rte_mp_msg reply;
> +	const struct ipc_queues *request_param =
> +		(const struct ipc_queues *)request->param;
> +	struct ipc_queues *reply_param =
> +		(struct ipc_queues *)reply.param;
> +	uint16_t port_id;
> +	int queue;
> +	int ret;
> +
> +	/* Get requested port */
> +	TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
> +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get port id for %s",
> +			request_param->port_name);
> +		return -1;
> +	}
> +	dev = &rte_eth_devices[port_id];
> +
> +	/* Fill file descriptors for all queues */
> +	reply.num_fds = 0;
> +	reply_param->rxq_count = 0;
> +	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> +		reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
> +		reply_param->rxq_count++;
> +	}
> +	assert(reply_param->rxq_count == dev->data->nb_rx_queues);

Pick an assert method assert() or RTE_ASSERT() why have both? I would suggest using RTE_ASSERT everywhere.
> +	reply_param->txq_count = 0;
> +	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
> +		reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
> +		reply_param->txq_count++;
> +	}
> +	assert(reply_param->txq_count == dev->data->nb_tx_queues);
> +	/* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */
> +	RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);

I do not like FIXME or TODO statements in the code. For FIXME we need to fix it or remove the comment. For TODO we need to put it in a enhancement list and do it later.
> +
> +	/* Send reply */
> +	strcpy(reply.name, request->name);
> +	strcpy(reply_param->port_name, request_param->port_name);

strlcpy() here as well.
> +	reply.len_param = sizeof(*reply_param);
> +	if (rte_mp_reply(&reply, peer) < 0) {
> +		TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> /* Open a TAP interface device.
>  */
> static int
> @@ -2009,6 +2119,21 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 		/* TODO: request info from primary to set up Rx and Tx */
> 		eth_dev->dev_ops = &ops;
> 		eth_dev->device = &dev->device;
> +		eth_dev->rx_pkt_burst = pmd_rx_burst;
> +		eth_dev->tx_pkt_burst = pmd_tx_burst;
> +		if (!rte_eal_primary_proc_alive(NULL)) {
> +			TAP_LOG(ERR, "Primary process is missing");
> +			return -1;
> +		}
> +		process_private = (struct pmd_process_private *)
> +			rte_zmalloc_socket(name,
> +				sizeof(struct pmd_process_private),
> +					RTE_CACHE_LINE_SIZE,
> +					eth_dev->device->numa_node);
> +
> +		ret = tap_mp_attach_queues(name, eth_dev);
> +		if (ret != 0)
> +			return -1;
> 		rte_eth_dev_probing_finish(eth_dev);
> 		return 0;
> 	}
> @@ -2056,6 +2181,13 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
> 		name, tap_name);
> 
> +	/* Register IPC feed callback */
> +	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
> +	if (ret < 0 && rte_errno != EEXIST) {
> +		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
> +			tuntap_name, strerror(rte_errno));
> +		goto leave;
> +	}
> 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> 		ETH_TUNTAP_TYPE_TAP);
> 
> @@ -2063,6 +2195,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	if (ret == -1) {
> 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
> 			name, tap_name);
> +		rte_mp_action_unregister(TAP_MP_KEY);
> 		tap_unit--;		/* Restore the unit number */
> 	}
> 	rte_kvargs_free(kvlist);
> -- 
> 2.7.4
> 

Regards,
Keith



More information about the dev mailing list