[dpdk-dev] [PATCH v7 2/2] vhost: Add VHOST PMD

Ferruh Yigit ferruh.yigit at intel.com
Thu Feb 4 12:17:35 CET 2016


On Thu, Feb 04, 2016 at 04:26:31PM +0900, Tetsuya Mukawa wrote:

Hi Tetsuya,

> The patch introduces a new PMD. This PMD is implemented as thin wrapper
> of librte_vhost. It means librte_vhost is also needed to compile the PMD.
> The vhost messages will be handled only when a port is started. So start
> a port first, then invoke QEMU.
> 
> The PMD has 2 parameters.
>  - iface:  The parameter is used to specify a path to connect to a
>            virtio-net device.
>  - queues: The parameter is used to specify the number of the queues
>            virtio-net device has.
>            (Default: 1)
> 
> Here is an example.
> $ ./testpmd -c f -n 4 --vdev 'eth_vhost0,iface=/tmp/sock0,queues=1' -- -i
> 
> To connect above testpmd, here is qemu command example.
> 
> $ qemu-system-x86_64 \
>         <snip>
>         -chardev socket,id=chr0,path=/tmp/sock0 \
>         -netdev vhost-user,id=net0,chardev=chr0,vhostforce,queues=1 \
>         -device virtio-net-pci,netdev=net0,mq=on
> 
> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>

Please find some more comments, mostly minor nits,

please feel free to add my ack for next version of this patch:
Acked-by: Ferruh Yigit <ferruh.yigit at intel.com>

<...>
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> new file mode 100644
> index 0000000..b2305c2
> --- /dev/null
> +++ b/drivers/net/vhost/rte_eth_vhost.c
<...>
> +
> +struct pmd_internal {
> +	TAILQ_ENTRY(pmd_internal) next;
> +	char *dev_name;
> +	char *iface_name;
> +	uint8_t port_id;

You can also get rid of port_id too, if you keep list of rte_eth_dev.
But this is not so important, keep as it is if you want to.

> +
> +	volatile uint16_t once;
> +};
> +

<...>
> +
> +static int
> +new_device(struct virtio_net *dev)
> +{
<...>
> +
> +	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
> +		vq = eth_dev->data->rx_queues[i];
> +		if (vq == NULL)

can vq be NULL? It is allocated in rx/tx_queue_setup() and there is already a NULL check there?

> +			continue;
> +		vq->device = dev;
> +		vq->internal = internal;
> +		rte_vhost_enable_guest_notification(dev, vq->virtqueue_id, 0);
> +	}
> +	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
> +		vq = eth_dev->data->tx_queues[i];
> +		if (vq == NULL)
> +			continue;
> +		vq->device = dev;
> +		vq->internal = internal;
> +		rte_vhost_enable_guest_notification(dev, vq->virtqueue_id, 0);
> +	}
> +
> +	dev->flags |= VIRTIO_DEV_RUNNING;
> +	dev->priv = eth_dev;
> +	eth_dev->data->dev_link.link_status = 1;
> +
> +	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
> +		vq = eth_dev->data->rx_queues[i];
> +		if (vq == NULL)
> +			continue;
> +		rte_atomic32_set(&vq->allow_queuing, 1);
> +	}
> +	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
> +		vq = eth_dev->data->tx_queues[i];
> +		if (vq == NULL)
> +			continue;
> +		rte_atomic32_set(&vq->allow_queuing, 1);
> +	}
> +
> +	RTE_LOG(INFO, PMD, "New connection established\n");
> +
> +	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC);
> +
> +	return 0;
> +}
> +

<...>
> +
> +static int
> +vring_state_changed(struct virtio_net *dev, uint16_t vring, int enable)
> +{
> +	struct rte_vhost_vring_state *state;
> +	struct pmd_internal *internal;
> +#ifdef RTE_LIBRTE_VHOST_NUMA
> +	int newnode, ret;
> +#endif
> +
> +	if (dev == NULL) {
> +		RTE_LOG(ERR, PMD, "Invalid argument\n");
> +		return -1;
> +	}
> +
> +	internal = find_internal_resource(dev->ifname);
> +	if (internal == NULL) {
> +		RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", dev->ifname);
> +		return -1;
> +	}
> +
> +	state = vring_states[internal->port_id];
> +	if (!state) {
> +		RTE_LOG(ERR, PMD, "Unused port id: %d\n", internal->port_id);
> +		return -1;
> +	}
> +
> +#ifdef RTE_LIBRTE_VHOST_NUMA
> +	ret  = get_mempolicy(&newnode, NULL, 0, dev,
> +			MPOL_F_NODE | MPOL_F_ADDR);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, PMD, "Unknow numa node\n");
> +		return -1;
> +	}
> +
> +	rte_eth_devices[internal->port_id].data->numa_node = newnode;

If you prefer to keep the list of device instead of list of internal data, can escape accessing global device array

> +#endif
> +	rte_spinlock_lock(&state->lock);
> +	state->cur[vring] = enable;
> +	state->max_vring = RTE_MAX(vring, state->max_vring);
> +	rte_spinlock_unlock(&state->lock);
> +
> +	RTE_LOG(INFO, PMD, "vring%u is %s\n",
> +			vring, enable ? "enabled" : "disabled");
> +
> +	_rte_eth_dev_callback_process(&rte_eth_devices[internal->port_id],
> +			RTE_ETH_EVENT_QUEUE_STATE_CHANGE);
> +
> +	return 0;
> +}
> +
<...>
> +
> +static void *
> +vhost_driver_session(void *param __rte_unused)
> +{
> +	static struct virtio_net_device_ops vhost_ops;
> +
> +	/* set vhost arguments */
> +	vhost_ops.new_device = new_device;
> +	vhost_ops.destroy_device = destroy_device;
> +	vhost_ops.vring_state_changed = vring_state_changed;
> +	if (rte_vhost_driver_callback_register(&vhost_ops) < 0)
> +		rte_panic("Can't register callbacks\n");
> +
> +	/* start event handling */
> +	rte_vhost_driver_session_start();
> +
> +	pthread_exit(0);

Do we need pthread_exit(), I think just a "return" does same thing for this context.

> +}
> +
> +static void
> +vhost_driver_session_start(void)
> +{
> +	int ret;
> +
> +	ret = pthread_create(&session_th,
> +			NULL, vhost_driver_session, NULL);
> +	if (ret)
> +		rte_panic("Can't create a thread\n");

rte_panic() terminates the process, since we are in a driver, it can be good to return some kind of error and application to decide to terminate or not
application can be using multiple PMDs, and may prefer to not terminate if one PMD is not working.

> +}
> +
<...>
> +
> +static int
> +eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> +		   uint16_t nb_rx_desc __rte_unused,
> +		   unsigned int socket_id,
> +		   const struct rte_eth_rxconf *rx_conf __rte_unused,
> +		   struct rte_mempool *mb_pool)
> +{
> +	struct vhost_queue *vq;
> +
> +	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
> +			RTE_CACHE_LINE_SIZE, socket_id);
> +	if (vq == NULL) {
> +		RTE_LOG(ERR, PMD, "Failed to allocate memory for rx queue\n");
> +		return -ENOMEM;
> +	}
> +
> +	vq->mb_pool = mb_pool;
> +	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
> +	dev->data->rx_queues[rx_queue_id] = vq;
> +	dev->data->rx_queues[rx_queue_id] = vq;

duplicated line?

> +	return 0;
> +}
> +
> +static int
> +eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> +		   uint16_t nb_tx_desc __rte_unused,
> +		   unsigned int socket_id,
> +		   const struct rte_eth_txconf *tx_conf __rte_unused)
> +{
> +	struct vhost_queue *vq;
> +
> +	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
> +			RTE_CACHE_LINE_SIZE, socket_id);
> +	if (vq == NULL) {
> +		RTE_LOG(ERR, PMD, "Failed to allocate memory for tx queue\n");
> +		return -ENOMEM;
> +	}
> +
> +	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
> +	dev->data->tx_queues[tx_queue_id] = vq;
> +	dev->data->tx_queues[tx_queue_id] = vq;

duplicated line?

> +	return 0;
> +}
> +
> +static void
> +eth_dev_info(struct rte_eth_dev *dev,
> +	     struct rte_eth_dev_info *dev_info)
> +{
> +	dev_info->driver_name = drivername;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t)-1;
> +	dev_info->max_rx_queues = (uint16_t)dev->data->nb_rx_queues;
> +	dev_info->max_tx_queues = (uint16_t)dev->data->nb_tx_queues;

no need to (uint16_t) cast here

> +	dev_info->min_rx_bufsize = 0;
> +}
> +
> +static void
> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> +	unsigned i;
> +	unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
> +	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
> +	struct vhost_queue *vq;
> +
> +	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
> +	     i < dev->data->nb_rx_queues; i++) {

syntax: I think guideline suggest two tabs here, instead of tab and space mixture.

> +		if (dev->data->rx_queues[i] == NULL)
> +			continue;
> +		vq = dev->data->rx_queues[i];
> +		stats->q_ipackets[i] = vq->rx_pkts;
> +		rx_total += stats->q_ipackets[i];
> +
> +		stats->q_ibytes[i] = vq->rx_bytes;
> +		rx_total_bytes += stats->q_ibytes[i];
> +	}
> +
> +	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
> +	     i < dev->data->nb_tx_queues; i++) {
> +		if (dev->data->tx_queues[i] == NULL)

more queue NULL check here, I am not sure if these are necessary

> +			continue;
> +		vq = dev->data->tx_queues[i];
> +		stats->q_opackets[i] = vq->tx_pkts;
> +		tx_missed_total += vq->missed_pkts;
> +		tx_total += stats->q_opackets[i];
> +
> +		stats->q_obytes[i] = vq->tx_bytes;
> +		tx_total_bytes += stats->q_obytes[i];
> +	}
> +
> +	stats->ipackets = rx_total;
> +	stats->opackets = tx_total;
> +	stats->imissed = tx_missed_total;
> +	stats->ibytes = rx_total_bytes;
> +	stats->obytes = tx_total_bytes;
> +}
> +
<...>
> +
> +static inline int
> +open_queues(const char *key __rte_unused, const char *value, void *extra_args)
> +{
> +	uint16_t *q = extra_args;
> +
> +	if ((value == NULL) || (extra_args == NULL))

syntax: extra parenthesis can be removed

> +		return -EINVAL;
> +
> +	*q = (uint16_t)strtoul(value, NULL, 0);
> +	if ((*q == USHRT_MAX) && (errno == ERANGE))
same here
> +		return -1;
> +
> +	if (*q > RTE_MAX_QUEUES_PER_PORT)
> +		return -1;
> +
> +	return 0;
> +}
> +
<...>
> +
> +static int
> +rte_pmd_vhost_devuninit(const char *name)
> +{
> +	struct rte_eth_dev *eth_dev = NULL;
> +	struct pmd_internal *internal;
> +	unsigned int i;
> +
> +	RTE_LOG(INFO, PMD, "Un-Initializing pmd_vhost for %s\n", name);
> +
> +	/* find an ethdev entry */
> +	eth_dev = rte_eth_dev_allocated(name);
> +	if (eth_dev == NULL)
> +		return -ENODEV;
> +
> +	internal = eth_dev->data->dev_private;
> +	if (internal == NULL)
> +		return -ENODEV;
> +
> +	rte_free(vring_states[internal->port_id]);
> +	vring_states[internal->port_id] = NULL;
> +
> +	pthread_mutex_lock(&internal_list_lock);
> +	TAILQ_REMOVE(&internals_list, internal, next);
> +	pthread_mutex_unlock(&internal_list_lock);
> +
> +	eth_dev_stop(eth_dev);
> +
> +	if (internal->dev_name)

no need to NULL check for free()

> +		free(internal->dev_name);
> +	if (internal->iface_name)
> +		free(internal->iface_name);
> +
> +	for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
> +		rte_free(eth_dev->data->rx_queues[i]);
> +	for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
> +		rte_free(eth_dev->data->tx_queues[i]);
> +
> +	rte_free(eth_dev->data->mac_addrs);
> +	rte_free(eth_dev->data);
> +
> +	rte_free(internal);
> +
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	return 0;
> +}
> +
<...>


More information about the dev mailing list