[dpdk-dev] [PATCH v3 2/2] vhost: Add VHOST PMD

Tetsuya Mukawa mukawa at igel.co.jp
Mon Nov 9 07:27:21 CET 2015


Hi Liu,

Thank you so much for your review.
I will fix these issues and submit a new version this week.

Thanks,
Tetsuya


On 2015/11/09 15:21, Yuanhan Liu wrote:
> Hi Tetsuya,
>
> Here are just some minor nits after a very quick first pass.
>
> On Mon, Nov 09, 2015 at 02:17:01PM +0900, Tetsuya Mukawa wrote:
> ...
>> +static uint16_t
>> +eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>> +{
>> +	struct vhost_queue *r = q;
>> +	uint16_t nb_rx = 0;
>> +
>> +	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
>> +		return 0;
>> +
>> +	rte_atomic32_set(&r->while_queuing, 1);
>> +
>> +	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
>> +		goto out;
>> +
>> +	/* Dequeue packets from guest TX queue */
>> +	nb_rx = (uint16_t)rte_vhost_dequeue_burst(r->device,
>> +			r->virtqueue_id, r->mb_pool, bufs, nb_bufs);
> Unnecessary cast, as rte_vhost_dequeue_burst is defined with a uint16_t
> return type.
>
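
Agreed, the cast is redundant; for v4 I plan to just drop it. A minimal
sketch (the enqueue side below gets the same treatment):

	/* rte_vhost_dequeue_burst() already returns uint16_t */
	nb_rx = rte_vhost_dequeue_burst(r->device,
			r->virtqueue_id, r->mb_pool, bufs, nb_bufs);
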
>> +
>> +	r->rx_pkts += nb_rx;
>> +
>> +out:
>> +	rte_atomic32_set(&r->while_queuing, 0);
>> +
>> +	return nb_rx;
>> +}
>> +
>> +static uint16_t
>> +eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>> +{
>> +	struct vhost_queue *r = q;
>> +	uint16_t i, nb_tx = 0;
>> +
>> +	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
>> +		return 0;
>> +
>> +	rte_atomic32_set(&r->while_queuing, 1);
>> +
>> +	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
>> +		goto out;
>> +
>> +	/* Enqueue packets to guest RX queue */
>> +	nb_tx = (uint16_t)rte_vhost_enqueue_burst(r->device,
>> +			r->virtqueue_id, bufs, nb_bufs);
> Ditto.
>
>> +
>> +	r->tx_pkts += nb_tx;
>> +	r->err_pkts += nb_bufs - nb_tx;
>> +
>> +	for (i = 0; likely(i < nb_tx); i++)
>> +		rte_pktmbuf_free(bufs[i]);
>> +
>> +out:
>> +	rte_atomic32_set(&r->while_queuing, 0);
>> +
>> +	return nb_tx;
>> +}
>> +
>> +static int
>> +eth_dev_configure(struct rte_eth_dev *dev __rte_unused) { return 0; }
> I personally would not prefer saving a few lines of code at the cost
> of readability.
>
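
Sure, I will expand these one-line stubs in v4 for readability, e.g.:

	static int
	eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
	{
		return 0;
	}
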
>> +
>> +static int
>> +eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
>> +		   uint16_t nb_rx_desc __rte_unused,
>> +		   unsigned int socket_id,
>> +		   const struct rte_eth_rxconf *rx_conf __rte_unused,
>> +		   struct rte_mempool *mb_pool)
>> +{
>> +	struct pmd_internal *internal = dev->data->dev_private;
>> +	struct vhost_queue *vq;
>> +
>> +	if (internal->rx_vhost_queues[rx_queue_id] != NULL)
>> +		rte_free(internal->rx_vhost_queues[rx_queue_id]);
> Such a NULL check is unnecessary; rte_free will handle it.
>
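
Right, rte_free() is a no-op on a NULL pointer, so this can simply be:

	rte_free(internal->rx_vhost_queues[rx_queue_id]);
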
>> +
>> +	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
>> +			RTE_CACHE_LINE_SIZE, socket_id);
>> +	if (vq == NULL) {
>> +		RTE_LOG(ERR, PMD, "Failed to allocate memory for rx queue\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	vq->mb_pool = mb_pool;
>> +	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
>> +	internal->rx_vhost_queues[rx_queue_id] = vq;
>> +	dev->data->rx_queues[rx_queue_id] = vq;
>> +	return 0;
>> +}
>> +
>> +static int
>> +eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>> +		   uint16_t nb_tx_desc __rte_unused,
>> +		   unsigned int socket_id,
>> +		   const struct rte_eth_txconf *tx_conf __rte_unused)
>> +{
>> +	struct pmd_internal *internal = dev->data->dev_private;
>> +	struct vhost_queue *vq;
>> +
>> +	if (internal->tx_vhost_queues[tx_queue_id] != NULL)
>> +		rte_free(internal->tx_vhost_queues[tx_queue_id]);
> Ditto.
>
>> +
>> +	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
>> +			RTE_CACHE_LINE_SIZE, socket_id);
>> +	if (vq == NULL) {
>> +		RTE_LOG(ERR, PMD, "Failed to allocate memory for tx queue\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
>> +	internal->tx_vhost_queues[tx_queue_id] = vq;
>> +	dev->data->tx_queues[tx_queue_id] = vq;
>> +	return 0;
>> +}
>> +
>> +
>> +static void
>> +eth_dev_info(struct rte_eth_dev *dev,
>> +	     struct rte_eth_dev_info *dev_info)
>> +{
>> +	struct pmd_internal *internal = dev->data->dev_private;
>> +
>> +	dev_info->driver_name = drivername;
>> +	dev_info->max_mac_addrs = 1;
>> +	dev_info->max_rx_pktlen = (uint32_t)-1;
>> +	dev_info->max_rx_queues = (uint16_t)internal->nb_rx_queues;
>> +	dev_info->max_tx_queues = (uint16_t)internal->nb_tx_queues;
>> +	dev_info->min_rx_bufsize = 0;
>> +}
>> +
>> +static void
>> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
>> +{
>> +	unsigned i;
>> +	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
>> +	const struct pmd_internal *internal = dev->data->dev_private;
>> +
>> +	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
>> +	     i < internal->nb_rx_queues; i++) {
>> +		if (internal->rx_vhost_queues[i] == NULL)
>> +			continue;
>> +		igb_stats->q_ipackets[i] = internal->rx_vhost_queues[i]->rx_pkts;
>> +		rx_total += igb_stats->q_ipackets[i];
>> +	}
>> +
>> +	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
>> +	     i < internal->nb_tx_queues; i++) {
>> +		if (internal->tx_vhost_queues[i] == NULL)
>> +			continue;
>> +		igb_stats->q_opackets[i] = internal->tx_vhost_queues[i]->tx_pkts;
>> +		igb_stats->q_errors[i] = internal->tx_vhost_queues[i]->err_pkts;
>> +		tx_total += igb_stats->q_opackets[i];
>> +		tx_err_total += igb_stats->q_errors[i];
>> +	}
>> +
>> +	igb_stats->ipackets = rx_total;
>> +	igb_stats->opackets = tx_total;
>> +	igb_stats->oerrors = tx_err_total;
>> +}
>> +
>> +static void
>> +eth_stats_reset(struct rte_eth_dev *dev)
>> +{
>> +	unsigned i;
>> +	struct pmd_internal *internal = dev->data->dev_private;
>> +
>> +	for (i = 0; i < internal->nb_rx_queues; i++) {
>> +		if (internal->rx_vhost_queues[i] == NULL)
>> +			continue;
>> +		internal->rx_vhost_queues[i]->rx_pkts = 0;
>> +	}
>> +	for (i = 0; i < internal->nb_tx_queues; i++) {
>> +		if (internal->tx_vhost_queues[i] == NULL)
>> +			continue;
>> +		internal->tx_vhost_queues[i]->tx_pkts = 0;
>> +		internal->tx_vhost_queues[i]->err_pkts = 0;
>> +	}
>> +}
>> +
>> +static void
>> +eth_queue_release(void *q __rte_unused) { ; }
>> +static int
>> +eth_link_update(struct rte_eth_dev *dev __rte_unused,
>> +		int wait_to_complete __rte_unused) { return 0; }
> Ditto.
>
>> +
>> +static const struct eth_dev_ops ops = {
>> +	.dev_start = eth_dev_start,
>> +	.dev_stop = eth_dev_stop,
>> +	.dev_configure = eth_dev_configure,
>> +	.dev_infos_get = eth_dev_info,
>> +	.rx_queue_setup = eth_rx_queue_setup,
>> +	.tx_queue_setup = eth_tx_queue_setup,
>> +	.rx_queue_release = eth_queue_release,
>> +	.tx_queue_release = eth_queue_release,
>> +	.link_update = eth_link_update,
>> +	.stats_get = eth_stats_get,
>> +	.stats_reset = eth_stats_reset,
>> +};
>> +
>> +static int
>> +eth_dev_vhost_create(const char *name, int index,
>> +		     char *iface_name,
>> +		     int16_t queues,
>> +		     const unsigned numa_node)
>> +{
>> +	struct rte_eth_dev_data *data = NULL;
>> +	struct pmd_internal *internal = NULL;
>> +	struct rte_eth_dev *eth_dev = NULL;
>> +	struct ether_addr *eth_addr = NULL;
>> +
>> +	RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
>> +		numa_node);
>> +
>> +	/* now do all data allocation - for eth_dev structure, dummy pci driver
>> +	 * and internal (private) data
>> +	 */
>> +	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
>> +	if (data == NULL)
>> +		goto error;
>> +
>> +	internal = rte_zmalloc_socket(name, sizeof(*internal), 0, numa_node);
>> +	if (internal == NULL)
>> +		goto error;
>> +
>> +	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
>> +	if (eth_addr == NULL)
>> +		goto error;
>> +	*eth_addr = base_eth_addr;
>> +	eth_addr->addr_bytes[5] = index;
>> +
>> +	/* reserve an ethdev entry */
>> +	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
>> +	if (eth_dev == NULL)
>> +		goto error;
>> +
>> +	/* now put it all together
>> +	 * - store queue data in internal,
>> +	 * - store numa_node info in ethdev data
>> +	 * - point eth_dev_data to internals
>> +	 * - and point eth_dev structure to new eth_dev_data structure
>> +	 */
>> +	internal->nb_rx_queues = queues;
>> +	internal->nb_tx_queues = queues;
>> +	internal->dev_name = strdup(name);
>> +	if (internal->dev_name == NULL)
>> +		goto error;
>> +	internal->iface_name = strdup(iface_name);
>> +	if (internal->iface_name == NULL)
>> +		goto error;
> If the allocation fails here, internal->dev_name is never freed.
>
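
Good catch. In v4 I will free dev_name on this error path, roughly:

	internal->iface_name = strdup(iface_name);
	if (internal->iface_name == NULL) {
		free(internal->dev_name);
		goto error;
	}
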
>> +
>> +	pthread_mutex_lock(&internal_list_lock);
>> +	TAILQ_INSERT_TAIL(&internals_list, internal, next);
>> +	pthread_mutex_unlock(&internal_list_lock);
>> +
>> +	data->dev_private = internal;
>> +	data->port_id = eth_dev->data->port_id;
>> +	memmove(data->name, eth_dev->data->name, sizeof(data->name));
>> +	data->nb_rx_queues = queues;
>> +	data->nb_tx_queues = queues;
>> +	data->dev_link = pmd_link;
>> +	data->mac_addrs = eth_addr;
>> +
>> +	/* We'll replace the 'data' originally allocated by eth_dev. So the
>> +	 * vhost PMD resources won't be shared between multi processes.
>> +	 */
>> +	eth_dev->data = data;
>> +	eth_dev->dev_ops = &ops;
>> +	eth_dev->driver = NULL;
>> +	eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
>> +	eth_dev->data->kdrv = RTE_KDRV_NONE;
>> +	eth_dev->data->drv_name = internal->dev_name;
>> +	eth_dev->data->numa_node = numa_node;
>> +
>> +	/* finally assign rx and tx ops */
>> +	eth_dev->rx_pkt_burst = eth_vhost_rx;
>> +	eth_dev->tx_pkt_burst = eth_vhost_tx;
>> +
>> +	return data->port_id;
>> +
>> +error:
>> +	rte_free(data);
>> +	rte_free(internal);
>> +	rte_free(eth_addr);
>> +
>> +	return -1;
>> +}
> ...
> ...
>> +
>> +	if ((internal) && (internal->dev_name))
>> +		free(internal->dev_name);
>> +	if ((internal) && (internal->iface_name))
>> +		free(internal->iface_name);
>> +
>> +	rte_free(eth_dev->data->mac_addrs);
>> +	rte_free(eth_dev->data);
>> +
>> +	for (i = 0; i < internal->nb_rx_queues; i++) {
>> +		if (internal->rx_vhost_queues[i] != NULL)
>> +			rte_free(internal->rx_vhost_queues[i]);
>> +	}
>> +	for (i = 0; i < internal->nb_tx_queues; i++) {
>> +		if (internal->tx_vhost_queues[i] != NULL)
>> +			rte_free(internal->tx_vhost_queues[i]);
> Ditto.
>
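
Will fix these as well; since rte_free() handles NULL, the loops can be:

	for (i = 0; i < internal->nb_rx_queues; i++)
		rte_free(internal->rx_vhost_queues[i]);
	for (i = 0; i < internal->nb_tx_queues; i++)
		rte_free(internal->tx_vhost_queues[i]);
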
> (Hopefully I can do a more detailed review later, say next week.)
>
> 	--yliu


