[dpdk-dev] [PATCH v2 1/6] vhost: rewrite enqueue

Zhihong Wang zhihong.wang at intel.com
Thu Aug 18 08:33:06 CEST 2016


This patch reimplements the vhost enqueue logic from scratch as a single
code path, designed for high performance and better maintainability.
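
For reference, a minimal caller sketch; the helper name flush_to_guest
and its call context are illustrative only, not part of this patch:

    #include <rte_mbuf.h>
    #include <rte_virtio_net.h>

    /* copy a received burst into the guest RX vring (queue VIRTIO_RXQ) */
    static uint16_t
    flush_to_guest(int vid, struct rte_mbuf **pkts, uint16_t nb_rx)
    {
            uint16_t i;
            uint16_t sent;

            sent = rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, pkts, nb_rx);

            /*
             * The API copies packet data into guest buffers, so the
             * caller still owns every mbuf; nb_rx - sent packets were
             * dropped because the vring was full.
             */
            for (i = 0; i < nb_rx; i++)
                    rte_pktmbuf_free(pkts[i]);

            return sent;
    }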

Signed-off-by: Zhihong Wang <zhihong.wang at intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 212 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 205 insertions(+), 7 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 08a73fd..8e6d782 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
 	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }
 
-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
 	if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -533,19 +533,217 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	return pkt_idx;
 }
 
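+/* a burst ends when all packets are sent or the avail ring is exhausted */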
+static inline uint32_t __attribute__((always_inline))
+loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t pkt_left)
+{
+	if (pkt_left == 0 || avail_idx == vq->last_used_idx)
+		return 1;
+
+	return 0;
+}
+
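+/*
+ * Copy one packet, possibly multi-segment, into guest buffers.
+ * Returns 0 on success, 1 on failure (avail ring exhausted or
+ * bad descriptor address).
+ */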
+static inline uint32_t __attribute__((always_inline))
+enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint16_t avail_idx, struct rte_mbuf *mbuf,
+		uint32_t is_mrg_rxbuf)
+{
+	struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
+	struct vring_desc *desc;
+	uint64_t desc_host_write_addr = 0;
+	uint32_t desc_chain_head = 0;
+	uint32_t desc_chain_len = 0;
+	uint32_t desc_current = 0;
+	uint32_t desc_write_offset = 0;
+	uint32_t mbuf_len = 0;
+	uint32_t mbuf_len_left = 0;
+	uint32_t copy_len = 0;
+	uint32_t extra_buffers = 0;
+	uint32_t used_idx_round = 0;
+
+	/* start with the first mbuf of the packet */
+	mbuf_len = rte_pktmbuf_data_len(mbuf);
+	mbuf_len_left = mbuf_len;
+
+	/* get the current desc */
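+	/* the avail ring is consumed at last_used_idx: one avail
+	 * entry is retired per used ring entry produced below
+	 */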
+	desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
+	desc_chain_head = desc_current;
+	desc = &vq->desc[desc_current];
+	desc_host_write_addr = gpa_to_vva(dev, desc->addr);
+	if (unlikely(!desc_host_write_addr))
+		goto error;
+
+	/* handle virtio header */
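+	/* note: the first desc is assumed large enough for the virtio header */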
+	virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)
+		(uintptr_t)desc_host_write_addr;
+	memset((void *)(uintptr_t)&(virtio_hdr->hdr),
+			0, dev->vhost_hlen);
+	virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
+	vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+	desc_write_offset = dev->vhost_hlen;
+	desc_chain_len = desc_write_offset;
+	desc_host_write_addr += desc_write_offset;
+	if (is_mrg_rxbuf)
+		virtio_hdr->num_buffers = 1;
+
+	/* start copy from mbuf to desc */
+	while (1) {
+		/* move to the next mbuf segment once the current one is drained */
+		if (!mbuf_len_left) {
+			if (mbuf->next) {
+				mbuf = mbuf->next;
+				mbuf_len = rte_pktmbuf_data_len(mbuf);
+				mbuf_len_left = mbuf_len;
+			} else
+				break;
+		}
+
+		/* move to the next desc once the current one is full */
+		if (desc->len <= desc_write_offset) {
+			if (desc->flags & VRING_DESC_F_NEXT) {
+				/* go on with the current desc chain */
+				desc_write_offset = 0;
+				desc_current = desc->next;
+				desc = &vq->desc[desc_current];
+				desc_host_write_addr =
+					gpa_to_vva(dev, desc->addr);
+				if (unlikely(!desc_host_write_addr))
+					goto rollback;
+			} else if (is_mrg_rxbuf) {
+				/* start with the next desc chain */
+				used_idx_round = vq->last_used_idx
+					& (vq->size - 1);
+				vq->used->ring[used_idx_round].id =
+					desc_chain_head;
+				vq->used->ring[used_idx_round].len =
+					desc_chain_len;
+				vhost_log_used_vring(dev, vq,
+					offsetof(struct vring_used,
+						ring[used_idx_round]),
+					sizeof(vq->used->ring[
+						used_idx_round]));
+				vq->last_used_idx++;
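+				/* count consumed chains for rollback */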
+				extra_buffers++;
+				virtio_hdr->num_buffers++;
+				if (avail_idx == vq->last_used_idx)
+					goto rollback;
+
+				desc_current =
+					vq->avail->ring[(vq->last_used_idx) &
+					(vq->size - 1)];
+				desc_chain_head = desc_current;
+				desc = &vq->desc[desc_current];
+				desc_host_write_addr =
+					gpa_to_vva(dev, desc->addr);
+				if (unlikely(!desc_host_write_addr))
+					goto rollback;
+
+				desc_chain_len = 0;
+				desc_write_offset = 0;
+			} else
+				goto rollback;
+		}
+
+		/* copy mbuf data */
+		copy_len = RTE_MIN(desc->len - desc_write_offset,
+				mbuf_len_left);
+		rte_memcpy((void *)(uintptr_t)desc_host_write_addr,
+				rte_pktmbuf_mtod_offset(mbuf, void *,
+					mbuf_len - mbuf_len_left),
+				copy_len);
+		vhost_log_write(dev, desc->addr + desc_write_offset,
+				copy_len);
+		mbuf_len_left -= copy_len;
+		desc_write_offset += copy_len;
+		desc_host_write_addr += copy_len;
+		desc_chain_len += copy_len;
+	}
+
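+	/* commit the used ring entry for the final descriptor chain */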
+	used_idx_round = vq->last_used_idx & (vq->size - 1);
+	vq->used->ring[used_idx_round].id = desc_chain_head;
+	vq->used->ring[used_idx_round].len = desc_chain_len;
+	vhost_log_used_vring(dev, vq,
+		offsetof(struct vring_used, ring[used_idx_round]),
+		sizeof(vq->used->ring[used_idx_round]));
+	vq->last_used_idx++;
+
+	return 0;
+
+rollback:
+	/* roll back the on-the-fly last_used_idx updates on any error */
+	if (is_mrg_rxbuf)
+		vq->last_used_idx -= extra_buffers;
+
+error:
+	return 1;
+}
+
+static inline void __attribute__((always_inline))
+notify_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
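+	/* make the used ring writes visible before publishing used->idx */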
+	rte_smp_wmb();
+	vq->used->idx = vq->last_used_idx;
+	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			sizeof(vq->used->idx));
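+	/* order the used->idx update before reading the avail flags */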
+	rte_mb();
+	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+			&& (vq->callfd >= 0))
+		eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
 uint16_t
 rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint16_t count)
 {
-	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev;
+	uint32_t pkt_idx = 0;
+	uint32_t pkt_left = 0;
+	uint32_t pkt_sent = 0;
+	uint32_t is_mrg_rxbuf = 0;
+	uint16_t avail_idx = 0;
+
+	/* precheck */
+	if (unlikely(count == 0))
+		return 0;
 
-	if (!dev)
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
 		return 0;
 
-	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-	else
-		return virtio_dev_rx(dev, queue_id, pkts, count);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb)))
+		return 0;
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(!vq->enabled))
+		return 0;
+
+	if (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
+		is_mrg_rxbuf = 1;
+
+	/* enqueue packets one by one */
+	pkt_idx = 0;
+	pkt_left = count;
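+	/* snapshot avail->idx once; entries the guest adds later are
+	 * picked up by the next burst
+	 */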
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+	while (1) {
+		if (loop_check(vq, avail_idx, pkt_left))
+			break;
+
+		if (enqueue_packet(dev, vq, avail_idx, pkts[pkt_idx],
+					is_mrg_rxbuf))
+			break;
+
+		pkt_idx++;
+		pkt_sent++;
+		pkt_left--;
+	}
+
+	/* update used idx and kick the guest if necessary */
+	if (pkt_sent)
+		notify_guest(dev, vq);
+
+	return pkt_sent;
 }
 
 static void
-- 
2.7.4