[dpdk-dev] [PATCH v3 14/21] vhost: dequeue for packed queues

Jens Freimann jfreimann at redhat.com
Fri Apr 6 12:07:26 CEST 2018


On Fri, Apr 06, 2018 at 11:30:10AM +0200, Maxime Coquelin wrote:
>
>
>On 04/05/2018 12:10 PM, Jens Freimann wrote:
>>Implement code to dequeue and process descriptors from
>>the vring if VIRTIO_F_RING_PACKED is enabled.
>>
>>Check if descriptor was made available by driver by looking at
>>VIRTIO_F_DESC_AVAIL flag in descriptor. If so dequeue and set
>>the used flag VIRTIO_F_DESC_USED to the current value of the
>>used wrap counter.
>>
>>Used ring wrap counter needs to be toggled when last descriptor is
>>written out. This allows the host/guest to detect new descriptors even
>>after the ring has wrapped.
>>
>>Signed-off-by: Jens Freimann <jfreimann at redhat.com>
>>---
>>  lib/librte_vhost/vhost.c      |   1 +
>>  lib/librte_vhost/vhost.h      |   1 +
>>  lib/librte_vhost/virtio_net.c | 228 ++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 230 insertions(+)
>>
>>diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>index 1f17cdd75..eb5a98875 100644
>>--- a/lib/librte_vhost/vhost.c
>>+++ b/lib/librte_vhost/vhost.c
>>@@ -185,6 +185,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>>  	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
>>  	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
>>+	vq->used_wrap_counter = 1;
>>  	vhost_user_iotlb_init(dev, vring_idx);
>>  	/* Backends are set to -1 indicating an inactive device. */
>>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>index 20d78f883..c8aa946fd 100644
>>--- a/lib/librte_vhost/vhost.h
>>+++ b/lib/librte_vhost/vhost.h
>>@@ -112,6 +112,7 @@ struct vhost_virtqueue {
>>  	struct batch_copy_elem	*batch_copy_elems;
>>  	uint16_t		batch_copy_nb_elems;
>>+	uint16_t 		used_wrap_counter;
>>  	rte_rwlock_t	iotlb_lock;
>>  	rte_rwlock_t	iotlb_pending_lock;
>>diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
>>index ed7198dbb..7eea1da04 100644
>>--- a/lib/librte_vhost/virtio_net.c
>>+++ b/lib/librte_vhost/virtio_net.c
>>@@ -19,6 +19,7 @@
>>  #include "iotlb.h"
>>  #include "vhost.h"
>>+#include "virtio-1.1.h"
>>  #define MAX_PKT_BURST 32
>>@@ -1118,6 +1119,233 @@ restore_mbuf(struct rte_mbuf *m)
>>  	}
>>  }
>>+static inline uint16_t
>>+dequeue_desc_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>+	     struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
>>+	     struct vring_desc_packed *descs)
>>+{
>>+	struct vring_desc_packed *desc;
>>+	uint64_t desc_addr;
>>+	uint32_t desc_avail, desc_offset;
>>+	uint32_t mbuf_avail, mbuf_offset;
>>+	uint32_t cpy_len;
>>+	struct rte_mbuf *cur = m, *prev = m;
>>+	struct virtio_net_hdr *hdr = NULL;
>>+	uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
>>+	int wrap_counter = vq->used_wrap_counter;
>>+	int rc = 0;
>>+
>>+	rte_spinlock_lock(&vq->access_lock);
>>+
>>+	if (unlikely(vq->enabled == 0))
>>+		goto out;
>
>It is unbalanced as it would unlock iotlb_rd_lock that isn't locked yet.

yes, will fix
>
>>+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
>>+		vhost_user_iotlb_rd_lock(vq);
>>+
>>+	desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+	if (unlikely((desc->len < dev->vhost_hlen)) ||
>>+			(desc->flags & VRING_DESC_F_INDIRECT)) {
>>+			RTE_LOG(ERR, VHOST_DATA,
>>+				"INDIRECT not supported yet\n");
>>+			rc = -1;
>>+			goto out;
>>+	}
>>+
>>+	desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
>>+				      sizeof(*desc), VHOST_ACCESS_RO);
>>+
>>+	if (unlikely(!desc_addr)) {
>>+		rc = -1;
>>+		goto out;
>>+	}
>>+
>>+	if (virtio_net_with_host_offload(dev)) {
>>+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>>+		rte_prefetch0(hdr);
>>+	}
>>+
>>+	/*
>>+	 * A virtio driver normally uses at least 2 desc buffers
>>+	 * for Tx: the first for storing the header, and others
>>+	 * for storing the data.
>>+	 */
>>+	if (likely((desc->len == dev->vhost_hlen) &&
>>+		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
>>+		if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+			toggle_wrap_counter(vq);
>>+
>>+		desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+
>>+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
>>+			RTE_LOG(ERR, VHOST_DATA,
>>+				"INDIRECT not supported yet\n");
>>+			rc = -1;
>>+			goto out;
>>+		}
>>+
>>+		desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
>>+					      sizeof(*desc), VHOST_ACCESS_RO);
>>+		if (unlikely(!desc_addr)) {
>>+			rc = -1;
>>+			goto out;
>>+		}
>>+
>>+		desc_offset = 0;
>>+		desc_avail  = desc->len;
>>+	} else {
>>+		desc_avail  = desc->len - dev->vhost_hlen;
>>+		desc_offset = dev->vhost_hlen;
>>+	}
>>+
>>+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
>>+
>>+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
>>+
>>+	mbuf_offset = 0;
>>+	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
>>+	while (1) {
>>+		uint64_t hpa;
>>+
>>+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
>>+
>>+		/*
>>+		 * A desc buf might across two host physical pages that are
>>+		 * not continuous. In such case (gpa_to_hpa returns 0), data
>>+		 * will be copied even though zero copy is enabled.
>>+		 */
>>+		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
>>+					desc->addr + desc_offset, cpy_len)))) {
>>+			cur->data_len = cpy_len;
>>+			cur->data_off = 0;
>>+			cur->buf_addr = (void *)(uintptr_t)desc_addr;
>>+			cur->buf_physaddr = hpa;
>>+
>>+			/*
>>+			 * In zero copy mode, one mbuf can only reference data
>>+			 * for one or partial of one desc buff.
>>+			 */
>>+			mbuf_avail = cpy_len;
>>+		} else {
>>+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
>>+							   mbuf_offset),
>>+				(void *)((uintptr_t)(desc_addr + desc_offset)),
>>+				cpy_len);
>>+		}
>>+
>>+		mbuf_avail  -= cpy_len;
>>+		mbuf_offset += cpy_len;
>>+		desc_avail  -= cpy_len;
>>+		desc_offset += cpy_len;
>>+
>>+		/* This desc reaches to its end, get the next one */
>>+		if (desc_avail == 0) {
>>+			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
>>+				break;
>>+
>>+			if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+				toggle_wrap_counter(vq);
>>+
>>+			desc = &descs[vq->last_used_idx & (vq->size - 1)];
>>+			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
>>+				RTE_LOG(ERR, VHOST_DATA, "INDIRECT not supported yet");
>>+			}
>>+
>>+			desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
>>+						      sizeof(*desc), VHOST_ACCESS_RO);
>>+			if (unlikely(!desc_addr)) {
>>+				rc = -1;
>>+				goto out;
>Not sure, but, shouldn't you break here instead so rings gets updated if
>some of the descs have been processed?

It should break here and in other places in this loop as well. will
fix it.
>>+			}
>>+
>>+			rte_prefetch0((void *)(uintptr_t)desc_addr);
>>+
>>+			desc_offset = 0;
>>+			desc_avail  = desc->len;
>>+
>>+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>>+		}
>>+
>>+		/*
>>+		 * This mbuf reaches to its end, get a new one
>>+		 * to hold more data.
>>+		 */
>>+		if (mbuf_avail == 0) {
>>+			cur = rte_pktmbuf_alloc(mbuf_pool);
>>+			if (unlikely(cur == NULL)) {
>>+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
>>+					"allocate memory for mbuf.\n");
>>+				rc = -1;
>>+				goto out;
>>+			}
>>+
>>+			prev->next = cur;
>>+			prev->data_len = mbuf_offset;
>>+			m->nb_segs += 1;
>>+			m->pkt_len += mbuf_offset;
>>+			prev = cur;
>>+
>>+			mbuf_offset = 0;
>>+			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>>+		}
>>+	}
>>+
>>+	if (hdr)
>>+		vhost_dequeue_offload(hdr, m);
>>+
>>+	if ((++vq->last_used_idx & (vq->size - 1)) == 0)
>>+		toggle_wrap_counter(vq);
>>+
>>+	rte_smp_wmb();
>>+	_set_desc_used(&descs[head_idx], wrap_counter);
>>+
>>+	prev->data_len = mbuf_offset;
>>+	m->pkt_len    += mbuf_offset;
>>+
>>+out:
>>+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
>>+		vhost_user_iotlb_rd_unlock(vq);
>>+	rte_spinlock_unlock(&vq->access_lock);
>>+
>>+	return rc;
>>+}
>>+
>>+static inline uint16_t
>>+vhost_dequeue_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>+			struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
>>+			uint16_t count)
>>+{
>>+	uint16_t i;
>>+	uint16_t idx;
>>+	struct vring_desc_packed *desc = vq->desc_packed;
>>+	int err;
>>+
>>+	count = RTE_MIN(MAX_PKT_BURST, count);
>>+	for (i = 0; i < count; i++) {
>>+		idx = vq->last_used_idx & (vq->size - 1);
>>+		if (!desc_is_avail(vq, &desc[idx]))
>>+			break;
>>+		rte_smp_rmb();
>>+
>>+		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
>>+		if (unlikely(pkts[i] == NULL)) {
>>+			RTE_LOG(ERR, VHOST_DATA,
>>+				"Failed to allocate memory for mbuf.\n");
>>+			break;
>>+		}
>>+
>>+		err = dequeue_desc_packed(dev, vq, mbuf_pool, pkts[i], desc);
>>+		if (unlikely(err)) {
>>+			rte_pktmbuf_free(pkts[i]);
>>+			break;
>>+		}
>>+	}
>>+
>>+	rte_spinlock_unlock(&vq->access_lock);
>I think you forgot to remove this line.

yes

Thanks!

regards,
Jens 


More information about the dev mailing list