[dpdk-dev] [PATCH v1 12/14] vhost: optimize Tx function of packed ring

Marvin Liu yong.liu at intel.com
Thu Sep 5 18:14:19 CEST 2019


Optimize the vhost device Tx function along the same lines as the Rx
function: dequeue descriptors in bursts where possible, fall back to
single-descriptor handling otherwise, and split the zero-copy path out
into a dedicated function.
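
For reviewers, the reworked dequeue loop follows the same burst-then-single
dispatch pattern as the Rx path. A minimal sketch of that pattern, condensed
from the code below (PACKED_DESCS_BURST and the helpers are introduced
elsewhere in this series):

	uint32_t pkt_idx = 0, remained = count;

	do {
		/* Fast path: dequeue PACKED_DESCS_BURST packets at once. */
		if (remained >= PACKED_DESCS_BURST &&
		    !virtio_dev_tx_burst_packed(dev, vq, mbuf_pool,
						&pkts[pkt_idx])) {
			flush_dequeue_packed(dev, vq);
			pkt_idx += PACKED_DESCS_BURST;
			remained -= PACKED_DESCS_BURST;
			continue;
		}

		/* Slow path: dequeue one descriptor chain at a time. */
		if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
						&pkts[pkt_idx]))
			break;
		pkt_idx++;
		remained--;
		flush_dequeue_packed(dev, vq);
	} while (remained);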

Signed-off-by: Marvin Liu <yong.liu at intel.com>

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 8032229a0..554617292 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -302,17 +302,6 @@ update_enqueue_shadow_packed(struct vhost_virtqueue *vq, uint16_t desc_idx,
 	vq->enqueue_shadow_count += count;
 }
 
-static __rte_always_inline void
-update_shadow_packed(struct vhost_virtqueue *vq,
-			 uint16_t desc_idx, uint32_t len, uint16_t count)
-{
-	uint16_t i = vq->shadow_used_idx++;
-
-	vq->shadow_used_packed[i].id  = desc_idx;
-	vq->shadow_used_packed[i].len = len;
-	vq->shadow_used_packed[i].count = count;
-}
-
 static __rte_always_inline void
 update_dequeue_shadow_packed(struct vhost_virtqueue *vq, uint16_t buf_id,
 	uint16_t count)
@@ -394,7 +383,7 @@ flush_enqueue_packed(struct virtio_net *dev,
 	}
 }
 
-static __rte_unused void
+static __rte_always_inline void
 flush_dequeue_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
 	if (!vq->shadow_used_idx)
@@ -1866,7 +1855,7 @@ vhost_dequeue_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
-static __rte_unused int
+static __rte_always_inline int
 virtio_dev_tx_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts)
 {
@@ -1957,7 +1946,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
-static __rte_unused int
+static __rte_always_inline int
 virtio_dev_tx_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts)
 {
@@ -1979,7 +1968,7 @@ virtio_dev_tx_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
-static __rte_unused int
+static __rte_always_inline int
 virtio_dev_tx_burst_packed_zmbuf(struct virtio_net *dev,
 					struct vhost_virtqueue *vq,
 					struct rte_mempool *mbuf_pool,
@@ -2047,7 +2036,7 @@ virtio_dev_tx_burst_packed_zmbuf(struct virtio_net *dev,
 	return 0;
 }
 
-static __rte_unused int
+static __rte_always_inline int
 virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev,
 	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
 	struct rte_mbuf **pkts)
@@ -2082,119 +2071,7 @@ virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev,
 	return 0;
 }
 
-static __rte_noinline uint16_t
-virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
-	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
-{
-	uint16_t i;
-
-	if (unlikely(dev->dequeue_zero_copy)) {
-		struct zcopy_mbuf *zmbuf, *next;
-
-		for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
-		     zmbuf != NULL; zmbuf = next) {
-			next = TAILQ_NEXT(zmbuf, next);
-
-			if (mbuf_is_consumed(zmbuf->mbuf)) {
-				update_shadow_packed(vq, zmbuf->desc_idx, 0,
-						     zmbuf->desc_count);
-
-				TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
-				restore_mbuf(zmbuf->mbuf);
-				rte_pktmbuf_free(zmbuf->mbuf);
-				put_zmbuf(zmbuf);
-				vq->nr_zmbuf -= 1;
-			}
-		}
-
-		if (likely(vq->shadow_used_idx)) {
-			flush_dequeue_shadow_packed(dev, vq);
-			vhost_vring_call_packed(dev, vq);
-		}
-	}
-
-	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-
-	count = RTE_MIN(count, MAX_PKT_BURST);
-	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
-			dev->vid, count);
-
-	for (i = 0; i < count; i++) {
-		struct buf_vector buf_vec[BUF_VECTOR_MAX];
-		uint16_t buf_id;
-		uint32_t dummy_len;
-		uint16_t desc_count, nr_vec = 0;
-		int err;
-
-		if (unlikely(fill_vec_buf_packed(dev, vq,
-						vq->last_avail_idx, &desc_count,
-						buf_vec, &nr_vec,
-						&buf_id, &dummy_len,
-						VHOST_ACCESS_RO) < 0))
-			break;
-
-		if (likely(dev->dequeue_zero_copy == 0))
-			update_shadow_packed(vq, buf_id, 0, desc_count);
-
-		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
-		if (unlikely(pkts[i] == NULL)) {
-			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to allocate memory for mbuf.\n");
-			break;
-		}
-
-		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
-				mbuf_pool);
-		if (unlikely(err)) {
-			rte_pktmbuf_free(pkts[i]);
-			break;
-		}
-
-		if (unlikely(dev->dequeue_zero_copy)) {
-			struct zcopy_mbuf *zmbuf;
-
-			zmbuf = get_zmbuf(vq);
-			if (!zmbuf) {
-				rte_pktmbuf_free(pkts[i]);
-				break;
-			}
-			zmbuf->mbuf = pkts[i];
-			zmbuf->desc_idx = buf_id;
-			zmbuf->desc_count = desc_count;
-
-			/*
-			 * Pin lock the mbuf; we will check later to see
-			 * whether the mbuf is freed (when we are the last
-			 * user) or not. If that's the case, we then could
-			 * update the used ring safely.
-			 */
-			rte_mbuf_refcnt_update(pkts[i], 1);
-
-			vq->nr_zmbuf += 1;
-			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
-		}
-
-		vq->last_avail_idx += desc_count;
-		if (vq->last_avail_idx >= vq->size) {
-			vq->last_avail_idx -= vq->size;
-			vq->avail_wrap_counter ^= 1;
-		}
-	}
-
-	if (likely(dev->dequeue_zero_copy == 0)) {
-		do_data_copy_dequeue(vq);
-		if (unlikely(i < count))
-			vq->shadow_used_idx = i;
-		if (likely(vq->shadow_used_idx)) {
-			flush_dequeue_shadow_packed(dev, vq);
-			vhost_vring_call_packed(dev, vq);
-		}
-	}
-
-	return i;
-}
-
-static __rte_unused void
+static __rte_always_inline void
 free_zmbuf(struct vhost_virtqueue *vq)
 {
 	struct zcopy_mbuf *next = NULL;
@@ -2235,6 +2112,105 @@ free_zmbuf(struct vhost_virtqueue *vq)
 	}
 }
 
+static __rte_noinline uint16_t
+virtio_dev_tx_packed_zmbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
+{
+	uint32_t pkt_idx = 0;
+	uint32_t pkt_num;
+	uint32_t remained = count;
+	int ret;
+
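+	/* Reclaim zero-copy mbufs whose data the consumer is done with. */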
+	free_zmbuf(vq);
+
+	do {
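+		/* Try a full zero-copy burst first, then fall back to singles. */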
+		if (remained >= PACKED_DESCS_BURST) {
+			ret = virtio_dev_tx_burst_packed_zmbuf(dev, vq,
+							       mbuf_pool,
+							       &pkts[pkt_idx]);
+
+			if (!ret) {
+				pkt_num = PACKED_DESCS_BURST;
+				pkt_idx += pkt_num;
+				remained -= pkt_num;
+				continue;
+			}
+		}
+
+		if (virtio_dev_tx_single_packed_zmbuf(dev, vq, mbuf_pool,
+						      &pkts[pkt_idx]))
+			break;
+
+		pkt_num = 1;
+		pkt_idx += pkt_num;
+		remained -= pkt_num;
+	} while (remained);
+
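+	/* Notify the guest only if packets were actually dequeued. */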
+	if (pkt_idx)
+		vhost_vring_call_packed(dev, vq);
+
+	return pkt_idx;
+}
+
+static __rte_noinline uint16_t
+virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
+{
+	uint32_t pkt_idx = 0;
+	uint32_t pkt_num;
+	uint32_t remained = count;
+	uint16_t fetch_idx;
+	int ret;
+	struct vring_packed_desc *descs = vq->desc_packed;
+
+	do {
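+		/* Prefetch next descriptor group at each 8-desc boundary. */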
+		if ((vq->last_avail_idx & 0x7) == 0) {
+			fetch_idx = vq->last_avail_idx + 8;
+			rte_prefetch0((void *)(uintptr_t)&descs[fetch_idx]);
+		}
+
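+		/* Fast path: try to dequeue a full burst first. */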
+		if (remained >= PACKED_DESCS_BURST) {
+			ret = virtio_dev_tx_burst_packed(dev, vq, mbuf_pool,
+							 &pkts[pkt_idx]);
+
+			if (!ret) {
+				pkt_num = PACKED_DESCS_BURST;
+				flush_dequeue_packed(dev, vq);
+				pkt_idx += pkt_num;
+				remained -= pkt_num;
+				continue;
+			}
+		}
+
+		/*
+		 * If the remaining descs can't be bundled into one burst,
+		 * stop here and leave them for the next round.
+		 */
+		if (((vq->last_avail_idx & PACKED_BURST_MASK) + remained) <
+			PACKED_DESCS_BURST)
+			break;
+
+		if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
+						&pkts[pkt_idx]))
+			break;
+
+		pkt_num = 1;
+		pkt_idx += pkt_num;
+		remained -= pkt_num;
+		flush_dequeue_packed(dev, vq);
+
+	} while (remained);
+
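+	/* Flush batched copies, then notify the guest. */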
+	if (pkt_idx) {
+		if (vq->shadow_used_idx)
+			do_data_copy_dequeue(vq);
+
+		vhost_vring_call_packed(dev, vq);
+	}
+
+	return pkt_idx;
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -2308,9 +2284,14 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		count -= 1;
 	}
 
-	if (vq_is_packed(dev))
-		count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count);
-	else
+	if (vq_is_packed(dev)) {
+		if (unlikely(dev->dequeue_zero_copy))
+			count = virtio_dev_tx_packed_zmbuf(dev, vq, mbuf_pool,
+							   pkts, count);
+		else
+			count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts,
+						     count);
+	} else
 		count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
 
 out:
-- 
2.17.1


