[dpdk-dev] [PATCH v2 3/7] virtio: rx/tx ring layout optimization

Huawei Xie huawei.xie at intel.com
Sun Oct 18 08:29:00 CEST 2015


In DPDK based switching environment, mostly vhost runs on a dedicated core
while virtio processing in guest VMs runs on different cores.
Take RX for example: with the generic implementation, for each guest buffer,
a) the virtio driver allocates a descriptor from the free descriptor list
b) it modifies the avail ring entry to point to the allocated descriptor
c) after the packet is received, it frees the descriptor

When vhost fetches the avail ring, it needs to fetch the modified L1 cache line
from the virtio core, which is a heavy cost on current CPU implementations.

The idea of this optimization is:
    allocate a fixed descriptor for each entry of the avail ring,
so that the avail ring always stays the same during the run.
This removes L1 cache transfer from virtio core to vhost core for avail ring.
Besides, no descriptor free and allocation is needed.
This also makes vector processing possible to further accelerate the processing.

This is the layout for the avail ring(take 256 ring entries for example), with
each entry pointing to the descriptor with the same index.
                    avail
                    idx
                    +
                    |
+----+----+---+-------------+------+
| 0  | 1  | 2 | ... |  254  | 255  |  avail ring
+-+--+-+--+-+-+---------+---+--+---+
  |    |    |       |   |      |
  |    |    |       |   |      |
  v    v    v       |   v      v
+-+--+-+--+-+-+---------+---+--+---+
| 0  | 1  | 2 | ... |  254  | 255  |  desc ring
+----+----+---+-------------+------+
                    |
                    |
+----+----+---+-------------+------+
| 0  | 1  | 2 |     |  254  | 255  |  used ring
+----+----+---+-------------+------+
                    |
                    +

This is the ring layout for TX.
As we need one virtio header for each xmit packet, each packet uses two
descriptors (header + data), so only 128 of the 256 slots are available.

                         ++
                         ||
                         ||
+-----+-----+-----+--------------+------+------+------+
|  0  |  1  | ... |  127 || 128  | 129  | ...  | 255  |   avail ring
+--+--+--+--+-----+---+------+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+------+---+--+---+------+--+---+
| 128 | 129 | ... |  255 || 128  | 129  | ...  | 255  |   desc ring for virtio_net_hdr
+--+--+--+--+-----+---+------+---+--+---+------+--+---+
   |     |            |  ||  |      |             |
   v     v            v  ||  v      v             v
+--+--+--+--+-----+---+------+---+--+---+------+--+---+
|  0  |  1  | ... |  127 ||  0   |  1   | ...  | 127  |   desc ring for tx data
+-----+-----+-----+--------------+------+------+------+
                         ||
                         ||
                         ++

Signed-off-by: Huawei Xie <huawei.xie at intel.com>
---
 drivers/net/virtio/virtio_rxtx.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 5c00e9d..7c82a6a 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -302,6 +302,12 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 		nbufs = 0;
 		error = ENOSPC;
 
+		if (use_simple_rxtx)
+			for (i = 0; i < vq->vq_nentries; i++) {
+				vq->vq_ring.avail->ring[i] = i;
+				vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+			}
+
 		memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
 		for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
 			vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
@@ -332,6 +338,24 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
 			vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 	} else if (queue_type == VTNET_TQ) {
+		if (use_simple_rxtx) {
+			int mid_idx  = vq->vq_nentries >> 1;
+			for (i = 0; i < mid_idx; i++) {
+				vq->vq_ring.avail->ring[i] = i + mid_idx;
+				vq->vq_ring.desc[i + mid_idx].next = i;
+				vq->vq_ring.desc[i + mid_idx].addr =
+					vq->virtio_net_hdr_mem +
+						mid_idx * vq->hw->vtnet_hdr_size;
+				vq->vq_ring.desc[i + mid_idx].len =
+					vq->hw->vtnet_hdr_size;
+				vq->vq_ring.desc[i + mid_idx].flags =
+					VRING_DESC_F_NEXT;
+				vq->vq_ring.desc[i].flags = 0;
+			}
+			for (i = mid_idx; i < vq->vq_nentries; i++)
+				vq->vq_ring.avail->ring[i] = i;
+		}
+
 		VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
 			vq->vq_queue_index);
 		VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
-- 
1.8.1.4



More information about the dev mailing list