[dpdk-dev] [PATCH v1 02/14] vhost: add burst enqueue function for packed ring

Ilya Maximets i.maximets at samsung.com
Thu Sep 5 12:31:05 CEST 2019


On 05.09.2019 19:14, Marvin Liu wrote:
> Burst enqueue function will first check whether descriptors are cache
> aligned. It will also check prerequisites in the beginning. Burst
> enqueue function does not support chained mbufs; the single packet
> enqueue function will handle those.
> 
> Signed-off-by: Marvin Liu <yong.liu at intel.com>

Hi.

Can we rely on loop unrolling by the compiler instead of repeating each
statement 4 times?

For example:

    uint64_t len[PACKED_DESCS_BURST];

    for (i = 0; i < PACKED_DESCS_BURST; i++)
        len[i] = descs[avail_idx + i].len;


For the 'if' checks:

    res = false;
    for (i = 0; i < PACKED_DESCS_BURST; i++)
        res |= pkts[i]->next != NULL;
    if (unlikely(res))
        return -1;

or just

    for (i = 0; i < PACKED_DESCS_BURST; i++)
        if (unlikely(pkts[i]->next != NULL))
            return -1;
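
The first variant evaluates all four conditions and takes a single
unlikely branch, which may behave better on the hot path; the second
is shorter but keeps one branch per packet. Either way the trip count
is a compile-time constant, so the compiler can fully unroll the loop.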

Since PACKED_DESCS_BURST is a fairly small constant, such loops should be
unrolled by the compiler, producing almost the same code.
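
And if a given compiler or optimization level doesn't unroll on its
own, it can be nudged with a pragma (just a sketch; the exact spelling
is compiler-specific, e.g. GCC 8+ below, clang has its own form):

    /* Ask GCC to unroll 4x; clang spells this
     * '#pragma clang loop unroll_count(4)'. */
    #pragma GCC unroll 4
    for (i = 0; i < PACKED_DESCS_BURST; i++)
        len[i] = descs[avail_idx + i].len;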

This will significantly reduce the code size and will also make it
possible to experiment with the PACKED_DESCS_BURST value without
massive code changes.
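
For example, the whole address-translation block in this patch could
collapse to something like this (untested sketch, reusing the names
from the patch):

    uint64_t desc_addrs[PACKED_DESCS_BURST];
    uint64_t lens[PACKED_DESCS_BURST];

    for (i = 0; i < PACKED_DESCS_BURST; i++) {
        lens[i] = descs[avail_idx + i].len;
        desc_addrs[i] = vhost_iova_to_vva(dev, vq,
                                          descs[avail_idx + i].addr,
                                          &lens[i], VHOST_ACCESS_RW);
    }

    for (i = 0; i < PACKED_DESCS_BURST; i++)
        if (unlikely(lens[i] != descs[avail_idx + i].len))
            return -1;

and the same pattern works for the prefetches, the headers and
the final copies.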

The same applies to the other patches in the series.

What do you think?

Best regards, Ilya Maximets.

> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 884befa85..ed8b4aabf 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -39,6 +39,8 @@
>  
>  #define VHOST_LOG_CACHE_NR 32
>  
> +#define PACKED_DESCS_BURST 4
> +#define PACKED_BURST_MASK (PACKED_DESCS_BURST - 1)
>  /**
>   * Structure contains buffer address, length and descriptor index
>   * from vring to do scatter RX.
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 5ad0a8175..51ed20543 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -896,6 +896,106 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	return pkt_idx;
>  }
>  
> +static __rte_unused uint16_t
> +virtio_dev_rx_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +	 struct rte_mbuf **pkts)
> +{
> +	bool wrap_counter = vq->avail_wrap_counter;
> +	struct vring_packed_desc *descs = vq->desc_packed;
> +	uint16_t avail_idx = vq->last_avail_idx;
> +	uint64_t desc_addr, desc_addr1, desc_addr2, desc_addr3;
> +	uint64_t len, len1, len2, len3;
> +	struct virtio_net_hdr_mrg_rxbuf *hdr, *hdr1, *hdr2, *hdr3;
> +	uint32_t buf_offset = dev->vhost_hlen;
> +
> +	if (unlikely(avail_idx & PACKED_BURST_MASK))
> +		return -1;
> +
> +	if (unlikely((pkts[0]->next != NULL) |
> +		(pkts[1]->next != NULL) |
> +		(pkts[2]->next != NULL) |
> +		(pkts[3]->next != NULL)))
> +		return -1;
> +
> +	if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)) |
> +		unlikely(!desc_is_avail(&descs[avail_idx + 1], wrap_counter)) |
> +		unlikely(!desc_is_avail(&descs[avail_idx + 2], wrap_counter)) |
> +		unlikely(!desc_is_avail(&descs[avail_idx + 3], wrap_counter)))
> +		return 1;
> +
> +	rte_smp_rmb();
> +
> +	len = descs[avail_idx].len;
> +	len1 = descs[avail_idx + 1].len;
> +	len2 = descs[avail_idx + 2].len;
> +	len3 = descs[avail_idx + 3].len;
> +
> +	if (unlikely((pkts[0]->pkt_len > (len - buf_offset)) |
> +		     (pkts[1]->pkt_len > (len1 - buf_offset)) |
> +		     (pkts[2]->pkt_len > (len2 - buf_offset)) |
> +		     (pkts[3]->pkt_len > (len3 - buf_offset))))
> +		return -1;
> +
> +	desc_addr = vhost_iova_to_vva(dev, vq, descs[avail_idx].addr, &len,
> +				      VHOST_ACCESS_RW);
> +
> +	desc_addr1 = vhost_iova_to_vva(dev, vq, descs[avail_idx + 1].addr,
> +				       &len1, VHOST_ACCESS_RW);
> +
> +	desc_addr2 = vhost_iova_to_vva(dev, vq, descs[avail_idx + 2].addr,
> +				       &len2, VHOST_ACCESS_RW);
> +
> +	desc_addr3 = vhost_iova_to_vva(dev, vq, descs[avail_idx + 3].addr,
> +				       &len3, VHOST_ACCESS_RW);
> +
> +	if (unlikely((len != descs[avail_idx].len) |
> +		(len1 != descs[avail_idx + 1].len) |
> +		(len2 != descs[avail_idx + 2].len) |
> +		(len3 != descs[avail_idx + 3].len)))
> +		return -1;
> +
> +	rte_prefetch0((void *)(uintptr_t)desc_addr);
> +	rte_prefetch0((void *)(uintptr_t)desc_addr1);
> +	rte_prefetch0((void *)(uintptr_t)desc_addr2);
> +	rte_prefetch0((void *)(uintptr_t)desc_addr3);
> +
> +	hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
> +	hdr1 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr1;
> +	hdr2 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr2;
> +	hdr3 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr3;
> +
> +	virtio_enqueue_offload(pkts[0], &hdr->hdr);
> +	virtio_enqueue_offload(pkts[1], &hdr1->hdr);
> +	virtio_enqueue_offload(pkts[2], &hdr2->hdr);
> +	virtio_enqueue_offload(pkts[3], &hdr3->hdr);
> +
> +	len = pkts[0]->pkt_len + dev->vhost_hlen;
> +	len1 = pkts[1]->pkt_len + dev->vhost_hlen;
> +	len2 = pkts[2]->pkt_len + dev->vhost_hlen;
> +	len3 = pkts[3]->pkt_len + dev->vhost_hlen;
> +
> +	vq->last_avail_idx += PACKED_DESCS_BURST;
> +	if (vq->last_avail_idx >= vq->size) {
> +		vq->last_avail_idx -= vq->size;
> +		vq->avail_wrap_counter ^= 1;
> +	}
> +
> +	rte_memcpy((void *)(uintptr_t)(desc_addr + buf_offset),
> +		   rte_pktmbuf_mtod_offset(pkts[0], void *, 0),
> +		   pkts[0]->pkt_len);
> +	rte_memcpy((void *)(uintptr_t)(desc_addr1 + buf_offset),
> +		    rte_pktmbuf_mtod_offset(pkts[1], void *, 0),
> +		    pkts[1]->pkt_len);
> +	rte_memcpy((void *)(uintptr_t)(desc_addr2 + buf_offset),
> +		    rte_pktmbuf_mtod_offset(pkts[2], void *, 0),
> +		    pkts[2]->pkt_len);
> +	rte_memcpy((void *)(uintptr_t)(desc_addr3 + buf_offset),
> +		    rte_pktmbuf_mtod_offset(pkts[3], void *, 0),
> +		    pkts[3]->pkt_len);
> +
> +	return 0;
> +}
> +
>  static __rte_unused int16_t
>  virtio_dev_rx_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	struct rte_mbuf *pkt)
> 