[dpdk-dev] [PATCH v2 4/7] vhost: translate iovas at vectors fill time
Maxime Coquelin
maxime.coquelin at redhat.com
Mon Jun 25 09:19:34 CEST 2018
On 06/25/2018 04:21 AM, Tiwei Bie wrote:
> On Sat, Jun 23, 2018 at 09:11:24AM +0200, Maxime Coquelin wrote:
>> This patch aims at simplifying the desc to mbuf and mbuf to desc
>> copy functions. It performs the iova to hva translations at
>> vectors fill time.
>>
>> Doing this, in case desc buffer isn't contiguous in hva space,
>> it gets split into multiple vectors.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin at redhat.com>
>> ---
>> lib/librte_vhost/vhost.h | 1 +
>> lib/librte_vhost/virtio_net.c | 340 ++++++++++++++++++------------------------
>> 2 files changed, 144 insertions(+), 197 deletions(-)
>>
>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>> index 786a74f64..e3b2ed2ff 100644
>> --- a/lib/librte_vhost/vhost.h
>> +++ b/lib/librte_vhost/vhost.h
>> @@ -43,6 +43,7 @@
>> * from vring to do scatter RX.
>> */
>> struct buf_vector {
>> + uint64_t buf_iova;
>> uint64_t buf_addr;
>> uint32_t buf_len;
>> uint32_t desc_idx;
>> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
>> index 4816e8003..1ab1edd67 100644
>> --- a/lib/librte_vhost/virtio_net.c
>> +++ b/lib/librte_vhost/virtio_net.c
>> @@ -225,12 +225,12 @@ static __rte_always_inline int
>> fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> uint32_t avail_idx, uint32_t *vec_idx,
>> struct buf_vector *buf_vec, uint16_t *desc_chain_head,
>> - uint16_t *desc_chain_len)
>> + uint16_t *desc_chain_len, uint8_t perm)
>> {
>> uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
>> uint32_t vec_id = *vec_idx;
>> uint32_t len = 0;
>> - uint64_t dlen;
>> + uint64_t dlen, desc_avail, desc_iova;
>> struct vring_desc *descs = vq->desc;
>> struct vring_desc *idesc = NULL;
>>
>> @@ -267,10 +267,31 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> }
>>
>> len += descs[idx].len;
>> - buf_vec[vec_id].buf_addr = descs[idx].addr;
>> - buf_vec[vec_id].buf_len = descs[idx].len;
>> - buf_vec[vec_id].desc_idx = idx;
>> - vec_id++;
>> + desc_avail = descs[idx].len;
>> + desc_iova = descs[idx].addr;
>> +
>> + while (desc_avail) {
>
> We also need to check whether:
>
> vec_id >= BUF_VECTOR_MAX
Right.
>> + uint64_t desc_addr;
>> + uint64_t desc_chunck_len = desc_avail;
>> +
>> + desc_addr = vhost_iova_to_vva(dev, vq,
>> + desc_iova,
>> + &desc_chunck_len,
>> + perm);
>> + if (unlikely(!desc_addr)) {
>> + free_ind_table(idesc);
>> + return -1;
>> + }
>> +
>> + buf_vec[vec_id].buf_iova = desc_iova;
>> + buf_vec[vec_id].buf_addr = desc_addr;
>> + buf_vec[vec_id].buf_len = desc_chunck_len;
>> + buf_vec[vec_id].desc_idx = idx;
>> +
>> + desc_avail -= desc_chunck_len;
>> + desc_iova += desc_chunck_len;
>> + vec_id++;
>> + }
>>
>> if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
>> break;
>> @@ -293,7 +314,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> static inline int
>> reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> uint32_t size, struct buf_vector *buf_vec,
>> - uint16_t *num_buffers, uint16_t avail_head)
>> + uint16_t *num_buffers, uint16_t avail_head,
>> + uint16_t *nr_vec)
>> {
>> uint16_t cur_idx;
>> uint32_t vec_idx = 0;
>> @@ -315,7 +337,8 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> return -1;
>>
>> if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
>> - &head_idx, &len) < 0))
>> + &head_idx, &len,
>> + VHOST_ACCESS_RO) < 0))
>
> reserve_avail_buf() is called by virtio_dev_rx(),
> so the write perm is needed.
Right.
To avoid having to pass the perms, I wonder if it wouldn't be better to
rely on the descriptors' VRING_DESC_F_WRITE flag.
>> return -1;
>> len = RTE_MIN(len, size);
>> update_shadow_used_ring(vq, head_idx, len);
>> @@ -334,21 +357,22 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> return -1;
>> }
>>
>> + *nr_vec = vec_idx;
>> +
>> return 0;
>> }
> [...]
>> @@ -455,18 +454,12 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> uint64_t len;
>> uint64_t remain = dev->vhost_hlen;
>> uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
>> - uint64_t guest_addr = hdr_phys_addr;
>> + uint64_t iova = buf_vec[0].buf_iova;
>> + uint16_t hdr_vec_idx = 0;
>>
>> while (remain) {
>> len = remain;
>> - dst = vhost_iova_to_vva(dev, vq,
>> - guest_addr, &len,
>> - VHOST_ACCESS_RW);
>> - if (unlikely(!dst || !len)) {
>> - error = -1;
>> - goto out;
>> - }
>> -
>> + dst = buf_vec[hdr_vec_idx].buf_addr;
>
> There is no need to have two ' ' after '='.
Agree.
>> rte_memcpy((void *)(uintptr_t)dst,
>> (void *)(uintptr_t)src,
>> len);
> [...]
>>
>> /*
>> @@ -1175,7 +1120,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>> if (unlikely(fill_vec_buf(dev, vq,
>> vq->last_avail_idx + i,
>> &nr_vec, buf_vec,
>> - &head_idx, &dummy_len) < 0))
>> + &head_idx, &dummy_len,
>> + VHOST_ACCESS_RW) < 0))
>
> This is dequeue path, so _RO should be used.
Right.
>> break;
>>
>> if (likely(dev->dequeue_zero_copy == 0))
>> --
>> 2.14.4
>>
Thanks the review,
Maxime
More information about the dev
mailing list