[dpdk-dev] [PATCH v2 4/7] vhost: translate iovas at vectors fill time

Maxime Coquelin maxime.coquelin at redhat.com
Mon Jun 25 09:19:34 CEST 2018



On 06/25/2018 04:21 AM, Tiwei Bie wrote:
> On Sat, Jun 23, 2018 at 09:11:24AM +0200, Maxime Coquelin wrote:
>> This patch aims at simplifying the desc to mbuf and mbuf to desc
>> copy functions. It performs the iova to hva translations at
>> vectors fill time.
>>
>> Doing this, in case desc buffer isn't contiguous in hva space,
>> it gets split into multiple vectors.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin at redhat.com>
>> ---
>>   lib/librte_vhost/vhost.h      |   1 +
>>   lib/librte_vhost/virtio_net.c | 340 ++++++++++++++++++------------------------
>>   2 files changed, 144 insertions(+), 197 deletions(-)
>>
>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>> index 786a74f64..e3b2ed2ff 100644
>> --- a/lib/librte_vhost/vhost.h
>> +++ b/lib/librte_vhost/vhost.h
>> @@ -43,6 +43,7 @@
>>    * from vring to do scatter RX.
>>    */
>>   struct buf_vector {
>> +	uint64_t buf_iova;
>>   	uint64_t buf_addr;
>>   	uint32_t buf_len;
>>   	uint32_t desc_idx;
>> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
>> index 4816e8003..1ab1edd67 100644
>> --- a/lib/librte_vhost/virtio_net.c
>> +++ b/lib/librte_vhost/virtio_net.c
>> @@ -225,12 +225,12 @@ static __rte_always_inline int
>>   fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>   			 uint32_t avail_idx, uint32_t *vec_idx,
>>   			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
>> -			 uint16_t *desc_chain_len)
>> +			 uint16_t *desc_chain_len, uint8_t perm)
>>   {
>>   	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
>>   	uint32_t vec_id = *vec_idx;
>>   	uint32_t len    = 0;
>> -	uint64_t dlen;
>> +	uint64_t dlen, desc_avail, desc_iova;
>>   	struct vring_desc *descs = vq->desc;
>>   	struct vring_desc *idesc = NULL;
>>   
>> @@ -267,10 +267,31 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>   		}
>>   
>>   		len += descs[idx].len;
>> -		buf_vec[vec_id].buf_addr = descs[idx].addr;
>> -		buf_vec[vec_id].buf_len  = descs[idx].len;
>> -		buf_vec[vec_id].desc_idx = idx;
>> -		vec_id++;
>> +		desc_avail = descs[idx].len;
>> +		desc_iova = descs[idx].addr;
>> +
>> +		while (desc_avail) {
> 
> We also need to check whether:
> 
> vec_id >= BUF_VECTOR_MAX

Right.

>> +			uint64_t desc_addr;
>> +			uint64_t desc_chunck_len = desc_avail;
>> +
>> +			desc_addr = vhost_iova_to_vva(dev, vq,
>> +					desc_iova,
>> +					&desc_chunck_len,
>> +					perm);
>> +			if (unlikely(!desc_addr)) {
>> +				free_ind_table(idesc);
>> +				return -1;
>> +			}
>> +
>> +			buf_vec[vec_id].buf_iova = desc_iova;
>> +			buf_vec[vec_id].buf_addr = desc_addr;
>> +			buf_vec[vec_id].buf_len  = desc_chunck_len;
>> +			buf_vec[vec_id].desc_idx = idx;
>> +
>> +			desc_avail -= desc_chunck_len;
>> +			desc_iova += desc_chunck_len;
>> +			vec_id++;
>> +		}
>>   
>>   		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
>>   			break;
>> @@ -293,7 +314,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>   static inline int
>>   reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>   				uint32_t size, struct buf_vector *buf_vec,
>> -				uint16_t *num_buffers, uint16_t avail_head)
>> +				uint16_t *num_buffers, uint16_t avail_head,
>> +				uint16_t *nr_vec)
>>   {
>>   	uint16_t cur_idx;
>>   	uint32_t vec_idx = 0;
>> @@ -315,7 +337,8 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>   			return -1;
>>   
>>   		if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
>> -						&head_idx, &len) < 0))
>> +						&head_idx, &len,
>> +						VHOST_ACCESS_RO) < 0))
> 
> reserve_avail_buf() is called by virtio_dev_rx(),
> so the write perm is needed.
Right.

To avoid having to pass the perms, I wonder if it wouldn't be better to
rely on the descriptors' VRING_DESC_F_WRITE flag.

>>   			return -1;
>>   		len = RTE_MIN(len, size);
>>   		update_shadow_used_ring(vq, head_idx, len);
>> @@ -334,21 +357,22 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>   			return -1;
>>   	}
>>   
>> +	*nr_vec = vec_idx;
>> +
>>   	return 0;
>>   }
> [...]
>> @@ -455,18 +454,12 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>   				uint64_t len;
>>   				uint64_t remain = dev->vhost_hlen;
>>   				uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
>> -				uint64_t guest_addr = hdr_phys_addr;
>> +				uint64_t iova = buf_vec[0].buf_iova;
>> +				uint16_t hdr_vec_idx = 0;
>>   
>>   				while (remain) {
>>   					len = remain;
>> -					dst = vhost_iova_to_vva(dev, vq,
>> -							guest_addr, &len,
>> -							VHOST_ACCESS_RW);
>> -					if (unlikely(!dst || !len)) {
>> -						error = -1;
>> -						goto out;
>> -					}
>> -
>> +					dst =  buf_vec[hdr_vec_idx].buf_addr;
> 
> There is no need to have two ' ' after '='.

Agree.

>>   					rte_memcpy((void *)(uintptr_t)dst,
>>   							(void *)(uintptr_t)src,
>>   							len);
> [...]
>>   
>>   		/*
>> @@ -1175,7 +1120,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>>   		if (unlikely(fill_vec_buf(dev, vq,
>>   						vq->last_avail_idx + i,
>>   						&nr_vec, buf_vec,
>> -						&head_idx, &dummy_len) < 0))
>> +						&head_idx, &dummy_len,
>> +						VHOST_ACCESS_RW) < 0))
> 
> This is dequeue path, so _RO should be used.

Right.

>>   			break;
>>   
>>   		if (likely(dev->dequeue_zero_copy == 0))
>> -- 
>> 2.14.4
>>

Thanks for the review,
Maxime


More information about the dev mailing list