[dpdk-dev] [RFC] net/virtio: use real barriers for vDPA
Ilya Maximets
i.maximets at samsung.com
Fri Dec 14 15:03:43 CET 2018
On 14.12.2018 15:58, Michael S. Tsirkin wrote:
> On Fri, Dec 14, 2018 at 03:37:04PM +0300, Ilya Maximets wrote:
>> SMP barriers are OK for software vhost implementation, but vDPA is a
>> DMA capable hardware and we have to use at least DMA barriers to
>> ensure the memory ordering.
>>
>> This patch mimics the kernel virtio-net driver in part of choosing
>> barriers we need.
>>
>> Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver")
>> Cc: stable at dpdk.org
>>
>> Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
>
> Instead of this vendor specific hack, can you please
> use VIRTIO_F_ORDER_PLATFORM?
Hmm. Yes. You're probably right.
But VIRTIO_F_ORDER_PLATFORM a.k.a. VIRTIO_F_IO_BARRIER is not implemented
anywhere at the time. This will require changes to the whole stack (vhost,
qemu, virtio drivers) to allow the negotiation. Moreover, I guess that it's
not offered/correctly treated by the current vDPA HW i.e. it allows to work
with weak barriers (It allowed to do that by the spec, but I don't think that
it's right). This vendor specific hack could allow to work right now.
Actually, I'm not sure if vDPA is usable with current QEMU ?
Does anybody know ? If it's usable, than having a hack could be acceptable
for now. If not, of course, we could drop this and implement virtio native
solution.
But yes, I agree that virtio native solution should be done anyway regardless
of having or not having hacks like this.
Maybe I can split the patch in two:
1. Add ability to use real barriers. ('weak_barriers' always 'true')
2. Hack to initialize 'weak_barriers' according to subsystem_device_id.
The first patch will be needed anyway. The second one could be dropped or
applied and replaced in the future by proper solution with VIRTIO_F_ORDER_PLATFORM.
Thoughts ?
One more thing I have concerns about is that vDPA offers support for earlier
versions of virtio protocol, which looks strange.
>
>
>> ---
>>
>> Sending as RFC, because the patch is completely not tested. I heve no
>> HW to test the real behaviour. And I actually do not know if the
>> subsystem_vendor/device_id's are available at the time and has
>> IFCVF_SUBSYS_* values inside the real guest system.
>>
>> One more thing I want to mention that cross net client Live Migration
>> is actually not possible without any support from the guest driver.
>> Because migration from the software vhost to vDPA will lead to using
>> weaker barriers than required.
>>
>> The similar change (weak_barriers = false) should be made in the linux
>> kernel virtio-net driver.
>
> IMHO no, it should use VIRTIO_F_ORDER_PLATFORM.
Sure. I'm not a fan of hacks too, especially inside kernel.
>
>> Another dirty solution could be to restrict the vDPA support to x86
>> systems only.
>>
>> drivers/net/virtio/virtio_ethdev.c | 9 +++++++
>> drivers/net/virtio/virtio_pci.h | 1 +
>> drivers/net/virtio/virtio_rxtx.c | 14 +++++-----
>> drivers/net/virtio/virtio_user_ethdev.c | 1 +
>> drivers/net/virtio/virtqueue.h | 35 +++++++++++++++++++++----
>> 5 files changed, 48 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
>> index cb2b2e0bf..249b536fd 100644
>> --- a/drivers/net/virtio/virtio_ethdev.c
>> +++ b/drivers/net/virtio/virtio_ethdev.c
>> @@ -1448,6 +1448,9 @@ virtio_configure_intr(struct rte_eth_dev *dev)
>> return 0;
>> }
>>
>> +#define IFCVF_SUBSYS_VENDOR_ID 0x8086
>> +#define IFCVF_SUBSYS_DEVICE_ID 0x001A
>> +
>> /* reset device and renegotiate features if needed */
>> static int
>> virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
>> @@ -1477,6 +1480,12 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
>> if (!hw->virtio_user_dev) {
>> pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
>> rte_eth_copy_pci_info(eth_dev, pci_dev);
>> +
>> + if (pci_dev->id.subsystem_vendor_id == IFCVF_SUBSYS_VENDOR_ID &&
>> + pci_dev->id.subsystem_device_id == IFCVF_SUBSYS_DEVICE_ID)
>> + hw->weak_barriers = 0;
>> + else
>> + hw->weak_barriers = 1;
>> }
>>
>> /* If host does not support both status and MSI-X then disable LSC */
>> diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
>> index e961a58ca..1f8e719a9 100644
>> --- a/drivers/net/virtio/virtio_pci.h
>> +++ b/drivers/net/virtio/virtio_pci.h
>> @@ -240,6 +240,7 @@ struct virtio_hw {
>> uint8_t use_simple_rx;
>> uint8_t use_inorder_rx;
>> uint8_t use_inorder_tx;
>> + uint8_t weak_barriers;
>> bool has_tx_offload;
>> bool has_rx_offload;
>> uint16_t port_id;
>> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
>> index cb8f89f18..66195bf47 100644
>> --- a/drivers/net/virtio/virtio_rxtx.c
>> +++ b/drivers/net/virtio/virtio_rxtx.c
>> @@ -906,7 +906,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
>>
>> nb_used = VIRTQUEUE_NUSED(vq);
>>
>> - virtio_rmb();
>> + virtio_rmb(hw->weak_barriers);
>>
>> num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
>> if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
>> @@ -1017,7 +1017,7 @@ virtio_recv_mergeable_pkts_inorder(void *rx_queue,
>> nb_used = RTE_MIN(nb_used, nb_pkts);
>> nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
>>
>> - virtio_rmb();
>> + virtio_rmb(hw->weak_barriers);
>>
>> PMD_RX_LOG(DEBUG, "used:%d", nb_used);
>>
>> @@ -1202,7 +1202,7 @@ virtio_recv_mergeable_pkts(void *rx_queue,
>>
>> nb_used = VIRTQUEUE_NUSED(vq);
>>
>> - virtio_rmb();
>> + virtio_rmb(hw->weak_barriers);
>>
>> PMD_RX_LOG(DEBUG, "used:%d", nb_used);
>>
>> @@ -1365,7 +1365,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>> PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
>> nb_used = VIRTQUEUE_NUSED(vq);
>>
>> - virtio_rmb();
>> + virtio_rmb(hw->weak_barriers);
>> if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
>> virtio_xmit_cleanup(vq, nb_used);
>>
>> @@ -1407,7 +1407,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>> /* Positive value indicates it need free vring descriptors */
>> if (unlikely(need > 0)) {
>> nb_used = VIRTQUEUE_NUSED(vq);
>> - virtio_rmb();
>> + virtio_rmb(hw->weak_barriers);
>> need = RTE_MIN(need, (int)nb_used);
>>
>> virtio_xmit_cleanup(vq, need);
>> @@ -1463,7 +1463,7 @@ virtio_xmit_pkts_inorder(void *tx_queue,
>> PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
>> nb_used = VIRTQUEUE_NUSED(vq);
>>
>> - virtio_rmb();
>> + virtio_rmb(hw->weak_barriers);
>> if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
>> virtio_xmit_cleanup_inorder(vq, nb_used);
>>
>> @@ -1511,7 +1511,7 @@ virtio_xmit_pkts_inorder(void *tx_queue,
>> need = slots - vq->vq_free_cnt;
>> if (unlikely(need > 0)) {
>> nb_used = VIRTQUEUE_NUSED(vq);
>> - virtio_rmb();
>> + virtio_rmb(hw->weak_barriers);
>> need = RTE_MIN(need, (int)nb_used);
>>
>> virtio_xmit_cleanup_inorder(vq, need);
>> diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
>> index f8791391a..f075774b4 100644
>> --- a/drivers/net/virtio/virtio_user_ethdev.c
>> +++ b/drivers/net/virtio/virtio_user_ethdev.c
>> @@ -434,6 +434,7 @@ virtio_user_eth_dev_alloc(struct rte_vdev_device *vdev)
>> hw->use_simple_rx = 0;
>> hw->use_inorder_rx = 0;
>> hw->use_inorder_tx = 0;
>> + hw->weak_barriers = 1;
>> hw->virtio_user_dev = dev;
>> return eth_dev;
>> }
>> diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
>> index 26518ed98..7bf17d3bf 100644
>> --- a/drivers/net/virtio/virtqueue.h
>> +++ b/drivers/net/virtio/virtqueue.h
>> @@ -19,15 +19,40 @@
>> struct rte_mbuf;
>>
>> /*
>> - * Per virtio_config.h in Linux.
>> + * Per virtio_ring.h in Linux.
>> * For virtio_pci on SMP, we don't need to order with respect to MMIO
>> * accesses through relaxed memory I/O windows, so smp_mb() et al are
>> * sufficient.
>> *
>> + * For using virtio to talk to real devices (eg. vDPA) we do need real
>> + * barriers.
>> */
>> -#define virtio_mb() rte_smp_mb()
>> -#define virtio_rmb() rte_smp_rmb()
>> -#define virtio_wmb() rte_smp_wmb()
>> +static inline void
>> +virtio_mb(uint8_t weak_barriers)
>> +{
>> + if (weak_barriers)
>> + rte_smp_mb();
>> + else
>> + rte_mb();
>> +}
>> +
>> +static inline void
>> +virtio_rmb(uint8_t weak_barriers)
>> +{
>> + if (weak_barriers)
>> + rte_smp_rmb();
>> + else
>> + rte_cio_rmb();
>> +}
>> +
>> +static inline void
>> +virtio_wmb(uint8_t weak_barriers)
>> +{
>> + if (weak_barriers)
>> + rte_smp_wmb();
>> + else
>> + rte_cio_wmb();
>> +}
>>
>> #ifdef RTE_PMD_PACKET_PREFETCH
>> #define rte_packet_prefetch(p) rte_prefetch1(p)
>
>
> Note kernel uses dma_ barriers not full barriers.
> I don't know whether it applies to dpdk.
Yes. I know. I used rte_cio_* barriers which are dpdk equivalent of dma_*
barriers from kernel.
>
>> @@ -312,7 +337,7 @@ void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
>> static inline void
>> vq_update_avail_idx(struct virtqueue *vq)
>> {
>> - virtio_wmb();
>> + virtio_wmb(vq->hw->weak_barriers);
>> vq->vq_ring.avail->idx = vq->vq_avail_idx;
>> }
>>
>> --
>> 2.17.1
>
>
More information about the dev
mailing list