[dpdk-dev] [PATCH v2 4/4] lib/librte_vhost: improve vhost perf using rte_memset
Zhiyong Yang
zhiyong.yang at intel.com
Tue Dec 27 11:04:58 CET 2016
Using rte_memset instead of copy_virtio_net_hdr can bring a 3%~4%
performance improvement on the IA platform in virtio/vhost
non-mergeable loopback testing.
Two key points have been considered:
1. One variable initialization can be saved, which avoids a memory
   store.
2. copy_virtio_net_hdr involves both a load (of the virtio_hdr variable
   from the stack) and a store (to virtio driver memory), while
   rte_memset involves only a store.
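For illustration, a minimal sketch of the two patterns (not the vhost
code itself: the header layout is simplified, plain memset()/memcpy()
stand in for rte_memset()/copy_virtio_net_hdr(), and the *_sketch names
are made up for this example):

#include <stdint.h>
#include <string.h>

struct virtio_net_hdr_sketch {
	uint8_t  flags;
	uint8_t  gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
};

/* Old pattern: build the header in a stack variable, then copy it out. */
static void
enqueue_hdr_copy(uint8_t *desc_addr)
{
	struct virtio_net_hdr_sketch hdr = {0, 0, 0, 0, 0, 0}; /* extra store */

	hdr.gso_type = 1;                     /* offload fields filled here */
	memcpy(desc_addr, &hdr, sizeof(hdr)); /* load from stack + store */
}

/* New pattern: zero and fill the header directly in descriptor memory. */
static void
enqueue_hdr_memset(uint8_t *desc_addr)
{
	struct virtio_net_hdr_sketch *hdr =
		(struct virtio_net_hdr_sketch *)desc_addr;

	memset(hdr, 0, sizeof(*hdr));         /* store only, no stack copy */
	hdr->gso_type = 1;                    /* offload fields set in place */
}

int
main(void)
{
	struct virtio_net_hdr_sketch buf;

	enqueue_hdr_copy((uint8_t *)&buf);
	enqueue_hdr_memset((uint8_t *)&buf);
	return 0;
}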
Signed-off-by: Zhiyong Yang <zhiyong.yang at intel.com>
---
Changes in V2:
Modify release_17_02.rst description.
doc/guides/rel_notes/release_17_02.rst | 7 +++++++
lib/librte_vhost/virtio_net.c | 18 +++++++++++-------
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/doc/guides/rel_notes/release_17_02.rst b/doc/guides/rel_notes/release_17_02.rst
index 180af82..3d39cde 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -52,6 +52,13 @@ New Features
See the :ref:`Generic flow API <Generic_flow_API>` documentation for more
information.
+* **Introduced rte_memset on IA platform.**
+
+  In some cases, calling the glibc function memset from DPDK code caused
+  a performance drop on Ivybridge, so a more efficient replacement was
+  needed. The new function rte_memset supports three types of instruction
+  sets: SSE & AVX (128 bits), AVX2 (256 bits) and AVX512 (512 bits), and
+  it has better performance than glibc memset.
Resolved Issues
---------------
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 595f67c..392b31b 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -37,6 +37,7 @@
#include <rte_mbuf.h>
#include <rte_memcpy.h>
+#include <rte_memset.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_virtio_net.h>
@@ -194,7 +195,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
uint32_t cpy_len;
struct vring_desc *desc;
uint64_t desc_addr;
- struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+ struct virtio_net_hdr *virtio_hdr;
desc = &descs[desc_idx];
desc_addr = gpa_to_vva(dev, desc->addr);
@@ -208,8 +209,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
rte_prefetch0((void *)(uintptr_t)desc_addr);
- virtio_enqueue_offload(m, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+ virtio_hdr = (struct virtio_net_hdr *)(uintptr_t)desc_addr;
+ rte_memset(virtio_hdr, 0, sizeof(*virtio_hdr));
+ virtio_enqueue_offload(m, virtio_hdr);
vhost_log_write(dev, desc->addr, dev->vhost_hlen);
PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
@@ -459,7 +461,6 @@ static inline int __attribute__((always_inline))
copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
struct buf_vector *buf_vec, uint16_t num_buffers)
{
- struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
uint32_t vec_idx = 0;
uint64_t desc_addr;
uint32_t mbuf_offset, mbuf_avail;
@@ -480,7 +481,6 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
hdr_phys_addr = buf_vec[vec_idx].buf_addr;
rte_prefetch0((void *)(uintptr_t)hdr_addr);
- virtio_hdr.num_buffers = num_buffers;
LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
dev->vid, num_buffers);
@@ -512,8 +512,12 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
}
if (hdr_addr) {
- virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr);
+ struct virtio_net_hdr_mrg_rxbuf *hdr =
+ (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
+
+ rte_memset(&(hdr->hdr), 0, sizeof(hdr->hdr));
+ hdr->num_buffers = num_buffers;
+ virtio_enqueue_offload(hdr_mbuf, &(hdr->hdr));
vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
--
2.7.4