[dpdk-dev] [PATCH v1 6/7] net/mlx4: improve performance of one Tx segment
Ophir Munk
ophirmu at mellanox.com
Mon Oct 23 12:04:26 CEST 2017
From: Matan Azrad <matan at mellanox.com>
A packet with a single segment does not need the extra memory used to
save per-segment byte counts for writing them later in a different
order, so skip that unnecessary memory usage in this case.
This also avoids the per-segment loop management overhead.
Both changes improve performance.
Signed-off-by: Matan Azrad <matan at mellanox.com>
---
drivers/net/mlx4/mlx4_rxtx.c | 125 +++++++++++++++++++++++++++++--------------
1 file changed, 85 insertions(+), 40 deletions(-)
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 8adac0f..321ffe4 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -310,7 +310,6 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint32_t owner_opcode = MLX4_OPCODE_SEND;
struct mlx4_wqe_ctrl_seg *ctrl;
struct mlx4_wqe_data_seg *dseg;
- struct rte_mbuf *sbuf;
union {
uint32_t flags;
uint16_t flags16[2];
@@ -363,12 +362,12 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
sizeof(struct mlx4_wqe_ctrl_seg));
/* Fill the data segments with buffer information. */
- for (sbuf = buf; sbuf != NULL; sbuf = sbuf->next, dseg++) {
- addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ if (likely(buf->nb_segs == 1)) {
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
rte_prefetch0((volatile void *)addr);
/* Handle WQE wraparound. */
- if (unlikely(dseg >=
- (struct mlx4_wqe_data_seg *)sq->eob))
+ if (unlikely(dseg >= (struct mlx4_wqe_data_seg *)
+ sq->eob))
dseg = (struct mlx4_wqe_data_seg *)sq->buf;
dseg->addr = rte_cpu_to_be_64(addr);
/* Memory region key (big endian). */
@@ -392,44 +391,90 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
break;
}
#endif /* NDEBUG */
- if (likely(sbuf->data_len)) {
- byte_count = rte_cpu_to_be_32(sbuf->data_len);
- } else {
- /*
- * Zero length segment is treated as inline
- * segment with zero data.
- */
- byte_count = RTE_BE32(0x80000000);
- }
- /*
- * If the data segment is not at the beginning
- * of a Tx basic block (TXBB) then write the
- * byte count, else postpone the writing to
- * just before updating the control segment.
- */
- if ((uintptr_t)dseg & (uintptr_t)(MLX4_TXBB_SIZE - 1)) {
- /*
- * Need a barrier here before writing the
- * byte_count fields to make sure that all the
- * data is visible before the byte_count field
- * is set. otherwise, if the segment begins a
- * new cacheline, the HCA prefetcher could grab
- * the 64-byte chunk and get a valid
- * (!= 0xffffffff) byte count but stale data,
- * and end up sending the wrong data.
- */
- rte_io_wmb();
- dseg->byte_count = byte_count;
- } else {
+ /* Need a barrier here before writing the byte_count. */
+ rte_io_wmb();
+ dseg->byte_count = rte_cpu_to_be_32(buf->data_len);
+ } else {
+ /* Fill the data segments with buffer information. */
+ struct rte_mbuf *sbuf;
+
+ for (sbuf = buf;
+ sbuf != NULL;
+ sbuf = sbuf->next, dseg++) {
+ addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ rte_prefetch0((volatile void *)addr);
+ /* Handle WQE wraparound. */
+ if (unlikely(dseg >=
+ (struct mlx4_wqe_data_seg *)sq->eob))
+ dseg = (struct mlx4_wqe_data_seg *)
+ sq->buf;
+ dseg->addr = rte_cpu_to_be_64(addr);
+ /* Memory region key (big endian). */
+ dseg->lkey = mlx4_txq_mp2mr(txq,
+ mlx4_txq_mb2mp(sbuf));
+ #ifndef NDEBUG
+ if (unlikely(dseg->lkey ==
+ rte_cpu_to_be_32((uint32_t)-1))) {
+ /* MR does not exist. */
+ DEBUG("%p: unable to get MP <-> MR association",
+ (void *)txq);
+ /*
+ * Restamp entry in case of failure.
+ * Make sure that size is written
+ * correctly, note that we give
+ * ownership to the SW, not the HW.
+ */
+ ctrl->fence_size =
+ (wqe_real_size >> 4) & 0x3f;
+ mlx4_txq_stamp_freed_wqe(sq, head_idx,
+ (sq->head & sq->txbb_cnt) ? 0 : 1);
+ elt->buf = NULL;
+ break;
+ }
+ #endif /* NDEBUG */
+ if (likely(sbuf->data_len)) {
+ byte_count =
+ rte_cpu_to_be_32(sbuf->data_len);
+ } else {
+ /*
+ * Zero length segment is treated as
+ * inline segment with zero data.
+ */
+ byte_count = RTE_BE32(0x80000000);
+ }
/*
- * This data segment starts at the beginning of
- * a new TXBB, so we need to postpone its
- * byte_count writing for later.
+ * If the data segment is not at the beginning
+ * of a Tx basic block (TXBB) then write the
+ * byte count, else postpone the writing to
+ * just before updating the control segment.
*/
- pv[pv_counter].dseg = dseg;
- pv[pv_counter++].val = byte_count;
+ if ((uintptr_t)dseg &
+ (uintptr_t)(MLX4_TXBB_SIZE - 1)) {
+ /*
+ * Need a barrier here before writing
+ * the byte_count fields to make sure
+ * that all the data is visible before
+ * the byte_count field is set.
+ * Otherwise, if the segment begins a
+ * new cacheline, the HCA prefetcher
+ * could grab the 64-byte chunk and get
+ * a valid (!= 0xffffffff) byte count
+ * but stale data, and end up sending
+ * the wrong data.
+ */
+ rte_io_wmb();
+ dseg->byte_count = byte_count;
+ } else {
+ /*
+ * This data segment starts at the
+ * beginning of a new TXBB, so we
+ * need to postpone its byte_count
+ * writing for later.
+ */
+ pv[pv_counter].dseg = dseg;
+ pv[pv_counter++].val = byte_count;
+ }
}
- }
/* Write the first DWORD of each TXBB save earlier. */
if (pv_counter) {
/* Need a barrier before writing the byte_count. */
--
2.7.4
More information about the dev
mailing list