[dpdk-dev] [PATCH v2 1/6] net/mlx5: rework hardware structures

Nelio Laranjeiro nelio.laranjeiro at 6wind.com
Wed Sep 14 14:18:02 CEST 2016


Rework the Work Queue Element (aka WQE) structures to fit the PMD's needs.
A WQE is an aggregation of 16-byte elements known as "data segments"
(aka dsegs).

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
---
 drivers/net/mlx5/mlx5_prm.h  |  70 ++++++------------
 drivers/net/mlx5/mlx5_rxtx.c | 167 ++++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.h |   2 +-
 drivers/net/mlx5/mlx5_txq.c  |   4 +-
 4 files changed, 111 insertions(+), 132 deletions(-)
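
Note (editorial, not part of the patch): a minimal sketch of how the new
constants are meant to compose. Every identifier named here comes from the
hunks below (MLX5_WQE_DWORD_SIZE, MLX5_WQE_SIZE, MLX5_WQE_DS, mlx5_wqe_write,
struct mlx5_wqe); the example byte counts are hypothetical.

  /* Editorial sketch only, mirroring the definitions added below. */
  #define MLX5_WQE_DWORD_SIZE 16                  /* one data segment (DS) is 16 bytes */
  #define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE) /* one WQE slot holds 4 DS = 64 bytes */
  #define MLX5_WQE_DS(n) \
          (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)

  /*
   * MLX5_WQE_DS() rounds a byte count up to 16-byte units, e.g.
   * MLX5_WQE_DS(1) == 1, MLX5_WQE_DS(16) == 1, MLX5_WQE_DS(17) == 2.
   * In mlx5_wqe_write(), ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2): the
   * control and Ethernet segments count as 2 DS, the inlined bytes beyond
   * the 2 already stored in inline_hdr take the rest, and ctrl[1] becomes
   * htonl(txq->qp_num_8s | ds).
   */

The effect of the rework is that every WQE variant is addressed through the
same struct mlx5_wqe header (ctrl[4] plus the small Ethernet segment) followed
by raw 16-byte data segments, instead of one dedicated structure per variant
gathered in a union.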

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 5db219b..042562c 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -65,8 +65,15 @@
 /* Maximum number of packets a multi-packet WQE can handle. */
 #define MLX5_MPW_DSEG_MAX 5
 
-/* Room for inline data in regular work queue element. */
-#define MLX5_WQE64_INL_DATA 12
+/* WQE DWORD size. */
+#define MLX5_WQE_DWORD_SIZE 16
+
+/* WQE size. */
+#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
+
+/* Compute the number of DS. */
+#define MLX5_WQE_DS(n) \
+	(((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)
 
 /* Room for inline data in multi-packet WQE. */
 #define MLX5_MWQE64_INL_DATA 28
@@ -79,59 +86,26 @@ struct mlx5_wqe_eth_seg_small {
 	uint16_t mss;
 	uint32_t rsvd2;
 	uint16_t inline_hdr_sz;
+	uint8_t inline_hdr[2];
 };
 
-/* Regular WQE. */
-struct mlx5_wqe_regular {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
-	struct mlx5_wqe_eth_seg eseg;
-	struct mlx5_wqe_data_seg dseg;
-} __rte_aligned(64);
-
-/* Inline WQE. */
-struct mlx5_wqe_inl {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
-	struct mlx5_wqe_eth_seg eseg;
+struct mlx5_wqe_inl_small {
 	uint32_t byte_cnt;
-	uint8_t data[MLX5_WQE64_INL_DATA];
-} __rte_aligned(64);
+	uint8_t raw;
+};
 
-/* Multi-packet WQE. */
-struct mlx5_wqe_mpw {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
+/* Small common part of the WQE. */
+struct mlx5_wqe {
+	uint32_t ctrl[4];
 	struct mlx5_wqe_eth_seg_small eseg;
-	struct mlx5_wqe_data_seg dseg[2];
-} __rte_aligned(64);
+};
 
-/* Multi-packet WQE with inline. */
-struct mlx5_wqe_mpw_inl {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
-	struct mlx5_wqe_eth_seg_small eseg;
-	uint32_t byte_cnt;
-	uint8_t data[MLX5_MWQE64_INL_DATA];
+/* Full 64-byte WQE. */
+struct mlx5_wqe64 {
+	struct mlx5_wqe hdr;
+	uint8_t raw[32];
 } __rte_aligned(64);
 
-/* Union of all WQE types. */
-union mlx5_wqe {
-	struct mlx5_wqe_regular wqe;
-	struct mlx5_wqe_inl inl;
-	struct mlx5_wqe_mpw mpw;
-	struct mlx5_wqe_mpw_inl mpw_inl;
-	uint8_t data[64];
-};
-
 /* MPW session status. */
 enum mlx5_mpw_state {
 	MLX5_MPW_STATE_OPENED,
@@ -145,7 +119,7 @@ struct mlx5_mpw {
 	unsigned int pkts_n;
 	unsigned int len;
 	unsigned int total_len;
-	volatile union mlx5_wqe *wqe;
+	volatile struct mlx5_wqe *wqe;
 	union {
 		volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];
 		volatile uint8_t *raw;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ecc76ad..5feeb3f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -173,7 +173,7 @@ txq_complete(struct txq *txq)
 	uint16_t elts_tail;
 	uint16_t cq_ci = txq->cq_ci;
 	volatile struct mlx5_cqe64 *cqe = NULL;
-	volatile union mlx5_wqe *wqe;
+	volatile struct mlx5_wqe *wqe;
 
 	do {
 		volatile struct mlx5_cqe64 *tmp;
@@ -199,8 +199,8 @@ txq_complete(struct txq *txq)
 	} while (1);
 	if (unlikely(cqe == NULL))
 		return;
-	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
-	elts_tail = wqe->wqe.ctrl.data[3];
+	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+	elts_tail = wqe->ctrl[3];
 	assert(elts_tail < txq->wqe_n);
 	/* Free buffers. */
 	while (elts_free != elts_tail) {
@@ -302,33 +302,33 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Number of DS elements consumed.
  */
 static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
+mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
 	       struct rte_mbuf *buf, uint32_t length)
 {
-	uintptr_t raw = (uintptr_t)&wqe->wqe.eseg.inline_hdr_start;
+	uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
 	uint16_t ds;
-	uint16_t pkt_inline_sz = 16;
+	uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
 	uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
 	struct mlx5_wqe_data_seg *dseg = NULL;
 
-	assert(length >= 16);
+	assert(length >= MLX5_WQE_DWORD_SIZE);
 	/* Start the know and common part of the WQE structure. */
-	wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-	wqe->wqe.ctrl.data[2] = 0;
-	wqe->wqe.ctrl.data[3] = 0;
-	wqe->wqe.eseg.rsvd0 = 0;
-	wqe->wqe.eseg.rsvd1 = 0;
-	wqe->wqe.eseg.mss = 0;
-	wqe->wqe.eseg.rsvd2 = 0;
+	wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+	wqe->ctrl[2] = 0;
+	wqe->ctrl[3] = 0;
+	wqe->eseg.rsvd0 = 0;
+	wqe->eseg.rsvd1 = 0;
+	wqe->eseg.mss = 0;
+	wqe->eseg.rsvd2 = 0;
 	/* Start by copying the Ethernet Header. */
 	rte_mov16((uint8_t *)raw, (uint8_t *)addr);
-	length -= 16;
-	addr += 16;
+	length -= MLX5_WQE_DWORD_SIZE;
+	addr += MLX5_WQE_DWORD_SIZE;
 	/* Replace the Ethernet type by the VLAN if necessary. */
 	if (buf->ol_flags & PKT_TX_VLAN_PKT) {
 		uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
 
-		memcpy((uint8_t *)(raw + 16 - sizeof(vlan)),
+		memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - sizeof(vlan)),
 		       &vlan, sizeof(vlan));
 		addr -= sizeof(vlan);
 		length += sizeof(vlan);
@@ -339,7 +339,7 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
 		uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
 		uint16_t room;
 
-		raw += 16;
+		raw += MLX5_WQE_DWORD_SIZE;
 		room = end - (uintptr_t)raw;
 		if (room > max_inline) {
 			uintptr_t addr_end = (addr + max_inline) &
@@ -356,15 +356,15 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
 			assert(addr <= addr_end);
 		}
 		/* Store the inlined packet size in the WQE. */
-		wqe->wqe.eseg.inline_hdr_sz = htons(pkt_inline_sz);
+		wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
 		/*
 		 * 2 DWORDs consumed by the WQE header + 1 DSEG +
 		 * the size of the inline part of the packet.
 		 */
-		ds = 2 + ((pkt_inline_sz - 2 + 15) / 16);
+		ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
 		if (length > 0) {
 			dseg = (struct mlx5_wqe_data_seg *)
-				((uintptr_t)wqe + (ds * 16));
+				((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
 			if ((uintptr_t)dseg >= end)
 				dseg = (struct mlx5_wqe_data_seg *)
 					((uintptr_t)&(*txq->wqes)[0]);
@@ -377,9 +377,9 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
 		 * No inline has been done in the packet, only the Ethernet
 		 * Header as been stored.
 		 */
-		wqe->wqe.eseg.inline_hdr_sz = htons(16);
+		wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
 		dseg = (struct mlx5_wqe_data_seg *)
-			((uintptr_t)wqe + (ds * 16));
+			((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
 use_dseg:
 		*dseg = (struct mlx5_wqe_data_seg) {
 			.addr = htonll(addr),
@@ -388,7 +388,7 @@ use_dseg:
 		};
 		++ds;
 	}
-	wqe->wqe.ctrl.data[1] = htonl(txq->qp_num_8s | ds);
+	wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
 	return ds;
 }
 
@@ -444,7 +444,7 @@ tx_prefetch_cqe(struct txq *txq, uint16_t ci)
 static inline void
 tx_prefetch_wqe(struct txq *txq, uint16_t ci)
 {
-	volatile union mlx5_wqe *wqe;
+	volatile struct mlx5_wqe64 *wqe;
 
 	wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
 	rte_prefetch0(wqe);
@@ -473,7 +473,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	unsigned int j = 0;
 	unsigned int max;
 	unsigned int comp;
-	volatile union mlx5_wqe *wqe = NULL;
+	volatile struct mlx5_wqe *wqe = NULL;
 
 	if (unlikely(!pkts_n))
 		return 0;
@@ -492,7 +492,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		uint32_t length;
 		unsigned int segs_n = buf->nb_segs;
 		volatile struct mlx5_wqe_data_seg *dseg;
-		unsigned int ds = sizeof(*wqe) / 16;
+		unsigned int ds = 0;
 
 		/*
 		 * Make sure there is enough room to store this packet and
@@ -504,7 +504,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		max -= segs_n;
 		--pkts_n;
 		elts_head_next = (elts_head + 1) & (elts_n - 1);
-		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
+		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
 		tx_prefetch_wqe(txq, txq->wqe_ci);
 		tx_prefetch_wqe(txq, txq->wqe_ci + 1);
 		if (pkts_n)
@@ -519,25 +519,25 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-			wqe->wqe.eseg.cs_flags =
+			wqe->eseg.cs_flags =
 				MLX5_ETH_WQE_L3_CSUM |
 				MLX5_ETH_WQE_L4_CSUM;
 		} else {
-			wqe->wqe.eseg.cs_flags = 0;
+			wqe->eseg.cs_flags = 0;
 		}
 		ds = mlx5_wqe_write(txq, wqe, buf, length);
 		if (segs_n == 1)
 			goto skip_segs;
 		dseg = (volatile struct mlx5_wqe_data_seg *)
-			(((uintptr_t)wqe) + ds * 16);
+			(((uintptr_t)wqe) + ds * MLX5_WQE_DWORD_SIZE);
 		while (--segs_n) {
 			/*
 			 * Spill on next WQE when the current one does not have
 			 * enough room left. Size of WQE must a be a multiple
 			 * of data segment size.
 			 */
-			assert(!(sizeof(*wqe) % sizeof(*dseg)));
-			if (!(ds % (sizeof(*wqe) / 16)))
+			assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
+			if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
 				dseg = (volatile void *)
 					&(*txq->wqes)[txq->wqe_ci++ &
 						      (txq->wqe_n - 1)];
@@ -558,8 +558,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			++j;
 		}
 		/* Update DS field in WQE. */
-		wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0);
-		wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f);
+		wqe->ctrl[1] &= htonl(0xffffffc0);
+		wqe->ctrl[1] |= htonl(ds & 0x3f);
 skip_segs:
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment sent bytes counter. */
@@ -577,9 +577,9 @@ skip_segs:
 	comp = txq->elts_comp + i + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
 		/* Request completion on last WQE. */
-		wqe->wqe.ctrl.data[2] = htonl(8);
+		wqe->ctrl[2] = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		wqe->wqe.ctrl.data[3] = elts_head;
+		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
@@ -616,19 +616,20 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 	mpw->pkts_n = 0;
 	mpw->len = length;
 	mpw->total_len = 0;
-	mpw->wqe = &(*txq->wqes)[idx];
-	mpw->wqe->mpw.eseg.mss = htons(length);
-	mpw->wqe->mpw.eseg.inline_hdr_sz = 0;
-	mpw->wqe->mpw.eseg.rsvd0 = 0;
-	mpw->wqe->mpw.eseg.rsvd1 = 0;
-	mpw->wqe->mpw.eseg.rsvd2 = 0;
-	mpw->wqe->mpw.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
-					   (txq->wqe_ci << 8) |
-					   MLX5_OPCODE_LSO_MPW);
-	mpw->wqe->mpw.ctrl.data[2] = 0;
-	mpw->wqe->mpw.ctrl.data[3] = 0;
-	mpw->data.dseg[0] = &mpw->wqe->mpw.dseg[0];
-	mpw->data.dseg[1] = &mpw->wqe->mpw.dseg[1];
+	mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+	mpw->wqe->eseg.mss = htons(length);
+	mpw->wqe->eseg.inline_hdr_sz = 0;
+	mpw->wqe->eseg.rsvd0 = 0;
+	mpw->wqe->eseg.rsvd1 = 0;
+	mpw->wqe->eseg.rsvd2 = 0;
+	mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+				  (txq->wqe_ci << 8) | MLX5_OPCODE_LSO_MPW);
+	mpw->wqe->ctrl[2] = 0;
+	mpw->wqe->ctrl[3] = 0;
+	mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
+		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
+	mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
+		(((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
 	mpw->data.dseg[2] = &(*dseg)[0];
 	mpw->data.dseg[3] = &(*dseg)[1];
 	mpw->data.dseg[4] = &(*dseg)[2];
@@ -651,7 +652,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
 	 * Store size in multiple of 16 bytes. Control and Ethernet segments
 	 * count as 2.
 	 */
-	mpw->wqe->mpw.ctrl.data[1] = htonl(txq->qp_num_8s | (2 + num));
+	mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num));
 	mpw->state = MLX5_MPW_STATE_CLOSED;
 	if (num < 3)
 		++txq->wqe_ci;
@@ -729,11 +730,11 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
 		    ((mpw.len != length) ||
 		     (segs_n != 1) ||
-		     (mpw.wqe->mpw.eseg.cs_flags != cs_flags)))
+		     (mpw.wqe->eseg.cs_flags != cs_flags)))
 			mlx5_mpw_close(txq, &mpw);
 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
 			mlx5_mpw_new(txq, &mpw, length);
-			mpw.wqe->mpw.eseg.cs_flags = cs_flags;
+			mpw.wqe->eseg.cs_flags = cs_flags;
 		}
 		/* Multi-segment packets must be alone in their MPW. */
 		assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -779,12 +780,12 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* "j" includes both packets and segments. */
 	comp = txq->elts_comp + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
-		volatile union mlx5_wqe *wqe = mpw.wqe;
+		volatile struct mlx5_wqe *wqe = mpw.wqe;
 
 		/* Request completion on last WQE. */
-		wqe->mpw.ctrl.data[2] = htonl(8);
+		wqe->ctrl[2] = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		wqe->mpw.ctrl.data[3] = elts_head;
+		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
@@ -815,24 +816,27 @@ static inline void
 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+	struct mlx5_wqe_inl_small *inl;
 
 	mpw->state = MLX5_MPW_INL_STATE_OPENED;
 	mpw->pkts_n = 0;
 	mpw->len = length;
 	mpw->total_len = 0;
-	mpw->wqe = &(*txq->wqes)[idx];
-	mpw->wqe->mpw_inl.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
-					       (txq->wqe_ci << 8) |
-					       MLX5_OPCODE_LSO_MPW);
-	mpw->wqe->mpw_inl.ctrl.data[2] = 0;
-	mpw->wqe->mpw_inl.ctrl.data[3] = 0;
-	mpw->wqe->mpw_inl.eseg.mss = htons(length);
-	mpw->wqe->mpw_inl.eseg.inline_hdr_sz = 0;
-	mpw->wqe->mpw_inl.eseg.cs_flags = 0;
-	mpw->wqe->mpw_inl.eseg.rsvd0 = 0;
-	mpw->wqe->mpw_inl.eseg.rsvd1 = 0;
-	mpw->wqe->mpw_inl.eseg.rsvd2 = 0;
-	mpw->data.raw = &mpw->wqe->mpw_inl.data[0];
+	mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+	mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+				  (txq->wqe_ci << 8) |
+				  MLX5_OPCODE_LSO_MPW);
+	mpw->wqe->ctrl[2] = 0;
+	mpw->wqe->ctrl[3] = 0;
+	mpw->wqe->eseg.mss = htons(length);
+	mpw->wqe->eseg.inline_hdr_sz = 0;
+	mpw->wqe->eseg.cs_flags = 0;
+	mpw->wqe->eseg.rsvd0 = 0;
+	mpw->wqe->eseg.rsvd1 = 0;
+	mpw->wqe->eseg.rsvd2 = 0;
+	inl = (struct mlx5_wqe_inl_small *)
+		(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
+	mpw->data.raw = (uint8_t *)&inl->raw;
 }
 
 /**
@@ -847,17 +851,18 @@ static inline void
 mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int size;
+	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
+		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
 
-	size = sizeof(*mpw->wqe) - MLX5_MWQE64_INL_DATA + mpw->total_len;
+	size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
 	/*
 	 * Store size in multiple of 16 bytes. Control and Ethernet segments
 	 * count as 2.
 	 */
-	mpw->wqe->mpw_inl.ctrl.data[1] =
-		htonl(txq->qp_num_8s | ((size + 15) / 16));
+	mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size));
 	mpw->state = MLX5_MPW_STATE_CLOSED;
-	mpw->wqe->mpw_inl.byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
-	txq->wqe_ci += (size + (sizeof(*mpw->wqe) - 1)) / sizeof(*mpw->wqe);
+	inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
+	txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
 }
 
 /**
@@ -930,13 +935,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 		if (mpw.state == MLX5_MPW_STATE_OPENED) {
 			if ((mpw.len != length) ||
 			    (segs_n != 1) ||
-			    (mpw.wqe->mpw.eseg.cs_flags != cs_flags))
+			    (mpw.wqe->eseg.cs_flags != cs_flags))
 				mlx5_mpw_close(txq, &mpw);
 		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
 			if ((mpw.len != length) ||
 			    (segs_n != 1) ||
 			    (length > inline_room) ||
-			    (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) {
+			    (mpw.wqe->eseg.cs_flags != cs_flags)) {
 				mlx5_mpw_inline_close(txq, &mpw);
 				inline_room =
 					txq->max_inline * RTE_CACHE_LINE_SIZE;
@@ -946,10 +951,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			if ((segs_n != 1) ||
 			    (length > inline_room)) {
 				mlx5_mpw_new(txq, &mpw, length);
-				mpw.wqe->mpw.eseg.cs_flags = cs_flags;
+				mpw.wqe->eseg.cs_flags = cs_flags;
 			} else {
 				mlx5_mpw_inline_new(txq, &mpw, length);
-				mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags;
+				mpw.wqe->eseg.cs_flags = cs_flags;
 			}
 		}
 		/* Multi-segment packets must be alone in their MPW. */
@@ -1042,12 +1047,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 	/* "j" includes both packets and segments. */
 	comp = txq->elts_comp + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
-		volatile union mlx5_wqe *wqe = mpw.wqe;
+		volatile struct mlx5_wqe *wqe = mpw.wqe;
 
 		/* Request completion on last WQE. */
-		wqe->mpw_inl.ctrl.data[2] = htonl(8);
+		wqe->ctrl[2] = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		wqe->mpw_inl.ctrl.data[3] = elts_head;
+		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 8c568ad..fbc2a78 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -252,7 +252,7 @@ struct txq {
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
-	volatile union mlx5_wqe (*wqes)[]; /* Work queue. */
+	volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5ddd2fb..e8ebbbe 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -82,7 +82,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) {
-		volatile union mlx5_wqe *wqe = &(*txq_ctrl->txq.wqes)[i];
+		volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
 
 		memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
 	}
@@ -215,7 +215,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
 	tmpl->txq.cqe_n = ibcq->cqe + 1;
 	tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
 	tmpl->txq.wqes =
-		(volatile union mlx5_wqe (*)[])
+		(volatile struct mlx5_wqe64 (*)[])
 		(uintptr_t)qp->gen_data.sqstart;
 	tmpl->txq.wqe_n = qp->sq.wqe_cnt;
 	tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
-- 
2.1.4


