[dpdk-dev] [PATCH v3 1/6] net/mlx5: rework hardware structures

Nelio Laranjeiro nelio.laranjeiro at 6wind.com
Tue Sep 20 10:53:46 CEST 2016


Rework Work Queue Element (aka WQE) structures to fit PMD needs.
A WQE is an aggregation of 16-byte elements known as "data segments"
(aka dseg).

The only common part is the first two elements, i.e. the control segment
which defines the job type, and the Ethernet segment which embeds offload
requests along with other information.  After that, a WQE can contain
(see the sketch after this list):
  - a raw data packet,
  - a data pointer to the packet itself,
  - or both.
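
For illustration only, a minimal sketch of how a single-packet SEND WQE is
built with the reworked structures introduced by this patch (field and macro
names are taken from the definitions below; the surrounding txq variable is
assumed to be in scope, and this snippet is not part of the patch itself):

  /* Locate the common header of the next WQE in the work queue ring. */
  volatile struct mlx5_wqe *wqe =
          &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;

  /* Control segment: opcode and WQE index define the job type. */
  wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
  /* Ethernet segment: checksum offload requests and other information. */
  wqe->eseg.cs_flags = 0;
  /* After the two common segments, either raw packet bytes are inlined,
   * or a data segment (dseg) points to the packet, or both. */
  struct mlx5_wqe_data_seg *dseg = (struct mlx5_wqe_data_seg *)
          ((uintptr_t)wqe + 2 * MLX5_WQE_DWORD_SIZE);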

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
---
 drivers/net/mlx5/mlx5_prm.h  |  70 ++++++------------
 drivers/net/mlx5/mlx5_rxtx.c | 167 ++++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.h |   2 +-
 drivers/net/mlx5/mlx5_txq.c  |   4 +-
 4 files changed, 111 insertions(+), 132 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 5db219b..042562c 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -65,8 +65,15 @@
 /* Maximum number of packets a multi-packet WQE can handle. */
 #define MLX5_MPW_DSEG_MAX 5
 
-/* Room for inline data in regular work queue element. */
-#define MLX5_WQE64_INL_DATA 12
+/* WQE DWORD size */
+#define MLX5_WQE_DWORD_SIZE 16
+
+/* WQE size */
+#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
+
+/* Compute the number of DS. */
+#define MLX5_WQE_DS(n) \
+	(((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)
 
 /* Room for inline data in multi-packet WQE. */
 #define MLX5_MWQE64_INL_DATA 28
@@ -79,59 +86,26 @@ struct mlx5_wqe_eth_seg_small {
 	uint16_t mss;
 	uint32_t rsvd2;
 	uint16_t inline_hdr_sz;
+	uint8_t inline_hdr[2];
 };
 
-/* Regular WQE. */
-struct mlx5_wqe_regular {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
-	struct mlx5_wqe_eth_seg eseg;
-	struct mlx5_wqe_data_seg dseg;
-} __rte_aligned(64);
-
-/* Inline WQE. */
-struct mlx5_wqe_inl {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
-	struct mlx5_wqe_eth_seg eseg;
+struct mlx5_wqe_inl_small {
 	uint32_t byte_cnt;
-	uint8_t data[MLX5_WQE64_INL_DATA];
-} __rte_aligned(64);
+	uint8_t raw;
+};
 
-/* Multi-packet WQE. */
-struct mlx5_wqe_mpw {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
+/* Small common part of the WQE. */
+struct mlx5_wqe {
+	uint32_t ctrl[4];
 	struct mlx5_wqe_eth_seg_small eseg;
-	struct mlx5_wqe_data_seg dseg[2];
-} __rte_aligned(64);
+};
 
-/* Multi-packet WQE with inline. */
-struct mlx5_wqe_mpw_inl {
-	union {
-		struct mlx5_wqe_ctrl_seg ctrl;
-		uint32_t data[4];
-	} ctrl;
-	struct mlx5_wqe_eth_seg_small eseg;
-	uint32_t byte_cnt;
-	uint8_t data[MLX5_MWQE64_INL_DATA];
+/* WQE. */
+struct mlx5_wqe64 {
+	struct mlx5_wqe hdr;
+	uint8_t raw[32];
 } __rte_aligned(64);
 
-/* Union of all WQE types. */
-union mlx5_wqe {
-	struct mlx5_wqe_regular wqe;
-	struct mlx5_wqe_inl inl;
-	struct mlx5_wqe_mpw mpw;
-	struct mlx5_wqe_mpw_inl mpw_inl;
-	uint8_t data[64];
-};
-
 /* MPW session status. */
 enum mlx5_mpw_state {
 	MLX5_MPW_STATE_OPENED,
@@ -145,7 +119,7 @@ struct mlx5_mpw {
 	unsigned int pkts_n;
 	unsigned int len;
 	unsigned int total_len;
-	volatile union mlx5_wqe *wqe;
+	volatile struct mlx5_wqe *wqe;
 	union {
 		volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];
 		volatile uint8_t *raw;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ecc76ad..5feeb3f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -173,7 +173,7 @@ txq_complete(struct txq *txq)
 	uint16_t elts_tail;
 	uint16_t cq_ci = txq->cq_ci;
 	volatile struct mlx5_cqe64 *cqe = NULL;
-	volatile union mlx5_wqe *wqe;
+	volatile struct mlx5_wqe *wqe;
 
 	do {
 		volatile struct mlx5_cqe64 *tmp;
@@ -199,8 +199,8 @@ txq_complete(struct txq *txq)
 	} while (1);
 	if (unlikely(cqe == NULL))
 		return;
-	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
-	elts_tail = wqe->wqe.ctrl.data[3];
+	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+	elts_tail = wqe->ctrl[3];
 	assert(elts_tail < txq->wqe_n);
 	/* Free buffers. */
 	while (elts_free != elts_tail) {
@@ -302,33 +302,33 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Number of DS elements consumed.
  */
 static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
+mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
 	       struct rte_mbuf *buf, uint32_t length)
 {
-	uintptr_t raw = (uintptr_t)&wqe->wqe.eseg.inline_hdr_start;
+	uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
 	uint16_t ds;
-	uint16_t pkt_inline_sz = 16;
+	uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
 	uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
 	struct mlx5_wqe_data_seg *dseg = NULL;
 
-	assert(length >= 16);
+	assert(length >= MLX5_WQE_DWORD_SIZE);
 	/* Start the know and common part of the WQE structure. */
-	wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-	wqe->wqe.ctrl.data[2] = 0;
-	wqe->wqe.ctrl.data[3] = 0;
-	wqe->wqe.eseg.rsvd0 = 0;
-	wqe->wqe.eseg.rsvd1 = 0;
-	wqe->wqe.eseg.mss = 0;
-	wqe->wqe.eseg.rsvd2 = 0;
+	wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+	wqe->ctrl[2] = 0;
+	wqe->ctrl[3] = 0;
+	wqe->eseg.rsvd0 = 0;
+	wqe->eseg.rsvd1 = 0;
+	wqe->eseg.mss = 0;
+	wqe->eseg.rsvd2 = 0;
 	/* Start by copying the Ethernet Header. */
 	rte_mov16((uint8_t *)raw, (uint8_t *)addr);
-	length -= 16;
-	addr += 16;
+	length -= MLX5_WQE_DWORD_SIZE;
+	addr += MLX5_WQE_DWORD_SIZE;
 	/* Replace the Ethernet type by the VLAN if necessary. */
 	if (buf->ol_flags & PKT_TX_VLAN_PKT) {
 		uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
 
-		memcpy((uint8_t *)(raw + 16 - sizeof(vlan)),
+		memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - sizeof(vlan)),
 		       &vlan, sizeof(vlan));
 		addr -= sizeof(vlan);
 		length += sizeof(vlan);
@@ -339,7 +339,7 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
 		uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
 		uint16_t room;
 
-		raw += 16;
+		raw += MLX5_WQE_DWORD_SIZE;
 		room = end - (uintptr_t)raw;
 		if (room > max_inline) {
 			uintptr_t addr_end = (addr + max_inline) &
@@ -356,15 +356,15 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
 			assert(addr <= addr_end);
 		}
 		/* Store the inlined packet size in the WQE. */
-		wqe->wqe.eseg.inline_hdr_sz = htons(pkt_inline_sz);
+		wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
 		/*
 		 * 2 DWORDs consumed by the WQE header + 1 DSEG +
 		 * the size of the inline part of the packet.
 		 */
-		ds = 2 + ((pkt_inline_sz - 2 + 15) / 16);
+		ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
 		if (length > 0) {
 			dseg = (struct mlx5_wqe_data_seg *)
-				((uintptr_t)wqe + (ds * 16));
+				((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
 			if ((uintptr_t)dseg >= end)
 				dseg = (struct mlx5_wqe_data_seg *)
 					((uintptr_t)&(*txq->wqes)[0]);
@@ -377,9 +377,9 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
 		 * No inline has been done in the packet, only the Ethernet
 		 * Header as been stored.
 		 */
-		wqe->wqe.eseg.inline_hdr_sz = htons(16);
+		wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
 		dseg = (struct mlx5_wqe_data_seg *)
-			((uintptr_t)wqe + (ds * 16));
+			((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
 use_dseg:
 		*dseg = (struct mlx5_wqe_data_seg) {
 			.addr = htonll(addr),
@@ -388,7 +388,7 @@ use_dseg:
 		};
 		++ds;
 	}
-	wqe->wqe.ctrl.data[1] = htonl(txq->qp_num_8s | ds);
+	wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
 	return ds;
 }
 
@@ -444,7 +444,7 @@ tx_prefetch_cqe(struct txq *txq, uint16_t ci)
 static inline void
 tx_prefetch_wqe(struct txq *txq, uint16_t ci)
 {
-	volatile union mlx5_wqe *wqe;
+	volatile struct mlx5_wqe64 *wqe;
 
 	wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
 	rte_prefetch0(wqe);
@@ -473,7 +473,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	unsigned int j = 0;
 	unsigned int max;
 	unsigned int comp;
-	volatile union mlx5_wqe *wqe = NULL;
+	volatile struct mlx5_wqe *wqe = NULL;
 
 	if (unlikely(!pkts_n))
 		return 0;
@@ -492,7 +492,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		uint32_t length;
 		unsigned int segs_n = buf->nb_segs;
 		volatile struct mlx5_wqe_data_seg *dseg;
-		unsigned int ds = sizeof(*wqe) / 16;
+		unsigned int ds = 0;
 
 		/*
 		 * Make sure there is enough room to store this packet and
@@ -504,7 +504,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		max -= segs_n;
 		--pkts_n;
 		elts_head_next = (elts_head + 1) & (elts_n - 1);
-		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
+		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
 		tx_prefetch_wqe(txq, txq->wqe_ci);
 		tx_prefetch_wqe(txq, txq->wqe_ci + 1);
 		if (pkts_n)
@@ -519,25 +519,25 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-			wqe->wqe.eseg.cs_flags =
+			wqe->eseg.cs_flags =
 				MLX5_ETH_WQE_L3_CSUM |
 				MLX5_ETH_WQE_L4_CSUM;
 		} else {
-			wqe->wqe.eseg.cs_flags = 0;
+			wqe->eseg.cs_flags = 0;
 		}
 		ds = mlx5_wqe_write(txq, wqe, buf, length);
 		if (segs_n == 1)
 			goto skip_segs;
 		dseg = (volatile struct mlx5_wqe_data_seg *)
-			(((uintptr_t)wqe) + ds * 16);
+			(((uintptr_t)wqe) + ds * MLX5_WQE_DWORD_SIZE);
 		while (--segs_n) {
 			/*
 			 * Spill on next WQE when the current one does not have
 			 * enough room left. Size of WQE must a be a multiple
 			 * of data segment size.
 			 */
-			assert(!(sizeof(*wqe) % sizeof(*dseg)));
-			if (!(ds % (sizeof(*wqe) / 16)))
+			assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
+			if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
 				dseg = (volatile void *)
 					&(*txq->wqes)[txq->wqe_ci++ &
 						      (txq->wqe_n - 1)];
@@ -558,8 +558,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			++j;
 		}
 		/* Update DS field in WQE. */
-		wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0);
-		wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f);
+		wqe->ctrl[1] &= htonl(0xffffffc0);
+		wqe->ctrl[1] |= htonl(ds & 0x3f);
 skip_segs:
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment sent bytes counter. */
@@ -577,9 +577,9 @@ skip_segs:
 	comp = txq->elts_comp + i + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
 		/* Request completion on last WQE. */
-		wqe->wqe.ctrl.data[2] = htonl(8);
+		wqe->ctrl[2] = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		wqe->wqe.ctrl.data[3] = elts_head;
+		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
@@ -616,19 +616,20 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 	mpw->pkts_n = 0;
 	mpw->len = length;
 	mpw->total_len = 0;
-	mpw->wqe = &(*txq->wqes)[idx];
-	mpw->wqe->mpw.eseg.mss = htons(length);
-	mpw->wqe->mpw.eseg.inline_hdr_sz = 0;
-	mpw->wqe->mpw.eseg.rsvd0 = 0;
-	mpw->wqe->mpw.eseg.rsvd1 = 0;
-	mpw->wqe->mpw.eseg.rsvd2 = 0;
-	mpw->wqe->mpw.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
-					   (txq->wqe_ci << 8) |
-					   MLX5_OPCODE_LSO_MPW);
-	mpw->wqe->mpw.ctrl.data[2] = 0;
-	mpw->wqe->mpw.ctrl.data[3] = 0;
-	mpw->data.dseg[0] = &mpw->wqe->mpw.dseg[0];
-	mpw->data.dseg[1] = &mpw->wqe->mpw.dseg[1];
+	mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+	mpw->wqe->eseg.mss = htons(length);
+	mpw->wqe->eseg.inline_hdr_sz = 0;
+	mpw->wqe->eseg.rsvd0 = 0;
+	mpw->wqe->eseg.rsvd1 = 0;
+	mpw->wqe->eseg.rsvd2 = 0;
+	mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+				  (txq->wqe_ci << 8) | MLX5_OPCODE_LSO_MPW);
+	mpw->wqe->ctrl[2] = 0;
+	mpw->wqe->ctrl[3] = 0;
+	mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
+		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
+	mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
+		(((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
 	mpw->data.dseg[2] = &(*dseg)[0];
 	mpw->data.dseg[3] = &(*dseg)[1];
 	mpw->data.dseg[4] = &(*dseg)[2];
@@ -651,7 +652,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
 	 * Store size in multiple of 16 bytes. Control and Ethernet segments
 	 * count as 2.
 	 */
-	mpw->wqe->mpw.ctrl.data[1] = htonl(txq->qp_num_8s | (2 + num));
+	mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num));
 	mpw->state = MLX5_MPW_STATE_CLOSED;
 	if (num < 3)
 		++txq->wqe_ci;
@@ -729,11 +730,11 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
 		    ((mpw.len != length) ||
 		     (segs_n != 1) ||
-		     (mpw.wqe->mpw.eseg.cs_flags != cs_flags)))
+		     (mpw.wqe->eseg.cs_flags != cs_flags)))
 			mlx5_mpw_close(txq, &mpw);
 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
 			mlx5_mpw_new(txq, &mpw, length);
-			mpw.wqe->mpw.eseg.cs_flags = cs_flags;
+			mpw.wqe->eseg.cs_flags = cs_flags;
 		}
 		/* Multi-segment packets must be alone in their MPW. */
 		assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -779,12 +780,12 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* "j" includes both packets and segments. */
 	comp = txq->elts_comp + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
-		volatile union mlx5_wqe *wqe = mpw.wqe;
+		volatile struct mlx5_wqe *wqe = mpw.wqe;
 
 		/* Request completion on last WQE. */
-		wqe->mpw.ctrl.data[2] = htonl(8);
+		wqe->ctrl[2] = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		wqe->mpw.ctrl.data[3] = elts_head;
+		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
@@ -815,24 +816,27 @@ static inline void
 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
 	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+	struct mlx5_wqe_inl_small *inl;
 
 	mpw->state = MLX5_MPW_INL_STATE_OPENED;
 	mpw->pkts_n = 0;
 	mpw->len = length;
 	mpw->total_len = 0;
-	mpw->wqe = &(*txq->wqes)[idx];
-	mpw->wqe->mpw_inl.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
-					       (txq->wqe_ci << 8) |
-					       MLX5_OPCODE_LSO_MPW);
-	mpw->wqe->mpw_inl.ctrl.data[2] = 0;
-	mpw->wqe->mpw_inl.ctrl.data[3] = 0;
-	mpw->wqe->mpw_inl.eseg.mss = htons(length);
-	mpw->wqe->mpw_inl.eseg.inline_hdr_sz = 0;
-	mpw->wqe->mpw_inl.eseg.cs_flags = 0;
-	mpw->wqe->mpw_inl.eseg.rsvd0 = 0;
-	mpw->wqe->mpw_inl.eseg.rsvd1 = 0;
-	mpw->wqe->mpw_inl.eseg.rsvd2 = 0;
-	mpw->data.raw = &mpw->wqe->mpw_inl.data[0];
+	mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+	mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+				  (txq->wqe_ci << 8) |
+				  MLX5_OPCODE_LSO_MPW);
+	mpw->wqe->ctrl[2] = 0;
+	mpw->wqe->ctrl[3] = 0;
+	mpw->wqe->eseg.mss = htons(length);
+	mpw->wqe->eseg.inline_hdr_sz = 0;
+	mpw->wqe->eseg.cs_flags = 0;
+	mpw->wqe->eseg.rsvd0 = 0;
+	mpw->wqe->eseg.rsvd1 = 0;
+	mpw->wqe->eseg.rsvd2 = 0;
+	inl = (struct mlx5_wqe_inl_small *)
+		(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
+	mpw->data.raw = (uint8_t *)&inl->raw;
 }
 
 /**
@@ -847,17 +851,18 @@ static inline void
 mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
 {
 	unsigned int size;
+	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
+		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
 
-	size = sizeof(*mpw->wqe) - MLX5_MWQE64_INL_DATA + mpw->total_len;
+	size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
 	/*
 	 * Store size in multiple of 16 bytes. Control and Ethernet segments
 	 * count as 2.
 	 */
-	mpw->wqe->mpw_inl.ctrl.data[1] =
-		htonl(txq->qp_num_8s | ((size + 15) / 16));
+	mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size));
 	mpw->state = MLX5_MPW_STATE_CLOSED;
-	mpw->wqe->mpw_inl.byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
-	txq->wqe_ci += (size + (sizeof(*mpw->wqe) - 1)) / sizeof(*mpw->wqe);
+	inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
+	txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
 }
 
 /**
@@ -930,13 +935,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 		if (mpw.state == MLX5_MPW_STATE_OPENED) {
 			if ((mpw.len != length) ||
 			    (segs_n != 1) ||
-			    (mpw.wqe->mpw.eseg.cs_flags != cs_flags))
+			    (mpw.wqe->eseg.cs_flags != cs_flags))
 				mlx5_mpw_close(txq, &mpw);
 		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
 			if ((mpw.len != length) ||
 			    (segs_n != 1) ||
 			    (length > inline_room) ||
-			    (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) {
+			    (mpw.wqe->eseg.cs_flags != cs_flags)) {
 				mlx5_mpw_inline_close(txq, &mpw);
 				inline_room =
 					txq->max_inline * RTE_CACHE_LINE_SIZE;
@@ -946,10 +951,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			if ((segs_n != 1) ||
 			    (length > inline_room)) {
 				mlx5_mpw_new(txq, &mpw, length);
-				mpw.wqe->mpw.eseg.cs_flags = cs_flags;
+				mpw.wqe->eseg.cs_flags = cs_flags;
 			} else {
 				mlx5_mpw_inline_new(txq, &mpw, length);
-				mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags;
+				mpw.wqe->eseg.cs_flags = cs_flags;
 			}
 		}
 		/* Multi-segment packets must be alone in their MPW. */
@@ -1042,12 +1047,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 	/* "j" includes both packets and segments. */
 	comp = txq->elts_comp + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
-		volatile union mlx5_wqe *wqe = mpw.wqe;
+		volatile struct mlx5_wqe *wqe = mpw.wqe;
 
 		/* Request completion on last WQE. */
-		wqe->mpw_inl.ctrl.data[2] = htonl(8);
+		wqe->ctrl[2] = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		wqe->mpw_inl.ctrl.data[3] = elts_head;
+		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 8c568ad..fbc2a78 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -252,7 +252,7 @@ struct txq {
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
-	volatile union mlx5_wqe (*wqes)[]; /* Work queue. */
+	volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5ddd2fb..e8ebbbe 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -82,7 +82,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) {
-		volatile union mlx5_wqe *wqe = &(*txq_ctrl->txq.wqes)[i];
+		volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
 
 		memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
 	}
@@ -215,7 +215,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
 	tmpl->txq.cqe_n = ibcq->cqe + 1;
 	tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
 	tmpl->txq.wqes =
-		(volatile union mlx5_wqe (*)[])
+		(volatile struct mlx5_wqe64 (*)[])
 		(uintptr_t)qp->gen_data.sqstart;
 	tmpl->txq.wqe_n = qp->sq.wqe_cnt;
 	tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
-- 
2.1.4


