[dpdk-dev] [PATCH v3 03/25] mlx5: remove Tx gather support

Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Tue Jun 21 09:23:16 CEST 2016


This is done in preparation for bypassing Verbs entirely in the data path
as a performance improvement. TX gather (transmission of multi-segment
mbufs) cannot be maintained during the transition and will be
reimplemented later.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
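For context, TX gather means posting one scatter/gather entry (SGE) per
mbuf segment in a single send work request, instead of requiring packet
data to be contiguous. Below is a minimal sketch of that idea, not the
PMD's actual implementation; lookup_lkey() is a hypothetical stand-in
for the driver's txq_mp2mr() memory-region lookup.

#include <infiniband/verbs.h>
#include <rte_mbuf.h>

/* Fill one SGE per mbuf segment; return the SGE count,
 * or -1 if the chain is longer than max_sge. */
static int
fill_sges(struct rte_mbuf *buf, struct ibv_sge *sges,
	  unsigned int max_sge,
	  uint32_t (*lookup_lkey)(struct rte_mbuf *))
{
	unsigned int n = 0;

	for (; buf != NULL; buf = buf->next) {
		if (n == max_sge)
			return -1;
		sges[n].addr = rte_pktmbuf_mtod(buf, uintptr_t);
		sges[n].length = rte_pktmbuf_data_len(buf);
		sges[n].lkey = lookup_lkey(buf);
		n++;
	}
	return n;
}
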
 drivers/net/mlx5/mlx5_ethdev.c |   2 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 315 ++++++++---------------------------------
 drivers/net/mlx5/mlx5_rxtx.h   |  17 ---
 drivers/net/mlx5/mlx5_txq.c    |  49 ++-----
 4 files changed, 69 insertions(+), 314 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 0a881b6..280a90a 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1260,7 +1260,7 @@ mlx5_secondary_data_setup(struct priv *priv)
 		if (txq != NULL) {
 			if (txq_setup(priv->dev,
 				      txq,
-				      primary_txq->elts_n * MLX5_PMD_SGE_WR_N,
+				      primary_txq->elts_n,
 				      primary_txq->socket,
 				      NULL) == 0) {
 				txq->stats.idx = primary_txq->stats.idx;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 616cf7a..6e184c3 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -228,156 +228,6 @@ insert_vlan_sw(struct rte_mbuf *buf)
 	return 0;
 }
 
-#if MLX5_PMD_SGE_WR_N > 1
-
-/**
- * Copy scattered mbuf contents to a single linear buffer.
- *
- * @param[out] linear
- *   Linear output buffer.
- * @param[in] buf
- *   Scattered input buffer.
- *
- * @return
- *   Number of bytes copied to the output buffer or 0 if not large enough.
- */
-static unsigned int
-linearize_mbuf(linear_t *linear, struct rte_mbuf *buf)
-{
-	unsigned int size = 0;
-	unsigned int offset;
-
-	do {
-		unsigned int len = DATA_LEN(buf);
-
-		offset = size;
-		size += len;
-		if (unlikely(size > sizeof(*linear)))
-			return 0;
-		memcpy(&(*linear)[offset],
-		       rte_pktmbuf_mtod(buf, uint8_t *),
-		       len);
-		buf = NEXT(buf);
-	} while (buf != NULL);
-	return size;
-}
-
-/**
- * Handle scattered buffers for mlx5_tx_burst().
- *
- * @param txq
- *   TX queue structure.
- * @param segs
- *   Number of segments in buf.
- * @param elt
- *   TX queue element to fill.
- * @param[in] buf
- *   Buffer to process.
- * @param elts_head
- *   Index of the linear buffer to use if necessary (normally txq->elts_head).
- * @param[out] sges
- *   Array filled with SGEs on success.
- *
- * @return
- *   A structure containing the processed packet size in bytes and the
- *   number of SGEs. Both fields are set to (unsigned int)-1 in case of
- *   failure.
- */
-static struct tx_burst_sg_ret {
-	unsigned int length;
-	unsigned int num;
-}
-tx_burst_sg(struct txq *txq, unsigned int segs, struct txq_elt *elt,
-	    struct rte_mbuf *buf, unsigned int elts_head,
-	    struct ibv_sge (*sges)[MLX5_PMD_SGE_WR_N])
-{
-	unsigned int sent_size = 0;
-	unsigned int j;
-	int linearize = 0;
-
-	/* When there are too many segments, extra segments are
-	 * linearized in the last SGE. */
-	if (unlikely(segs > RTE_DIM(*sges))) {
-		segs = (RTE_DIM(*sges) - 1);
-		linearize = 1;
-	}
-	/* Update element. */
-	elt->buf = buf;
-	/* Register segments as SGEs. */
-	for (j = 0; (j != segs); ++j) {
-		struct ibv_sge *sge = &(*sges)[j];
-		uint32_t lkey;
-
-		/* Retrieve Memory Region key for this memory pool. */
-		lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-		if (unlikely(lkey == (uint32_t)-1)) {
-			/* MR does not exist. */
-			DEBUG("%p: unable to get MP <-> MR association",
-			      (void *)txq);
-			/* Clean up TX element. */
-			elt->buf = NULL;
-			goto stop;
-		}
-		/* Update SGE. */
-		sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
-		if (txq->priv->sriov)
-			rte_prefetch0((volatile void *)
-				      (uintptr_t)sge->addr);
-		sge->length = DATA_LEN(buf);
-		sge->lkey = lkey;
-		sent_size += sge->length;
-		buf = NEXT(buf);
-	}
-	/* If buf is not NULL here and is not going to be linearized,
-	 * nb_segs is not valid. */
-	assert(j == segs);
-	assert((buf == NULL) || (linearize));
-	/* Linearize extra segments. */
-	if (linearize) {
-		struct ibv_sge *sge = &(*sges)[segs];
-		linear_t *linear = &(*txq->elts_linear)[elts_head];
-		unsigned int size = linearize_mbuf(linear, buf);
-
-		assert(segs == (RTE_DIM(*sges) - 1));
-		if (size == 0) {
-			/* Invalid packet. */
-			DEBUG("%p: packet too large to be linearized.",
-			      (void *)txq);
-			/* Clean up TX element. */
-			elt->buf = NULL;
-			goto stop;
-		}
-		/* If MLX5_PMD_SGE_WR_N is 1, free mbuf immediately. */
-		if (RTE_DIM(*sges) == 1) {
-			do {
-				struct rte_mbuf *next = NEXT(buf);
-
-				rte_pktmbuf_free_seg(buf);
-				buf = next;
-			} while (buf != NULL);
-			elt->buf = NULL;
-		}
-		/* Update SGE. */
-		sge->addr = (uintptr_t)&(*linear)[0];
-		sge->length = size;
-		sge->lkey = txq->mr_linear->lkey;
-		sent_size += size;
-		/* Include last segment. */
-		segs++;
-	}
-	return (struct tx_burst_sg_ret){
-		.length = sent_size,
-		.num = segs,
-	};
-stop:
-	return (struct tx_burst_sg_ret){
-		.length = -1,
-		.num = -1,
-	};
-}
-
-#endif /* MLX5_PMD_SGE_WR_N > 1 */
-
 /**
  * DPDK callback for TX.
  *
@@ -424,14 +274,14 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		unsigned int elts_head_next =
 			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
 		struct txq_elt *elt = &(*txq->elts)[elts_head];
-		unsigned int segs = NB_SEGS(buf);
-#ifdef MLX5_PMD_SOFT_COUNTERS
-		unsigned int sent_size = 0;
-#endif
 		uint32_t send_flags = 0;
 #ifdef HAVE_VERBS_VLAN_INSERTION
 		int insert_vlan = 0;
 #endif /* HAVE_VERBS_VLAN_INSERTION */
+		uintptr_t addr;
+		uint32_t length;
+		uint32_t lkey;
+		uintptr_t buf_next_addr;
 
 		if (i + 1 < max)
 			rte_prefetch0(buf_next);
@@ -464,126 +314,81 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 					goto stop;
 			}
 		}
-		if (likely(segs == 1)) {
-			uintptr_t addr;
-			uint32_t length;
-			uint32_t lkey;
-			uintptr_t buf_next_addr;
-
-			/* Retrieve buffer information. */
-			addr = rte_pktmbuf_mtod(buf, uintptr_t);
-			length = DATA_LEN(buf);
-			/* Update element. */
-			elt->buf = buf;
-			if (txq->priv->sriov)
-				rte_prefetch0((volatile void *)
-					      (uintptr_t)addr);
-			/* Prefetch next buffer data. */
-			if (i + 1 < max) {
-				buf_next_addr =
-					rte_pktmbuf_mtod(buf_next, uintptr_t);
-				rte_prefetch0((volatile void *)
-					      (uintptr_t)buf_next_addr);
-			}
-			/* Put packet into send queue. */
+		/* Retrieve buffer information. */
+		addr = rte_pktmbuf_mtod(buf, uintptr_t);
+		length = DATA_LEN(buf);
+		/* Update element. */
+		elt->buf = buf;
+		if (txq->priv->sriov)
+			rte_prefetch0((volatile void *)
+				      (uintptr_t)addr);
+		/* Prefetch next buffer data. */
+		if (i + 1 < max) {
+			buf_next_addr =
+				rte_pktmbuf_mtod(buf_next, uintptr_t);
+			rte_prefetch0((volatile void *)
+				      (uintptr_t)buf_next_addr);
+		}
+		/* Put packet into send queue. */
 #if MLX5_PMD_MAX_INLINE > 0
-			if (length <= txq->max_inline) {
+		if (length <= txq->max_inline) {
 #ifdef HAVE_VERBS_VLAN_INSERTION
-				if (insert_vlan)
-					err = txq->send_pending_inline_vlan
-						(txq->qp,
-						 (void *)addr,
-						 length,
-						 send_flags,
-						 &buf->vlan_tci);
-				else
-#endif /* HAVE_VERBS_VLAN_INSERTION */
-					err = txq->send_pending_inline
-						(txq->qp,
-						 (void *)addr,
-						 length,
-						 send_flags);
-			} else
-#endif
-			{
-				/* Retrieve Memory Region key for this
-				 * memory pool. */
-				lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-				if (unlikely(lkey == (uint32_t)-1)) {
-					/* MR does not exist. */
-					DEBUG("%p: unable to get MP <-> MR"
-					      " association", (void *)txq);
-					/* Clean up TX element. */
-					elt->buf = NULL;
-					goto stop;
-				}
-#ifdef HAVE_VERBS_VLAN_INSERTION
-				if (insert_vlan)
-					err = txq->send_pending_vlan
-						(txq->qp,
-						 addr,
-						 length,
-						 lkey,
-						 send_flags,
-						 &buf->vlan_tci);
-				else
+			if (insert_vlan)
+				err = txq->send_pending_inline_vlan
+					(txq->qp,
+					 (void *)addr,
+					 length,
+					 send_flags,
+					 &buf->vlan_tci);
+			else
 #endif /* HAVE_VERBS_VLAN_INSERTION */
-					err = txq->send_pending
-						(txq->qp,
-						 addr,
-						 length,
-						 lkey,
-						 send_flags);
-			}
-			if (unlikely(err))
-				goto stop;
-#ifdef MLX5_PMD_SOFT_COUNTERS
-			sent_size += length;
+				err = txq->send_pending_inline
+					(txq->qp,
+					 (void *)addr,
+					 length,
+					 send_flags);
+		} else
 #endif
-		} else {
-#if MLX5_PMD_SGE_WR_N > 1
-			struct ibv_sge sges[MLX5_PMD_SGE_WR_N];
-			struct tx_burst_sg_ret ret;
-
-			ret = tx_burst_sg(txq, segs, elt, buf, elts_head,
-					  &sges);
-			if (ret.length == (unsigned int)-1)
+		{
+			/* Retrieve Memory Region key for this
+			 * memory pool. */
+			lkey = txq_mp2mr(txq, txq_mb2mp(buf));
+			if (unlikely(lkey == (uint32_t)-1)) {
+				/* MR does not exist. */
+				DEBUG("%p: unable to get MP <-> MR"
+				      " association", (void *)txq);
+				/* Clean up TX element. */
+				elt->buf = NULL;
 				goto stop;
-			/* Put SG list into send queue. */
+			}
 #ifdef HAVE_VERBS_VLAN_INSERTION
 			if (insert_vlan)
-				err = txq->send_pending_sg_list_vlan
+				err = txq->send_pending_vlan
 					(txq->qp,
-					 sges,
-					 ret.num,
+					 addr,
+					 length,
+					 lkey,
 					 send_flags,
 					 &buf->vlan_tci);
 			else
 #endif /* HAVE_VERBS_VLAN_INSERTION */
-				err = txq->send_pending_sg_list
+				err = txq->send_pending
 					(txq->qp,
-					 sges,
-					 ret.num,
+					 addr,
+					 length,
+					 lkey,
 					 send_flags);
-			if (unlikely(err))
-				goto stop;
-#ifdef MLX5_PMD_SOFT_COUNTERS
-			sent_size += ret.length;
-#endif
-#else /* MLX5_PMD_SGE_WR_N > 1 */
-			DEBUG("%p: TX scattered buffers support not"
-			      " compiled in", (void *)txq);
-			goto stop;
-#endif /* MLX5_PMD_SGE_WR_N > 1 */
 		}
-		elts_head = elts_head_next;
-		buf = buf_next;
+		if (unlikely(err))
+			goto stop;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment sent bytes counter. */
-		txq->stats.obytes += sent_size;
+		txq->stats.obytes += length;
 #endif
-	}
 stop:
+		elts_head = elts_head_next;
+		buf = buf_next;
+	}
 	/* Take a shortcut if nothing must be sent. */
 	if (unlikely(i == 0))
 		return 0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 462eddf..8358ccb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -242,14 +242,6 @@ struct txq_elt {
 	struct rte_mbuf *buf;
 };
 
-/* Linear buffer type. It is used when transmitting buffers with too many
- * segments that do not fit the hardware queue (see max_send_sge).
- * Extra segments are copied (linearized) in such buffers, replacing the
- * last SGE during TX.
- * The size is arbitrary but large enough to hold a jumbo frame with
- * 8 segments considering mbuf.buf_len is about 2048 bytes. */
-typedef uint8_t linear_t[16384];
-
 /* TX queue descriptor. */
 struct txq {
 	struct priv *priv; /* Back pointer to private data. */
@@ -264,12 +256,6 @@ struct txq {
 	int (*send_pending_inline_vlan)();
 #endif
 #endif
-#if MLX5_PMD_SGE_WR_N > 1
-	int (*send_pending_sg_list)();
-#ifdef HAVE_VERBS_VLAN_INSERTION
-	int (*send_pending_sg_list_vlan)();
-#endif
-#endif
 	int (*send_flush)(struct ibv_qp *qp);
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_qp *qp; /* Queue Pair. */
@@ -289,9 +275,6 @@ struct txq {
 		uint32_t lkey; /* mr->lkey */
 	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
-	/* Elements used only for init part are here. */
-	linear_t (*elts_linear)[]; /* Linearized buffers. */
-	struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
 #ifdef HAVE_VERBS_VLAN_INSERTION
 	struct ibv_exp_qp_burst_family_v1 *if_qp; /* QP burst interface. */
 #else
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index e20df21..5a248c9 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -82,26 +82,13 @@ txq_alloc_elts(struct txq *txq, unsigned int elts_n)
 	unsigned int i;
 	struct txq_elt (*elts)[elts_n] =
 		rte_calloc_socket("TXQ", 1, sizeof(*elts), 0, txq->socket);
-	linear_t (*elts_linear)[elts_n] =
-		rte_calloc_socket("TXQ", 1, sizeof(*elts_linear), 0,
-				  txq->socket);
-	struct ibv_mr *mr_linear = NULL;
 	int ret = 0;
 
-	if ((elts == NULL) || (elts_linear == NULL)) {
+	if (elts == NULL) {
 		ERROR("%p: can't allocate packets array", (void *)txq);
 		ret = ENOMEM;
 		goto error;
 	}
-	mr_linear =
-		ibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear),
-			   IBV_ACCESS_LOCAL_WRITE);
-	if (mr_linear == NULL) {
-		ERROR("%p: unable to configure MR, ibv_reg_mr() failed",
-		      (void *)txq);
-		ret = EINVAL;
-		goto error;
-	}
 	for (i = 0; (i != elts_n); ++i) {
 		struct txq_elt *elt = &(*elts)[i];
 
@@ -119,15 +106,9 @@ txq_alloc_elts(struct txq *txq, unsigned int elts_n)
 		((MLX5_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?
 		 MLX5_PMD_TX_PER_COMP_REQ : (elts_n / 4));
 	txq->elts_comp_cd = txq->elts_comp_cd_init;
-	txq->elts_linear = elts_linear;
-	txq->mr_linear = mr_linear;
 	assert(ret == 0);
 	return 0;
 error:
-	if (mr_linear != NULL)
-		claim_zero(ibv_dereg_mr(mr_linear));
-
-	rte_free(elts_linear);
 	rte_free(elts);
 
 	DEBUG("%p: failed, freed everything", (void *)txq);
@@ -148,8 +129,6 @@ txq_free_elts(struct txq *txq)
 	unsigned int elts_head = txq->elts_head;
 	unsigned int elts_tail = txq->elts_tail;
 	struct txq_elt (*elts)[elts_n] = txq->elts;
-	linear_t (*elts_linear)[elts_n] = txq->elts_linear;
-	struct ibv_mr *mr_linear = txq->mr_linear;
 
 	DEBUG("%p: freeing WRs", (void *)txq);
 	txq->elts_n = 0;
@@ -159,12 +138,7 @@ txq_free_elts(struct txq *txq)
 	txq->elts_comp_cd = 0;
 	txq->elts_comp_cd_init = 0;
 	txq->elts = NULL;
-	txq->elts_linear = NULL;
-	txq->mr_linear = NULL;
-	if (mr_linear != NULL)
-		claim_zero(ibv_dereg_mr(mr_linear));
 
-	rte_free(elts_linear);
 	if (elts == NULL)
 		return;
 	while (elts_tail != elts_head) {
@@ -286,12 +260,14 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
 	int ret = 0;
 
 	(void)conf; /* Thresholds configuration (ignored). */
-	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
-		ERROR("%p: invalid number of TX descriptors (must be a"
-		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
+	if (desc == 0) {
+		ERROR("%p: invalid number of TX descriptors", (void *)dev);
+		return EINVAL;
+	}
+	if (MLX5_PMD_SGE_WR_N > 1) {
+		ERROR("%p: TX gather is not supported", (void *)dev);
 		return EINVAL;
 	}
-	desc /= MLX5_PMD_SGE_WR_N;
 	/* MRs will be registered in mp2mr[] later. */
 	attr.rd = (struct ibv_exp_res_domain_init_attr){
 		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
@@ -332,10 +308,7 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
 					priv->device_attr.max_qp_wr :
 					desc),
 			/* Max number of scatter/gather elements in a WR. */
-			.max_send_sge = ((priv->device_attr.max_sge <
-					  MLX5_PMD_SGE_WR_N) ?
-					 priv->device_attr.max_sge :
-					 MLX5_PMD_SGE_WR_N),
+			.max_send_sge = 1,
 #if MLX5_PMD_MAX_INLINE > 0
 			.max_inline_data = MLX5_PMD_MAX_INLINE,
 #endif
@@ -440,12 +413,6 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
 	txq->send_pending_inline_vlan = txq->if_qp->send_pending_inline_vlan;
 #endif
 #endif
-#if MLX5_PMD_SGE_WR_N > 1
-	txq->send_pending_sg_list = txq->if_qp->send_pending_sg_list;
-#ifdef HAVE_VERBS_VLAN_INSERTION
-	txq->send_pending_sg_list_vlan = txq->if_qp->send_pending_sg_list_vlan;
-#endif
-#endif
 	txq->send_pending = txq->if_qp->send_pending;
 #ifdef HAVE_VERBS_VLAN_INSERTION
 	txq->send_pending_vlan = txq->if_qp->send_pending_vlan;
-- 
2.1.4
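
Note on migration: until gather support is reimplemented, applications
that generate multi-segment mbufs can flatten them before handing
packets to this PMD. The following is a minimal sketch mirroring the
removed linearize_mbuf() logic, using only public mbuf accessors;
destination buffer sizing and error handling are left to the caller.

#include <string.h>
#include <rte_mbuf.h>

/* Copy a segmented mbuf chain into one contiguous buffer.
 * Returns the number of bytes copied, or 0 if the chain does not fit. */
static unsigned int
copy_mbuf_linear(uint8_t *dst, size_t dst_size,
		 const struct rte_mbuf *buf)
{
	unsigned int size = 0;

	for (; buf != NULL; buf = buf->next) {
		unsigned int len = rte_pktmbuf_data_len(buf);

		if (size + len > dst_size)
			return 0;
		memcpy(dst + size,
		       rte_pktmbuf_mtod(buf, const uint8_t *),
		       len);
		size += len;
	}
	return size;
}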


