[dpdk-dev] [PATCH] net/bond: burst mode hash calculation

Declan Doherty declan.doherty at intel.com
Fri Dec 1 01:04:05 CET 2017


Change the xmit_hash functions to handle bursts of packets instead of
a single packet at a time, and update the affected tx_burst functions
accordingly.

Signed-off-by: Declan Doherty <declan.doherty at intel.com>
Signed-off-by: Keith Wiles <keith.wiles at intel.com>
---
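Note: the essence of the change is that the hash callback (the new
burst_xmit_hash_t) classifies a whole burst in one call, filling a
per-packet slave index array, and the tx_burst paths then bucket mbufs
per slave before transmitting each bucket. The standalone sketch below
illustrates only that data flow; it uses toy stand-in types (plain flow
keys instead of rte_mbuf pointers, and a trivial hash) and is not part
of the driver code.

#include <stdint.h>
#include <stdio.h>

#define MAX_SLAVES 4
#define BURST_SIZE 8

/* Burst-mode hash: one call maps every packet in the burst to a slave
 * index, matching the shape of the new burst_xmit_hash_t callback. The
 * "flow key" stands in for the header fields a real policy would read
 * from each mbuf.
 */
static void
burst_hash(const uint32_t *flow_keys, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		uint32_t hash = flow_keys[i];

		hash ^= hash >> 16;
		hash ^= hash >> 8;
		slaves[i] = hash % slave_count;
	}
}

int
main(void)
{
	uint32_t flow_keys[BURST_SIZE] = { 7, 42, 42, 9, 7, 13, 42, 9 };
	uint16_t slave_idxs[BURST_SIZE];
	uint32_t slave_bufs[MAX_SLAVES][BURST_SIZE];
	uint16_t slave_nb_bufs[MAX_SLAVES] = { 0 };
	uint16_t slave_count = 3;
	uint16_t i, s;

	/* 1) Hash the whole burst in a single call. */
	burst_hash(flow_keys, BURST_SIZE, slave_count, slave_idxs);

	/* 2) Bucket packets per slave, as the tx_burst paths do before
	 * handing each bucket to the slave's transmit function.
	 */
	for (i = 0; i < BURST_SIZE; i++) {
		uint16_t idx = slave_idxs[i];

		slave_bufs[idx][slave_nb_bufs[idx]++] = flow_keys[i];
	}

	for (s = 0; s < slave_count; s++)
		printf("slave %u gets %u packet(s)\n",
				(unsigned)s, (unsigned)slave_nb_bufs[s]);

	return 0;
}
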
 drivers/net/bonding/rte_eth_bond_api.c     |   3 -
 drivers/net/bonding/rte_eth_bond_pmd.c     | 582 ++++++++++++++++++-----------
 drivers/net/bonding/rte_eth_bond_private.h |  22 +-
 3 files changed, 376 insertions(+), 231 deletions(-)
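
A second toy sketch, for the failure handling shared by the reworked tx
paths: whatever could not be transmitted is copied to the tail of the
caller's bufs[] array, so the usual tx_burst contract holds (a return
value of N means bufs[0..N-1] were consumed and bufs[N..nb_bufs-1] must
be retried by the caller). Plain integers stand in for mbuf pointers;
this only demonstrates the contract, not the per-slave bookkeeping.

#include <stdint.h>
#include <stdio.h>

#define NB_BUFS 6

/*
 * Toy model of the tx failure handling: anything that "fails" (here,
 * any value divisible by 3) is collected and copied to the tail of the
 * caller's array before returning the number of consumed packets.
 */
static uint16_t
toy_tx_burst(int *bufs, uint16_t nb_bufs)
{
	int failed[NB_BUFS];
	uint16_t tx_count = 0, fail_count = 0;
	uint16_t i;

	for (i = 0; i < nb_bufs; i++) {
		if (bufs[i] % 3 != 0)
			tx_count++;		/* pretend it was sent */
		else
			failed[fail_count++] = bufs[i];
	}

	/* Move the failed packets to the tail of the caller's array. */
	for (i = 0; i < fail_count; i++)
		bufs[nb_bufs - fail_count + i] = failed[i];

	return tx_count;
}

int
main(void)
{
	int bufs[NB_BUFS] = { 10, 11, 12, 13, 14, 15 };
	uint16_t sent = toy_tx_burst(bufs, NB_BUFS);
	uint16_t i;

	printf("sent %u, to retry:", (unsigned)sent);
	for (i = sent; i < NB_BUFS; i++)
		printf(" %d", bufs[i]);
	printf("\n");

	return 0;
}
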

diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index 980e636..60f5c9c 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -667,15 +667,12 @@ rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy)
 	switch (policy) {
 	case BALANCE_XMIT_POLICY_LAYER2:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l2_hash;
 		break;
 	case BALANCE_XMIT_POLICY_LAYER23:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l23_hash;
 		break;
 	case BALANCE_XMIT_POLICY_LAYER34:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l34_hash;
 		break;
 
 	default:
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index fe23289..67dff1e 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -309,87 +309,114 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 
 static uint16_t
 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
-		uint16_t nb_pkts)
+		uint16_t nb_bufs)
 {
-	struct bond_dev_private *internals;
-	struct bond_tx_queue *bd_tx_q;
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-	uint16_t num_of_slaves;
-	uint16_t slaves[RTE_MAX_ETHPORTS];
-	 /* positions in slaves, not ID */
-	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
-	uint8_t distributing_count;
+	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t slave_count;
 
-	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
-	uint16_t i, op_slave_idx;
+	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t dist_slave_count;
 
-	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+	/* 2-D array to sort mbufs for transmission on each slave into */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+	/* Number of mbufs for transmission on each slave */
+	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+	/* Mapping array generated by hash function to map mbufs to slaves */
+	uint16_t bufs_slave_port_idxs[nb_bufs];
 
-	/* Total amount of packets in slave_bufs */
-	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-	/* Slow packets placed in each slave */
+	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	if (unlikely(nb_pkts == 0))
-		return 0;
+	uint16_t i, j;
 
-	bd_tx_q = (struct bond_tx_queue *)queue;
-	internals = bd_tx_q->dev_private;
+	if (unlikely(nb_bufs == 0))
+		return 0;
 
 	/* Copy slave list to protect against slave up/down changes during tx
 	 * bursting */
-	num_of_slaves = internals->active_slave_count;
-	if (num_of_slaves < 1)
-		return num_tx_total;
+	slave_count = internals->active_slave_count;
+	if (unlikely(slave_count < 1))
+		return 0;
 
-	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
-			num_of_slaves);
+	memcpy(slave_port_ids, internals->active_slaves,
+			sizeof(slave_port_ids[0]) * slave_count);
+
+
+	dist_slave_count = 0;
+	for (i = 0; i < slave_count; i++) {
+		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
 
-	distributing_count = 0;
-	for (i = 0; i < num_of_slaves; i++) {
-		struct port *port = &mode_8023ad_ports[slaves[i]];
 		if (ACTOR_STATE(port, DISTRIBUTING))
-			distributing_offsets[distributing_count++] = i;
+			dist_slave_port_ids[dist_slave_count++] =
+					slave_port_ids[i];
 	}
 
-	if (likely(distributing_count > 0)) {
-		/* Populate slaves mbuf with the packets which are to be sent */
-		for (i = 0; i < nb_pkts; i++) {
-			/* Select output slave using hash based on xmit policy */
-			op_slave_idx = internals->xmit_hash(bufs[i],
-					distributing_count);
+	if (unlikely(dist_slave_count < 1))
+		return 0;
 
-			/* Populate slave mbuf arrays with mbufs for that slave.
-			 * Use only slaves that are currently distributing.
-			 */
-			uint8_t slave_offset =
-					distributing_offsets[op_slave_idx];
-			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
-					bufs[i];
-			slave_nb_pkts[slave_offset]++;
-		}
+	/*
+	 * Populate slaves mbuf with the packets which are to be sent on it
+	 * selecting output slave using hash based on xmit policy
+	 */
+	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
+			bufs_slave_port_idxs);
+
+	for (i = 0; i < nb_bufs; i++) {
+		/* Populate slave mbuf arrays with mbufs for that slave. */
+		uint16_t slave_idx = bufs_slave_port_idxs[i];
+
+		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
 	}
 
+
 	/* Send packet burst on each slave device */
-	for (i = 0; i < num_of_slaves; i++) {
-		if (slave_nb_pkts[i] == 0)
+	for (i = 0; i < dist_slave_count; i++) {
+		if (slave_nb_bufs[i] == 0)
 			continue;
 
-		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-				slave_bufs[i], slave_nb_pkts[i]);
+		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
+				bd_tx_q->queue_id, slave_bufs[i],
+				slave_nb_bufs[i]);
 
-		num_tx_total += num_tx_slave;
-		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+		total_tx_count += slave_tx_count;
 
 		/* If tx burst fails move packets to end of bufs */
-		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-			uint16_t j = nb_pkts - num_tx_fail_total;
-			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
-					num_tx_slave++)
-				bufs[j] = slave_bufs[i][num_tx_slave];
+		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+			slave_tx_fail_count[i] = slave_nb_bufs[i] -
+					slave_tx_count;
+			total_tx_fail_count += slave_tx_fail_count[i];
+
+			/*
+			 * Shift bufs to beginning of array to allow reordering
+			 * later
+			 */
+			for (j = 0; j < slave_tx_fail_count[i]; j++) {
+				slave_bufs[i][j] =
+					slave_bufs[i][slave_tx_count + j];
+			}
 		}
 	}
 
-	return num_tx_total;
+	/*
+	 * If there are tx burst failures we move packets to end of bufs to
+	 * preserve expected PMD behaviour of all failed transmitted being
+	 * at the end of the input mbuf array
+	 */
+	if (unlikely(total_tx_fail_count > 0)) {
+		int bufs_idx = nb_bufs - total_tx_fail_count;
+
+		for (i = 0; i < slave_count; i++) {
+			if (slave_tx_fail_count[i] > 0) {
+				for (j = 0; j < slave_tx_fail_count[i]; j++)
+					bufs[bufs_idx++] = slave_bufs[i][j];
+			}
+		}
+	}
+
+	return total_tx_count;
 }
 
 
@@ -788,96 +815,129 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr)
 			(word_src_addr[3] ^ word_dst_addr[3]);
 }
 
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+
+void
+burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves)
 {
-	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+	struct ether_hdr *eth_hdr;
+	uint32_t hash;
+	int i;
+
+	for (i = 0; i < nb_pkts; i++) {
+		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
 
-	uint32_t hash = ether_hash(eth_hdr);
+		hash = ether_hash(eth_hdr);
 
-	return (hash ^= hash >> 8) % slave_count;
+		slaves[i] = (hash ^= hash >> 8) % slave_count;
+	}
 }
 
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+void
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves)
 {
-	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
-	uint16_t proto = eth_hdr->ether_type;
-	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
-	uint32_t hash, l3hash = 0;
+	uint16_t i;
+	struct ether_hdr *eth_hdr;
+	uint16_t proto;
+	size_t vlan_offset;
+	uint32_t hash, l3hash;
 
-	hash = ether_hash(eth_hdr);
+	for (i = 0; i < nb_pkts; i++) {
+		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
+		l3hash = 0;
 
-	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
-		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		l3hash = ipv4_hash(ipv4_hdr);
+		proto = eth_hdr->ether_type;
+		hash = ether_hash(eth_hdr);
 
-	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
-		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		l3hash = ipv6_hash(ipv6_hdr);
-	}
+		vlan_offset = get_vlan_offset(eth_hdr, &proto);
 
-	hash = hash ^ l3hash;
-	hash ^= hash >> 16;
-	hash ^= hash >> 8;
+		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
+			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			l3hash = ipv4_hash(ipv4_hdr);
 
-	return hash % slave_count;
-}
+		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
+			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			l3hash = ipv6_hash(ipv6_hdr);
+		}
 
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
-{
-	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
-	uint16_t proto = eth_hdr->ether_type;
-	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
+		hash = hash ^ l3hash;
+		hash ^= hash >> 16;
+		hash ^= hash >> 8;
 
-	struct udp_hdr *udp_hdr = NULL;
-	struct tcp_hdr *tcp_hdr = NULL;
-	uint32_t hash, l3hash = 0, l4hash = 0;
+		slaves[i] = hash % slave_count;
+	}
+}
 
-	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
-		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		size_t ip_hdr_offset;
+void
+burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves)
+{
+	struct ether_hdr *eth_hdr;
+	uint16_t proto;
+	size_t vlan_offset;
+	int i;
 
-		l3hash = ipv4_hash(ipv4_hdr);
+	struct udp_hdr *udp_hdr;
+	struct tcp_hdr *tcp_hdr;
+	uint32_t hash, l3hash, l4hash;
 
-		/* there is no L4 header in fragmented packet */
-		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
-			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
+	for (i = 0; i < nb_pkts; i++) {
+		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
+		proto = eth_hdr->ether_type;
+		vlan_offset = get_vlan_offset(eth_hdr, &proto);
+		l3hash = 0;
+		l4hash = 0;
+
+		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
+			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			size_t ip_hdr_offset;
+
+			l3hash = ipv4_hash(ipv4_hdr);
+
+			/* there is no L4 header in fragmented packet */
+			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
+								== 0)) {
+				ip_hdr_offset = (ipv4_hdr->version_ihl
+					& IPV4_HDR_IHL_MASK) *
 					IPV4_IHL_MULTIPLIER;
 
-			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
-				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
-						ip_hdr_offset);
+				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
+					tcp_hdr = (struct tcp_hdr *)
+						((char *)ipv4_hdr +
+							ip_hdr_offset);
+					l4hash = HASH_L4_PORTS(tcp_hdr);
+				} else if (ipv4_hdr->next_proto_id ==
+								IPPROTO_UDP) {
+					udp_hdr = (struct udp_hdr *)
+						((char *)ipv4_hdr +
+							ip_hdr_offset);
+					l4hash = HASH_L4_PORTS(udp_hdr);
+				}
+			}
+		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
+			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			l3hash = ipv6_hash(ipv6_hdr);
+
+			if (ipv6_hdr->proto == IPPROTO_TCP) {
+				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
 				l4hash = HASH_L4_PORTS(tcp_hdr);
-			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
-				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
-						ip_hdr_offset);
+			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
+				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
 				l4hash = HASH_L4_PORTS(udp_hdr);
 			}
 		}
-	} else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
-		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		l3hash = ipv6_hash(ipv6_hdr);
 
-		if (ipv6_hdr->proto == IPPROTO_TCP) {
-			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
-			l4hash = HASH_L4_PORTS(tcp_hdr);
-		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
-			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
-			l4hash = HASH_L4_PORTS(udp_hdr);
-		}
-	}
-
-	hash = l3hash ^ l4hash;
-	hash ^= hash >> 16;
-	hash ^= hash >> 8;
+		hash = l3hash ^ l4hash;
+		hash ^= hash >> 16;
+		hash ^= hash >> 8;
 
-	return hash % slave_count;
+		slaves[i] = hash % slave_count;
+	}
 }
 
 struct bwg_slave {
@@ -1185,156 +1245,240 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static uint16_t
 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
-		uint16_t nb_pkts)
+		uint16_t nb_bufs)
 {
-	struct bond_dev_private *internals;
-	struct bond_tx_queue *bd_tx_q;
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-	uint16_t num_of_slaves;
-	uint16_t slaves[RTE_MAX_ETHPORTS];
+	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t slave_count;
 
-	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
+	/* 2-D array to sort mbufs for transmission on each slave into */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+	/* Number of mbufs for transmission on each slave */
+	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+	/* Mapping array generated by hash function to map mbufs to slaves */
+	uint16_t bufs_slave_port_idxs[nb_bufs];
 
-	int i, op_slave_id;
+	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
-	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t i, j;
 
-	bd_tx_q = (struct bond_tx_queue *)queue;
-	internals = bd_tx_q->dev_private;
+	if (unlikely(nb_bufs == 0))
+		return 0;
 
 	/* Copy slave list to protect against slave up/down changes during tx
 	 * bursting */
-	num_of_slaves = internals->active_slave_count;
-	memcpy(slaves, internals->active_slaves,
-			sizeof(internals->active_slaves[0]) * num_of_slaves);
+	slave_count = internals->active_slave_count;
+	if (unlikely(slave_count < 1))
+		return 0;
 
-	if (num_of_slaves < 1)
-		return num_tx_total;
+	memcpy(slave_port_ids, internals->active_slaves,
+			sizeof(slave_port_ids[0]) * slave_count);
 
-	/* Populate slaves mbuf with the packets which are to be sent on it  */
-	for (i = 0; i < nb_pkts; i++) {
-		/* Select output slave using hash based on xmit policy */
-		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
+	/*
+	 * Populate slaves mbuf with the packets which are to be sent on it
+	 * selecting output slave using hash based on xmit policy
+	 */
+	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
+			bufs_slave_port_idxs);
 
-		/* Populate slave mbuf arrays with mbufs for that slave */
-		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
+	for (i = 0; i < nb_bufs; i++) {
+		/* Populate slave mbuf arrays with mbufs for that slave. */
+		uint16_t slave_idx = bufs_slave_port_idxs[i];
+
+		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
 	}
 
 	/* Send packet burst on each slave device */
-	for (i = 0; i < num_of_slaves; i++) {
-		if (slave_nb_pkts[i] > 0) {
-			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-					slave_bufs[i], slave_nb_pkts[i]);
+	for (i = 0; i < slave_count; i++) {
+		if (slave_nb_bufs[i] == 0)
+			continue;
 
-			/* if tx burst fails move packets to end of bufs */
-			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
+		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
+				bd_tx_q->queue_id, slave_bufs[i],
+				slave_nb_bufs[i]);
 
-				tx_fail_total += slave_tx_fail_count;
-				memcpy(&bufs[nb_pkts - tx_fail_total],
-						&slave_bufs[i][num_tx_slave],
-						slave_tx_fail_count * sizeof(bufs[0]));
+		total_tx_count += slave_tx_count;
+
+		/* If tx burst fails move packets to end of bufs */
+		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+			slave_tx_fail_count[i] = slave_nb_bufs[i] -
+					slave_tx_count;
+			total_tx_fail_count += slave_tx_fail_count[i];
+
+			/*
+			 * Shift bufs to beginning of array to allow reordering
+			 * later
+			 */
+			for (j = 0; j < slave_tx_fail_count[i]; j++) {
+				slave_bufs[i][j] =
+					slave_bufs[i][slave_tx_count + j];
 			}
+		}
+	}
 
-			num_tx_total += num_tx_slave;
+	/*
+	 * If there are tx burst failures we move packets to end of bufs to
+	 * preserve expected PMD behaviour of all failed transmitted being
+	 * at the end of the input mbuf array
+	 */
+	if (unlikely(total_tx_fail_count > 0)) {
+		int bufs_idx = nb_bufs - total_tx_fail_count;
+
+		for (i = 0; i < slave_count; i++) {
+			if (slave_tx_fail_count[i] > 0) {
+				for (j = 0; j < slave_tx_fail_count[i]; j++)
+					bufs[bufs_idx++] = slave_bufs[i][j];
+			}
 		}
 	}
 
-	return num_tx_total;
+	return total_tx_count;
 }
 
 static uint16_t
 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
-		uint16_t nb_pkts)
+		uint16_t nb_bufs)
 {
-	struct bond_dev_private *internals;
-	struct bond_tx_queue *bd_tx_q;
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-	uint16_t num_of_slaves;
-	uint16_t slaves[RTE_MAX_ETHPORTS];
-	 /* positions in slaves, not ID */
-	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
-	uint8_t distributing_count;
+	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t slave_count;
 
-	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
-	uint16_t i, j, op_slave_idx;
-	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
+	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t dist_slave_count;
 
-	/* Allocate additional packets in case 8023AD mode. */
-	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
-	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
+	/* 2-D array to sort mbufs for transmission on each slave into */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+	/* Number of mbufs for transmission on each slave */
+	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+	/* Mapping array generated by hash function to map mbufs to slaves */
+	uint16_t bufs_slave_port_idxs[nb_bufs];
 
-	/* Total amount of packets in slave_bufs */
-	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-	/* Slow packets placed in each slave */
-	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	bd_tx_q = (struct bond_tx_queue *)queue;
-	internals = bd_tx_q->dev_private;
+	uint16_t i, j;
+
+	if (unlikely(nb_bufs == 0))
+		return 0;
 
 	/* Copy slave list to protect against slave up/down changes during tx
 	 * bursting */
-	num_of_slaves = internals->active_slave_count;
-	if (num_of_slaves < 1)
-		return num_tx_total;
-
-	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+	slave_count = internals->active_slave_count;
+	if (unlikely(slave_count < 1))
+		return 0;
 
-	distributing_count = 0;
-	for (i = 0; i < num_of_slaves; i++) {
-		struct port *port = &mode_8023ad_ports[slaves[i]];
+	memcpy(slave_port_ids, internals->active_slaves,
+			sizeof(slave_port_ids[0]) * slave_count);
 
-		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
-				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
-				NULL);
-		slave_nb_pkts[i] = slave_slow_nb_pkts[i];
 
-		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
-			slave_bufs[i][j] = slow_pkts[j];
+	dist_slave_count = 0;
+	for (i = 0; i < slave_count; i++) {
+		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
 
 		if (ACTOR_STATE(port, DISTRIBUTING))
-			distributing_offsets[distributing_count++] = i;
+			dist_slave_port_ids[dist_slave_count++] =
+					slave_port_ids[i];
 	}
 
-	if (likely(distributing_count > 0)) {
-		/* Populate slaves mbuf with the packets which are to be sent on it */
-		for (i = 0; i < nb_pkts; i++) {
-			/* Select output slave using hash based on xmit policy */
-			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
+	if (likely(dist_slave_count > 0)) {
+
+		/*
+		 * Populate slaves mbuf with the packets which are to be sent
+		 * on it, selecting output slave using hash based on xmit policy
+		 */
+		internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
+				bufs_slave_port_idxs);
+
+		for (i = 0; i < nb_bufs; i++) {
+			/*
+			 * Populate slave mbuf arrays with mbufs for that
+			 * slave
+			 */
+			uint16_t slave_idx = bufs_slave_port_idxs[i];
+
+			slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
+					bufs[i];
+		}
+
+
+		/* Send packet burst on each slave device */
+		for (i = 0; i < dist_slave_count; i++) {
+			if (slave_nb_bufs[i] == 0)
+				continue;
 
-			/* Populate slave mbuf arrays with mbufs for that slave. Use only
-			 * slaves that are currently distributing. */
-			uint8_t slave_offset = distributing_offsets[op_slave_idx];
-			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
-			slave_nb_pkts[slave_offset]++;
+			slave_tx_count = rte_eth_tx_burst(
+					dist_slave_port_ids[i],
+					bd_tx_q->queue_id, slave_bufs[i],
+					slave_nb_bufs[i]);
+
+			total_tx_count += slave_tx_count;
+
+			/* If tx burst fails move packets to end of bufs */
+			if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+				slave_tx_fail_count[i] = slave_nb_bufs[i] -
+						slave_tx_count;
+				total_tx_fail_count += slave_tx_fail_count[i];
+
+				/*
+				 * Shift bufs to beginning of array to allow
+				 * reordering later
+				 */
+				for (j = 0; j < slave_tx_fail_count[i]; j++)
+					slave_bufs[i][j] =
+						slave_bufs[i]
+							[slave_tx_count
+							+ j];
+			}
+		}
+
+		/*
+		 * If there are tx burst failures we move packets to end of
+		 * bufs to preserve expected PMD behaviour of all failed
+		 * transmitted being at the end of the input mbuf array
+		 */
+		if (unlikely(total_tx_fail_count > 0)) {
+			int bufs_idx = nb_bufs - total_tx_fail_count;
+
+			for (i = 0; i < slave_count; i++) {
+				if (slave_tx_fail_count[i] > 0) {
+					for (j = 0;
+						j < slave_tx_fail_count[i];
+						j++) {
+						bufs[bufs_idx++] =
+							slave_bufs[i][j];
+					}
+				}
+			}
 		}
 	}
 
-	/* Send packet burst on each slave device */
-	for (i = 0; i < num_of_slaves; i++) {
-		if (slave_nb_pkts[i] == 0)
-			continue;
+	/* Check for LACP control packets and send if available */
+	for (i = 0; i < slave_count; i++) {
+		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
+		struct rte_mbuf *ctrl_pkt = NULL;
 
-		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-				slave_bufs[i], slave_nb_pkts[i]);
+		if (likely(rte_ring_empty(port->tx_ring)))
+			continue;
 
-		/* If tx burst fails drop slow packets */
-		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
-			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
+		rte_ring_dequeue(port->tx_ring,	(void **)&ctrl_pkt);
 
-		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
-		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
+					bd_tx_q->queue_id, &ctrl_pkt, 1);
 
-		/* If tx burst fails move packets to end of bufs */
-		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-			uint16_t j = nb_pkts - num_tx_fail_total;
-			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
-				bufs[j] = slave_bufs[i][num_tx_slave];
-		}
+		/*
+		 * re-enqueue LAG control plane packets to buffering
+		 * ring if transmission fails so the packet isn't lost.
+		 */
+		if (slave_tx_count != 1)
+			rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
 	}
 
-	return num_tx_total;
+	return total_tx_count;
 }
 
 static uint16_t
@@ -2769,7 +2913,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
 	internals->mode = BONDING_MODE_INVALID;
 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
-	internals->xmit_hash = xmit_l2_hash;
+	internals->burst_xmit_hash = burst_xmit_l2_hash;
 	internals->user_defined_mac = 0;
 
 	internals->link_status_polling_enabled = 0;
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 1392da9..3dc2e5a 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -109,8 +109,8 @@ struct bond_slave_details {
 	uint16_t reta_size;
 };
 
-
-typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count);
+typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
 
 /** Link Bonding PMD device private configuration Structure */
 struct bond_dev_private {
@@ -126,7 +126,7 @@ struct bond_dev_private {
 
 	uint8_t balance_xmit_policy;
 	/**< Transmit policy - l2 / l23 / l34 for operation in balance mode */
-	xmit_hash_t xmit_hash;
+	burst_xmit_hash_t burst_xmit_hash;
 	/**< Transmit policy hash function */
 
 	uint8_t user_defined_mac;
@@ -245,14 +245,18 @@ void
 slave_add(struct bond_dev_private *internals,
 		struct rte_eth_dev *slave_eth_dev);
 
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
 
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
+
+void
+burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
 
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);
 
 void
 bond_ethdev_primary_set(struct bond_dev_private *internals,
-- 
2.9.4


