[PATCH v5] net/bonding: call Tx prepare before Tx burst

Chas Williams 3chas3 at gmail.com
Sat Oct 15 17:26:29 CEST 2022


This looks fine. Thanks for making the changes!

Signed-off-by: Chas Williams <3chas3 at gmail.com>

On 10/11/22 09:20, Chengwen Feng wrote:
> Normally, to use the HW offloads capability (e.g. checksum and TSO) in
> the Tx direction, the application needs to call rte_eth_tx_prepare() to
> do some adjustment with the packets before sending them. But the
> tx_prepare callback of the bonding driver is not implemented. Therefore,
> the sent packets may have errors (e.g. checksum errors).
> 
> However, it is difficult to design the tx_prepare callback for bonding
> driver. Because when a bonded device sends packets, the bonded device
> allocates the packets to different slave devices based on the real-time
> link status and bonding mode. That is, it is very difficult for the
> bonded device to determine which slave device's prepare function should
> be invoked.
> 
> So in this patch, the tx_prepare callback of bonding driver is not
> implemented. Instead, the rte_eth_tx_prepare() will be called before
> rte_eth_tx_burst(). In this way, all tx_offloads can be processed
> correctly for all NIC devices.
> 
> Note: because it is rara that bond different PMDs together, so just
> call tx-prepare once in broadcast bonding mode.
> 
> Also the following description was added to the rte_eth_tx_burst()
> function:
> "@note This function must not modify mbufs (including packets data)
> unless the refcnt is 1. The exception is the bonding PMD, which does not
> have tx-prepare function, in this case, mbufs maybe modified."
> 
> Signed-off-by: Chengchang Tang <tangchengchang at huawei.com>
> Signed-off-by: Chengwen Feng <fengchengwen at huawei.com>
> Reviewed-by: Min Hu (Connor) <humin29 at huawei.com>
> 
> ---
> v5: address Chas's comments.
> v4: address Chas and Konstantin's comments.
> v3: support tx-prepare when Tx internal generate mbufs.
> v2: support tx-prepare enable flag and fail stats.
> 
> ---
>   drivers/net/bonding/rte_eth_bond_8023ad.c | 10 ++++--
>   drivers/net/bonding/rte_eth_bond_pmd.c    | 37 ++++++++++++++++++-----
>   lib/ethdev/rte_ethdev.h                   |  4 +++
>   3 files changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c
> index b3cddd8a20..29a71ae0bf 100644
> --- a/drivers/net/bonding/rte_eth_bond_8023ad.c
> +++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
> @@ -636,9 +636,12 @@ tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
>   			return;
>   		}
>   	} else {
> -		uint16_t pkts_sent = rte_eth_tx_burst(slave_id,
> +		uint16_t pkts_sent = rte_eth_tx_prepare(slave_id,
>   				internals->mode4.dedicated_queues.tx_qid,
>   				&lacp_pkt, 1);
> +		pkts_sent = rte_eth_tx_burst(slave_id,
> +				internals->mode4.dedicated_queues.tx_qid,
> +				&lacp_pkt, pkts_sent);
>   		if (pkts_sent != 1) {
>   			rte_pktmbuf_free(lacp_pkt);
>   			set_warning_flags(port, WRN_TX_QUEUE_FULL);
> @@ -1371,9 +1374,12 @@ bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
>   			}
>   		} else {
>   			/* Send packet directly to the slow queue */
> -			uint16_t tx_count = rte_eth_tx_burst(slave_id,
> +			uint16_t tx_count = rte_eth_tx_prepare(slave_id,
>   					internals->mode4.dedicated_queues.tx_qid,
>   					&pkt, 1);
> +			tx_count = rte_eth_tx_burst(slave_id,
> +					internals->mode4.dedicated_queues.tx_qid,
> +					&pkt, tx_count);
>   			if (tx_count != 1) {
>   				/* reset timer */
>   				port->rx_marker_timer = 0;
> diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
> index 4081b21338..a2c68ec9bc 100644
> --- a/drivers/net/bonding/rte_eth_bond_pmd.c
> +++ b/drivers/net/bonding/rte_eth_bond_pmd.c
> @@ -602,8 +602,11 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
>   	/* Send packet burst on each slave device */
>   	for (i = 0; i < num_of_slaves; i++) {
>   		if (slave_nb_pkts[i] > 0) {
> +			num_tx_slave = rte_eth_tx_prepare(slaves[i],
> +					bd_tx_q->queue_id, slave_bufs[i],
> +					slave_nb_pkts[i]);
>   			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
> -					slave_bufs[i], slave_nb_pkts[i]);
> +					slave_bufs[i], num_tx_slave);
>   
>   			/* if tx burst fails move packets to end of bufs */
>   			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
> @@ -628,6 +631,7 @@ bond_ethdev_tx_burst_active_backup(void *queue,
>   {
>   	struct bond_dev_private *internals;
>   	struct bond_tx_queue *bd_tx_q;
> +	uint16_t nb_prep_pkts;
>   
>   	bd_tx_q = (struct bond_tx_queue *)queue;
>   	internals = bd_tx_q->dev_private;
> @@ -635,8 +639,11 @@ bond_ethdev_tx_burst_active_backup(void *queue,
>   	if (internals->active_slave_count < 1)
>   		return 0;
>   
> +	nb_prep_pkts = rte_eth_tx_prepare(internals->current_primary_port,
> +				bd_tx_q->queue_id, bufs, nb_pkts);
> +
>   	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
> -			bufs, nb_pkts);
> +			bufs, nb_prep_pkts);
>   }
>   
>   static inline uint16_t
> @@ -910,7 +917,7 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   
>   	struct rte_eth_dev *primary_port =
>   			&rte_eth_devices[internals->primary_port];
> -	uint16_t num_tx_total = 0;
> +	uint16_t num_tx_total = 0, num_tx_prep;
>   	uint16_t i, j;
>   
>   	uint16_t num_of_slaves = internals->active_slave_count;
> @@ -951,8 +958,10 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   #endif
>   		}
>   
> -		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
> +		num_tx_prep = rte_eth_tx_prepare(slaves[i], bd_tx_q->queue_id,
>   				bufs + num_tx_total, nb_pkts - num_tx_total);
> +		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
> +				bufs + num_tx_total, num_tx_prep);
>   
>   		if (num_tx_total == nb_pkts)
>   			break;
> @@ -1064,8 +1073,10 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	/* Send ARP packets on proper slaves */
>   	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
>   		if (slave_bufs_pkts[i] > 0) {
> -			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
> +			num_send = rte_eth_tx_prepare(i, bd_tx_q->queue_id,
>   					slave_bufs[i], slave_bufs_pkts[i]);
> +			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
> +					slave_bufs[i], num_send);
>   			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
>   				bufs[nb_pkts - 1 - num_not_send - j] =
>   						slave_bufs[i][nb_pkts - 1 - j];
> @@ -1088,8 +1099,10 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	/* Send update packets on proper slaves */
>   	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
>   		if (update_bufs_pkts[i] > 0) {
> +			num_send = rte_eth_tx_prepare(i, bd_tx_q->queue_id,
> +					update_bufs[i], update_bufs_pkts[i]);
>   			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
> -					update_bufs_pkts[i]);
> +					num_send);
>   			for (j = num_send; j < update_bufs_pkts[i]; j++) {
>   				rte_pktmbuf_free(update_bufs[i][j]);
>   			}
> @@ -1158,9 +1171,12 @@ tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
>   		if (slave_nb_bufs[i] == 0)
>   			continue;
>   
> -		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
> +		slave_tx_count = rte_eth_tx_prepare(slave_port_ids[i],
>   				bd_tx_q->queue_id, slave_bufs[i],
>   				slave_nb_bufs[i]);
> +		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
> +				bd_tx_q->queue_id, slave_bufs[i],
> +				slave_tx_count);
>   
>   		total_tx_count += slave_tx_count;
>   
> @@ -1243,8 +1259,10 @@ tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
>   
>   		if (rte_ring_dequeue(port->tx_ring,
>   				     (void **)&ctrl_pkt) != -ENOENT) {
> -			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
> +			slave_tx_count = rte_eth_tx_prepare(slave_port_ids[i],
>   					bd_tx_q->queue_id, &ctrl_pkt, 1);
> +			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
> +					bd_tx_q->queue_id, &ctrl_pkt, slave_tx_count);
>   			/*
>   			 * re-enqueue LAG control plane packets to buffering
>   			 * ring if transmission fails so the packet isn't lost.
> @@ -1316,6 +1334,9 @@ bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
>   	if (num_of_slaves < 1)
>   		return 0;
>   
> +	/* It is rare that bond different PMDs together, so just call tx-prepare once */
> +	nb_pkts = rte_eth_tx_prepare(slaves[0], bd_tx_q->queue_id, bufs, nb_pkts);
> +
>   	/* Increment reference count on mbufs */
>   	for (i = 0; i < nb_pkts; i++)
>   		rte_pktmbuf_refcnt_update(bufs[i], num_of_slaves - 1);
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index d43a638aff..e92139f105 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -6095,6 +6095,10 @@ uint16_t rte_eth_call_tx_callbacks(uint16_t port_id, uint16_t queue_id,
>    * @see rte_eth_tx_prepare to perform some prior checks or adjustments
>    * for offloads.
>    *
> + * @note This function must not modify mbufs (including packets data) unless
> + * the refcnt is 1. The exception is the bonding PMD, which does not have
> + * tx-prepare function, in this case, mbufs maybe modified.
> + *
>    * @param port_id
>    *   The port identifier of the Ethernet device.
>    * @param queue_id


More information about the dev mailing list