[dpdk-dev] [PATCH 1/2] LACP control packet filtering offload

Declan Doherty declan.doherty at intel.com
Thu Jun 29 11:18:27 CEST 2017


On 27/05/17 12:27, Tomasz Kulasek wrote:
> New API funtions implemented:
> 
>     rte_eth_bond_8023ad_slow_queue_enable(uint8_t port_id);
>     rte_eth_bond_8023ad_slow_queue_disable(uint8_t port_id);
> 
> rte_eth_bond_8023ad_slow_queue_enable should be called before bonding port
> start to enable new path.
> 
> When this option is enabled all slaves must support flow director's
> filtering by ethernet type and support one additional queue on slaves
> tx/rx.
> 
> Signed-off-by: Tomasz Kulasek <tomaszx.kulasek at intel.com>
> ---
>   drivers/net/bonding/rte_eth_bond_8023ad.c         | 141 +++++++--
>   drivers/net/bonding/rte_eth_bond_8023ad.h         |   6 +
>   drivers/net/bonding/rte_eth_bond_8023ad_private.h |  15 +
>   drivers/net/bonding/rte_eth_bond_pmd.c            | 345 +++++++++++++++++++++-
>   drivers/net/bonding/rte_eth_bond_version.map      |   9 +
>   5 files changed, 481 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c
> index 7b863d6..125eb45 100644
> --- a/drivers/net/bonding/rte_eth_bond_8023ad.c
> +++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
> @@ -632,12 +632,20 @@
>   	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
>   	lacpdu->terminator_length = 0;
>   
> -	if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
> -		/* If TX ring full, drop packet and free message. Retransmission
> -		 * will happen in next function call. */
> -		rte_pktmbuf_free(lacp_pkt);
> -		set_warning_flags(port, WRN_TX_QUEUE_FULL);
> -		return;
> +	if (internals->mode4.slow_rx_queue == 0) {

I think we should have an explicit flag to indicate whether hw filtering of slow
packets is enabled, instead of checking the rx/tx queue id as above.

> +		if (rte_ring_enqueue(port->tx_ring, lacp_pkt) == -ENOBUFS) {
> +			/* If TX ring full, drop packet and free message. Retransmission
> +			 * will happen in next function call. */
> +			rte_pktmbuf_free(lacp_pkt);
> +			set_warning_flags(port, WRN_TX_QUEUE_FULL);
> +			return;
> +		}
> +	} else {
> +		if (rte_eth_tx_burst(slave_id, internals->mode4.slow_tx_queue, &lacp_pkt, 1) == 0) {
> +			rte_pktmbuf_free(lacp_pkt);
> +			set_warning_flags(port, WRN_TX_QUEUE_FULL);
> +			return;
> +		}
>   	}
>   
>   	MODE4_DEBUG("sending LACP frame\n");
> @@ -741,6 +749,25 @@
>   }
>   
>   static void
> +rx_machine_update(struct bond_dev_private *internals, uint8_t slave_id,
> +		struct rte_mbuf *lacp_pkt) {
> +
> +	/* Find LACP packet to this port. Do not check subtype, it is done in
> +	 * function that queued packet */
> +	if (lacp_pkt != NULL) {
> +		struct lacpdu_header *lacp;
> +
> +		lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
> +		RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
> +
> +		/* This is LACP frame so pass it to rx_machine */
> +		rx_machine(internals, slave_id, &lacp->lacpdu);
> +		rte_pktmbuf_free(lacp_pkt);
> +	} else
> +		rx_machine(internals, slave_id, NULL);
> +}
> +
> +static void
>   bond_mode_8023ad_periodic_cb(void *arg)
>   {
>   	struct rte_eth_dev *bond_dev = arg;
> @@ -809,20 +836,21 @@
>   
>   		SM_FLAG_SET(port, LACP_ENABLED);
>   
> -		/* Find LACP packet to this port. Do not check subtype, it is done in
> -		 * function that queued packet */
> -		if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
> -			struct rte_mbuf *lacp_pkt = pkt;
> -			struct lacpdu_header *lacp;
> +		struct rte_mbuf *lacp_pkt = NULL;
>   
> -			lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
> -			RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
> +		if (internals->mode4.slow_rx_queue == 0) {
 >

As above, instead of checking the rx queue id, an explicit enable/disable
flag would be clearer.

> +			/* Find LACP packet to this port. Do not check subtype, it is done in
> +			 * function that queued packet */
> +			if (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
> +				lacp_pkt = pkt;
>   
> -			/* This is LACP frame so pass it to rx_machine */
> -			rx_machine(internals, slave_id, &lacp->lacpdu);
> -			rte_pktmbuf_free(lacp_pkt);
> -		} else
> -			rx_machine(internals, slave_id, NULL);
> +			rx_machine_update(internals, slave_id, lacp_pkt);
> +		} else {
> +			if (rte_eth_rx_burst(slave_id, internals->mode4.slow_rx_queue, &lacp_pkt, 1) == 1)
> +				bond_mode_8023ad_handle_slow_pkt(internals, slave_id, lacp_pkt);
> +			else
> +				rx_machine_update(internals, slave_id, NULL);
> +		}


If possible it would be good if the hw filtered path and the sw queue path
followed the same code path here. We are now calling
bond_mode_8023ad_handle_slow_pkt from both
bond_mode_8023ad_periodic_cb and bond_ethdev_tx_burst_8023ad; it would
be clearer if both followed the same processing path and
bond_mode_8023ad_handle_slow_pkt wasn't called within
bond_ethdev_tx_burst_8023ad.

>   
>   		periodic_machine(internals, slave_id);
>   		mux_machine(internals, slave_id);
> @@ -1188,18 +1216,36 @@
>   		m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
>   		rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
>   
> -		if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) == -ENOBUFS)) {
> -			/* reset timer */
> -			port->rx_marker_timer = 0;
> -			wrn = WRN_TX_QUEUE_FULL;
> -			goto free_out;
> +		if (internals->mode4.slow_tx_queue == 0) {
> +			if (unlikely(rte_ring_enqueue(port->tx_ring, pkt) ==
> +					-ENOBUFS)) {
> +				/* reset timer */
> +				port->rx_marker_timer = 0;
> +				wrn = WRN_TX_QUEUE_FULL;
> +				goto free_out;
> +			}
> +		} else {
> +			/* Send packet directly to the slow queue */
> +			if (unlikely(rte_eth_tx_burst(slave_id,
> +					internals->mode4.slow_tx_queue,
> +					&pkt, 1) == 0)) {
> +				/* reset timer */
> +				port->rx_marker_timer = 0;
> +				wrn = WRN_TX_QUEUE_FULL;
> +				goto free_out;
> +			}
>   		}
>   	} else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
> -		if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
> -			/* If RX fing full free lacpdu message and drop packet */
> -			wrn = WRN_RX_QUEUE_FULL;
> -			goto free_out;
> -		}
> +
> +		if (internals->mode4.slow_rx_queue == 0) {
> +			if (unlikely(rte_ring_enqueue(port->rx_ring, pkt) == -ENOBUFS)) {
> +				/* If RX fing full free lacpdu message and drop packet */
> +				wrn = WRN_RX_QUEUE_FULL;
> +				goto free_out;
> +			}
> +		} else
> +			rx_machine_update(internals, slave_id, pkt);
> +
>   	} else {
>   		wrn = WRN_UNKNOWN_SLOW_TYPE;
>   		goto free_out;
> @@ -1504,3 +1550,42 @@
>   	rte_eal_alarm_set(internals->mode4.update_timeout_us,
>   			bond_mode_8023ad_ext_periodic_cb, arg);
>   }
> +
> +#define MBUF_CACHE_SIZE 250
> +#define NUM_MBUFS 8191
> +
> +int
> +rte_eth_bond_8023ad_slow_queue_enable(uint8_t port)
> +{
> +	int retval = 0;
> +	struct rte_eth_dev *dev = &rte_eth_devices[port];
> +	struct bond_dev_private *internals = (struct bond_dev_private *)
> +		dev->data->dev_private;
> +
> +	if (check_for_bonded_ethdev(dev) != 0)
> +		return -1;
> +
> +	internals->mode4.slow_rx_queue = dev->data->nb_rx_queues;
> +	internals->mode4.slow_tx_queue = dev->data->nb_tx_queues;
> +

We shouldn't be setting the slow queues here as they won't necessarily
be the right values; as mentioned above, just an enable flag would be
sufficient.

Also, we should really be testing whether all the slaves of the bond can
support applying the filtering rule required here, and then fail
enablement if they don't.

> +	bond_ethdev_mode_set(dev, internals->mode);
> +	return retval;
> +}
> +
> +int
> +rte_eth_bond_8023ad_slow_queue_disable(uint8_t port)
> +{
> +	int retval = 0;
> +	struct rte_eth_dev *dev = &rte_eth_devices[port];
> +	struct bond_dev_private *internals = (struct bond_dev_private *)
> +		dev->data->dev_private;
> +
> +	if (check_for_bonded_ethdev(dev) != 0)
> +		return -1;
> +
> +	internals->mode4.slow_rx_queue = 0;
> +	internals->mode4.slow_tx_queue = 0;
> +


As above, in regards to the enable flag

> +	bond_ethdev_mode_set(dev, internals->mode);
> +	return retval;
> +}
> diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.h b/drivers/net/bonding/rte_eth_bond_8023ad.h
> index 6b8ff57..8d21c7a 100644
> --- a/drivers/net/bonding/rte_eth_bond_8023ad.h
> +++ b/drivers/net/bonding/rte_eth_bond_8023ad.h
> @@ -302,4 +302,10 @@ struct rte_eth_bond_8023ad_slave_info {
>   rte_eth_bond_8023ad_ext_slowtx(uint8_t port_id, uint8_t slave_id,
>   		struct rte_mbuf *lacp_pkt);
>   
> +int
> +rte_eth_bond_8023ad_slow_queue_enable(uint8_t port_id);
> 
> +int
> +rte_eth_bond_8023ad_slow_queue_disable(uint8_t port_id);
> +


We need to include the doxygen here, with some details on what is being
enabled here, i.e. details that dedicated rx/tx queues on slaves are
being created for filtering the lacp control plane traffic from data
path traffic, so filtering in the data path is not required.

Also, I think that these functions' purpose would be clearer if they
were called rte_eth_bond_8023ad_slow_pkt_hw_filter_enable/disable

>   #endif /* RTE_ETH_BOND_8023AD_H_ */
> diff --git a/drivers/net/bonding/rte_eth_bond_8023ad_private.h b/drivers/net/bonding/rte_eth_bond_8023ad_private.h
> index ca8858b..3963714 100644
....
> 

One thing missing is the reporting to the application that there is a
reduced number of tx/rx queues available when hw filtering is enabled.
Looking at bond_ethdev_info() it doesn't look like this is getting
reported correctly at the moment anyway, but it should be the smallest
value of the max number of queues of the slave devices minus one. So if
we had 3 slaves, one which supports 8 rx queues and the other 2 supporting
16, then we should report 7 (8-1) as the maximum number of rx queues for
the bonded device.


Finally, we are missing some updated documentation about this new 
feature. The information in the cover note should be added to the 
bonding documentation at a minimum.


More information about the dev mailing list