[dpdk-dev] [PATCH 1/4] ethdev: fix max Rx packet length

Andrew Rybchenko andrew.rybchenko at oktetlabs.ru
Tue Jul 13 14:47:31 CEST 2021


On 7/9/21 8:29 PM, Ferruh Yigit wrote:
> There is a confusion on setting max Rx packet length, this patch aims to
> clarify it.
> 
> 'rte_eth_dev_configure()' API accepts max Rx packet size via
> 'uint32_t max_rx_pkt_len' filed of the config struct 'struct
> rte_eth_conf'.
> 
> Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result
> stored into '(struct rte_eth_dev)->data->mtu'.
> 
> These two APIs are related but they work in a disconnected way, they
> store the set values in different variables which makes hard to figure
> out which one to use, also two different related method is confusing for
> the users.
> 
> Other issues causing confusion is:
> * maximum transmission unit (MTU) is payload of the Ethernet frame. And
>   'max_rx_pkt_len' is the size of the Ethernet frame. Difference is
>   Ethernet frame overhead, but this may be different from device to
>   device based on what device supports, like VLAN and QinQ.
> * 'max_rx_pkt_len' is only valid when application requested jumbo frame,
>   which adds additional confusion and some APIs and PMDs already
>   discards this documented behavior.
> * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory
>   field, this adds configuration complexity for application.
> 
> As solution, both APIs gets MTU as parameter, and both saves the result
> in same variable '(struct rte_eth_dev)->data->mtu'. For this
> 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent
> from jumbo frame.
> 
> For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user
> request and it should be used only within configure function and result
> should be stored to '(struct rte_eth_dev)->data->mtu'. After that point
> both application and PMD uses MTU from this variable.
> 
> When application doesn't provide an MTU during 'rte_eth_dev_configure()'
> default 'RTE_ETHER_MTU' value is used.
> 
> As additional clarification, MTU is used to configure the device for
> physical Rx/Tx limitation. Other related issue is size of the buffer to
> store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size.
> And compares MTU against Rx buffer size to decide enabling scattered Rx
> or not, if PMD supports it. If scattered Rx is not supported by device,
> MTU bigger than Rx buffer size should fail.
> 

Do I understand correctly that target is 21.11?

Really huge work. Many thanks.

See my notes below.

> Signed-off-by: Ferruh Yigit <ferruh.yigit at intel.com>

[snip]

> diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c
> index 6ee530d4cdc9..5fcea74b4d43 100644
> --- a/app/test-eventdev/test_pipeline_common.c
> +++ b/app/test-eventdev/test_pipeline_common.c
> @@ -197,8 +197,9 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt)
>  		return -EINVAL;
>  	}
>  
> -	port_conf.rxmode.max_rx_pkt_len = opt->max_pkt_sz;
> -	if (opt->max_pkt_sz > RTE_ETHER_MAX_LEN)
> +	port_conf.rxmode.mtu = opt->max_pkt_sz - RTE_ETHER_HDR_LEN -
> +		RTE_ETHER_CRC_LEN;

Subtract requires overflow check. May max_pkt_size be 0 or just
smaller that RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN?

> +	if (port_conf.rxmode.mtu > RTE_ETHER_MTU)
>  		port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
>  
>  	t->internal_port = 1;
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 8468018cf35d..8bdc042f6e8e 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -1892,43 +1892,36 @@ cmd_config_max_pkt_len_parsed(void *parsed_result,
>  				__rte_unused void *data)
>  {
>  	struct cmd_config_max_pkt_len_result *res = parsed_result;
> -	uint32_t max_rx_pkt_len_backup = 0;
> -	portid_t pid;
> +	portid_t port_id;
>  	int ret;
>  
> +	if (strcmp(res->name, "max-pkt-len")) {
> +		printf("Unknown parameter\n");
> +		return;
> +	}
> +
>  	if (!all_ports_stopped()) {
>  		printf("Please stop all ports first\n");
>  		return;
>  	}
>  
> -	RTE_ETH_FOREACH_DEV(pid) {
> -		struct rte_port *port = &ports[pid];
> -
> -		if (!strcmp(res->name, "max-pkt-len")) {
> -			if (res->value < RTE_ETHER_MIN_LEN) {
> -				printf("max-pkt-len can not be less than %d\n",
> -						RTE_ETHER_MIN_LEN);
> -				return;
> -			}
> -			if (res->value == port->dev_conf.rxmode.max_rx_pkt_len)
> -				return;
> -
> -			ret = eth_dev_info_get_print_err(pid, &port->dev_info);
> -			if (ret != 0) {
> -				printf("rte_eth_dev_info_get() failed for port %u\n",
> -					pid);
> -				return;
> -			}
> +	RTE_ETH_FOREACH_DEV(port_id) {
> +		struct rte_port *port = &ports[port_id];
>  
> -			max_rx_pkt_len_backup = port->dev_conf.rxmode.max_rx_pkt_len;
> +		if (res->value < RTE_ETHER_MIN_LEN) {
> +			printf("max-pkt-len can not be less than %d\n",

fprintf() to stderr, please.
Here and in a number of places below.

> +					RTE_ETHER_MIN_LEN);
> +			return;
> +		}
>  
> -			port->dev_conf.rxmode.max_rx_pkt_len = res->value;
> -			if (update_jumbo_frame_offload(pid) != 0)
> -				port->dev_conf.rxmode.max_rx_pkt_len = max_rx_pkt_len_backup;
> -		} else {
> -			printf("Unknown parameter\n");
> +		ret = eth_dev_info_get_print_err(port_id, &port->dev_info);
> +		if (ret != 0) {
> +			printf("rte_eth_dev_info_get() failed for port %u\n",
> +				port_id);
>  			return;
>  		}
> +
> +		update_jumbo_frame_offload(port_id, res->value);
>  	}
>  
>  	init_port_config();
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 04ae0feb5852..a87265d7638b 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c

[snip]

> @@ -1155,20 +1154,17 @@ port_mtu_set(portid_t port_id, uint16_t mtu)
>  		return;
>  	}
>  	diag = rte_eth_dev_set_mtu(port_id, mtu);
> -	if (diag)
> +	if (diag) {
>  		printf("Set MTU failed. diag=%d\n", diag);
> -	else if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> -		/*
> -		 * Ether overhead in driver is equal to the difference of
> -		 * max_rx_pktlen and max_mtu in rte_eth_dev_info when the
> -		 * device supports jumbo frame.
> -		 */
> -		eth_overhead = dev_info.max_rx_pktlen - dev_info.max_mtu;
> +		return;
> +	}
> +
> +	rte_port->dev_conf.rxmode.mtu = mtu;
> +
> +	if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) {
>  		if (mtu > RTE_ETHER_MTU) {
>  			rte_port->dev_conf.rxmode.offloads |=
>  						DEV_RX_OFFLOAD_JUMBO_FRAME;
> -			rte_port->dev_conf.rxmode.max_rx_pkt_len =
> -						mtu + eth_overhead;
>  		} else

I guess curly brackets should be removed now.

>  			rte_port->dev_conf.rxmode.offloads &=
>  						~DEV_RX_OFFLOAD_JUMBO_FRAME;

[snip]

> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 1cdd3cdd12b6..2c79cae05664 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c

[snip]

> @@ -1465,7 +1473,7 @@ init_config(void)
>  			rte_exit(EXIT_FAILURE,
>  				 "rte_eth_dev_info_get() failed\n");
>  
> -		ret = update_jumbo_frame_offload(pid);
> +		ret = update_jumbo_frame_offload(pid, 0);
>  		if (ret != 0)
>  			printf("Updating jumbo frame offload failed for port %u\n",
>  				pid);
> @@ -1512,14 +1520,19 @@ init_config(void)
>  		 */
>  		if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
>  				port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
> -			data_size = rx_mode.max_rx_pkt_len /
> -				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
> +			uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
> +			uint16_t mtu;
>  
> -			if ((data_size + RTE_PKTMBUF_HEADROOM) >
> +			if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
> +				data_size = mtu + eth_overhead /
> +					port->dev_info.rx_desc_lim.nb_mtu_seg_max;
> +
> +				if ((data_size + RTE_PKTMBUF_HEADROOM) >

Unnecessary parenthesis.

>  							mbuf_data_size[0]) {
> -				mbuf_data_size[0] = data_size +
> -						 RTE_PKTMBUF_HEADROOM;
> -				warning = 1;
> +					mbuf_data_size[0] = data_size +
> +							 RTE_PKTMBUF_HEADROOM;
> +					warning = 1;
> +				}
>  			}
>  		}
>  	}

[snip]

> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index c515de3bf71d..0a8d29277aeb 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -1627,13 +1627,8 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
>  {
>  	struct pmd_internals *pmd = dev->data->dev_private;
>  	struct ifreq ifr = { .ifr_mtu = mtu };
> -	int err = 0;
>  
> -	err = tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);
> -	if (!err)
> -		dev->data->mtu = mtu;
> -
> -	return err;
> +	return tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);

The cleanup could be done separately before the patch, since
it just makes the long patch longer and unrelated in fact,
since assignment after callback is already done.

>  }
>  
>  static int

[snip]

> diff --git a/examples/ip_fragmentation/main.c b/examples/ip_fragmentation/main.c
> index 77a6a18d1914..f97287ce2243 100644
> --- a/examples/ip_fragmentation/main.c
> +++ b/examples/ip_fragmentation/main.c
> @@ -146,7 +146,7 @@ struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
>  
>  static struct rte_eth_conf port_conf = {
>  	.rxmode = {
> -		.max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
> +		.mtu = JUMBO_FRAME_MAX_SIZE,

Before the patch JUMBO_FRAME_MAX_SIZE inluded overhad, but
after the patch it is used as it is does not include overhead.

There a number of similiar cases in other apps.

>  		.split_hdr_size = 0,
>  		.offloads = (DEV_RX_OFFLOAD_CHECKSUM |
>  			     DEV_RX_OFFLOAD_SCATTER |

[snip]

> diff --git a/examples/ip_pipeline/link.c b/examples/ip_pipeline/link.c
> index 16bcffe356bc..8628db22f56b 100644
> --- a/examples/ip_pipeline/link.c
> +++ b/examples/ip_pipeline/link.c
> @@ -46,7 +46,7 @@ static struct rte_eth_conf port_conf_default = {
>  	.link_speeds = 0,
>  	.rxmode = {
>  		.mq_mode = ETH_MQ_RX_NONE,
> -		.max_rx_pkt_len = 9000, /* Jumbo frame max packet len */
> +		.mtu = 9000, /* Jumbo frame MTU */

Strictly speaking 9000 included overhead before the patch and
does not include overhead after the patch.

There a number of similiar cases in other apps.

>  		.split_hdr_size = 0, /* Header split buffer size */
>  	},
>  	.rx_adv_conf = {

[snip]

> diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
> index a1f457b564b6..913037d5f835 100644
> --- a/examples/l3fwd-acl/main.c
> +++ b/examples/l3fwd-acl/main.c

[snip]

> @@ -1833,12 +1832,12 @@ parse_args(int argc, char **argv)
>  					print_usage(prgname);
>  					return -1;
>  				}
> -				port_conf.rxmode.max_rx_pkt_len = ret;
> +				port_conf.rxmode.mtu = ret - (RTE_ETHER_HDR_LEN +
> +					RTE_ETHER_CRC_LEN);
>  			}
> -			printf("set jumbo frame max packet length "
> -				"to %u\n",
> -				(unsigned int)
> -				port_conf.rxmode.max_rx_pkt_len);
> +			printf("set jumbo frame max packet length to %u\n",
> +				(unsigned int)port_conf.rxmode.mtu +
> +				RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN);


I think that overhead should be obtainded from dev_info with
fallback to value used above.

There are many similar cases in other apps.

[snip]

> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index c607eabb5b0c..3451125639f9 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1249,15 +1249,15 @@ rte_eth_dev_tx_offload_name(uint64_t offload)
>  
>  static inline int
>  eth_dev_check_lro_pkt_size(uint16_t port_id, uint32_t config_size,
> -		   uint32_t max_rx_pkt_len, uint32_t dev_info_size)
> +		   uint32_t max_rx_pktlen, uint32_t dev_info_size)
>  {
>  	int ret = 0;
>  
>  	if (dev_info_size == 0) {
> -		if (config_size != max_rx_pkt_len) {
> +		if (config_size != max_rx_pktlen) {
>  			RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%d max_lro_pkt_size"
>  				       " %u != %u is not allowed\n",
> -				       port_id, config_size, max_rx_pkt_len);
> +				       port_id, config_size, max_rx_pktlen);

This patch looks a bit unrelated and make the long patch
even more longer. May be it is better to do the cleanup
first (before the patch).

>  			ret = -EINVAL;
>  		}
>  	} else if (config_size > dev_info_size) {
> @@ -1325,6 +1325,19 @@ eth_dev_validate_offloads(uint16_t port_id, uint64_t req_offloads,
>  	return ret;
>  }
>  
> +static uint16_t
> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
> +{
> +	uint16_t overhead_len;
> +
> +	if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
> +		overhead_len = max_rx_pktlen - max_mtu;
> +	else
> +		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +
> +	return overhead_len;
> +}
> +
>  int
>  rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  		      const struct rte_eth_conf *dev_conf)
> @@ -1332,6 +1345,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  	struct rte_eth_dev *dev;
>  	struct rte_eth_dev_info dev_info;
>  	struct rte_eth_conf orig_conf;
> +	uint32_t max_rx_pktlen;
>  	uint16_t overhead_len;
>  	int diag;
>  	int ret;
> @@ -1375,11 +1389,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  		goto rollback;
>  
>  	/* Get the real Ethernet overhead length */
> -	if (dev_info.max_mtu != UINT16_MAX &&
> -	    dev_info.max_rx_pktlen > dev_info.max_mtu)
> -		overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
> -	else
> -		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +	overhead_len = eth_dev_get_overhead_len(dev_info.max_rx_pktlen,
> +			dev_info.max_mtu);
>  
>  	/* If number of queues specified by application for both Rx and Tx is
>  	 * zero, use driver preferred values. This cannot be done individually
> @@ -1448,49 +1459,45 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  	}
>  
>  	/*
> -	 * If jumbo frames are enabled, check that the maximum RX packet
> -	 * length is supported by the configured device.
> +	 * Check that the maximum RX packet length is supported by the
> +	 * configured device.
>  	 */
> -	if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> -		if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
> -			RTE_ETHDEV_LOG(ERR,
> -				"Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n",
> -				port_id, dev_conf->rxmode.max_rx_pkt_len,
> -				dev_info.max_rx_pktlen);
> -			ret = -EINVAL;
> -			goto rollback;
> -		} else if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN) {
> -			RTE_ETHDEV_LOG(ERR,
> -				"Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n",
> -				port_id, dev_conf->rxmode.max_rx_pkt_len,
> -				(unsigned int)RTE_ETHER_MIN_LEN);
> -			ret = -EINVAL;
> -			goto rollback;
> -		}
> +	if (dev_conf->rxmode.mtu == 0)
> +		dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
> +	max_rx_pktlen = dev->data->dev_conf.rxmode.mtu + overhead_len;
> +	if (max_rx_pktlen > dev_info.max_rx_pktlen) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Ethdev port_id=%u max_rx_pktlen %u > max valid value %u\n",
> +			port_id, max_rx_pktlen, dev_info.max_rx_pktlen);
> +		ret = -EINVAL;
> +		goto rollback;
> +	} else if (max_rx_pktlen < RTE_ETHER_MIN_LEN) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Ethdev port_id=%u max_rx_pktlen %u < min valid value %u\n",
> +			port_id, max_rx_pktlen, RTE_ETHER_MIN_LEN);
> +		ret = -EINVAL;
> +		goto rollback;
> +	}
>  
> -		/* Scale the MTU size to adapt max_rx_pkt_len */
> -		dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
> -				overhead_len;
> -	} else {
> -		uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
> -		if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
> -		    pktlen > RTE_ETHER_MTU + overhead_len)
> +	if ((dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
> +		if (dev->data->dev_conf.rxmode.mtu < RTE_ETHER_MIN_MTU ||
> +				dev->data->dev_conf.rxmode.mtu > RTE_ETHER_MTU)
>  			/* Use default value */
> -			dev->data->dev_conf.rxmode.max_rx_pkt_len =
> -						RTE_ETHER_MTU + overhead_len;
> +			dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;

I don't understand it. It would be good to add comments to
explain logic above.

>  	}
>  
> +	dev->data->mtu = dev->data->dev_conf.rxmode.mtu;
> +
>  	/*
>  	 * If LRO is enabled, check that the maximum aggregated packet
>  	 * size is supported by the configured device.
>  	 */
>  	if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO) {
>  		if (dev_conf->rxmode.max_lro_pkt_size == 0)
> -			dev->data->dev_conf.rxmode.max_lro_pkt_size =
> -				dev->data->dev_conf.rxmode.max_rx_pkt_len;
> +			dev->data->dev_conf.rxmode.max_lro_pkt_size = max_rx_pktlen;
>  		ret = eth_dev_check_lro_pkt_size(port_id,
>  				dev->data->dev_conf.rxmode.max_lro_pkt_size,
> -				dev->data->dev_conf.rxmode.max_rx_pkt_len,
> +				max_rx_pktlen,
>  				dev_info.max_lro_pkt_size);
>  		if (ret != 0)
>  			goto rollback;

[snip]

> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index faf3bd901d75..9f288f98329c 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -410,7 +410,7 @@ enum rte_eth_tx_mq_mode {
>  struct rte_eth_rxmode {
>  	/** The multi-queue packet distribution mode to be used, e.g. RSS. */
>  	enum rte_eth_rx_mq_mode mq_mode;
> -	uint32_t max_rx_pkt_len;  /**< Only used if JUMBO_FRAME enabled. */
> +	uint32_t mtu;  /**< Requested MTU. */

Maximum Transmit Unit looks a bit confusing in Rx mode
structure.

>  	/** Maximum allowed size of LRO aggregated packet. */
>  	uint32_t max_lro_pkt_size;
>  	uint16_t split_hdr_size;  /**< hdr buf size (header_split enabled).*/

[snip]


More information about the dev mailing list