[dpdk-dev] [PATCH 1/4] ethdev: fix max Rx packet length
Andrew Rybchenko
andrew.rybchenko at oktetlabs.ru
Tue Jul 13 14:47:31 CEST 2021
On 7/9/21 8:29 PM, Ferruh Yigit wrote:
> There is a confusion on setting max Rx packet length, this patch aims to
> clarify it.
>
> 'rte_eth_dev_configure()' API accepts max Rx packet size via
> 'uint32_t max_rx_pkt_len' filed of the config struct 'struct
> rte_eth_conf'.
>
> Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result
> stored into '(struct rte_eth_dev)->data->mtu'.
>
> These two APIs are related but they work in a disconnected way, they
> store the set values in different variables which makes hard to figure
> out which one to use, also two different related method is confusing for
> the users.
>
> Other issues causing confusion is:
> * maximum transmission unit (MTU) is payload of the Ethernet frame. And
> 'max_rx_pkt_len' is the size of the Ethernet frame. Difference is
> Ethernet frame overhead, but this may be different from device to
> device based on what device supports, like VLAN and QinQ.
> * 'max_rx_pkt_len' is only valid when application requested jumbo frame,
> which adds additional confusion and some APIs and PMDs already
> discards this documented behavior.
> * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory
> field, this adds configuration complexity for application.
>
> As solution, both APIs gets MTU as parameter, and both saves the result
> in same variable '(struct rte_eth_dev)->data->mtu'. For this
> 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent
> from jumbo frame.
>
> For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user
> request and it should be used only within configure function and result
> should be stored to '(struct rte_eth_dev)->data->mtu'. After that point
> both application and PMD uses MTU from this variable.
>
> When application doesn't provide an MTU during 'rte_eth_dev_configure()'
> default 'RTE_ETHER_MTU' value is used.
>
> As additional clarification, MTU is used to configure the device for
> physical Rx/Tx limitation. Other related issue is size of the buffer to
> store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size.
> And compares MTU against Rx buffer size to decide enabling scattered Rx
> or not, if PMD supports it. If scattered Rx is not supported by device,
> MTU bigger than Rx buffer size should fail.
>
Do I understand correctly that target is 21.11?
Really huge work. Many thanks.
See my notes below.
> Signed-off-by: Ferruh Yigit <ferruh.yigit at intel.com>
[snip]
> diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c
> index 6ee530d4cdc9..5fcea74b4d43 100644
> --- a/app/test-eventdev/test_pipeline_common.c
> +++ b/app/test-eventdev/test_pipeline_common.c
> @@ -197,8 +197,9 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt)
> return -EINVAL;
> }
>
> - port_conf.rxmode.max_rx_pkt_len = opt->max_pkt_sz;
> - if (opt->max_pkt_sz > RTE_ETHER_MAX_LEN)
> + port_conf.rxmode.mtu = opt->max_pkt_sz - RTE_ETHER_HDR_LEN -
> + RTE_ETHER_CRC_LEN;
Subtract requires overflow check. May max_pkt_size be 0 or just
smaller that RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN?
> + if (port_conf.rxmode.mtu > RTE_ETHER_MTU)
> port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
>
> t->internal_port = 1;
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 8468018cf35d..8bdc042f6e8e 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -1892,43 +1892,36 @@ cmd_config_max_pkt_len_parsed(void *parsed_result,
> __rte_unused void *data)
> {
> struct cmd_config_max_pkt_len_result *res = parsed_result;
> - uint32_t max_rx_pkt_len_backup = 0;
> - portid_t pid;
> + portid_t port_id;
> int ret;
>
> + if (strcmp(res->name, "max-pkt-len")) {
> + printf("Unknown parameter\n");
> + return;
> + }
> +
> if (!all_ports_stopped()) {
> printf("Please stop all ports first\n");
> return;
> }
>
> - RTE_ETH_FOREACH_DEV(pid) {
> - struct rte_port *port = &ports[pid];
> -
> - if (!strcmp(res->name, "max-pkt-len")) {
> - if (res->value < RTE_ETHER_MIN_LEN) {
> - printf("max-pkt-len can not be less than %d\n",
> - RTE_ETHER_MIN_LEN);
> - return;
> - }
> - if (res->value == port->dev_conf.rxmode.max_rx_pkt_len)
> - return;
> -
> - ret = eth_dev_info_get_print_err(pid, &port->dev_info);
> - if (ret != 0) {
> - printf("rte_eth_dev_info_get() failed for port %u\n",
> - pid);
> - return;
> - }
> + RTE_ETH_FOREACH_DEV(port_id) {
> + struct rte_port *port = &ports[port_id];
>
> - max_rx_pkt_len_backup = port->dev_conf.rxmode.max_rx_pkt_len;
> + if (res->value < RTE_ETHER_MIN_LEN) {
> + printf("max-pkt-len can not be less than %d\n",
fprintf() to stderr, please.
Here and in a number of places below.
> + RTE_ETHER_MIN_LEN);
> + return;
> + }
>
> - port->dev_conf.rxmode.max_rx_pkt_len = res->value;
> - if (update_jumbo_frame_offload(pid) != 0)
> - port->dev_conf.rxmode.max_rx_pkt_len = max_rx_pkt_len_backup;
> - } else {
> - printf("Unknown parameter\n");
> + ret = eth_dev_info_get_print_err(port_id, &port->dev_info);
> + if (ret != 0) {
> + printf("rte_eth_dev_info_get() failed for port %u\n",
> + port_id);
> return;
> }
> +
> + update_jumbo_frame_offload(port_id, res->value);
> }
>
> init_port_config();
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 04ae0feb5852..a87265d7638b 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
[snip]
> @@ -1155,20 +1154,17 @@ port_mtu_set(portid_t port_id, uint16_t mtu)
> return;
> }
> diag = rte_eth_dev_set_mtu(port_id, mtu);
> - if (diag)
> + if (diag) {
> printf("Set MTU failed. diag=%d\n", diag);
> - else if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> - /*
> - * Ether overhead in driver is equal to the difference of
> - * max_rx_pktlen and max_mtu in rte_eth_dev_info when the
> - * device supports jumbo frame.
> - */
> - eth_overhead = dev_info.max_rx_pktlen - dev_info.max_mtu;
> + return;
> + }
> +
> + rte_port->dev_conf.rxmode.mtu = mtu;
> +
> + if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> if (mtu > RTE_ETHER_MTU) {
> rte_port->dev_conf.rxmode.offloads |=
> DEV_RX_OFFLOAD_JUMBO_FRAME;
> - rte_port->dev_conf.rxmode.max_rx_pkt_len =
> - mtu + eth_overhead;
> } else
I guess curly brackets should be removed now.
> rte_port->dev_conf.rxmode.offloads &=
> ~DEV_RX_OFFLOAD_JUMBO_FRAME;
[snip]
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 1cdd3cdd12b6..2c79cae05664 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
[snip]
> @@ -1465,7 +1473,7 @@ init_config(void)
> rte_exit(EXIT_FAILURE,
> "rte_eth_dev_info_get() failed\n");
>
> - ret = update_jumbo_frame_offload(pid);
> + ret = update_jumbo_frame_offload(pid, 0);
> if (ret != 0)
> printf("Updating jumbo frame offload failed for port %u\n",
> pid);
> @@ -1512,14 +1520,19 @@ init_config(void)
> */
> if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
> port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
> - data_size = rx_mode.max_rx_pkt_len /
> - port->dev_info.rx_desc_lim.nb_mtu_seg_max;
> + uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
> + uint16_t mtu;
>
> - if ((data_size + RTE_PKTMBUF_HEADROOM) >
> + if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
> + data_size = mtu + eth_overhead /
> + port->dev_info.rx_desc_lim.nb_mtu_seg_max;
> +
> + if ((data_size + RTE_PKTMBUF_HEADROOM) >
Unnecessary parenthesis.
> mbuf_data_size[0]) {
> - mbuf_data_size[0] = data_size +
> - RTE_PKTMBUF_HEADROOM;
> - warning = 1;
> + mbuf_data_size[0] = data_size +
> + RTE_PKTMBUF_HEADROOM;
> + warning = 1;
> + }
> }
> }
> }
[snip]
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index c515de3bf71d..0a8d29277aeb 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -1627,13 +1627,8 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
> {
> struct pmd_internals *pmd = dev->data->dev_private;
> struct ifreq ifr = { .ifr_mtu = mtu };
> - int err = 0;
>
> - err = tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);
> - if (!err)
> - dev->data->mtu = mtu;
> -
> - return err;
> + return tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);
The cleanup could be done separately before the patch, since
it just makes the long patch longer and unrelated in fact,
since assignment after callback is already done.
> }
>
> static int
[snip]
> diff --git a/examples/ip_fragmentation/main.c b/examples/ip_fragmentation/main.c
> index 77a6a18d1914..f97287ce2243 100644
> --- a/examples/ip_fragmentation/main.c
> +++ b/examples/ip_fragmentation/main.c
> @@ -146,7 +146,7 @@ struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
>
> static struct rte_eth_conf port_conf = {
> .rxmode = {
> - .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
> + .mtu = JUMBO_FRAME_MAX_SIZE,
Before the patch JUMBO_FRAME_MAX_SIZE inluded overhad, but
after the patch it is used as it is does not include overhead.
There a number of similiar cases in other apps.
> .split_hdr_size = 0,
> .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
> DEV_RX_OFFLOAD_SCATTER |
[snip]
> diff --git a/examples/ip_pipeline/link.c b/examples/ip_pipeline/link.c
> index 16bcffe356bc..8628db22f56b 100644
> --- a/examples/ip_pipeline/link.c
> +++ b/examples/ip_pipeline/link.c
> @@ -46,7 +46,7 @@ static struct rte_eth_conf port_conf_default = {
> .link_speeds = 0,
> .rxmode = {
> .mq_mode = ETH_MQ_RX_NONE,
> - .max_rx_pkt_len = 9000, /* Jumbo frame max packet len */
> + .mtu = 9000, /* Jumbo frame MTU */
Strictly speaking 9000 included overhead before the patch and
does not include overhead after the patch.
There a number of similiar cases in other apps.
> .split_hdr_size = 0, /* Header split buffer size */
> },
> .rx_adv_conf = {
[snip]
> diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
> index a1f457b564b6..913037d5f835 100644
> --- a/examples/l3fwd-acl/main.c
> +++ b/examples/l3fwd-acl/main.c
[snip]
> @@ -1833,12 +1832,12 @@ parse_args(int argc, char **argv)
> print_usage(prgname);
> return -1;
> }
> - port_conf.rxmode.max_rx_pkt_len = ret;
> + port_conf.rxmode.mtu = ret - (RTE_ETHER_HDR_LEN +
> + RTE_ETHER_CRC_LEN);
> }
> - printf("set jumbo frame max packet length "
> - "to %u\n",
> - (unsigned int)
> - port_conf.rxmode.max_rx_pkt_len);
> + printf("set jumbo frame max packet length to %u\n",
> + (unsigned int)port_conf.rxmode.mtu +
> + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN);
I think that overhead should be obtainded from dev_info with
fallback to value used above.
There are many similar cases in other apps.
[snip]
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index c607eabb5b0c..3451125639f9 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1249,15 +1249,15 @@ rte_eth_dev_tx_offload_name(uint64_t offload)
>
> static inline int
> eth_dev_check_lro_pkt_size(uint16_t port_id, uint32_t config_size,
> - uint32_t max_rx_pkt_len, uint32_t dev_info_size)
> + uint32_t max_rx_pktlen, uint32_t dev_info_size)
> {
> int ret = 0;
>
> if (dev_info_size == 0) {
> - if (config_size != max_rx_pkt_len) {
> + if (config_size != max_rx_pktlen) {
> RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%d max_lro_pkt_size"
> " %u != %u is not allowed\n",
> - port_id, config_size, max_rx_pkt_len);
> + port_id, config_size, max_rx_pktlen);
This patch looks a bit unrelated and make the long patch
even more longer. May be it is better to do the cleanup
first (before the patch).
> ret = -EINVAL;
> }
> } else if (config_size > dev_info_size) {
> @@ -1325,6 +1325,19 @@ eth_dev_validate_offloads(uint16_t port_id, uint64_t req_offloads,
> return ret;
> }
>
> +static uint16_t
> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
> +{
> + uint16_t overhead_len;
> +
> + if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
> + overhead_len = max_rx_pktlen - max_mtu;
> + else
> + overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +
> + return overhead_len;
> +}
> +
> int
> rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
> const struct rte_eth_conf *dev_conf)
> @@ -1332,6 +1345,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
> struct rte_eth_dev *dev;
> struct rte_eth_dev_info dev_info;
> struct rte_eth_conf orig_conf;
> + uint32_t max_rx_pktlen;
> uint16_t overhead_len;
> int diag;
> int ret;
> @@ -1375,11 +1389,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
> goto rollback;
>
> /* Get the real Ethernet overhead length */
> - if (dev_info.max_mtu != UINT16_MAX &&
> - dev_info.max_rx_pktlen > dev_info.max_mtu)
> - overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
> - else
> - overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> + overhead_len = eth_dev_get_overhead_len(dev_info.max_rx_pktlen,
> + dev_info.max_mtu);
>
> /* If number of queues specified by application for both Rx and Tx is
> * zero, use driver preferred values. This cannot be done individually
> @@ -1448,49 +1459,45 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
> }
>
> /*
> - * If jumbo frames are enabled, check that the maximum RX packet
> - * length is supported by the configured device.
> + * Check that the maximum RX packet length is supported by the
> + * configured device.
> */
> - if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> - if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
> - RTE_ETHDEV_LOG(ERR,
> - "Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n",
> - port_id, dev_conf->rxmode.max_rx_pkt_len,
> - dev_info.max_rx_pktlen);
> - ret = -EINVAL;
> - goto rollback;
> - } else if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN) {
> - RTE_ETHDEV_LOG(ERR,
> - "Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n",
> - port_id, dev_conf->rxmode.max_rx_pkt_len,
> - (unsigned int)RTE_ETHER_MIN_LEN);
> - ret = -EINVAL;
> - goto rollback;
> - }
> + if (dev_conf->rxmode.mtu == 0)
> + dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
> + max_rx_pktlen = dev->data->dev_conf.rxmode.mtu + overhead_len;
> + if (max_rx_pktlen > dev_info.max_rx_pktlen) {
> + RTE_ETHDEV_LOG(ERR,
> + "Ethdev port_id=%u max_rx_pktlen %u > max valid value %u\n",
> + port_id, max_rx_pktlen, dev_info.max_rx_pktlen);
> + ret = -EINVAL;
> + goto rollback;
> + } else if (max_rx_pktlen < RTE_ETHER_MIN_LEN) {
> + RTE_ETHDEV_LOG(ERR,
> + "Ethdev port_id=%u max_rx_pktlen %u < min valid value %u\n",
> + port_id, max_rx_pktlen, RTE_ETHER_MIN_LEN);
> + ret = -EINVAL;
> + goto rollback;
> + }
>
> - /* Scale the MTU size to adapt max_rx_pkt_len */
> - dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
> - overhead_len;
> - } else {
> - uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
> - if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
> - pktlen > RTE_ETHER_MTU + overhead_len)
> + if ((dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
> + if (dev->data->dev_conf.rxmode.mtu < RTE_ETHER_MIN_MTU ||
> + dev->data->dev_conf.rxmode.mtu > RTE_ETHER_MTU)
> /* Use default value */
> - dev->data->dev_conf.rxmode.max_rx_pkt_len =
> - RTE_ETHER_MTU + overhead_len;
> + dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
I don't understand it. It would be good to add comments to
explain logic above.
> }
>
> + dev->data->mtu = dev->data->dev_conf.rxmode.mtu;
> +
> /*
> * If LRO is enabled, check that the maximum aggregated packet
> * size is supported by the configured device.
> */
> if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO) {
> if (dev_conf->rxmode.max_lro_pkt_size == 0)
> - dev->data->dev_conf.rxmode.max_lro_pkt_size =
> - dev->data->dev_conf.rxmode.max_rx_pkt_len;
> + dev->data->dev_conf.rxmode.max_lro_pkt_size = max_rx_pktlen;
> ret = eth_dev_check_lro_pkt_size(port_id,
> dev->data->dev_conf.rxmode.max_lro_pkt_size,
> - dev->data->dev_conf.rxmode.max_rx_pkt_len,
> + max_rx_pktlen,
> dev_info.max_lro_pkt_size);
> if (ret != 0)
> goto rollback;
[snip]
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index faf3bd901d75..9f288f98329c 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -410,7 +410,7 @@ enum rte_eth_tx_mq_mode {
> struct rte_eth_rxmode {
> /** The multi-queue packet distribution mode to be used, e.g. RSS. */
> enum rte_eth_rx_mq_mode mq_mode;
> - uint32_t max_rx_pkt_len; /**< Only used if JUMBO_FRAME enabled. */
> + uint32_t mtu; /**< Requested MTU. */
Maximum Transmit Unit looks a bit confusing in Rx mode
structure.
> /** Maximum allowed size of LRO aggregated packet. */
> uint32_t max_lro_pkt_size;
> uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
[snip]
More information about the dev
mailing list