[dpdk-dev] [dpdk-users] RSS Hash not working for XL710/X710 NICs for some RX mbuf sizes

Zhang, Helin helin.zhang at intel.com
Mon Jul 18 17:15:24 CEST 2016


Hi Ceara

Could you let me know your firmware version?
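(For instance, while the ports are still bound to the kernel i40e driver, ethtool -i <interface> reports the firmware version; <interface> is just a placeholder for your port name.)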
And could you try a standard DPDK example application, such as testpmd, to see if the same issue occurs there?
Basically we always set the same size for both the RX and TX buffers, e.g. the default of 2048 bytes used by a lot of applications.
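For reference, a minimal sketch of that usual setup (the pool name and counts are arbitrary, just for illustration): one mempool created with rte_pktmbuf_pool_create() and the default 2048-byte data room, shared by the RX and TX paths:

#include <rte_mbuf.h>
#include <rte_lcore.h>

/* One pool with the default 2048-byte data room (plus headroom),
 * used both for receiving and for building packets to transmit. */
static struct rte_mempool *
create_shared_pool(void)
{
    return rte_pktmbuf_pool_create("pkt_pool",
                                   8192,  /* number of mbufs */
                                   256,   /* per-lcore cache size */
                                   0,     /* private area size */
                                   RTE_MBUF_DEFAULT_BUF_SIZE, /* 2048 + RTE_PKTMBUF_HEADROOM */
                                   rte_socket_id());
}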

We will definitely try to reproduce the issue with testpmd, using 2K mbufs. Hopefully we can find the root cause, or confirm that it is not an issue.
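For example (exact options may differ between testpmd versions), something along these lines should print the RSS hash of each packet received on the XL710 ports:

./testpmd -c 0x3 -n 4 -w 0000:82:00.0 -w 0000:83:00.0 -- -i --rxq=1 --txq=1 --mbuf-size=2048 --rss-ip
testpmd> set verbose 1
testpmd> start

and then send traffic into one of the ports, e.g. with your test program.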

Thank you very much for reporting this!

BTW, dev at dpdk.org, rather than users at dpdk.org, is the right list for questions/issues like this.

Regards,
Helin

> -----Original Message-----
> From: Take Ceara [mailto:dumitru.ceara at gmail.com]
> Sent: Monday, July 18, 2016 4:03 PM
> To: users at dpdk.org
> Cc: Zhang, Helin <helin.zhang at intel.com>; Wu, Jingjing <jingjing.wu at intel.com>
> Subject: [dpdk-users] RSS Hash not working for XL710/X710 NICs for some RX
> mbuf sizes
> 
> Hi,
> 
> Is there any known issue with the i40e DPDK driver when RSS hashing is
> enabled in DPDK 16.04?
> I've noticed that for some specific receive mbuf sizes the RSS hash is always set
> to 0 for incoming packets.
> 
> I have a setup with two XL710 ports connected back to back. The simple test
> program below sends fixed TCP packets from port 0 to port 1. The
> L5 payload is added to the packet in such a way that the packet consumes exactly
> one TX mbuf. For some values of the RX mbuf size the incoming mbuf has the
> hash.rss == 0 even though the PKT_RX_RSS_HASH flag is set in ol_flags. In my
> code the TX/RX mbuf sizes are controlled by the RX_MBUF_SIZE and
> TX_MBUF_SIZE macros.
> 
> As an example, with some of the following TX/RX sizes the assert that checks that
> the RSS hash is non-zero fails, while with the others it passes:
> 
> RX_MBUF_SIZE  TX_MBUF_SIZE assert
> =================================
> 1024          1024         fail
> 1025          1024         ok
> 1024          2048         fail
> 2048          2048         fail
> 2048          2047         fail
> 2049          2048         ok
> 
> On the same setup I have another loopback connection between two 82599ES
> 10G NICs and when I run exactly the same test the RSS hash is always correct in
> all cases.
> 
> $ $RTE_SDK/tools/dpdk_nic_bind.py -s
> 
> Network devices using DPDK-compatible driver
> ============================================
> 0000:02:00.0 '82599ES 10-Gigabit SFI/SFP+ Network Connection' drv=igb_uio unused=
> 0000:03:00.0 '82599ES 10-Gigabit SFI/SFP+ Network Connection' drv=igb_uio unused=
> 0000:82:00.0 'Ethernet Controller XL710 for 40GbE QSFP+' drv=igb_uio unused=
> 0000:83:00.0 'Ethernet Controller XL710 for 40GbE QSFP+' drv=igb_uio unused=
> 
> The command line I use for running the test on the 40G NICs is:
> 
> ./build/test -c 0x1 -n 4 -m 1024 -w 0000:82:00.0 -w 0000:83:00.0
> 
> Thanks,
> Dumitru Ceara
> 
> #include <stdbool.h>
> #include <stdint.h>
> #include <assert.h>
> #include <unistd.h>
> 
> #include <rte_ethdev.h>
> #include <rte_timer.h>
> #include <rte_ip.h>
> #include <rte_tcp.h>
> #include <rte_udp.h>
> #include <rte_errno.h>
> #include <rte_arp.h>
> 
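> /* Mempool element size for a given data (fragment) size:
>  * data room + struct rte_mbuf + headroom. */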
> #define MBUF_SIZE(frag_size) \
>     ((frag_size) + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
> 
> #define RX_MBUF_SIZE MBUF_SIZE(RTE_MBUF_DEFAULT_DATAROOM)
> #define TX_MBUF_SIZE MBUF_SIZE(RTE_MBUF_DEFAULT_DATAROOM)
> 
> #define MBUF_CACHE 512
> #define MBUF_COUNT 1024
> 
> static struct rte_mempool *rx_mpool;
> static struct rte_mempool *tx_mpool;
> 
> #define PORT_MAX_MTU 9198
> 
> #define L5_GET_LEN(pkt) (rte_pktmbuf_tailroom((pkt)))
> 
> #define PORT0  0
> #define PORT1  1
> #define QUEUE0 0
> #define Q_CNT  1
> 
> 
> struct rte_eth_conf default_port_config = {
>     .rxmode = {
>         .mq_mode        = ETH_MQ_RX_RSS,
>         .max_rx_pkt_len = PORT_MAX_MTU,
>         .split_hdr_size = 0,
>         .header_split   = 0, /**< Header Split disabled */
>         .hw_ip_checksum = 1, /**< IP checksum offload enabled */
>         .hw_vlan_filter = 0, /**< VLAN filtering disabled */
>         .jumbo_frame    = 1, /**< Jumbo Frame Support enabled */
>         .hw_strip_crc   = 0, /**< CRC not stripped by hardware */
>     },
>     .rx_adv_conf = {
>         .rss_conf = {
>             .rss_key = NULL,
>             .rss_key_len = 0,
>             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP |
>                       ETH_RSS_NONFRAG_IPV4_UDP,
>         },
>     },
>     .txmode = {
>         .mq_mode = ETH_MQ_TX_NONE,
>     }
> };
> 
> struct rte_eth_rxconf rx_conf = {
>     .rx_thresh = {
>         .pthresh = 8,
>         .hthresh = 8,
>         .wthresh = 4,
>     },
>     .rx_free_thresh = 64,
>     .rx_drop_en = 0
> };
> 
> struct rte_eth_txconf tx_conf = {
>     .tx_thresh = {
>         .pthresh = 36,
>         .hthresh = 0,
>         .wthresh = 0,
>     },
>     .tx_free_thresh = 64,
>     .tx_rs_thresh = 32,
> };
> 
> static void port_setup(uint32_t port)
> {
>     uint32_t queue;
>     int ret;
> 
>     assert(rte_eth_dev_configure(port, Q_CNT, Q_CNT,
>                                  &default_port_config) == 0);
>     for (queue = 0; queue < Q_CNT; queue++) {
>         ret = rte_eth_rx_queue_setup(port, queue, 128, SOCKET_ID_ANY,
>                                      &rx_conf,
>                                      rx_mpool);
>         assert(ret == 0);
>         ret = rte_eth_tx_queue_setup(port, queue, 128, SOCKET_ID_ANY,
>                                      &tx_conf);
>         assert(ret == 0);
>     }
> 
>     assert(rte_eth_dev_start(port) == 0);
> }
> 
> #define HDRS_SIZE                   \
>         (sizeof(struct ether_hdr) + \
>          sizeof(struct ipv4_hdr) +  \
>          sizeof(struct tcp_hdr))
> 
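> /* Build a single-mbuf Ethernet/IPv4/TCP packet whose payload fills all
>  * remaining tailroom, so it consumes exactly one TX mbuf. */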
> static struct rte_mbuf *get_tcp_pkt(uint16_t eth_port)
> {
>     struct rte_mbuf  *pkt;
>     struct ether_hdr *eth_hdr;
>     struct ipv4_hdr  *ip_hdr;
>     struct tcp_hdr   *tcp_hdr;
>     uint32_t          ip_hdr_len = sizeof(*ip_hdr);
>     uint32_t          tcp_hdr_len = sizeof(*tcp_hdr);
>     uint32_t          l5_len;
> 
>     pkt = rte_pktmbuf_alloc(tx_mpool);
>     assert(pkt != NULL);
> 
>     pkt->port = eth_port;
>     pkt->l2_len = sizeof(*eth_hdr);
> 
>     RTE_LOG(ERR, USER1, "1:head = %d, tail = %d, len = %d\n",
>             rte_pktmbuf_headroom(pkt), rte_pktmbuf_tailroom(pkt),
>             rte_pktmbuf_pkt_len(pkt));
> 
>     /* Reserve space for ETH + IP + TCP Headers.
>      * Store how much tailroom we have.
>      */
>     eth_hdr = (struct ether_hdr *)rte_pktmbuf_append(pkt, HDRS_SIZE);
>     assert(eth_hdr);
>     l5_len = L5_GET_LEN(pkt);
> 
>     /* ETH Header. */
>     rte_eth_macaddr_get(PORT0, &eth_hdr->s_addr);
>     rte_eth_macaddr_get(PORT1, &eth_hdr->d_addr);
>     eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> 
>     /* IP Header. */
>     ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
>     ip_hdr->version_ihl = (4 << 4) | (ip_hdr_len >> 2);
>     ip_hdr->type_of_service = 0;
>     ip_hdr->total_length = rte_cpu_to_be_16(ip_hdr_len + tcp_hdr_len +
>                                             l5_len);
>     ip_hdr->packet_id = 0;
>     ip_hdr->fragment_offset = rte_cpu_to_be_16(0);
>     ip_hdr->time_to_live = 60;
>     ip_hdr->next_proto_id = IPPROTO_TCP;
>     ip_hdr->src_addr = rte_cpu_to_be_32(0x01010101);
>     ip_hdr->dst_addr = rte_cpu_to_be_32(0x01010101);
>     ip_hdr->hdr_checksum = rte_cpu_to_be_16(0);
> 
>     pkt->l3_len = ip_hdr_len;
>     pkt->ol_flags |= PKT_TX_IP_CKSUM;
> 
>     /* TCP Header. */
>     tcp_hdr = (struct tcp_hdr *)(ip_hdr + 1);
>     tcp_hdr->src_port = rte_cpu_to_be_16(0x42);
>     tcp_hdr->dst_port = rte_cpu_to_be_16(0x24);
>     tcp_hdr->sent_seq = rte_cpu_to_be_32(0x1234);
>     tcp_hdr->recv_ack = rte_cpu_to_be_32(0x1234);
>     tcp_hdr->data_off = tcp_hdr_len >> 2 << 4;
>     tcp_hdr->tcp_flags = TCP_FIN_FLAG;
>     tcp_hdr->rx_win = rte_cpu_to_be_16(0xffff);
>     tcp_hdr->tcp_urp = rte_cpu_to_be_16(0);
> 
>     pkt->ol_flags |= PKT_TX_TCP_CKSUM | PKT_TX_IPV4;
>     pkt->l4_len = tcp_hdr_len;
> 
>     tcp_hdr->cksum = 0;
>     tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr, pkt->ol_flags);
> 
>     /* Add Payload. */
>     assert(rte_pktmbuf_append(pkt, l5_len));
> 
>     RTE_LOG(ERR, USER1, "2:head = %d, tail = %d, len = %d\n",
>             rte_pktmbuf_headroom(pkt), rte_pktmbuf_tailroom(pkt),
>             rte_pktmbuf_pkt_len(pkt));
> 
>     return pkt;
> }
> 
> int main(int argc, char **argv)
> {
>     struct rte_mbuf *tx_mbuf[3];
> 
>     rte_eal_init(argc, argv);
> 
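>     /* Separate RX and TX mempools so their element sizes (RX_MBUF_SIZE
>      * and TX_MBUF_SIZE) can be varied independently. */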
>     rx_mpool = rte_mempool_create("rx_mpool", MBUF_COUNT, RX_MBUF_SIZE,
>                                   0,
>                                   sizeof(struct rte_pktmbuf_pool_private),
>                                   rte_pktmbuf_pool_init, NULL,
>                                   rte_pktmbuf_init, NULL,
>                                   SOCKET_ID_ANY,
>                                   0);
> 
>     tx_mpool = rte_mempool_create("tx_mpool", MBUF_COUNT, TX_MBUF_SIZE,
>                                   0,
>                                   sizeof(struct rte_pktmbuf_pool_private),
>                                   rte_pktmbuf_pool_init, NULL,
>                                   rte_pktmbuf_init, NULL,
>                                   SOCKET_ID_ANY,
>                                   0);
> 
>     assert(rx_mpool && tx_mpool);
> 
>     port_setup(PORT0);
>     port_setup(PORT1);
> 
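>     /* Send one TCP packet at a time from port 0 and check that every
>      * packet received on port 1 carries a non-zero RSS hash. */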
>     for (;;) {
>         uint16_t no_rx_buffers;
>         uint16_t i;
>         struct rte_mbuf *rx_pkts[16];
> 
>         tx_mbuf[0] = get_tcp_pkt(PORT0);
>         assert(rte_eth_tx_burst(PORT0, QUEUE0, tx_mbuf, 1) == 1);
> 
>         no_rx_buffers = rte_eth_rx_burst(PORT1, QUEUE0, rx_pkts, 16);
>         for (i = 0; i < no_rx_buffers; i++) {
>             RTE_LOG(ERR, USER1, "RX RSS HASH: %8lX %4X\n",
>                     rx_pkts[i]->ol_flags,
>                     rx_pkts[i]->hash.rss);
> 
>             assert(rx_pkts[i]->ol_flags == PKT_RX_RSS_HASH);
>             assert(rx_pkts[i]->hash.rss != 0);
> 
>             rte_pktmbuf_free(rx_pkts[i]);
>         }
>     }
> 
>     return 0;
> }

