[dpdk-users] movzbl in rte_eth_rx_burst

Dorsett, Michal Michal.Dorsett at verint.com
Sat Aug 19 10:45:15 CEST 2017


We are running dpdk 16.07. Below is a snippet from a perf annotate report of a CPU running a thread that constantly reads packets.
As you can see, the hottest instruction is

movzbl 0x10(%rcx),%r15d

which, I believe, is referring to

struct rte_eth_dev *dev = &rte_eth_devices[port_id];

Can someone explain why this instruction is so costly, and how I can remedy this?

  0.66 x        lea    0xc8(%rsp),%rax
       x      _ZN8LBThread7executeEv():
       x              {
       x                  u32RetPkt = vecRXQ->at(u32Index)->receiveRawPackets(xPktArr, BURST_SIZE);
       x        movq   $0x0,0x50(%rsp)
       x      _ZNSt6vectorIP18ReceivePacketQueueSaIS1_EE2atEm():
       x        movq   $0x0,0x48(%rsp)
       x      __mempool_generic_put():
  0.09 x        mov    %rax,0x88(%rsp)
  0.42 x        mov    0x60(%rsp),%rax
       x        add    $0x18,%rax
  0.05 x        mov    %rax,0x40(%rsp)
  0.14 x        mov    0x50(%rsp),%rax
       x      _ZN8LBThread7executeEv():
  0.71 x 370:   mov    (%rdx,%rax,8),%rax
  0.57 x        mov    %rax,%rcx
  1.23 x        mov    %rax,0x80(%rsp)
       x      rte_rdtsc():
       x              }
       x      #endif
       x              asm volatile("rdtsc" :
       x                           "=a" (tsc.lo_32),
       x                           "=d" (tsc.hi_32));
  0.47 x        rdtsc
       x      rte_eth_rx_burst():
       x       */
       x      static inline uint16_t
       x      rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
       x                       struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
       x      {
       x              struct rte_eth_dev *dev = &rte_eth_devices[port_id];
24.74 x        movzbl 0x10(%rcx),%r15d
       x      rte_rdtsc():
  0.09 x        mov    %eax,%r13d
       x      _ZN18ReceivePacketQueue17receiveRawPacketsEP6Packetj():
       x          uint64_t u64StartTick = CPUCycles::getTSCCycles();
       x          uint32_t u32PtksReceived;
       x          int32_t refcnt;
       x          int retCode;
       x          u32PtksReceived = rte_eth_rx_burst(m_u8PortId, m_u16QueueIndexForNICPort, m_pArrPktsBurst, u32NumOfPkts);
  2.60 x        movzwl 0xe(%rcx),%r14d
       x      rte_rdtsc():
       x        shl    $0x20,%rdx
       x      _ZN18ReceivePacketQueue17receiveRawPacketsEP6Packetj():
       x        lea    0x18(%rcx),%r12
       x      rte_rdtsc():
  0.05 x        or     %rdx,%r13
       x      rte_eth_rx_burst():
       x                      RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
       x                      return 0;
       x              }
       x      #endif
       x              int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],


