[dpdk-dev] [EXT] Re: [PATCH] app/testpmd: use mempool bulk get for txonly mode
Pavan Nikhilesh Bhagavatula
pbhagavatula at marvell.com
Fri Mar 1 09:45:31 CET 2019
On Fri, 2019-03-01 at 10:38 +0300, Andrew Rybchenko wrote:
> On 2/28/19 10:42 PM, Pavan Nikhilesh Bhagavatula wrote:
> > From: Pavan Nikhilesh <pbhagavatula at marvell.com>
> >
> > Use mempool bulk get ops to alloc burst of packets and process them
> > instead of calling pktalloc for every packet.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula at marvell.com>
> > ---
> > app/test-pmd/txonly.c | 139 +++++++++++++++++++++-----------------
> > ----
> > 1 file changed, 71 insertions(+), 68 deletions(-)
> >
> > diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> > index 1f08b6ed3..eef8b3a45 100644
> > --- a/app/test-pmd/txonly.c
> > +++ b/app/test-pmd/txonly.c
> > @@ -147,6 +147,61 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr
> > *ip_hdr,
> > ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
> > }
> >
> > +static inline bool
> > +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
> > + struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
> > + const uint16_t vlan_tci_outer, const uint64_t ol_flags)
> > +{
> > + uint32_t nb_segs, pkt_len = 0;
> > + struct rte_mbuf *pkt_seg;
> > + uint8_t i;
> > +
> > + if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
> > + nb_segs = random() % tx_pkt_nb_segs + 1;
> > + else
> > + nb_segs = tx_pkt_nb_segs;
> > +
> > + rte_pktmbuf_reset_headroom(pkt);
> > + pkt->data_len = tx_pkt_seg_lengths[0];
> > + pkt->ol_flags = ol_flags;
> > + pkt->vlan_tci = vlan_tci;
> > + pkt->vlan_tci_outer = vlan_tci_outer;
> > + pkt->l2_len = sizeof(struct ether_hdr);
> > + pkt->l3_len = sizeof(struct ipv4_hdr);
> > +
> > + pkt_seg = pkt;
> > + for (i = 1; i < nb_segs; i++) {
> > + pkt_seg->next = rte_mbuf_raw_alloc(mbp);
>
> Why is bulk allocation not used here?
Will update in v2.
>
> > + if (pkt_seg->next == NULL) {
> > + pkt->nb_segs = i;
> > + rte_pktmbuf_free(pkt);
> > + return false;
> > + }
> > + pkt_seg = pkt_seg->next;
> > + pkt_seg->data_len = tx_pkt_seg_lengths[i];
> > + pkt_len += pkt_seg->data_len;
> > + }
> > + pkt_seg->next = NULL; /* Last segment of packet. */
> > + /*
> > + * Copy headers in first packet segment(s).
> > + */
> > + copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
> > + copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> > + sizeof(struct ether_hdr));
> > + copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> > + sizeof(struct ether_hdr) +
> > + sizeof(struct ipv4_hdr));
> > +
> > + /*
> > + * Complete first mbuf of packet and append it to the
> > + * burst of packets to be transmitted.
> > + */
> > + pkt->nb_segs = nb_segs;
> > + pkt->pkt_len += pkt_len;
> > +
> > + return true;
> > +}
> > +
> > /*
> > * Transmit a burst of multi-segments packets.
> > */
> > @@ -155,8 +210,6 @@ pkt_burst_transmit(struct fwd_stream *fs)
> > {
> > struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> > struct rte_port *txp;
> > - struct rte_mbuf *pkt;
> > - struct rte_mbuf *pkt_seg;
> > struct rte_mempool *mbp;
> > struct ether_hdr eth_hdr;
> > uint16_t nb_tx;
> > @@ -164,14 +217,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
> > uint16_t vlan_tci, vlan_tci_outer;
> > uint32_t retry;
> > uint64_t ol_flags = 0;
> > - uint8_t i;
> > uint64_t tx_offloads;
> > #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> > uint64_t start_tsc;
> > uint64_t end_tsc;
> > uint64_t core_cycles;
> > #endif
> > - uint32_t nb_segs, pkt_len;
> >
> > #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> > start_tsc = rte_rdtsc();
> > @@ -188,72 +239,24 @@ pkt_burst_transmit(struct fwd_stream *fs)
> > ol_flags |= PKT_TX_QINQ_PKT;
> > if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
> > ol_flags |= PKT_TX_MACSEC;
> > - for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> > - pkt = rte_mbuf_raw_alloc(mbp);
> > - if (pkt == NULL) {
> > - nomore_mbuf:
> > - if (nb_pkt == 0)
> > - return;
> > - break;
> > - }
> >
> > - /*
> > - * Using raw alloc is good to improve performance,
> > - * but some consumers may use the headroom and so
> > - * decrement data_off. We need to make sure it is
> > - * reset to default value.
> > - */
> > - rte_pktmbuf_reset_headroom(pkt);
> > - pkt->data_len = tx_pkt_seg_lengths[0];
> > - pkt_seg = pkt;
> > - if (tx_pkt_split == TX_PKT_SPLIT_RND)
> > - nb_segs = random() % tx_pkt_nb_segs + 1;
> > - else
> > - nb_segs = tx_pkt_nb_segs;
> > - pkt_len = pkt->data_len;
> > - for (i = 1; i < nb_segs; i++) {
> > - pkt_seg->next = rte_mbuf_raw_alloc(mbp);
> > - if (pkt_seg->next == NULL) {
> > - pkt->nb_segs = i;
> > - rte_pktmbuf_free(pkt);
> > - goto nomore_mbuf;
> > - }
> > - pkt_seg = pkt_seg->next;
> > - pkt_seg->data_len = tx_pkt_seg_lengths[i];
> > - pkt_len += pkt_seg->data_len;
> > - }
> > - pkt_seg->next = NULL; /* Last segment of packet. */
> > -
> > - /*
> > - * Initialize Ethernet header.
> > - */
> > - ether_addr_copy(&peer_eth_addrs[fs-
> > >peer_addr],ð_hdr.d_addr);
> > - ether_addr_copy(&ports[fs->tx_port].eth_addr,
> > ð_hdr.s_addr);
> > - eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> > -
> > - /*
> > - * Copy headers in first packet segment(s).
> > - */
> > - copy_buf_to_pkt(ð_hdr, sizeof(eth_hdr), pkt, 0);
> > - copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> > - sizeof(struct ether_hdr));
> > - copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> > - sizeof(struct ether_hdr) +
> > - sizeof(struct ipv4_hdr));
> > -
> > - /*
> > - * Complete first mbuf of packet and append it to the
> > - * burst of packets to be transmitted.
> > - */
> > - pkt->nb_segs = nb_segs;
> > - pkt->pkt_len = pkt_len;
> > - pkt->ol_flags = ol_flags;
> > - pkt->vlan_tci = vlan_tci;
> > - pkt->vlan_tci_outer = vlan_tci_outer;
> > - pkt->l2_len = sizeof(struct ether_hdr);
> > - pkt->l3_len = sizeof(struct ipv4_hdr);
> > - pkts_burst[nb_pkt] = pkt;
> > + /*
> > + * Initialize Ethernet header.
> > + */
> > + ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
> > ð_hdr.d_addr);
> > + ether_addr_copy(&ports[fs->tx_port].eth_addr, ð_hdr.s_addr);
> > + eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> > +
> > + if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
> > nb_pkt_per_burst))
> > + return;
>
> Before the patch the code survived insufficient of mbufs condition
> and
> sent as much as it can allocate. Now it is not. I can't say for sure
> if the
> new behaviour is acceptable or not (I'd say no), but even if it is
> acceptable
> it should be highlighted in the changeset description.
>
Ack.
> Taking segments allocation into account may I suggest to consider
> a bit sophisticated implementation which allocates packets in bulks
> with fallback to individual mbufs allocation and usage of the
> mechanism
> for all segments (i.e. allocate bulk, use it, allocate next, use it,
> etc).
I think segment allocation can always use bulk as failure should always
free the entire packet chain.
I will send v2 with fallback to individual mbuf allocs.
>
> > +
> > + for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> > + if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt],
> > mbp,
> > + ð_hdr, vlan_tci, vlan_tci_outer,
> > ol_flags)))
> > + goto tx_pkts;
>
> If segment allocation fails, who frees remaining packets from the
> bulk?
My bad. Thanks for the heads up.
>
> > }
> > +tx_pkts:
> > +
> > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> > nb_pkt);
> > /*
> > * Retry if necessary
>
More information about the dev
mailing list