[PATCH v6 08/13] pcapng: split packet copy from header insertion
Ivan Malov
ivan.malov at arknetworks.am
Wed Jul 23 04:32:45 CEST 2025
Hi Stephen,
On Tue, 22 Jul 2025, Stephen Hemminger wrote:
> In new model, the packet was already copied, only need
Copied? But what if it was "indirect attached" instead, as the model envisages?
Perhaps this is a silly question of mine, but it may not be clear what happens
in case of 'RTE_ETH_MIRROR_INDIRECT_FLAG' - whether it is safe to modify the
mbuf and whether the 'indirect' clone has to be "freed" in the sense of being
detached/refcnt updated after successful pcapng write, to avoid memory leaks?
> to wrap it in pcapng format.
>
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
> ---
> lib/pcapng/rte_pcapng.c | 178 +++++++++++++++++++++-------------------
> lib/pcapng/rte_pcapng.h | 27 +++++-
> 2 files changed, 120 insertions(+), 85 deletions(-)
>
> diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
> index 2a07b4c1f5..6db5d4da50 100644
> --- a/lib/pcapng/rte_pcapng.c
> +++ b/lib/pcapng/rte_pcapng.c
> @@ -1,3 +1,4 @@
> +
> /* SPDX-License-Identifier: BSD-3-Clause
> * Copyright(c) 2019 Microsoft Corporation
> */
> @@ -432,8 +433,24 @@ pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
> return 0;
> }
>
> +/* pad the packet to 32 bit boundary */
> +static inline int
> +pcapng_mbuf_pad32(struct rte_mbuf *m)
> +{
> + uint32_t pkt_len = rte_pktmbuf_pkt_len(m);
> + uint32_t padding = RTE_ALIGN(pkt_len, sizeof(uint32_t)) - pkt_len;
> +
> + if (padding > 0) {
> + void *tail = rte_pktmbuf_append(m, padding);
If the packet was indirectly attached in fact, is this OK to do? Just asking.
Thank you.
> + if (tail == NULL)
> + return -1;
> + memset(tail, 0, padding);
> + }
> + return 0;
> +}
> +
> /*
> - * The mbufs created use the Pcapng standard enhanced packet block.
> + * The mbufs created use the Pcapng standard enhanced packet block.
> *
> * 1 2 3
> * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
> @@ -468,71 +485,28 @@ pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
> * | Block Total Length |
> * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> */
> -
> -/* Make a copy of original mbuf with pcapng header and options */
> -RTE_EXPORT_SYMBOL(rte_pcapng_copy)
> -struct rte_mbuf *
> -rte_pcapng_copy(uint16_t port_id, uint32_t queue,
> - const struct rte_mbuf *md,
> - struct rte_mempool *mp,
> - uint32_t length,
> - enum rte_pcapng_direction direction,
> - const char *comment)
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pcapng_insert, 25.07)
> +int
> +rte_pcapng_insert(struct rte_mbuf *m, uint32_t queue,
> + enum rte_pcapng_direction direction, uint32_t orig_len,
> + uint64_t timestamp, const char *comment)
> {
> struct pcapng_enhance_packet_block *epb;
> - uint32_t orig_len, pkt_len, padding, flags;
> - struct pcapng_option *opt;
> - uint64_t timestamp;
> - uint16_t optlen;
> - struct rte_mbuf *mc;
> - bool rss_hash;
> -
> -#ifdef RTE_LIBRTE_ETHDEV_DEBUG
> - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
> -#endif
> - orig_len = rte_pktmbuf_pkt_len(md);
> + uint32_t pkt_len = rte_pktmbuf_pkt_len(m);
> + uint32_t flags;
>
> - /* Take snapshot of the data */
> - mc = rte_pktmbuf_copy(md, mp, 0, length);
> - if (unlikely(mc == NULL))
> - return NULL;
> -
> - /* Expand any offloaded VLAN information */
> - if ((direction == RTE_PCAPNG_DIRECTION_IN &&
> - (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
> - (direction == RTE_PCAPNG_DIRECTION_OUT &&
> - (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
> - if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
> - md->vlan_tci) != 0)
> - goto fail;
> - }
> + if (unlikely(pcapng_mbuf_pad32(m) < 0))
> + return -1;
>
> - if ((direction == RTE_PCAPNG_DIRECTION_IN &&
> - (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
> - (direction == RTE_PCAPNG_DIRECTION_OUT &&
> - (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
> - if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
> - md->vlan_tci_outer) != 0)
> - goto fail;
> - }
> + uint16_t optlen = pcapng_optlen(sizeof(flags));
>
> - /* record HASH on incoming packets */
> - rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
> - (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
> + /* make queue optional? */
> + optlen += pcapng_optlen(sizeof(queue));
>
> - /* pad the packet to 32 bit boundary */
> - pkt_len = rte_pktmbuf_pkt_len(mc);
> - padding = RTE_ALIGN(pkt_len, sizeof(uint32_t)) - pkt_len;
> - if (padding > 0) {
> - void *tail = rte_pktmbuf_append(mc, padding);
> + /* does packet have valid RSS hash to include */
> + bool rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
> + (m->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
>
> - if (tail == NULL)
> - goto fail;
> - memset(tail, 0, padding);
> - }
> -
> - optlen = pcapng_optlen(sizeof(flags));
> - optlen += pcapng_optlen(sizeof(queue));
> if (rss_hash)
> optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t));
>
> @@ -540,10 +514,10 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
> optlen += pcapng_optlen(strlen(comment));
>
> /* reserve trailing options and block length */
> - opt = (struct pcapng_option *)
> - rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
> + struct pcapng_option *opt = (struct pcapng_option *)
> + rte_pktmbuf_append(m, optlen + sizeof(uint32_t));
> if (unlikely(opt == NULL))
> - goto fail;
> + return -1;
>
> switch (direction) {
> case RTE_PCAPNG_DIRECTION_IN:
> @@ -556,24 +530,20 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
> flags = 0;
> }
>
> - opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
> - &flags, sizeof(flags));
> -
> - opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
> - &queue, sizeof(queue));
> + opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS, &flags, sizeof(flags));
> + opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE, &queue, sizeof(queue));
>
> if (rss_hash) {
> uint8_t hash_opt[5];
>
> - /* The algorithm could be something else if
> - * using rte_flow_action_rss; but the current API does not
> - * have a way for ethdev to report this on a per-packet basis.
> + /* The algorithm could be something else but the current API does not
> + * have a way for to record this on a per-packet basis
> + * and the PCAPNG hash types don't match the DPDK types.
> */
> hash_opt[0] = PCAPNG_HASH_TOEPLITZ;
>
> - memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t));
> - opt = pcapng_add_option(opt, PCAPNG_EPB_HASH,
> - &hash_opt, sizeof(hash_opt));
> + memcpy(&hash_opt[1], &m->hash.rss, sizeof(uint32_t));
> + opt = pcapng_add_option(opt, PCAPNG_EPB_HASH, &hash_opt, sizeof(hash_opt));
> }
>
> if (comment)
> @@ -583,19 +553,14 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
> /* Note: END_OPT necessary here. Wireshark doesn't do it. */
>
> /* Add PCAPNG packet header */
> - epb = (struct pcapng_enhance_packet_block *)
> - rte_pktmbuf_prepend(mc, sizeof(*epb));
> + epb = (struct pcapng_enhance_packet_block *) rte_pktmbuf_prepend(m, sizeof(*epb));
> if (unlikely(epb == NULL))
> - goto fail;
> + return -1;
>
> epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
> - epb->block_length = rte_pktmbuf_pkt_len(mc);
> -
> - /* Interface index is filled in later during write */
> - mc->port = port_id;
> + epb->block_length = rte_pktmbuf_pkt_len(m);
>
> - /* Put timestamp in cycles here - adjust in packet write */
> - timestamp = rte_get_tsc_cycles();
> + /* Put timestamp in cycles here - adjusted in packet write */
> epb->timestamp_hi = timestamp >> 32;
> epb->timestamp_lo = (uint32_t)timestamp;
> epb->capture_length = pkt_len;
> @@ -603,9 +568,56 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>
> /* set trailer of block length */
> *(uint32_t *)opt = epb->block_length;
> + return 0;
> +}
> +
> +/* Make a copy of original mbuf with pcapng header and options */
> +RTE_EXPORT_SYMBOL(rte_pcapng_copy)
> +struct rte_mbuf *
> +rte_pcapng_copy(uint16_t port_id, uint32_t queue,
> + const struct rte_mbuf *md,
> + struct rte_mempool *mp,
> + uint32_t length,
> + enum rte_pcapng_direction direction,
> + const char *comment)
> +{
> + uint32_t orig_len = rte_pktmbuf_pkt_len(md);
> + struct rte_mbuf *mc;
>
> - return mc;
> +#ifdef RTE_LIBRTE_ETHDEV_DEBUG
> + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
> +#endif
> +
> + /* Take snapshot of the data */
> + mc = rte_pktmbuf_copy(md, mp, 0, length);
> + if (unlikely(mc == NULL))
> + return NULL;
> +
> + /* Expand any offloaded VLAN information */
> + if ((direction == RTE_PCAPNG_DIRECTION_IN &&
> + (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
> + (direction == RTE_PCAPNG_DIRECTION_OUT &&
> + (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
> + if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
> + md->vlan_tci) != 0)
> + goto fail;
> + }
> +
> + if ((direction == RTE_PCAPNG_DIRECTION_IN &&
> + (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
> + (direction == RTE_PCAPNG_DIRECTION_OUT &&
> + (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
> + if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
> + md->vlan_tci_outer) != 0)
> + goto fail;
> + }
> +
> + /* Interface index is filled in later during write */
> + mc->port = port_id;
>
> + if (likely(rte_pcapng_insert(mc, queue, direction, orig_len,
> + rte_get_tsc_cycles(), comment) == 0))
> + return mc;
> fail:
> rte_pktmbuf_free(mc);
> return NULL;
> diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
> index 48f2b57564..4914ac9622 100644
> --- a/lib/pcapng/rte_pcapng.h
> +++ b/lib/pcapng/rte_pcapng.h
> @@ -99,7 +99,7 @@ enum rte_pcapng_direction {
> };
>
> /**
> - * Format an mbuf for writing to file.
> + * Make a copy of mbuf for writing to file.
> *
> * @param port_id
> * The Ethernet port on which packet was received
> @@ -117,7 +117,7 @@ enum rte_pcapng_direction {
> * @param direction
> * The direction of the packer: receive, transmit or unknown.
> * @param comment
> - * Packet comment.
> + * Packet comment (optional).
> *
> * @return
> * - The pointer to the new mbuf formatted for pcapng_write
> @@ -129,6 +129,29 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
> uint32_t length,
> enum rte_pcapng_direction direction, const char *comment);
>
> +/**
> + * Format an mbuf for writing to file.
> + *
> + * @param m
> + * The mbuf to modify.
> + * @param queue
> + * The queue on the Ethernet port where packet was received
> + * or is going to be transmitted.
> + * @param direction
> + * The direction of the packer: receive, transmit or unknown.
> + * @param orig_len
> + * The length of the original packet which maybe less than actual
> + * packet if only a snapshot was captured.
> + * @param timestamp
> + * The timestamp for packet in TSC cycles.
> + * @param comment
> + * Packet comment (optional).
> + */
> +__rte_experimental
> +int
> +rte_pcapng_insert(struct rte_mbuf *m, uint32_t queue,
> + enum rte_pcapng_direction direction, uint32_t orig_len,
> + uint64_t timestamp, const char *comment);
>
> /**
> * Determine optimum mbuf data size.
> --
> 2.47.2
>
>
More information about the dev
mailing list