[PATCH v6 08/13] pcapng: split packet copy from header insertion

Ivan Malov ivan.malov at arknetworks.am
Wed Jul 23 05:18:43 CEST 2025


On Wed, 23 Jul 2025, Ivan Malov wrote:

> Hi Stephen,
>
> On Tue, 22 Jul 2025, Stephen Hemminger wrote:
>
>> In new model, the packet was already copied, only need
>
> Copied? But what if it was "indirect attached" instead, as the model 
> envisages?
>
> Perhaps this is a silly question of mine, but it may not be clear what 
> happens
> in case of 'RTE_ETH_MIRROR_INDIRECT_FLAG' - whether it is safe to modify the
> mbuf and whether the 'indirect' clone has to be "freed" in the sense of being
> detached/refcnt updated after successful pcapng write, to avoid memory leaks?

Presumably, no leaks, - I now see 'rte_pktmbuf_free_bulk' invocation outside the
invocation of 'pcap_write_packets'. But the question about INDIRECT stands.

Thank you.

>
>> to wrap it in pcapng format.
>> 
>> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
>> ---
>> lib/pcapng/rte_pcapng.c | 178 +++++++++++++++++++++-------------------
>> lib/pcapng/rte_pcapng.h |  27 +++++-
>> 2 files changed, 120 insertions(+), 85 deletions(-)
>> 
>> diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
>> index 2a07b4c1f5..6db5d4da50 100644
>> --- a/lib/pcapng/rte_pcapng.c
>> +++ b/lib/pcapng/rte_pcapng.c
>> @@ -1,3 +1,4 @@
>> +
>> /* SPDX-License-Identifier: BSD-3-Clause
>>  * Copyright(c) 2019 Microsoft Corporation
>>  */
>> @@ -432,8 +433,24 @@ pcapng_vlan_insert(struct rte_mbuf *m, uint16_t 
>> ether_type, uint16_t tci)
>> 	return 0;
>> }
>> 
>> +/* pad the packet to 32 bit boundary */
>> +static inline int
>> +pcapng_mbuf_pad32(struct rte_mbuf *m)
>> +{
>> +	uint32_t pkt_len = rte_pktmbuf_pkt_len(m);
>> +	uint32_t padding = RTE_ALIGN(pkt_len, sizeof(uint32_t)) - pkt_len;
>> +
>> +	if (padding > 0) {
>> +		void *tail = rte_pktmbuf_append(m, padding);
>
> If the packet was indirectly attached in fact, is this OK to do? Just asking.
>
> Thank you.
>
>> +		if (tail == NULL)
>> +			return -1;
>> +		memset(tail, 0, padding);
>> +	}
>> +	return 0;
>> +}
>> +
>> /*
>> - *   The mbufs created use the Pcapng standard enhanced packet  block.
>> + *  The mbufs created use the Pcapng standard enhanced packet block.
>>  *
>>  *                         1                   2                   3
>>  *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
>> @@ -468,71 +485,28 @@ pcapng_vlan_insert(struct rte_mbuf *m, uint16_t 
>> ether_type, uint16_t tci)
>>  *    |                      Block Total Length                       |
>>  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
>>  */
>> -
>> -/* Make a copy of original mbuf with pcapng header and options */
>> -RTE_EXPORT_SYMBOL(rte_pcapng_copy)
>> -struct rte_mbuf *
>> -rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>> -		const struct rte_mbuf *md,
>> -		struct rte_mempool *mp,
>> -		uint32_t length,
>> -		enum rte_pcapng_direction direction,
>> -		const char *comment)
>> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pcapng_insert, 25.07)
>> +int
>> +rte_pcapng_insert(struct rte_mbuf *m, uint32_t queue,
>> +		  enum rte_pcapng_direction direction, uint32_t orig_len,
>> +		  uint64_t timestamp, const char *comment)
>> {
>> 	struct pcapng_enhance_packet_block *epb;
>> -	uint32_t orig_len, pkt_len, padding, flags;
>> -	struct pcapng_option *opt;
>> -	uint64_t timestamp;
>> -	uint16_t optlen;
>> -	struct rte_mbuf *mc;
>> -	bool rss_hash;
>> -
>> -#ifdef RTE_LIBRTE_ETHDEV_DEBUG
>> -	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
>> -#endif
>> -	orig_len = rte_pktmbuf_pkt_len(md);
>> +	uint32_t pkt_len = rte_pktmbuf_pkt_len(m);
>> +	uint32_t flags;
>> 
>> -	/* Take snapshot of the data */
>> -	mc = rte_pktmbuf_copy(md, mp, 0, length);
>> -	if (unlikely(mc == NULL))
>> -		return NULL;
>> -
>> -	/* Expand any offloaded VLAN information */
>> -	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
>> -	     (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
>> -	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
>> -	     (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
>> -		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
>> -				       md->vlan_tci) != 0)
>> -			goto fail;
>> -	}
>> +	if (unlikely(pcapng_mbuf_pad32(m) < 0))
>> +		return -1;
>> 
>> -	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
>> -	     (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
>> -	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
>> -	     (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
>> -		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
>> -				       md->vlan_tci_outer) != 0)
>> -			goto fail;
>> -	}
>> +	uint16_t optlen = pcapng_optlen(sizeof(flags));
>> 
>> -	/* record HASH on incoming packets */
>> -	rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
>> -		    (md->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
>> +	/* make queue optional? */
>> +	optlen += pcapng_optlen(sizeof(queue));
>> 
>> -	/* pad the packet to 32 bit boundary */
>> -	pkt_len = rte_pktmbuf_pkt_len(mc);
>> -	padding = RTE_ALIGN(pkt_len, sizeof(uint32_t)) - pkt_len;
>> -	if (padding > 0) {
>> -		void *tail = rte_pktmbuf_append(mc, padding);
>> +	/* does packet have valid RSS hash to include */
>> +	bool rss_hash = (direction == RTE_PCAPNG_DIRECTION_IN &&
>> +			 (m->ol_flags & RTE_MBUF_F_RX_RSS_HASH));
>> 
>> -		if (tail == NULL)
>> -			goto fail;
>> -		memset(tail, 0, padding);
>> -	}
>> -
>> -	optlen = pcapng_optlen(sizeof(flags));
>> -	optlen += pcapng_optlen(sizeof(queue));
>> 	if (rss_hash)
>> 		optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t));
>> 
>> @@ -540,10 +514,10 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>> 		optlen += pcapng_optlen(strlen(comment));
>>
>> 	/* reserve trailing options and block length */
>> -	opt = (struct pcapng_option *)
>> -		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
>> +	struct pcapng_option *opt = (struct pcapng_option *)
>> +		rte_pktmbuf_append(m, optlen + sizeof(uint32_t));
>> 	if (unlikely(opt == NULL))
>> -		goto fail;
>> +		return -1;
>>
>> 	switch (direction) {
>> 	case RTE_PCAPNG_DIRECTION_IN:
>> @@ -556,24 +530,20 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>> 		flags = 0;
>> 	}
>> 
>> -	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
>> -				&flags, sizeof(flags));
>> -
>> -	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
>> -				&queue, sizeof(queue));
>> +	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS, &flags, 
>> sizeof(flags));
>> +	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE, &queue, 
>> sizeof(queue));
>>
>> 	if (rss_hash) {
>> 		uint8_t hash_opt[5];
>> 
>> -		/* The algorithm could be something else if
>> -		 * using rte_flow_action_rss; but the current API does not
>> -		 * have a way for ethdev to report  this on a per-packet 
>> basis.
>> +		/* The algorithm could be something else but the current API 
>> does not
>> +		 * have a way for to record this on a per-packet basis
>> +		 * and the PCAPNG hash types don't match the DPDK types.
>> 		 */
>> 		hash_opt[0] = PCAPNG_HASH_TOEPLITZ;
>> 
>> -		memcpy(&hash_opt[1], &md->hash.rss, sizeof(uint32_t));
>> -		opt = pcapng_add_option(opt, PCAPNG_EPB_HASH,
>> -					&hash_opt, sizeof(hash_opt));
>> +		memcpy(&hash_opt[1], &m->hash.rss, sizeof(uint32_t));
>> +		opt = pcapng_add_option(opt, PCAPNG_EPB_HASH, &hash_opt, 
>> sizeof(hash_opt));
>> 	}
>>
>> 	if (comment)
>> @@ -583,19 +553,14 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>> 	/* Note: END_OPT necessary here. Wireshark doesn't do it. */
>>
>> 	/* Add PCAPNG packet header */
>> -	epb = (struct pcapng_enhance_packet_block *)
>> -		rte_pktmbuf_prepend(mc, sizeof(*epb));
>> +	epb = (struct pcapng_enhance_packet_block *) rte_pktmbuf_prepend(m, 
>> sizeof(*epb));
>> 	if (unlikely(epb == NULL))
>> -		goto fail;
>> +		return -1;
>>
>> 	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
>> -	epb->block_length = rte_pktmbuf_pkt_len(mc);
>> -
>> -	/* Interface index is filled in later during write */
>> -	mc->port = port_id;
>> +	epb->block_length = rte_pktmbuf_pkt_len(m);
>> 
>> -	/* Put timestamp in cycles here - adjust in packet write */
>> -	timestamp = rte_get_tsc_cycles();
>> +	/* Put timestamp in cycles here - adjusted in packet write */
>> 	epb->timestamp_hi = timestamp >> 32;
>> 	epb->timestamp_lo = (uint32_t)timestamp;
>> 	epb->capture_length = pkt_len;
>> @@ -603,9 +568,56 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>>
>> 	/* set trailer of block length */
>> 	*(uint32_t *)opt = epb->block_length;
>> +	return 0;
>> +}
>> +
>> +/* Make a copy of original mbuf with pcapng header and options */
>> +RTE_EXPORT_SYMBOL(rte_pcapng_copy)
>> +struct rte_mbuf *
>> +rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>> +		const struct rte_mbuf *md,
>> +		struct rte_mempool *mp,
>> +		uint32_t length,
>> +		enum rte_pcapng_direction direction,
>> +		const char *comment)
>> +{
>> +	uint32_t orig_len = rte_pktmbuf_pkt_len(md);
>> +	struct rte_mbuf *mc;
>> 
>> -	return mc;
>> +#ifdef RTE_LIBRTE_ETHDEV_DEBUG
>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
>> +#endif
>> +
>> +	/* Take snapshot of the data */
>> +	mc = rte_pktmbuf_copy(md, mp, 0, length);
>> +	if (unlikely(mc == NULL))
>> +		return NULL;
>> +
>> +	/* Expand any offloaded VLAN information */
>> +	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
>> +	     (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
>> +	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
>> +	     (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
>> +		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
>> +				       md->vlan_tci) != 0)
>> +			goto fail;
>> +	}
>> +
>> +	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
>> +	     (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
>> +	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
>> +	     (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
>> +		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
>> +				       md->vlan_tci_outer) != 0)
>> +			goto fail;
>> +	}
>> +
>> +	/* Interface index is filled in later during write */
>> +	mc->port = port_id;
>> 
>> +	if (likely(rte_pcapng_insert(mc, queue, direction, orig_len,
>> +				     rte_get_tsc_cycles(), comment) == 0))
>> +		return mc;
>> fail:
>> 	rte_pktmbuf_free(mc);
>> 	return NULL;
>> diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
>> index 48f2b57564..4914ac9622 100644
>> --- a/lib/pcapng/rte_pcapng.h
>> +++ b/lib/pcapng/rte_pcapng.h
>> @@ -99,7 +99,7 @@ enum rte_pcapng_direction {
>> };
>> 
>> /**
>> - * Format an mbuf for writing to file.
>> + * Make a copy of mbuf for writing to file.
>>  *
>>  * @param port_id
>>  *   The Ethernet port on which packet was received
>> @@ -117,7 +117,7 @@ enum rte_pcapng_direction {
>>  * @param direction
>>  *   The direction of the packer: receive, transmit or unknown.
>>  * @param comment
>> - *   Packet comment.
>> + *   Packet comment (optional).
>>  *
>>  * @return
>>  *   - The pointer to the new mbuf formatted for pcapng_write
>> @@ -129,6 +129,29 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
>> 		uint32_t length,
>> 		enum rte_pcapng_direction direction, const char *comment);
>> 
>> +/**
>> + * Format an mbuf for writing to file.
>> + *
>> + * @param m
>> + *   The mbuf to modify.
>> + * @param queue
>> + *   The queue on the Ethernet port where packet was received
>> + *   or is going to be transmitted.
>> + * @param direction
>> + *   The direction of the packer: receive, transmit or unknown.
>> + * @param orig_len
>> + *   The length of the original packet which maybe less than actual
>> + *   packet if only a snapshot was captured.
>> + * @param timestamp
>> + *   The timestamp for packet in TSC cycles.
>> + * @param comment
>> + *   Packet comment (optional).
>> + */
>> +__rte_experimental
>> +int
>> +rte_pcapng_insert(struct rte_mbuf *m, uint32_t queue,
>> +		  enum rte_pcapng_direction direction, uint32_t orig_len,
>> +		  uint64_t timestamp, const char *comment);
>> 
>> /**
>>  * Determine optimum mbuf data size.
>> -- 
>> 2.47.2
>> 
>> 
>


More information about the dev mailing list