|SUCCESS| [v3,3/3] net/zxdh: optimize Tx xmit pkts performance

dpdklab at iol.unh.edu dpdklab at iol.unh.edu
Sat May 9 13:49:15 CEST 2026


Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/163835

_AI Code Review_

=== Patch Review: series.patch (via Claude) ===
Review date: 2026-05-09

# Review of DPDK Patch Series

commit: 20260509062930.3254766-2-wang.junlong1 at zte.com.cn
Author: Junlong Wang <wang.junlong1 at zte.com.cn>
Subject: [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance

This patch reorganizes structure fields for better cache locality and removes
the RX software ring (sw_ring) to reduce memory allocation and copy operations.

> diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
> index aeb01f4652..08119e28c7 100644
> --- a/drivers/net/zxdh/zxdh_ethdev.c
> +++ b/drivers/net/zxdh/zxdh_ethdev.c

[ ... ]

> @@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
>  	}
>  
>  	if (queue_type == ZXDH_VTNET_RQ) {
> -		size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
> -
> -		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
> -		if (!sw_ring) {
> -			PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
> -			ret = -ENOMEM;
> -			goto fail_q_alloc;
> -		}
> -
> -		vq->sw_ring = sw_ring;

The removal of sw_ring allocation seems correct, but does the code that
previously used sw_ring still function properly? Can this cause issues in
zxdh_recv_pkts_packed() or other RX functions that may have relied on sw_ring?

> @@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
>  		txvq->zxdh_net_hdr_mem = hdr_mz->iova;
>  	}
>  
> -	vq->offset = offsetof(struct rte_mbuf, buf_iova);
>  	if (queue_type == ZXDH_VTNET_TQ) {
>  		struct zxdh_tx_region *txr = hdr_mz->addr;
> -		uint32_t i;
> -
>  		memset(txr, 0, vq_size * sizeof(*txr));
> -		for (i = 0; i < vq_size; i++) {
> -			/* first indirect descriptor is always the tx header */
> -			struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
> -
> -			zxdh_vring_desc_init_indirect_packed(start_dp,
> -					RTE_DIM(txr[i].tx_packed_indir));
> -			start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
> -					offsetof(struct zxdh_tx_region, tx_hdr);
> -			/* length will be updated to actual pi hdr size when xmit pkt */
> -			start_dp->len = 0;
> -		}

Does removal of the indirect descriptor initialization cause issues in
zxdh_enqueue_xmit_packed()? The comment mentions the length will be updated
when transmitting packets - is that still happening correctly?

> @@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
>  	}
>  	return 0;
>  fail_q_alloc:
> -	rte_free(sw_ring);
>  	rte_memzone_free(hdr_mz);

[ ... ]

> diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
> index 1a0c8a0d90..b079272162 100644
> --- a/drivers/net/zxdh/zxdh_queue.h
> +++ b/drivers/net/zxdh/zxdh_queue.h

[ ... ]

> @@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
>  	uint16_t next;
>  };
>  
> +struct zxdh_vring {
> +	uint32_t num;
> +	struct zxdh_vring_desc  *desc;
> +	struct zxdh_vring_avail *avail;
> +	struct zxdh_vring_used  *used;
> +};
> +
>  struct zxdh_virtqueue {
> +	union {
> +		struct {
> +			struct zxdh_vring ring; /**< vring keeping desc, used and avail */
> +		} vq_split;
> +		struct __rte_packed_begin {
> +			struct zxdh_vring_packed ring;
> +		} __rte_packed_end vq_packed;
> +	};

Does this alignment of the vq_packed structure with __rte_packed attributes
match the hardware requirements? Can packing this struct cause unaligned
accesses on some platforms?

> +	uint16_t cached_flags; /**< cached flags for descs */
> +	uint8_t used_wrap_counter;
> +	uint8_t rsv;
> +	uint16_t vq_free_thresh; /**< free threshold */
> +	uint16_t next_qidx;

The next_qidx field is added but never initialized or used in this patch.
Does this field cause issues or is it intended for future use?

[ ... ]

> @@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
>  static inline void
>  zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
>  {
> -	if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
> -		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
> -		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
> +	if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
> +		vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
> +		vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
>  	}
>  }
>  
>  static inline void
>  zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
>  {
> -	if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
> -		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
> -		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
> +	if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
> +		vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
> +		vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;

The original zxdh_queue_enable_intr() set event_flags_shadow to DISABLE
instead of ENABLE. Was this a bug being fixed here, or does this change
introduce a functional regression?

[ ... ]

> @@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
>  	dp->flags = flags;
>  }
>  
> -static inline int32_t
> -zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
> -{
> -	uint16_t flags;
> -	uint16_t used, avail;
> -
> -	flags = desc->flags;
> -	rte_io_rmb();
> -	used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
> -	avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
> -	return avail == used && used == vq->vq_packed.used_wrap_counter;
> -}
> -
>  static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
>  {
> -	ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
> +	/* Bit[0:15]: vq queue index
> +	 * Bit[16:30]: avail index
> +	 * Bit[31]: avail wrap counter
> +	 */
> +	uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
> +		ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
> +		((uint32_t)vq->vq_avail_idx << 16) |
> +		vq->vq_queue_index;
> +	rte_write32(notify_data, vq->notify_addr);

This changes from calling ZXDH_VTPCI_OPS()->notify_queue() to directly
writing to notify_addr. Does this bypass necessary operations in the
original notify_queue function? Can this cause synchronization issues?

[ ... ]

commit: 20260509062930.3254766-3-wang.junlong1 at zte.com.cn
Author: Junlong Wang <wang.junlong1 at zte.com.cn>
Subject: [PATCH v3 2/3] net/zxdh: optimize Rx recv pkts performance

This patch adds simple RX recv functions for single-segment packet recv,
optimizes RX recv packed ops, and removes unnecessary feature checks.

> diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
> index 08119e28c7..0ab137189b 100644

[ ... ]

> @@ -1263,18 +1263,43 @@ zxdh_dev_close(struct rte_eth_dev *dev)
>  	return ret;
>  }
>  
> -static int32_t
> -zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
> +/*
> + * Determine whether the current configuration requires support for scattered
> + * receive; return 1 if scattered receive is required and 0 if not.
> + */
> +static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
>  {
> -	struct zxdh_hw *hw = eth_dev->data->dev_private;
> +	uint16_t buf_size;
>  
> -	if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
> -		PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
> -		return -1;
> +	if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
> +		eth_dev->data->lro = 1;
> +		return 1;
>  	}

Setting eth_dev->data->lro = 1 as a side effect in a function named
zxdh_scattered_rx() seems unexpected. Should this side effect be documented or
moved to a more appropriate location?

[ ... ]

> diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
> index 93506a4b1d2..4723d4b1d2 100644

[ ... ]

> @@ -825,17 +825,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
>  	}
>  }
>  
> -static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
> +static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
> +		struct rte_mbuf **cookie, uint16_t nb_pkts)
>  {
> -	int32_t error = 0;
> -	/*
> -	 * Requeue the discarded mbuf. This should always be
> -	 * successful since it was just dequeued.
> -	 */
> -	error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
> -	if (unlikely(error)) {
> -		PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
> -		rte_pktmbuf_free(m);
> +	struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
> +	struct zxdh_vq_desc_extra *dxp;
> +	uint16_t flags = vq->cached_flags;
> +	int32_t i;
> +	uint16_t idx;
> +
> +	idx = vq->vq_avail_idx;
> +	for (i = 0; i < nb_pkts; i++) {
> +		dxp = &vq->vq_descx[idx];
> +		dxp->cookie = (void *)cookie[i];
> +		start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
> +		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
> +		zxdh_queue_store_flags_packed(&start_dp[idx], flags);
> +		idx++;

Does this loop properly handle idx wrapping at vq_nentries? The function is
named unwrap but seems to handle a contiguous region.

> +	}
> +	vq->vq_avail_idx += nb_pkts;
> +	vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;

Is the subtraction vq->vq_free_cnt - nb_pkts safe from


More information about the test-report mailing list