|SUCCESS| [v3,3/3] net/zxdh: optimize Tx xmit pkts performance
dpdklab at iol.unh.edu
dpdklab at iol.unh.edu
Sat May 9 13:49:15 CEST 2026
Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/163835
_AI Code Review_
=== Patch Review: series.patch (via Claude) ===
Review date: 2026-05-09
# Review of DPDK Patch Series
commit: 20260509062930.3254766-2-wang.junlong1 at zte.com.cn
Author: Junlong Wang <wang.junlong1 at zte.com.cn>
Subject: [PATCH v3 1/3] net/zxdh: optimize queue structure to improve performance
This patch reorganizes structure fields for better cache locality and removes
the RX software ring (sw_ring) to reduce memory allocation and copy operations.
> diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
> index aeb01f4652..08119e28c7 100644
> --- a/drivers/net/zxdh/zxdh_ethdev.c
> +++ b/drivers/net/zxdh/zxdh_ethdev.c
[ ... ]
> @@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
> }
>
> if (queue_type == ZXDH_VTNET_RQ) {
> - size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
> -
> - sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
> - if (!sw_ring) {
> - PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
> - ret = -ENOMEM;
> - goto fail_q_alloc;
> - }
> -
> - vq->sw_ring = sw_ring;
The removal of sw_ring allocation seems correct, but does the code that
previously used sw_ring still function properly? Can this cause issues in
zxdh_recv_pkts_packed() or other RX functions that may have relied on sw_ring?
> @@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
> txvq->zxdh_net_hdr_mem = hdr_mz->iova;
> }
>
> - vq->offset = offsetof(struct rte_mbuf, buf_iova);
> if (queue_type == ZXDH_VTNET_TQ) {
> struct zxdh_tx_region *txr = hdr_mz->addr;
> - uint32_t i;
> -
> memset(txr, 0, vq_size * sizeof(*txr));
> - for (i = 0; i < vq_size; i++) {
> - /* first indirect descriptor is always the tx header */
> - struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
> -
> - zxdh_vring_desc_init_indirect_packed(start_dp,
> - RTE_DIM(txr[i].tx_packed_indir));
> - start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
> - offsetof(struct zxdh_tx_region, tx_hdr);
> - /* length will be updated to actual pi hdr size when xmit pkt */
> - start_dp->len = 0;
> - }
Does removal of the indirect descriptor initialization cause issues in
zxdh_enqueue_xmit_packed()? The comment mentions the length will be updated
when transmitting packets - is that still happening correctly?
> @@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
> }
> return 0;
> fail_q_alloc:
> - rte_free(sw_ring);
> rte_memzone_free(hdr_mz);
[ ... ]
> diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
> index 1a0c8a0d90..b079272162 100644
> --- a/drivers/net/zxdh/zxdh_queue.h
> +++ b/drivers/net/zxdh/zxdh_queue.h
[ ... ]
> @@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
> uint16_t next;
> };
>
> +struct zxdh_vring {
> + uint32_t num;
> + struct zxdh_vring_desc *desc;
> + struct zxdh_vring_avail *avail;
> + struct zxdh_vring_used *used;
> +};
> +
> struct zxdh_virtqueue {
> + union {
> + struct {
> + struct zxdh_vring ring; /**< vring keeping desc, used and avail */
> + } vq_split;
> + struct __rte_packed_begin {
> + struct zxdh_vring_packed ring;
> + } __rte_packed_end vq_packed;
> + };
Does this alignment of the vq_packed structure with __rte_packed attributes
match the hardware requirements? Can packing this struct cause unaligned
accesses on some platforms?
> + uint16_t cached_flags; /**< cached flags for descs */
> + uint8_t used_wrap_counter;
> + uint8_t rsv;
> + uint16_t vq_free_thresh; /**< free threshold */
> + uint16_t next_qidx;
The next_qidx field is added but never initialized or used in this patch.
Does this field cause issues or is it intended for future use?
[ ... ]
> @@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
> static inline void
> zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
> {
> - if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
> - vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
> - vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
> + if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
> + vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
> + vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
> }
> }
>
> static inline void
> zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
> {
> - if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
> - vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
> - vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
> + if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
> + vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
> + vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
The original zxdh_queue_enable_intr() set event_flags_shadow to DISABLE
instead of ENABLE. Was this a bug being fixed here, or does this change
introduce a functional regression?
[ ... ]
> @@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
> dp->flags = flags;
> }
>
> -static inline int32_t
> -zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
> -{
> - uint16_t flags;
> - uint16_t used, avail;
> -
> - flags = desc->flags;
> - rte_io_rmb();
> - used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
> - avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
> - return avail == used && used == vq->vq_packed.used_wrap_counter;
> -}
> -
> static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
> {
> - ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
> + /* Bit[0:15]: vq queue index
> + * Bit[16:30]: avail index
> + * Bit[31]: avail wrap counter
> + */
> + uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
> + ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
> + ((uint32_t)vq->vq_avail_idx << 16) |
> + vq->vq_queue_index;
> + rte_write32(notify_data, vq->notify_addr);
This changes from calling ZXDH_VTPCI_OPS()->notify_queue() to directly
writing to notify_addr. Does this bypass necessary operations in the
original notify_queue function? Can this cause synchronization issues?
[ ... ]
commit: 20260509062930.3254766-3-wang.junlong1 at zte.com.cn
Author: Junlong Wang <wang.junlong1 at zte.com.cn>
Subject: [PATCH v3 2/3] net/zxdh: optimize Rx recv pkts performance
This patch adds simple RX recv functions for single-segment packet recv,
optimizes RX recv packed ops, and removes unnecessary feature checks.
> diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
> index 08119e28c7..0ab137189b 100644
[ ... ]
> @@ -1263,18 +1263,43 @@ zxdh_dev_close(struct rte_eth_dev *dev)
> return ret;
> }
>
> -static int32_t
> -zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
> +/*
> + * Determine whether the current configuration requires support for scattered
> + * receive; return 1 if scattered receive is required and 0 if not.
> + */
> +static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
> {
> - struct zxdh_hw *hw = eth_dev->data->dev_private;
> + uint16_t buf_size;
>
> - if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
> - PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
> - return -1;
> + if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
> + eth_dev->data->lro = 1;
> + return 1;
> }
Setting eth_dev->data->lro = 1 as a side effect in a function named
zxdh_scattered_rx() seems unexpected. Should this side effect be documented or
moved to a more appropriate location?
[ ... ]
> diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
> index 93506a4b1d2..4723d4b1d2 100644
[ ... ]
> @@ -825,17 +825,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
> }
> }
>
> -static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
> +static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
> + struct rte_mbuf **cookie, uint16_t nb_pkts)
> {
> - int32_t error = 0;
> - /*
> - * Requeue the discarded mbuf. This should always be
> - * successful since it was just dequeued.
> - */
> - error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
> - if (unlikely(error)) {
> - PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
> - rte_pktmbuf_free(m);
> + struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
> + struct zxdh_vq_desc_extra *dxp;
> + uint16_t flags = vq->cached_flags;
> + int32_t i;
> + uint16_t idx;
> +
> + idx = vq->vq_avail_idx;
> + for (i = 0; i < nb_pkts; i++) {
> + dxp = &vq->vq_descx[idx];
> + dxp->cookie = (void *)cookie[i];
> + start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
> + start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
> + zxdh_queue_store_flags_packed(&start_dp[idx], flags);
> + idx++;
Does this loop properly handle idx wrapping at vq_nentries? The function is
named unwrap but seems to handle a contiguous region.
> + }
> + vq->vq_avail_idx += nb_pkts;
> + vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;
Is the subtraction vq->vq_free_cnt - nb_pkts safe from
More information about the test-report
mailing list