[RFC PATCH 20/27] net/intel: write descriptors using non-volatile pointers
Morten Brørup
mb at smartsharesystems.com
Sat Dec 20 09:43:46 CET 2025
> From: Bruce Richardson [mailto:bruce.richardson at intel.com]
> Sent: Friday, 19 December 2025 18.26
>
> Use a non-volatile uint64_t pointer to store to the descriptor ring.
> This will allow the compiler to optionally merge the stores as it sees
> best.
I suppose there was a reason for the volatile in the first place.
Is removing it really safe?
E.g. it also allows the compiler to reorder stores: not just within the pair of 64-bit writes to one descriptor, but also across writes to multiple descriptors.
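If ordering across descriptors turns out to matter for some driver, a compiler barrier after each descriptor write would pin the program order of the stores while still letting the two 64-bit halves of one descriptor be merged. A rough sketch (write_txd_ordered is my name, not from the patch):

static inline void
write_txd_ordered(volatile void *txd, uint64_t qw0, uint64_t qw1)
{
	uint64_t *txd_qw = RTE_CAST_PTR(void *, txd);

	txd_qw[0] = rte_cpu_to_le_64(qw0);
	txd_qw[1] = rte_cpu_to_le_64(qw1);
	/* Stop the compiler from moving these stores past writes to
	 * other descriptors; merging the two stores above is still OK. */
	rte_compiler_barrier();
}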
One more comment inline below.
>
> Signed-off-by: Bruce Richardson <bruce.richardson at intel.com>
> ---
> drivers/net/intel/common/tx_scalar_fns.h | 24 ++++++++++++++++--------
> 1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
> index 7b643fcf44..95e9acbe60 100644
> --- a/drivers/net/intel/common/tx_scalar_fns.h
> +++ b/drivers/net/intel/common/tx_scalar_fns.h
> @@ -184,6 +184,15 @@ struct ci_timesstamp_queue_fns {
> 	write_ts_tail_t write_ts_tail;
> };
>
> +static inline void
> +write_txd(volatile void *txd, uint64_t qw0, uint64_t qw1)
> +{
> +	uint64_t *txd_qw = RTE_CAST_PTR(void *, txd);
If the descriptors are 16-byte aligned, you could tell the compiler so, letting it use 128-bit stores on architectures where alignment matters. A sketch follows the quoted function.
> +
> +	txd_qw[0] = rte_cpu_to_le_64(qw0);
> +	txd_qw[1] = rte_cpu_to_le_64(qw1);
> +}
> +
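Something along these lines, assuming the ring base and descriptor size really do guarantee 16-byte alignment (__builtin_assume_aligned is a GCC/Clang builtin):

static inline void
write_txd(volatile void *txd, uint64_t qw0, uint64_t qw1)
{
	/* Promise the compiler 16-byte alignment, so it may fuse the
	 * two 64-bit stores into a single 128-bit store. */
	uint64_t *txd_qw = __builtin_assume_aligned(
			RTE_CAST_PTR(void *, txd), 16);

	txd_qw[0] = rte_cpu_to_le_64(qw0);
	txd_qw[1] = rte_cpu_to_le_64(qw1);
}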
> static inline uint16_t
> ci_xmit_pkts(struct ci_tx_queue *txq,
> 		struct rte_mbuf **tx_pkts,
> @@ -313,8 +322,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
> 			txe->mbuf = NULL;
> 		}
>
> -		ctx_txd[0] = cd_qw0;
> -		ctx_txd[1] = cd_qw1;
> +		write_txd(ctx_txd, cd_qw0, cd_qw1);
>
> 		txe->last_id = tx_last;
> 		tx_id = txe->next_id;
> @@ -361,12 +369,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
>
> 		while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
> 				unlikely(slen > CI_MAX_DATA_PER_TXD)) {
> -			txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
> -			txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
> +			const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
> 				((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
> 				((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
> 				((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) |
> -				((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
> +				((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
> +			write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
>
> 			buf_dma_addr += CI_MAX_DATA_PER_TXD;
> 			slen -= CI_MAX_DATA_PER_TXD;
> @@ -382,12 +390,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
> 		if (m_seg->next == NULL)
> 			td_cmd |= CI_TX_DESC_CMD_EOP;
>
> -		txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
> -		txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
> +		const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
> 			((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
> 			((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
> 			((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) |
> -			((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
> +			((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
> +		write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
>
> 		txe->last_id = tx_last;
> 		tx_id = txe->next_id;
> --
> 2.51.0
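For completeness: I assume the tail update still goes through the usual I/O barrier, which is what ultimately orders these (now non-volatile) descriptor stores against the doorbell. From memory, something like this (qtx_tail is illustrative, not from this patch):

	/* Descriptor stores must be globally visible before the doorbell. */
	rte_io_wmb();
	rte_write32_relaxed(rte_cpu_to_le_32(tx_id), txq->qtx_tail);

If that barrier is always on the path before the tail write, the compiler can only reorder descriptor stores between doorbells, which should be harmless as far as the hardware is concerned.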