[dpdk-dev] [PATCH v1 1/1] net/octeontx2: allow vec to process pkts not multiple of 4
Jerin Jacob
jerinjacobk at gmail.com
Tue Jan 14 05:06:23 CET 2020
On Fri, Dec 20, 2019 at 6:33 PM <vattunuru at marvell.com> wrote:
>
> From: Vamsi Attunuru <vattunuru at marvell.com>
>
> The current vector mode implementation floor-aligns the pkt count
> with NIX_DESCS_PER_LOOP and processes that many packets.
>
> This patch addresses the case where pkt count modulo NIX_DESCS_PER_LOOP
> is non-zero; after the vector mode processing, the scalar routine is
> used to process any leftover packets. The scalar routine is
> also used when the descriptor head is about to wrap and would
> become unaligned.
>
> Signed-off-by: Vamsi Attunuru <vattunuru at marvell.com>
> Signed-off-by: Nithin Dabilpuram <ndabilpuram at marvell.com>
Applied to dpdk-next-net-mrvl/master. Thanks
> ---
> drivers/net/octeontx2/otx2_rx.c | 18 ++++++++++++++----
> drivers/net/octeontx2/otx2_tx.c | 18 +++++++++++++-----
> 2 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/octeontx2/otx2_rx.c b/drivers/net/octeontx2/otx2_rx.c
> index 48565db..8e6452a 100644
> --- a/drivers/net/octeontx2/otx2_rx.c
> +++ b/drivers/net/octeontx2/otx2_rx.c
> @@ -130,16 +130,22 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> const uintptr_t desc = rxq->desc;
> uint8x16_t f0, f1, f2, f3;
> uint32_t head = rxq->head;
> + uint16_t pkts_left;
>
> pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
> + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
> +
> /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
> pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
>
> while (packets < pkts) {
> - /* Get the CQ pointers, since the ring size is multiple of
> - * 4, We can avoid checking the wrap around of head
> - * value after the each access unlike scalar version.
> - */
> + /* Exit loop if head is about to wrap and become unaligned */
> + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
> + NIX_DESCS_PER_LOOP) {
> + pkts_left += (pkts - packets);
> + break;
> + }
> +
> const uintptr_t cq0 = desc + CQE_SZ(head);
>
> /* Prefetch N desc ahead */
> @@ -301,6 +307,10 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> /* Free all the CQs that we've processed */
> otx2_write64((rxq->wdata | packets), rxq->cq_door);
>
> + if (unlikely(pkts_left))
> + packets += nix_recv_pkts(rx_queue, &rx_pkts[packets],
> + pkts_left, flags);
> +
> return packets;
> }
>
> diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
> index fa53300..96be92a 100644
> --- a/drivers/net/octeontx2/otx2_tx.c
> +++ b/drivers/net/octeontx2/otx2_tx.c
> @@ -97,7 +97,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
> #define NIX_DESCS_PER_LOOP 4
> static __rte_always_inline uint16_t
> nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> {
> uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
> uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
> @@ -118,11 +118,13 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint64x2_t cmd20, cmd21;
> uint64x2_t cmd30, cmd31;
> uint64_t lmt_status, i;
> -
> - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> + uint16_t pkts_left;
>
> NIX_XMIT_FC_OR_RETURN(txq, pkts);
>
> + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
> + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> +
> /* Reduce the cached count */
> txq->fc_cache_pkts -= pkts;
>
> @@ -929,17 +931,21 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> } while (lmt_status == 0);
> }
>
> + if (unlikely(pkts_left))
> + pkts += nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, flags);
> +
> return pkts;
> }
>
> #else
> static __rte_always_inline uint16_t
> nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> - uint16_t pkts, const uint16_t flags)
> + uint16_t pkts, uint64_t *cmd, const uint16_t flags)
> {
> RTE_SET_USED(tx_queue);
> RTE_SET_USED(tx_pkts);
> RTE_SET_USED(pkts);
> + RTE_SET_USED(cmd);
> RTE_SET_USED(flags);
> return 0;
> }
> @@ -985,12 +991,14 @@ static uint16_t __rte_noinline __hot \
> otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue, \
> struct rte_mbuf **tx_pkts, uint16_t pkts) \
> { \
> + uint64_t cmd[sz]; \
> + \
> /* VLAN, TSTMP, TSO is not supported by vec */ \
> if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \
> (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \
> (flags) & NIX_TX_OFFLOAD_TSO_F) \
> return 0; \
> - return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, (flags)); \
> + return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, (flags)); \
> }
>
> NIX_TX_FASTPATH_MODES
> --
> 2.8.4
>
More information about the dev
mailing list