[dpdk-dev] [PATCH v1 1/1] net/octeontx2: allow vec to process pkts not multiple of 4

Jerin Jacob jerinjacobk at gmail.com
Tue Jan 14 05:06:23 CET 2020


On Fri, Dec 20, 2019 at 6:33 PM <vattunuru at marvell.com> wrote:
>
> From: Vamsi Attunuru <vattunuru at marvell.com>
>
> Current vector mode implementation floor-aligns pkt count
> with NIX_DESCS_PER_LOOP and processes that many packets.
>
> Patch addresses the case where pkt count modulo NIX_DESCS_PER_LOOP
> is non-zero; after the vector mode processing, the scalar routine
> is used to process any leftover packets. The scalar routine is
> also used when the descriptor head is about to wrap and would
> become unaligned.
>
> Signed-off-by: Vamsi Attunuru <vattunuru at marvell.com>
> Signed-off-by: Nithin Dabilpuram <ndabilpuram at marvell.com>


Applied to dpdk-next-net-mrvl/master. Thanks


> ---
>  drivers/net/octeontx2/otx2_rx.c | 18 ++++++++++++++----
>  drivers/net/octeontx2/otx2_tx.c | 18 +++++++++++++-----
>  2 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/octeontx2/otx2_rx.c b/drivers/net/octeontx2/otx2_rx.c
> index 48565db..8e6452a 100644
> --- a/drivers/net/octeontx2/otx2_rx.c
> +++ b/drivers/net/octeontx2/otx2_rx.c
> @@ -130,16 +130,22 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
>         const uintptr_t desc = rxq->desc;
>         uint8x16_t f0, f1, f2, f3;
>         uint32_t head = rxq->head;
> +       uint16_t pkts_left;
>
>         pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
> +       pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
> +
>         /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */
>         pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
>
>         while (packets < pkts) {
> -               /* Get the CQ pointers, since the ring size is multiple of
> -                * 4, We can avoid checking the wrap around of head
> -                * value after the each access unlike scalar version.
> -                */
> +               /* Exit loop if head is about to wrap and become unaligned */
> +               if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) <
> +                               NIX_DESCS_PER_LOOP) {
> +                       pkts_left += (pkts - packets);
> +                       break;
> +               }
> +
>                 const uintptr_t cq0 = desc + CQE_SZ(head);
>
>                 /* Prefetch N desc ahead */
> @@ -301,6 +307,10 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
>         /* Free all the CQs that we've processed */
>         otx2_write64((rxq->wdata | packets), rxq->cq_door);
>
> +       if (unlikely(pkts_left))
> +               packets += nix_recv_pkts(rx_queue, &rx_pkts[packets],
> +                                        pkts_left, flags);
> +
>         return packets;
>  }
>
> diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
> index fa53300..96be92a 100644
> --- a/drivers/net/octeontx2/otx2_tx.c
> +++ b/drivers/net/octeontx2/otx2_tx.c
> @@ -97,7 +97,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
>  #define NIX_DESCS_PER_LOOP     4
>  static __rte_always_inline uint16_t
>  nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> -                    uint16_t pkts, const uint16_t flags)
> +                    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
>  {
>         uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
>         uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
> @@ -118,11 +118,13 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>         uint64x2_t cmd20, cmd21;
>         uint64x2_t cmd30, cmd31;
>         uint64_t lmt_status, i;
> -
> -       pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> +       uint16_t pkts_left;
>
>         NIX_XMIT_FC_OR_RETURN(txq, pkts);
>
> +       pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
> +       pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> +
>         /* Reduce the cached count */
>         txq->fc_cache_pkts -= pkts;
>
> @@ -929,17 +931,21 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
>                 } while (lmt_status == 0);
>         }
>
> +       if (unlikely(pkts_left))
> +               pkts += nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, flags);
> +
>         return pkts;
>  }
>
>  #else
>  static __rte_always_inline uint16_t
>  nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
> -                    uint16_t pkts, const uint16_t flags)
> +                    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
>  {
>         RTE_SET_USED(tx_queue);
>         RTE_SET_USED(tx_pkts);
>         RTE_SET_USED(pkts);
> +       RTE_SET_USED(cmd);
>         RTE_SET_USED(flags);
>         return 0;
>  }
> @@ -985,12 +991,14 @@ static uint16_t __rte_noinline    __hot                                   \
>  otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue,                                \
>                         struct rte_mbuf **tx_pkts, uint16_t pkts)       \
>  {                                                                      \
> +       uint64_t cmd[sz];                                               \
> +                                                                       \
>         /* VLAN, TSTMP, TSO is not supported by vec */                  \
>         if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||                     \
>             (flags) & NIX_TX_OFFLOAD_TSTAMP_F ||                        \
>             (flags) & NIX_TX_OFFLOAD_TSO_F)                             \
>                 return 0;                                               \
> -       return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, (flags));  \
> +       return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, (flags)); \
>  }
>
>  NIX_TX_FASTPATH_MODES
> --
> 2.8.4
>


More information about the dev mailing list