[PATCH v7 15/21] net/cpfl: add AVX512 data path for single queue model
    Mingxia Liu 
    mingxia.liu at intel.com
       
    Thu Feb 16 01:30:04 CET 2023
    
    
  
Add support of AVX512 vector data path for single queue model.
Signed-off-by: Wenjun Wu <wenjun1.wu at intel.com>
Signed-off-by: Mingxia Liu <mingxia.liu at intel.com>
---
 doc/guides/nics/cpfl.rst                |  24 +++++-
 drivers/net/cpfl/cpfl_ethdev.c          |   3 +-
 drivers/net/cpfl/cpfl_rxtx.c            |  94 ++++++++++++++++++++++
 drivers/net/cpfl/cpfl_rxtx_vec_common.h | 100 ++++++++++++++++++++++++
 drivers/net/cpfl/meson.build            |  25 +++++-
 5 files changed, 243 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/cpfl/cpfl_rxtx_vec_common.h
diff --git a/doc/guides/nics/cpfl.rst b/doc/guides/nics/cpfl.rst
index 7c5aff0789..f0018b41df 100644
--- a/doc/guides/nics/cpfl.rst
+++ b/doc/guides/nics/cpfl.rst
@@ -63,4 +63,26 @@ Runtime Config Options
 Driver compilation and testing
 ------------------------------
 
-Refer to the document :doc:`build_and_test` for details.
\ No newline at end of file
+Refer to the document :doc:`build_and_test` for details.
+
+Features
+--------
+
+Vector PMD
+~~~~~~~~~~
+
+Vector path for Rx and Tx path are selected automatically.
+The paths are chosen based on 2 conditions:
+
+- ``CPU``
+
+  On the x86 platform, the driver checks if the CPU supports AVX512.
+  If the CPU supports AVX512 and EAL argument ``--force-max-simd-bitwidth``
+  is set to 512, AVX512 paths will be chosen.
+
+- ``Offload features``
+
+  The supported HW offload features are described in the document cpfl.ini,
+  A value "P" means the offload feature is not supported by vector path.
+  If any not supported features are used, cpfl vector PMD is disabled
+  and the scalar paths are chosen.
diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index fda945d34b..346af055cf 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -111,7 +111,8 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 		RTE_ETH_TX_OFFLOAD_TCP_CKSUM		|
 		RTE_ETH_TX_OFFLOAD_SCTP_CKSUM		|
 		RTE_ETH_TX_OFFLOAD_TCP_TSO		|
-		RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+		RTE_ETH_TX_OFFLOAD_MULTI_SEGS		|
+		RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
 
 	dev_info->default_txconf = (struct rte_eth_txconf) {
 		.tx_free_thresh = CPFL_DEFAULT_TX_FREE_THRESH,
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index c250642719..cb7bbddb16 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -8,6 +8,7 @@
 
 #include "cpfl_ethdev.h"
 #include "cpfl_rxtx.h"
+#include "cpfl_rxtx_vec_common.h"
 
 static uint64_t
 cpfl_rx_offload_convert(uint64_t offload)
@@ -735,11 +736,61 @@ cpfl_stop_queues(struct rte_eth_dev *dev)
 	}
 }
 
+
 void
 cpfl_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct idpf_vport *vport = dev->data->dev_private;
+#ifdef RTE_ARCH_X86
+	struct idpf_rx_queue *rxq;
+	int i;
+
+	if (cpfl_rx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH &&
+	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+		vport->rx_vec_allowed = true;
 
+		if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+#ifdef CC_AVX512_SUPPORT
+			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+			    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
+				vport->rx_use_avx512 = true;
+#else
+		PMD_DRV_LOG(NOTICE,
+			    "AVX512 is not supported in build env");
+#endif /* CC_AVX512_SUPPORT */
+	} else {
+		vport->rx_vec_allowed = false;
+	}
+#endif /* RTE_ARCH_X86 */
+
+#ifdef RTE_ARCH_X86
+	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+		PMD_DRV_LOG(NOTICE,
+			    "Using Split Scalar Rx (port %d).",
+			    dev->data->port_id);
+		dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts;
+	} else {
+		if (vport->rx_vec_allowed) {
+			for (i = 0; i < dev->data->nb_rx_queues; i++) {
+				rxq = dev->data->rx_queues[i];
+				(void)idpf_qc_singleq_rx_vec_setup(rxq);
+			}
+#ifdef CC_AVX512_SUPPORT
+			if (vport->rx_use_avx512) {
+				PMD_DRV_LOG(NOTICE,
+					    "Using Single AVX512 Vector Rx (port %d).",
+					    dev->data->port_id);
+				dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts_avx512;
+				return;
+			}
+#endif /* CC_AVX512_SUPPORT */
+		}
+		PMD_DRV_LOG(NOTICE,
+			    "Using Single Scalar Rx (port %d).",
+			    dev->data->port_id);
+		dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts;
+	}
+#else
 	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
 		PMD_DRV_LOG(NOTICE,
 			    "Using Split Scalar Rx (port %d).",
@@ -751,12 +802,35 @@ cpfl_set_rx_function(struct rte_eth_dev *dev)
 			    dev->data->port_id);
 		dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts;
 	}
+#endif /* RTE_ARCH_X86 */
 }
 
 void
 cpfl_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct idpf_vport *vport = dev->data->dev_private;
+#ifdef RTE_ARCH_X86
+#ifdef CC_AVX512_SUPPORT
+	struct idpf_tx_queue *txq;
+	int i;
+#endif /* CC_AVX512_SUPPORT */
+
+	if (cpfl_tx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH &&
+	    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+		vport->tx_vec_allowed = true;
+		if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+#ifdef CC_AVX512_SUPPORT
+			if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+			    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
+				vport->tx_use_avx512 = true;
+#else
+		PMD_DRV_LOG(NOTICE,
+			    "AVX512 is not supported in build env");
+#endif /* CC_AVX512_SUPPORT */
+	} else {
+		vport->tx_vec_allowed = false;
+	}
+#endif /* RTE_ARCH_X86 */
 
 	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
 		PMD_DRV_LOG(NOTICE,
@@ -765,6 +839,26 @@ cpfl_set_tx_function(struct rte_eth_dev *dev)
 		dev->tx_pkt_burst = idpf_dp_splitq_xmit_pkts;
 		dev->tx_pkt_prepare = idpf_dp_prep_pkts;
 	} else {
+#ifdef RTE_ARCH_X86
+		if (vport->tx_vec_allowed) {
+#ifdef CC_AVX512_SUPPORT
+			if (vport->tx_use_avx512) {
+				for (i = 0; i < dev->data->nb_tx_queues; i++) {
+					txq = dev->data->tx_queues[i];
+					if (txq == NULL)
+						continue;
+					idpf_qc_tx_vec_avx512_setup(txq);
+				}
+				PMD_DRV_LOG(NOTICE,
+					    "Using Single AVX512 Vector Tx (port %d).",
+					    dev->data->port_id);
+				dev->tx_pkt_burst = idpf_dp_singleq_xmit_pkts_avx512;
+				dev->tx_pkt_prepare = idpf_dp_prep_pkts;
+				return;
+			}
+#endif /* CC_AVX512_SUPPORT */
+		}
+#endif /* RTE_ARCH_X86 */
 		PMD_DRV_LOG(NOTICE,
 			    "Using Single Scalar Tx (port %d).",
 			    dev->data->port_id);
diff --git a/drivers/net/cpfl/cpfl_rxtx_vec_common.h b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
new file mode 100644
index 0000000000..2d4c6a0ef3
--- /dev/null
+++ b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#ifndef _CPFL_RXTX_VEC_COMMON_H_
+#define _CPFL_RXTX_VEC_COMMON_H_
+#include <stdint.h>
+#include <ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "cpfl_ethdev.h"
+#include "cpfl_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#define CPFL_SCALAR_PATH		0
+#define CPFL_VECTOR_PATH		1
+#define CPFL_RX_NO_VECTOR_FLAGS (		\
+		RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |	\
+		RTE_ETH_RX_OFFLOAD_UDP_CKSUM |	\
+		RTE_ETH_RX_OFFLOAD_TCP_CKSUM |	\
+		RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |	\
+		RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+#define CPFL_TX_NO_VECTOR_FLAGS (		\
+		RTE_ETH_TX_OFFLOAD_TCP_TSO |	\
+		RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+
+static inline int
+cpfl_rx_vec_queue_default(struct idpf_rx_queue *rxq)
+{
+	if (rxq == NULL)
+		return CPFL_SCALAR_PATH;
+
+	if (rte_is_power_of_2(rxq->nb_rx_desc) == 0)
+		return CPFL_SCALAR_PATH;
+
+	if (rxq->rx_free_thresh < IDPF_VPMD_RX_MAX_BURST)
+		return CPFL_SCALAR_PATH;
+
+	if ((rxq->nb_rx_desc % rxq->rx_free_thresh) != 0)
+		return CPFL_SCALAR_PATH;
+
+	if ((rxq->offloads & CPFL_RX_NO_VECTOR_FLAGS) != 0)
+		return CPFL_SCALAR_PATH;
+
+	return CPFL_VECTOR_PATH;
+}
+
+static inline int
+cpfl_tx_vec_queue_default(struct idpf_tx_queue *txq)
+{
+	if (txq == NULL)
+		return CPFL_SCALAR_PATH;
+
+	if (txq->rs_thresh < IDPF_VPMD_TX_MAX_BURST ||
+	    (txq->rs_thresh & 3) != 0)
+		return CPFL_SCALAR_PATH;
+
+	if ((txq->offloads & CPFL_TX_NO_VECTOR_FLAGS) != 0)
+		return CPFL_SCALAR_PATH;
+
+	return CPFL_VECTOR_PATH;
+}
+
+static inline int
+cpfl_rx_vec_dev_check_default(struct rte_eth_dev *dev)
+{
+	struct idpf_rx_queue *rxq;
+	int i, ret = 0;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		rxq = dev->data->rx_queues[i];
+		ret = (cpfl_rx_vec_queue_default(rxq));
+		if (ret == CPFL_SCALAR_PATH)
+			return CPFL_SCALAR_PATH;
+	}
+
+	return CPFL_VECTOR_PATH;
+}
+
+static inline int
+cpfl_tx_vec_dev_check_default(struct rte_eth_dev *dev)
+{
+	int i;
+	struct idpf_tx_queue *txq;
+	int ret = 0;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		txq = dev->data->tx_queues[i];
+		ret = cpfl_tx_vec_queue_default(txq);
+		if (ret == CPFL_SCALAR_PATH)
+			return CPFL_SCALAR_PATH;
+	}
+
+	return CPFL_VECTOR_PATH;
+}
+
+#endif /*_CPFL_RXTX_VEC_COMMON_H_*/
diff --git a/drivers/net/cpfl/meson.build b/drivers/net/cpfl/meson.build
index 1894423689..fbe6500826 100644
--- a/drivers/net/cpfl/meson.build
+++ b/drivers/net/cpfl/meson.build
@@ -7,9 +7,32 @@ if is_windows
     subdir_done()
 endif
 
+if dpdk_conf.get('RTE_IOVA_AS_PA') == 0
+    build = false
+    reason = 'driver does not support disabling IOVA as PA mode'
+    subdir_done()
+endif
+
 deps += ['common_idpf']
 
 sources = files(
         'cpfl_ethdev.c',
         'cpfl_rxtx.c',
-)
\ No newline at end of file
+)
+
+if arch_subdir == 'x86'
+    cpfl_avx512_cpu_support = (
+        cc.get_define('__AVX512F__', args: machine_args) != '' and
+        cc.get_define('__AVX512BW__', args: machine_args) != ''
+    )
+
+    cpfl_avx512_cc_support = (
+        not machine_args.contains('-mno-avx512f') and
+        cc.has_argument('-mavx512f') and
+        cc.has_argument('-mavx512bw')
+    )
+
+    if cpfl_avx512_cpu_support == true or cpfl_avx512_cc_support == true
+        cflags += ['-DCC_AVX512_SUPPORT']
+    endif
+endif
-- 
2.25.1
    
    
More information about the dev
mailing list