[PATCH v2 6/6] drivers: use bitops API instead of compiler builtins
David Marchand
david.marchand at redhat.com
Fri Oct 25 09:04:23 CEST 2024
Stop using __builtin_ bit operations directly;
prefer the existing DPDK wrappers.
Note: this is a brute-force sed across all drivers (skipping base drivers)
for __builtin_* calls that have a direct replacement in EAL bitops.
There is more work to do, such as adding some missing macros inspired by
the kernel FIELD_* macros, but this is left for later.
Signed-off-by: David Marchand <david.marchand at redhat.com>
---
drivers/common/nfp/nfp_platform.h | 4 +++-
drivers/dma/hisilicon/hisi_dmadev.h | 3 ++-
drivers/ml/cnxk/cn10k_ml_ocm.c | 7 ++++---
drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 4 ++--
drivers/net/bnxt/tf_ulp/ulp_flow_db.c | 6 ++++--
drivers/net/bnxt/tf_ulp/ulp_gen_hash.c | 4 +++-
drivers/net/bonding/rte_eth_bond_pmd.c | 3 ++-
drivers/net/cpfl/cpfl_flow_engine_fxp.c | 5 ++++-
drivers/net/enetfec/enet_ethdev.c | 5 +++--
drivers/net/enetfec/enet_ethdev.h | 6 ------
drivers/net/hns3/hns3_rxtx_vec_neon.h | 4 +++-
drivers/net/i40e/i40e_rxtx_vec_neon.c | 4 +++-
drivers/net/iavf/iavf_rxtx_vec_neon.c | 4 +++-
drivers/net/mlx5/hws/mlx5dr_definer.c | 8 +++++---
drivers/net/mlx5/mlx5_flow_dv.c | 3 ++-
drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 12 ++++++------
drivers/net/mlx5/mlx5_tx.c | 2 +-
17 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/drivers/common/nfp/nfp_platform.h b/drivers/common/nfp/nfp_platform.h
index 1687942e41..0b02fcf1e8 100644
--- a/drivers/common/nfp/nfp_platform.h
+++ b/drivers/common/nfp/nfp_platform.h
@@ -8,6 +8,8 @@
#include <stdint.h>
+#include <rte_bitops.h>
+
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define DMA_BIT_MASK(n) ((1ULL << (n)) - 1)
@@ -21,7 +23,7 @@
#define GENMASK_ULL(h, l) \
((~0ULL << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - (h) - 1)))
-#define __bf_shf(x) (__builtin_ffsll(x) - 1)
+#define __bf_shf(x) rte_bsf64(x)
#define FIELD_GET(_mask, _reg) \
(__extension__ ({ \
diff --git a/drivers/dma/hisilicon/hisi_dmadev.h b/drivers/dma/hisilicon/hisi_dmadev.h
index a57b5c759a..786fe3cc0e 100644
--- a/drivers/dma/hisilicon/hisi_dmadev.h
+++ b/drivers/dma/hisilicon/hisi_dmadev.h
@@ -5,6 +5,7 @@
#ifndef HISI_DMADEV_H
#define HISI_DMADEV_H
+#include <rte_bitops.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_memzone.h>
@@ -14,7 +15,7 @@
#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
#define GENMASK(h, l) \
(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
-#define BF_SHF(x) (__builtin_ffsll(x) - 1)
+#define BF_SHF(x) rte_bsf64(x)
#define FIELD_GET(mask, reg) \
((typeof(mask))(((reg) & (mask)) >> BF_SHF(mask)))
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c
index 749ddeb344..0032fe82da 100644
--- a/drivers/ml/cnxk/cn10k_ml_ocm.c
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.c
@@ -2,6 +2,7 @@
* Copyright (c) 2022 Marvell.
*/
+#include <rte_bitops.h>
#include <rte_mldev_pmd.h>
#include <roc_api.h>
@@ -203,11 +204,11 @@ cn10k_ml_ocm_tilecount(uint64_t tilemask, int *start, int *end)
PLT_ASSERT(tilemask != 0);
- *start = __builtin_ctzl(tilemask);
- *end = 64 - __builtin_clzl(tilemask) - 1;
+ *start = rte_ctz64(tilemask);
+ *end = 64 - rte_clz64(tilemask) - 1;
count = *end - *start + 1;
- PLT_ASSERT(count == __builtin_popcountl(tilemask));
+ PLT_ASSERT(count == rte_popcount64(tilemask));
return count;
}
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 355d41bbd3..840b21cef9 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -4,7 +4,7 @@
#include <inttypes.h>
#include <stdbool.h>
-#include <rte_bitmap.h>
+#include <rte_bitops.h>
#include <rte_byteorder.h>
#include <rte_malloc.h>
#include <rte_memory.h>
@@ -290,7 +290,7 @@ recv_burst_vec_neon(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
if (valid == 0)
num_valid = 4;
else
- num_valid = __builtin_ctzl(valid) / 16;
+ num_valid = rte_ctz64(valid) / 16;
if (num_valid == 0)
break;
diff --git a/drivers/net/bnxt/tf_ulp/ulp_flow_db.c b/drivers/net/bnxt/tf_ulp/ulp_flow_db.c
index 2e6ea43ac1..aac974a970 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_flow_db.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_flow_db.c
@@ -3,7 +3,9 @@
* All rights reserved.
*/
+#include <rte_bitops.h>
#include <rte_malloc.h>
+
#include "bnxt.h"
#include "bnxt_tf_common.h"
#include "ulp_utils.h"
@@ -938,7 +940,7 @@ ulp_flow_db_next_entry_get(struct bnxt_ulp_flow_db *flow_db,
*/
if (s_idx == idx)
bs &= (-1UL >> mod_fid);
- lfid = (idx * ULP_INDEX_BITMAP_SIZE) + __builtin_clzl(bs);
+ lfid = (idx * ULP_INDEX_BITMAP_SIZE) + rte_clz64(bs);
if (*fid >= lfid) {
BNXT_TF_DBG(ERR, "Flow Database is corrupt\n");
return -ENOENT;
@@ -1480,7 +1482,7 @@ ulp_flow_db_parent_child_flow_next_entry_get(struct bnxt_ulp_flow_db *flow_db,
*/
if (s_idx == idx)
bs &= (-1UL >> mod_fid);
- next_fid = (idx * ULP_INDEX_BITMAP_SIZE) + __builtin_clzl(bs);
+ next_fid = (idx * ULP_INDEX_BITMAP_SIZE) + rte_clz64(bs);
if (*child_fid >= next_fid) {
BNXT_TF_DBG(ERR, "Parent Child Database is corrupt\n");
return -ENOENT;
diff --git a/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c b/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c
index d746fbbd4e..9f27b56334 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_gen_hash.c
@@ -3,8 +3,10 @@
* All rights reserved.
*/
+#include <rte_bitops.h>
#include <rte_log.h>
#include <rte_malloc.h>
+
#include "bnxt_tf_common.h"
#include "ulp_gen_hash.h"
#include "ulp_utils.h"
@@ -25,7 +27,7 @@ int32_t ulp_bit_alloc_list_alloc(struct bit_alloc_list *blist,
if (idx <= bsize_64) {
if (bentry)
- jdx = __builtin_clzl(~bentry);
+ jdx = rte_clz64(~bentry);
*index = ((idx - 1) * ULP_INDEX_BITMAP_SIZE) + jdx;
ULP_INDEX_BITMAP_SET(blist->bdata[(idx - 1)], jdx);
return 0;
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index cda1c37124..91bf2c2345 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <netinet/in.h>
+#include <rte_bitops.h>
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
@@ -3982,7 +3983,7 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
* Two '1' in binary of 'link_speeds': bit0 and a unique
* speed bit.
*/
- if (__builtin_popcountl(link_speeds) != 2) {
+ if (rte_popcount64(link_speeds) != 2) {
RTE_BOND_LOG(ERR, "please set a unique speed.");
return -EINVAL;
}
diff --git a/drivers/net/cpfl/cpfl_flow_engine_fxp.c b/drivers/net/cpfl/cpfl_flow_engine_fxp.c
index 2c75ea6577..0101c30911 100644
--- a/drivers/net/cpfl/cpfl_flow_engine_fxp.c
+++ b/drivers/net/cpfl/cpfl_flow_engine_fxp.c
@@ -10,6 +10,8 @@
#include <unistd.h>
#include <stdarg.h>
#include <math.h>
+
+#include <rte_bitops.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_log.h>
@@ -20,6 +22,7 @@
#include <rte_flow.h>
#include <rte_bitmap.h>
#include <ethdev_driver.h>
+
#include "cpfl_rules.h"
#include "cpfl_logs.h"
#include "cpfl_ethdev.h"
@@ -608,7 +611,7 @@ cpfl_fxp_mod_idx_alloc(struct cpfl_adapter_ext *ad)
if (!rte_bitmap_scan(ad->mod_bm, &pos, &slab))
return CPFL_MAX_MOD_CONTENT_INDEX;
- pos += __builtin_ffsll(slab) - 1;
+ pos += rte_bsf64(slab);
rte_bitmap_clear(ad->mod_bm, pos);
return pos;
diff --git a/drivers/net/enetfec/enet_ethdev.c b/drivers/net/enetfec/enet_ethdev.c
index 8c7067fbb5..4151d7fca3 100644
--- a/drivers/net/enetfec/enet_ethdev.c
+++ b/drivers/net/enetfec/enet_ethdev.c
@@ -6,6 +6,7 @@
#include <ethdev_vdev.h>
#include <ethdev_driver.h>
+#include <rte_bitops.h>
#include <rte_io.h>
#include "enet_pmd_logs.h"
@@ -374,7 +375,7 @@ enetfec_tx_queue_setup(struct rte_eth_dev *dev,
unsigned int size;
unsigned int dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) :
sizeof(struct bufdesc);
- unsigned int dsize_log2 = fls64(dsize);
+ unsigned int dsize_log2 = rte_fls_u64(dsize);
/* Tx deferred start is not supported */
if (tx_conf->tx_deferred_start) {
@@ -453,7 +454,7 @@ enetfec_rx_queue_setup(struct rte_eth_dev *dev,
unsigned int size;
unsigned int dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) :
sizeof(struct bufdesc);
- unsigned int dsize_log2 = fls64(dsize);
+ unsigned int dsize_log2 = rte_fls_u64(dsize);
/* Rx deferred start is not supported */
if (rx_conf->rx_deferred_start) {
diff --git a/drivers/net/enetfec/enet_ethdev.h b/drivers/net/enetfec/enet_ethdev.h
index 02a3397890..4e196b8552 100644
--- a/drivers/net/enetfec/enet_ethdev.h
+++ b/drivers/net/enetfec/enet_ethdev.h
@@ -125,12 +125,6 @@ bufdesc *enet_get_nextdesc(struct bufdesc *bdp, struct bufdesc_prop *bd)
: (struct bufdesc *)(((uintptr_t)bdp) + bd->d_size);
}
-static inline int
-fls64(unsigned long word)
-{
- return (64 - __builtin_clzl(word)) - 1;
-}
-
static inline struct
bufdesc *enet_get_prevdesc(struct bufdesc *bdp, struct bufdesc_prop *bd)
{
diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index 0dc6b9f0a2..bbb5478015 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -5,6 +5,8 @@
#ifndef HNS3_RXTX_VEC_NEON_H
#define HNS3_RXTX_VEC_NEON_H
+#include <rte_bitops.h>
+
#include <arm_neon.h>
#pragma GCC diagnostic ignored "-Wcast-qual"
@@ -189,7 +191,7 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
if (likely(stat == 0))
bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
else
- bd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT;
+ bd_valid_num = rte_ctz64(stat) / HNS3_UINT16_BIT;
if (bd_valid_num == 0)
break;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 3a99137b5e..e1c5c7041b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -4,7 +4,9 @@
*/
#include <stdint.h>
+
#include <ethdev_driver.h>
+#include <rte_bitops.h>
#include <rte_malloc.h>
#include <rte_vect.h>
@@ -558,7 +560,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
if (unlikely(stat == 0)) {
nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
} else {
- nb_pkts_recd += __builtin_ctzl(stat) / I40E_UINT16_BIT;
+ nb_pkts_recd += rte_ctz64(stat) / I40E_UINT16_BIT;
break;
}
}
diff --git a/drivers/net/iavf/iavf_rxtx_vec_neon.c b/drivers/net/iavf/iavf_rxtx_vec_neon.c
index 20b656e899..04be574683 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_neon.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_neon.c
@@ -4,7 +4,9 @@
*/
#include <stdint.h>
+
#include <ethdev_driver.h>
+#include <rte_bitops.h>
#include <rte_malloc.h>
#include <rte_vect.h>
@@ -366,7 +368,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *__rte_restrict rxq,
if (unlikely(stat == 0)) {
nb_pkts_recd += IAVF_VPMD_DESCS_PER_LOOP;
} else {
- nb_pkts_recd += __builtin_ctzl(stat) / IAVF_UINT16_BIT;
+ nb_pkts_recd += rte_ctz64(stat) / IAVF_UINT16_BIT;
break;
}
}
diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c
index a9fa5d06ed..5c2e889444 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -2,6 +2,8 @@
* Copyright (c) 2022 NVIDIA Corporation & Affiliates
*/
+#include <rte_bitops.h>
+
#include "mlx5dr_internal.h"
#define GTP_PDU_SC 0x85
@@ -1548,7 +1550,7 @@ mlx5dr_definer_conv_item_port(struct mlx5dr_definer_conv_data *cd,
fc->tag_set = &mlx5dr_definer_vport_set;
fc->tag_mask_set = &mlx5dr_definer_ones_set;
DR_CALC_SET_HDR(fc, registers, register_c_0);
- fc->bit_off = __builtin_ctz(caps->wire_regc_mask);
+ fc->bit_off = rte_ctz32(caps->wire_regc_mask);
fc->bit_mask = caps->wire_regc_mask >> fc->bit_off;
fc->dr_ctx = cd->ctx;
} else {
@@ -2666,8 +2668,8 @@ mlx5dr_definer_conv_item_geneve_opt(struct mlx5dr_definer_conv_data *cd,
fc->item_idx = item_idx;
fc->tag_set = &mlx5dr_definer_ones_set;
fc->byte_off = hl_ok_bit->dw_offset * DW_SIZE +
- __builtin_clz(hl_ok_bit->dw_mask) / 8;
- fc->bit_off = __builtin_ctz(hl_ok_bit->dw_mask);
+ rte_clz32(hl_ok_bit->dw_mask) / 8;
+ fc->bit_off = rte_ctz32(hl_ok_bit->dw_mask);
fc->bit_mask = 0x1;
}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 201e215e4b..040727f2e8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -8,6 +8,7 @@
#include <string.h>
#include <unistd.h>
+#include <rte_bitops.h>
#include <rte_common.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
@@ -9068,7 +9069,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
!(non_shared_age && count) &&
(attr->group || (attr->transfer && priv->fdb_def_rule)) &&
priv->sh->flow_hit_aso_en);
- if (__builtin_popcountl(aso_mask) > 1)
+ if (rte_popcount64(aso_mask) > 1)
return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
NULL, "unsupported combining AGE, METER, CT ASO actions in a single rule");
/*
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 510f60b25d..0ce9827ed9 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <arm_neon.h>
+#include <rte_bitops.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
@@ -620,7 +621,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
/*
* Note that vectors have reverse order - {v3, v2, v1, v0}, because
- * there's no instruction to count trailing zeros. __builtin_clzl() is
+ * there's no instruction to count trailing zeros. rte_clz64() is
* used instead.
*
* A. copy 4 mbuf pointers from elts ring to returning pkts.
@@ -808,13 +809,12 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
/* E.2 mask out invalid entries. */
comp_mask = vbic_u16(comp_mask, invalid_mask);
/* E.3 get the first compressed CQE. */
- comp_idx = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
- comp_mask), 0)) /
- (sizeof(uint16_t) * 8);
+ comp_idx = rte_clz64(vget_lane_u64(vreinterpret_u64_u16(comp_mask), 0)) /
+ (sizeof(uint16_t) * 8);
invalid_mask = vorr_u16(invalid_mask, comp_mask);
/* D.7 count non-compressed valid CQEs. */
- n = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
- invalid_mask), 0)) / (sizeof(uint16_t) * 8);
+ n = rte_clz64(vget_lane_u64(vreinterpret_u64_u16(invalid_mask), 0)) /
+ (sizeof(uint16_t) * 8);
nocmp_n += n;
/*
* D.2 mask out entries after the compressed CQE.
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 04f80bb9bd..fc105970a3 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -619,7 +619,7 @@ mlx5_select_tx_function(struct rte_eth_dev *dev)
* Check whether it has minimal amount
* of not requested offloads.
*/
- tmp = __builtin_popcountl(tmp & ~olx);
+ tmp = rte_popcount64(tmp & ~olx);
if (m >= RTE_DIM(txoff_func) || tmp < diff) {
/* First or better match, save and continue. */
m = i;
--
2.46.2
More information about the dev
mailing list