[dpdk-dev] [PATCH v5 01/17] eal: add max SIMD bitwidth

Honnappa Nagarahalli Honnappa.Nagarahalli at arm.com
Wed Oct 14 16:19:00 CEST 2020


<snip>

> >
> > This patch adds a max SIMD bitwidth EAL configuration. The API allows
> > an app to set this value. It can also be set using the EAL argument
> > --force-max-simd-bitwidth, which will lock the value and override any
> > modifications made by the app.
> >
> > Each arch has a define for the default SIMD bitwidth value, this is
> > used on EAL init to set the config max SIMD bitwidth.
> >
> > Cc: Ruifeng Wang <ruifeng.wang at arm.com>
> > Cc: Jerin Jacob <jerinj at marvell.com>
> > Cc: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> > Cc: David Christensen <drc at linux.vnet.ibm.com>
> >
> > Signed-off-by: Ciara Power <ciara.power at intel.com>
> >
> > ---
> > v4:
> >   - Used RTE_SIMD_MAX instead of UINT16_MAX.
> >   - Renamed enums to better reflect usage.
> >   - Added functions to windows symbol export file.
> >   - Modified Doxygen comments.
> >   - Modified enum name.
> >   - Changed RTE_SIMD_MAX value to a power of 2.
> >   - Merged patch 2 into this patch.
> >   - Enum now used for default value defines.
> >   - Fixed some small comments on v3.
> > v3:
> >   - Added enum value to essentially disable using max SIMD to choose
> >     paths, intended for use by ARM SVE.
> >   - Fixed parsing bitwidth argument to return an error for values
> >     greater than uint16_t.
> >   - Removed unnecessary define in generic rte_vect.h
> >   - Changed default bitwidth for ARM to UINT16_MAX, to allow for SVE.
> > v2:
> >   - Added to Doxygen comment for API.
> >   - Changed default bitwidth for Arm to 128.
> > ---
> >  lib/librte_eal/arm/include/rte_vect.h      |  2 +
> >  lib/librte_eal/common/eal_common_options.c | 66
> > ++++++++++++++++++++++
> >  lib/librte_eal/common/eal_internal_cfg.h   |  8 +++
> >  lib/librte_eal/common/eal_options.h        |  2 +
> >  lib/librte_eal/include/rte_eal.h           | 40 +++++++++++++
> >  lib/librte_eal/ppc/include/rte_vect.h      |  2 +
> >  lib/librte_eal/rte_eal_exports.def         |  2 +
> >  lib/librte_eal/rte_eal_version.map         |  2 +
> >  lib/librte_eal/x86/include/rte_vect.h      |  2 +
> >  9 files changed, 126 insertions(+)
> >
> > diff --git a/lib/librte_eal/arm/include/rte_vect.h
> > b/lib/librte_eal/arm/include/rte_vect.h
> > index 01c51712a1..f53c89be97 100644
> > --- a/lib/librte_eal/arm/include/rte_vect.h
> > +++ b/lib/librte_eal/arm/include/rte_vect.h
> > @@ -14,6 +14,8 @@
> >  extern "C" {
> >  #endif
> >
> > +#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_MAX
> > +
> >  typedef int32x4_t xmm_t;
> >
> >  #defineXMM_SIZE(sizeof(xmm_t))
> > diff --git a/lib/librte_eal/common/eal_common_options.c
> > b/lib/librte_eal/common/eal_common_options.c
> > index a5426e1234..8c79f1b2fc 100644
> > --- a/lib/librte_eal/common/eal_common_options.c
> > +++ b/lib/librte_eal/common/eal_common_options.c
> > @@ -35,6 +35,7 @@
> >  #ifndef RTE_EXEC_ENV_WINDOWS
> >  #include <rte_telemetry.h>
> >  #endif
> > +#include <rte_vect.h>
> >
> >  #include "eal_internal_cfg.h"
> >  #include "eal_options.h"
> > @@ -102,6 +103,7 @@ eal_long_options[] =
> {  {OPT_MATCH_ALLOCATIONS, 0,
> > NULL, OPT_MATCH_ALLOCATIONS_NUM},
> >  {OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
> >  {OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
> > +{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL,
> > +OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> >  {0,                     0, NULL, 0                        }
> >  };
> >
> > @@ -343,6 +345,8 @@ eal_reset_internal_config(struct internal_config
> > *internal_cfg)
> >  internal_cfg->user_mbuf_pool_ops_name = NULL;
> > CPU_ZERO(&internal_cfg->ctrl_cpuset);
> >  internal_cfg->init_complete = 0;
> > +internal_cfg->max_simd_bitwidth.bitwidth =
> > RTE_DEFAULT_SIMD_BITWIDTH;
> > +internal_cfg->max_simd_bitwidth.forced = 0;
> >  }
> >
> >  static int
> > @@ -1309,6 +1313,34 @@ eal_parse_iova_mode(const char *name)
> return
> > 0;  }
> >
> > +static int
> > +eal_parse_simd_bitwidth(const char *arg) { char *end; unsigned long
> > +bitwidth; int ret; struct internal_config *internal_conf =
> > +eal_get_internal_configuration();
> > +
> > +if (arg == NULL || arg[0] == '\0')
> > +return -1;
> > +
> > +errno = 0;
> > +bitwidth = strtoul(arg, &end, 0);
> > +
> > +/* check for errors */
> > +if (errno != 0 || end == NULL || *end != '\0' || bitwidth >
> > RTE_SIMD_MAX)
> > +return -1;
> > +
> > +if (bitwidth == 0)
> > +bitwidth = (unsigned long) RTE_SIMD_MAX; ret =
> > +rte_set_max_simd_bitwidth(bitwidth);
> > +if (ret < 0)
> > +return -1;
> > +internal_conf->max_simd_bitwidth.forced = 1; return 0; }
> > +
> >  static int
> >  eal_parse_base_virtaddr(const char *arg)  { @@ -1707,6 +1739,13 @@
> > eal_parse_common_option(int opt, const char *optarg,  case
> > OPT_NO_TELEMETRY_NUM:
> >  conf->no_telemetry = 1;
> >  break;
> > +case OPT_FORCE_MAX_SIMD_BITWIDTH_NUM:
> > +if (eal_parse_simd_bitwidth(optarg) < 0) { RTE_LOG(ERR, EAL, "invalid
> > +parameter for --"
> > +OPT_FORCE_MAX_SIMD_BITWIDTH
> > "\n");
> > +return -1;
> > +}
> > +break;
> >
> >  /* don't know what to do, leave this to caller */
> >  default:
> > @@ -1903,6 +1942,32 @@ eal_check_common_options(struct
> internal_config
> > *internal_cfg)  return 0;  }
> >
> > +uint16_t
> > +rte_get_max_simd_bitwidth(void)
> > +{
> > +const struct internal_config *internal_conf =
> > +eal_get_internal_configuration();
> > +return internal_conf->max_simd_bitwidth.bitwidth;
> > +}
> > +
> > +int
> > +rte_set_max_simd_bitwidth(uint16_t bitwidth) { struct internal_config
> > +*internal_conf = eal_get_internal_configuration(); if
> > +(internal_conf->max_simd_bitwidth.forced) { RTE_LOG(NOTICE, EAL,
> > +"Cannot set max SIMD bitwidth - user
> > runtime override enabled");
> > +return -EPERM;
> > +}
> > +
> > +if (bitwidth < RTE_SIMD_DISABLED || !rte_is_power_of_2(bitwidth))
> > {
> > +RTE_LOG(ERR, EAL, "Invalid bitwidth value!\n"); return -EINVAL; }
> > +internal_conf->max_simd_bitwidth.bitwidth = bitwidth; return 0; }
> > +
> >  void
> >  eal_common_usage(void)
> >  {
> > @@ -1981,6 +2046,7 @@ eal_common_usage(void)
> >         "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
> >         "  --"OPT_TELEMETRY"   Enable telemetry support (on by
> > default)\n"
> >         "  --"OPT_NO_TELEMETRY"   Disable telemetry support\n"
> > +       "  --"OPT_FORCE_MAX_SIMD_BITWIDTH" Force the max SIMD
> > bitwidth\n"
> >         "\nEAL options for DEBUG use only:\n"
> >         "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
> >         "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> > diff --git a/lib/librte_eal/common/eal_internal_cfg.h
> > b/lib/librte_eal/common/eal_internal_cfg.h
> > index 13f93388a7..0c880cbe17 100644
> > --- a/lib/librte_eal/common/eal_internal_cfg.h
> > +++ b/lib/librte_eal/common/eal_internal_cfg.h
> > @@ -33,6 +33,12 @@ struct hugepage_info {
> >  int lock_descriptor;    /**< file descriptor for hugepage dir */
> >  };
> >
> > +struct simd_bitwidth {
> > +bool forced;
> > +/**< flag indicating if bitwidth is forced and can't be modified */
> > +uint16_t bitwidth; /**< bitwidth value */ };
> > +
> >  /**
> >   * internal configuration
> >   */
> > @@ -85,6 +91,8 @@ struct internal_config {  volatile unsigned int
> > init_complete;  /**< indicates whether EAL has completed
> > initialization */  unsigned int no_telemetry; /**< true to disable
> > Telemetry */
> > +struct simd_bitwidth max_simd_bitwidth; /**< max simd bitwidth path
> > +to use */
> >  };
> >
> >  void eal_reset_internal_config(struct internal_config *internal_cfg);
> > diff --git a/lib/librte_eal/common/eal_options.h
> > b/lib/librte_eal/common/eal_options.h
> > index 89769d48b4..ef33979664 100644
> > --- a/lib/librte_eal/common/eal_options.h
> > +++ b/lib/librte_eal/common/eal_options.h
> > @@ -85,6 +85,8 @@ enum {
> >  OPT_TELEMETRY_NUM,
> >  #define OPT_NO_TELEMETRY      "no-telemetry"
> >  OPT_NO_TELEMETRY_NUM,
> > +#define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
> > +OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
> >  OPT_LONG_MAX_NUM
> >  };
> >
> > diff --git a/lib/librte_eal/include/rte_eal.h
> > b/lib/librte_eal/include/rte_eal.h
> > index e3c2ef185e..706d3cca5a 100644
> > --- a/lib/librte_eal/include/rte_eal.h
> > +++ b/lib/librte_eal/include/rte_eal.h
> > @@ -43,6 +43,23 @@ enum rte_proc_type_t {  RTE_PROC_INVALID  };
> >
> > +/**
> > + * The max SIMD bitwidth value to limit vector path selection.
> > + */
> > +enum rte_max_simd {
> > +RTE_SIMD_DISABLED = 64,
> > +/**< Limits path selection to scalar, disables all vector paths. */
> > +RTE_SIMD_128 = 128,
> > +/**< Limits path selection to SSE/NEON/Altivec or below. */
> > +RTE_SIMD_256 = 256, /**< Limits path selection to AVX2 or below. */
> > +RTE_SIMD_512 = 512, /**< Limits path selection to AVX512 or below.
> > */
> > +RTE_SIMD_MAX = INT16_MAX + 1,
> > +/**<
> > + * Disables limiting by max SIMD bitwidth, allows all suitable paths.
> > + * This value is used as it is a large number and a power of 2.
> > + */
> > +};
> > +
> >  /**
> >   * Get the process type in a multi-process setup
> >   *
> > @@ -51,6 +68,29 @@ enum rte_proc_type_t {
> >   */
> >  enum rte_proc_type_t rte_eal_process_type(void);
> >
> > +/**
> > + * Get the supported SIMD bitwidth.
> > + *
> > + * @return
> > + *   uint16_t bitwidth.
> > + */
> > +__rte_experimental
> > +uint16_t rte_get_max_simd_bitwidth(void);
> > +
> > +/**
> > + * Set the supported SIMD bitwidth.
> > + * This API should only be called once at initialization, before EAL init.
> > + *
> > + * @param bitwidth
> > + *   uint16_t bitwidth.
> > + * @return
> > + *   - 0 on success.
> > + *   - -EINVAL on invalid bitwidth parameter.
> > + *   - -EPERM if bitwidth is forced.
> > + */
> > +__rte_experimental
> > +int rte_set_max_simd_bitwidth(uint16_t bitwidth);
> > +
> >  /**
> >   * Request iopl privilege for all RPL.
> >   *
> > diff --git a/lib/librte_eal/ppc/include/rte_vect.h
> > b/lib/librte_eal/ppc/include/rte_vect.h
> > index b0545c878c..a69aabc568 100644
> > --- a/lib/librte_eal/ppc/include/rte_vect.h
> > +++ b/lib/librte_eal/ppc/include/rte_vect.h
> > @@ -15,6 +15,8 @@
> >  extern "C" {
> >  #endif
> >
> > +#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_256
> > +
> >  typedef vector signed int xmm_t;
> >
> >  #defineXMM_SIZE(sizeof(xmm_t))
> > diff --git a/lib/librte_eal/rte_eal_exports.def
> > b/lib/librte_eal/rte_eal_exports.def
> > index 7b35beb702..81e99b00d9 100644
> > --- a/lib/librte_eal/rte_eal_exports.def
> > +++ b/lib/librte_eal/rte_eal_exports.def
> > @@ -26,6 +26,7 @@ EXPORTS
> >  rte_eal_tailq_register
> >  rte_eal_using_phys_addrs
> >  rte_free
> > +rte_get_max_simd_bitwidth
> >  rte_get_tsc_hz
> >  rte_hexdump
> >  rte_intr_rx_ctl
> > @@ -62,6 +63,7 @@ EXPORTS
> >  rte_memzone_reserve_aligned
> >  rte_memzone_reserve_bounded
> >  rte_memzone_walk
> > +rte_set_max_simd_bitwidth
> >  rte_socket_id
> >  rte_strerror
> >  rte_strsplit
> > diff --git a/lib/librte_eal/rte_eal_version.map
> > b/lib/librte_eal/rte_eal_version.map
> > index a93dea9fe6..714be49377 100644
> > --- a/lib/librte_eal/rte_eal_version.map
> > +++ b/lib/librte_eal/rte_eal_version.map
> > @@ -400,6 +400,8 @@ EXPERIMENTAL {
> >  # added in 20.11
> >  __rte_eal_trace_generic_size_t;
> >  rte_service_lcore_may_be_active;
> > +rte_get_max_simd_bitwidth;
> > +rte_set_max_simd_bitwidth;
> >  };
> >
> >  INTERNAL {
> > diff --git a/lib/librte_eal/x86/include/rte_vect.h
> > b/lib/librte_eal/x86/include/rte_vect.h
> > index df5a607623..a00d3d5a62 100644
> > --- a/lib/librte_eal/x86/include/rte_vect.h
> > +++ b/lib/librte_eal/x86/include/rte_vect.h
> > @@ -35,6 +35,8 @@
> >  extern "C" {
> >  #endif
> >
> > +#define RTE_DEFAULT_SIMD_BITWIDTH RTE_SIMD_256
> > +
> >  typedef __m128i xmm_t;
> >
> >  #defineXMM_SIZE(sizeof(xmm_t))
> > --
> > 2.22.0
> Arm SVE may not fit perfectly into this, because the SIMD bitwidth is
> unknown to, and of no concern to, the application.
> I think the vector path will be taken whenever the max SIMD bitwidth is not
> set to RTE_SIMD_DISABLED.
The SVE code will be independent of the width. The user chooses either 128b (for NEON) or SVE. Internally in DPDK, the Arm code will choose SVE when the width is set to max.
The default config for Arm is set to SVE (instead of NEON), as the performance will be equivalent to NEON when the SVE implementation is 128b wide.

> 
> Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>



More information about the dev mailing list