[dpdk-dev] [PATCH v3 4/5] mlx5: add support for HW packet padding
Adrien Mazarguil
adrien.mazarguil at 6wind.com
Thu Mar 17 16:38:57 CET 2016
From: Olga Shern <olgas at mellanox.com>
Environment variable MLX5_PMD_ENABLE_PADDING enables HW packet padding
in PCI bus transactions.
When packet size is cache aligned and CRC stripping is enabled, 4 fewer
bytes are written to the PCI bus. Enabling padding makes such packets
aligned again.
In cases where PCI bandwidth is the bottleneck, padding can improve
performance by 10%.
This is disabled by default since this can also decrease performance for
unaligned packet sizes.
Signed-off-by: Olga Shern <olgas at mellanox.com>
---
doc/guides/nics/mlx5.rst | 14 ++++++++++++++
doc/guides/rel_notes/release_16_04.rst | 7 +++++++
drivers/net/mlx5/Makefile | 5 +++++
drivers/net/mlx5/mlx5.c | 28 ++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5.h | 5 +++++
drivers/net/mlx5/mlx5_rxq.c | 15 +++++++++++++++
6 files changed, 74 insertions(+)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 8b63f3f..9df30be 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -156,6 +156,20 @@ Environment variables
lower performance when there is no backpressure, it is not enabled by
default.
+- ``MLX5_PMD_ENABLE_PADDING``
+
+ Enables HW packet padding in PCI bus transactions.
+
+ When packet size is cache aligned and CRC stripping is enabled, 4 fewer
+ bytes are written to the PCI bus. Enabling padding makes such packets
+ aligned again.
+
+ In cases where PCI bandwidth is the bottleneck, padding can improve
+ performance by 10%.
+
+ This is disabled by default since this can also decrease performance for
+ unaligned packet sizes.
+
Run-time configuration
~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/guides/rel_notes/release_16_04.rst b/doc/guides/rel_notes/release_16_04.rst
index a498ef7..8eb423f 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -144,6 +144,13 @@ This section should contain new features added in this release. Sample format:
Only available with Mellanox OFED >= 3.2.
+* **Added mlx5 optional packet padding by HW.**
+
+ Added an option to round PCI bus transactions to a multiple of the
+ cache line size for better alignment.
+
+ Only available with Mellanox OFED >= 3.2.
+
* **Added af_packet dynamic removal function.**
Af_packet device can now be detached using API, like other PMD devices.
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index cc6de2d..a6a3cab 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -142,6 +142,11 @@ mlx5_autoconf.h: $(RTE_SDK)/scripts/auto-config-h.sh
infiniband/verbs.h \
enum IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS \
$(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_VERBS_RX_END_PADDING \
+ infiniband/verbs.h \
+ enum IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING \
+ $(AUTOCONF_OUTPUT)
$(SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD):.c=.o): mlx5_autoconf.h
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index acfb365..94eefb9 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -68,6 +68,25 @@
#include "mlx5_defs.h"
/**
+ * Retrieve the value of an environment variable as an integer (via atoi()).
+ *
+ * @param[in] name
+ * Name of the environment variable to look up with getenv().
+ *
+ * @return
+ * Value converted by atoi(), 0 if the variable is not set.
+ */
+int
+mlx5_getenv_int(const char *name)
+{
+ const char *val = getenv(name);
+
+ if (val == NULL)
+ return 0;
+ return atoi(val);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -332,6 +351,9 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS |
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
+#ifdef HAVE_VERBS_RX_END_PADDING
+ IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN |
+#endif /* HAVE_VERBS_RX_END_PADDING */
0;
#endif /* HAVE_EXP_QUERY_DEVICE */
@@ -424,6 +446,12 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
DEBUG("FCS stripping configuration is %ssupported",
(priv->hw_fcs_strip ? "" : "not "));
+#ifdef HAVE_VERBS_RX_END_PADDING
+ priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align;
+#endif /* HAVE_VERBS_RX_END_PADDING */
+ DEBUG("hardware RX end alignment padding is %ssupported",
+ (priv->hw_padding ? "" : "not "));
+
#else /* HAVE_EXP_QUERY_DEVICE */
priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE;
#endif /* HAVE_EXP_QUERY_DEVICE */
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 9690827..1904d54 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -104,6 +104,7 @@ struct priv {
unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
+ unsigned int hw_padding:1; /* End alignment padding is supported. */
unsigned int vf:1; /* This is a VF device. */
unsigned int pending_alarm:1; /* An alarm is pending. */
/* RX/TX queues. */
@@ -160,6 +161,10 @@ priv_unlock(struct priv *priv)
rte_spinlock_unlock(&priv->lock);
}
+/* mlx5.c */
+
+int mlx5_getenv_int(const char *);
+
/* mlx5_ethdev.c */
struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 19a1119..c8af77f 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1282,6 +1282,21 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
tmpl.crc_present << 2);
#endif /* HAVE_VERBS_FCS */
+#ifdef HAVE_VERBS_RX_END_PADDING
+ if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
+ ; /* Nothing else to do. */
+ else if (priv->hw_padding) {
+ INFO("%p: enabling packet padding on queue %p",
+ (void *)dev, (void *)rxq);
+ attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING;
+ attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
+ } else
+ WARN("%p: packet padding has been requested but is not"
+ " supported, make sure MLNX_OFED and firmware are"
+ " up to date",
+ (void *)dev);
+#endif /* HAVE_VERBS_RX_END_PADDING */
+
tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
if (tmpl.wq == NULL) {
ret = (errno ? errno : EINVAL);
--
2.1.4
More information about the dev
mailing list