[dpdk-dev] [PATCH v2 03/16] mlx5: refactor RX code for the new Verbs RSS API

Adrien Mazarguil adrien.mazarguil at 6wind.com
Fri Oct 30 19:55:06 CET 2015


The new Verbs RSS API is lower-level than the previous one and much more
flexible, but requires RX queues to use Work Queues (WQs) internally
instead of Queue Pairs (QPs). These WQs are grouped in an indirection
table used by a new kind of hash RX QPs.

Hash RX QPs and the indirection table together replace the parent RSS QP
while WQs are mostly similar to child QPs.

RSS hash key is not configurable yet.

Summary of changes:

- Individual DPDK RX queues do not store flow properties anymore; this
  information is now part of the hash RX queues.
- All functions affecting the parent queue when RSS is enabled or the basic
  queues otherwise are modified to affect hash RX queues instead.
- Hash RX queues are also used when a single DPDK RX queue is configured (no
  RSS) to remove that special case.
- Hash RX queues and the indirection table are created/destroyed when the
  device is started/stopped, in addition to creating/destroying flows.
- Contrary to QPs, WQs are moved to the "ready" state before posting RX
  buffers, otherwise they are ignored.
- Resource domain information is added to WQs for better performance.
- CQs are not resized anymore when switching between non-SG and SG modes as
  it does not work correctly with WQs. Use the largest possible size
  instead, since CQ size does not have to be the same as the number of
  elements in the RX queue. This also applies to the maximum number of
  outstanding WRs in a WQ (max_recv_wr).

Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
Signed-off-by: Olga Shern <olgas at mellanox.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
Signed-off-by: Or Ami <ora at mellanox.com>
Signed-off-by: Yaacov Hazan <yaacovh at mellanox.com>
---
 drivers/net/mlx5/Makefile       |   8 -
 drivers/net/mlx5/mlx5.c         |  36 +--
 drivers/net/mlx5/mlx5.h         |  25 +-
 drivers/net/mlx5/mlx5_ethdev.c  |  54 +---
 drivers/net/mlx5/mlx5_mac.c     | 186 +++++++------
 drivers/net/mlx5/mlx5_rxmode.c  | 266 ++++++++++---------
 drivers/net/mlx5/mlx5_rxq.c     | 559 +++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxtx.c    |  11 +-
 drivers/net/mlx5/mlx5_rxtx.h    |  19 +-
 drivers/net/mlx5/mlx5_trigger.c |  87 ++-----
 drivers/net/mlx5/mlx5_vlan.c    |  33 +--
 11 files changed, 606 insertions(+), 678 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 2969045..938f924 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -112,17 +112,9 @@ endif
 mlx5_autoconf.h: $(RTE_SDK)/scripts/auto-config-h.sh
 	$Q $(RM) -f -- '$@'
 	$Q sh -- '$<' '$@' \
-		RSS_SUPPORT \
-		infiniband/verbs.h \
-		enum IBV_EXP_DEVICE_UD_RSS $(AUTOCONF_OUTPUT)
-	$Q sh -- '$<' '$@' \
 		HAVE_EXP_QUERY_DEVICE \
 		infiniband/verbs.h \
 		type 'struct ibv_exp_device_attr' $(AUTOCONF_OUTPUT)
-	$Q sh -- '$<' '$@' \
-		HAVE_EXP_QP_BURST_RECV_SG_LIST \
-		infiniband/verbs.h \
-		field 'struct ibv_exp_qp_burst_family.recv_sg_list' $(AUTOCONF_OUTPUT)
 
 mlx5.o: mlx5_autoconf.h
 
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8f75f76..e394d32 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -85,6 +85,11 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	DEBUG("%p: closing device \"%s\"",
 	      (void *)dev,
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
+	/* In case mlx5_dev_stop() has not been called. */
+	priv_allmulticast_disable(priv);
+	priv_promiscuous_disable(priv);
+	priv_mac_addrs_disable(priv);
+	priv_destroy_hash_rxqs(priv);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
@@ -116,8 +121,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
-	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -297,9 +300,6 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
 #ifdef HAVE_EXP_QUERY_DEVICE
 		exp_device_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;
-#ifdef RSS_SUPPORT
-		exp_device_attr.comp_mask |= IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ;
-#endif /* RSS_SUPPORT */
 #endif /* HAVE_EXP_QUERY_DEVICE */
 
 		DEBUG("using port %u (%08" PRIx32 ")", port, test);
@@ -349,32 +349,6 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			ERROR("ibv_exp_query_device() failed");
 			goto port_error;
 		}
-#ifdef RSS_SUPPORT
-		if ((exp_device_attr.exp_device_cap_flags &
-		     IBV_EXP_DEVICE_QPG) &&
-		    (exp_device_attr.exp_device_cap_flags &
-		     IBV_EXP_DEVICE_UD_RSS) &&
-		    (exp_device_attr.comp_mask &
-		     IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ) &&
-		    (exp_device_attr.max_rss_tbl_sz > 0)) {
-			priv->hw_qpg = 1;
-			priv->hw_rss = 1;
-			priv->max_rss_tbl_sz = exp_device_attr.max_rss_tbl_sz;
-		} else {
-			priv->hw_qpg = 0;
-			priv->hw_rss = 0;
-			priv->max_rss_tbl_sz = 0;
-		}
-		priv->hw_tss = !!(exp_device_attr.exp_device_cap_flags &
-				  IBV_EXP_DEVICE_UD_TSS);
-		DEBUG("device flags: %s%s%s",
-		      (priv->hw_qpg ? "IBV_DEVICE_QPG " : ""),
-		      (priv->hw_tss ? "IBV_DEVICE_TSS " : ""),
-		      (priv->hw_rss ? "IBV_DEVICE_RSS " : ""));
-		if (priv->hw_rss)
-			DEBUG("maximum RSS indirection table size: %u",
-			      exp_device_attr.max_rss_tbl_sz);
-#endif /* RSS_SUPPORT */
 
 		priv->hw_csum =
 			((exp_device_attr.exp_device_cap_flags &
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c8a517c..4407b18 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -98,20 +98,19 @@ struct priv {
 	unsigned int started:1; /* Device started, flows enabled. */
 	unsigned int promisc_req:1; /* Promiscuous mode requested. */
 	unsigned int allmulti_req:1; /* All multicast mode requested. */
-	unsigned int hw_qpg:1; /* QP groups are supported. */
-	unsigned int hw_tss:1; /* TSS is supported. */
-	unsigned int hw_rss:1; /* RSS is supported. */
 	unsigned int hw_csum:1; /* Checksum offload is supported. */
 	unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
-	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
-	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
 	struct txq *(*txqs)[]; /* TX queues. */
+	/* Indirection table referencing all RX WQs. */
+	struct ibv_exp_rwq_ind_table *ind_table;
+	/* Hash RX QPs feeding the indirection table. */
+	struct hash_rxq (*hash_rxqs)[];
+	unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
@@ -158,23 +157,25 @@ int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
 /* mlx5_mac.c */
 
 int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
-void rxq_mac_addrs_del(struct rxq *);
+void hash_rxq_mac_addrs_del(struct hash_rxq *);
+void priv_mac_addrs_disable(struct priv *);
 void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
-int rxq_mac_addrs_add(struct rxq *);
+int hash_rxq_mac_addrs_add(struct hash_rxq *);
 int priv_mac_addr_add(struct priv *, unsigned int,
 		      const uint8_t (*)[ETHER_ADDR_LEN]);
+int priv_mac_addrs_enable(struct priv *);
 void mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
 		       uint32_t);
 
 /* mlx5_rxmode.c */
 
-int rxq_promiscuous_enable(struct rxq *);
+int priv_promiscuous_enable(struct priv *);
 void mlx5_promiscuous_enable(struct rte_eth_dev *);
-void rxq_promiscuous_disable(struct rxq *);
+void priv_promiscuous_disable(struct priv *);
 void mlx5_promiscuous_disable(struct rte_eth_dev *);
-int rxq_allmulticast_enable(struct rxq *);
+int priv_allmulticast_enable(struct priv *);
 void mlx5_allmulticast_enable(struct rte_eth_dev *);
-void rxq_allmulticast_disable(struct rxq *);
+void priv_allmulticast_disable(struct priv *);
 void mlx5_allmulticast_disable(struct rte_eth_dev *);
 
 /* mlx5_stats.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 5df5fa1..fac685e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -394,7 +394,6 @@ priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
- * Allocate parent RSS queue when several RX queues are requested.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -408,8 +407,6 @@ dev_configure(struct rte_eth_dev *dev)
 	struct priv *priv = dev->data->dev_private;
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
-	unsigned int tmp;
-	int ret;
 
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
@@ -422,47 +419,8 @@ dev_configure(struct rte_eth_dev *dev)
 		return 0;
 	INFO("%p: RX queues number update: %u -> %u",
 	     (void *)dev, priv->rxqs_n, rxqs_n);
-	/* If RSS is enabled, disable it first. */
-	if (priv->rss) {
-		unsigned int i;
-
-		/* Only if there are no remaining child RX queues. */
-		for (i = 0; (i != priv->rxqs_n); ++i)
-			if ((*priv->rxqs)[i] != NULL)
-				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
-		priv->rss = 0;
-		priv->rxqs_n = 0;
-	}
-	if (rxqs_n <= 1) {
-		/* Nothing else to do. */
-		priv->rxqs_n = rxqs_n;
-		return 0;
-	}
-	/* Allocate a new RSS parent queue if supported by hardware. */
-	if (!priv->hw_rss) {
-		ERROR("%p: only a single RX queue can be configured when"
-		      " hardware doesn't support RSS",
-		      (void *)dev);
-		return EINVAL;
-	}
-	/* Fail if hardware doesn't support that many RSS queues. */
-	if (rxqs_n >= priv->max_rss_tbl_sz) {
-		ERROR("%p: only %u RX queues can be configured for RSS",
-		      (void *)dev, priv->max_rss_tbl_sz);
-		return EINVAL;
-	}
-	priv->rss = 1;
-	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, NULL, NULL);
-	if (!ret)
-		return 0;
-	/* Failure, rollback. */
-	priv->rss = 0;
-	priv->rxqs_n = tmp;
-	assert(ret > 0);
-	return ret;
+	return 0;
 }
 
 /**
@@ -671,16 +629,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 				rx_func = mlx5_rx_burst_sp;
 			break;
 		}
-		/* Reenable non-RSS queue attributes. No need to check
-		 * for errors at this stage. */
-		if (!priv->rss) {
-			if (priv->started)
-				rxq_mac_addrs_add(rxq);
-			if (priv->started && priv->promisc_req)
-				rxq_promiscuous_enable(rxq);
-			if (priv->started && priv->allmulti_req)
-				rxq_allmulticast_enable(rxq);
-		}
 		/* Scattered burst function takes priority. */
 		if (rxq->sp)
 			rx_func = mlx5_rx_burst_sp;
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 95afccf..b580494 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -93,75 +93,75 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
 /**
  * Delete MAC flow steering rule.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  * @param mac_index
  *   MAC address index.
  * @param vlan_index
  *   VLAN index to use.
  */
 static void
-rxq_del_mac_flow(struct rxq *rxq, unsigned int mac_index,
-		 unsigned int vlan_index)
+hash_rxq_del_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
+		      unsigned int vlan_index)
 {
 #ifndef NDEBUG
 	const uint8_t (*mac)[ETHER_ADDR_LEN] =
 		(const uint8_t (*)[ETHER_ADDR_LEN])
-		rxq->priv->mac[mac_index].addr_bytes;
+		hash_rxq->priv->mac[mac_index].addr_bytes;
 #endif
 
-	assert(mac_index < RTE_DIM(rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(rxq->mac_flow[mac_index]));
-	if (rxq->mac_flow[mac_index][vlan_index] == NULL)
+	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
+	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
+	if (hash_rxq->mac_flow[mac_index][vlan_index] == NULL)
 		return;
 	DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
 	      " VLAN index %u",
-	      (void *)rxq,
+	      (void *)hash_rxq,
 	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
 	      mac_index,
 	      vlan_index);
-	claim_zero(ibv_destroy_flow(rxq->mac_flow[mac_index][vlan_index]));
-	rxq->mac_flow[mac_index][vlan_index] = NULL;
+	claim_zero(ibv_destroy_flow(hash_rxq->mac_flow
+				    [mac_index][vlan_index]));
+	hash_rxq->mac_flow[mac_index][vlan_index] = NULL;
 }
 
 /**
- * Unregister a MAC address from a RX queue.
+ * Unregister a MAC address from a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  * @param mac_index
  *   MAC address index.
  */
 static void
-rxq_mac_addr_del(struct rxq *rxq, unsigned int mac_index)
+hash_rxq_mac_addr_del(struct hash_rxq *hash_rxq, unsigned int mac_index)
 {
 	unsigned int i;
 
-	assert(mac_index < RTE_DIM(rxq->mac_flow));
-	for (i = 0; (i != RTE_DIM(rxq->mac_flow[mac_index])); ++i)
-		rxq_del_mac_flow(rxq, mac_index, i);
+	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
+	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow[mac_index])); ++i)
+		hash_rxq_del_mac_flow(hash_rxq, mac_index, i);
 }
 
 /**
- * Unregister all MAC addresses from a RX queue.
+ * Unregister all MAC addresses from a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  */
 void
-rxq_mac_addrs_del(struct rxq *rxq)
+hash_rxq_mac_addrs_del(struct hash_rxq *hash_rxq)
 {
 	unsigned int i;
 
-	for (i = 0; (i != RTE_DIM(rxq->mac_flow)); ++i)
-		rxq_mac_addr_del(rxq, i);
+	for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow)); ++i)
+		hash_rxq_mac_addr_del(hash_rxq, i);
 }
 
 /**
  * Unregister a MAC address.
  *
- * In RSS mode, the MAC address is unregistered from the parent queue,
- * otherwise it is unregistered from each queue directly.
+ * This is done for each hash RX queue.
  *
  * @param priv
  *   Pointer to private structure.
@@ -176,17 +176,27 @@ priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
 	assert(mac_index < RTE_DIM(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
-	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
-		goto end;
-	}
-	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
-		rxq_mac_addr_del((*priv->rxqs)[i], mac_index);
-end:
+	for (i = 0; (i != priv->hash_rxqs_n); ++i)
+		hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[i], mac_index);
 	BITFIELD_RESET(priv->mac_configured, mac_index);
 }
 
 /**
+ * Unregister all MAC addresses from all hash RX queues.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_mac_addrs_disable(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; (i != priv->hash_rxqs_n); ++i)
+		hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[i]);
+}
+
+/**
  * DPDK callback to remove a MAC address.
  *
  * @param dev
@@ -213,8 +223,8 @@ end:
 /**
  * Add MAC flow steering rule.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  * @param mac_index
  *   MAC address index to register.
  * @param vlan_index
@@ -224,11 +234,11 @@ end:
  *   0 on success, errno value on failure.
  */
 static int
-rxq_add_mac_flow(struct rxq *rxq, unsigned int mac_index,
-		 unsigned int vlan_index)
+hash_rxq_add_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
+		      unsigned int vlan_index)
 {
 	struct ibv_flow *flow;
-	struct priv *priv = rxq->priv;
+	struct priv *priv = hash_rxq->priv;
 	const uint8_t (*mac)[ETHER_ADDR_LEN] =
 			(const uint8_t (*)[ETHER_ADDR_LEN])
 			priv->mac[mac_index].addr_bytes;
@@ -241,9 +251,9 @@ rxq_add_mac_flow(struct rxq *rxq, unsigned int mac_index,
 	unsigned int vlan_enabled = !!priv->vlan_filter_n;
 	unsigned int vlan_id = priv->vlan_filter[vlan_index];
 
-	assert(mac_index < RTE_DIM(rxq->mac_flow));
-	assert(vlan_index < RTE_DIM(rxq->mac_flow[mac_index]));
-	if (rxq->mac_flow[mac_index][vlan_index] != NULL)
+	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
+	assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
+	if (hash_rxq->mac_flow[mac_index][vlan_index] != NULL)
 		return 0;
 	/*
 	 * No padding must be inserted by the compiler between attr and spec.
@@ -273,7 +283,7 @@ rxq_add_mac_flow(struct rxq *rxq, unsigned int mac_index,
 	};
 	DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
 	      " VLAN index %u filtering %s, ID %u",
-	      (void *)rxq,
+	      (void *)hash_rxq,
 	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
 	      mac_index,
 	      vlan_index,
@@ -281,25 +291,25 @@ rxq_add_mac_flow(struct rxq *rxq, unsigned int mac_index,
 	      vlan_id);
 	/* Create related flow. */
 	errno = 0;
-	flow = ibv_create_flow(rxq->qp, attr);
+	flow = ibv_create_flow(hash_rxq->qp, attr);
 	if (flow == NULL) {
 		/* It's not clear whether errno is always set in this case. */
 		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)rxq, errno,
+		      (void *)hash_rxq, errno,
 		      (errno ? strerror(errno) : "Unknown error"));
 		if (errno)
 			return errno;
 		return EINVAL;
 	}
-	rxq->mac_flow[mac_index][vlan_index] = flow;
+	hash_rxq->mac_flow[mac_index][vlan_index] = flow;
 	return 0;
 }
 
 /**
- * Register a MAC address in a RX queue.
+ * Register a MAC address in a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  * @param mac_index
  *   MAC address index to register.
  *
@@ -307,22 +317,23 @@ rxq_add_mac_flow(struct rxq *rxq, unsigned int mac_index,
  *   0 on success, errno value on failure.
  */
 static int
-rxq_mac_addr_add(struct rxq *rxq, unsigned int mac_index)
+hash_rxq_mac_addr_add(struct hash_rxq *hash_rxq, unsigned int mac_index)
 {
-	struct priv *priv = rxq->priv;
+	struct priv *priv = hash_rxq->priv;
 	unsigned int i = 0;
 	int ret;
 
-	assert(mac_index < RTE_DIM(rxq->mac_flow));
-	assert(RTE_DIM(rxq->mac_flow[mac_index]) ==
+	assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
+	assert(RTE_DIM(hash_rxq->mac_flow[mac_index]) ==
 	       RTE_DIM(priv->vlan_filter));
 	/* Add a MAC address for each VLAN filter, or at least once. */
 	do {
-		ret = rxq_add_mac_flow(rxq, mac_index, i);
+		ret = hash_rxq_add_mac_flow(hash_rxq, mac_index, i);
 		if (ret) {
 			/* Failure, rollback. */
 			while (i != 0)
-				rxq_del_mac_flow(rxq, mac_index, --i);
+				hash_rxq_del_mac_flow(hash_rxq, mac_index,
+						      --i);
 			return ret;
 		}
 	} while (++i < priv->vlan_filter_n);
@@ -330,31 +341,31 @@ rxq_mac_addr_add(struct rxq *rxq, unsigned int mac_index)
 }
 
 /**
- * Register all MAC addresses in a RX queue.
+ * Register all MAC addresses in a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 int
-rxq_mac_addrs_add(struct rxq *rxq)
+hash_rxq_mac_addrs_add(struct hash_rxq *hash_rxq)
 {
-	struct priv *priv = rxq->priv;
+	struct priv *priv = hash_rxq->priv;
 	unsigned int i;
 	int ret;
 
-	assert(RTE_DIM(priv->mac) == RTE_DIM(rxq->mac_flow));
+	assert(RTE_DIM(priv->mac) == RTE_DIM(hash_rxq->mac_flow));
 	for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
 		if (!BITFIELD_ISSET(priv->mac_configured, i))
 			continue;
-		ret = rxq_mac_addr_add(rxq, i);
+		ret = hash_rxq_mac_addr_add(hash_rxq, i);
 		if (!ret)
 			continue;
 		/* Failure, rollback. */
 		while (i != 0)
-			rxq_mac_addr_del(rxq, --i);
+			hash_rxq_mac_addr_del(hash_rxq, --i);
 		assert(ret > 0);
 		return ret;
 	}
@@ -364,8 +375,7 @@ rxq_mac_addrs_add(struct rxq *rxq)
 /**
  * Register a MAC address.
  *
- * In RSS mode, the MAC address is registered in the parent queue,
- * otherwise it is registered in each queue directly.
+ * This is done for each hash RX queue.
  *
  * @param priv
  *   Pointer to private structure.
@@ -405,33 +415,49 @@ priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
 			(*mac)[3], (*mac)[4], (*mac)[5]
 		}
 	};
-	/* If device isn't started, this is all we need to do. */
-	if (!priv->started)
-		goto end;
-	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
-		if (ret)
-			return ret;
-		goto end;
-	}
-	for (i = 0; (i != priv->rxqs_n); ++i) {
-		if ((*priv->rxqs)[i] == NULL)
-			continue;
-		ret = rxq_mac_addr_add((*priv->rxqs)[i], mac_index);
+	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
+		ret = hash_rxq_mac_addr_add(&(*priv->hash_rxqs)[i], mac_index);
 		if (!ret)
 			continue;
 		/* Failure, rollback. */
 		while (i != 0)
-			if ((*priv->rxqs)[--i] != NULL)
-				rxq_mac_addr_del((*priv->rxqs)[i], mac_index);
+			hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[--i],
+					      mac_index);
 		return ret;
 	}
-end:
 	BITFIELD_SET(priv->mac_configured, mac_index);
 	return 0;
 }
 
 /**
+ * Register all MAC addresses in all hash RX queues.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+priv_mac_addrs_enable(struct priv *priv)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
+		ret = hash_rxq_mac_addrs_add(&(*priv->hash_rxqs)[i]);
+		if (!ret)
+			continue;
+		/* Failure, rollback. */
+		while (i != 0)
+			hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[--i]);
+		assert(ret > 0);
+		return ret;
+	}
+	return 0;
+}
+
+/**
  * DPDK callback to add a MAC address.
  *
  * @param dev
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index 7efa21b..2a74c64 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -58,44 +58,78 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
+static void hash_rxq_promiscuous_disable(struct hash_rxq *);
+static void hash_rxq_allmulticast_disable(struct hash_rxq *);
+
 /**
- * Enable promiscuous mode in a RX queue.
+ * Enable promiscuous mode in a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  *
  * @return
  *   0 on success, errno value on failure.
  */
-int
-rxq_promiscuous_enable(struct rxq *rxq)
+static int
+hash_rxq_promiscuous_enable(struct hash_rxq *hash_rxq)
 {
 	struct ibv_flow *flow;
 	struct ibv_flow_attr attr = {
 		.type = IBV_FLOW_ATTR_ALL_DEFAULT,
 		.num_of_specs = 0,
-		.port = rxq->priv->port,
+		.port = hash_rxq->priv->port,
 		.flags = 0
 	};
 
-	if (rxq->priv->vf)
+	if (hash_rxq->priv->vf)
 		return 0;
-	if (rxq->promisc_flow != NULL)
+	if (hash_rxq->promisc_flow != NULL)
 		return 0;
-	DEBUG("%p: enabling promiscuous mode", (void *)rxq);
+	DEBUG("%p: enabling promiscuous mode", (void *)hash_rxq);
 	errno = 0;
-	flow = ibv_create_flow(rxq->qp, &attr);
+	flow = ibv_create_flow(hash_rxq->qp, &attr);
 	if (flow == NULL) {
 		/* It's not clear whether errno is always set in this case. */
 		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)rxq, errno,
+		      (void *)hash_rxq, errno,
 		      (errno ? strerror(errno) : "Unknown error"));
 		if (errno)
 			return errno;
 		return EINVAL;
 	}
-	rxq->promisc_flow = flow;
-	DEBUG("%p: promiscuous mode enabled", (void *)rxq);
+	hash_rxq->promisc_flow = flow;
+	DEBUG("%p: promiscuous mode enabled", (void *)hash_rxq);
+	return 0;
+}
+
+/**
+ * Enable promiscuous mode in all hash RX queues.
+ *
+ * @param priv
+ *   Private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+priv_promiscuous_enable(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
+		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
+		int ret;
+
+		ret = hash_rxq_promiscuous_enable(hash_rxq);
+		if (!ret)
+			continue;
+		/* Failure, rollback. */
+		while (i != 0) {
+			hash_rxq = &(*priv->hash_rxqs)[--i];
+			hash_rxq_promiscuous_disable(hash_rxq);
+		}
+		return ret;
+	}
 	return 0;
 }
 
@@ -109,56 +143,48 @@ void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-	unsigned int i;
 	int ret;
 
 	priv_lock(priv);
 	priv->promisc_req = 1;
-	/* If device isn't started, this is all we need to do. */
-	if (!priv->started)
-		goto end;
-	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
-		if (ret) {
-			priv_unlock(priv);
-			return;
-		}
-		goto end;
-	}
-	for (i = 0; (i != priv->rxqs_n); ++i) {
-		if ((*priv->rxqs)[i] == NULL)
-			continue;
-		ret = rxq_promiscuous_enable((*priv->rxqs)[i]);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			if ((*priv->rxqs)[--i] != NULL)
-				rxq_promiscuous_disable((*priv->rxqs)[i]);
-		priv_unlock(priv);
-		return;
-	}
-end:
+	ret = priv_promiscuous_enable(priv);
+	if (ret)
+		ERROR("cannot enable promiscuous mode: %s", strerror(ret));
 	priv_unlock(priv);
 }
 
 /**
- * Disable promiscuous mode in a RX queue.
+ * Disable promiscuous mode in a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  */
-void
-rxq_promiscuous_disable(struct rxq *rxq)
+static void
+hash_rxq_promiscuous_disable(struct hash_rxq *hash_rxq)
 {
-	if (rxq->priv->vf)
+	if (hash_rxq->priv->vf)
 		return;
-	if (rxq->promisc_flow == NULL)
+	if (hash_rxq->promisc_flow == NULL)
 		return;
-	DEBUG("%p: disabling promiscuous mode", (void *)rxq);
-	claim_zero(ibv_destroy_flow(rxq->promisc_flow));
-	rxq->promisc_flow = NULL;
-	DEBUG("%p: promiscuous mode disabled", (void *)rxq);
+	DEBUG("%p: disabling promiscuous mode", (void *)hash_rxq);
+	claim_zero(ibv_destroy_flow(hash_rxq->promisc_flow));
+	hash_rxq->promisc_flow = NULL;
+	DEBUG("%p: promiscuous mode disabled", (void *)hash_rxq);
+}
+
+/**
+ * Disable promiscuous mode in all hash RX queues.
+ *
+ * @param priv
+ *   Private structure.
+ */
+void
+priv_promiscuous_disable(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; (i != priv->hash_rxqs_n); ++i)
+		hash_rxq_promiscuous_disable(&(*priv->hash_rxqs)[i]);
 }
 
 /**
@@ -171,57 +197,81 @@ void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-	unsigned int i;
 
 	priv_lock(priv);
 	priv->promisc_req = 0;
-	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
-		goto end;
-	}
-	for (i = 0; (i != priv->rxqs_n); ++i)
-		if ((*priv->rxqs)[i] != NULL)
-			rxq_promiscuous_disable((*priv->rxqs)[i]);
-end:
+	priv_promiscuous_disable(priv);
 	priv_unlock(priv);
 }
 
 /**
- * Enable allmulti mode in a RX queue.
+ * Enable allmulti mode in a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  *
  * @return
  *   0 on success, errno value on failure.
  */
-int
-rxq_allmulticast_enable(struct rxq *rxq)
+static int
+hash_rxq_allmulticast_enable(struct hash_rxq *hash_rxq)
 {
 	struct ibv_flow *flow;
 	struct ibv_flow_attr attr = {
 		.type = IBV_FLOW_ATTR_MC_DEFAULT,
 		.num_of_specs = 0,
-		.port = rxq->priv->port,
+		.port = hash_rxq->priv->port,
 		.flags = 0
 	};
 
-	if (rxq->allmulti_flow != NULL)
+	if (hash_rxq->allmulti_flow != NULL)
 		return 0;
-	DEBUG("%p: enabling allmulticast mode", (void *)rxq);
+	DEBUG("%p: enabling allmulticast mode", (void *)hash_rxq);
 	errno = 0;
-	flow = ibv_create_flow(rxq->qp, &attr);
+	flow = ibv_create_flow(hash_rxq->qp, &attr);
 	if (flow == NULL) {
 		/* It's not clear whether errno is always set in this case. */
 		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)rxq, errno,
+		      (void *)hash_rxq, errno,
 		      (errno ? strerror(errno) : "Unknown error"));
 		if (errno)
 			return errno;
 		return EINVAL;
 	}
-	rxq->allmulti_flow = flow;
-	DEBUG("%p: allmulticast mode enabled", (void *)rxq);
+	hash_rxq->allmulti_flow = flow;
+	DEBUG("%p: allmulticast mode enabled", (void *)hash_rxq);
+	return 0;
+}
+
+/**
+ * Enable allmulti mode in all hash RX queues.
+ * On failure, hash RX queues enabled so far are disabled again.
+ *
+ * @param priv
+ *   Private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+priv_allmulticast_enable(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
+		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
+		int ret;
+
+		ret = hash_rxq_allmulticast_enable(hash_rxq);
+		if (!ret)
+			continue;
+		/* Failure, rollback. */
+		while (i != 0) {
+			hash_rxq = &(*priv->hash_rxqs)[--i];
+			hash_rxq_allmulticast_disable(hash_rxq);
+		}
+		return ret;
+	}
 	return 0;
 }
 
@@ -235,54 +285,46 @@ void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-	unsigned int i;
 	int ret;
 
 	priv_lock(priv);
 	priv->allmulti_req = 1;
-	/* If device isn't started, this is all we need to do. */
-	if (!priv->started)
-		goto end;
-	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
-		if (ret) {
-			priv_unlock(priv);
-			return;
-		}
-		goto end;
-	}
-	for (i = 0; (i != priv->rxqs_n); ++i) {
-		if ((*priv->rxqs)[i] == NULL)
-			continue;
-		ret = rxq_allmulticast_enable((*priv->rxqs)[i]);
-		if (!ret)
-			continue;
-		/* Failure, rollback. */
-		while (i != 0)
-			if ((*priv->rxqs)[--i] != NULL)
-				rxq_allmulticast_disable((*priv->rxqs)[i]);
-		priv_unlock(priv);
-		return;
-	}
-end:
+	ret = priv_allmulticast_enable(priv);
+	if (ret)
+		ERROR("cannot enable allmulticast mode: %s", strerror(ret));
 	priv_unlock(priv);
 }
 
 /**
- * Disable allmulti mode in a RX queue.
+ * Disable allmulti mode in a hash RX queue.
  *
- * @param rxq
- *   Pointer to RX queue structure.
+ * @param hash_rxq
+ *   Pointer to hash RX queue structure.
  */
-void
-rxq_allmulticast_disable(struct rxq *rxq)
+static void
+hash_rxq_allmulticast_disable(struct hash_rxq *hash_rxq)
 {
-	if (rxq->allmulti_flow == NULL)
+	if (hash_rxq->allmulti_flow == NULL)
 		return;
-	DEBUG("%p: disabling allmulticast mode", (void *)rxq);
-	claim_zero(ibv_destroy_flow(rxq->allmulti_flow));
-	rxq->allmulti_flow = NULL;
-	DEBUG("%p: allmulticast mode disabled", (void *)rxq);
+	DEBUG("%p: disabling allmulticast mode", (void *)hash_rxq);
+	claim_zero(ibv_destroy_flow(hash_rxq->allmulti_flow));
+	hash_rxq->allmulti_flow = NULL;
+	DEBUG("%p: allmulticast mode disabled", (void *)hash_rxq);
+}
+
+/**
+ * Disable allmulti mode in all hash RX queues.
+ *
+ * @param priv
+ *   Private structure.
+ */
+void
+priv_allmulticast_disable(struct priv *priv)
+{
+	unsigned int i;
+
+	for (i = 0; (i != priv->hash_rxqs_n); ++i)
+		hash_rxq_allmulticast_disable(&(*priv->hash_rxqs)[i]);
 }
 
 /**
@@ -295,17 +337,9 @@ void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-	unsigned int i;
 
 	priv_lock(priv);
 	priv->allmulti_req = 0;
-	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
-		goto end;
-	}
-	for (i = 0; (i != priv->rxqs_n); ++i)
-		if ((*priv->rxqs)[i] != NULL)
-			rxq_allmulticast_disable((*priv->rxqs)[i]);
-end:
+	priv_allmulticast_disable(priv);
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f2f773e..6d8f7d2 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -60,11 +60,220 @@
 #endif
 
 #include "mlx5.h"
-#include "mlx5_autoconf.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 #include "mlx5_defs.h"
 
+/* Default RSS hash key also used for ConnectX-3. */
+static uint8_t hash_rxq_default_key[] = {
+	0x2c, 0xc6, 0x81, 0xd1,
+	0x5b, 0xdb, 0xf4, 0xf7,
+	0xfc, 0xa2, 0x83, 0x19,
+	0xdb, 0x1a, 0x3e, 0x94,
+	0x6b, 0x9e, 0x38, 0xd9,
+	0x2c, 0x9c, 0x03, 0xd1,
+	0xad, 0x99, 0x44, 0xa7,
+	0xd9, 0x56, 0x3d, 0x59,
+	0x06, 0x3c, 0x25, 0xf3,
+	0xfc, 0x1f, 0xdc, 0x2a,
+};
+
+/**
+ * Return the base-2 logarithm of the input value, rounded up.
+ *
+ * @param v
+ *   Input value.
+ *
+ * @return
+ *   Base-2 logarithm of the input value, rounded up (0 if v is 0 or 1).
+ */
+static unsigned int
+log2above(unsigned int v)
+{
+	unsigned int l;
+	unsigned int r;
+
+	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
+		r |= (v & 1);
+	return (l + r);
+}
+
+/**
+ * Initialize hash RX queues and indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+priv_create_hash_rxqs(struct priv *priv)
+{
+	static const uint64_t rss_hash_table[] = {
+		/* TCPv4. */
+		(IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4 |
+		 IBV_EXP_RX_HASH_SRC_PORT_TCP | IBV_EXP_RX_HASH_DST_PORT_TCP),
+		/* UDPv4. */
+		(IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4 |
+		 IBV_EXP_RX_HASH_SRC_PORT_UDP | IBV_EXP_RX_HASH_DST_PORT_UDP),
+		/* Other IPv4. */
+		(IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4),
+		/* None, used for everything else. */
+		0,
+	};
+
+	DEBUG("allocating hash RX queues for %u WQs", priv->rxqs_n);
+	assert(priv->ind_table == NULL);
+	assert(priv->hash_rxqs == NULL);
+	assert(priv->hash_rxqs_n == 0);
+	assert(priv->pd != NULL);
+	assert(priv->ctx != NULL);
+	if (priv->rxqs_n == 0)
+		return EINVAL;
+	assert(priv->rxqs != NULL);
+
+	/* FIXME: large data structures are allocated on the stack. */
+	unsigned int wqs_n = (1 << log2above(priv->rxqs_n));
+	struct ibv_exp_wq *wqs[wqs_n];
+	struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
+		.pd = priv->pd,
+		.log_ind_tbl_size = log2above(priv->rxqs_n),
+		.ind_tbl = wqs,
+		.comp_mask = 0,
+	};
+	struct ibv_exp_rwq_ind_table *ind_table = NULL;
+	/* If only one RX queue is configured, RSS is not needed and a single
+	 * empty hash entry is used (last rss_hash_table[] entry). */
+	unsigned int hash_rxqs_n =
+		((priv->rxqs_n == 1) ? 1 : RTE_DIM(rss_hash_table));
+	struct hash_rxq (*hash_rxqs)[hash_rxqs_n] = NULL;
+	unsigned int i;
+	unsigned int j;
+	int err = 0;
+
+	if (wqs_n < priv->rxqs_n) {
+		ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
+		err = ERANGE;
+		goto error;
+	}
+	if (wqs_n != priv->rxqs_n)
+		WARN("%u RX queues are configured, consider rounding this"
+		     " number to the next power of two (%u) for optimal"
+		     " performance",
+		     priv->rxqs_n, wqs_n);
+	/* When the number of RX queues is not a power of two, the remaining
+	 * table entries are padded with reused WQs and hashes are not spread
+	 * uniformly. */
+	for (i = 0, j = 0; (i != wqs_n); ++i) {
+		wqs[i] = (*priv->rxqs)[j]->wq;
+		if (++j == priv->rxqs_n)
+			j = 0;
+	}
+	errno = 0;
+	ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, &ind_init_attr);
+	if (ind_table == NULL) {
+		/* Not clear whether errno is set. */
+		err = (errno ? errno : EINVAL);
+		ERROR("RX indirection table creation failed with error %d: %s",
+		      err, strerror(err));
+		goto error;
+	}
+	/* Allocate array that holds hash RX queues and related data. */
+	hash_rxqs = rte_malloc(__func__, sizeof(*hash_rxqs), 0);
+	if (hash_rxqs == NULL) {
+		err = ENOMEM;
+		ERROR("cannot allocate hash RX queues container: %s",
+		      strerror(err));
+		goto error;
+	}
+	for (i = 0, j = (RTE_DIM(rss_hash_table) - hash_rxqs_n);
+	     (j != RTE_DIM(rss_hash_table));
+	     ++i, ++j) {
+		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
+
+		struct ibv_exp_rx_hash_conf hash_conf = {
+			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+			.rx_hash_key_len = sizeof(hash_rxq_default_key),
+			.rx_hash_key = hash_rxq_default_key,
+			.rx_hash_fields_mask = rss_hash_table[j],
+			.rwq_ind_tbl = ind_table,
+		};
+		struct ibv_exp_qp_init_attr qp_init_attr = {
+			.max_inl_recv = 0, /* Currently not supported. */
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+				      IBV_EXP_QP_INIT_ATTR_RX_HASH),
+			.pd = priv->pd,
+			.rx_hash_conf = &hash_conf,
+			.port_num = priv->port,
+		};
+
+		*hash_rxq = (struct hash_rxq){
+			.priv = priv,
+			.qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
+		};
+		if (hash_rxq->qp == NULL) {
+			err = (errno ? errno : EINVAL);
+			ERROR("Hash RX QP creation failure: %s",
+			      strerror(err));
+			while (i) {
+				hash_rxq = &(*hash_rxqs)[--i];
+				claim_zero(ibv_destroy_qp(hash_rxq->qp));
+			}
+			goto error;
+		}
+	}
+	priv->ind_table = ind_table;
+	priv->hash_rxqs = hash_rxqs;
+	priv->hash_rxqs_n = hash_rxqs_n;
+	assert(err == 0);
+	return 0;
+error:
+	rte_free(hash_rxqs);
+	if (ind_table != NULL)
+		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
+	return err;
+}
+
+/**
+ * Clean up hash RX queues and indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_destroy_hash_rxqs(struct priv *priv)
+{
+	unsigned int i;
+
+	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
+	if (priv->hash_rxqs_n == 0) {
+		assert(priv->hash_rxqs == NULL);
+		assert(priv->ind_table == NULL);
+		return;
+	}
+	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
+		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
+		unsigned int j, k;
+
+		assert(hash_rxq->priv == priv);
+		assert(hash_rxq->qp != NULL);
+		/* Also check that there are no remaining flows. */
+		assert(hash_rxq->allmulti_flow == NULL);
+		assert(hash_rxq->promisc_flow == NULL);
+		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
+			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
+				assert(hash_rxq->mac_flow[j][k] == NULL);
+		claim_zero(ibv_destroy_qp(hash_rxq->qp));
+	}
+	priv->hash_rxqs_n = 0;
+	rte_free(priv->hash_rxqs);
+	priv->hash_rxqs = NULL;
+	claim_zero(ibv_exp_destroy_rwq_ind_table(priv->ind_table));
+	priv->ind_table = NULL;
+}
+
 /**
  * Allocate RX queue elements with scattered packets support.
  *
@@ -336,15 +545,15 @@ rxq_cleanup(struct rxq *rxq)
 		rxq_free_elts_sp(rxq);
 	else
 		rxq_free_elts(rxq);
-	if (rxq->if_qp != NULL) {
+	if (rxq->if_wq != NULL) {
 		assert(rxq->priv != NULL);
 		assert(rxq->priv->ctx != NULL);
-		assert(rxq->qp != NULL);
+		assert(rxq->wq != NULL);
 		params = (struct ibv_exp_release_intf_params){
 			.comp_mask = 0,
 		};
 		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
-						rxq->if_qp,
+						rxq->if_wq,
 						&params));
 	}
 	if (rxq->if_cq != NULL) {
@@ -358,12 +567,8 @@ rxq_cleanup(struct rxq *rxq)
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
-		rxq_promiscuous_disable(rxq);
-		rxq_allmulticast_disable(rxq);
-		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
-	}
+	if (rxq->wq != NULL)
+		claim_zero(ibv_exp_destroy_wq(rxq->wq));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->rd != NULL) {
@@ -383,112 +588,6 @@ rxq_cleanup(struct rxq *rxq)
 }
 
 /**
- * Allocate a Queue Pair.
- * Optionally setup inline receive if supported.
- *
- * @param priv
- *   Pointer to private structure.
- * @param cq
- *   Completion queue to associate with QP.
- * @param desc
- *   Number of descriptors in QP (hint only).
- *
- * @return
- *   QP pointer or NULL in case of error.
- */
-static struct ibv_qp *
-rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-	     struct ibv_exp_res_domain *rd)
-{
-	struct ibv_exp_qp_init_attr attr = {
-		/* CQ to be associated with the send queue. */
-		.send_cq = cq,
-		/* CQ to be associated with the receive queue. */
-		.recv_cq = cq,
-		.cap = {
-			/* Max number of outstanding WRs. */
-			.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
-					priv->device_attr.max_qp_wr :
-					desc),
-			/* Max number of scatter/gather elements in a WR. */
-			.max_recv_sge = ((priv->device_attr.max_sge <
-					  MLX5_PMD_SGE_WR_N) ?
-					 priv->device_attr.max_sge :
-					 MLX5_PMD_SGE_WR_N),
-		},
-		.qp_type = IBV_QPT_RAW_PACKET,
-		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
-			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
-		.pd = priv->pd,
-		.res_domain = rd,
-	};
-
-	return ibv_exp_create_qp(priv->ctx, &attr);
-}
-
-#ifdef RSS_SUPPORT
-
-/**
- * Allocate a RSS Queue Pair.
- * Optionally setup inline receive if supported.
- *
- * @param priv
- *   Pointer to private structure.
- * @param cq
- *   Completion queue to associate with QP.
- * @param desc
- *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
- *
- * @return
- *   QP pointer or NULL in case of error.
- */
-static struct ibv_qp *
-rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
-{
-	struct ibv_exp_qp_init_attr attr = {
-		/* CQ to be associated with the send queue. */
-		.send_cq = cq,
-		/* CQ to be associated with the receive queue. */
-		.recv_cq = cq,
-		.cap = {
-			/* Max number of outstanding WRs. */
-			.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
-					priv->device_attr.max_qp_wr :
-					desc),
-			/* Max number of scatter/gather elements in a WR. */
-			.max_recv_sge = ((priv->device_attr.max_sge <
-					  MLX5_PMD_SGE_WR_N) ?
-					 priv->device_attr.max_sge :
-					 MLX5_PMD_SGE_WR_N),
-		},
-		.qp_type = IBV_QPT_RAW_PACKET,
-		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
-			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |
-			      IBV_EXP_QP_INIT_ATTR_QPG),
-		.pd = priv->pd,
-		.res_domain = rd,
-	};
-
-	if (parent) {
-		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
-		/* TSS isn't necessary. */
-		attr.qpg.parent_attrib.tss_child_count = 0;
-		attr.qpg.parent_attrib.rss_child_count = priv->rxqs_n;
-		DEBUG("initializing parent RSS queue");
-	} else {
-		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
-		DEBUG("initializing child RSS queue");
-	}
-	return ibv_exp_create_qp(priv->ctx, &attr);
-}
-
-#endif /* RSS_SUPPORT */
-
-/**
  * Reconfigure a RX queue with new parameters.
  *
  * rxq_rehash() does not allocate mbufs, which, if not done from the right
@@ -512,15 +611,9 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 	unsigned int desc_n;
 	struct rte_mbuf **pool;
 	unsigned int i, k;
-	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_wq_attr mod;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
 	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
@@ -549,61 +642,17 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 		DEBUG("%p: nothing to do", (void *)dev);
 		return 0;
 	}
-	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
-		rxq_allmulticast_disable(&tmpl);
-		rxq_promiscuous_disable(&tmpl);
-		rxq_mac_addrs_del(&tmpl);
-		/* Update original queue in case of failure. */
-		rxq->allmulti_flow = tmpl.allmulti_flow;
-		rxq->promisc_flow = tmpl.promisc_flow;
-		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
-	}
 	/* From now on, any failure will render the queue unusable.
-	 * Reinitialize QP. */
-	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
-	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (err) {
-		ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
+	 * Reinitialize WQ. */
+	mod = (struct ibv_exp_wq_attr){
+		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
+		.wq_state = IBV_EXP_WQS_RESET,
 	};
-	err = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
+	err = ibv_exp_modify_wq(tmpl.wq, &mod);
 	if (err) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(err));
+		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
 		assert(err > 0);
 		return err;
-	};
-	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
-		if (priv->started)
-			rxq_mac_addrs_add(&tmpl);
-		if (priv->started && priv->promisc_req)
-			rxq_promiscuous_enable(&tmpl);
-		if (priv->started && priv->allmulti_req)
-			rxq_allmulticast_enable(&tmpl);
-		/* Update original queue in case of failure. */
-		rxq->allmulti_flow = tmpl.allmulti_flow;
-		rxq->promisc_flow = tmpl.promisc_flow;
-		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
 	}
 	/* Allocate pool. */
 	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
@@ -655,21 +704,27 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	/* Change queue state to ready. */
+	mod = (struct ibv_exp_wq_attr){
+		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
+		.wq_state = IBV_EXP_WQS_RDY,
+	};
+	err = ibv_exp_modify_wq(tmpl.wq, &mod);
+	if (err) {
+		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
+		      (void *)dev, strerror(err));
+		goto error;
+	}
 	/* Post SGEs. */
-	assert(tmpl.if_qp != NULL);
+	assert(tmpl.if_wq != NULL);
 	if (tmpl.sp) {
 		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
-			err = tmpl.if_qp->recv_sg_list
-				(tmpl.qp,
+			err = tmpl.if_wq->recv_sg_list
+				(tmpl.wq,
 				 (*elts)[i].sges,
 				 RTE_DIM((*elts)[i].sges));
-#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
-			errno = ENOSYS;
-			err = -1;
-#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
 			if (err)
 				break;
 		}
@@ -677,8 +732,8 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-			err = tmpl.if_qp->recv_burst(
-				tmpl.qp,
+			err = tmpl.if_wq->recv_burst(
+				tmpl.wq,
 				&(*elts)[i].sge,
 				1);
 			if (err)
@@ -690,16 +745,9 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 		      (void *)dev, err);
 		/* Set err because it does not contain a valid errno value. */
 		err = EIO;
-		goto skip_rtr;
+		goto error;
 	}
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (err)
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(err));
-skip_rtr:
+error:
 	*rxq = tmpl;
 	assert(err >= 0);
 	return err;
@@ -735,30 +783,20 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_wq_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
+		struct ibv_exp_wq_init_attr wq;
 	} attr;
 	enum ibv_exp_query_intf_status status;
 	struct rte_mbuf *buf;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
 	unsigned int i;
+	unsigned int cq_size = desc;
 
 	(void)conf; /* Thresholds configuration (ignored). */
-	/*
-	 * If this is a parent queue, hardware must support RSS and
-	 * RSS must be enabled.
-	 */
-	assert((!parent) || ((priv->hw_rss) && (priv->rss)));
-	if (parent) {
-		/* Even if unused, ibv_create_cq() requires at least one
-		 * descriptor. */
-		desc = 1;
-		goto skip_mr;
-	}
 	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
 		ERROR("%p: invalid number of RX descriptors (must be a"
 		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
@@ -801,7 +839,6 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-skip_mr:
 	attr.rd = (struct ibv_exp_res_domain_init_attr){
 		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
 			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
@@ -819,7 +856,8 @@ skip_mr:
 		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
 		.res_domain = tmpl.rd,
 	};
-	tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
+	tmpl.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
+				    &attr.cq);
 	if (tmpl.cq == NULL) {
 		ret = ENOMEM;
 		ERROR("%p: CQ creation failure: %s",
@@ -830,48 +868,30 @@ skip_mr:
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
+	attr.wq = (struct ibv_exp_wq_init_attr){
+		.wq_context = NULL, /* Could be useful in the future. */
+		.wq_type = IBV_EXP_WQT_RQ,
+		/* Max number of outstanding WRs. */
+		.max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
+				priv->device_attr.max_qp_wr :
+				(int)cq_size),
+		/* Max number of scatter/gather elements in a WR. */
+		.max_recv_sge = ((priv->device_attr.max_sge <
+				  MLX5_PMD_SGE_WR_N) ?
+				 priv->device_attr.max_sge :
+				 MLX5_PMD_SGE_WR_N),
+		.pd = priv->pd,
+		.cq = tmpl.cq,
+		.comp_mask = IBV_EXP_CREATE_WQ_RES_DOMAIN,
+		.res_domain = tmpl.rd,
 	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
+	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
+	if (tmpl.wq == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("%p: WQ creation failure: %s",
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	if ((parent) || (!priv->rss))  {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
-	/* Allocate descriptors for RX queues, except for the RSS parent. */
-	if (parent)
-		goto skip_alloc;
 	if (tmpl.sp)
 		ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
 	else
@@ -881,7 +901,6 @@ skip_mr:
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-skip_alloc:
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
 	DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
@@ -898,38 +917,44 @@ skip_alloc:
 	}
 	attr.params = (struct ibv_exp_query_intf_params){
 		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
+		.intf = IBV_EXP_INTF_WQ,
+		.obj = tmpl.wq,
 	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
+	tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
+	if (tmpl.if_wq == NULL) {
+		ERROR("%p: WQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
+	/* Change queue state to ready. */
+	mod = (struct ibv_exp_wq_attr){
+		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
+		.wq_state = IBV_EXP_WQS_RDY,
+	};
+	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
+	if (ret) {
+		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
+		      (void *)dev, strerror(ret));
+		goto error;
+	}
 	/* Post SGEs. */
-	if (!parent && tmpl.sp) {
+	if (tmpl.sp) {
 		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
-			ret = tmpl.if_qp->recv_sg_list
-				(tmpl.qp,
+			ret = tmpl.if_wq->recv_sg_list
+				(tmpl.wq,
 				 (*elts)[i].sges,
 				 RTE_DIM((*elts)[i].sges));
-#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
-			errno = ENOSYS;
-			ret = -1;
-#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
 			if (ret)
 				break;
 		}
-	} else if (!parent) {
+	} else {
 		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
 
 		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-			ret = tmpl.if_qp->recv_burst(
-				tmpl.qp,
+			ret = tmpl.if_wq->recv_burst(
+				tmpl.wq,
 				&(*elts)[i].sge,
 				1);
 			if (ret)
@@ -943,15 +968,6 @@ skip_alloc:
 		ret = EIO;
 		goto error;
 	}
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -1055,7 +1071,6 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index f48fec1..db2ac03 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -35,7 +35,6 @@
 #include <stdint.h>
 #include <string.h>
 #include <stdlib.h>
-#include <errno.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -61,7 +60,6 @@
 #endif
 
 #include "mlx5.h"
-#include "mlx5_autoconf.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_defs.h"
@@ -755,14 +753,9 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		rxq->stats.ibytes += pkt_buf_len;
 #endif
 repost:
-#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
-		ret = rxq->if_qp->recv_sg_list(rxq->qp,
+		ret = rxq->if_wq->recv_sg_list(rxq->wq,
 					       elt->sges,
 					       RTE_DIM(elt->sges));
-#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
-		errno = ENOSYS;
-		ret = -1;
-#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
 		if (unlikely(ret)) {
 			/* Inability to repost WRs is fatal. */
 			DEBUG("%p: recv_sg_list(): failed (ret=%d)",
@@ -919,7 +912,7 @@ repost:
 #ifdef DEBUG_RECV
 	DEBUG("%p: reposting %u WRs", (void *)rxq, i);
 #endif
-	ret = rxq->if_qp->recv_burst(rxq->qp, sges, i);
+	ret = rxq->if_wq->recv_burst(rxq->wq, sges, i);
 	if (unlikely(ret)) {
 		/* Inability to repost WRs is fatal. */
 		DEBUG("%p: recv_burst(): failed (ret=%d)",
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 90c99dc..df1d52b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -99,13 +99,9 @@ struct rxq {
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
 	struct ibv_cq *cq; /* Completion Queue. */
-	struct ibv_qp *qp; /* Queue Pair. */
-	struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+	struct ibv_exp_wq *wq; /* Work Queue. */
+	struct ibv_exp_wq_family *if_wq; /* WQ burst interface. */
 	struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-	/* MAC flow steering rules, one per VLAN ID. */
-	struct ibv_flow *mac_flow[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
-	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
-	struct ibv_flow *allmulti_flow; /* Multicast flow. */
 	unsigned int port_id; /* Port ID for incoming packets. */
 	unsigned int elts_n; /* (*elts)[] length. */
 	unsigned int elts_head; /* Current index in (*elts)[]. */
@@ -122,6 +118,15 @@ struct rxq {
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
 };
 
+struct hash_rxq {
+	struct priv *priv; /* Back pointer to private data. */
+	struct ibv_qp *qp; /* Hash RX QP. */
+	/* MAC flow steering rules, one per VLAN ID. */
+	struct ibv_flow *mac_flow[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
+	struct ibv_flow *promisc_flow; /* Promiscuous flow. */
+	struct ibv_flow *allmulti_flow; /* Multicast flow. */
+};
+
 /* TX element. */
 struct txq_elt {
 	struct rte_mbuf *buf;
@@ -166,6 +171,8 @@ struct txq {
 
 /* mlx5_rxq.c */
 
+int priv_create_hash_rxqs(struct priv *);
+void priv_destroy_hash_rxqs(struct priv *);
 void rxq_cleanup(struct rxq *);
 int rxq_rehash(struct rte_eth_dev *, struct rxq *);
 int rxq_setup(struct rte_eth_dev *, struct rxq *, uint16_t, unsigned int,
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index dced025..233c0d8 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -60,55 +60,35 @@ int
 mlx5_dev_start(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-	unsigned int i = 0;
-	unsigned int r;
-	struct rxq *rxq;
+	int err;
 
 	priv_lock(priv);
 	if (priv->started) {
 		priv_unlock(priv);
 		return 0;
 	}
-	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
-	priv->started = 1;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
-		r = 1;
-	} else {
-		rxq = (*priv->rxqs)[0];
-		r = priv->rxqs_n;
-	}
-	/* Iterate only once when RSS is enabled. */
-	do {
-		int ret;
-
-		/* Ignore nonexistent RX queues. */
-		if (rxq == NULL)
-			continue;
-		ret = rxq_mac_addrs_add(rxq);
-		if (!ret && priv->promisc_req)
-			ret = rxq_promiscuous_enable(rxq);
-		if (!ret && priv->allmulti_req)
-			ret = rxq_allmulticast_enable(rxq);
-		if (!ret)
-			continue;
-		WARN("%p: QP flow attachment failed: %s",
-		     (void *)dev, strerror(ret));
+	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
+	err = priv_create_hash_rxqs(priv);
+	if (!err)
+		err = priv_mac_addrs_enable(priv);
+	if (!err && priv->promisc_req)
+		err = priv_promiscuous_enable(priv);
+	if (!err && priv->allmulti_req)
+		err = priv_allmulticast_enable(priv);
+	if (!err)
+		priv->started = 1;
+	else {
+		ERROR("%p: an error occurred while configuring hash RX queues:"
+		      " %s",
+		      (void *)priv, strerror(err));
 		/* Rollback. */
-		while (i != 0) {
-			rxq = (*priv->rxqs)[--i];
-			if (rxq != NULL) {
-				rxq_allmulticast_disable(rxq);
-				rxq_promiscuous_disable(rxq);
-				rxq_mac_addrs_del(rxq);
-			}
-		}
-		priv->started = 0;
-		priv_unlock(priv);
-		return -ret;
-	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
+		priv_allmulticast_disable(priv);
+		priv_promiscuous_disable(priv);
+		priv_mac_addrs_disable(priv);
+		priv_destroy_hash_rxqs(priv);
+	}
 	priv_unlock(priv);
-	return 0;
+	return -err;
 }
 
 /**
@@ -123,32 +103,17 @@ void
 mlx5_dev_stop(struct rte_eth_dev *dev)
 {
 	struct priv *priv = dev->data->dev_private;
-	unsigned int i = 0;
-	unsigned int r;
-	struct rxq *rxq;
 
 	priv_lock(priv);
 	if (!priv->started) {
 		priv_unlock(priv);
 		return;
 	}
-	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
+	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
+	priv_allmulticast_disable(priv);
+	priv_promiscuous_disable(priv);
+	priv_mac_addrs_disable(priv);
+	priv_destroy_hash_rxqs(priv);
 	priv->started = 0;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
-		r = 1;
-	} else {
-		rxq = (*priv->rxqs)[0];
-		r = priv->rxqs_n;
-	}
-	/* Iterate only once when RSS is enabled. */
-	do {
-		/* Ignore nonexistent RX queues. */
-		if (rxq == NULL)
-			continue;
-		rxq_allmulticast_disable(rxq);
-		rxq_promiscuous_disable(rxq);
-		rxq_mac_addrs_del(rxq);
-	} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
 	priv_unlock(priv);
 }
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index ca80571..3a07ad1 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -67,8 +67,6 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 {
 	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
-	unsigned int r;
-	struct rxq *rxq;
 
 	DEBUG("%p: %s VLAN filter ID %" PRIu16,
 	      (void *)dev, (on ? "enable" : "disable"), vlan_id);
@@ -99,34 +97,9 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 		priv->vlan_filter[priv->vlan_filter_n] = vlan_id;
 		++priv->vlan_filter_n;
 	}
-	if (!priv->started)
-		return 0;
-	/* Rehash MAC flows in all RX queues. */
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
-		r = 1;
-	} else {
-		rxq = (*priv->rxqs)[0];
-		r = priv->rxqs_n;
-	}
-	for (i = 0; (i < r); rxq = (*priv->rxqs)[++i]) {
-		int ret;
-
-		if (rxq == NULL)
-			continue;
-		rxq_mac_addrs_del(rxq);
-		ret = rxq_mac_addrs_add(rxq);
-		if (!ret)
-			continue;
-		/* Rollback. */
-		while (i != 0) {
-			rxq = (*priv->rxqs)[--i];
-			if (rxq != NULL)
-				rxq_mac_addrs_del(rxq);
-		}
-		return ret;
-	}
-	return 0;
+	/* Rehash MAC flows in all hash RX queues. */
+	priv_mac_addrs_disable(priv);
+	return priv_mac_addrs_enable(priv);
 }
 
 /**
-- 
2.1.0



More information about the dev mailing list