[dpdk-dev] net/mlx5: no TX in multi-process setup (testpmd working)

Rajesh Kumar rajeshthepro at gmail.com
Tue Apr 20 13:16:44 CEST 2021


I backported the fix from suanmingm at nvidia.com (related to the secondary
process) and it worked!

>From 1676903aea413fe8be4138b285633e01332efa17 Mon Sep 17 00:00:00 2001
From: RajeshKumar Kalidass <rajesh.kalidass at gigamon.com>
Date: Tue, 20 Apr 2021 02:56:32 -0700
Subject: [PATCH] VM-16160 mlx5: secondary not able to transmit out pkt

Change-Id: I647ba4f4d2534c2c97b5e23ce8a11a20eac207a3
---

diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c
b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c
index baffa75..0bfaddb 100644
--- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c
+++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.c
@@ -1183,7 +1183,7 @@
  rte_errno = ENOMEM;
  return -rte_errno;
  }
- ppriv->uar_table_sz = ppriv_size;
+ ppriv->uar_table_sz = priv->txqs_n;
  dev->process_private = ppriv;
  return 0;
 }
@@ -1194,7 +1194,7 @@
  * @param dev
  *   Pointer to Ethernet device structure.
  */
-static void
+void
 mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
 {
  if (!dev->process_private)
@@ -2036,24 +2036,6 @@
  }
  return 0;
 }
-int
-mlx5_uar_table_init (struct rte_eth_dev *eth_dev)
-{
-    int err = 0;
-    int fd;
-    /* Receive command fd from primary process. */
-    fd = mlx5_mp_req_verbs_cmd_fd(eth_dev);
-    if (fd < 0) {
-        return (rte_errno);
-    }
-    /* Remap UAR for Tx queues. */
-    err = mlx5_tx_uar_init_secondary(eth_dev, fd);
-    if (err) {
-        err = rte_errno;
-    }
-
-    return err;
-}
 /**
  * Spawn an Ethernet device from Verbs information.
  *
diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h
b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h
index 0c3a90e..5230ad6 100644
--- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h
+++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5.h
@@ -793,6 +793,7 @@

 int mlx5_getenv_int(const char *);
 int mlx5_proc_priv_init(struct rte_eth_dev *dev);
+void mlx5_proc_priv_uninit(struct rte_eth_dev *dev);
 int64_t mlx5_get_dbr(struct rte_eth_dev *dev,
      struct mlx5_devx_dbr_page **dbr_page);
 int32_t mlx5_release_dbr(struct rte_eth_dev *dev, uint32_t umem_id,
diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c
b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c
index b65b019..3e8a030 100644
--- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c
+++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_mp.c
@@ -119,6 +119,8 @@
  const struct mlx5_mp_param *param =
  (const struct mlx5_mp_param *)mp_msg->param;
  struct rte_eth_dev *dev;
+ struct mlx5_proc_priv *ppriv;
+ struct mlx5_priv *priv;
  int ret;

  assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
@@ -128,12 +130,28 @@
  return -rte_errno;
  }
  dev = &rte_eth_devices[param->port_id];
+ priv = dev->data->dev_private;
  switch (param->type) {
  case MLX5_MP_REQ_START_RXTX:
  DRV_LOG(INFO, "port %u starting datapath", dev->data->port_id);
  rte_mb();
  dev->rx_pkt_burst = mlx5_select_rx_function(dev);
  dev->tx_pkt_burst = mlx5_select_tx_function(dev);
+ ppriv = (struct mlx5_proc_priv *)dev->process_private;
+ /* If Tx queue number changes, re-initialize UAR. */
+ if (ppriv->uar_table_sz != priv->txqs_n) {
+ mlx5_tx_uar_uninit_secondary(dev);
+ mlx5_proc_priv_uninit(dev);
+ ret = mlx5_proc_priv_init(dev);
+ if (ret)
+ return -rte_errno;
+ ret = mlx5_tx_uar_init_secondary(dev, mp_msg->fds[0]);
+ if (ret) {
+ mlx5_proc_priv_uninit(dev);
+ return -rte_errno;
+ }
+ }
+
  mp_init_msg(dev, &mp_res, param->type);
  res->result = 0;
  ret = rte_mp_reply(&mp_res, peer);
@@ -175,6 +193,7 @@
  struct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
  int ret;
  int i;
+ struct mlx5_priv *priv;

  assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
  if (!mlx5_shared_data->secondary_cnt)
@@ -184,7 +203,12 @@
  dev->data->port_id, type);
  return;
  }
+ priv = dev->data->dev_private;
  mp_init_msg(dev, &mp_req, type);
+ if (type == MLX5_MP_REQ_START_RXTX) {
+ mp_req.num_fds = 1;
+ mp_req.fds[0] = ((struct ibv_context *)priv->sh->ctx)->cmd_fd;
+ }
  ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
  if (ret) {
  if (rte_errno != ENOTSUP)
diff --git a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h
b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h
index e927343..d8b3220 100644
--- a/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h
+++ b/dpdk/dpdk-19.11/drivers/net/mlx5/mlx5_rxtx.h
@@ -424,6 +424,7 @@
  const struct rte_eth_hairpin_conf *hairpin_conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
+void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
 struct mlx5_txq_obj *mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t
idx,
       enum mlx5_txq_obj_type type);
 struct mlx5_txq_obj *mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t
idx);


On Mon, Apr 19, 2021 at 7:05 PM Rajesh Kumar <rajeshthepro at gmail.com> wrote:

> Hi,
>
>
>
> DPDK: 19.11
>
> OS: Ubuntu 18.04 (Kernel: 5.4.0-1043-azure)
>
> Iface: Mellanox Technologies MT27710 Family [ConnectX-4 Lx Virtual
> Function]
>
>
>
> We are bringing up our DPDK-based app on Azure cloud. It is a multi-process
> setup (the primary does dev_configure & dev_start); however, no packets are
> getting transmitted out (the Tx packet count increases up to the number of
> descriptors, and then all further packets are counted as txDropped).
>
>
>
> "stats": [
>
>     {
>
>       "name": "rep1",
>
>       "txPkts": 1024,           <<<<<<----------------------- it
> increases up to the number of tx-descriptors
>
>       "rxPkts": 5408,
>
>       "txBytes": 65536,
>
>       "rxBytes": 346112,
>
>       "txDropped": 4384,  <<<<<<--------------------- All further packets
> are txDropped
>
>       "rxDropped": 96,
>
>       "txErrors": 0,
>
>       "rxErrors": 0
>
>     }
>
>   ]
>
>
>
> However, the mlx4 driver is working perfectly fine in a multi-process setup.
> Also, testpmd is working fine with mlx5. I guess the problem occurs when we
> try to run in a multi-process setup?
>
>
>
>
>
> Thanks,
>
> *-Rajesh*
>


More information about the dev mailing list