[dpdk-dev] [PATCH v1] net/mlx: control netdevices through ioctl only

Adrien Mazarguil adrien.mazarguil at 6wind.com
Thu Feb 8 17:37:06 CET 2018


Several control operations implemented by these PMDs affect netdevices
through sysfs, itself subject to file system permission checks enforced by
the kernel, which limits their use for most purposes to applications
running with root privileges.

Since performing the same operations through ioctl() requires fewer
capabilities (only CAP_NET_ADMIN) and given the remaining operations are
already implemented this way, this patch standardizes on ioctl() and gets
rid of redundant code.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
---
 drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
 drivers/net/mlx5/mlx5.h        |   2 -
 drivers/net/mlx5/mlx5_ethdev.c | 255 ++++--------------------------------
 drivers/net/mlx5/mlx5_stats.c  |  28 +++-
 4 files changed, 63 insertions(+), 414 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
index 3bc692731..fbeef16c8 100644
--- a/drivers/net/mlx4/mlx4_ethdev.c
+++ b/drivers/net/mlx4/mlx4_ethdev.c
@@ -132,167 +132,6 @@ mlx4_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
 }
 
 /**
- * Read from sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[out] buf
- *   Data output buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   Number of bytes read on success, negative errno value otherwise and
- *   rte_errno is set.
- */
-static int
-mlx4_sysfs_read(const struct priv *priv, const char *entry,
-		char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-
-	ret = mlx4_get_ifname(priv, &ifname);
-	if (ret)
-		return ret;
-
-	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
-	      ifname, entry);
-
-	file = fopen(path, "rb");
-	if (file == NULL) {
-		rte_errno = errno;
-		return -rte_errno;
-	}
-	ret = fread(buf, 1, size, file);
-	if ((size_t)ret < size && ferror(file)) {
-		rte_errno = EIO;
-		ret = -rte_errno;
-	} else {
-		ret = size;
-	}
-	fclose(file);
-	return ret;
-}
-
-/**
- * Write to sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[in] buf
- *   Data buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   Number of bytes written on success, negative errno value otherwise and
- *   rte_errno is set.
- */
-static int
-mlx4_sysfs_write(const struct priv *priv, const char *entry,
-		 char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-
-	ret = mlx4_get_ifname(priv, &ifname);
-	if (ret)
-		return ret;
-
-	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
-	      ifname, entry);
-
-	file = fopen(path, "wb");
-	if (file == NULL) {
-		rte_errno = errno;
-		return -rte_errno;
-	}
-	ret = fwrite(buf, 1, size, file);
-	if ((size_t)ret < size || ferror(file)) {
-		rte_errno = EIO;
-		ret = -rte_errno;
-	} else {
-		ret = size;
-	}
-	fclose(file);
-	return ret;
-}
-
-/**
- * Get unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param[out] value
- *   Value output buffer.
- *
- * @return
- *   0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
-{
-	int ret;
-	unsigned long value_ret;
-	char value_str[32];
-
-	ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret < 0) {
-		DEBUG("cannot read %s value from sysfs: %s",
-		      name, strerror(rte_errno));
-		return ret;
-	}
-	value_str[ret] = '\0';
-	errno = 0;
-	value_ret = strtoul(value_str, NULL, 0);
-	if (errno) {
-		rte_errno = errno;
-		DEBUG("invalid %s value `%s': %s", name, value_str,
-		      strerror(rte_errno));
-		return -rte_errno;
-	}
-	*value = value_ret;
-	return 0;
-}
-
-/**
- * Set unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param value
- *   Value to set.
- *
- * @return
- *   0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
-{
-	int ret;
-	MKSTR(value_str, "%lu", value);
-
-	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret < 0) {
-		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
-		      name, value_str, value, strerror(rte_errno));
-		return ret;
-	}
-	return 0;
-}
-
-/**
  * Perform ifreq ioctl() on associated Ethernet device.
  *
  * @param[in] priv
@@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
 int
 mlx4_mtu_get(struct priv *priv, uint16_t *mtu)
 {
-	unsigned long ulong_mtu = 0;
-	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
+	struct ifreq request;
+	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
 
 	if (ret)
 		return ret;
-	*mtu = ulong_mtu;
+	*mtu = request.ifr_mtu;
 	return 0;
 }
 
@@ -385,20 +224,13 @@ int
 mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
 	struct priv *priv = dev->data->dev_private;
-	uint16_t new_mtu;
-	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
+	struct ifreq request = { .ifr_mtu = mtu, };
+	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
 
 	if (ret)
 		return ret;
-	ret = mlx4_mtu_get(priv, &new_mtu);
-	if (ret)
-		return ret;
-	if (new_mtu == mtu) {
-		priv->mtu = mtu;
-		return 0;
-	}
-	rte_errno = EINVAL;
-	return -rte_errno;
+	priv->mtu = mtu;
+	return 0;
 }
 
 /**
@@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 static int
 mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
 {
-	unsigned long tmp = 0;
-	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
+	struct ifreq request;
+	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
 
 	if (ret)
 		return ret;
-	tmp &= keep;
-	tmp |= (flags & (~keep));
-	return mlx4_set_sysfs_ulong(priv, "flags", tmp);
+	request.ifr_flags &= keep;
+	request.ifr_flags |= flags & ~keep;
+	return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 965c19f21..da44faaf4 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -209,8 +209,6 @@ struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
 int mlx5_is_secondary(void);
 int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
 int priv_ifreq(const struct priv *, int req, struct ifreq *);
-int priv_is_ib_cntr(const char *);
-int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);
 int priv_get_num_vfs(struct priv *, uint16_t *);
 int priv_get_mtu(struct priv *, uint16_t *);
 int priv_set_flags(struct priv *, unsigned int, unsigned int);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 666507691..b73cb53df 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -7,6 +7,7 @@
 
 #include <stddef.h>
 #include <assert.h>
+#include <inttypes.h>
 #include <unistd.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -173,181 +174,6 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
 }
 
 /**
- * Check if the counter is located on ib counters file.
- *
- * @param[in] cntr
- *   Counter name.
- *
- * @return
- *   1 if counter is located on ib counters file , 0 otherwise.
- */
-int
-priv_is_ib_cntr(const char *cntr)
-{
-	if (!strcmp(cntr, "out_of_buffer"))
-		return 1;
-	return 0;
-}
-
-/**
- * Read from sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[out] buf
- *   Data output buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_read(const struct priv *priv, const char *entry,
-		char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-	int err;
-
-	if (priv_get_ifname(priv, &ifname))
-		return -1;
-
-	if (priv_is_ib_cntr(entry)) {
-		MKSTR(path, "%s/ports/1/hw_counters/%s",
-		      priv->ibdev_path, entry);
-		file = fopen(path, "rb");
-	} else {
-		MKSTR(path, "%s/device/net/%s/%s",
-		      priv->ibdev_path, ifname, entry);
-		file = fopen(path, "rb");
-	}
-	if (file == NULL)
-		return -1;
-	ret = fread(buf, 1, size, file);
-	err = errno;
-	if (((size_t)ret < size) && (ferror(file)))
-		ret = -1;
-	else
-		ret = size;
-	fclose(file);
-	errno = err;
-	return ret;
-}
-
-/**
- * Write to sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[in] buf
- *   Data buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_write(const struct priv *priv, const char *entry,
-		 char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-	int err;
-
-	if (priv_get_ifname(priv, &ifname))
-		return -1;
-
-	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
-
-	file = fopen(path, "wb");
-	if (file == NULL)
-		return -1;
-	ret = fwrite(buf, 1, size, file);
-	err = errno;
-	if (((size_t)ret < size) || (ferror(file)))
-		ret = -1;
-	else
-		ret = size;
-	fclose(file);
-	errno = err;
-	return ret;
-}
-
-/**
- * Get unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param[out] value
- *   Value output buffer.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
-{
-	int ret;
-	unsigned long value_ret;
-	char value_str[32];
-
-	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret == -1) {
-		DEBUG("cannot read %s value from sysfs: %s",
-		      name, strerror(errno));
-		return -1;
-	}
-	value_str[ret] = '\0';
-	errno = 0;
-	value_ret = strtoul(value_str, NULL, 0);
-	if (errno) {
-		DEBUG("invalid %s value `%s': %s", name, value_str,
-		      strerror(errno));
-		return -1;
-	}
-	*value = value_ret;
-	return 0;
-}
-
-/**
- * Set unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param value
- *   Value to set.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
-{
-	int ret;
-	MKSTR(value_str, "%lu", value);
-
-	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret == -1) {
-		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
-		      name, value_str, value, strerror(errno));
-		return -1;
-	}
-	return 0;
-}
-
-/**
  * Perform ifreq ioctl() on associated Ethernet device.
  *
  * @param[in] priv
@@ -390,20 +216,25 @@ priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
 {
 	/* The sysfs entry name depends on the operating system. */
 	const char **name = (const char *[]){
-		"device/sriov_numvfs",
-		"device/mlx5_num_vfs",
+		"sriov_numvfs",
+		"mlx5_num_vfs",
 		NULL,
 	};
-	int ret;
 
 	do {
-		unsigned long ulong_num_vfs;
+		int n;
+		FILE *file;
+		MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
 
-		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
-		if (!ret)
-			*num_vfs = ulong_num_vfs;
-	} while (*(++name) && ret);
-	return ret;
+		file = fopen(path, "rb");
+		if (!file)
+			continue;
+		n = fscanf(file, "%" SCNu16, num_vfs);
+		fclose(file);
+		if (n == 1)
+			return 0;
+	} while (*(++name));
+	return -1;
 }
 
 /**
@@ -420,35 +251,12 @@ priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
 int
 priv_get_mtu(struct priv *priv, uint16_t *mtu)
 {
-	unsigned long ulong_mtu;
+	struct ifreq request;
+	int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
 
-	if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
-		return -1;
-	*mtu = ulong_mtu;
-	return 0;
-}
-
-/**
- * Read device counter from sysfs.
- *
- * @param priv
- *   Pointer to private structure.
- * @param name
- *   Counter name.
- * @param[out] cntr
- *   Counter output buffer.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-int
-priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
-{
-	unsigned long ulong_ctr;
-
-	if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
-		return -1;
-	*cntr = ulong_ctr;
+	if (ret)
+		return ret;
+	*mtu = request.ifr_mtu;
 	return 0;
 }
 
@@ -466,15 +274,9 @@ priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
 static int
 priv_set_mtu(struct priv *priv, uint16_t mtu)
 {
-	uint16_t new_mtu;
+	struct ifreq request = { .ifr_mtu = mtu, };
 
-	if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
-	    priv_get_mtu(priv, &new_mtu))
-		return -1;
-	if (new_mtu == mtu)
-		return 0;
-	errno = EINVAL;
-	return -1;
+	return priv_ifreq(priv, SIOCSIFMTU, &request);
 }
 
 /**
@@ -493,13 +295,14 @@ priv_set_mtu(struct priv *priv, uint16_t mtu)
 int
 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
 {
-	unsigned long tmp;
+	struct ifreq request;
+	int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
 
-	if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
-		return -1;
-	tmp &= keep;
-	tmp |= (flags & (~keep));
-	return priv_set_sysfs_ulong(priv, "flags", tmp);
+	if (ret)
+		return ret;
+	request.ifr_flags &= keep;
+	request.ifr_flags |= flags & ~keep;
+	return priv_ifreq(priv, SIOCSIFFLAGS, &request);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 378472a70..eb9c65dcc 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -3,8 +3,11 @@
  * Copyright 2015 Mellanox.
  */
 
+#include <inttypes.h>
 #include <linux/sockios.h>
 #include <linux/ethtool.h>
+#include <stdint.h>
+#include <stdio.h>
 
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
@@ -19,6 +22,7 @@ struct mlx5_counter_ctrl {
 	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
 	/* Name of the counter on the device table. */
 	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
+	uint32_t ib:1; /**< Nonzero for IB counters. */
 };
 
 static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
@@ -93,6 +97,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
 	{
 		.dpdk_name = "rx_out_of_buffer",
 		.ctr_name = "out_of_buffer",
+		.ib = 1,
 	},
 	{
 		.dpdk_name = "tx_packets_phy",
@@ -143,13 +148,24 @@ priv_read_dev_counters(struct priv *priv, uint64_t *stats)
 		return -1;
 	}
 	for (i = 0; i != xstats_n; ++i) {
-		if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
-			priv_get_cntr_sysfs(priv,
-					    mlx5_counters_init[i].ctr_name,
-					    &stats[i]);
-		else
+		if (mlx5_counters_init[i].ib) {
+			FILE *file;
+			MKSTR(path, "%s/ports/1/hw_counters/%s",
+			      priv->ibdev_path,
+			      mlx5_counters_init[i].ctr_name);
+
+			file = fopen(path, "rb");
+			if (file) {
+				int n = fscanf(file, "%" SCNu64, &stats[i]);
+
+				fclose(file);
+				if (n != 1)
+					stats[i] = 0;
+			}
+		} else {
 			stats[i] = (uint64_t)
 				et_stats->data[xstats_ctrl->dev_table_idx[i]];
+		}
 	}
 	return 0;
 }
@@ -232,7 +248,7 @@ priv_xstats_init(struct priv *priv)
 		}
 	}
 	for (j = 0; j != xstats_n; ++j) {
-		if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
+		if (mlx5_counters_init[j].ib)
 			continue;
 		if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
 			WARN("counter \"%s\" is not recognized",
-- 
2.11.0


More information about the dev mailing list