[dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure

Shahaf Shuler shahafs at mellanox.com
Sun Oct 14 13:07:11 CEST 2018


Hi Moty,

Thursday, October 11, 2018 4:05 PM, Mordechay Haimovsky:
> Subject: [PATCH v2 1/2] net/mlx5: refactor TC-flow infrastructure
> 
> This commit refactors tc_flow as a preparation for coming commits that send
> different types of messages and expect different types of replies while still
> using the same underlying routines.
> 
> Signed-off-by: Moti Haimovsky <motih at mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c          |  18 +++----
>  drivers/net/mlx5/mlx5.h          |   4 +-
>  drivers/net/mlx5/mlx5_flow.h     |   8 +--
>  drivers/net/mlx5/mlx5_flow_tcf.c | 113 ++++++++++++++++++++++++++++++---------
>  4 files changed, 102 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 7425e2f..20adf88 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -286,8 +286,8 @@
>  		close(priv->nl_socket_route);
>  	if (priv->nl_socket_rdma >= 0)
>  		close(priv->nl_socket_rdma);
> -	if (priv->mnl_socket)
> -		mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
> +	if (priv->tcf_context)
> +		mlx5_flow_tcf_context_destroy(priv->tcf_context);
>  	ret = mlx5_hrxq_ibv_verify(dev);
>  	if (ret)
>  		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
> @@ -1137,8 +1137,8 @@
>  	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
>  	if (vf && config.vf_nl_en)
>  		mlx5_nl_mac_addr_sync(eth_dev);
> -	priv->mnl_socket = mlx5_flow_tcf_socket_create();
> -	if (!priv->mnl_socket) {
> +	priv->tcf_context = mlx5_flow_tcf_context_create();
> +	if (!priv->tcf_context) {
>  		err = -rte_errno;
>  		DRV_LOG(WARNING,
>  			"flow rules relying on switch offloads will not be"
> @@ -1153,7 +1153,7 @@
>  			error.message =
>  				"cannot retrieve network interface index";
>  		} else {
> -			err = mlx5_flow_tcf_init(priv->mnl_socket, ifindex,
> +			err = mlx5_flow_tcf_init(priv->tcf_context, ifindex,
>  						&error);
>  		}
>  		if (err) {
> @@ -1161,8 +1161,8 @@
>  				"flow rules relying on switch offloads will"
>  				" not be supported: %s: %s",
>  				error.message, strerror(rte_errno));
> -			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
> -			priv->mnl_socket = NULL;
> +			mlx5_flow_tcf_context_destroy(priv->tcf_context);
> +			priv->tcf_context = NULL;
>  		}
>  	}
>  	TAILQ_INIT(&priv->flows);
> @@ -1217,8 +1217,8 @@
>  			close(priv->nl_socket_route);
>  		if (priv->nl_socket_rdma >= 0)
>  			close(priv->nl_socket_rdma);
> -		if (priv->mnl_socket)
> -			mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
> +		if (priv->tcf_context)
> +			mlx5_flow_tcf_context_destroy(priv->tcf_context);
>  		if (own_domain_id)
>  			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
>  		rte_free(priv);
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 2dec88a..d14239c 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -169,7 +169,7 @@ struct mlx5_drop {
>  	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
>  };
> 
> -struct mnl_socket;
> +struct mlx5_flow_tcf_context;
> 
>  struct priv {
>  	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
> @@ -236,7 +236,7 @@ struct priv {
>  	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
>  	/* UAR same-page access control required in 32bit implementations. */
>  #endif
> -	struct mnl_socket *mnl_socket; /* Libmnl socket. */
> +	struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
>  };
> 
>  #define PORT_ID(priv) ((priv)->dev_data->port_id)
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index fee05f0..41d55e8 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -338,9 +338,9 @@ int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
> 
>  /* mlx5_flow_tcf.c */
> 
> -int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
> -		       struct rte_flow_error *error);
> -struct mnl_socket *mlx5_flow_tcf_socket_create(void);
> -void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
> +int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
> +		       unsigned int ifindex, struct rte_flow_error *error);
> +struct mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
> +void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
> 
>  #endif /* RTE_PMD_MLX5_FLOW_H_ */
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 91f6ef6..8535a15 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -153,6 +153,19 @@ struct tc_vlan {
>  #define IPV6_ADDR_LEN 16
>  #endif
> 
> +/**
> + * Structure for holding netlink context.
> + * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
> + * Using this (8KB) buffer size ensures that netlink messages will never be
> + * truncated.
> + */
> +struct mlx5_flow_tcf_context {
> +	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
> +	uint32_t seq; /* Message sequence number. */

I don't think the seq is needed here. See comments below. 

> +	uint32_t buf_size; /* Message buffer size. */

If the buffer is always MNL_SOCKET_BUFFER_SIZE, why not allocate it statically, e.g.
uint8_t buf[MNL_SOCKET_BUFFER_SIZE];

However, I don't think you need it. See my comments on the next patch in the series.
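
For illustration only (not the patch itself), such a context with the buffer
embedded could look roughly like the sketch below. FLOW_TCF_BUF_SIZE is a
hypothetical 8KB bound: MNL_SOCKET_BUFFER_SIZE is normally defined by libmnl
through sysconf(), so it is not a compile-time constant that can size a struct
member directly, but the doc comment above already caps it at 8KB.

#define FLOW_TCF_BUF_SIZE (8 * 1024) /* hypothetical, see note above */

struct mlx5_flow_tcf_context {
	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
	alignas(struct nlmsghdr)
	uint8_t buf[FLOW_TCF_BUF_SIZE]; /* Message buffer, no rte_zmalloc(). */
};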

> +	uint8_t *buf; /* Message buffer. */
> +};

So, in fact, maybe this struct is not needed at all.

> +
>  /** Empty masks for known item types. */
>  static const union {
>  	struct rte_flow_item_port_id port_id;
> @@ -1429,8 +1442,8 @@ struct flow_tcf_ptoi {
>  /**
>   * Send Netlink message with acknowledgment.
>   *
> - * @param nl
> - *   Libmnl socket to use.
> + * @param ctx
> + *   Flow context to use.
>   * @param nlh
>   *   Message to send. This function always raises the NLM_F_ACK flag before
>   *   sending.
> @@ -1439,12 +1452,13 @@ struct flow_tcf_ptoi {
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
> +flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
>  {
>  	alignas(struct nlmsghdr)
>  	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
>  		    nlh->nlmsg_len - sizeof(*nlh)];
> -	uint32_t seq = random();
> +	uint32_t seq = ctx->seq++;

Why change it to a serial increment?
Netlink requires each request to have a unique seq id. A serial increment is more likely to hit a conflict if both ports happen to start from similar or close numbers.
Using a random value for each request makes such a conflict much less likely.
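
For context, here is a rough sketch of how the seq is consumed, reusing the
names from the quoted flow_tcf_nl_ack() (illustration only, not the final
code): the value written into the request is echoed back by the kernel and
checked by mnl_cb_run(), which is why uniqueness matters.

	uint32_t seq = random();	/* fresh value per request */

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	if (ret != -1)
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	if (ret != -1)
		/* mnl_cb_run() validates nlmsg_seq and the port id of the
		 * reply against the values passed here. */
		ret = mnl_cb_run(ans, ret, seq,
				 mnl_socket_get_portid(nl), NULL, NULL);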


> +	struct mnl_socket *nl = ctx->nl;
>  	int ret;
> 
>  	nlh->nlmsg_flags |= NLM_F_ACK;
> @@ -1479,7 +1493,7 @@ struct flow_tcf_ptoi {
>  	       struct rte_flow_error *error)
>  {
>  	struct priv *priv = dev->data->dev_private;
> -	struct mnl_socket *nl = priv->mnl_socket;
> +	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
>  	struct mlx5_flow *dev_flow;
>  	struct nlmsghdr *nlh;
> 
> @@ -1508,7 +1522,7 @@ struct flow_tcf_ptoi {
>  flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
>  {
>  	struct priv *priv = dev->data->dev_private;
> -	struct mnl_socket *nl = priv->mnl_socket;
> +	struct mlx5_flow_tcf_context *nl = priv->tcf_context;
>  	struct mlx5_flow *dev_flow;
>  	struct nlmsghdr *nlh;
> 
> @@ -1560,10 +1574,47 @@ struct flow_tcf_ptoi {
>  };
> 
>  /**
> - * Initialize ingress qdisc of a given network interface.
> + * Create and configure a libmnl socket for Netlink flow rules.
> + *
> + * @return
> + *   A valid libmnl socket object pointer on success, NULL otherwise and
> + *   rte_errno is set.
> + */
> +static struct mnl_socket *
> +mlx5_flow_mnl_socket_create(void)
> +{
> +	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
> +
> +	if (nl) {
> +		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
> +				      sizeof(int));
> +		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
> +			return nl;
> +	}
> +	rte_errno = errno;
> +	if (nl)
> +		mnl_socket_close(nl);
> +	return NULL;
> +}
> +
> +/**
> + * Destroy a libmnl socket.
>   *
>   * @param nl
>   *   Libmnl socket of the @p NETLINK_ROUTE kind.
> + */
> +static void
> +mlx5_flow_mnl_socket_destroy(struct mnl_socket *nl)
> +{
> +	if (nl)
> +		mnl_socket_close(nl);
> +}
> +
> +/**
> + * Initialize ingress qdisc of a given network interface.
> + *
> + * @param nl
> + *   Pointer to tc-flower context to use.
>   * @param ifindex
>   *   Index of network interface to initialize.
>   * @param[out] error
> @@ -1573,8 +1624,8 @@ struct flow_tcf_ptoi {
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
> -		   struct rte_flow_error *error)
> +mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *nl,
> +		   unsigned int ifindex, struct rte_flow_error *error)
>  {
>  	struct nlmsghdr *nlh;
>  	struct tcmsg *tcm;
> @@ -1616,37 +1667,47 @@ struct flow_tcf_ptoi {
>  }
> 
>  /**
> - * Create and configure a libmnl socket for Netlink flow rules.
> + * Create libmnl context for Netlink flow rules.
>   *
>   * @return
>   *   A valid libmnl socket object pointer on success, NULL otherwise and
>   *   rte_errno is set.
>   */
> -struct mnl_socket *
> -mlx5_flow_tcf_socket_create(void)
> +struct mlx5_flow_tcf_context *
> +mlx5_flow_tcf_context_create(void)
>  {
> -	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
> -
> -	if (nl) {
> -		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
> -				      sizeof(int));
> -		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
> -			return nl;
> -	}
> -	rte_errno = errno;
> -	if (nl)
> -		mnl_socket_close(nl);
> +	struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
> +							sizeof(*ctx),
> +							sizeof(uint32_t));
> +	if (!ctx)
> +		goto error;
> +	ctx->nl = mlx5_flow_mnl_socket_create();
> +	if (!ctx->nl)
> +		goto error;
> +	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
> +	ctx->buf = rte_zmalloc(__func__,
> +			       ctx->buf_size, sizeof(uint32_t));
> +	if (!ctx->buf)
> +		goto error;
> +	ctx->seq = random();
> +	return ctx;
> +error:
> +	mlx5_flow_tcf_context_destroy(ctx);
>  	return NULL;
>  }
> 
>  /**
> - * Destroy a libmnl socket.
> + * Destroy a libmnl context.
>   *
>   * @param nl
>   *   Libmnl socket of the @p NETLINK_ROUTE kind.
>   */
>  void
> -mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
> +mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
>  {
> -	mnl_socket_close(nl);
> +	if (!ctx)
> +		return;
> +	mlx5_flow_mnl_socket_destroy(ctx->nl);
> +	rte_free(ctx->buf);
> +	rte_free(ctx);
>  }
> --
> 1.8.3.1


