[dpdk-dev] [PATCH] net/mlx4: enhance Rx packet type offloads

Adrien Mazarguil adrien.mazarguil at 6wind.com
Mon Oct 30 16:57:27 CET 2017


Hi Moti,

On Mon, Oct 30, 2017 at 01:34:16PM +0200, Moti Haimovsky wrote:
> This patch enhances the Rx packet type offload to also report the L4
> protocol information in the hw ptype filled by the PMD for each received
> packet.
> 
> Signed-off-by: Moti Haimovsky <motih at mellanox.com>
> ---
>  drivers/net/mlx4/mlx4.c      |   2 +
>  drivers/net/mlx4/mlx4_prm.h  |  16 +++
>  drivers/net/mlx4/mlx4_rxtx.c | 264 +++++++++++++++++++++++++++++++++++++++----
>  drivers/net/mlx4/mlx4_rxtx.h |   1 +
>  4 files changed, 260 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 5d35a50..c228da3 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -706,6 +706,8 @@ struct mlx4_conf {
>  static void
>  rte_mlx4_pmd_init(void)
>  {
> +	/* Build the static table for ptype conversion. */
> +	mlx4_set_ptype_table();
>  	/*
>  	 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
>  	 * huge pages. Calling ibv_fork_init() during init allows
> diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
> index b0fd982..a301f95 100644
> --- a/drivers/net/mlx4/mlx4_prm.h
> +++ b/drivers/net/mlx4/mlx4_prm.h
> @@ -78,6 +78,22 @@ enum {
>  	MLX4_CQE_L2_TUNNEL_IPOK = (int)(1u << 31),
>  };
>  
> +/* CQE status flags. */
> +enum {
> +	MLX4_CQE_STATUS_IPV4 = 1 << 22,
> +	MLX4_CQE_STATUS_IPV4F = 1 << 23,
> +	MLX4_CQE_STATUS_IPV6 = 1 << 24,
> +	MLX4_CQE_STATUS_IPV4OPT = 1 << 25,
> +	MLX4_CQE_STATUS_TCP = 1 << 26,
> +	MLX4_CQE_STATUS_UDP = 1 << 27,
> +	MLX4_CQE_STATUS_PTYPE_MASK = MLX4_CQE_STATUS_IPV4 |
> +				     MLX4_CQE_STATUS_IPV4F |
> +				     MLX4_CQE_STATUS_IPV6 |
> +				     MLX4_CQE_STATUS_IPV4OPT |
> +				     MLX4_CQE_STATUS_TCP |
> +				     MLX4_CQE_STATUS_UDP,
> +};
> +

Note it's better to name enums (and more generally all global types).
I'm aware this file already contains one such enum, therefore it's only a
suggestion.

Moreover in both cases, there's actually no advantage in using enums over
basic #defines given they are bit-field values. It only causes issues, since
you're stuck with signed integers that require workarounds such as:

 MLX4_CQE_L2_TUNNEL_IPOK = (int)(1u << 31),

>  /* Send queue information. */
>  struct mlx4_sq {
>  	uint8_t *buf; /**< SQ buffer. */
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index 67dc712..fd31a6f 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -71,6 +71,215 @@ struct pv {
>  	uint32_t val;
>  };
>  
> +/* A table to translate Rx completion flags to packet type. */

This comment should start with "/**" because this PMD standardizes on
Doxygen documentation.

> +uint32_t mlx4_ptype_table[] __rte_cache_aligned = {
> +	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */

Comment should start with "/**<" for the same reasons.

> +};

The last entry used for RTE_PTYPE_ALL_MASK comes from a vector optimization
in the mlx5 PMD; I'm not sure it's needed here.

> +
> +/**
> + * Build a table to translate Rx completion flags to packet type.
> + *
> + * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
> + */
> +void
> +mlx4_set_ptype_table(void)
> +{
> +	unsigned int i;
> +	uint32_t (*p)[RTE_DIM(mlx4_ptype_table)] = &mlx4_ptype_table;

The target array could be filled directly, since it's not provided to this
function through a parameter (doing so would remove all the unnecessary
(*p)[x] indirection below).

> +
> +	/* Last entry must not be overwritten, reserved for errored packet. */
> +	for (i = 0; i < RTE_DIM(mlx4_ptype_table) - 1; ++i)

Assuming 0xff doesn't need to be reserved, how about changing the condition
to "i < RTE_DIM(mlx4_ptype_table)"? (Note "i <= RTE_DIM(mlx4_ptype_table)"
would index one past the end of the array.)

> +		(*p)[i] = RTE_PTYPE_UNKNOWN;
> +	/*
> +	 * The index to the array should have:
> +	 *  bit[7] - MLX4_CQE_L2_TUNNEL
> +	 *  bit[6] - MLX4_CQE_L2_TUNNEL_IPV4
> +	 *  bit[5] - MLX4_CQE_STATUS_UDP
> +	 *  bit[4] - MLX4_CQE_STATUS_TCP
> +	 *  bit[3] - MLX4_CQE_STATUS_IPV4OPT
> +	 *  bit[2] - MLX4_CQE_STATUS_IPV6
> +	 *  bit[1] - MLX4_CQE_STATUS_IPV4F
> +	 *  bit[0] - MLX4_CQE_STATUS_IPV4
> +	 * giving a total of up to 256 entries.
> +	 */
> +	(*p)[0x00] = RTE_PTYPE_L2_ETHER;
> +	(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_NONFRAG;
> +	(*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_FRAG;
> +	(*p)[0x03] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_FRAG;
> +	(*p)[0x04] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
> +	(*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_NONFRAG;
> +	(*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_FRAG;
> +	(*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_TCP;
> +	(*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_TCP;
> +	(*p)[0x14] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_TCP;
> +	(*p)[0x18] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_TCP;
> +	(*p)[0x19] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_TCP;
> +	(*p)[0x1a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_TCP;
> +	(*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_UDP;
> +	(*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_UDP;
> +	(*p)[0x24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_L4_UDP;
> +	(*p)[0x28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_UDP;
> +	(*p)[0x29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_UDP;
> +	(*p)[0x2a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
> +		     RTE_PTYPE_L4_UDP;
> +	/* Tunneled - L3 IPV6 */
> +	(*p)[0x80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
> +	(*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_NONFRAG;
> +	(*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG;
> +	(*p)[0x83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG;
> +	(*p)[0x84] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
> +	(*p)[0x88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT;
> +	(*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_NONFRAG;
> +	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG;
> +	/* Tunneled - L3 IPV6, TCP */
> +	(*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0x93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0x94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0x98] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0x99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0x9a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	/* Tunneled - L3 IPV6, UDP */
> +	(*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xa3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xa4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xa8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xa9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xaa] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	/* Tunneled - L3 IPV4 */
> +	(*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
> +	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_NONFRAG;
> +	(*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG;
> +	(*p)[0xc3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG;
> +	(*p)[0xc4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
> +	(*p)[0xc8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT;
> +	(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_NONFRAG;
> +	(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_FRAG;
> +	/* Tunneled - L3 IPV4, TCP */
> +	(*p)[0xd0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0xd3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0xd4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0xd8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0xd9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	(*p)[0xda] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_TCP;
> +	/* Tunneled - L3 IPV4, UDP */
> +	(*p)[0xe0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xe3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xe4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xe8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xe9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP;
> +	(*p)[0xea] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
> +		     RTE_PTYPE_INNER_L4_UDP;
> +}
> +

Isn't there a way to compute all these entries with some kind of loop
instead of statically filling them? Doing so could avoid mistakes later.

>  /**
>   * Stamp a WQE so it won't be reused by the HW.
>   *
> @@ -568,30 +777,40 @@ struct pv {
>  /**
>   * Translate Rx completion flags to packet type.
>   *
> - * @param flags
> - *   Rx completion flags returned by mlx4_cqe_flags().
> + * @param[in] cqe
> + *   Pointer to CQE.
>   *
>   * @return
> - *   Packet type in mbuf format.
> + *   Packet type for struct rte_mbuf.
>   */
>  static inline uint32_t
> -rxq_cq_to_pkt_type(uint32_t flags)
> +rxq_cq_to_pkt_type(volatile struct mlx4_cqe *cqe)
>  {
> -	uint32_t pkt_type;
> +	uint8_t idx = 0;
> +	uint32_t pinfo = rte_be_to_cpu_32(cqe->vlan_my_qpn);
> +	uint32_t status = rte_be_to_cpu_32(cqe->status);
>  
> -	if (flags & MLX4_CQE_L2_TUNNEL)
> -		pkt_type =
> -			mlx4_transpose(flags,
> -				       MLX4_CQE_L2_TUNNEL_IPV4,
> -				       RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
> -			mlx4_transpose(flags,
> -				       MLX4_CQE_STATUS_IPV4_PKT,
> -				       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN);
> -	else
> -		pkt_type = mlx4_transpose(flags,
> -					  MLX4_CQE_STATUS_IPV4_PKT,
> -					  RTE_PTYPE_L3_IPV4_EXT_UNKNOWN);
> -	return pkt_type;
> +	/*
> +	 * The index to the array should have:
> +	 *  bit[7] - MLX4_CQE_L2_TUNNEL
> +	 *  bit[6] - MLX4_CQE_L2_TUNNEL_IPV4
> +	 */
> +	if (pinfo & MLX4_CQE_L2_TUNNEL)
> +		idx |= ((pinfo & MLX4_CQE_L2_TUNNEL) >> 20) |
> +		       ((pinfo & MLX4_CQE_L2_TUNNEL_IPV4) >> 19);
> +	/*
> +	 * The index to the array should have:
> +	 *  bit[5] - MLX4_CQE_STATUS_UDP
> +	 *  bit[4] - MLX4_CQE_STATUS_TCP
> +	 *  bit[3] - MLX4_CQE_STATUS_IPV4OPT
> +	 *  bit[2] - MLX4_CQE_STATUS_IPV6
> +	 *  bit[1] - MLX4_CQE_STATUS_IPV4F
> +	 *  bit[0] - MLX4_CQE_STATUS_IPV4
> +	 * giving a total of up to 256 entries.
> +	 */
> +	idx |= ((status & MLX4_CQE_STATUS_PTYPE_MASK) >> 22);
> +	rte_prefetch0(&(mlx4_ptype_table[idx]));

This forced prefetch looks useless at best and may negatively affect
performance at worst. You should remove it.

> +	return mlx4_ptype_table[idx];
>  }
>  
>  /**
> @@ -774,6 +993,10 @@ struct pv {
>  				goto skip;
>  			}
>  			pkt = seg;
> +			/* Update packet information. */
> +			pkt->packet_type = rxq_cq_to_pkt_type(cqe);
> +			pkt->ol_flags = 0;
> +			pkt->pkt_len = len;
>  			if (rxq->csum | rxq->csum_l2tun) {
>  				uint32_t flags =
>  					mlx4_cqe_flags(cqe,
> @@ -784,12 +1007,7 @@ struct pv {
>  					rxq_cq_to_ol_flags(flags,
>  							   rxq->csum,
>  							   rxq->csum_l2tun);
> -				pkt->packet_type = rxq_cq_to_pkt_type(flags);
> -			} else {
> -				pkt->packet_type = 0;
> -				pkt->ol_flags = 0;
>  			}
> -			pkt->pkt_len = len;
>  		}
>  		rep->nb_segs = 1;
>  		rep->port = rxq->port_id;
> diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
> index 7d67748..5302232 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.h
> +++ b/drivers/net/mlx4/mlx4_rxtx.h
> @@ -165,6 +165,7 @@ int mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
>  
>  /* mlx4_rxtx.c */
>  
> +void mlx4_set_ptype_table(void);
>  uint32_t mlx4_txq_mp2mr(struct txq *txq, struct rte_mempool *mp);
>  uint16_t mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
>  		       uint16_t pkts_n);
> -- 
> 1.8.3.1
> 

-- 
Adrien Mazarguil
6WIND


More information about the dev mailing list