[dpdk-dev] [PATCH v2 1/3] mbuf: implement generic format for sched field

Dumitrescu, Cristian cristian.dumitrescu at intel.com
Wed Dec 12 19:17:36 CET 2018


Hi Reshma,

More comments on this patch.

<snip>

> diff --git a/lib/librte_pipeline/rte_table_action.c
> b/lib/librte_pipeline/rte_table_action.c
> index 7c7c8dd82..f9768b9cc 100644
> --- a/lib/librte_pipeline/rte_table_action.c
> +++ b/lib/librte_pipeline/rte_table_action.c
> @@ -107,14 +107,6 @@ mtr_cfg_check(struct rte_table_action_mtr_config
> *mtr)
>  	return 0;
>  }
> 

<snip>

> @@ -368,11 +358,16 @@ tm_cfg_check(struct rte_table_action_tm_config
> *tm)
>  }
> 
>  struct tm_data {
> -	uint16_t queue_tc_color;
>  	uint16_t subport;
>  	uint32_t pipe;
>  } __attribute__((__packed__));
> 

Currently, the tm_data has the same memory layout as mbuf->hash.sched, as defined by librte_sched.
Now, we are basically replacing sched with queue_id, whose format is defined by librte_sched. So why not keep the same approach going forward?
What does this mean in practice:
- queue_id is simply packaging (subport ID, pipe ID, tc, tc_queue) into the 32-bit queue_id.
- we can simply pre-pack subport ID and pipe ID into queue_id, and initially use tc = 0 and tc_queue = 0
-later on, as we read tc and tc_queue from DSCP table, we simply update them in the queue_id, basically we change the least significant 4 bits of queue_id

Does it make sense?

Let's also keep tm_data size of 8 bytes to preserve the current 8-byte action data alignment:

struct tm_data {
  	uint32_t queue_id;
	uint32_t reserved;
} __attribute__((__packed__));

> +/* log2 representation of tm configuration */
> +struct tm_cfg {
> +	uint32_t subports_per_port_log2;
> +	uint32_t pipes_per_subport_log2;
> +} __attribute__((__packed__));

No need to worry about _log2 values if we follow this approach, as they will only be needed (and computed) by apply function, not the run-time tm_work function.

> +
>  static int
>  tm_apply_check(struct rte_table_action_tm_params *p,
>  	struct rte_table_action_tm_config *cfg)
> @@ -397,26 +392,37 @@ tm_apply(struct tm_data *data,
>  		return status;
> 
>  	/* Apply */
> -	data->queue_tc_color = 0;
>  	data->subport = (uint16_t) p->subport_id;
>  	data->pipe = p->pipe_id;
> 
>  	return 0;
>  }
> 
> +static uint32_t
> +tm_sched_qindex(struct tm_data *data,
> +	struct dscp_table_entry_data *dscp,
> +	struct tm_cfg *tm)
> +{
> +	uint32_t result;
> +
> +	result = (data->subport << tm->pipes_per_subport_log2) + data-
> >pipe;
> +	result = result * RTE_TABLE_ACTION_TC_MAX + dscp->tc;
> +	result = result * RTE_TABLE_ACTION_TC_QUEUE_MAX + dscp-
> >queue;
> +
> +	return result;
> +}
> +

This function is no longer needed with the above approach.

>  static __rte_always_inline void
>  pkt_work_tm(struct rte_mbuf *mbuf,
>  	struct tm_data *data,
>  	struct dscp_table_data *dscp_table,
> -	uint32_t dscp)
> +	uint32_t dscp,
> +	struct tm_cfg *tm)
>  {
>  	struct dscp_table_entry_data *dscp_entry = &dscp_table-
> >entry[dscp];
> -	struct tm_data *sched_ptr = (struct tm_data *) &mbuf->hash.sched;
> -	struct tm_data sched;
> +	uint32_t queue = tm_sched_qindex(data, dscp_entry, tm);
> 
> -	sched = *data;
> -	sched.queue_tc_color = dscp_entry->queue_tc_color;
> -	*sched_ptr = sched;
> +	rte_mbuf_sched_set(mbuf, queue, dscp_entry->tc, dscp_entry-
> >color);
>  }
> 

With the above approach:

uint32_t queue_id = data->queue_id | (dscp_entry-> tc << 2) | dscp_entry->tc_queue;
rte_mbuf_sched_set(mbuf, queue_id, dscp_entry->tc, dscp_entry-color);

>  /**
> @@ -2440,6 +2446,7 @@ struct rte_table_action {
>  	struct ap_data data;
>  	struct dscp_table_data dscp_table;
>  	struct meter_profile_data mp[METER_PROFILES_MAX];
> +	struct tm_cfg tm;
>  };
> 

Not needed with the above approach.

>  struct rte_table_action *
> @@ -2465,6 +2472,11 @@ rte_table_action_create(struct
> rte_table_action_profile *profile,
>  	memcpy(&action->cfg, &profile->cfg, sizeof(profile->cfg));
>  	memcpy(&action->data, &profile->data, sizeof(profile->data));
> 
> +	action->tm.subports_per_port_log2 =
> +			__builtin_ctz(profile->cfg.tm.n_subports_per_port);
> +	action->tm.pipes_per_subport_log2 =
> +			__builtin_ctz(profile->cfg.tm.n_pipes_per_subport);
> +
>  	return action;
>  }
> 
> @@ -2580,17 +2592,13 @@ rte_table_action_dscp_table_update(struct
> rte_table_action *action,
>  			&action->dscp_table.entry[i];
>  		struct rte_table_action_dscp_table_entry *entry =
>  			&table->entry[i];
> -		uint16_t queue_tc_color =
> -			MBUF_SCHED_QUEUE_TC_COLOR(entry-
> >tc_queue_id,
> -				entry->tc_id,
> -				entry->color);
> 
>  		if ((dscp_mask & (1LLU << i)) == 0)
>  			continue;
> 
>  		data->color = entry->color;
>  		data->tc = entry->tc_id;
> -		data->queue_tc_color = queue_tc_color;
> +		data->queue = entry->tc_queue_id;
>  	}
> 
>  	return 0;
> @@ -2882,7 +2890,8 @@ pkt_work(struct rte_mbuf *mbuf,
>  		pkt_work_tm(mbuf,
>  			data,
>  			&action->dscp_table,
> -			dscp);
> +			dscp,
> +			&action->tm);
>  	}
> 
>  	if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
> @@ -3108,22 +3117,26 @@ pkt4_work(struct rte_mbuf **mbufs,
>  		pkt_work_tm(mbuf0,
>  			data0,
>  			&action->dscp_table,
> -			dscp0);
> +			dscp0,
> +			&action->tm);
> 
>  		pkt_work_tm(mbuf1,
>  			data1,
>  			&action->dscp_table,
> -			dscp1);
> +			dscp1,
> +			&action->tm);
> 
>  		pkt_work_tm(mbuf2,
>  			data2,
>  			&action->dscp_table,
> -			dscp2);
> +			dscp2,
> +			&action->tm);
> 
>  		pkt_work_tm(mbuf3,
>  			data3,
>  			&action->dscp_table,
> -			dscp3);
> +			dscp3,
> +			&action->tm);
>  	}
> 
>  	if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
> index 46c53ed71..644fd9d15 100644
> --- a/lib/librte_sched/Makefile
> +++ b/lib/librte_sched/Makefile
> @@ -18,7 +18,7 @@ LDLIBS += -lrte_timer
> 
>  EXPORT_MAP := rte_sched_version.map
> 
> -LIBABIVER := 1
> +LIBABIVER := 2
> 
>  #
>  # all source are stored in SRCS-y
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index 587d5e602..a6d9a5886 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -41,6 +41,9 @@
>  #define RTE_SCHED_PIPE_INVALID                UINT32_MAX
>  #define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX
> 
> +#define RTE_SCHED_QUEUES_PER_PIPE_LOG2 \
> +	__builtin_ctz(RTE_SCHED_QUEUES_PER_PIPE)
> +
>  /* Scaling for cycles_per_byte calculation
>   * Chosen so that minimum rate is 480 bit/sec
>   */
> @@ -128,22 +131,6 @@ enum grinder_state {
>  	e_GRINDER_READ_MBUF
>  };
> 
> -/*
> - * Path through the scheduler hierarchy used by the scheduler enqueue
> - * operation to identify the destination queue for the current
> - * packet. Stored in the field pkt.hash.sched of struct rte_mbuf of
> - * each packet, typically written by the classification stage and read
> - * by scheduler enqueue.
> - */
> -struct rte_sched_port_hierarchy {
> -	uint16_t queue:2;                /**< Queue ID (0 .. 3) */
> -	uint16_t traffic_class:2;        /**< Traffic class ID (0 .. 3)*/
> -	uint32_t color:2;                /**< Color */
> -	uint16_t unused:10;
> -	uint16_t subport;                /**< Subport ID */
> -	uint32_t pipe;		         /**< Pipe ID */
> -};
> -
>  struct rte_sched_grinder {
>  	/* Pipe cache */
>  	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
> @@ -228,6 +215,7 @@ struct rte_sched_port {
>  	uint8_t *bmp_array;
>  	struct rte_mbuf **queue_array;
>  	uint8_t memory[0] __rte_cache_aligned;
> +
>  } __rte_cache_aligned;
> 

Please remove this blank line.

>  enum rte_sched_port_array {
> @@ -242,13 +230,11 @@ enum rte_sched_port_array {
>  };
> 
>  #ifdef RTE_SCHED_COLLECT_STATS
> -
>  static inline uint32_t
>  rte_sched_port_queues_per_subport(struct rte_sched_port *port)
>  {
>  	return RTE_SCHED_QUEUES_PER_PIPE * port-
> >n_pipes_per_subport;
>  }
> -
>  #endif
> 

Please put back the two blank lines that got removed.

>  static inline uint32_t
> @@ -1006,44 +992,56 @@ rte_sched_port_pipe_profile_add(struct
> rte_sched_port *port,
>  	return 0;
>  }
> 
> +static inline uint32_t
> +rte_sched_port_qindex(struct rte_sched_port *port,
> +	uint32_t subport,
> +	uint32_t pipe,
> +	uint32_t traffic_class,
> +	uint32_t queue)
> +{
> +	uint32_t result;
> +
> +	result = (subport << __builtin_ctz(port->n_pipes_per_subport)) +
> pipe;
> +	result = result * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE +
> traffic_class;
> +	result = result * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
> +
> +	return result;
> +}
> +

This is a performance critical run-time function, so let's optimize it by pre-computing log2 value for n_pipes_per_subport and store it in the port data structure: port->n_pipes_per_subport_log2.

We should also make sure we truncate subport ID and pipe ID in case they are bigger than max, which is easily done since the max values are enforced to be power of 2.

static inline uint32_t
rte_sched_port_qindex(struct rte_sched_port *port,
	uint32_t subport,
	uint32_t pipe,
	uint32_t traffic_class,
	uint32_t queue)
{
	return ((subport & (port->n_subports_per_port - 1)) << (port->n_pipes_per_subport_log2 + 4)) |
		((pipe & (port->n_pipes_per_subport - 1)) << 4) |
		((traffic_class & (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)) << 2) |
		(queue & (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1));
}

>  void
> -rte_sched_port_pkt_write(struct rte_mbuf *pkt,
> +rte_sched_port_pkt_write(struct rte_sched_port *port,
> +			 struct rte_mbuf *pkt,
>  			 uint32_t subport, uint32_t pipe, uint32_t
> traffic_class,
>  			 uint32_t queue, enum rte_meter_color color)
>  {
> -	struct rte_sched_port_hierarchy *sched
> -		= (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
> -
> -	RTE_BUILD_BUG_ON(sizeof(*sched) > sizeof(pkt->hash.sched));
> -
> -	sched->color = (uint32_t) color;
> -	sched->subport = subport;
> -	sched->pipe = pipe;
> -	sched->traffic_class = traffic_class;
> -	sched->queue = queue;
> +	uint32_t queue_id = rte_sched_port_qindex(port, subport, pipe,
> +			traffic_class, queue);
> +	rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
>  }
> 
>  void
> -rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt,
> +rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
> +				  const struct rte_mbuf *pkt,
>  				  uint32_t *subport, uint32_t *pipe,
>  				  uint32_t *traffic_class, uint32_t *queue)
>  {
> -	const struct rte_sched_port_hierarchy *sched
> -		= (const struct rte_sched_port_hierarchy *) &pkt-
> >hash.sched;
> -
> -	*subport = sched->subport;
> -	*pipe = sched->pipe;
> -	*traffic_class = sched->traffic_class;
> -	*queue = sched->queue;
> +	uint32_t qid = rte_mbuf_sched_queue_read(pkt);

queue_id instead of qid, please.

> +
> +	*subport = qid >>
> +		(__builtin_ctz
> +			(RTE_SCHED_QUEUES_PER_PIPE <<
> +			__builtin_ctz(port->n_pipes_per_subport)
> +			)
> +		);

Not correct.

*subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);

> +	*pipe = qid >> RTE_SCHED_QUEUES_PER_PIPE_LOG2;

Not correct.

*pipe = (queue_id >> 4) & (port->n_queues_per_subport - 1);

> +	*traffic_class = rte_mbuf_sched_traffic_class_read(pkt);

Let's read traffic class from queue_id in order to keep consistency with support ID and pipe ID, please:

*traffic_class = (queue_id >> 2) & (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1);

> +	*queue = qid & (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1);
>  }
> 
>  enum rte_meter_color
>  rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
>  {
> -	const struct rte_sched_port_hierarchy *sched
> -		= (const struct rte_sched_port_hierarchy *) &pkt-
> >hash.sched;
> -
> -	return (enum rte_meter_color) sched->color;
> +	return (enum rte_meter_color)rte_mbuf_sched_color_read(pkt);
>  }
> 
>  int
> @@ -1100,18 +1098,6 @@ rte_sched_queue_read_stats(struct
> rte_sched_port *port,
>  	return 0;
>  }
> 
> -static inline uint32_t
> -rte_sched_port_qindex(struct rte_sched_port *port, uint32_t subport,
> uint32_t pipe, uint32_t traffic_class, uint32_t queue)
> -{
> -	uint32_t result;
> -
> -	result = subport * port->n_pipes_per_subport + pipe;
> -	result = result * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE +
> traffic_class;
> -	result = result * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
> -
> -	return result;
> -}
> -
>  #ifdef RTE_SCHED_DEBUG
> 
>  static inline int
> @@ -1272,11 +1258,8 @@ rte_sched_port_enqueue_qptrs_prefetch0(struct
> rte_sched_port *port,
>  #ifdef RTE_SCHED_COLLECT_STATS
>  	struct rte_sched_queue_extra *qe;
>  #endif
> -	uint32_t subport, pipe, traffic_class, queue, qindex;
> -
> -	rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe,
> &traffic_class, &queue);
> +	uint32_t qindex = rte_mbuf_sched_queue_read(pkt);
> 
> -	qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class,
> queue);
>  	q = port->queue + qindex;
>  	rte_prefetch0(q);
>  #ifdef RTE_SCHED_COLLECT_STATS
> diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
> index 84fa896de..243efa1d4 100644
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -355,6 +355,8 @@ rte_sched_queue_read_stats(struct rte_sched_port
> *port,
>   * Scheduler hierarchy path write to packet descriptor. Typically
>   * called by the packet classification stage.
>   *
> + * @param port
> + *   Handle to port scheduler instance
>   * @param pkt
>   *   Packet descriptor handle
>   * @param subport
> @@ -369,7 +371,8 @@ rte_sched_queue_read_stats(struct rte_sched_port
> *port,
>   *   Packet color set
>   */
>  void
> -rte_sched_port_pkt_write(struct rte_mbuf *pkt,
> +rte_sched_port_pkt_write(struct rte_sched_port *port,
> +			 struct rte_mbuf *pkt,
>  			 uint32_t subport, uint32_t pipe, uint32_t
> traffic_class,
>  			 uint32_t queue, enum rte_meter_color color);
> 
> @@ -379,6 +382,8 @@ rte_sched_port_pkt_write(struct rte_mbuf *pkt,
>   * enqueue operation. The subport, pipe, traffic class and queue
>   * parameters need to be pre-allocated by the caller.
>   *
> + * @param port
> + *   Handle to port scheduler instance
>   * @param pkt
>   *   Packet descriptor handle
>   * @param subport
> @@ -392,7 +397,8 @@ rte_sched_port_pkt_write(struct rte_mbuf *pkt,
>   *
>   */
>  void
> -rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt,
> +rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
> +				  const struct rte_mbuf *pkt,
>  				  uint32_t *subport, uint32_t *pipe,
>  				  uint32_t *traffic_class, uint32_t *queue);
> 
> diff --git a/test/test/test_sched.c b/test/test/test_sched.c
> index 32e500ba9..40e411cab 100644
> --- a/test/test/test_sched.c
> +++ b/test/test/test_sched.c
> @@ -76,7 +76,7 @@ create_mempool(void)
>  }
> 
>  static void
> -prepare_pkt(struct rte_mbuf *mbuf)
> +prepare_pkt(struct rte_sched_port *port, struct rte_mbuf *mbuf)
>  {
>  	struct ether_hdr *eth_hdr;
>  	struct vlan_hdr *vlan1, *vlan2;
> @@ -95,7 +95,8 @@ prepare_pkt(struct rte_mbuf *mbuf)
>  	ip_hdr->dst_addr = IPv4(0,0,TC,QUEUE);
> 
> 
> -	rte_sched_port_pkt_write(mbuf, SUBPORT, PIPE, TC, QUEUE,
> e_RTE_METER_YELLOW);
> +	rte_sched_port_pkt_write(port, mbuf, SUBPORT, PIPE, TC, QUEUE,
> +					e_RTE_METER_YELLOW);
> 
>  	/* 64 byte packet */
>  	mbuf->pkt_len  = 60;
> @@ -138,7 +139,7 @@ test_sched(void)
>  	for (i = 0; i < 10; i++) {
>  		in_mbufs[i] = rte_pktmbuf_alloc(mp);
>  		TEST_ASSERT_NOT_NULL(in_mbufs[i], "Packet allocation
> failed\n");
> -		prepare_pkt(in_mbufs[i]);
> +		prepare_pkt(port, in_mbufs[i]);
>  	}
> 
> 
> @@ -155,7 +156,7 @@ test_sched(void)
>  		color = rte_sched_port_pkt_read_color(out_mbufs[i]);
>  		TEST_ASSERT_EQUAL(color, e_RTE_METER_YELLOW, "Wrong
> color\n");
> 
> -		rte_sched_port_pkt_read_tree_path(out_mbufs[i],
> +		rte_sched_port_pkt_read_tree_path(port, out_mbufs[i],
>  				&subport, &pipe, &traffic_class, &queue);
> 
>  		TEST_ASSERT_EQUAL(subport, SUBPORT, "Wrong
> subport\n");
> --
> 2.17.1



More information about the dev mailing list