[dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management

Dumitrescu, Cristian cristian.dumitrescu at intel.com
Fri Jul 16 17:11:56 CEST 2021


Hi Wojciech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski at intel.com>
> Sent: Monday, July 5, 2021 9:04 AM
> To: dev at dpdk.org; Singh, Jasvinder <jasvinder.singh at intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu at intel.com>
> Cc: Dharmappa, Savinay <savinay.dharmappa at intel.com>; Ajmera, Megha
> <megha.ajmera at intel.com>
> Subject: [RFC PATCH v4 1/3] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski at intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 229 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 91 deletions(-)
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev,
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
> @@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev,
> uint32_t subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];

Please do NOT rename red to wred in this patch set.

>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =

<snip>

> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index cd87e688e4..afda39caf5 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif

Please use the proposed rte_sched_cman_params structure.


> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1169,30 +1263,11 @@ rte_sched_subport_config(struct
> rte_sched_port *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					rte_sched_free_memory(port,
> n_subports);
> -
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					return -EINVAL;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1714,29 +1789,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)

Please rename drops with n_pkts_cman_dropped.

>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 

Due to the recommended generic field n_pkts_cman_dropped of the rte_sched_subport_stats structure, you don't need the macro here anymore :)

> @@ -1752,58 +1818,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)

Please rename drops with n_pkts_cman_dropped.

>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +

You don't want to declare new variables in the middle of the function, but you do want to reduce their scope, so maybe use the else for this (although not needed, since you call return on the if branch)?

> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port-
> >time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1811,14 +1880,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +

Please rename this function to rte_sched_port_red_set_queue_empty_timestamp() to reflect that it is RED-specific.

> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 

Please do the processing in this function only if CMAN is PIE (add an if statement), and remove the if statement from where it gets called.

Also you need to define this function as do-nothing on the #else branch, right?

>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1829,7 +1913,7 @@ static inline int rte_sched_port_red_drop(struct
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1925,7 +2009,7 @@ rte_sched_port_enqueue_qwa(struct
> rte_sched_port *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2398,6 +2482,7 @@ grinder_schedule(struct rte_sched_port *port,
>  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2417,15 +2502,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) &
> be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);

As stated before, move the if statement within the function, and remove the macro from here by defining this same function as do-nothing on the #else branch above.

> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;

Regards,
Cristian


More information about the dev mailing list