[dpdk-dev] [PATCH v4 01/11] sched: remove wrr from strict priority tc queues

Dumitrescu, Cristian cristian.dumitrescu at intel.com
Tue Jul 16 01:50:02 CEST 2019



> -----Original Message-----
> From: Singh, Jasvinder
> Sent: Friday, July 12, 2019 11:57 AM
> To: dev at dpdk.org
> Cc: Dumitrescu, Cristian <cristian.dumitrescu at intel.com>; Tovar, AbrahamX
> <abrahamx.tovar at intel.com>; Krakowiak, LukaszX
> <lukaszx.krakowiak at intel.com>
> Subject: [PATCH v4 01/11] sched: remove wrr from strict priority tc queues
> 
> All higher priority traffic classes contain only one queue, thus
> remove the wrr function for them. The lowest priority best-effort
> traffic class continues to have multiple queues, and packets are
> scheduled from its queues using the wrr function.
> 
> Signed-off-by: Jasvinder Singh <jasvinder.singh at intel.com>
> Signed-off-by: Abraham Tovar <abrahamx.tovar at intel.com>
> Signed-off-by: Lukasz Krakowiak <lukaszx.krakowiak at intel.com>
> ---
>  app/test/test_sched.c        |   2 +-
>  examples/qos_sched/init.c    |   2 +-
>  lib/librte_sched/Makefile    |   2 +-
>  lib/librte_sched/meson.build |   2 +-
>  lib/librte_sched/rte_sched.c | 182 ++++++++++++++++++++---------------
>  lib/librte_sched/rte_sched.h |  23 +++--
>  6 files changed, 124 insertions(+), 89 deletions(-)
> 
> diff --git a/app/test/test_sched.c b/app/test/test_sched.c
> index 49bb9ea6f..36fa2d425 100644
> --- a/app/test/test_sched.c
> +++ b/app/test/test_sched.c
> @@ -40,7 +40,7 @@ static struct rte_sched_pipe_params pipe_profile[] = {
>  		.tc_rate = {305175, 305175, 305175, 305175},
>  		.tc_period = 40,
> 
> -		.wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
> +		.wrr_weights = {1, 1, 1, 1},
>  	},
>  };
> 
> diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
> index 1209bd7ce..6b63d4e0e 100644
> --- a/examples/qos_sched/init.c
> +++ b/examples/qos_sched/init.c
> @@ -186,7 +186,7 @@ static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PO
>  		.tc_ov_weight = 1,
>  #endif
> 
> -		.wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
> +		.wrr_weights = {1, 1, 1, 1},
>  	},
>  };
> 
> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
> index 644fd9d15..3d7f410e1 100644
> --- a/lib/librte_sched/Makefile
> +++ b/lib/librte_sched/Makefile
> @@ -18,7 +18,7 @@ LDLIBS += -lrte_timer
> 
>  EXPORT_MAP := rte_sched_version.map
> 
> -LIBABIVER := 2
> +LIBABIVER := 3
> 
>  #
>  # all source are stored in SRCS-y
> diff --git a/lib/librte_sched/meson.build b/lib/librte_sched/meson.build
> index 8e989e5f6..59d43c6d8 100644
> --- a/lib/librte_sched/meson.build
> +++ b/lib/librte_sched/meson.build
> @@ -1,7 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2017 Intel Corporation
> 
> -version = 2
> +version = 3
>  sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
>  headers = files('rte_sched.h', 'rte_sched_common.h',
>  		'rte_red.h', 'rte_approx.h')
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index bc06bc3f4..b1f521794 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -37,6 +37,8 @@
> 
>  #define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
>  #define RTE_SCHED_WRR_SHIFT                   3
> +#define RTE_SCHED_TRAFFIC_CLASS_BE            (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)
> +#define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
>  #define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
>  #define RTE_SCHED_PIPE_INVALID                UINT32_MAX
>  #define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX
> @@ -84,8 +86,9 @@ struct rte_sched_pipe_profile {
>  	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  	uint8_t tc_ov_weight;
> 
> -	/* Pipe queues */
> -	uint8_t  wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
> +	/* Pipe best-effort traffic class queues */
> +	uint8_t n_be_queues;

The n_be_queues value is the same for all pipes within the same port, so it does not make sense to store this per-port value in each pipe profile. At the very least, please move it to the port data structure.

In fact, a better solution (which also simplifies the implementation) is to enforce the same queue size for all BE queues, as it does not make sense to have queues of different sizes within the same traffic class (see my comment on the other patch, where you update the API). Then n_be_queues is always 4, so there is no need for this variable.
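
For illustration, a minimal sketch of such a check (hypothetical helper, mirroring the qsize indexing this patch itself uses for the BE queues):

static int
pipe_be_qsize_check(struct rte_sched_port *port)
{
	uint32_t i;

	/* All best-effort queues must be enabled and of equal size, so
	 * n_be_queues is always RTE_SCHED_BE_QUEUES_PER_PIPE (4).
	 */
	for (i = 1; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++)
		if (port->qsize[i] != port->qsize[0])
			return -1;

	return (port->qsize[0] != 0) ? 0 : -1;
}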


> +	uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
>  };
> 
>  struct rte_sched_pipe {
> @@ -100,8 +103,10 @@ struct rte_sched_pipe {
>  	uint64_t tc_time; /* time of next update */
>  	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> +	uint8_t n_be_queues; /* Best effort traffic class queues */

Same comment here, and it matters even more in this case, as we need to strive to reduce the size of this struct for performance reasons.
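
If it helps, a compile-time guard would catch accidental growth (just a suggestion, using the standard RTE_BUILD_BUG_ON macro; the one-cache-line bound is my assumption):

	/* Hypothetical guard: fail the build if the pipe run-time data
	 * grows past one cache line.
	 */
	RTE_BUILD_BUG_ON(sizeof(struct rte_sched_pipe) > RTE_CACHE_LINE_SIZE);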

> +
>  	/* Weighted Round Robin (WRR) */
> -	uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
> +	uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
> 
>  	/* TC oversubscription */
>  	uint32_t tc_ov_credits;
> @@ -153,16 +158,16 @@ struct rte_sched_grinder {
>  	uint32_t tc_index;
>  	struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> -	uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> -	uint16_t qsize;
> +	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
> +	uint16_t qsize[RTE_SCHED_MAX_QUEUES_PER_TC];
>  	uint32_t qmask;
>  	uint32_t qpos;
>  	struct rte_mbuf *pkt;
> 
>  	/* WRR */
> -	uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
> -	uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
> -	uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
> +	uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
> +	uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
> +	uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
>  };
> 
>  struct rte_sched_port {
> @@ -301,7 +306,6 @@ pipe_profile_check(struct rte_sched_pipe_params *params,
>  		if (params->wrr_weights[i] == 0)
>  			return -16;
>  	}
> -
>  	return 0;
>  }
> 
> @@ -483,7 +487,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
>  		"    Token bucket: period = %u, credits per period = %u, size = %u\n"
>  		"    Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
>  		"    Traffic class 3 oversubscription: weight = %hhu\n"
> -		"    WRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n",
> +		"    WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
>  		i,
> 
>  		/* Token bucket */
> @@ -502,10 +506,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
>  		p->tc_ov_weight,
> 
>  		/* WRR */
> -		p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3],
> -		p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7],
> -		p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11],
> -		p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]);
> +		p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
>  }
> 
>  static inline uint64_t
> @@ -519,10 +520,12 @@ rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
>  }
> 
>  static void
> -rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src,
> +rte_sched_pipe_profile_convert(struct rte_sched_port *port,
> +	struct rte_sched_pipe_params *src,
>  	struct rte_sched_pipe_profile *dst,
>  	uint32_t rate)
>  {
> +	uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
>  	uint32_t i;
> 
>  	/* Token Bucket */
> @@ -553,18 +556,36 @@ rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src,
>  	dst->tc_ov_weight = src->tc_ov_weight;
>  #endif
> 
> -	/* WRR */
> -	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -		uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
> -		uint32_t lcd, lcd1, lcd2;
> -		uint32_t qindex;
> +	/* WRR queues */
> +	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++)
> +		if (port->qsize[i])
> +			dst->n_be_queues++;
> +
> +	if (dst->n_be_queues == 1)
> +		dst->wrr_cost[0] = src->wrr_weights[0];
> +
> +	if (dst->n_be_queues == 2) {
> +		uint32_t lcd;
> +
> +		wrr_cost[0] = src->wrr_weights[0];
> +		wrr_cost[1] = src->wrr_weights[1];
> +
> +		lcd = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
> +
> +		wrr_cost[0] = lcd / wrr_cost[0];
> +		wrr_cost[1] = lcd / wrr_cost[1];
> 
> -		qindex = i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
> +		dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
> +		dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
> +	}
> 
> -		wrr_cost[0] = src->wrr_weights[qindex];
> -		wrr_cost[1] = src->wrr_weights[qindex + 1];
> -		wrr_cost[2] = src->wrr_weights[qindex + 2];
> -		wrr_cost[3] = src->wrr_weights[qindex + 3];
> +	if (dst->n_be_queues == 4) {

See the above comment: it is better and simpler to enforce n_be_queues == 4, which simplifies this code a lot, as it keeps only this branch and removes the need for the two above.
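
With n_be_queues fixed at 4, the whole conversion collapses to something like this rough sketch (reusing rte_get_lcd() as the patch already does):

	uint32_t lcd1, lcd2, lcd, i;

	lcd1 = rte_get_lcd(src->wrr_weights[0], src->wrr_weights[1]);
	lcd2 = rte_get_lcd(src->wrr_weights[2], src->wrr_weights[3]);
	lcd = rte_get_lcd(lcd1, lcd2);

	/* Each queue's cost is the LCD of all weights divided by its own
	 * weight, so a queue with twice the weight pays half the cost per
	 * byte served.
	 */
	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++)
		dst->wrr_cost[i] = (uint8_t) (lcd / src->wrr_weights[i]);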

> +		uint32_t lcd1, lcd2, lcd;
> +
> +		wrr_cost[0] = src->wrr_weights[0];
> +		wrr_cost[1] = src->wrr_weights[1];
> +		wrr_cost[2] = src->wrr_weights[2];
> +		wrr_cost[3] = src->wrr_weights[3];
> 
>  		lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
>  		lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
> @@ -575,10 +596,10 @@ rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src,
>  		wrr_cost[2] = lcd / wrr_cost[2];
>  		wrr_cost[3] = lcd / wrr_cost[3];
> 
> -		dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
> -		dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
> -		dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
> -		dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
> +		dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
> +		dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
> +		dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
> +		dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
>  	}
>  }
> 
> @@ -592,7 +613,7 @@ rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port,
>  		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
>  		struct rte_sched_pipe_profile *dst = port->pipe_profiles + i;
> 
> -		rte_sched_pipe_profile_convert(src, dst, params->rate);
> +		rte_sched_pipe_profile_convert(port, src, dst, params->rate);
>  		rte_sched_port_log_pipe_profile(port, i);
>  	}
> 
> @@ -976,7 +997,7 @@ rte_sched_port_pipe_profile_add(struct rte_sched_port *port,
>  		return status;
> 
>  	pp = &port->pipe_profiles[port->n_pipe_profiles];
> -	rte_sched_pipe_profile_convert(params, pp, port->rate);
> +	rte_sched_pipe_profile_convert(port, params, pp, port->rate);
> 
>  	/* Pipe profile not exists */
>  	for (i = 0; i < port->n_pipe_profiles; i++)
> @@ -1715,6 +1736,7 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos)
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
> +	int be_tc_active;
> 
>  	if (!grinder_credits_check(port, pos))
>  		return 0;
> @@ -1725,13 +1747,18 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos)
>  	/* Send packet */
>  	port->pkts_out[port->n_pkts_out++] = pkt;
>  	queue->qr++;
> -	grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos];
> +
> +	be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
> +	grinder->wrr_tokens[grinder->qpos] +=
> +		pkt_len * grinder->wrr_cost[grinder->qpos] * be_tc_active;
> +

Integer multiplication is very expensive; you can easily avoid it by doing a bitwise AND with a mask whose value is either 0 or all ones.
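
Something along these lines (untested sketch; the mask variable name is mine) keeps only the pkt_len * wrr_cost multiply that was already there:

	/* All-ones when the BE traffic class is active, zero otherwise */
	uint32_t wrr_tokens_mask = -(uint32_t) be_tc_active;

	grinder->wrr_tokens[grinder->qpos] +=
		(pkt_len * grinder->wrr_cost[grinder->qpos]) & wrr_tokens_mask;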

>  	if (queue->qr == queue->qw) {
>  		uint32_t qindex = grinder->qindex[grinder->qpos];
> 
>  		rte_bitmap_clear(port->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
> -		grinder->wrr_mask[grinder->qpos] = 0;
> +		if (be_tc_active)
> +			grinder->wrr_mask[grinder->qpos] = 0;
>  		rte_sched_port_set_queue_empty_timestamp(port, qindex);
>  	}
> 
> @@ -1877,7 +1904,7 @@ grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
> 
>  	grinder->tc_index = (qindex >> 2) & 0x3;
>  	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
> -	grinder->qsize = qsize;
> +	grinder->qsize[grinder->tc_index] = qsize;
> 
>  	grinder->qindex[0] = qindex;
>  	grinder->qindex[1] = qindex + 1;
> @@ -1962,26 +1989,15 @@ grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
>  	struct rte_sched_pipe *pipe = grinder->pipe;
>  	struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
> -	uint32_t tc_index = grinder->tc_index;
>  	uint32_t qmask = grinder->qmask;
> -	uint32_t qindex;
> -
> -	qindex = tc_index * 4;
> -
> -	grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
> -	grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
> -	grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
> -	grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
> -
> -	grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
> -	grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
> -	grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
> -	grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
> +	uint32_t i;
> 
> -	grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
> -	grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
> -	grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
> -	grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
> +	for (i = 0; i < pipe->n_be_queues; i++) {
> +		grinder->wrr_tokens[i] =
> +			((uint16_t) pipe->wrr_tokens[i]) << RTE_SCHED_WRR_SHIFT;
> +		grinder->wrr_mask[i] = ((qmask >> i) & 0x1) * 0xFFFF;
> +		grinder->wrr_cost[i] = pipe_params->wrr_cost[i];
> +	}
>  }
> 
>  static inline void
> @@ -1989,19 +2005,12 @@ grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
>  {
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
>  	struct rte_sched_pipe *pipe = grinder->pipe;
> -	uint32_t tc_index = grinder->tc_index;
> -	uint32_t qindex;
> -
> -	qindex = tc_index * 4;
> +	uint32_t i;
> 
> -	pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0])
> -		>> RTE_SCHED_WRR_SHIFT;
> -	pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1])
> -		>> RTE_SCHED_WRR_SHIFT;
> -	pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2])
> -		>> RTE_SCHED_WRR_SHIFT;
> -	pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3])
> -		>> RTE_SCHED_WRR_SHIFT;
> +	for (i = 0; i < pipe->n_be_queues; i++)
> +		pipe->wrr_tokens[i] =
> +			(grinder->wrr_tokens[i] & grinder->wrr_mask[i]) >>
> +				RTE_SCHED_WRR_SHIFT;
>  }
> 
>  static inline void
> @@ -2040,22 +2049,31 @@ static inline void
>  grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
>  {
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
> -	uint16_t qsize, qr[4];
> +	struct rte_sched_pipe *pipe = grinder->pipe;
> +	struct rte_sched_queue *queue;
> +	uint32_t i;
> +	uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
> 
> -	qsize = grinder->qsize;
> -	qr[0] = grinder->queue[0]->qr & (qsize - 1);
> -	qr[1] = grinder->queue[1]->qr & (qsize - 1);
> -	qr[2] = grinder->queue[2]->qr & (qsize - 1);
> -	qr[3] = grinder->queue[3]->qr & (qsize - 1);
> +	grinder->qpos = 0;
> +	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
> +		queue = grinder->queue[0];
> +		qsize = grinder->qsize[0];
> +		qr[0] = queue->qr & (qsize - 1);
> 
> -	rte_prefetch0(grinder->qbase[0] + qr[0]);
> -	rte_prefetch0(grinder->qbase[1] + qr[1]);
> +		rte_prefetch0(grinder->qbase[0] + qr[0]);
> +		return;
> +	}
> +
> +	for (i = 0; i < pipe->n_be_queues; i++) {
> +		queue = grinder->queue[i];
> +		qsize = grinder->qsize[i];
> +		qr[i] = queue->qr & (qsize - 1);
> +
> +		rte_prefetch0(grinder->qbase[i] + qr[i]);
> +	}
> 
>  	grinder_wrr_load(port, pos);
>  	grinder_wrr(port, pos);
> -
> -	rte_prefetch0(grinder->qbase[2] + qr[2]);
> -	rte_prefetch0(grinder->qbase[3] + qr[3]);
>  }
> 
>  static inline void
> @@ -2064,7 +2082,7 @@ grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos)
>  	struct rte_sched_grinder *grinder = port->grinder + pos;
>  	uint32_t qpos = grinder->qpos;
>  	struct rte_mbuf **qbase = grinder->qbase[qpos];
> -	uint16_t qsize = grinder->qsize;
> +	uint16_t qsize = grinder->qsize[qpos];
>  	uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
> 
>  	grinder->pkt = qbase[qr];
> @@ -2118,18 +2136,24 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos)
> 
>  	case e_GRINDER_READ_MBUF:
>  	{
> -		uint32_t result = 0;
> +		uint32_t wrr_active, result = 0;
> 
>  		result = grinder_schedule(port, pos);
> 
> +		wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
> +
>  		/* Look for next packet within the same TC */
>  		if (result && grinder->qmask) {
> -			grinder_wrr(port, pos);
> +			if (wrr_active)
> +				grinder_wrr(port, pos);
> +
>  			grinder_prefetch_mbuf(port, pos);
> 
>  			return 1;
>  		}
> -		grinder_wrr_store(port, pos);
> +
> +		if (wrr_active)
> +			grinder_wrr_store(port, pos);
> 
>  		/* Look for another active TC within same pipe */
>  		if (grinder_next_tc(port, pos)) {
> diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
> index d61dda9f5..2a935998a 100644
> --- a/lib/librte_sched/rte_sched.h
> +++ b/lib/librte_sched/rte_sched.h
> @@ -66,6 +66,22 @@ extern "C" {
>  #include "rte_red.h"
>  #endif
> 
> +/** Maximum number of queues per pipe.
> + * Note that multiple queues (a power of 2) can only be assigned to the
> + * lowest priority (best-effort) traffic class; the other, higher priority
> + * traffic classes have one queue each.
> + * Cannot be changed.
> + *
> + * @see struct rte_sched_port_params
> + */
> +#define RTE_SCHED_QUEUES_PER_PIPE    16
> +
> +/** Number of WRR queues for best-effort traffic class per pipe.
> + *
> + * @see struct rte_sched_pipe_params
> + */
> +#define RTE_SCHED_BE_QUEUES_PER_PIPE    4
> +
>  /** Number of traffic classes per pipe (as well as subport).
>   * Cannot be changed.
>   */
> @@ -74,11 +90,6 @@ extern "C" {
>  /** Number of queues per pipe traffic class. Cannot be changed. */
>  #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
> 
> -/** Number of queues per pipe. */
> -#define RTE_SCHED_QUEUES_PER_PIPE             \
> -	(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *     \
> -	RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
> -
>  /** Maximum number of pipe profiles that can be defined per port.
>   * Compile-time configurable.
>   */
> @@ -165,7 +176,7 @@ struct rte_sched_pipe_params {
>  #endif
> 
>  	/* Pipe queues */
> -	uint8_t  wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights */
> +	uint8_t  wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**< WRR weights */
>  };
> 
>  /** Queue statistics */
> --
> 2.21.0


