[dpdk-dev] [PATCH v2] examples/ipsec-secgw: add per core packet stats

Ananyev, Konstantin konstantin.ananyev at intel.com
Fri Apr 24 13:14:11 CEST 2020


 
> Adding per core packet handling stats to analyze traffic distribution
> when multiple cores are engaged.
> 
> Since aggregating the packet stats across cores would affect
> performance, keeping the feature disabled using compile time flags.
> 
> Signed-off-by: Anoob Joseph <anoobj at marvell.com>
> ---
> 
> v2:
> * Added lookup failure cases to drop count
> 
>  examples/ipsec-secgw/ipsec-secgw.c   | 118 +++++++++++++++++++++++++++++++++--
>  examples/ipsec-secgw/ipsec-secgw.h   |   2 +
>  examples/ipsec-secgw/ipsec.c         |  13 +++-
>  examples/ipsec-secgw/ipsec.h         |  22 +++++++
>  examples/ipsec-secgw/ipsec_process.c |   5 ++
>  5 files changed, 154 insertions(+), 6 deletions(-)
> 
> diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c
> index 6d02341..db92ddc 100644
> --- a/examples/ipsec-secgw/ipsec-secgw.c
> +++ b/examples/ipsec-secgw/ipsec-secgw.c
> @@ -288,6 +288,61 @@ adjust_ipv6_pktlen(struct rte_mbuf *m, const struct rte_ipv6_hdr *iph,
>  	}
>  }
> 
> +#ifdef ENABLE_STATS
> +static uint64_t timer_period = 10; /* default period is 10 seconds */

I think it would be better to give the user the ability to control the stats period,
either via a run-time option or just at compile time:
replace ENABLE_STATS with STATS_PERIOD (0 would mean stats are disabled).

> +
> +/* Print out statistics on packet distribution */
> +static void
> +print_stats(void)
> +{
> +	uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
> +	unsigned int coreid;
> +	float burst_percent;
> +
> +	total_packets_dropped = 0;
> +	total_packets_tx = 0;
> +	total_packets_rx = 0;
> +
> +	const char clr[] = { 27, '[', '2', 'J', '\0' };
> +	const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
> +
> +	/* Clear screen and move to top left */
> +	printf("%s%s", clr, topLeft);

Is that really needed?

> +
> +	printf("\nCore statistics ====================================");
> +
> +	for (coreid = 0; coreid < RTE_MAX_LCORE; coreid++) {
> +		/* skip disabled cores */
> +		if (rte_lcore_is_enabled(coreid) == 0)
> +			continue;
> +		burst_percent = (float)(core_statistics[coreid].burst_rx * 100)/
> +					core_statistics[coreid].rx;

Would float always be enough here? Might long double be better?

> +		printf("\nStatistics for core %u ------------------------------"
> +			   "\nPackets received: %20"PRIu64
> +			   "\nPackets sent: %24"PRIu64
> +			   "\nPackets dropped: %21"PRIu64
> +			   "\nBurst percent: %23.2f",
> +			   coreid,
> +			   core_statistics[coreid].rx,
> +			   core_statistics[coreid].tx,
> +			   core_statistics[coreid].dropped,
> +			   burst_percent);
> +
> +		total_packets_dropped += core_statistics[coreid].dropped;
> +		total_packets_tx += core_statistics[coreid].tx;
> +		total_packets_rx += core_statistics[coreid].rx;
> +	}
> +	printf("\nAggregate statistics ==============================="
> +		   "\nTotal packets received: %14"PRIu64
> +		   "\nTotal packets sent: %18"PRIu64
> +		   "\nTotal packets dropped: %15"PRIu64,
> +		   total_packets_rx,
> +		   total_packets_tx,
> +		   total_packets_dropped);
> +	printf("\n====================================================\n");
> +}
> +#endif /* ENABLE_STATS */
> +
>  static inline void
>  prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t)
>  {
> @@ -333,6 +388,7 @@ prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t)
> 
>  		/* drop packet when IPv6 header exceeds first segment length */
>  		if (unlikely(l3len > pkt->data_len)) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(pkt);
>  			return;
>  		}
> @@ -350,6 +406,7 @@ prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t)
>  		/* Unknown/Unsupported type, drop the packet */
>  		RTE_LOG(ERR, IPSEC, "Unsupported packet type 0x%x\n",
>  			rte_be_to_cpu_16(eth->ether_type));
> +		core_stats_update_drop(1);
>  		rte_pktmbuf_free(pkt);
>  		return;
>  	}
> @@ -471,6 +528,11 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
>  	int32_t ret;
>  	uint16_t queueid;
> 
> +#ifdef ENABLE_STATS
> +	int lcore_id = rte_lcore_id();
> +	core_statistics[lcore_id].tx += n;
> +#endif /* ENABLE_STATS */

Instead of polluting generic code with ifdefs, why not
introduce 2 new functions: core_stats_update_rx(), core_stats_update_tx(),
as you did for core_stats_update_drop()?

> +
>  	queueid = qconf->tx_queue_id[port];
>  	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
> 
> @@ -478,6 +540,9 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
> 
>  	ret = rte_eth_tx_burst(port, queueid, m_table, n);
>  	if (unlikely(ret < n)) {
> +#ifdef ENABLE_STATS
> +		core_statistics[lcore_id].dropped += n-ret;
> +#endif /* ENABLE_STATS */

You have core_stats_update_drop() for that - use it.

>  		do {
>  			rte_pktmbuf_free(m_table[ret]);
>  		} while (++ret < n);
> @@ -525,6 +590,7 @@ send_fragment_packet(struct lcore_conf *qconf, struct rte_mbuf *m,
>  			"error code: %d\n",
>  			__func__, m->pkt_len, rte_errno);
> 
> +	core_stats_update_drop(1);
>  	rte_pktmbuf_free(m);
>  	return len;
>  }
> @@ -549,8 +615,10 @@ send_single_packet(struct rte_mbuf *m, uint16_t port, uint8_t proto)
>  	/* need to fragment the packet */
>  	} else if (frag_tbl_sz > 0)
>  		len = send_fragment_packet(qconf, m, port, proto);
> -	else
> +	else {
> +		core_stats_update_drop(1);
>  		rte_pktmbuf_free(m);

There seem to be a lot of such places...
Would it be worth combining core_stats_update_drop() and rte_pktmbuf_free(m)
into a single inline function: ipsec_secgw_packet_drop(struct rte_mbuf *m[], uint32_t n)
and using it in all such places?

> +	}
> 
>  	/* enough pkts to be sent */
>  	if (unlikely(len == MAX_PKT_BURST)) {
> @@ -584,18 +652,21 @@ inbound_sp_sa(struct sp_ctx *sp, struct sa_ctx *sa, struct traffic_type *ip,
>  			continue;
>  		}
>  		if (res == DISCARD) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(m);
>  			continue;
>  		}
> 
>  		/* Only check SPI match for processed IPSec packets */
>  		if (i < lim && ((m->ol_flags & PKT_RX_SEC_OFFLOAD) == 0)) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(m);
>  			continue;
>  		}
> 
>  		sa_idx = res - 1;
>  		if (!inbound_sa_check(sa, m, sa_idx)) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(m);
>  			continue;
>  		}
> @@ -630,8 +701,10 @@ split46_traffic(struct ipsec_traffic *trf, struct rte_mbuf *mb[], uint32_t num)
>  					uint8_t *,
>  					offsetof(struct ip6_hdr, ip6_nxt));
>  			n6++;
> -		} else
> +		} else {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(m);
> +		}
>  	}
> 
>  	trf->ip4.num = n4;
> @@ -682,11 +755,12 @@ outbound_sp(struct sp_ctx *sp, struct traffic_type *ip,
>  	for (i = 0; i < ip->num; i++) {
>  		m = ip->pkts[i];
>  		sa_idx = ip->res[i] - 1;
> -		if (ip->res[i] == DISCARD)
> +		if (ip->res[i] == DISCARD) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(m);
> -		else if (ip->res[i] == BYPASS)
> +		} else if (ip->res[i] == BYPASS) {

Looks unnecessary.

>  			ip->pkts[j++] = m;
> -		else {
> +		} else {
>  			ipsec->res[ipsec->num] = sa_idx;
>  			ipsec->pkts[ipsec->num++] = m;
>  		}
> @@ -705,6 +779,8 @@ process_pkts_outbound(struct ipsec_ctx *ipsec_ctx,
>  	for (i = 0; i < traffic->ipsec.num; i++)
>  		rte_pktmbuf_free(traffic->ipsec.pkts[i]);
> 
> +	core_stats_update_drop(traffic->ipsec.num);
> +
>  	traffic->ipsec.num = 0;
> 
>  	outbound_sp(ipsec_ctx->sp4_ctx, &traffic->ip4, &traffic->ipsec);
> @@ -745,12 +821,14 @@ process_pkts_inbound_nosp(struct ipsec_ctx *ipsec_ctx,
>  	/* Drop any IPv4 traffic from unprotected ports */
>  	for (i = 0; i < traffic->ip4.num; i++)
>  		rte_pktmbuf_free(traffic->ip4.pkts[i]);
> +	core_stats_update_drop(traffic->ip4.num);
> 
>  	traffic->ip4.num = 0;
> 
>  	/* Drop any IPv6 traffic from unprotected ports */
>  	for (i = 0; i < traffic->ip6.num; i++)
>  		rte_pktmbuf_free(traffic->ip6.pkts[i]);
> +	core_stats_update_drop(traffic->ip6.num);
> 
>  	traffic->ip6.num = 0;
> 
> @@ -788,6 +866,7 @@ process_pkts_outbound_nosp(struct ipsec_ctx *ipsec_ctx,
>  	/* Drop any IPsec traffic from protected ports */
>  	for (i = 0; i < traffic->ipsec.num; i++)
>  		rte_pktmbuf_free(traffic->ipsec.pkts[i]);
> +	core_stats_update_drop(traffic->ipsec.num);
> 
>  	n = 0;
> 
> @@ -901,6 +980,7 @@ route4_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts)
>  		}
> 
>  		if ((pkt_hop & RTE_LPM_LOOKUP_SUCCESS) == 0) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(pkts[i]);
>  			continue;
>  		}
> @@ -953,6 +1033,7 @@ route6_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts)
>  		}
> 
>  		if (pkt_hop == -1) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(pkts[i]);
>  			continue;
>  		}
> @@ -1099,6 +1180,9 @@ ipsec_poll_mode_worker(void)
>  	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
>  			/ US_PER_S * BURST_TX_DRAIN_US;
>  	struct lcore_rx_queue *rxql;
> +#ifdef ENABLE_STATS
> +	uint64_t timer_tsc = 0;
> +#endif /* ENABLE_STATS */

Probably better to just use RTE_SET_USED(timer_tsc);

> 
>  	prev_tsc = 0;
>  	lcore_id = rte_lcore_id();
> @@ -1159,6 +1243,19 @@ ipsec_poll_mode_worker(void)
>  			drain_tx_buffers(qconf);
>  			drain_crypto_buffers(qconf);
>  			prev_tsc = cur_tsc;
> +#ifdef ENABLE_STATS
> +			if (lcore_id == rte_get_master_lcore()) {
> +				/* advance the timer */
> +				timer_tsc += diff_tsc;
> +
> +				/* if timer has reached its timeout */
> +				if (unlikely(timer_tsc >= timer_period)) {
> +					print_stats();
> +					/* reset the timer */
> +					timer_tsc = 0;
> +				}
> +			}
> +#endif /* ENABLE_STATS */


Why do stats collection/display inside the data-path?
Why not use rte_timer/rte_alarm and make it happen in a control thread?
Another option - print the stats when the process receives some signal.
In that case we don't need to bother with a time period at all - it will be
up to the user to decide.
Again, if we remove print_stats() from the data-path, collecting the stats
might become cheap enough to do always, and we will not need the ENABLE_STATS
macro at all.

>  		}
> 
>  		for (i = 0; i < qconf->nb_rx_queue; ++i) {
> @@ -1169,6 +1266,12 @@ ipsec_poll_mode_worker(void)
>  			nb_rx = rte_eth_rx_burst(portid, queueid,
>  					pkts, MAX_PKT_BURST);
> 
> +#ifdef ENABLE_STATS
> +			core_statistics[lcore_id].rx += nb_rx;
> +			if (nb_rx == MAX_PKT_BURST)
> +				core_statistics[lcore_id].burst_rx += nb_rx;
> +#endif /* ENABLE_STATS */
> +

Same as above for TX: no need to pollute the code with ifdefs.
Better to introduce a new function: core_stats_update_rx() or so.


>  			if (nb_rx > 0)
>  				process_pkts(qconf, pkts, nb_rx, portid);
> 
> @@ -2747,6 +2850,11 @@ main(int32_t argc, char **argv)
>  	signal(SIGINT, signal_handler);
>  	signal(SIGTERM, signal_handler);
> 
> +#ifdef ENABLE_STATS
> +	/* convert to number of cycles */
> +	timer_period *= rte_get_timer_hz();
> +#endif /* ENABLE_STATS */
> +
>  	/* initialize event helper configuration */
>  	eh_conf = eh_conf_init();
>  	if (eh_conf == NULL)
> diff --git a/examples/ipsec-secgw/ipsec-secgw.h b/examples/ipsec-secgw/ipsec-secgw.h
> index 4b53cb5..d886a35 100644
> --- a/examples/ipsec-secgw/ipsec-secgw.h
> +++ b/examples/ipsec-secgw/ipsec-secgw.h
> @@ -6,6 +6,8 @@
> 
>  #include <stdbool.h>
> 
> +//#define ENABLE_STATS
> +

Should be removed I think.

>  #define NB_SOCKETS 4
> 
>  #define MAX_PKT_BURST 32
> diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
> index bf88d80..dcb9312 100644
> --- a/examples/ipsec-secgw/ipsec.c
> +++ b/examples/ipsec-secgw/ipsec.c
> @@ -499,8 +499,10 @@ enqueue_cop_burst(struct cdev_qp *cqp)
>  			" enqueued %u crypto ops out of %u\n",
>  			cqp->id, cqp->qp, ret, len);
>  			/* drop packets that we fail to enqueue */
> -			for (i = ret; i < len; i++)
> +			for (i = ret; i < len; i++) {
> +				core_stats_update_drop(1);
>  				rte_pktmbuf_free(cqp->buf[i]->sym->m_src);
> +			}
>  	}
>  	cqp->in_flight += ret;
>  	cqp->len = 0;
> @@ -528,6 +530,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
> 
>  	for (i = 0; i < nb_pkts; i++) {
>  		if (unlikely(sas[i] == NULL)) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(pkts[i]);
>  			continue;
>  		}
> @@ -549,6 +552,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
> 
>  			if ((unlikely(ips->security.ses == NULL)) &&
>  				create_lookaside_session(ipsec_ctx, sa, ips)) {
> +				core_stats_update_drop(1);
>  				rte_pktmbuf_free(pkts[i]);
>  				continue;
>  			}
> @@ -563,6 +567,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
>  		case RTE_SECURITY_ACTION_TYPE_CPU_CRYPTO:
>  			RTE_LOG(ERR, IPSEC, "CPU crypto is not supported by the"
>  					" legacy mode.");
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(pkts[i]);
>  			continue;
> 
> @@ -575,6 +580,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
> 
>  			if ((unlikely(ips->crypto.ses == NULL)) &&
>  				create_lookaside_session(ipsec_ctx, sa, ips)) {
> +				core_stats_update_drop(1);
>  				rte_pktmbuf_free(pkts[i]);
>  				continue;
>  			}
> @@ -584,6 +590,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
> 
>  			ret = xform_func(pkts[i], sa, &priv->cop);
>  			if (unlikely(ret)) {
> +				core_stats_update_drop(1);
>  				rte_pktmbuf_free(pkts[i]);
>  				continue;
>  			}
> @@ -608,6 +615,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
> 
>  			ret = xform_func(pkts[i], sa, &priv->cop);
>  			if (unlikely(ret)) {
> +				core_stats_update_drop(1);
>  				rte_pktmbuf_free(pkts[i]);
>  				continue;
>  			}
> @@ -643,6 +651,7 @@ ipsec_inline_dequeue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
>  		sa = priv->sa;
>  		ret = xform_func(pkt, sa, &priv->cop);
>  		if (unlikely(ret)) {
> +			core_stats_update_drop(1);
>  			rte_pktmbuf_free(pkt);
>  			continue;
>  		}
> @@ -690,12 +699,14 @@ ipsec_dequeue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
>  				RTE_SECURITY_ACTION_TYPE_NONE) {
>  				ret = xform_func(pkt, sa, cops[j]);
>  				if (unlikely(ret)) {
> +					core_stats_update_drop(1);
>  					rte_pktmbuf_free(pkt);
>  					continue;
>  				}
>  			} else if (ipsec_get_action_type(sa) ==
>  				RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL) {
>  				if (cops[j]->status) {
> +					core_stats_update_drop(1);
>  					rte_pktmbuf_free(pkt);
>  					continue;
>  				}
> diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h
> index 1e642d1..8519eab 100644
> --- a/examples/ipsec-secgw/ipsec.h
> +++ b/examples/ipsec-secgw/ipsec.h
> @@ -46,6 +46,17 @@
> 
>  #define IP6_VERSION (6)
> 
> +#ifdef ENABLE_STATS
> +struct ipsec_core_statistics {
> +	uint64_t tx;
> +	uint64_t rx;
> +	uint64_t dropped;
> +	uint64_t burst_rx;

It is a bit strange to have burst_rx and no similar counterpart for tx.
BTW, do you need burst_rx?
It might be better to have:
nb_calls_rx, nb_calls_tx;
and then rx/nb_calls_rx will give you the average burst size.

> +} __rte_cache_aligned;
> +
> +struct ipsec_core_statistics core_statistics[RTE_MAX_ETHPORTS];

Should be RTE_MAX_LCORE, I think.

> +#endif /* ENABLE_STATS */
> +
>  struct rte_crypto_xform;
>  struct ipsec_xform;
>  struct rte_mbuf;
> @@ -416,4 +427,15 @@ check_flow_params(uint16_t fdir_portid, uint8_t fdir_qid);
>  int
>  create_ipsec_esp_flow(struct ipsec_sa *sa);
> 
> +static inline void
> +core_stats_update_drop(int n)
> +{
> +#ifdef ENABLE_STATS
> +	int lcore_id = rte_lcore_id();
> +	core_statistics[lcore_id].dropped += n;
> +#else
> +	RTE_SET_USED(n);
> +#endif /* ENABLE_STATS */
> +}
> +
>  #endif /* __IPSEC_H__ */
> diff --git a/examples/ipsec-secgw/ipsec_process.c b/examples/ipsec-secgw/ipsec_process.c
> index bb2f2b8..05cb3ad 100644
> --- a/examples/ipsec-secgw/ipsec_process.c
> +++ b/examples/ipsec-secgw/ipsec_process.c
> @@ -24,6 +24,11 @@ free_pkts(struct rte_mbuf *mb[], uint32_t n)
>  {
>  	uint32_t i;
> 
> +#ifdef ENABLE_STATS
> +	int lcore_id = rte_lcore_id();
> +	core_statistics[lcore_id].dropped += n;
> +#endif /* ENABLE_STATS */
> +

Same as above - why not use core_stats_update_drop() here?

>  	for (i = 0; i != n; i++)
>  		rte_pktmbuf_free(mb[i]);
>  }
> --
> 2.7.4



More information about the dev mailing list