[dpdk-dev] [PATCH] ethdev: change vtune profiling approach

ilia.kurakin at intel.com ilia.kurakin at intel.com
Tue Jul 3 15:11:28 CEST 2018


From: Ilia Kurakin <ilia.kurakin at intel.com>

This patch changes the rx_burst profiling approach:
	1. The VTune-specific instrumentation is removed.
	2. An empty hook callback for profiling is added.
This way all VTune-specific logic moves to the VTune side. The hook is enabled
only when the CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE option is turned on. VTune
uses this hook to attach to the polling cycle. It is not possible to attach to
rx_burst directly, because it is inline.

Signed-off-by: Ilia Kurakin <ilia.kurakin at intel.com>

---
 config/common_base                    |   2 +-
 doc/guides/prog_guide/profile_app.rst |  34 ++---------
 lib/librte_ethdev/ethdev_profile.c    | 103 +++++-----------------------------
 lib/librte_ethdev/ethdev_profile.h    |   6 +-
 lib/librte_ethdev/rte_ethdev.c        |   4 +-
 5 files changed, 25 insertions(+), 124 deletions(-)

diff --git a/config/common_base b/config/common_base
index 721e59b..2bdd895 100644
--- a/config/common_base
+++ b/config/common_base
@@ -128,7 +128,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
-CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=n
+CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n
 
 #
 # Turn off Tx preparation stage
diff --git a/doc/guides/prog_guide/profile_app.rst b/doc/guides/prog_guide/profile_app.rst
index 1106216..02f0561 100644
--- a/doc/guides/prog_guide/profile_app.rst
+++ b/doc/guides/prog_guide/profile_app.rst
@@ -33,38 +33,12 @@ Refer to the
 for details about application profiling.
 
 
-Empty cycles tracing
+Profiling with VTune
 ~~~~~~~~~~~~~~~~~~~~
 
-Iterations that yielded no RX packets (empty cycles, wasted iterations) can
-be analyzed using VTune Amplifier. This profiling employs the
-`Instrumentation and Tracing Technology (ITT) API
-<https://software.intel.com/en-us/node/544195>`_
-feature of VTune Amplifier and requires only reconfiguring the DPDK library,
-no changes in a DPDK application are needed.
-
-To trace wasted iterations on RX queues, first reconfigure DPDK with
-``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
-``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled.
-
-Then rebuild DPDK, specifying paths to the ITT header and library, which can
-be found in any VTune Amplifier distribution in the *include* and *lib*
-directories respectively:
-
-.. code-block:: console
-
-    make EXTRA_CFLAGS=-I<path to ittnotify.h> \
-         EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"
-
-Finally, to see wasted iterations in your performance analysis results,
-select the *"Analyze user tasks, events, and counters"* checkbox in the
-*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI.
-Alternatively, when running VTune Amplifier via command line, specify
-``-knob enable-user-tasks=true`` option.
-
-Collected regions of wasted iterations will be marked on VTune Amplifier's
-timeline as ITT tasks. These ITT tasks have predefined names, containing
-Ethernet device and RX queue identifiers.
+To allow VTune to attach to a DPDK application, reconfigure and recompile
+DPDK with ``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
+``CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE`` enabled.
 
 
 Profiling on ARM64
diff --git a/lib/librte_ethdev/ethdev_profile.c b/lib/librte_ethdev/ethdev_profile.c
index 0d1dcda..a3c303f 100644
--- a/lib/librte_ethdev/ethdev_profile.c
+++ b/lib/librte_ethdev/ethdev_profile.c
@@ -1,87 +1,33 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #include "ethdev_profile.h"
 
 /**
- * This conditional block enables RX queues profiling by tracking wasted
- * iterations, i.e. iterations which yielded no RX packets. Profiling is
- * performed using the Instrumentation and Tracing Technology (ITT) API,
- * employed by the Intel (R) VTune (TM) Amplifier.
+ * This conditional block enables Ethernet device profiling with
+ * Intel (R) VTune (TM) Amplifier.
  */
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
-
-#include <ittnotify.h>
-
-#define ITT_MAX_NAME_LEN (100)
-
-/**
- * Auxiliary ITT structure belonging to Ethernet device and using to:
- *   -  track RX queue state to determine whether it is wasting loop iterations
- *   -  begin or end ITT task using task domain and task name (handle)
- */
-struct itt_profile_rx_data {
-	/**
-	 * ITT domains for each queue.
-	 */
-	__itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
-	/**
-	 * ITT task names for each queue.
-	 */
-	__itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
-	/**
-	 * Flags indicating the queues state. Possible values:
-	 *   1 - queue is wasting iterations,
-	 *   0 - otherwise.
-	 */
-	uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
-};
-
-/**
- * The pool of *itt_profile_rx_data* structures.
- */
-struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
-
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
 
 /**
- * This callback function manages ITT tasks collection on given port and queue.
- * It must be registered with rte_eth_add_rx_callback() to be called from
- * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function
- * type declaration.
+ * Hook callback to trace rte_eth_rx_burst() calls.
  */
-static uint16_t
-collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
+uint16_t
+profile_hook_rx_burst_cb(
+	__rte_unused uint16_t port_id, __rte_unused uint16_t queue_id,
 	__rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
 	__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
 {
-	if (unlikely(nb_pkts == 0)) {
-		if (!itt_rx_data[port_id].queue_state[queue_id]) {
-			__itt_task_begin(
-				itt_rx_data[port_id].domains[queue_id],
-				__itt_null, __itt_null,
-				itt_rx_data[port_id].handles[queue_id]);
-			itt_rx_data[port_id].queue_state[queue_id] = 1;
-		}
-	} else {
-		if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
-			__itt_task_end(
-				itt_rx_data[port_id].domains[queue_id]);
-			itt_rx_data[port_id].queue_state[queue_id] = 0;
-		}
-	}
 	return nb_pkts;
 }
 
 /**
- * Initialization of itt_profile_rx_data for a given Ethernet device.
+ * Setting profiling rx callback for a given Ethernet device.
  * This function must be invoked when ethernet device is being configured.
- * Result will be stored in the global array *itt_rx_data*.
  *
  * @param port_id
  *  The port identifier of the Ethernet device.
- * @param port_name
- *  The name of the Ethernet device.
  * @param rx_queue_num
  *  The number of RX queues on specified port.
  *
@@ -90,46 +36,27 @@ collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
  *  - On failure, a negative value.
  */
 static inline int
-itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num)
+vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num)
 {
 	uint16_t q_id;
 
 	for (q_id = 0; q_id < rx_queue_num; ++q_id) {
-		char domain_name[ITT_MAX_NAME_LEN];
-
-		snprintf(domain_name, sizeof(domain_name),
-			"RXBurst.WastedIterations.Port_%s.Queue_%d",
-			port_name, q_id);
-		itt_rx_data[port_id].domains[q_id]
-			= __itt_domain_create(domain_name);
-
-		char task_name[ITT_MAX_NAME_LEN];
-
-		snprintf(task_name, sizeof(task_name),
-			"port id: %d; queue id: %d",
-			port_id, q_id);
-		itt_rx_data[port_id].handles[q_id]
-			= __itt_string_handle_create(task_name);
-
-		itt_rx_data[port_id].queue_state[q_id] = 0;
-
 		if (!rte_eth_add_rx_callback(
-			port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
+			port_id, q_id, profile_hook_rx_burst_cb, NULL)) {
 			return -rte_errno;
 		}
 	}
 
 	return 0;
 }
-#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
+#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */
 
 int
-__rte_eth_profile_rx_init(__rte_unused uint16_t port_id,
+__rte_eth_dev_profile_init(__rte_unused uint16_t port_id,
 	__rte_unused struct rte_eth_dev *dev)
 {
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
-	return itt_profile_rx_init(
-		port_id, dev->data->name, dev->data->nb_rx_queues);
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
+	return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues);
 #endif
 	return 0;
 }
diff --git a/lib/librte_ethdev/ethdev_profile.h b/lib/librte_ethdev/ethdev_profile.h
index e5ea368..65031e6 100644
--- a/lib/librte_ethdev/ethdev_profile.h
+++ b/lib/librte_ethdev/ethdev_profile.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #ifndef _RTE_ETHDEV_PROFILE_H_
@@ -8,7 +8,7 @@
 #include "rte_ethdev.h"
 
 /**
- * Initialization of profiling RX queues for the Ethernet device.
+ * Initialization of the Ethernet device profiling.
  * Implementation of this function depends on chosen profiling method,
  * defined in configs.
  *
@@ -22,6 +22,6 @@
  *  - On failure, a negative value.
  */
 int
-__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev);
+__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev);
 
 #endif
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index a9977df..acba712 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -1225,9 +1225,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 	}
 
 	/* Initialize Rx profiling if enabled at compilation time. */
-	diag = __rte_eth_profile_rx_init(port_id, dev);
+	diag = __rte_eth_dev_profile_init(port_id, dev);
 	if (diag != 0) {
-		RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n",
+		RTE_PMD_DEBUG_TRACE("port%d __rte_eth_dev_profile_init = %d\n",
 				port_id, diag);
 		rte_eth_dev_rx_queue_config(dev, 0);
 		rte_eth_dev_tx_queue_config(dev, 0);
-- 
2.7.4



More information about the dev mailing list