[dpdk-dev] [PATCH 01/10] build: add an option to enable LTO build

Andrzej Ostruszka amo at semihalf.com
Thu Sep 5 11:32:30 CEST 2019


This patch adds an option to enable link time optimization.  In addition
to LTO option itself (-flto) fat-lto-objects are being used.  This is
because during the build pmdinfogen scans the generated ELF objects to
find this_pmd_name* symbol in symbol table.  Without fat-lto-objects gcc
produces ELF only with extra symbols for internal use during linking and
clang does not produce ELF at all (only LLVM IR bitcode).

Signed-off-by: Andrzej Ostruszka <aostruszka at marvell.com>
---
 .travis.yml                                |  7 +++++
 config/common_base                         |  5 +++
 config/meson.build                         |  9 ++++++
 doc/guides/prog_guide/lto.rst              | 36 ++++++++++++++++++++++
 doc/guides/rel_notes/release_19_11.rst     |  8 +++++
 meson_options.txt                          |  2 ++
 mk/toolchain/clang/rte.toolchain-compat.mk |  4 +++
 mk/toolchain/clang/rte.vars.mk             |  8 +++++
 mk/toolchain/gcc/rte.toolchain-compat.mk   |  4 +++
 mk/toolchain/gcc/rte.vars.mk               | 12 ++++++++
 mk/toolchain/icc/rte.vars.mk               |  8 +++++
 11 files changed, 103 insertions(+)
 create mode 100644 doc/guides/prog_guide/lto.rst

diff --git a/.travis.yml b/.travis.yml
index 781f9f666..a9506b13a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,6 +31,7 @@ env:
   - DEF_LIB="static" OPTS="-Denable_kmods=false"
   - DEF_LIB="shared" OPTS="-Denable_kmods=false"
   - DEF_LIB="shared" RUN_TESTS=1 BUILD_DOCS=1
+  - DEF_LIB="static" OPTS="-Denable_lto=true"
 
 matrix:
   include:
@@ -100,6 +101,12 @@ matrix:
       apt:
         packages:
           - *extra_packages
+  - env: DEF_LIB="static" OPTS="-Denable_lto=true" EXTRA_PACKAGES=1
+    compiler: gcc
+    addons:
+      apt:
+        packages:
+          - *extra_packages
 
 
 script: ./.ci/${TRAVIS_OS_NAME}-build.sh
diff --git a/config/common_base b/config/common_base
index 8ef75c203..73a55fdec 100644
--- a/config/common_base
+++ b/config/common_base
@@ -49,6 +49,11 @@ CONFIG_RTE_FORCE_INTRINSICS=n
 #
 CONFIG_RTE_ARCH_STRICT_ALIGN=n
 
+#
+# Enable link time optimization
+#
+CONFIG_RTE_ENABLE_LTO=n
+
 #
 # Compile to share library
 #
diff --git a/config/meson.build b/config/meson.build
index 2bafea530..b224d0cb8 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -196,3 +196,12 @@ add_project_arguments('-D_GNU_SOURCE', language: 'c')
 if is_freebsd
 	add_project_arguments('-D__BSD_VISIBLE', language: 'c')
 endif
+
+if get_option('enable_lto')  # fat objects: pmdinfogen needs real ELF symbols
+	if cc.has_multi_arguments('-flto', '-ffat-lto-objects')
+		add_project_arguments('-flto', '-ffat-lto-objects', language: 'c')
+		add_project_link_arguments('-flto', language: 'c')
+	else
+		message('compiler does not support LTO')
+	endif
+endif
diff --git a/doc/guides/prog_guide/lto.rst b/doc/guides/prog_guide/lto.rst
new file mode 100644
index 000000000..effa0fcc3
--- /dev/null
+++ b/doc/guides/prog_guide/lto.rst
@@ -0,0 +1,36 @@
+Link Time Optimization
+======================
+
+The DPDK framework supports compilation with link time optimization
+turned on.  This depends obviously on the capabilities of the compiler
+to do "whole program" optimization at link time and is available only
+for compilers that support that feature (gcc, clang and icc).  To be
+more specific, the compiler has to support creation of ELF objects
+containing both normal code and internal representation
+(fat-lto-objects).  This is required since during build some code is
+generated by parsing produced ELF objects (pmdinfogen).
+
+The amount of performance gain that one can get from LTO depends on the
+compiler and the code that is being compiled.  However LTO is also
+useful for additional code analysis done by the compiler.  In particular
+due to interprocedural analysis the compiler can produce additional
+warnings about variables that might be used uninitialized.  Some of
+these warnings might be "false positives" though and you might need to
+explicitly initialize a variable in order to silence the compiler.
+
+Link time optimization can be enabled for the whole DPDK framework by
+setting:
+
+.. code-block:: console
+
+    CONFIG_RTE_ENABLE_LTO=y
+
+in the config file for a make based build, or for a meson based build by:
+
+.. code-block:: console
+
+    meson build -Denable_lto=true
+    ninja -C build
+
+Please note that turning LTO on considerably increases compilation
+time.
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 27cfbd9e3..639b7ed53 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -56,6 +56,14 @@ New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+**Added build support for Link Time Optimization.**
+
+ LTO is an optimization technique used by the compiler to perform whole
+ program analysis and optimization at link time.  In order to do that
+ compilers store their internal representation of the source code that
+ the linker uses at the final stage of compilation process.
+
+ See :doc:`../prog_guide/lto` for more information.
 
 Removed Items
 -------------
diff --git a/meson_options.txt b/meson_options.txt
index 448f3e63d..a315e0f82 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -6,6 +6,8 @@ option('drivers_install_subdir', type: 'string', value: 'dpdk/pmds-<VERSION>',
 	description: 'Subdirectory of libdir where to install PMDs. Defaults to using a versioned subdirectory.')
 option('enable_docs', type: 'boolean', value: false,
 	description: 'build documentation')
+option('enable_lto', type: 'boolean', value: false,
+	description: 'Enable link time optimization')
 option('enable_kmods', type: 'boolean', value: true,
 	description: 'build kernel modules')
 option('examples', type: 'string', value: '',
diff --git a/mk/toolchain/clang/rte.toolchain-compat.mk b/mk/toolchain/clang/rte.toolchain-compat.mk
index e6189b498..78f96c648 100644
--- a/mk/toolchain/clang/rte.toolchain-compat.mk
+++ b/mk/toolchain/clang/rte.toolchain-compat.mk
@@ -20,3 +20,7 @@ CLANG_MINOR_VERSION := $(shell echo $(CLANG_VERSION) | cut -f2 -d.)
 ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -lt 35 && echo 1), 1)
 	CC_SUPPORTS_Z := false
 endif
+
+ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -lt 60 && echo 1), 1)
+	CONFIG_RTE_ENABLE_LTO=n
+endif
diff --git a/mk/toolchain/clang/rte.vars.mk b/mk/toolchain/clang/rte.vars.mk
index 3c49dc568..3b1fa05f9 100644
--- a/mk/toolchain/clang/rte.vars.mk
+++ b/mk/toolchain/clang/rte.vars.mk
@@ -48,6 +48,14 @@ endif
 # process cpu flags
 include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk
 
+ifeq ($(CONFIG_RTE_ENABLE_LTO),y)
+# 'fat-lto' is used since pmdinfogen needs to have 'this_pmd_nameX'
+# exported in symbol table and without this option only internal
+# representation is present.
+TOOLCHAIN_CFLAGS += -flto -ffat-lto-objects
+TOOLCHAIN_LDFLAGS += -flto
+endif
+
 # workaround clang bug with warning "missing field initializer" for "= {0}"
 WERROR_FLAGS += -Wno-missing-field-initializers
 
diff --git a/mk/toolchain/gcc/rte.toolchain-compat.mk b/mk/toolchain/gcc/rte.toolchain-compat.mk
index ea40a11c0..ad4fad83c 100644
--- a/mk/toolchain/gcc/rte.toolchain-compat.mk
+++ b/mk/toolchain/gcc/rte.toolchain-compat.mk
@@ -88,6 +88,10 @@ else
 		MACHINE_CFLAGS := $(filter-out -march% -mtune% -msse%,$(MACHINE_CFLAGS))
 	endif
 
+	ifeq ($(shell test $(GCC_VERSION) -lt 45 && echo 1), 1)
+		CONFIG_RTE_ENABLE_LTO=n
+	endif
+
 	# Disable thunderx PMD for gcc < 4.7
 	ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1)
 		CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD=d
diff --git a/mk/toolchain/gcc/rte.vars.mk b/mk/toolchain/gcc/rte.vars.mk
index b852fcfd7..9fc704193 100644
--- a/mk/toolchain/gcc/rte.vars.mk
+++ b/mk/toolchain/gcc/rte.vars.mk
@@ -62,6 +62,18 @@ endif
 # process cpu flags
 include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk
 
+ifeq ($(CONFIG_RTE_ENABLE_LTO),y)
+# 'fat-lto' is used since pmdinfogen needs to have 'this_pmd_nameX'
+# exported in symbol table and without this option only internal
+# representation is present.
+TOOLCHAIN_CFLAGS += -flto -ffat-lto-objects
+TOOLCHAIN_LDFLAGS += -flto
+# workaround for GCC bug 81440
+ifeq ($(shell test $(GCC_VERSION) -lt 80 && echo 1), 1)
+WERROR_FLAGS += -Wno-lto-type-mismatch
+endif
+endif
+
 # workaround GCC bug with warning "missing initializer" for "= {0}"
 ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1)
 WERROR_FLAGS += -Wno-missing-field-initializers
diff --git a/mk/toolchain/icc/rte.vars.mk b/mk/toolchain/icc/rte.vars.mk
index aa1422bf1..8aa87aa1e 100644
--- a/mk/toolchain/icc/rte.vars.mk
+++ b/mk/toolchain/icc/rte.vars.mk
@@ -54,5 +54,13 @@ endif
 # process cpu flags
 include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk
 
+ifeq ($(CONFIG_RTE_ENABLE_LTO),y)
+# 'fat-lto' is used since pmdinfogen needs to have 'this_pmd_nameX'
+# exported in symbol table and without this option only internal
+# representation is present.
+TOOLCHAIN_CFLAGS += -flto -ffat-lto-objects
+TOOLCHAIN_LDFLAGS += -flto
+endif
+
 export CC AS AR LD OBJCOPY OBJDUMP STRIP READELF
 export TOOLCHAIN_CFLAGS TOOLCHAIN_LDFLAGS TOOLCHAIN_ASFLAGS
-- 
2.17.1



More information about the dev mailing list