[dpdk-dev] [PATCH v2] SDK: Add scripts to initialize DPDK runtime
Ferruh Yigit
ferruh.yigit at intel.com
Mon Sep 18 22:44:41 CEST 2017
On 12/13/2016 4:47 PM, lboccass at brocade.com (Luca Boccassi) wrote:
> From: Christian Ehrhardt <christian.ehrhardt at canonical.com>
>
> A tools/init directory is added with dpdk-init, a script that can be
> used to initialize a DPDK runtime environment. 2 config files with
> default options, dpdk.conf and interfaces, are provided as well
> together with a SysV init script and a systemd service unit.
I guess this patch is missing comments; it is good to have helper scripts
(and v2 has a BSD license).
Is this correct:
This script runs as a service and does the following, based on the config:
mount_hugetlbfs
setup_hugepages
bind_interfaces
So it keeps the interfaces bound to DPDK after a reboot?
[intentionally leaving the rest of the patch, since this is an old patch]
>
> v2: relicensed dpdk-init.in from GPL3 to BSD-3-clause with authors'
> permission
>
> Signed-off-by: Luca Boccassi <lboccass at brocade.com>
> Signed-off-by: Christian Ehrhardt <christian.ehrhardt at canonical.com>
> ---
> mk/rte.sdkinstall.mk | 21 ++++
> tools/init/dpdk-init.in | 274 +++++++++++++++++++++++++++++++++++++++++++++
> tools/init/dpdk.conf | 60 ++++++++++
> tools/init/dpdk.init.in | 57 ++++++++++
> tools/init/dpdk.service.in | 12 ++
> tools/init/interfaces | 16 +++
> 6 files changed, 440 insertions(+)
> create mode 100755 tools/init/dpdk-init.in
> create mode 100644 tools/init/dpdk.conf
> create mode 100755 tools/init/dpdk.init.in
> create mode 100644 tools/init/dpdk.service.in
> create mode 100644 tools/init/interfaces
>
> diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
> index 7b0d8b5..a3a5a9a 100644
> --- a/mk/rte.sdkinstall.mk
> +++ b/mk/rte.sdkinstall.mk
> @@ -69,6 +69,14 @@ datadir ?= $(datarootdir)/dpdk
> mandir ?= $(datarootdir)/man
> sdkdir ?= $(datadir)
> targetdir ?= $(datadir)/$(RTE_TARGET)
> +# If pkgconfig or systemd.pc are not available fall back to most likely default
> +ifeq ($(shell pkg-config systemd; echo $$?), 0)
> +systemduserunitdir ?= $(shell pkg-config --variable=systemdsystemunitdir systemd)
> +else
> +systemduserunitdir ?= /lib/systemd/system
> +endif
> +initdir ?= /etc/init.d
> +configdir ?= /etc/dpdk
>
> # The install directories may be staged in DESTDIR
>
> @@ -162,6 +170,19 @@ install-sdk:
> $(Q)cp -a $O/app/dpdk-pmdinfogen $(DESTDIR)$(targetdir)/app
> $(Q)$(call rte_symlink, $(DESTDIR)$(includedir), $(DESTDIR)$(targetdir)/include)
> $(Q)$(call rte_symlink, $(DESTDIR)$(libdir), $(DESTDIR)$(targetdir)/lib)
> + $(Q)$(call rte_mkdir, $(DESTDIR)$(initdir))
> + $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \
> + $(RTE_SDK)/tools/init/dpdk.init.in > $(DESTDIR)$(initdir)/dpdk
> + $(Q)chmod +x $(DESTDIR)$(initdir)/dpdk
> + $(Q)$(call rte_mkdir, $(DESTDIR)$(systemduserunitdir))
> + $(Q)sed "s|@@sbindir@@|$(sbindir)|g" $(RTE_SDK)/tools/init/dpdk.service.in > \
> + $(DESTDIR)$(systemduserunitdir)/dpdk.service
> + $(Q)$(call rte_mkdir, $(DESTDIR)$(configdir))
> + $(Q)cp -a $(RTE_SDK)/tools/init/dpdk.conf $(DESTDIR)$(configdir)
> + $(Q)cp -a $(RTE_SDK)/tools/init/interfaces $(DESTDIR)$(configdir)
> + $(Q)sed -e "s|@@configdir@@|$(configdir)|g" -e "s|@@sbindir@@|$(sbindir)|g" \
> + $(RTE_SDK)/tools/init/dpdk-init.in > $(DESTDIR)$(sbindir)/dpdk-init
> + $(Q)chmod +x $(DESTDIR)$(sbindir)/dpdk-init
>
> install-doc:
> ifneq ($(wildcard $O/doc/html),)
> diff --git a/tools/init/dpdk-init.in b/tools/init/dpdk-init.in
> new file mode 100755
> index 0000000..a1a44f7
> --- /dev/null
> +++ b/tools/init/dpdk-init.in
> @@ -0,0 +1,274 @@
> +#!/bin/sh
> +#
> +# dpdk-init: startup script to initialize a dpdk runtime environment
> +#
> +# Autor: Stefan Bader <stefan.bader at canonical.com>
> +# Autor: Christian Ehrhardt <christian.ehrhardt at canonical.com>
> +#
> +# BSD LICENSE
> +#
> +# Copyright(c) 2015-2016 Canonical Ltd. All rights reserved.
> +# All rights reserved.
> +#
> +# Redistribution and use in source and binary forms, with or without
> +# modification, are permitted provided that the following conditions
> +# are met:
> +#
> +# * Redistributions of source code must retain the above copyright
> +# notice, this list of conditions and the following disclaimer.
> +# * Redistributions in binary form must reproduce the above copyright
> +# notice, this list of conditions and the following disclaimer in
> +# the documentation and/or other materials provided with the
> +# distribution.
> +# * Neither the name of Intel Corporation nor the names of its
> +# contributors may be used to endorse or promote products derived
> +# from this software without specific prior written permission.
> +#
> +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +#
> +set -e
> +
> +DPDK_BIND="@@sbindir@@/dpdk-devbind"
> +DPDK_INTERF="@@configdir@@/interfaces"
> +DPDK_CONF="@@configdir@@/dpdk.conf"
> +
> +
> +# pagesize supports [G|g]/[M|m]/[K|k]
> +get_kbytes() {
> + local unit
> + local num
> + unit=$(echo "${1}" | sed 's/[0-9]*//g')
> + num=$(echo "${1}" | sed 's/[^0-9]*//g')
> + case ${unit} in
> + *g | *G)
> + echo $((num*1024*1024))
> + ;;
> + *m | *M)
> + echo $((num*1024))
> + ;;
> + *k | *K)
> + echo $((num))
> + ;;
> + *)
> + echo $((num/1024))
> + ;;
> + esac
> +}
> +
> +get_default_hpgsz() {
> + default_hpgsz=$(grep "Hugepagesize:" /proc/meminfo \
> + | sed 's/^Hugepagesize:\s*//g' | sed 's/\s*kB$//g')
> + echo "${default_hpgsz}"
> +}
> +
> +get_hugetlbfs_mountpoint() {
> + local requested_hpgsz
> + local mp_hpgsz
> + requested_hpgsz=$(get_kbytes "${1}")
> +
> + grep hugetlbfs /proc/mounts | while read \
> + mntfrom mntpoint mntfstype mntopt mntdump mntfsck; do
> +
> + # check if the current muntpoint is of the requested huge page size
> + case ${mntopt} in
> + *pagesize=*)
> + mp_hpgsz=$(echo "${mntopt}" | sed 's/.*pagesize=//g' | sed 's/,.*//g')
> + mp_hpgsz=$(get_kbytes "${mp_hpgsz}")
> + ;;
> + *)
> + mp_hpgsz=$(get_default_hpgsz)
> + ;;
> + esac
> + if [ "${requested_hpgsz}" -eq "${mp_hpgsz}" ]; then
> + echo "${mntpoint}"
> + return
> + fi
> + done
> +}
> +
> +_mount_hugetlbfs() {
> + local MNT="/dev/hugepages"
> + local MNTOPTS=""
> + local requested_hpgsz
> + local default_hpgsz
> + requested_hpgsz=$(get_kbytes "${1}")
> + default_hpgsz=$(get_default_hpgsz)
> +
> + # kernel might not support the requested size
> + if [ ! -d "/sys/kernel/mm/hugepages/hugepages-${requested_hpgsz}kB" ]; then
> + echo "WARNING: requested page size of ${requested_hpgsz}kB " \
> + "not supported by the kernel"
> + return 0
> + fi
> +
> + # special case if this is not the default huge page size
> + if [ "${requested_hpgsz}" -ne "${default_hpgsz}" ]; then
> + MNT="${MNT}-${requested_hpgsz}"
> + MNTOPTS="pagesize=${requested_hpgsz}K"
> + fi
> +
> + if [ ! -e "${MNT}" ]; then
> + mkdir "${MNT}"
> + if [ $? -ne 0 ]; then
> + echo "Could not create directory ${MNT}!" >&2
> + return 1
> + fi
> + fi
> + mount -thugetlbfs hugetlbfs "${MNT}" -o "${MNTOPTS}"
> + return $?
> +}
> +
> +#
> +# The DPDK library will use the first mounted instance it finds for a given
> +# page size. so if there is already one for a given size there is no need to
> +# create another for the same huge page size.
> +#
> +mount_hugetlbfs() {
> + if [ ! -r "$DPDK_CONF" ]; then
> + return 1
> + fi
> + . "$DPDK_CONF"
> +
> + # if a page size is requested, there has to be a mountpoint for that size
> + if [ -n "${NR_2M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '2M')" ]; then
> + _mount_hugetlbfs 2M
> + fi
> + if [ -n "${NR_16M_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '16M')" ]; then
> + _mount_hugetlbfs 16M
> + fi
> + if [ -n "${NR_1G_PAGES}" -a -z "$(get_hugetlbfs_mountpoint '1G')" ]; then
> + _mount_hugetlbfs 1G
> + fi
> +}
> +
> +_setup_hugepages() {
> + MMDIR="/sys/kernel/mm/hugepages/${1}"
> + PAGES=${2}
> +
> + if [ "$PAGES" != "" ]; then
> + if [ "$PAGES" -gt 0 ]; then
> + if [ -d "$MMDIR" -a -w "$MMDIR/nr_hugepages" ]; then
> + # increases the chance to allocate enough huge pages
> + # configurable, since it comes at a perf penality
> + if [ "$DROPCACHE_BEFORE_HP_ALLOC" = "1" ]; then
> + echo 3 > /proc/sys/vm/drop_caches
> + fi
> +
> + echo "$PAGES" > "$MMDIR/nr_hugepages"
> +
> + GOTPAGES=$(cat "$MMDIR/nr_hugepages")
> + if [ "$GOTPAGES" -lt "$PAGES" ]; then
> + echo "WARNING: could not allocate $PAGES at " \
> + "$MMDIR/nr_hugepages (only got $GOTPAGES)."
> + fi
> + else
> + echo "WARNING: $MMDIR/nr_hugepages not found/writable"
> + fi
> + fi
> + fi
> +}
> +
> +#
> +# Reserve a certain amount of hugepages (defined in /etc/dpdk.conf)
> +#
> +setup_hugepages() {
> + if [ ! -r "$DPDK_CONF" ]; then
> + return 1
> + fi
> + . "$DPDK_CONF"
> +
> + _setup_hugepages "hugepages-2048kB" "$NR_2M_PAGES"
> + _setup_hugepages "hugepages-16384kB" "$NR_16M_PAGES"
> + _setup_hugepages "hugepages-1048576kB" "$NR_1G_PAGES"
> +
> + # dpdk uses 2*#hugepages mappings, increase for huge systems LP #1507921
> + if [ -d /sys/kernel/mm/hugepages ]; then
> + max_map_count=$(awk -v pad=65530 '{tot+=$1}END{print tot*2+pad}' \
> + /sys/kernel/mm/hugepages/hugepages-*/nr_hugepages)
> + sysctl -q vm.max_map_count="${max_map_count:-65530}"
> + fi
> +
> + return 0
> +}
> +
> +#
> +# Allow NICs to be automatically bound to DPDK compatible drivers on boot.
> +#
> +bind_interfaces() {
> + if [ ! -r "$DPDK_INTERF" ]; then
> + return 0
> + fi
> + grep -v '^[ \t]*#' "$DPDK_INTERF" | while read BUS ID MOD; do
> + if [ "$BUS" = "" -o "$ID" = "" -o "$MOD" = "" ]; then
> + echo "WARNING: incomplete spec in $DPDK_INTERF" \
> + " - BUS '$BUS' ID '$ID' MOD '$MOD'"
> + continue
> + fi
> + if [ "$BUS" != "pci" ]; then
> + echo "WARNING: incompatible bus '$BUS' in $DPDK_INTERF"
> + continue
> + fi
> +
> + SYSFSPATH="/sys/bus/$BUS/devices/$ID"
> + if [ ! -e "$SYSFSPATH" ]; then
> + echo "WARNING: invalid pci ID '$ID' in $DPDK_INTERF" \
> + " - '$SYSFSPATH' does not exist"
> + continue
> + fi
> + if [ -L "$SYSFSPATH/driver" ]; then
> + CUR=$(readlink "$SYSFSPATH/driver")
> + CUR=$(basename "$CUR")
> + else
> + # device existing, but currently unregistered
> + CUR=""
> + fi
> + if [ "$MOD" != "$CUR" ]; then
> + modprobe -q "$MOD" || true
> + # cloud img have no linux-image-extra initially (uip_pci_generic)
> + # so check if the module is available (loadable/built in)
> + if [ -e "/sys/bus/pci/drivers/${MOD}" ]; then
> + echo "Reassigning pci:$ID to $MOD"
> + $DPDK_BIND -b "$MOD" "$ID"
> + else
> + echo "Warning: failed assigning pci:$ID," \
> + " module $MOD not available"
> + fi
> + else
> + echo "pci:$ID already assigned to $MOD"
> + fi
> + done
> +}
> +
> +
> +
> +case "$1" in
> +start)
> + mount_hugetlbfs
> + setup_hugepages
> + bind_interfaces
> + ;;
> +stop)
> + ;;
> +reload|force-reload)
> + setup_hugepages
> + bind_interfaces
> + ;;
> +status)
> + $DPDK_BIND --status
> + ;;
> +*)
> + echo "Usage: $0 {start|stop|reload|force-reload|status}"
> + exit 1
> + ;;
> +esac
> +
> diff --git a/tools/init/dpdk.conf b/tools/init/dpdk.conf
> new file mode 100644
> index 0000000..a5aea86
> --- /dev/null
> +++ b/tools/init/dpdk.conf
> @@ -0,0 +1,60 @@
> +#
> +# The number of 2M hugepages to reserve on system boot
> +#
> +# Default is 0
> +# To e.g. let it reserve 128M via 64x 2M Hugepages set:
> +# NR_2M_PAGES=64
> +
> +#
> +# The number of 1G hugepages to reserve on system boot
> +#
> +# Default is 0
> +# To e.g. let it reserve 2G via 2x 1G Hugepages set:
> +# NR_1G_PAGES=2
> +
> +# The number of 16M hugepages to reserve, supported e.g. on ppc64el
> +#
> +# Default is 0
> +# To e.g. let it reserve 512M via 32x 16M Hugepages set:
> +# NR_16M_PAGES=32
> +
> +#
> +# Dropping slab and pagecache can help to successfully allocate hugepages,
> +# especially later in the lifecycle of a system.
> +# This comes at the cost of loosing all slab and pagecache on (re)start
> +# of the dpdk service - therefore the default is off.
> +#
> +# Default is 0
> +# Set to 1 to enable it
> +#DROPCACHE_BEFORE_HP_ALLOC=0
> +
> +# The DPDK library will use the first mounted hugetlbfs.
> +# The init scripts try to ensure there is at least one default hugetlbfs
> +# mountpoint on start.
> +# If you have multiple hugetlbfs mountpoints for a complex (e.g. specific numa
> +# policies) setup it should be controlled by the admin instead of this init
> +# script. In that case specific mountpoints can be provided as parameters to
> +# the DPDK library.
> +
> +# Hardware may support other granularities of hugepages (like 4M). But the
> +# larger the hugepages the earlier those should be allocated.
> +# Note: the dpdk init scripts will report warnings, but not fail if they could
> +# not allocate the requested amount of hugepages.
> +# The more or the larger the hugepages to be allocated are, the more it is
> +# recommended to do the reservation as kernel commandline arguments.
> +# To do so edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT
> +# and add [hugepagesz=xx] hugepages=yy ...
> +#
> +# Kernel commandline config:
> +# hugepagesz sets the size for the next hugepages reservation (default 2M)
> +# hugepages reserves the given number of hugepages of the size set before
> +#
> +# After modifying /etc/default/grub, the command "update-grub" has to be
> +# run in order to re-generate the grub config files. The new values will
> +# be used after next reboot.
> +#
> +# example:
> +# GRUB_CMDLINE_LINUX_DEFAULT="... hugepages=16 hugepagesz=1G hugepages=2"
> +#
> +# If the system supports it, this will reserve 16x 2M pages and 2x 1G pages.
> +#
> diff --git a/tools/init/dpdk.init.in b/tools/init/dpdk.init.in
> new file mode 100755
> index 0000000..1e26450
> --- /dev/null
> +++ b/tools/init/dpdk.init.in
> @@ -0,0 +1,57 @@
> +#!/bin/sh
> +
> +### BEGIN INIT INFO
> +# Provides: dpdk
> +# Required-Start: $remote_fs $local_fs
> +# Required-Stop: $remote_fs $local_fs
> +# Default-Start: S
> +# Default-Stop: 0 1 6
> +# Short-Description: start dpdk runtime environment
> +### END INIT INFO
> +
> +set -e
> +
> +PATH="/sbin:/bin:/usr/bin"
> +
> +[ -d @@configdir@@ ] || exit 0
> +
> +# Define LSB log_* functions.
> +# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
> +# and status_of_proc is working.
> +. /lib/lsb/init-functions
> +
> +error=0
> +case "$1" in
> +start)
> + log_action_begin_msg "Starting DPDK environment" "dpdk"
> + output=$(@@sbindir@@/dpdk-init start 2>&1) || error="$?"
> + if [ ! -z "$output" ]; then
> + echo "$output" | while read line; do
> + log_action_cont_msg "$line"
> + done
> + fi
> + log_action_end_msg $error
> + exit $error
> + ;;
> +stop)
> + ;;
> +restart|force-reload)
> + ;;
> +status)
> + output=$(@@sbindir@@/dpdk-init --status 2>&1) || error="$?"
> + if [ ! -z "$output" ]; then
> + echo "$output" | while read line; do
> + log_action_cont_msg "$line"
> + done
> + fi
> + log_action_end_msg $error
> + exit $error
> + ;;
> +*)
> + echo "Usage: $0 {start|stop|restart|force-reload|status}"
> + exit 1
> + ;;
> +esac
> +
> +exit 0
> +
> diff --git a/tools/init/dpdk.service.in b/tools/init/dpdk.service.in
> new file mode 100644
> index 0000000..1968081
> --- /dev/null
> +++ b/tools/init/dpdk.service.in
> @@ -0,0 +1,12 @@
> +[Unit]
> +Description=DPDK runtime environment
> +DefaultDependencies=false
> +After=network-pre.target local-fs.target
> +
> +[Service]
> +Type=oneshot
> +RemainAfterExit=yes
> +ExecStart=@@sbindir@@/dpdk-init start
> +
> +[Install]
> +WantedBy=multi-user.target
> diff --git a/tools/init/interfaces b/tools/init/interfaces
> new file mode 100644
> index 0000000..73c3fca
> --- /dev/null
> +++ b/tools/init/interfaces
> @@ -0,0 +1,16 @@
> +#
> +# <bus> Currently only "pci" is supported
> +# <id> Device ID on the specified bus
> +# <driver> Driver to bind against (vfio-pci, uio_pci_generic, igb_uio or
> +# rte_kni)
> +#
> +# Be aware that the two dpdk compatible drivers uio_pci_generic and vfio-pci are
> +# part of linux-image-extra-<VERSION> package on Debian-based distributions.
> +# This package is not always installed by default - for example in cloud-images.
> +# So please install it in case you run into missing module issues.
> +#
> +# <bus> <id> <driver>
> +# pci 0000:04:00.0 vfio-pci
> +# pci 0000:04:00.1 uio_pci_generic
> +# pci 0000:05:00.0 igb_uio
> +# pci 0000:06:00.0 rte_kni
>
More information about the dev
mailing list