[dpdk-dev] [PATCH v5 4/6] net/tap: add eBPF API

Ophir Munk ophirmu at mellanox.com
Thu Jan 18 14:38:09 CET 2018


This commit adds the BPF API to be used by TAP.

tap_flow_bpf_cls_q() - load into the kernel a BPF program that classifies
packets to their matching queues
tap_flow_bpf_calc_l3_l4_hash() - load into the kernel a BPF program that
calculates the per-packet layer 3 and layer 4 RSS hash
tap_flow_bpf_rss_map_create() - create a BPF RSS map for storing RSS
parameters per RSS rule
tap_flow_bpf_update_rss_elem() - update a BPF map entry with RSS rule
parameters

Signed-off-by: Ophir Munk <ophirmu at mellanox.com>
---
 drivers/net/tap/Makefile      |   6 ++
 drivers/net/tap/tap_bpf.h     | 112 +++++++++++++++++++++++++
 drivers/net/tap/tap_bpf_api.c | 190 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/tap/tap_flow.h    |   6 ++
 4 files changed, 314 insertions(+)
 create mode 100644 drivers/net/tap/tap_bpf.h
 create mode 100644 drivers/net/tap/tap_bpf_api.c

diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
index fbf84e1..fad8a94 100644
--- a/drivers/net/tap/Makefile
+++ b/drivers/net/tap/Makefile
@@ -35,6 +35,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_netlink.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_tcmsgs.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_bpf_api.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
 
@@ -61,6 +62,11 @@ tap_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		linux/pkt_cls.h \
 		enum TCA_FLOWER_KEY_VLAN_PRIO \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_BPF_PROG_LOAD \
+		linux/bpf.h \
+		enum BPF_PROG_LOAD \
+		$(AUTOCONF_OUTPUT)
 
 # Create tap_autoconf.h or update it in case it differs from the new one.
 
diff --git a/drivers/net/tap/tap_bpf.h b/drivers/net/tap/tap_bpf.h
new file mode 100644
index 0000000..30eefb3
--- /dev/null
+++ b/drivers/net/tap/tap_bpf.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+ * Copyright 2017 Mellanox Technologies, Ltd.
+ */
+
+#ifndef __TAP_BPF_H__
+#define __TAP_BPF_H__
+
+#include <tap_autoconf.h>
+
+#ifdef HAVE_BPF_PROG_LOAD
+#include <linux/bpf.h>
+#else /* HAVE_BPF_PROG_LOAD unset: local subset of the BPF definitions */
+/* BPF_MAP_UPDATE_ELEM command flags */
+#define	BPF_ANY	0 /* create a new element or update an existing one */
+
+/* BPF instruction layout, as handed to bpf_load() */
+struct bpf_insn {
+	__u8	code;
+	__u8	dst_reg:4;
+	__u8	src_reg:4;
+	__s16	off;
+	__s32	imm; /* immediate value */
+};
+
+/* BPF program types; the numeric value is passed as attr.prog_type */
+enum bpf_prog_type {
+	BPF_PROG_TYPE_UNSPEC,
+	BPF_PROG_TYPE_SOCKET_FILTER,
+	BPF_PROG_TYPE_KPROBE,
+	BPF_PROG_TYPE_SCHED_CLS,
+	BPF_PROG_TYPE_SCHED_ACT,
+};
+
+/* BPF command types; the numeric value is passed as the syscall command */
+enum bpf_cmd {
+	BPF_MAP_CREATE,
+	BPF_MAP_LOOKUP_ELEM,
+	BPF_MAP_UPDATE_ELEM,
+	BPF_MAP_DELETE_ELEM,
+	BPF_MAP_GET_NEXT_KEY,
+	BPF_PROG_LOAD,
+};
+
+/* BPF map types */
+enum bpf_map_type {
+	BPF_MAP_TYPE_UNSPEC,
+	BPF_MAP_TYPE_HASH,
+};
+
+/* Union of anonymous structs, one per group of BPF commands used by TAP */
+union bpf_attr {
+	/* BPF_MAP_CREATE command */
+	struct {
+		__u32	map_type;
+		__u32	key_size;
+		__u32	value_size;
+		__u32	max_entries;
+		__u32	map_flags;
+		__u32	inner_map_fd;
+	};
+
+	/* BPF_MAP_UPDATE_ELEM, BPF_MAP_DELETE_ELEM commands */
+	struct {
+		__u32		map_fd;
+		__aligned_u64	key;
+		union {
+			__aligned_u64 value;
+			__aligned_u64 next_key;
+		};
+		__u64		flags;
+	};
+
+	/* BPF_PROG_LOAD command */
+	struct {
+		__u32		prog_type;
+		__u32		insn_cnt;
+		__aligned_u64	insns;
+		__aligned_u64	license;
+		__u32		log_level;
+		__u32		log_size;
+		__aligned_u64	log_buf;
+		__u32		kern_version;
+		__u32		prog_flags;
+	};
+} __attribute__((aligned(8)));
+#endif
+
+#ifndef __NR_bpf /* per-architecture fallback for the bpf syscall number */
+# if defined(__i386__)
+#  define __NR_bpf 357
+# elif defined(__x86_64__)
+#  define __NR_bpf 321
+# elif defined(__aarch64__)
+#  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
+# elif defined(__s390__)
+#  define __NR_bpf 351
+# else
+#  error __NR_bpf not defined
+# endif
+#endif
+
+enum { /* NOTE(review): purpose unclear; not referenced elsewhere in this patch */
+	BPF_MAP_ID_KEY,
+	BPF_MAP_ID_SIMPLE,
+};
+
+static int bpf_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+		size_t insns_cnt, const char *license); /* NOTE(review): static prototype in a header; defined only in tap_bpf_api.c */
+
+#endif /* __TAP_BPF_H__ */
diff --git a/drivers/net/tap/tap_bpf_api.c b/drivers/net/tap/tap_bpf_api.c
new file mode 100644
index 0000000..109a681
--- /dev/null
+++ b/drivers/net/tap/tap_bpf_api.c
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_eth_tap.h>
+#include <tap_flow.h>
+#include <tap_autoconf.h>
+#include <tap_tcmsgs.h>
+#include <tap_bpf.h>
+#include <tap_bpf_insns.h>
+
+/**
+ * Load the cls_q BPF classifier program into the kernel and return a bpf fd.
+ *
+ * @param[in] queue_idx
+ *   Queue index matching packet cb; written into cls_q_insns[1].imm
+ *
+ * @return
+ *   -1 if the BPF program couldn't be loaded. An fd (int) otherwise.
+ */
+int tap_flow_bpf_cls_q(__u32 queue_idx)
+{
+	cls_q_insns[1].imm = queue_idx;
+
+	return bpf_load(BPF_PROG_TYPE_SCHED_CLS,
+		(struct bpf_insn *)cls_q_insns,
+		RTE_DIM(cls_q_insns),
+		"Dual BSD/GPL");
+}
+
+/**
+ * Load BPF action program (section l3_l4) into the kernel and return a bpf fd.
+ *
+ * @param[in] key_idx
+ *   RSS map key index; written into l3_l4_hash_insns[4].imm
+ *
+ * @param[in] map_fd
+ *   BPF RSS map file descriptor; written into l3_l4_hash_insns[9].imm
+ *
+ * @return
+ *   -1 if the BPF program couldn't be loaded. An fd (int) otherwise.
+ */
+int tap_flow_bpf_calc_l3_l4_hash(__u32 key_idx, int map_fd)
+{
+	l3_l4_hash_insns[4].imm = key_idx;
+	l3_l4_hash_insns[9].imm = map_fd;
+
+	return bpf_load(BPF_PROG_TYPE_SCHED_ACT,
+		(struct bpf_insn *)l3_l4_hash_insns,
+		RTE_DIM(l3_l4_hash_insns),
+		"Dual BSD/GPL");
+}
+
+/**
+ * Helper converting a pointer to the unsigned 64-bit form stored in bpf_attr
+ *
+ * @param[in] ptr
+ *   Pointer to convert (may be NULL, as bpf_load() passes for log_buf)
+ *
+ * @return
+ *   The pointer's address as an unsigned 64-bit integer
+ */
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+/**
+ * Thin wrapper invoking the BPF system call through syscall(__NR_bpf, ...)
+ *
+ * @param[in] cmd
+ *   BPF command for program loading, map creation, map entry update, etc
+ *
+ * @param[in] attr
+ *   System call attributes relevant to the given command
+ *
+ * @param[in] size
+ *   Size of the attr parameter (callers pass sizeof(attr))
+ *
+ * @return
+ *   -1 if the system call failed; otherwise its result (e.g. a new fd)
+ */
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+			unsigned int size)
+{
+	return syscall(__NR_bpf, cmd, attr, size);
+}
+
+/**
+ * Load BPF instructions to kernel
+ *
+ * @param[in] type
+ *   BPF program type: classifier or action
+ *
+ * @param[in] insns
+ *   Array of BPF instructions to load
+ *
+ * @param[in] insns_cnt
+ *   Number of BPF instructions (size of array)
+ *
+ * @param[in] license
+ *   License string that must be acknowledged by the kernel
+ *
+ * @return
+ *   -1 if the BPF program couldn't be loaded, fd (file descriptor) otherwise
+ */
+static int bpf_load(enum bpf_prog_type type,
+		  const struct bpf_insn *insns,
+		  size_t insns_cnt,
+		  const char *license)
+{
+	union bpf_attr attr = {};
+
+	bzero(&attr, sizeof(attr)); /* clear every byte of the union */
+	attr.prog_type = type;
+	attr.insn_cnt = (__u32)insns_cnt;
+	attr.insns = ptr_to_u64(insns);
+	attr.license = ptr_to_u64(license);
+	attr.log_buf = ptr_to_u64(NULL); /* no log buffer supplied */
+	attr.log_level = 0;
+	attr.kern_version = 0;
+
+	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+/**
+ * Create a BPF hash map (BPF_MAP_TYPE_HASH) for RSS rules
+ *
+ * @param[in] key_size
+ *   Map RSS key size
+ *
+ * @param[in] value_size
+ *   Map RSS value size
+ *
+ * @param[in] max_entries
+ *   Map max number of RSS entries (limit on max RSS rules)
+ *
+ * @return
+ *   -1 if BPF map couldn't be created, map fd otherwise
+ */
+int tap_flow_bpf_rss_map_create(unsigned int key_size,
+		unsigned int value_size,
+		unsigned int max_entries)
+{
+	union bpf_attr attr = {};
+
+	bzero(&attr, sizeof(attr)); /* clear every byte of the union */
+	attr.map_type    = BPF_MAP_TYPE_HASH;
+	attr.key_size    = key_size;
+	attr.value_size  = value_size;
+	attr.max_entries = max_entries;
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/**
+ * Update RSS entry in BPF map (BPF_ANY: create the entry or update it)
+ *
+ * @param[in] fd
+ *   RSS map fd
+ *
+ * @param[in] key
+ *   Pointer to RSS key whose entry is updated
+ *
+ * @param[in] value
+ *   Pointer to the new RSS value stored for that key
+ *
+ * @return
+ *   -1 if RSS entry failed to be updated, 0 otherwise
+ */
+int tap_flow_bpf_update_rss_elem(int fd, void *key, void *value)
+{
+	union bpf_attr attr = {};
+
+	bzero(&attr, sizeof(attr));
+
+	attr.map_type = BPF_MAP_TYPE_HASH; /* NOTE(review): aliases map_fd in the union; overwritten below */
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+	attr.flags = BPF_ANY;
+
+	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
diff --git a/drivers/net/tap/tap_flow.h b/drivers/net/tap/tap_flow.h
index 9e332b0..894c959 100644
--- a/drivers/net/tap/tap_flow.h
+++ b/drivers/net/tap/tap_flow.h
@@ -80,4 +80,10 @@ int tap_flow_implicit_destroy(struct pmd_internals *pmd,
 int tap_flow_implicit_flush(struct pmd_internals *pmd,
 			    struct rte_flow_error *error);
 
+int tap_flow_bpf_cls_q(__u32 queue_idx);
+int tap_flow_bpf_calc_l3_l4_hash(__u32 key_idx, int map_fd);
+int tap_flow_bpf_rss_map_create(unsigned int key_size, unsigned int value_size,
+			unsigned int max_entries);
+int tap_flow_bpf_update_rss_elem(int fd, void *key, void *value);
+
 #endif /* _TAP_FLOW_H_ */
-- 
2.7.4



More information about the dev mailing list