[dpdk-dev] [PATCH v3 1/2] kdp: add kernel data path kernel module

Ferruh Yigit ferruh.yigit at intel.com
Wed Mar 9 12:17:48 CET 2016


This kernel module is based on the KNI module, but it is a stripped-down
version that handles data messages only; no control functionality is
provided.

The FIFO implementation of KNI is kept exactly the same, but the ethtool
related code is removed and the virtual network management code is simplified.

This module contains kernel support to create network devices, and it
provides a simple driver for a virtual network device; the driver
simply puts/gets packets to/from a FIFO instead of real hardware.

The FIFO is created and owned by the userspace application, which in this
case is the KDP PMD.

In the long term this patch intends to replace KNI, and KNI will be
deprecated.

Signed-off-by: Ferruh Yigit <ferruh.yigit at intel.com>
---

v3:
* Remove logging helper macros, use pr_fmt
* Replace rw_semaphore with mutex
* Devices are not up by default
* Use unsigned primitive types as possible
* Update module parameters
* Code cleanup, remove useless comments, reorder fields/code.

v2:
* Use rtnetlink to create interfaces
* include modules.h to prevent compile error in old kernels
---
 MAINTAINERS                                        |   4 +
 config/common_base                                 |   6 +
 config/common_linuxapp                             |   1 +
 lib/librte_eal/linuxapp/Makefile                   |   3 +-
 lib/librte_eal/linuxapp/eal/Makefile               |   3 +-
 .../linuxapp/eal/include/exec-env/rte_kdp_common.h | 134 ++++
 lib/librte_eal/linuxapp/kdp/Makefile               |  55 ++
 lib/librte_eal/linuxapp/kdp/kdp_dev.h              |  76 +++
 lib/librte_eal/linuxapp/kdp/kdp_fifo.h             |  91 +++
 lib/librte_eal/linuxapp/kdp/kdp_net.c              | 718 +++++++++++++++++++++
 10 files changed, 1089 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h
 create mode 100644 lib/librte_eal/linuxapp/kdp/Makefile
 create mode 100644 lib/librte_eal/linuxapp/kdp/kdp_dev.h
 create mode 100644 lib/librte_eal/linuxapp/kdp/kdp_fifo.h
 create mode 100644 lib/librte_eal/linuxapp/kdp/kdp_net.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e253bf7..edcc4cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -258,6 +258,10 @@ F: app/test/test_kni.c
 F: examples/kni/
 F: doc/guides/sample_app_ug/kernel_nic_interface.rst
 
+Linux KDP
+M: Ferruh Yigit <ferruh.yigit at gmail.com>
+F: lib/librte_eal/linuxapp/kdp/
+
 Linux AF_PACKET
 M: John W. Linville <linville at tuxdriver.com>
 F: drivers/net/af_packet/
diff --git a/config/common_base b/config/common_base
index c73f71a..973baff 100644
--- a/config/common_base
+++ b/config/common_base
@@ -302,6 +302,12 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
 CONFIG_RTE_LIBRTE_PMD_NULL=y
 
 #
+# Compile KDP PMD
+#
+CONFIG_RTE_KDP_KMOD=n
+CONFIG_RTE_KDP_PREEMPT_DEFAULT=y
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/config/common_linuxapp b/config/common_linuxapp
index ffbe260..569a0fe 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
 CONFIG_RTE_EAL_VFIO=y
 CONFIG_RTE_KNI_KMOD=y
 CONFIG_RTE_LIBRTE_KNI=y
+CONFIG_RTE_KDP_KMOD=y
 CONFIG_RTE_LIBRTE_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 20d2a91..26c70f4 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -34,6 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
 DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
 DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
+DIRS-$(CONFIG_RTE_KDP_KMOD) += kdp
 DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
 
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index c5490e4..e75662d 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -121,6 +121,7 @@ CFLAGS_eal_thread.o += -Wno-return-type
 endif
 
 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+INC += rte_kdp_common.h
 
 SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
 	$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h
new file mode 100644
index 0000000..b9db8ef
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h
@@ -0,0 +1,134 @@
+/*-
+ *   This file is provided under a dual BSD/LGPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2.1 of the GNU Lesser General Public License
+ *   as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this program;
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_KDP_COMMON_H_
+#define _RTE_KDP_COMMON_H_
+
+/**
+ * Size of the character arrays that hold a KDP device name
+ * (used for the name fields of the device info and device structures).
+ */
+#define RTE_KDP_NAMESIZE 32
+
+/* Link kind string under which the kernel module registers its link ops */
+#define KDP_DEVICE "kdp"
+
+/*
+ * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO
+ * Write and read should wrap around. Fifo is empty when write == read
+ * Writing should never overwrite the read position
+ *
+ * The kernel-side helpers wrap indexes with "& (len - 1)", so len has to be
+ * a power of two, and one slot always stays unused so that a full fifo can
+ * be told apart from an empty one.
+ */
+struct rte_kdp_fifo {
+	volatile unsigned write;     /**< Next position to be written*/
+	volatile unsigned read;      /**< Next position to be read */
+	unsigned len;                /**< Circular buffer length */
+	unsigned elem_size;          /**< Pointer size - for 32/64 bit OS */
+	void * volatile buffer[0];   /**< The buffer contains mbuf pointers */
+};
+
+/*
+ * The kernel image of the rte_mbuf struct, with only the relevant fields.
+ * Padding is necessary to assure the offsets of these fields
+ *
+ * NOTE(review): the pad sizes presumably mirror the layout of struct
+ * rte_mbuf at the time of writing - confirm whenever rte_mbuf changes.
+ * kdp_net.c only accesses buf_addr, data_off, pkt_len and data_len.
+ */
+struct rte_kdp_mbuf {
+	/* Start of the segment buffer; first cache line starts here */
+	void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
+	char pad0[10];
+
+	uint16_t data_off;  /**< Start address of data in segment buffer. */
+	char pad1[4];
+	uint64_t ol_flags;  /**< Offload features. */
+	char pad2[4];
+
+	uint32_t pkt_len;   /**< Total pkt len: sum of all segment data_len. */
+
+	uint16_t data_len;  /**< Amount of data in segment buffer. */
+
+	/* fields on second cache line */
+	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
+	void *pool;
+	void *next;
+};
+
+/*
+ * Struct used to create a KDP device. The kernel module receives it as the
+ * payload of the IFLA_KDP_DEVINFO netlink attribute when a link is created.
+ */
+struct rte_kdp_device_info {
+	char name[RTE_KDP_NAMESIZE];  /**< Network device name for KDP */
+	uint16_t port_id;
+
+	/* Physical addresses of the four fifos shared with the kernel */
+	phys_addr_t tx_phys;
+	phys_addr_t rx_phys;
+	phys_addr_t alloc_phys;
+	phys_addr_t free_phys;
+
+	/* mbuf mempool */
+	void *mbuf_va;          /* userspace virtual address of the mbuf area */
+	phys_addr_t mbuf_phys;  /* physical address of the same area */
+
+	/* Upper bound the kernel applies to the length of a transmitted skb */
+	unsigned mbuf_size;
+
+	uint8_t force_bind;  /**< Flag for kernel thread binding */
+	uint32_t core_id;    /**< core ID to bind for kernel thread */
+};
+
+/*
+ * Netlink attribute types understood by the "kdp" link kind.
+ * IFLA_KDP_MAX is the highest valid attribute type.
+ */
+enum {
+	IFLA_KDP_UNSPEC,
+	IFLA_KDP_PORTID,   /* read by the kernel as a u8 port id */
+	IFLA_KDP_DEVINFO,  /* carries a struct rte_kdp_device_info */
+	__IFLA_KDP_MAX,
+};
+#define IFLA_KDP_MAX (__IFLA_KDP_MAX - 1)
+
+#endif /* _RTE_KDP_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/kdp/Makefile b/lib/librte_eal/linuxapp/kdp/Makefile
new file mode 100644
index 0000000..3897dc6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kdp/Makefile
@@ -0,0 +1,55 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = rte_kdp
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -Wall -Werror
+
+# this lib needs main eal
+DEPDIRS-y += lib/librte_eal/linuxapp/eal
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += kdp_net.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/kdp/kdp_dev.h b/lib/librte_eal/linuxapp/kdp/kdp_dev.h
new file mode 100644
index 0000000..0689e4f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kdp/kdp_dev.h
@@ -0,0 +1,76 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#ifndef _KDP_DEV_H_
+#define _KDP_DEV_H_
+
+#include <exec-env/rte_kdp_common.h>
+
+/* Prefix every pr_*() message with the module name, replacing any earlier definition */
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+/**
+ * A structure describing the private information for a kdp device.
+ * It is stored in the private area of the net_device (netdev_priv()).
+ */
+struct kdp_dev {
+	/* kdp list */
+	struct list_head list;
+
+	char name[RTE_KDP_NAMESIZE]; /* Network device name */
+
+	/* NOTE(review): rte_kdp_device_info.port_id is 16 bits wide - confirm */
+	u8 port_id;
+	u32 core_id;                 /* Core ID to bind */
+
+	/* kdp device */
+	struct net_device *net_dev;
+
+	/* per-device RX thread, used in multiple kernel thread mode */
+	struct task_struct *pthread;
+	struct net_device_stats stats;
+
+	/* queue for packets to be sent out */
+	void *tx_q;
+
+	/* queue for the packets received */
+	void *rx_q;
+
+	/* queue for the allocated mbufs those can be used to save sk buffs */
+	void *alloc_q;
+
+	/* free queue for the mbufs to be freed */
+	void *free_q;
+
+	void *mbuf_kva;    /* kernel virtual address of the mbuf area */
+	void *mbuf_va;     /* userspace virtual address of the mbuf area */
+	ssize_t addr_diff; /* mbuf_kva - mbuf_va, added to user addresses */
+
+	/* mbuf size */
+	unsigned mbuf_size;
+};
+
+/* Debug logging: forwards to pr_debug() in debug builds, expands to nothing otherwise */
+#ifdef RTE_KDP_KO_DEBUG
+#define KDP_DBG(...) pr_debug(__VA_ARGS__)
+#else
+#define KDP_DBG(...)
+#endif
+
+#endif /* _KDP_DEV_H_ */
diff --git a/lib/librte_eal/linuxapp/kdp/kdp_fifo.h b/lib/librte_eal/linuxapp/kdp/kdp_fifo.h
new file mode 100644
index 0000000..b70ce25
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kdp/kdp_fifo.h
@@ -0,0 +1,91 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#ifndef _KDP_FIFO_H_
+#define _KDP_FIFO_H_
+
+#include <exec-env/rte_kdp_common.h>
+
+/**
+ * Adds num elements into the fifo. Return the number actually written
+ *
+ * Stops early when the next write position would reach the read position,
+ * so at most len - 1 elements are ever stored. The write index is published
+ * once, after all elements have been copied.
+ *
+ * NOTE(review): no explicit memory barrier separates the buffer stores from
+ * the write index update - confirm this is sufficient on all targets.
+ */
+static inline size_t
+kdp_fifo_put(struct rte_kdp_fifo *fifo, void **data, size_t num)
+{
+	size_t i;
+	u32 fifo_write = fifo->write;
+	u32 fifo_read = fifo->read;
+	u32 new_write = fifo_write;
+
+	for (i = 0; i < num; i++) {
+		/* wrap around; relies on len being a power of two */
+		new_write = (new_write + 1) & (fifo->len - 1);
+
+		/* fifo full */
+		if (new_write == fifo_read)
+			break;
+		fifo->buffer[fifo_write] = data[i];
+		fifo_write = new_write;
+	}
+	fifo->write = fifo_write;
+
+	return i;
+}
+
+/**
+ * Get up to num elements from the fifo. Return the number actually read
+ *
+ * Stops early when the read position catches up with the write position
+ * sampled on entry. The read index is published once, after the copy.
+ */
+static inline size_t
+kdp_fifo_get(struct rte_kdp_fifo *fifo, void **data, size_t num)
+{
+	size_t i = 0;
+	u32 new_read = fifo->read;
+	u32 fifo_write = fifo->write;
+
+	for (i = 0; i < num; i++) {
+		/* fifo empty */
+		if (new_read == fifo_write)
+			break;
+
+		data[i] = fifo->buffer[new_read];
+		/* wrap around; relies on len being a power of two */
+		new_read = (new_read + 1) & (fifo->len - 1);
+	}
+	fifo->read = new_read;
+
+	return i;
+}
+
+/**
+ * Number of elements currently stored in the fifo, computed from the
+ * distance between the write and read indexes modulo the fifo length.
+ */
+static inline size_t
+kdp_fifo_count(struct rte_kdp_fifo *fifo)
+{
+	const unsigned mask = fifo->len - 1;
+
+	return (fifo->len + fifo->write - fifo->read) & mask;
+}
+
+/**
+ * Number of elements that can still be added to the fifo. One slot is
+ * always kept unused, hence the "- 1".
+ */
+static inline size_t
+kdp_fifo_free_count(struct rte_kdp_fifo *fifo)
+{
+	const unsigned mask = fifo->len - 1;
+
+	return (fifo->read - fifo->write - 1) & mask;
+}
+
+#endif /* _KDP_FIFO_H_ */
diff --git a/lib/librte_eal/linuxapp/kdp/kdp_net.c b/lib/librte_eal/linuxapp/kdp/kdp_net.c
new file mode 100644
index 0000000..f089339
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kdp/kdp_net.c
@@ -0,0 +1,718 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <net/rtnetlink.h>
+
+#include "kdp_dev.h"
+#include "kdp_fifo.h"
+
+#define WD_TIMEOUT 5 /* net device watchdog timeout, in jiffies */
+#define MBUF_BURST_SZ 32 /* max mbufs moved per fifo burst */
+
+/* RX polling passes per kthread loop iteration */
+#define KDP_RX_LOOP_NUM 1000
+#define KDP_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
+
+/* RX thread shared by all devices in single kernel thread mode */
+static struct task_struct *kdp_kthread;
+/* Protects kdp_list_head */
+static struct mutex kdp_list_lock;
+/* List of all created KDP devices */
+static struct list_head kdp_list_head;
+
+/* loopback mode */
+static char *lo_mode;
+module_param(lo_mode, charp, S_IRUGO);
+MODULE_PARM_DESC(lo_mode, "Enable loopback mode: fifo or fifo_skb.");
+
+/* Kernel thread mode */
+static bool multiple_kthread;
+module_param(multiple_kthread, bool, S_IRUGO);
+MODULE_PARM_DESC(multiple_kthread, "Enable multiple kernel thread mode.");
+
+/* typedef for rx function */
+typedef void (*kdp_net_rx_t)(struct kdp_dev *kdp);
+
+/*
+ * ndo_open handler: give the device a random MAC address and enable its
+ * transmit queue. Always returns 0.
+ */
+static int kdp_net_open(struct net_device *dev)
+{
+	random_ether_addr(dev->dev_addr);
+	netif_start_queue(dev);
+	return 0;
+}
+
+/* ndo_stop handler: disable the transmit queue. Always returns 0. */
+static int kdp_net_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+/* Convert a userspace address inside the mbuf area to its kernel address. */
+static inline void *va_to_kva(void *va, struct kdp_dev *kdp)
+{
+	ssize_t offset = kdp->addr_diff;
+
+	return va + offset;
+}
+
+/* Kernel address of the first data byte of an mbuf (buf_addr + data_off). */
+static inline void *pkt_data(struct rte_kdp_mbuf *pkt, struct kdp_dev *kdp)
+{
+	void *user_data = pkt->buf_addr + pkt->data_off;
+
+	return va_to_kva(user_data, kdp);
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ *
+ * Copies the skb payload into an mbuf taken from alloc_q, pads it with
+ * zeroes up to ETH_ZLEN and hands the mbuf to userspace through tx_q.
+ * The skb is always consumed and NETDEV_TX_OK is always returned; packets
+ * that cannot be queued are counted in tx_dropped.
+ */
+static int kdp_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct kdp_dev *kdp = netdev_priv(dev);
+	struct rte_kdp_mbuf *mbuf;
+	void *mbuf_va;
+	void *dst;
+	u32 nbytes;
+	u32 cnt;
+
+	/* save the timestamp */
+	dev->trans_start = jiffies;
+
+	/* The skb must not be longer than the mbuf size */
+	if (skb->len > kdp->mbuf_size)
+		goto drop;
+
+	/* Need one free slot in tx_q ... */
+	if (kdp_fifo_free_count(kdp->tx_q) == 0)
+		goto drop;
+	/* ... and one mbuf available in alloc_q */
+	if (kdp_fifo_count(kdp->alloc_q) == 0)
+		goto drop;
+
+	/* Take an mbuf from alloc_q */
+	cnt = kdp_fifo_get(kdp->alloc_q, &mbuf_va, 1);
+	if (unlikely(cnt != 1)) {
+		/* Failing should not happen */
+		pr_err("Fail to dequeue mbuf from alloc_q\n");
+		goto drop;
+	}
+
+	mbuf = va_to_kva(mbuf_va, kdp);
+	dst = pkt_data(mbuf, kdp);
+
+	nbytes = skb->len;
+	memcpy(dst, skb->data, nbytes);
+	if (unlikely(nbytes < ETH_ZLEN)) {
+		/* zero-pad short frames to the minimum ethernet length */
+		memset(dst + nbytes, 0, ETH_ZLEN - nbytes);
+		nbytes = ETH_ZLEN;
+	}
+	mbuf->pkt_len = nbytes;
+	mbuf->data_len = nbytes;
+
+	/* Hand the filled mbuf over through tx_q */
+	cnt = kdp_fifo_put(kdp->tx_q, &mbuf_va, 1);
+	if (unlikely(cnt != 1)) {
+		/* Failing should not happen */
+		pr_err("Fail to enqueue mbuf into tx_q\n");
+		goto drop;
+	}
+
+	/* Success: release the skb and account for it */
+	dev_kfree_skb(skb);
+	kdp->stats.tx_bytes += nbytes;
+	kdp->stats.tx_packets++;
+	return NETDEV_TX_OK;
+
+drop:
+	/* Failure: release the skb and count the drop */
+	dev_kfree_skb(skb);
+	kdp->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
+/* ndo_set_rx_mode handler: intentionally a no-op. */
+static void kdp_net_set_rx_mode(struct net_device *dev)
+{
+	/* nothing to configure */
+}
+
+/*
+ * ndo_set_mac_address handler: p points to a struct sockaddr holding the
+ * new address. Returns 0 on success or -EADDRNOTAVAIL when the address is
+ * not a valid ethernet address.
+ */
+static int kdp_net_set_mac(struct net_device *dev, void *p)
+{
+	struct sockaddr *sa = p;
+
+	if (is_valid_ether_addr(sa->sa_data)) {
+		memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
+		return 0;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+/* ndo_do_ioctl handler: no device ioctl is supported, always -EOPNOTSUPP. */
+static int kdp_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ *
+ * Nothing is configurable: returns -EBUSY while the interface is up and
+ * -EOPNOTSUPP otherwise.
+ */
+static int kdp_net_config(struct net_device *dev, struct ifmap *map)
+{
+	return (dev->flags & IFF_UP) ? -EBUSY : -EOPNOTSUPP;
+}
+
+/*
+ * ndo_change_mtu handler: store the requested MTU. Always returns 0.
+ * NOTE(review): the value is not checked against the mbuf size here.
+ */
+static int kdp_net_change_mtu(struct net_device *dev, int new_mtu)
+{
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+/*
+ * Deal with a transmit timeout: count it as a tx error and wake the
+ * transmit queue again.
+ */
+static void kdp_net_tx_timeout(struct net_device *dev)
+{
+	struct kdp_dev *priv = netdev_priv(dev);
+
+	KDP_DBG("Transmit timeout at %ld, latency %ld\n", jiffies,
+			jiffies - dev->trans_start);
+
+	priv->stats.tx_errors++;
+	netif_wake_queue(dev);
+}
+
+/*
+ * Return statistics to the caller: a pointer to the counters kept in the
+ * device private data.
+ */
+static struct net_device_stats *kdp_net_stats(struct net_device *dev)
+{
+	struct kdp_dev *priv = netdev_priv(dev);
+
+	return &priv->stats;
+}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+/* ndo_change_carrier handler: switch the carrier state on or off. */
+static int kdp_net_change_carrier(struct net_device *dev, bool new_carrier)
+{
+	if (!new_carrier) {
+		netif_carrier_off(dev);
+		return 0;
+	}
+
+	netif_carrier_on(dev);
+	return 0;
+}
+#endif
+
+/* Net device operations implemented by the KDP virtual device */
+static const struct net_device_ops kdp_net_netdev_ops = {
+	.ndo_open		= kdp_net_open,
+	.ndo_stop		= kdp_net_close,
+	.ndo_start_xmit		= kdp_net_tx,
+	.ndo_set_rx_mode	= kdp_net_set_rx_mode,
+	.ndo_set_mac_address	= kdp_net_set_mac,
+	.ndo_do_ioctl		= kdp_net_ioctl,
+	.ndo_set_config		= kdp_net_config,
+	.ndo_change_mtu		= kdp_net_change_mtu,
+	.ndo_tx_timeout		= kdp_net_tx_timeout,
+	.ndo_get_stats		= kdp_net_stats,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+	.ndo_change_carrier	= kdp_net_change_carrier,
+#endif
+};
+
+/*
+ * rtnl_link_ops setup callback: initialise the device as an ethernet
+ * device driven by the KDP net device operations.
+ */
+static void kdp_net_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+	dev->netdev_ops = &kdp_net_netdev_ops;
+	dev->watchdog_timeo = WD_TIMEOUT;
+	/*
+	 * The device is allocated by the rtnetlink core, so nothing in this
+	 * module frees it after unregistration; let the core do it through
+	 * the destructor, otherwise every deleted link leaks its net_device.
+	 */
+	dev->destructor = free_netdev;
+}
+
+/*
+ * RX: normal working mode
+ *
+ * Dequeues up to MBUF_BURST_SZ mbufs from rx_q (limited by the room left
+ * in free_q), copies each one into a freshly allocated skb that is passed
+ * to the network stack, then returns all dequeued mbufs through free_q.
+ */
+static void kdp_net_rx_normal(struct kdp_dev *kdp)
+{
+	struct net_device *dev = kdp->net_dev;
+	void *va[MBUF_BURST_SZ];
+	struct rte_kdp_mbuf *pkt;
+	void *data;
+	struct sk_buff *skb;
+	size_t num_rx, num_fq;
+	size_t len;
+	size_t ret;
+	u32 i;
+
+	/* Get the number of free entries in free_q */
+	num_fq = kdp_fifo_free_count(kdp->free_q);
+	if (num_fq == 0)
+		return; /* No room on the free_q, bail out */
+
+	/* Calculate the number of entries to dequeue from rx_q */
+	num_rx = min_t(size_t, num_fq, MBUF_BURST_SZ);
+
+	/* Burst dequeue from rx_q */
+	num_rx = kdp_fifo_get(kdp->rx_q, va, num_rx);
+	if (num_rx == 0)
+		return;
+
+	/* Transfer received packets to netif */
+	for (i = 0; i < num_rx; i++) {
+		pkt = va_to_kva(va[i], kdp);
+		data = pkt_data(pkt, kdp);
+		len = pkt->data_len;
+
+		/* two extra bytes for the alignment reserve below */
+		skb = dev_alloc_skb(len + 2);
+		if (!skb) {
+			/* the mbuf is still returned through free_q below */
+			kdp->stats.rx_dropped++;
+			continue;
+		}
+
+		/* Align IP on 16B boundary */
+		skb_reserve(skb, 2);
+		memcpy(skb_put(skb, len), data, len);
+		skb->dev = dev;
+		skb->protocol = eth_type_trans(skb, dev);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		/* Call netif interface */
+		netif_rx(skb);
+
+		/* Update statistics */
+		kdp->stats.rx_bytes += len;
+		kdp->stats.rx_packets++;
+	}
+
+	/* Burst enqueue mbufs into free_q */
+	ret = kdp_fifo_put(kdp->free_q, va, num_rx);
+	if (ret != num_rx)
+		/* Failing should not happen */
+		pr_err("Fail to enqueue entries into free_q\n");
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos.
+ *
+ * Every packet found in rx_q is copied into an mbuf taken from alloc_q and
+ * sent straight back through tx_q; the original mbufs are returned through
+ * free_q. The burst size is limited by MBUF_BURST_SZ and by the state of
+ * all four fifos.
+ */
+static void kdp_net_rx_lo_fifo(struct kdp_dev *kdp)
+{
+	void *va[MBUF_BURST_SZ];
+	struct rte_kdp_mbuf *pkt;
+	void *data;
+	void *alloc_va[MBUF_BURST_SZ];
+	struct rte_kdp_mbuf *alloc_pkt;
+	void *alloc_data;
+	size_t num, num_q;
+	size_t num_alloc, num_spare;
+	size_t ret;
+	size_t len;
+	u32 i;
+
+	/* Get the number of entries in rx_q */
+	num_q = kdp_fifo_count(kdp->rx_q);
+	num = min_t(size_t, num_q, MBUF_BURST_SZ);
+
+	/* Get the number of free entries in tx_q */
+	num_q = kdp_fifo_free_count(kdp->tx_q);
+	num = min_t(size_t, num, num_q);
+
+	/* Get the number of entries in alloc_q */
+	num_q = kdp_fifo_count(kdp->alloc_q);
+	num = min_t(size_t, num, num_q);
+
+	/* Get the number of free entries in free_q */
+	num_q = kdp_fifo_free_count(kdp->free_q);
+	num = min_t(size_t, num, num_q);
+
+	/* Return if no entry to dequeue from rx_q */
+	if (num == 0)
+		return;
+
+	/*
+	 * Dequeue entries from alloc_q. Fewer than requested may come back
+	 * (the transmit path consumes alloc_q too), so remember how many
+	 * entries of alloc_va[] are actually valid.
+	 */
+	num_alloc = kdp_fifo_get(kdp->alloc_q, alloc_va, num);
+	if (num_alloc == 0)
+		return;
+
+	/* Burst dequeue from rx_q, never more than the mbufs just obtained */
+	num = kdp_fifo_get(kdp->rx_q, va, num_alloc);
+
+	/* Give back the alloc mbufs that have no packet to carry */
+	num_spare = num_alloc - num;
+	if (num_spare != 0) {
+		ret = kdp_fifo_put(kdp->free_q, &alloc_va[num], num_spare);
+		if (ret != num_spare)
+			pr_err("Fail to enqueue alloc mbufs into free_q\n");
+	}
+
+	if (num == 0)
+		return;
+
+	/* Copy mbufs */
+	for (i = 0; i < num; i++) {
+		pkt = va_to_kva(va[i], kdp);
+		data = pkt_data(pkt, kdp);
+
+		alloc_pkt = va_to_kva(alloc_va[i], kdp);
+		alloc_data = pkt_data(alloc_pkt, kdp);
+
+		/* NOTE(review): pkt_len comes from userspace and is not bounded here */
+		len = pkt->pkt_len;
+		memcpy(alloc_data, data, len);
+
+		alloc_pkt->pkt_len = len;
+		alloc_pkt->data_len = len;
+
+		kdp->stats.tx_bytes += len;
+		kdp->stats.rx_bytes += len;
+	}
+
+	/* Burst enqueue mbufs into tx_q */
+	ret = kdp_fifo_put(kdp->tx_q, alloc_va, num);
+	if (ret != num)
+		/* Failing should not happen */
+		pr_err("Fail to enqueue mbufs into tx_q\n");
+
+	/* Burst enqueue mbufs into free_q */
+	ret = kdp_fifo_put(kdp->free_q, va, num);
+	if (ret != num)
+		/* Failing should not happen */
+		pr_err("Fail to enqueue mbufs into free_q\n");
+
+	/* Update packet counters for the burst that was looped back */
+	kdp->stats.tx_packets += num;
+	kdp->stats.rx_packets += num;
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
+ *
+ * Each mbuf dequeued from rx_q is copied into a new skb which is then fed
+ * directly to kdp_net_tx(); the dequeued mbufs go back through free_q.
+ */
+static void kdp_net_rx_lo_fifo_skb(struct kdp_dev *kdp)
+{
+	struct net_device *dev = kdp->net_dev;
+	void *va[MBUF_BURST_SZ];
+	struct rte_kdp_mbuf *pkt;
+	void *data;
+	struct sk_buff *skb;
+	size_t num_rq, num_fq;
+	size_t ret;
+	size_t len;
+	size_t num;
+	u32 i;
+
+	/* Get the number of entries in rx_q */
+	num_rq = kdp_fifo_count(kdp->rx_q);
+
+	/* Get the number of free entries in free_q */
+	num_fq = kdp_fifo_free_count(kdp->free_q);
+
+	/* Calculate the number of entries to dequeue from rx_q */
+	num = min_t(size_t, num_rq, num_fq);
+	num = min_t(size_t, num, MBUF_BURST_SZ);
+
+	/* Return if no entry to dequeue from rx_q */
+	if (num == 0)
+		return;
+
+	/* Burst dequeue mbufs from rx_q */
+	ret = kdp_fifo_get(kdp->rx_q, va, num);
+	if (ret == 0)
+		return;
+
+	num = ret;
+	/* Copy mbufs to sk buffer and then call tx interface */
+	for (i = 0; i < num; i++) {
+		pkt = va_to_kva(va[i], kdp);
+		data = pkt_data(pkt, kdp);
+		len = pkt->data_len;
+
+		/* two extra bytes for the alignment reserve below */
+		skb = dev_alloc_skb(len + 2);
+		if (!skb) {
+			/* the mbuf is still returned through free_q below */
+			kdp->stats.rx_dropped++;
+			continue;
+		}
+
+		/* Align IP on 16B boundary */
+		skb_reserve(skb, 2);
+		memcpy(skb_put(skb, len), data, len);
+		skb->dev = dev;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		kdp->stats.rx_bytes += len;
+		kdp->stats.rx_packets++;
+
+		/* call tx interface; it consumes the skb in every case */
+		kdp_net_tx(skb, dev);
+	}
+
+	/* enqueue all the mbufs from rx_q into free_q */
+	ret = kdp_fifo_put(kdp->free_q, va, num);
+	if (ret != num)
+		/* Failing should not happen */
+		pr_err("Fail to enqueue mbufs into free_q\n");
+}
+
+/*
+ * kdp rx function pointer, with default to normal rx.
+ * Replaced at module init when a loopback mode is requested through the
+ * lo_mode parameter; called by the RX kernel threads.
+ */
+static kdp_net_rx_t kdp_net_rx_func = kdp_net_rx_normal;
+
+/*
+ * Body of the RX thread used in single kernel thread mode: polls every
+ * device on the list, KDP_RX_LOOP_NUM passes at a time with the list lock
+ * held, until the thread is asked to stop. Returns 0.
+ */
+static int kdp_thread_single(void *data)
+{
+	struct kdp_dev *entry;
+	u32 round;
+
+	while (!kthread_should_stop()) {
+		mutex_lock(&kdp_list_lock);
+		for (round = 0; round < KDP_RX_LOOP_NUM; round++) {
+			list_for_each_entry(entry, &kdp_list_head, list)
+				kdp_net_rx_func(entry);
+		}
+		mutex_unlock(&kdp_list_lock);
+
+#ifdef RTE_KDP_PREEMPT_DEFAULT
+		/* give up the CPU for a short while */
+		schedule_timeout_interruptible(
+			usecs_to_jiffies(KDP_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+	}
+
+	return 0;
+}
+
+/*
+ * Body of the per-device RX thread used in multiple kernel thread mode:
+ * polls the device passed in param until the thread is asked to stop.
+ * Returns 0.
+ */
+static int kdp_thread_multiple(void *param)
+{
+	struct kdp_dev *self = param;
+	u32 round;
+
+	while (!kthread_should_stop()) {
+		for (round = 0; round < KDP_RX_LOOP_NUM; round++)
+			kdp_net_rx_func(self);
+
+#ifdef RTE_KDP_PREEMPT_DEFAULT
+		/* give up the CPU for a short while */
+		schedule_timeout_interruptible(
+			usecs_to_jiffies(KDP_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the device private data from the device info supplied by
+ * userspace: identifiers, the kernel addresses of the four fifos and of
+ * the mbuf area, and the offset between user and kernel mbuf addresses.
+ */
+static void kdp_setup(struct kdp_dev *kdp, struct rte_kdp_device_info *info)
+{
+	/* NOTE(review): info->port_id is 16 bits, kdp->port_id only 8 */
+	kdp->port_id = info->port_id;
+	kdp->core_id = info->core_id;
+	/*
+	 * The name comes from userspace and may fill the whole array;
+	 * always terminate it since it is later printed with "%s".
+	 */
+	strncpy(kdp->name, info->name, RTE_KDP_NAMESIZE - 1);
+	kdp->name[RTE_KDP_NAMESIZE - 1] = '\0';
+
+	/* Translate user space info into kernel space info */
+	kdp->tx_q = phys_to_virt(info->tx_phys);
+	kdp->rx_q = phys_to_virt(info->rx_phys);
+	kdp->alloc_q = phys_to_virt(info->alloc_phys);
+	kdp->free_q = phys_to_virt(info->free_phys);
+
+	kdp->mbuf_kva = phys_to_virt(info->mbuf_phys);
+	kdp->mbuf_va = info->mbuf_va;
+	/* offset added to a userspace mbuf address to get the kernel one */
+	kdp->addr_diff = kdp->mbuf_kva - kdp->mbuf_va;
+
+	kdp->mbuf_size = info->mbuf_size;
+
+	pr_info("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
+		(unsigned long long) info->tx_phys, kdp->tx_q);
+	pr_info("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
+		(unsigned long long) info->rx_phys, kdp->rx_q);
+	pr_info("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
+		(unsigned long long) info->alloc_phys, kdp->alloc_q);
+	pr_info("free_phys:    0x%016llx, free_q addr:    0x%p\n",
+		(unsigned long long) info->free_phys, kdp->free_q);
+	pr_info("mbuf_phys:    0x%016llx, mbuf_kva:       0x%p\n",
+		(unsigned long long) info->mbuf_phys, kdp->mbuf_kva);
+	pr_info("mbuf_va:      0x%p\n", info->mbuf_va);
+	pr_info("mbuf_size:    %u\n", kdp->mbuf_size);
+}
+
+/*
+ * Start the RX thread serving a device.
+ *
+ * In multiple kernel thread mode a dedicated thread is created for the
+ * device, optionally bound to info->core_id. In single mode the shared
+ * thread is started when it is not running yet.
+ *
+ * Returns 0 on success, -EINVAL when the requested core is not online and
+ * -ECANCELED when the thread cannot be created.
+ */
+static int create_kthread(struct kdp_dev *kdp,
+		struct rte_kdp_device_info *info)
+{
+	/**
+	 * Create a new kernel thread for multiple mode, set its core affinity,
+	 * and finally wake it up.
+	 */
+	if (multiple_kthread) {
+		/**
+		 * Check if the cpu core id is valid for binding,
+		 * for multiple kernel thread mode.
+		 */
+		if (info->force_bind && !cpu_online(kdp->core_id)) {
+			pr_err("cpu %u is not online\n", kdp->core_id);
+			return -EINVAL;
+		}
+
+		kdp->pthread = kthread_create(kdp_thread_multiple,
+				(void *)kdp, "kdp_%s", kdp->name);
+		if (IS_ERR(kdp->pthread)) {
+			/* never leave an error pointer behind for kthread_stop() */
+			kdp->pthread = NULL;
+			return -ECANCELED;
+		}
+
+		if (info->force_bind)
+			kthread_bind(kdp->pthread, kdp->core_id);
+
+		wake_up_process(kdp->pthread);
+
+		return 0;
+	}
+
+	/* single thread */
+	if (kdp_kthread == NULL) {
+		pr_info("Single kernel thread for all KDP devices\n");
+
+		/* Create kernel thread for RX */
+		kdp_kthread = kthread_run(kdp_thread_single, NULL,
+				"kdp_single");
+		if (IS_ERR(kdp_kthread)) {
+			pr_err("Unable to create kernel thread\n");
+			/*
+			 * Reset the pointer so that a later device creation
+			 * retries and the stop path does not see an error
+			 * pointer.
+			 */
+			kdp_kthread = NULL;
+			return -ECANCELED;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * rtnl_link_ops newlink callback: create a KDP device from the
+ * IFLA_KDP_PORTID and IFLA_KDP_DEVINFO attributes, register it and start
+ * its RX thread.
+ *
+ * Returns 0 on success, -EINVAL when an attribute is missing or too short,
+ * or the error reported by register_netdevice() / create_kthread().
+ *
+ * The device is never freed here: the rtnetlink core frees a device whose
+ * newlink failed before registration, and a registered device must only be
+ * unregistered (with RTNL already held, hence unregister_netdevice()).
+ */
+static int kdp_net_newlink(struct net *net, struct net_device *dev,
+		struct nlattr *tb[], struct nlattr *data[])
+{
+	struct rte_kdp_device_info dev_info;
+	struct kdp_dev *kdp = netdev_priv(dev);
+	int ret;
+
+	if (!data || !data[IFLA_KDP_PORTID] || !data[IFLA_KDP_DEVINFO])
+		return -EINVAL;
+
+	/* No policy validates these attributes, so check the sizes here */
+	if (nla_len(data[IFLA_KDP_PORTID]) < (int)sizeof(u8) ||
+	    nla_len(data[IFLA_KDP_DEVINFO]) < (int)sizeof(dev_info))
+		return -EINVAL;
+
+	/* NOTE(review): kdp_setup() below overwrites this with dev_info.port_id */
+	kdp->port_id = nla_get_u8(data[IFLA_KDP_PORTID]);
+	memcpy(&dev_info, nla_data(data[IFLA_KDP_DEVINFO]), sizeof(dev_info));
+
+	kdp->net_dev = dev;
+	kdp_setup(kdp, &dev_info);
+
+	ret = register_netdevice(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = create_kthread(kdp, &dev_info);
+	if (ret < 0) {
+		unregister_netdevice(dev);
+		return ret;
+	}
+
+	mutex_lock(&kdp_list_lock);
+	list_add(&kdp->list, &kdp_list_head);
+	mutex_unlock(&kdp_list_lock);
+
+	return 0;
+}
+
+/* Stop the shared RX thread; does nothing in multiple mode or when it is not running. */
+static void single_kthread_stop(void)
+{
+	if (multiple_kthread || !kdp_kthread)
+		return;
+
+	kthread_stop(kdp_kthread);
+	kdp_kthread = NULL;
+}
+
+/* Stop the RX thread of one device; does nothing in single mode or when it has none. */
+static void multiple_kthread_stop(struct kdp_dev *kdp)
+{
+	if (!multiple_kthread || !kdp->pthread)
+		return;
+
+	kthread_stop(kdp->pthread);
+	kdp->pthread = NULL;
+}
+
+/*
+ * Stop the RX thread that served a device being removed: its own thread
+ * in multiple mode, or the shared thread once the device list is empty.
+ */
+static void kdp_kthread_stop_one(struct kdp_dev *kdp)
+{
+	bool last;
+
+	multiple_kthread_stop(kdp);
+
+	mutex_lock(&kdp_list_lock);
+	last = list_empty(&kdp_list_head);
+	mutex_unlock(&kdp_list_lock);
+
+	/*
+	 * The shared thread takes kdp_list_lock in its loop, so it must be
+	 * stopped with the lock released: kthread_stop() waits for the
+	 * thread to exit and would otherwise deadlock against it.
+	 */
+	if (last)
+		single_kthread_stop();
+}
+
+/*
+ * rtnl_link_ops dellink callback: take the device off the list, stop the
+ * RX thread serving it and queue the net device for unregistration.
+ */
+static void kdp_net_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct kdp_dev *priv = netdev_priv(dev);
+
+	/* Unlink first so the shared RX thread no longer polls this device */
+	mutex_lock(&kdp_list_lock);
+	list_del(&priv->list);
+	mutex_unlock(&kdp_list_lock);
+
+	kdp_kthread_stop_one(priv);
+	unregister_netdevice_queue(dev, head);
+}
+
+/* rtnetlink link operations for the KDP link kind */
+static struct rtnl_link_ops kdp_link_ops __read_mostly = {
+	.kind		= KDP_DEVICE,
+	.priv_size	= sizeof(struct kdp_dev),
+	.setup		= kdp_net_setup,
+	.maxtype	= IFLA_KDP_MAX,
+	.newlink	= kdp_net_newlink,
+	.dellink	= kdp_net_dellink,
+};
+
+/*
+ * Select the RX function from the lo_mode module parameter: "fifo" and
+ * "fifo_skb" enable the matching loopback mode, NULL keeps the default,
+ * and any other value is reported and ignored.
+ */
+static void __init kdp_net_config_lo_mode(char *lo_str)
+{
+	if (!lo_str)
+		return;
+
+	/* messages end with a newline so each one is logged as its own line */
+	if (!strcmp(lo_str, "fifo")) {
+		pr_info("loopback mode fifo enabled\n");
+		kdp_net_rx_func = kdp_net_rx_lo_fifo;
+	} else if (!strcmp(lo_str, "fifo_skb")) {
+		pr_info("loopback mode fifo_skb enabled\n");
+		kdp_net_rx_func = kdp_net_rx_lo_fifo_skb;
+	} else {
+		pr_info("Incognizant parameter, loopback disabled\n");
+	}
+}
+
+/*
+ * Module entry point: prepare the device list and its lock, apply the
+ * loopback mode parameter and register the KDP link kind. Returns the
+ * result of rtnl_link_register().
+ */
+static int __init kdp_init(void)
+{
+	INIT_LIST_HEAD(&kdp_list_head);
+	mutex_init(&kdp_list_lock);
+
+	/* Pick the RX function requested through the lo_mode parameter */
+	kdp_net_config_lo_mode(lo_mode);
+
+	return rtnl_link_register(&kdp_link_ops);
+}
+module_init(kdp_init);
+
+/* Module exit point: unregister the KDP link kind. */
+static void __exit kdp_exit(void)
+{
+	rtnl_link_unregister(&kdp_link_ops);
+}
+module_exit(kdp_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for managing kdp devices");
-- 
2.5.0



More information about the dev mailing list