[dpdk-dev] [PATCH v2 1/4] Link Bonding Library
    declan.doherty at intel.com 
    declan.doherty at intel.com
       
    Wed Jun  4 17:18:53 CEST 2014
    
    
  
From: Declan Doherty <declan.doherty at intel.com>
- Broadcast TX burst bug fix
- Add/remove slave behavior fix
- Checkpatch fixes
Signed-off-by: Declan Doherty <declan.doherty at intel.com>
---
 config/common_bsdapp       |    5 +
 config/common_linuxapp     |    5 +
 lib/Makefile               |    1 +
 lib/librte_bond/Makefile   |   28 +
 lib/librte_bond/rte_bond.c | 1682 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_bond/rte_bond.h |  228 ++++++
 mk/rte.app.mk              |    5 +
 7 files changed, 1954 insertions(+)
 create mode 100644 lib/librte_bond/Makefile
 create mode 100644 lib/librte_bond/rte_bond.c
 create mode 100644 lib/librte_bond/rte_bond.h
diff --git a/config/common_bsdapp b/config/common_bsdapp
index 2cc7b80..53ed8b9 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -187,6 +187,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=y
 
 #
+# Compile link bonding library
+#
+CONFIG_RTE_LIBRTE_BOND=y
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 62619c6..35b525a 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -211,6 +211,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=n
 
 
+#
+# Compile link bonding library
+#
+CONFIG_RTE_LIBRTE_BOND=y
+
 CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
 
 #
diff --git a/lib/Makefile b/lib/Makefile
index b92b392..9995ba8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_BOND) += librte_bond
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
diff --git a/lib/librte_bond/Makefile b/lib/librte_bond/Makefile
new file mode 100644
index 0000000..7514378
--- /dev/null
+++ b/lib/librte_bond/Makefile
@@ -0,0 +1,28 @@
+# <COPYRIGHT_TAG>
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_bond.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_BOND) += rte_bond.c
+
+
+#
+# Export include files
+#
+SYMLINK-y-include += rte_bond.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BOND) += lib/librte_mbuf lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_BOND) += lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bond/rte_bond.c b/lib/librte_bond/rte_bond.c
new file mode 100644
index 0000000..c079b89
--- /dev/null
+++ b/lib/librte_bond/rte_bond.c
@@ -0,0 +1,1682 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <linux/binfmts.h>
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_string_fns.h>
+#include <rte_cycles.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "rte_bond.h"
+
+/* Driver name of the bonding PMD. valid_bonded_ethdev() compares it against
+ * an eth_dev's pci_drv.name, and bond_ethdev_info() reports it to callers. */
+static const char *driver_name = "Link Bonding PMD";
+
+/**
+ * Port Queue Mapping Structure (RX).
+ *
+ * Filled in by bond_ethdev_rx_queue_setup() with the arguments it was called
+ * with; slave_configure() later reads nb_rx_desc, rx_conf and mb_pool to set
+ * up the matching queue on each slave port.
+ */
+struct bond_rx_queue {
+	int queue_id;							/**< Queue Id */
+	struct bond_dev_private *dev_private;	/**< Reference to eth_dev private
+												 structure */
+
+	uint16_t nb_rx_desc;					/**< Number of RX descriptors
+												 available for the queue */
+	struct rte_eth_rxconf rx_conf;			/**< Copy of RX configuration
+												 structure for queue */
+	struct rte_mempool *mb_pool;			/**< Reference to mbuf pool to use
+												 for RX queue */
+};
+
+/**
+ * Port Queue Mapping Structure (TX).
+ *
+ * Filled in by bond_ethdev_tx_queue_setup(); slave_configure() reads
+ * nb_tx_desc and tx_conf to set up the matching queue on each slave port.
+ */
+struct bond_tx_queue {
+	int queue_id;							/**< Queue Id */
+	struct bond_dev_private *dev_private;	/**< Reference to dev private
+												 structure */
+	uint16_t nb_tx_desc;					/**< Number of TX descriptors
+												 available for the queue */
+	struct rte_eth_txconf tx_conf;			/**< Copy of TX configuration
+												 structure for queue */
+};
+
+
+/**
+ * Persisted Slave Configuration Structure.
+ *
+ * One entry per slave, written by slave_config_store() from the slave's
+ * port id and its first MAC address at the time of the call.
+ */
+struct slave_conf {
+	uint8_t port_id;				/**< Port Id of slave eth_dev */
+	struct ether_addr mac_addr;		/**< Slave eth_dev original MAC address */
+};
+
+/** Link Bonding PMD device private configuration Structure */
+struct bond_dev_private {
+	uint8_t mode;						/**< Link Bonding Mode */
+	uint8_t primary_port;				/**< Primary Slave Port */
+	uint8_t balance_xmit_policy;		/**< Transmit policy - l2 / l23 / l34
+											 for operation in balance mode */
+	uint8_t user_defined_mac;			/**< Flag for whether MAC address is
+											 user defined or not */
+	uint8_t promiscuous_en;				/**< Enable/disable promiscuous mode on
+											slave devices */
+	uint8_t link_props_set;				/**< Bonded eth_dev link properties set */
+
+	uint16_t nb_rx_queues;				/**< Total number of rx queues */
+	uint16_t nb_tx_queues;				/**< Total number of tx queues */
+
+	uint8_t slave_count;				/**< Number of slaves (entries used in
+											 slaves[]) */
+	uint8_t active_slave_count;			/**< Number of active slaves (entries
+											 used in active_slaves[]) */
+
+	uint8_t active_slaves[RTE_MAX_ETHPORTS];	/**< Active slave list */
+	uint8_t slaves[RTE_MAX_ETHPORTS];			/**< Slave list */
+
+	/** Persisted configuration of slaves; the field name is spelled
+	 * "presisted" throughout this file. */
+	struct slave_conf presisted_slaves_conf[RTE_MAX_ETHPORTS];
+};
+
+static struct slave_conf *
+slave_config_get(struct bond_dev_private *internals, uint8_t slave_port_id);
+
+/**
+ * Check whether an eth_dev is driven by the link bonding PMD.
+ *
+ * @param eth_dev
+ *   Device to inspect; its driver pointer is dereferenced unconditionally.
+ * @return
+ *   0 when the device's PCI driver name equals driver_name. -1 when either
+ *   name is NULL or the two names differ in length. Otherwise the non-zero
+ *   result of strncmp().
+ */
+static int
+valid_bonded_ethdev(struct rte_eth_dev *eth_dev)
+{
+	const char *dev_drv_name = eth_dev->driver->pci_drv.name;
+	size_t expected_len;
+
+	/* Both names must exist before they can be compared */
+	if (dev_drv_name == NULL || driver_name == NULL)
+		return -1;
+
+	/* Names of different lengths can never match */
+	expected_len = strlen(driver_name);
+	if (strlen(dev_drv_name) != expected_len)
+		return -1;
+
+	return strncmp(dev_drv_name, driver_name, expected_len);
+}
+
+/**
+ * Check that a port id is below the number of ethdevs reported by
+ * rte_eth_dev_count().
+ *
+ * @return
+ *   0 when the id is in range, -1 (after logging an error) otherwise.
+ */
+static int
+valid_port_id(uint8_t port_id)
+{
+	int nb_ports = rte_eth_dev_count();
+
+	if (port_id < nb_ports)
+		return 0;
+
+	RTE_LOG(ERR, PMD,
+			"%s: port Id %d is greater than rte_eth_dev_count %d\n",
+			__func__, port_id, nb_ports);
+	return -1;
+}
+
+/**
+ * Check that a port id is in range and refers to a bonded eth_dev.
+ *
+ * @return
+ *   0 when both conditions hold, -1 otherwise. An error is logged here when
+ *   the port is in range but is not a bonded device.
+ */
+static int
+valid_bonded_port_id(uint8_t port_id)
+{
+	/* The id must index an existing ethdev */
+	if (valid_port_id(port_id) != 0)
+		return -1;
+
+	/* valid_bonded_ethdev() returns 0 for a bonded device */
+	if (valid_bonded_ethdev(&rte_eth_devices[port_id]) == 0)
+		return 0;
+
+	RTE_LOG(ERR, PMD,
+			"%s: Specified port Id %d is not a bonded eth_dev device\n",
+			__func__, port_id);
+	return -1;
+}
+
+/**
+ * Check that a port id is in range and does NOT refer to a bonded eth_dev,
+ * i.e. that it is usable as a slave.
+ *
+ * @return
+ *   0 when the port is a valid slave candidate, -1 otherwise.
+ */
+static int
+valid_slave_port_id(uint8_t port_id)
+{
+	/* The id must index an existing ethdev */
+	if (valid_port_id(port_id) != 0)
+		return -1;
+
+	/* A zero result means the port is itself a bonded device: reject it */
+	return (valid_bonded_ethdev(&rte_eth_devices[port_id]) == 0) ? -1 : 0;
+}
+
+
+/**
+ * RX burst handler of the bonded device.
+ *
+ * In round robin, broadcast and balance modes every active slave is polled
+ * in list order and the received mbufs are appended to bufs. In active
+ * backup mode only the primary port is polled.
+ *
+ * @param queue
+ *   Pointer to the struct bond_rx_queue of the bonded device.
+ * @param bufs
+ *   Array with room for nb_pkts mbuf pointers.
+ * @param nb_pkts
+ *   Maximum number of packets to receive in total.
+ * @return
+ *   Number of mbufs stored in bufs (never more than nb_pkts).
+ */
+static uint16_t
+bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_dev_private *internals;
+
+	uint16_t num_rx_slave = 0;
+	uint16_t num_rx_total = 0;
+
+	int i;
+
+	/* Cast to structure, containing bonded device's port id and queue id */
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+
+	internals = bd_rx_q->dev_private;
+
+	switch (internals->mode) {
+	case BONDING_MODE_ROUND_ROBIN:
+	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_BALANCE:
+		/* Stop polling as soon as the caller's array is full. Each slave is
+		 * only asked for the space that is left, so writes that start at
+		 * bufs + num_rx_total can never run past bufs[nb_pkts - 1]. */
+		for (i = 0; i < internals->active_slave_count &&
+				num_rx_total < nb_pkts; i++) {
+			num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
+					bd_rx_q->queue_id, bufs + num_rx_total,
+					(uint16_t)(nb_pkts - num_rx_total));
+			num_rx_total += num_rx_slave;
+		}
+		break;
+	case BONDING_MODE_ACTIVE_BACKUP:
+		num_rx_total = rte_eth_rx_burst(internals->primary_port,
+				bd_rx_q->queue_id, bufs, nb_pkts);
+		break;
+	default:
+		/* Unknown mode: nothing is received */
+		break;
+	}
+	return num_rx_total;
+}
+
+
+/**
+ * TX burst handler for round robin mode.
+ *
+ * Packets are dealt one at a time into per-slave buckets, and each bucket is
+ * then sent on a distinct active slave. The slave that gets the first bucket
+ * is the one after the slave that received the last packet of the previous
+ * call, so the rotation continues across bursts.
+ *
+ * The rotation state is a function-local static, so it is shared by every
+ * bonded device and TX queue that uses this handler.
+ *
+ * @param queue
+ *   Pointer to the struct bond_tx_queue of the bonded device.
+ * @param bufs
+ *   Packets to transmit.
+ * @param nb_pkts
+ *   Number of entries in bufs.
+ * @return
+ *   Sum of the values returned by rte_eth_tx_burst() for each slave; 0 when
+ *   there is no active slave or nothing to send.
+ */
+static uint16_t
+bond_ethdev_tx_round_robin(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_dev_private *dev_private;
+	struct bond_tx_queue *bd_tx_q;
+
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+
+	uint8_t num_of_slaves;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+
+	uint16_t num_tx_total = 0;
+
+	static int last_slave_idx = -1;
+	int i, first_slave_idx, slave_idx;
+
+	bd_tx_q = (struct bond_tx_queue *)queue;
+	dev_private = bd_tx_q->dev_private;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	num_of_slaves = dev_private->active_slave_count;
+	memcpy(slaves, dev_private->active_slaves,
+			sizeof(dev_private->active_slaves[0]) * num_of_slaves);
+
+	if (num_of_slaves < 1 || nb_pkts == 0)
+		return num_tx_total;
+
+	/* Deal the packets into buckets: packet i goes to bucket i % n */
+	for (i = 0; i < nb_pkts; i++) {
+		slave_idx = i % num_of_slaves;
+		slave_bufs[slave_idx][(slave_nb_pkts[slave_idx])++] = bufs[i];
+	}
+
+	/* Bucket 0 goes to the slave after the one used last. The modulo also
+	 * covers a slave list that shrank since the previous call. */
+	first_slave_idx = (last_slave_idx + 1) % num_of_slaves;
+
+	/* Send bucket i on slave (first + i) % n so that every slave is used
+	 * exactly once per burst */
+	for (i = 0; i < num_of_slaves; i++) {
+		slave_idx = (first_slave_idx + i) % num_of_slaves;
+
+		if (slave_nb_pkts[i] > 0) {
+			num_tx_total += rte_eth_tx_burst(slaves[slave_idx],
+					bd_tx_q->queue_id, slave_bufs[i], slave_nb_pkts[i]);
+		}
+	}
+
+	/* The last packet of the burst sits in bucket (nb_pkts - 1) % n;
+	 * remember which slave that bucket was sent on */
+	last_slave_idx = (first_slave_idx + (nb_pkts - 1) % num_of_slaves) %
+			num_of_slaves;
+
+	return num_tx_total;
+}
+
+/**
+ * TX burst handler for active backup mode: the whole burst is sent on the
+ * primary port.
+ *
+ * @return
+ *   The value returned by rte_eth_tx_burst() for the primary port, or 0 when
+ *   there is no active slave.
+ */
+static uint16_t bond_ethdev_tx_active_backup(void *queue,
+		struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *txq = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *priv = txq->dev_private;
+
+	if (priv->active_slave_count >= 1)
+		return rte_eth_tx_burst(priv->primary_port, txq->queue_id,
+				bufs, nb_pkts);
+
+	return 0;
+}
+
+
+/**
+ * XOR-fold the source and destination MAC addresses of an Ethernet header,
+ * read as three 16-bit words each, into one 16-bit value.
+ */
+static inline uint16_t
+ether_hash(struct ether_hdr *eth_hdr)
+{
+	uint16_t *src_words = (uint16_t *)eth_hdr->s_addr.addr_bytes;
+	uint16_t *dst_words = (uint16_t *)eth_hdr->d_addr.addr_bytes;
+	uint16_t folded = 0;
+	int w;
+
+	for (w = 0; w < 3; w++)
+		folded ^= src_words[w] ^ dst_words[w];
+
+	return folded;
+}
+
+/** XOR of the source and destination address fields of an IPv4 header. */
+static inline uint32_t
+ipv4_hash(struct ipv4_hdr *ipv4_hdr)
+{
+	uint32_t src = ipv4_hdr->src_addr;
+	uint32_t dst = ipv4_hdr->dst_addr;
+
+	return src ^ dst;
+}
+
+/**
+ * XOR-fold the source and destination addresses of an IPv6 header, read as
+ * four 32-bit words each, into one 32-bit value.
+ */
+static inline uint32_t
+ipv6_hash(struct ipv6_hdr *ipv6_hdr)
+{
+	uint32_t *src_words = (uint32_t *)&(ipv6_hdr->src_addr[0]);
+	uint32_t *dst_words = (uint32_t *)&(ipv6_hdr->dst_addr[0]);
+	uint32_t folded = 0;
+	int w;
+
+	for (w = 0; w < 4; w++)
+		folded ^= src_words[w] ^ dst_words[w];
+
+	return folded;
+}
+
+/** XOR of the source and destination port fields of a UDP header. */
+static uint32_t
+udp_hash(struct udp_hdr *hdr)
+{
+	uint32_t src_port = hdr->src_port;
+	uint32_t dst_port = hdr->dst_port;
+
+	return src_port ^ dst_port;
+}
+
+/**
+ * Select the index of the slave a packet is transmitted on in balance mode.
+ *
+ * @param buf
+ *   Packet to classify; headers are read from the start of its data.
+ * @param slave_count
+ *   Number of slaves to choose between. Must be non-zero: the result is
+ *   computed with "% slave_count". The visible caller returns before calling
+ *   this function when it has no active slave.
+ * @param policy
+ *   One of the BALANCE_XMIT_POLICY_* values. For any other value the hash
+ *   stays 0, so index 0 is returned.
+ * @return
+ *   An index in the range [0, slave_count).
+ */
+static inline uint16_t
+xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
+{
+	struct ether_hdr *eth_hdr;
+	struct udp_hdr *udp_hdr;
+	size_t eth_offset = 0;
+	uint32_t hash = 0;
+
+	/* With a single slave there is nothing to choose */
+	if (slave_count == 1)
+		return 0;
+
+	switch (policy) {
+	case BALANCE_XMIT_POLICY_LAYER2:
+		eth_hdr = (struct ether_hdr *)buf->pkt.data;
+
+		/* ether_hash() yields 16 bits, so only the 8-bit fold is applied
+		 * and the result is returned directly */
+		hash = ether_hash(eth_hdr);
+		hash ^= hash >> 8;
+		return hash % slave_count;
+
+
+	case BALANCE_XMIT_POLICY_LAYER23:
+		eth_hdr = (struct ether_hdr *)buf->pkt.data;
+
+		/* Skip the VLAN header when the mbuf is flagged as a VLAN packet */
+		if (buf->ol_flags & PKT_RX_VLAN_PKT)
+			eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
+		else
+			eth_offset = sizeof(struct ether_hdr);
+
+		if (buf->ol_flags & PKT_RX_IPV4_HDR) {
+			struct ipv4_hdr *ipv4_hdr;
+			ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(buf,
+					unsigned char *) + eth_offset);
+
+			hash = ether_hash(eth_hdr) ^ ipv4_hash(ipv4_hdr);
+
+		} else {
+			/* NOTE(review): every packet without PKT_RX_IPV4_HDR is parsed
+			 * as IPv6 here, including non-IP traffic - confirm that this is
+			 * intended */
+			struct ipv6_hdr *ipv6_hdr;
+
+			ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(buf,
+					unsigned char *) + eth_offset);
+
+			hash = ether_hash(eth_hdr) ^ ipv6_hash(ipv6_hdr);
+		}
+		break;
+
+	case BALANCE_XMIT_POLICY_LAYER34:
+		if (buf->ol_flags & PKT_RX_VLAN_PKT)
+			eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
+		else
+			eth_offset = sizeof(struct ether_hdr);
+
+		if (buf->ol_flags & PKT_RX_IPV4_HDR) {
+			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+					(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);
+
+			/* The UDP header is located at a fixed sizeof(struct ipv4_hdr)
+			 * past the IP header; IPv4 options are not accounted for */
+			if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
+				udp_hdr = (struct udp_hdr *)
+						(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
+								sizeof(struct ipv4_hdr));
+				hash = ipv4_hash(ipv4_hdr) ^ udp_hash(udp_hdr);
+			} else {
+				hash = ipv4_hash(ipv4_hdr);
+			}
+		} else {
+			/* NOTE(review): as above, anything that is not flagged IPv4 is
+			 * treated as IPv6 */
+			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+					(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);
+
+			/* Only a UDP header directly following the fixed IPv6 header is
+			 * recognised */
+			if (ipv6_hdr->proto == IPPROTO_UDP) {
+				udp_hdr = (struct udp_hdr *)
+						(rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
+								sizeof(struct ipv6_hdr));
+				hash = ipv6_hash(ipv6_hdr) ^ udp_hash(udp_hdr);
+			} else {
+				hash = ipv6_hash(ipv6_hdr);
+			}
+		}
+		break;
+	}
+
+	/* Fold the upper bits into the low byte before taking the modulo */
+	hash ^= hash >> 16;
+	hash ^= hash >> 8;
+
+	return hash % slave_count;
+}
+
+/**
+ * TX burst handler for balance mode.
+ *
+ * Each packet is assigned to a slave by xmit_slave_hash() using the
+ * configured transmit policy; the packets collected for a slave are then
+ * sent to it in one burst.
+ *
+ * @return
+ *   Sum of the values returned by rte_eth_tx_burst() for each slave; 0 when
+ *   there is no active slave.
+ */
+static uint16_t
+bond_ethdev_tx_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+	struct rte_mbuf *per_slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+	uint16_t per_slave_cnt[RTE_MAX_ETHPORTS] = { 0 };
+
+	uint8_t slave_ports[RTE_MAX_ETHPORTS];
+	uint8_t nb_slaves;
+
+	uint16_t sent = 0;
+	int pkt, s, target;
+
+	/* Work on a snapshot of the active slave list so that slave up/down
+	 * changes during the burst do not affect it */
+	nb_slaves = internals->active_slave_count;
+	memcpy(slave_ports, internals->active_slaves,
+			sizeof(internals->active_slaves[0]) * nb_slaves);
+
+	if (nb_slaves < 1)
+		return sent;
+
+	/* Sort every packet into the bucket of the slave chosen by the hash */
+	for (pkt = 0; pkt < nb_pkts; pkt++) {
+		target = xmit_slave_hash(bufs[pkt], nb_slaves,
+				internals->balance_xmit_policy);
+
+		per_slave_bufs[target][per_slave_cnt[target]] = bufs[pkt];
+		per_slave_cnt[target]++;
+	}
+
+	/* Transmit each non-empty bucket on its slave */
+	for (s = 0; s < nb_slaves; s++) {
+		if (per_slave_cnt[s] == 0)
+			continue;
+
+		sent += rte_eth_tx_burst(slave_ports[s], bd_tx_q->queue_id,
+				per_slave_bufs[s], per_slave_cnt[s]);
+	}
+
+	return sent;
+}
+
+/**
+ * TX burst handler for broadcast mode: the same burst is sent on every
+ * active slave.
+ *
+ * The reference count of each mbuf is raised by (number of slaves - 1)
+ * before transmission so that each slave holds its own reference.
+ *
+ * @return
+ *   Sum of the values returned by rte_eth_tx_burst() over all slaves, which
+ *   can therefore exceed nb_pkts; 0 when there is no active slave.
+ */
+static uint16_t
+bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+	uint8_t slave_ports[RTE_MAX_ETHPORTS];
+	uint8_t nb_slaves = internals->active_slave_count;
+
+	uint16_t tx_sum = 0;
+	int pkt, s;
+
+	/* Work on a snapshot of the active slave list so that slave up/down
+	 * changes during the burst do not affect it */
+	memcpy(slave_ports, internals->active_slaves,
+			sizeof(internals->active_slaves[0]) * nb_slaves);
+
+	if (nb_slaves < 1)
+		return 0;
+
+	/* One extra reference per additional slave */
+	for (pkt = 0; pkt < nb_pkts; pkt++)
+		rte_mbuf_refcnt_update(bufs[pkt], nb_slaves - 1);
+
+	/* Hand the full burst to each slave in turn */
+	for (s = 0; s < nb_slaves; s++)
+		tx_sum += rte_eth_tx_burst(slave_ports[s], bd_tx_q->queue_id,
+				bufs, nb_pkts);
+
+	return tx_sum;
+}
+
+/**
+ * Copy duplex and speed from a slave's link to the bonded device's link and
+ * mark the properties as set. Nothing happens unless the slave link is up
+ * and the bonded device is started.
+ */
+static void
+link_properties_set(struct rte_eth_dev *bonded_eth_dev,
+		struct rte_eth_link *slave_dev_link)
+{
+	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
+	struct rte_eth_link *bonded_link = &bonded_eth_dev->data->dev_link;
+
+	if (!slave_dev_link->link_status || !bonded_eth_dev->data->dev_started)
+		return;
+
+	bonded_link->link_duplex = slave_dev_link->link_duplex;
+	bonded_link->link_speed = slave_dev_link->link_speed;
+
+	internals->link_props_set = 1;
+}
+
+/**
+ * Zero the whole link structure of the bonded device and clear the
+ * "link properties set" flag.
+ */
+static void
+link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
+{
+	struct rte_eth_link *bonded_link = &(bonded_eth_dev->data->dev_link);
+	struct bond_dev_private *priv = bonded_eth_dev->data->dev_private;
+
+	memset(bonded_link, 0, sizeof(*bonded_link));
+	priv->link_props_set = 0;
+}
+
+/**
+ * Compare the duplex and speed of a slave link with those of the bonded
+ * device.
+ *
+ * @return
+ *   0 when both properties match, -1 otherwise.
+ */
+static int
+link_properties_valid(struct rte_eth_link *bonded_dev_link,
+		struct rte_eth_link *slave_dev_link)
+{
+	int same_duplex =
+			bonded_dev_link->link_duplex == slave_dev_link->link_duplex;
+	int same_speed =
+			bonded_dev_link->link_speed == slave_dev_link->link_speed;
+
+	return (same_duplex && same_speed) ? 0 : -1;
+}
+
+/**
+ * Store a new MAC address in the first MAC address slot of an eth_dev's
+ * data structure. Only the in-memory copy is written here.
+ *
+ * @param eth_dev
+ *   Device whose address is updated.
+ * @param new_mac_addr
+ *   Address to store.
+ * @return
+ *   0 on success, -1 (after logging an error) when either pointer is NULL.
+ */
+static int
+mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
+{
+	struct ether_addr *mac_addr;
+
+	if (eth_dev == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
+		return -1;
+	}
+
+	if (new_mac_addr == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+		return -1;
+	}
+
+	/* Only dereference eth_dev once it is known to be non-NULL */
+	mac_addr = eth_dev->data->mac_addrs;
+
+	/* if new MAC is different to current MAC then update */
+	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
+		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
+
+	return 0;
+}
+
+/**
+ * Propagate MAC addresses to the slaves of a bonded device.
+ *
+ * In round robin, balance and broadcast modes every slave receives the
+ * bonded device's MAC address. In active backup mode (and any other mode)
+ * only the primary port receives it; every other slave is given the address
+ * persisted for it in the slave configuration.
+ *
+ * @return
+ *   0 on success; -1 when there is no slave, when a slave has no persisted
+ *   configuration, or when an address could not be set.
+ */
+static int
+mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
+{
+	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
+	int i;
+
+	/* Update slave devices MAC addresses */
+	if (internals->slave_count < 1)
+		return -1;
+
+	switch (internals->mode) {
+	case BONDING_MODE_ROUND_ROBIN:
+	case BONDING_MODE_BALANCE:
+	case BONDING_MODE_BROADCAST:
+		for (i = 0; i < internals->slave_count; i++) {
+			if (mac_address_set(&rte_eth_devices[internals->slaves[i]],
+					bonded_eth_dev->data->mac_addrs)) {
+				RTE_LOG(ERR, PMD,
+						"%s: Failed to update port Id %d MAC address\n",
+						__func__, internals->slaves[i]);
+				return -1;
+			}
+		}
+		break;
+	case BONDING_MODE_ACTIVE_BACKUP:
+	default:
+		for (i = 0; i < internals->slave_count; i++) {
+			struct ether_addr *new_mac;
+
+			if (internals->slaves[i] == internals->primary_port) {
+				new_mac = bonded_eth_dev->data->mac_addrs;
+			} else {
+				struct slave_conf *conf =
+						slave_config_get(internals, internals->slaves[i]);
+
+				/* A slave without a persisted entry has no address to
+				 * restore. Passing NULL makes mac_address_set() fail
+				 * instead of reading through an invalid pointer. */
+				new_mac = (conf != NULL) ? &conf->mac_addr : NULL;
+			}
+
+			/* Failures are reported the same way for the primary port and
+			 * for the other slaves */
+			if (mac_address_set(&rte_eth_devices[internals->slaves[i]],
+					new_mac)) {
+				RTE_LOG(ERR, PMD,
+						"%s: Failed to update port Id %d MAC address\n",
+						__func__, internals->slaves[i]);
+				return -1;
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+
+/**
+ * Select the bonding mode of a bonded device by installing the matching TX
+ * burst handler and recording the mode in the private data.
+ *
+ * @return
+ *   0 on success; -1 for an unknown mode, in which case nothing is changed.
+ */
+static int
+bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
+{
+	struct bond_dev_private *internals = eth_dev->data->dev_private;
+	uint16_t (*tx_burst)(void *, struct rte_mbuf **, uint16_t);
+
+	if (mode == BONDING_MODE_ROUND_ROBIN)
+		tx_burst = bond_ethdev_tx_round_robin;
+	else if (mode == BONDING_MODE_ACTIVE_BACKUP)
+		tx_burst = bond_ethdev_tx_active_backup;
+	else if (mode == BONDING_MODE_BALANCE)
+		tx_burst = bond_ethdev_tx_balance;
+	else if (mode == BONDING_MODE_BROADCAST)
+		tx_burst = bond_ethdev_tx_burst_broadcast;
+	else
+		return -1;
+
+	eth_dev->tx_pkt_burst = tx_burst;
+	internals->mode = mode;
+
+	return 0;
+}
+
+/**
+ * Reconfigure and restart a slave so that it mirrors the bonded device.
+ *
+ * The slave is stopped, link status change interrupts are enabled in its
+ * configuration, and it is configured with the bonded device's queue counts.
+ * Every RX and TX queue is then set up from the parameters stored in the
+ * bonded device's queue structures, and the slave is started.
+ *
+ * @return
+ *   0 on success, -1 (after logging an error) when any step fails.
+ */
+static int
+slave_configure(struct rte_eth_dev *bonded_eth_dev,
+		struct rte_eth_dev *slave_eth_dev)
+{
+	struct rte_eth_dev_data *bonded = bonded_eth_dev->data;
+	struct rte_eth_dev_data *slave = slave_eth_dev->data;
+	struct bond_rx_queue *rxq;
+	struct bond_tx_queue *txq;
+	int q;
+
+	/* The slave has to be stopped before it is reconfigured */
+	rte_eth_dev_stop(slave->port_id);
+
+	/* Request link status change interrupts from the slave */
+	slave->dev_conf.intr_conf.lsc = 1;
+
+	if (rte_eth_dev_configure(slave->port_id, bonded->nb_rx_queues,
+			bonded->nb_tx_queues, &(slave->dev_conf)) != 0) {
+		RTE_LOG(ERR, PMD, "Cannot configure slave device: port=%u\n",
+				slave->port_id);
+		return -1;
+	}
+
+	/* Replay the bonded device's RX queue setup on the slave */
+	for (q = 0; q < bonded->nb_rx_queues; q++) {
+		rxq = (struct bond_rx_queue *)bonded->rx_queues[q];
+
+		if (rte_eth_rx_queue_setup(slave->port_id, q, rxq->nb_rx_desc,
+				rte_eth_dev_socket_id(slave->port_id),
+				&(rxq->rx_conf), rxq->mb_pool) != 0) {
+			RTE_LOG(ERR, PMD, "rte_eth_rx_queue_setup: port=%d queue_id %d\n",
+					slave->port_id, q);
+			return -1;
+		}
+	}
+
+	/* Replay the bonded device's TX queue setup on the slave */
+	for (q = 0; q < bonded->nb_tx_queues; q++) {
+		txq = (struct bond_tx_queue *)bonded->tx_queues[q];
+
+		if (rte_eth_tx_queue_setup(slave->port_id, q, txq->nb_tx_desc,
+				rte_eth_dev_socket_id(slave->port_id),
+				&txq->tx_conf) != 0) {
+			RTE_LOG(ERR, PMD, "rte_eth_tx_queue_setup: port=%d queue_id %d\n",
+					slave->port_id, q);
+			return -1;
+		}
+	}
+
+	if (rte_eth_dev_start(slave->port_id) != 0) {
+		RTE_LOG(ERR, PMD, "rte_eth_dev_start: port=%u\n",
+				slave->port_id);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Look up the persisted configuration of a slave by port id.
+ *
+ * @return
+ *   Pointer to the matching entry among the first slave_count entries of the
+ *   persisted configuration table, or NULL when there is none.
+ */
+static struct slave_conf *
+slave_config_get(struct bond_dev_private *internals, uint8_t slave_port_id)
+{
+	struct slave_conf *entry = internals->presisted_slaves_conf;
+	struct slave_conf *end = entry + internals->slave_count;
+
+	for (; entry != end; entry++) {
+		if (entry->port_id == slave_port_id)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/**
+ * Remove the persisted configuration of a slave.
+ *
+ * The matching entry is zeroed and every following entry is moved down by
+ * one slot. The last used slot is not cleared after the shift, so it keeps a
+ * copy of the entry that was moved out of it.
+ */
+static void
+slave_config_clear(struct bond_dev_private *internals,
+		struct rte_eth_dev *slave_eth_dev)
+{
+	const uint8_t target_port = slave_eth_dev->data->port_id;
+	struct slave_conf *confs = internals->presisted_slaves_conf;
+	int idx;
+	int shifting = 0;
+
+	for (idx = 0; idx < internals->slave_count; idx++) {
+		if (confs[idx].port_id == target_port) {
+			memset(&confs[idx], 0, sizeof(confs[idx]));
+			shifting = 1;
+		}
+
+		/* Before the match, and at the last used slot, there is nothing
+		 * to move down */
+		if (!shifting || idx >= internals->slave_count - 1)
+			continue;
+
+		memcpy(&confs[idx], &confs[idx + 1], sizeof(confs[idx]));
+	}
+}
+
+/**
+ * Record a slave's port id and current first MAC address in the persisted
+ * configuration slot at index slave_count. slave_count itself is not
+ * modified here.
+ */
+static void
+slave_config_store(struct bond_dev_private *internals,
+		struct rte_eth_dev *slave_eth_dev)
+{
+	struct slave_conf *slot =
+			internals->presisted_slaves_conf + internals->slave_count;
+
+	memcpy(&(slot->mac_addr), slave_eth_dev->data->mac_addrs,
+			sizeof(struct ether_addr));
+
+	slot->port_id = slave_eth_dev->data->port_id;
+}
+
+static void
+bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
+
+/**
+ * Start a bonded device.
+ *
+ * Unless the user defined a MAC address, the bonded device first takes over
+ * the persisted address of its primary port. The slave MAC addresses are
+ * then updated, promiscuous mode is re-applied if it was enabled, and every
+ * slave is reconfigured and started.
+ *
+ * The started flag and link status are set before the slaves are touched
+ * and are cleared again on any failure, so a failed start does not leave
+ * the device marked as started.
+ *
+ * @return
+ *   0 on success, -1 when eth_dev is not a bonded device, has no slaves, or
+ *   any of the steps above fails.
+ */
+static int
+bond_ethdev_start(struct rte_eth_dev *eth_dev)
+{
+	struct bond_dev_private *internals;
+	int i;
+
+	/* slave eth dev will be started by bonded device */
+	if (valid_bonded_ethdev(eth_dev)) {
+		RTE_LOG(ERR, PMD,
+				"%s: user tried to explicitly start a slave eth_dev (%d) of the bonded eth_dev\n",
+				__func__, eth_dev->data->port_id);
+		return -1;
+	}
+
+	internals = eth_dev->data->dev_private;
+
+	/* Reject the request before any device state is modified */
+	if (internals->slave_count == 0) {
+		RTE_LOG(ERR, PMD,
+				"%s: Cannot start port since there are no slave devices\n",
+				__func__);
+		return -1;
+	}
+
+	eth_dev->data->dev_link.link_status = 1;
+	eth_dev->data->dev_started = 1;
+
+	if (internals->user_defined_mac == 0) {
+		struct slave_conf *conf = slave_config_get(internals,
+				internals->primary_port);
+
+		/* The primary port may have no persisted entry */
+		if (conf == NULL ||
+				mac_address_set(eth_dev, &(conf->mac_addr)) != 0) {
+			RTE_LOG(ERR, PMD,
+					"bonded port (%d) failed to update mac address\n",
+					eth_dev->data->port_id);
+			goto out_err;
+		}
+	}
+
+	/* Update all slave devices MACs*/
+	if (mac_address_slaves_update(eth_dev) != 0)
+		goto out_err;
+
+	/* If bonded device is configure in promiscuous mode then re-apply config */
+	if (internals->promiscuous_en)
+		bond_ethdev_promiscuous_enable(eth_dev);
+
+	/* Reconfigure each slave device if starting bonded device */
+	for (i = 0; i < internals->slave_count; i++) {
+		if (slave_configure(eth_dev, &(rte_eth_devices[internals->slaves[i]]))
+				!= 0) {
+			RTE_LOG(ERR, PMD,
+					"bonded port (%d) failed to reconfigure slave device (%d)\n",
+					eth_dev->data->port_id, internals->slaves[i]);
+			goto out_err;
+		}
+	}
+	return 0;
+
+out_err:
+	/* Undo the state set above, mirroring bond_ethdev_stop() */
+	eth_dev->data->dev_link.link_status = 0;
+	eth_dev->data->dev_started = 0;
+	return -1;
+}
+
+/**
+ * Stop a bonded device: mark it as not started with its link down, and
+ * empty the active slave list. The slave devices are not stopped here.
+ */
+static void
+bond_ethdev_stop(struct rte_eth_dev *eth_dev)
+{
+	struct rte_eth_dev_data *data = eth_dev->data;
+	struct bond_dev_private *priv = data->dev_private;
+
+	data->dev_started = 0;
+	data->dev_link.link_status = 0;
+
+	priv->active_slave_count = 0;
+}
+
+/** Close handler of the bonded device; intentionally does nothing. */
+static void
+bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/**
+ * Configure handler of the bonded device. It performs no work and always
+ * returns 0.
+ */
+static int
+bond_ethdev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/**
+ * Fill in the device information of a bonded device. All limits reported
+ * here are fixed values; none of them is derived from the slaves.
+ */
+static void
+bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	/* Identification */
+	dev_info->pci_dev = dev->pci_dev;
+	dev_info->driver_name = driver_name;
+
+	/* Queue limits */
+	dev_info->max_rx_queues = (uint16_t)128;
+	dev_info->max_tx_queues = (uint16_t)512;
+
+	/* Packet and address limits */
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)2048;
+	dev_info->max_mac_addrs = 1;
+}
+
+/**
+ * Set up an RX queue of the bonded device.
+ *
+ * A queue structure is allocated on the NUMA node of the device and filled
+ * with the setup parameters, which slave_configure() later applies to the
+ * slaves. The socket_id argument is not used.
+ *
+ * @return
+ *   0 on success, -1 when the allocation fails.
+ */
+static int
+bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
+		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
+{
+	struct bond_rx_queue *rxq;
+
+	rxq = (struct bond_rx_queue *)rte_zmalloc_socket(NULL,
+			sizeof(struct bond_rx_queue), 0, dev->pci_dev->numa_node);
+	if (rxq == NULL)
+		return -1;
+
+	rxq->dev_private = dev->data->dev_private;
+	rxq->queue_id = rx_queue_id;
+	rxq->nb_rx_desc = nb_rx_desc;
+	rxq->mb_pool = mb_pool;
+	memcpy(&(rxq->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
+
+	dev->data->rx_queues[rx_queue_id] = rxq;
+
+	return 0;
+}
+
+/**
+ * Set up a TX queue of the bonded device.
+ *
+ * A queue structure is allocated on the NUMA node of the device and filled
+ * with the setup parameters, which slave_configure() later applies to the
+ * slaves. The socket_id argument is not used.
+ *
+ * @return
+ *   0 on success, -1 when the allocation fails.
+ */
+static int
+bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
+		const struct rte_eth_txconf *tx_conf)
+{
+	struct bond_tx_queue *txq;
+
+	txq = (struct bond_tx_queue *)rte_zmalloc_socket(NULL,
+			sizeof(struct bond_tx_queue), 0, dev->pci_dev->numa_node);
+	if (txq == NULL)
+		return -1;
+
+	txq->dev_private = dev->data->dev_private;
+	txq->queue_id = tx_queue_id;
+	txq->nb_tx_desc = nb_tx_desc;
+	memcpy(&(txq->tx_conf), tx_conf, sizeof(txq->tx_conf));
+
+	dev->data->tx_queues[tx_queue_id] = txq;
+
+	return 0;
+}
+
+/** Free an RX queue structure; a NULL pointer is ignored. */
+static void
+bond_ethdev_rx_queue_release(void *queue)
+{
+	if (queue != NULL)
+		rte_free(queue);
+}
+
+/** Free a TX queue structure; a NULL pointer is ignored. */
+static void
+bond_ethdev_tx_queue_release(void *queue)
+{
+	if (queue != NULL)
+		rte_free(queue);
+}
+
+
+/**
+ * Refresh the link status of the bonded device.
+ *
+ * The link is reported down when the device is not started or has no active
+ * slave. Otherwise the active slaves are asked in turn to update their link,
+ * and the bonded link is reported up as soon as one of them is up.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
+		int wait_to_complete)
+{
+	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
+	struct rte_eth_dev *slave;
+	int idx, any_up = 0;
+
+	if (!bonded_eth_dev->data->dev_started ||
+		internals->active_slave_count == 0) {
+		bonded_eth_dev->data->dev_link.link_status = 0;
+		return 0;
+	}
+
+	/* Stop at the first slave whose link is up */
+	for (idx = 0; idx < internals->active_slave_count && !any_up; idx++) {
+		slave = &rte_eth_devices[internals->active_slaves[idx]];
+
+		(*slave->dev_ops->link_update)(slave, wait_to_complete);
+		if (slave->data->dev_link.link_status == 1)
+			any_up = 1;
+	}
+
+	bonded_eth_dev->data->dev_link.link_status = any_up;
+
+	return 0;
+}
+
+/**
+ * Collect the statistics of the bonded device as the sum of the statistics
+ * of all its slaves. The output structure is zeroed first, so fields that
+ * are not summed below stay 0.
+ */
+static void
+bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	struct bond_dev_private *priv = dev->data->dev_private;
+	struct rte_eth_stats port_stats;
+	int idx;
+
+	memset(stats, 0, sizeof(*stats));
+
+	for (idx = 0; idx < priv->slave_count; idx++) {
+		rte_eth_stats_get(priv->slaves[idx], &port_stats);
+
+		/* Packet and byte counters */
+		stats->ipackets += port_stats.ipackets;
+		stats->opackets += port_stats.opackets;
+		stats->ibytes += port_stats.ibytes;
+		stats->obytes += port_stats.obytes;
+
+		/* Error, multicast and buffer counters */
+		stats->ierrors += port_stats.ierrors;
+		stats->oerrors += port_stats.oerrors;
+		stats->imcasts += port_stats.imcasts;
+		stats->rx_nombuf += port_stats.rx_nombuf;
+
+		/* Flow director counters */
+		stats->fdirmatch += port_stats.fdirmatch;
+		stats->fdirmiss += port_stats.fdirmiss;
+
+		/* Pause frame counters */
+		stats->tx_pause_xon += port_stats.tx_pause_xon;
+		stats->rx_pause_xon += port_stats.rx_pause_xon;
+		stats->tx_pause_xoff += port_stats.tx_pause_xoff;
+		stats->rx_pause_xoff += port_stats.rx_pause_xoff;
+	}
+}
+
+/** Reset the statistics of every slave of the bonded device. */
+static void
+bond_ethdev_stats_reset(struct rte_eth_dev *dev)
+{
+	struct bond_dev_private *priv = dev->data->dev_private;
+	int idx = 0;
+
+	while (idx < priv->slave_count) {
+		rte_eth_stats_reset(priv->slaves[idx]);
+		idx++;
+	}
+}
+
+/**
+ * Enable promiscuous mode on the bonded device and record the setting.
+ *
+ * In round robin, balance and broadcast modes the setting is applied to
+ * every slave. In active backup mode, and in any other mode, it is applied
+ * to the primary port only.
+ */
+static void
+bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
+{
+	struct bond_dev_private *priv = eth_dev->data->dev_private;
+	uint8_t mode;
+	int idx;
+
+	priv->promiscuous_en = 1;
+
+	mode = priv->mode;
+	if (mode != BONDING_MODE_ROUND_ROBIN &&
+			mode != BONDING_MODE_BALANCE &&
+			mode != BONDING_MODE_BROADCAST) {
+		/* Only the primary slave is affected */
+		rte_eth_promiscuous_enable(priv->primary_port);
+		return;
+	}
+
+	/* All slaves are affected */
+	for (idx = 0; idx < priv->slave_count; idx++)
+		rte_eth_promiscuous_enable(priv->slaves[idx]);
+}
+
+/**
+ * Disable promiscuous mode on the bonded device and record the setting.
+ *
+ * In round robin, balance and broadcast modes the setting is applied to
+ * every slave. In active backup mode, and in any other mode, it is applied
+ * to the primary port only.
+ */
+static void
+bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+	struct bond_dev_private *priv = dev->data->dev_private;
+	uint8_t mode;
+	int idx;
+
+	priv->promiscuous_en = 0;
+
+	mode = priv->mode;
+	if (mode != BONDING_MODE_ROUND_ROBIN &&
+			mode != BONDING_MODE_BALANCE &&
+			mode != BONDING_MODE_BROADCAST) {
+		/* Only the primary slave is affected */
+		rte_eth_promiscuous_disable(priv->primary_port);
+		return;
+	}
+
+	/* All slaves are affected */
+	for (idx = 0; idx < priv->slave_count; idx++)
+		rte_eth_promiscuous_disable(priv->slaves[idx]);
+}
+
+
+/*
+ * Link status change (LSC) callback registered on each slave port.
+ *
+ * port_id is the slave port that raised the event and param points to the
+ * port id of the bonded device the slave was added to. The event is ignored
+ * unless it is an LSC event, the bonded device is valid and started, and
+ * port_id is present in the bonded device's slave list.
+ *
+ * Link up: the slave is appended to the active slave list. If it is the first
+ * active slave it also becomes the primary port and the bonded device
+ * inherits its link properties.
+ * Link down: the slave is removed from the active slave list, the bonded
+ * device's link properties are reset when no active slave remains, and a new
+ * primary port is chosen if the primary was the port that went down.
+ */
+static void
+bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
+		void *param)
+{
+	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
+	struct bond_dev_private *internals;
+	struct rte_eth_link link;
+
+	/* valid_slave has to start at 0: it is only ever assigned inside the
+	 * search loop below, so without an initialiser an event from a port
+	 * that is not a slave would be tested against an indeterminate value */
+	int i, bonded_port_id, valid_slave = 0, active_pos = -1;
+
+	if (type != RTE_ETH_EVENT_INTR_LSC)
+		return;
+
+	if (param == NULL)
+		return;
+
+	bonded_port_id = *(uint8_t *)param;
+
+	bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+	slave_eth_dev = &rte_eth_devices[port_id];
+
+	/* valid_bonded_ethdev() returns 0 for a bonded device */
+	if (valid_bonded_ethdev(bonded_eth_dev))
+		return;
+
+	internals = bonded_eth_dev->data->dev_private;
+
+	/* If the device isn't started don't handle interrupts */
+	if (!bonded_eth_dev->data->dev_started)
+		return;
+
+	/* verify that port_id is a valid slave of bonded port */
+	for (i = 0; i < internals->slave_count; i++) {
+		if (internals->slaves[i] == port_id) {
+			valid_slave = 1;
+			break;
+		}
+	}
+
+	if (!valid_slave)
+		return;
+
+	/* Search for port in active port list */
+	for (i = 0; i < internals->active_slave_count; i++) {
+		if (port_id == internals->active_slaves[i]) {
+			active_pos = i;
+			break;
+		}
+	}
+
+	rte_eth_link_get_nowait(port_id, &link);
+	if (link.link_status) {
+		/* Link came up: nothing to do if the port is already active */
+		if (active_pos == -1) {
+			/* if no active slave ports then set this port to be primary port */
+			if (internals->active_slave_count == 0) {
+				/* If first active slave, then change link status */
+				bonded_eth_dev->data->dev_link.link_status = 1;
+				internals->primary_port = port_id;
+
+				/* Inherit eth dev link properties from first active slave */
+				link_properties_set(bonded_eth_dev,
+						&(slave_eth_dev->data->dev_link));
+
+			}
+			internals->active_slaves[internals->active_slave_count++] = port_id;
+		}
+	} else {
+		/* Link went down: nothing to do if the port was not active */
+		if (active_pos != -1) {
+			/* Remove from active slave list by shifting the tail down */
+			for (i = active_pos; i < (internals->active_slave_count - 1); i++)
+				internals->active_slaves[i] = internals->active_slaves[i+1];
+
+			internals->active_slave_count--;
+
+			/* No active slaves, change link status to down and reset other
+			 * link properties */
+			if (internals->active_slave_count == 0)
+				link_properties_reset(bonded_eth_dev);
+
+			/* Update primary id: take the first active slave from the list,
+			 * or fall back to the first entry of the slave list if no
+			 * slave is active */
+			if (port_id == internals->primary_port) {
+				if (internals->active_slave_count > 0)
+					internals->primary_port = internals->active_slaves[0];
+				else
+					internals->primary_port = internals->slaves[0];
+			}
+		}
+	}
+}
+
+/* Ethdev operations table installed on every bonded device by
+ * rte_eth_bond_create(). */
+static struct eth_dev_ops default_dev_ops = {
+	/* device life cycle */
+	.dev_configure = bond_ethdev_configure,
+	.dev_start = bond_ethdev_start,
+	.dev_stop = bond_ethdev_stop,
+	.dev_close = bond_ethdev_close,
+	.dev_infos_get = bond_ethdev_info,
+
+	/* queue management */
+	.rx_queue_setup = bond_ethdev_rx_queue_setup,
+	.rx_queue_release = bond_ethdev_rx_queue_release,
+	.tx_queue_setup = bond_ethdev_tx_queue_setup,
+	.tx_queue_release = bond_ethdev_tx_queue_release,
+
+	/* link state and statistics */
+	.link_update = bond_ethdev_link_update,
+	.stats_get = bond_ethdev_stats_get,
+	.stats_reset = bond_ethdev_stats_reset,
+
+	/* promiscuous mode */
+	.promiscuous_enable = bond_ethdev_promiscuous_enable,
+	.promiscuous_disable = bond_ethdev_promiscuous_disable,
+};
+
+/* Derive the number of sockets from the physical memory layout: walk the
+ * memory segments up to the first one with a NULL address (or
+ * RTE_MAX_MEMSEG entries) and return the largest socket_id seen plus one. */
+static uint8_t
+number_of_sockets(void)
+{
+	const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+	int max_socket_id = 0;
+	int idx = 0;
+
+	while ((idx < RTE_MAX_MEMSEG) && (seg[idx].addr != NULL)) {
+		if (max_socket_id < seg[idx].socket_id)
+			max_socket_id = seg[idx].socket_id;
+		idx++;
+	}
+
+	/* Number of sockets = maximum socket_id + 1 */
+	return max_socket_id + 1;
+}
+
+/*
+ * Create a bonded ethdev.
+ *
+ * name is used as the allocation tag for the device's private data, mode is
+ * handed to bond_ethdev_mode_set() and socket_id selects the socket the
+ * private data is allocated on (it must be lower than number_of_sockets()).
+ *
+ * The new device starts with one RX and one TX queue, link down, no slaves
+ * and the layer 2 balance transmit policy.
+ *
+ * Returns the port id of the new device on success, -1 on failure.
+ */
+int
+rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	struct bond_dev_private *internals = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	struct eth_driver *eth_drv = NULL;
+
+	/* now do all data allocation - for eth_dev structure, dummy pci driver
+	 * and internal (private) data
+	 */
+
+	if (name == NULL) {
+		RTE_LOG(ERR, PMD, "Invalid name specified\n");
+		goto err;
+	}
+
+	if (socket_id >= number_of_sockets()) {
+		RTE_LOG(ERR, PMD,
+				"%s: invalid socket id specified to create bonded device on.\n",
+				__func__);
+		goto err;
+	}
+
+	pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id);
+	if (pci_dev == NULL) {
+		RTE_LOG(ERR, PMD, "Unable to malloc pci dev on socket\n");
+		goto err;
+	}
+
+	/* The dummy pci driver lives inside eth_drv (zeroed by the allocator),
+	 * so no separate rte_pci_driver allocation is needed */
+	eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id);
+	if (eth_drv == NULL) {
+		RTE_LOG(ERR, PMD, "Unable to malloc eth_drv on socket\n");
+		goto err;
+	}
+
+	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id);
+	if (internals == NULL) {
+		RTE_LOG(ERR, PMD, "Unable to malloc internals on socket\n");
+		goto err;
+	}
+
+	/* reserve an ethdev entry */
+	eth_dev = rte_eth_dev_allocate();
+	if (eth_dev == NULL) {
+		RTE_LOG(ERR, PMD, "Unable to allocate rte_eth_dev\n");
+		goto err;
+	}
+
+	/* Allocate the MAC storage before wiring anything into eth_dev so a
+	 * failure here is caught instead of leaving a NULL mac_addrs behind */
+	eth_dev->data->mac_addrs = rte_zmalloc(name, ETHER_ADDR_LEN, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		RTE_LOG(ERR, PMD, "Unable to malloc mac_addrs\n");
+		goto err;
+	}
+
+	pci_dev->numa_node = socket_id;
+
+	eth_drv->pci_drv.name = driver_name;
+	eth_dev->driver = eth_drv;
+
+	eth_dev->data->dev_private = internals;
+	eth_dev->data->nb_rx_queues = (uint16_t)1;
+	eth_dev->data->nb_tx_queues = (uint16_t)1;
+
+	eth_dev->data->dev_link.link_status = 0;
+
+	eth_dev->data->dev_started = 0;
+	eth_dev->data->promiscuous = 0;
+	eth_dev->data->scattered_rx = 0;
+	eth_dev->data->all_multicast = 0;
+
+	eth_dev->dev_ops = &default_dev_ops;
+	eth_dev->pci_dev = pci_dev;
+
+	eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
+	if (bond_ethdev_mode_set(eth_dev, mode)) {
+		RTE_LOG(ERR, PMD,
+				"%s: failed to set bonded device %d mode to %d\n",
+				__func__, eth_dev->data->port_id, mode);
+		goto err;
+	}
+
+	internals->primary_port = 0;
+	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
+	internals->user_defined_mac = 0;
+	internals->link_props_set = 0;
+	internals->slave_count = 0;
+	internals->active_slave_count = 0;
+
+	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
+	memset(internals->slaves, 0, sizeof(internals->slaves));
+
+	memset(internals->presisted_slaves_conf, 0,
+			sizeof(internals->presisted_slaves_conf));
+
+	return eth_dev->data->port_id;
+
+err:
+	if (eth_dev != NULL) {
+		/* Detach everything that is about to be freed so the reserved
+		 * ethdev entry does not keep pointers to released memory.
+		 * NOTE(review): the ethdev entry itself stays reserved; no
+		 * release call is visible in this part of the file - confirm. */
+		rte_free(eth_dev->data->mac_addrs);
+		eth_dev->data->mac_addrs = NULL;
+		eth_dev->data->dev_private = NULL;
+		eth_dev->driver = NULL;
+		eth_dev->pci_dev = NULL;
+		eth_dev->dev_ops = NULL;
+	}
+
+	/* rte_free() does nothing when handed NULL */
+	rte_free(pci_dev);
+	rte_free(eth_drv);
+	rte_free(internals);
+	return -1;
+}
+
+
+
+/*
+ * Attach slave_port_id to the bonded device bonded_port_id.
+ *
+ * The request is rejected when either port id is invalid, when the port is
+ * already a slave of any bonded device, when its PCI device id differs from
+ * that of an existing slave, or when the bonded device already has link
+ * properties set and the new slave's do not match them.
+ *
+ * The first slave added becomes the primary port, supplies the bonded
+ * device's link properties and, unless a MAC was set by the user, its MAC
+ * address. If the bonded device is already started the slave is configured
+ * and, when its link is up, added to the active slave list.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
+	struct bond_dev_private *internals;
+	struct bond_dev_private *temp_internals;
+	struct rte_eth_link link_props;
+
+	int i, j;
+
+	/* Verify that port id's are valid bonded and slave ports */
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		goto err_add;
+
+	if (valid_slave_port_id(slave_port_id) != 0)
+		goto err_add;
+
+	/*
+	 * Verify that new slave device is not already a slave of another bonded
+	 * device */
+	for (i = rte_eth_dev_count()-1; i >= 0; i--) {
+		if (valid_bonded_ethdev(&rte_eth_devices[i]) == 0) {
+			temp_internals = rte_eth_devices[i].data->dev_private;
+			for (j = 0; j < temp_internals->slave_count; j++) {
+				/* Device already a slave of a bonded device */
+				if (temp_internals->slaves[j] == slave_port_id)
+					goto err_add;
+			}
+		}
+	}
+
+	bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+	internals = bonded_eth_dev->data->dev_private;
+
+	slave_eth_dev = &rte_eth_devices[slave_port_id];
+
+	/* Check that new slave device is the same type as the other slaves
+	 * and not repetitive */
+	for (i = 0; i < internals->slave_count; i++) {
+		if (slave_eth_dev->pci_dev->driver->id_table->device_id !=
+				rte_eth_devices[internals->slaves[i]].pci_dev->driver->id_table->device_id ||
+			internals->slaves[i] == slave_port_id)
+			goto err_add;
+	}
+
+	/* Check slave link properties are supported if props are set, all
+	 * slaves must be the same. This is done before anything is recorded
+	 * for the slave so that a rejected port leaves no stored config behind.
+	 * NOTE(review): assumes slave_config_store() does not alter link
+	 * properties - confirm against its definition. */
+	if (internals->slave_count > 0 && internals->link_props_set &&
+			link_properties_valid(&(bonded_eth_dev->data->dev_link),
+					&(slave_eth_dev->data->dev_link))) {
+		RTE_LOG(ERR, PMD,
+				"%s: Slave port %d link speed/duplex not supported\n",
+				__func__, slave_port_id);
+		goto err_add;
+	}
+
+	/* Add slave details to bonded device */
+	internals->slaves[internals->slave_count] = slave_port_id;
+
+	slave_config_store(internals, slave_eth_dev);
+
+	if (internals->slave_count < 1) {
+		/* if MAC is not user defined then use MAC of first slave add to bonded
+		 * device */
+		if (!internals->user_defined_mac)
+			mac_address_set(bonded_eth_dev, slave_eth_dev->data->mac_addrs);
+
+		/* Inherit eth dev link properties from first slave */
+		link_properties_set(bonded_eth_dev, &(slave_eth_dev->data->dev_link));
+
+		/* Make primary slave */
+		internals->primary_port = slave_port_id;
+	} else if (!internals->link_props_set) {
+		/* No link properties recorded yet: take them from this slave */
+		link_properties_set(bonded_eth_dev,
+				&(slave_eth_dev->data->dev_link));
+	}
+
+	internals->slave_count++;
+
+	/* Update all slave devices MACs*/
+	mac_address_slaves_update(bonded_eth_dev);
+
+	if (bonded_eth_dev->data->dev_started) {
+		/* NOTE(review): on failure the slave stays recorded in the slave
+		 * list even though -1 is returned - confirm whether callers are
+		 * expected to call rte_eth_bond_slave_remove() afterwards. */
+		if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) {
+			RTE_LOG(ERR, PMD, "rte_bond_slaves_configure: port=%d\n",
+					slave_port_id);
+			goto err_add;
+		}
+	}
+
+	/* Register link status change callback with bonded device pointer as
+	 * argument*/
+	rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
+			bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id);
+
+	/* If bonded device is started then we can add the slave to our active
+	 * slave array */
+	if (bonded_eth_dev->data->dev_started) {
+		rte_eth_link_get_nowait(slave_port_id, &link_props);
+
+		if (link_props.link_status == 1) {
+			internals->active_slaves[internals->active_slave_count++] =
+					slave_port_id;
+		}
+	}
+
+	return 0;
+
+err_add:
+	RTE_LOG(ERR, PMD, "Failed to add port %d as slave\n", slave_port_id);
+	return -1;
+
+}
+
+/*
+ * Detach slave_port_id from the bonded device bonded_port_id.
+ *
+ * The slave is dropped from the active slave list (if present) and from the
+ * slave list, its LSC callback is unregistered and its stored MAC address is
+ * written back. If the removed slave was the primary port a new primary is
+ * chosen: the first active slave, else the first remaining slave, else 0.
+ * When no active slave is left the bonded device's link properties are
+ * reset, and when no slave is left at all and the MAC was not user defined
+ * the bonded device's MAC is cleared.
+ *
+ * Returns 0 on success, -1 if a port id is invalid or the port is not a
+ * slave of the bonded device.
+ */
+int
+rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+	struct bond_dev_private *internals;
+	struct slave_conf *slave_conf;
+
+	int i;
+	int pos = -1;
+
+	/* Verify that port id's are valid bonded and slave ports */
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		goto err_del;
+
+	if (valid_slave_port_id(slave_port_id) != 0)
+		goto err_del;
+
+	internals = rte_eth_devices[bonded_port_id].data->dev_private;
+
+
+	/* first remove from active slave list */
+	for (i = 0; i < internals->active_slave_count; i++) {
+		if (internals->active_slaves[i] == slave_port_id)
+			pos = i;
+
+		/* shift active slaves up active array list */
+		if (pos >= 0 && i < (internals->active_slave_count - 1))
+			internals->active_slaves[i] = internals->active_slaves[i+1];
+	}
+
+	if (pos >= 0)
+		internals->active_slave_count--;
+
+
+	pos = -1;
+	/* now remove from slave list */
+	for (i = 0; i < internals->slave_count; i++) {
+		if (internals->slaves[i] == slave_port_id)
+			pos = i;
+
+		/* shift slaves up list; stop one short of the end so that
+		 * slaves[i+1] never reads past the last valid entry */
+		if (pos >= 0 && i < (internals->slave_count - 1))
+			internals->slaves[i] = internals->slaves[i+1];
+	}
+
+	if (pos < 0)
+		goto err_del;
+
+	/* Un-register link status change callback with bonded device pointer as
+	 * argument*/
+	rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC,
+			bond_ethdev_lsc_event_callback,
+			&rte_eth_devices[bonded_port_id].data->port_id);
+
+	/* Restore original MAC address of slave device */
+	slave_conf = slave_config_get(internals, slave_port_id);
+
+	mac_address_set(&rte_eth_devices[slave_port_id], &(slave_conf->mac_addr));
+
+	slave_config_clear(internals, &rte_eth_devices[slave_port_id]);
+
+	/* slave_count is only decremented here, after the stored config has
+	 * been used and cleared */
+	internals->slave_count--;
+
+	/*  first slave in the active list will be the primary by default,
+	 *  otherwise use first device in list */
+	if (internals->primary_port == slave_port_id) {
+		if (internals->active_slave_count > 0)
+			internals->primary_port = internals->active_slaves[0];
+		else if (internals->slave_count > 0)
+			internals->primary_port = internals->slaves[0];
+		else
+			internals->primary_port = 0;
+	}
+
+	if (internals->active_slave_count < 1) {
+		/* reset device link properties as no slaves are active */
+		link_properties_reset(&rte_eth_devices[bonded_port_id]);
+
+		/* if no slaves are any longer attached to bonded device and MAC is not
+		 * user defined then clear MAC of bonded device as it will be reset
+		 * when a new slave is added */
+		if (internals->slave_count < 1 && !internals->user_defined_mac)
+			memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0,
+					sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs)));
+	}
+
+	return 0;
+
+err_del:
+	RTE_LOG(ERR, PMD,
+			"Cannot remove slave device (not present in bonded device)\n");
+	return -1;
+
+}
+
+/* Change the bonding mode of a bonded device. Returns -1 when the port id
+ * is not a valid bonded port, otherwise whatever bond_ethdev_mode_set()
+ * returns. */
+int
+rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
+{
+	struct rte_eth_dev *bond_dev;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	bond_dev = &rte_eth_devices[bonded_port_id];
+
+	return bond_ethdev_mode_set(bond_dev, mode);
+}
+
+
+/* Report the bonding mode stored in the bonded device's private data, or
+ * -1 when the port id is not a valid bonded port. */
+int
+rte_eth_bond_mode_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv = NULL;
+
+	if (valid_bonded_port_id(bonded_port_id) == 0)
+		priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	if (priv == NULL)
+		return -1;
+
+	return priv->mode;
+}
+
+/* Make slave_port_id the primary port of the bonded device. The port must
+ * already be in the bonded device's slave list. Returns 0 on success and -1
+ * when either port id is invalid or the port is not a slave of this device. */
+int
+rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+	struct bond_dev_private *priv;
+	int idx = 0;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0 ||
+			valid_slave_port_id(slave_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	/* Look for the proposed primary port in the slave list */
+	while (idx < priv->slave_count) {
+		if (priv->slaves[idx] == slave_port_id) {
+			priv->primary_port = slave_port_id;
+			return 0;
+		}
+		idx++;
+	}
+
+	/* Slave is not bound to this master device */
+	return -1;
+}
+
+/* Report the primary port of the bonded device. Returns -1 when the port id
+ * is not a valid bonded port or the device has no slaves. */
+int
+rte_eth_bond_primary_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	return (priv->slave_count < 1) ? -1 : priv->primary_port;
+}
+
+/* Hand back a pointer to the bonded device's own slave list (no copy is
+ * made) through *slaves and return the number of slaves in it. Returns -1
+ * when the port id is not a valid bonded port or slaves is NULL. */
+int
+rte_eth_bond_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0 || slaves == NULL)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+	*slaves = (uint8_t *)(&priv->slaves);
+
+	return priv->slave_count;
+}
+
+/* Hand back a pointer to the bonded device's own active slave list (no copy
+ * is made) through *slaves and return the number of active slaves. Returns
+ * -1 when the port id is not a valid bonded port or slaves is NULL. */
+int
+rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0 || slaves == NULL)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+	*slaves = (uint8_t *)(&priv->active_slaves);
+
+	return priv->active_slave_count;
+}
+
+
+/* Set an explicit MAC address on the bonded device, flag it as user defined
+ * and, if any slaves are attached, push it to them. Returns -1 when the
+ * port id is invalid or the MAC cannot be set, otherwise 0 or the result of
+ * mac_address_slaves_update(). */
+int
+rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+		struct ether_addr *mac_addr)
+{
+	struct rte_eth_dev *bond_dev;
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	bond_dev = &rte_eth_devices[bonded_port_id];
+	priv = bond_dev->data->dev_private;
+
+	/* The bonded device takes the address first */
+	if (mac_address_set(bond_dev, mac_addr))
+		return -1;
+
+	priv->user_defined_mac = 1;
+
+	/* Slaves only need updating when there are some */
+	return (priv->slave_count > 0) ? mac_address_slaves_update(bond_dev) : 0;
+}
+
+
+/* Drop the user defined MAC flag and, if slaves are attached, switch the
+ * bonded device and its slaves back to the MAC stored for the primary
+ * slave. Returns -1 when the port id is invalid or the MAC cannot be set,
+ * otherwise 0 or the result of mac_address_slaves_update(). */
+int
+rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
+{
+	struct rte_eth_dev *bond_dev;
+	struct bond_dev_private *priv;
+	struct slave_conf *primary_conf;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	bond_dev = &rte_eth_devices[bonded_port_id];
+	priv = bond_dev->data->dev_private;
+
+	priv->user_defined_mac = 0;
+
+	/* No need to update anything as no slaves present */
+	if (!(priv->slave_count > 0))
+		return 0;
+
+	primary_conf = slave_config_get(priv, priv->primary_port);
+
+	/* Set MAC Address of Bonded Device */
+	if (mac_address_set(bond_dev, &primary_conf->mac_addr) != 0)
+		return -1;
+
+	/* Update all slave devices MAC addresses */
+	return mac_address_slaves_update(bond_dev);
+}
+
+/* Store the balance mode transmit policy for the bonded device. Only the
+ * three BALANCE_XMIT_POLICY_* values are accepted. Returns 0 on success and
+ * -1 for an invalid port id or policy. */
+int
+rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
+{
+	struct bond_dev_private *priv;
+
+	if (valid_bonded_port_id(bonded_port_id) != 0)
+		return -1;
+
+	priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	if (policy != BALANCE_XMIT_POLICY_LAYER2 &&
+			policy != BALANCE_XMIT_POLICY_LAYER23 &&
+			policy != BALANCE_XMIT_POLICY_LAYER34)
+		return -1;
+
+	priv->balance_xmit_policy = policy;
+	return 0;
+}
+
+
+/* Report the balance mode transmit policy stored for the bonded device, or
+ * -1 when the port id is not a valid bonded port. */
+int
+rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
+{
+	struct bond_dev_private *priv = NULL;
+
+	if (valid_bonded_port_id(bonded_port_id) == 0)
+		priv = rte_eth_devices[bonded_port_id].data->dev_private;
+
+	if (priv == NULL)
+		return -1;
+
+	return priv->balance_xmit_policy;
+}
diff --git a/lib/librte_bond/rte_bond.h b/lib/librte_bond/rte_bond.h
new file mode 100644
index 0000000..97b6d5e
--- /dev/null
+++ b/lib/librte_bond/rte_bond.h
@@ -0,0 +1,228 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_BOND_H_
+#define _RTE_ETH_BOND_H_
+
+/**
+ * @file
+ * RTE Link Bonding Ethernet Device
+ * Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple
+ * (slave) NICs into a single logical interface. The bonded device processes
+ * these interfaces based on the mode of operation specified and supported.
+ * This implementation supports 4 modes of operation: round robin, active
+ * backup, balance and broadcast. These provide redundant links, fault
+ * tolerance and/or load balancing of network ports.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_ether.h>
+
+/** Link Bonding Mode Definitions */
+/* Values for the mode argument of rte_eth_bond_create() and
+ * rte_eth_bond_mode_set(), and the value returned by
+ * rte_eth_bond_mode_get(). */
+#define BONDING_MODE_ROUND_ROBIN		(0)
+#define BONDING_MODE_ACTIVE_BACKUP		(1)
+#define BONDING_MODE_BALANCE			(2)
+#define BONDING_MODE_BROADCAST			(3)
+
+/** Balance Mode Transmit Policy Types */
+/* Values accepted by rte_eth_bond_xmit_policy_set(); a newly created bonded
+ * device starts with BALANCE_XMIT_POLICY_LAYER2. */
+#define BALANCE_XMIT_POLICY_LAYER2		(0)
+#define BALANCE_XMIT_POLICY_LAYER23		(1)
+#define BALANCE_XMIT_POLICY_LAYER34		(2)
+
+/**
+ * Create a bonded rte_eth_dev device
+ *
+ * The device is created with no slaves, link down, and the
+ * BALANCE_XMIT_POLICY_LAYER2 transmit policy.
+ *
+ * @param name
+ *	Name used to tag the memory allocated for the device; must not be NULL
+ * @param mode
+ *	Bonding mode to operate in (one of the BONDING_MODE_* values)
+ * @param socket_id
+ *	Socket on which the device's private data is allocated; must be lower
+ *	than the number of sockets detected from the memory layout
+ *
+ * @return
+ *	Port Id of created rte_eth_dev on success, negative value otherwise
+ */
+int
+rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id);
+
+/**
+ * Add a rte_eth_dev device as a slave to the bonded device
+ *
+ * The first slave added becomes the primary slave and, unless a MAC address
+ * was set with rte_eth_bond_mac_address_set(), provides the MAC address of
+ * the bonded device. A port cannot be added if it is already a slave of any
+ * bonded device, if its PCI device id differs from that of the existing
+ * slaves, or if its link properties do not match those already set on the
+ * bonded device.
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param slave_port_id
+ *	Port id of the device to add as a slave
+ *
+ * @return
+ *	0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Remove a slave rte_eth_dev device from the bonded device
+ *
+ * The MAC address the slave had when it was added is restored. If the slave
+ * was the primary slave a new primary is selected from the remaining slaves.
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param slave_port_id
+ *	Port id of the slave to remove; must currently be a slave of the
+ *	bonded device
+ *
+ * @return
+ *	0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Set link bonding mode of bonded device
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param mode
+ *	Bonding mode to switch to (one of the BONDING_MODE_* values)
+ *
+ * @return
+ *	0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode);
+
+/**
+ * Get link bonding mode of bonded device
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ *
+ * @return
+ *	link bonding mode on success, negative value otherwise
+ */
+int
+rte_eth_bond_mode_get(uint8_t bonded_port_id);
+
+/**
+ * Set slave rte_eth_dev as primary slave of bonded device
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param slave_port_id
+ *	Port id of the new primary slave; must already be a slave of the
+ *	bonded device
+ *
+ * @return
+ *	0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id);
+
+/**
+ * Get primary slave of bonded device
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ *
+ * @return
+ *	Port Id of primary slave on success, -1 on failure (including when the
+ *	bonded device has no slaves)
+ */
+int
+rte_eth_bond_primary_get(uint8_t bonded_port_id);
+
+/**
+ * Get the list of the slaves port id's of the bonded device
+ *
+ * No copy is made: on success *slaves is set to point at the bonded device's
+ * own slave list, so its contents change as slaves are added and removed.
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param slaves
+ *	Location that receives the pointer to the slave list; must not be NULL
+ *
+ * @return
+ *	number of slaves associated with bonded device on success,
+ *	negative value otherwise
+ */
+int
+rte_eth_bond_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves);
+
+/**
+ * Get the list of the active slaves port id's of the bonded device.
+ *
+ * No copy is made: on success *slaves is set to point at the bonded device's
+ * own active slave list, so its contents change as slave links go up and
+ * down.
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param slaves
+ *	Location that receives the pointer to the active slave list; must not
+ *	be NULL
+ *
+ * @return
+ *	number of active slaves associated with bonded device on success,
+ *	negative value otherwise
+ */
+int
+rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, const uint8_t **slaves);
+
+/**
+ * Set explicit MAC address to use on bonded device and its slaves.
+ *
+ * Once set, the address is treated as user defined and is no longer replaced
+ * by the MAC address of the first slave added.
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param mac_addr
+ *	MAC address to apply to the bonded device and its slaves
+ *
+ * @return
+ *	0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+		struct ether_addr *mac_addr);
+
+/**
+ * Reset bonded device to use MAC from primary slave on bonded device and its
+ * slaves.
+ *
+ * The address is no longer treated as user defined. If the bonded device has
+ * no slaves only that flag is cleared.
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ *
+ * @return
+ *	0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_mac_address_reset(uint8_t bonded_port_id);
+
+/**
+ * Set the transmit policy for bonded device to use when it is operating in
+ * balance mode
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ * @param policy
+ *	Transmit policy (one of the BALANCE_XMIT_POLICY_* values); any other
+ *	value is rejected
+ *
+ * @return
+ *	0 on success, negative value otherwise
+ */
+int
+rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy);
+
+/**
+ * Get the transmit policy set on bonded device for balance mode operation
+ *
+ * @param bonded_port_id
+ *	Port id of the bonded device
+ *
+ * @return
+ *	balance transmit policy on success, negative value otherwise
+ */
+int
+rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index a836577..a803a5c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -177,8 +177,13 @@ ifeq ($(CONFIG_RTE_LIBRTE_PMD_PCAP),y)
 LDLIBS += -lrte_pmd_pcap -lpcap
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_BOND),y)
+LDLIBS += -lrte_bond
 endif
 
+endif
+
+
 LDLIBS += $(EXECENV_LDLIBS)
 
 LDLIBS += --end-group
-- 
1.8.5.3
    
    
More information about the dev
mailing list