[dpdk-dev] [PATCH v2 4/7] net/fm10k: add flow and switch management

Xiaojun Liu xiaojun.liu at silicom.co.il
Wed Dec 11 10:52:10 CET 2019


To support switch management, add the following new files:
Add fm10k/switch/fm10k_flow.h.
Add fm10k/switch/fm10k_flow.c(support dpdk flow operations)
Add fm10k/switch/fm10k_switch.c(support switch management)
Modify fm10k/Makefile(add fm10k_flow.c
and fm10k_switch.c).

To avoid configuration for both kernel driver
and userspace SDK outside DPDK, we add switch
management in FM10K DPDK PMD driver.
To enable switch management, you need add
CONFIG_RTE_FM10K_MANAGEMENT=y in
config/common_linux when building.

Signed-off-by: Xiaojun Liu <xiaojun.liu at silicom.co.il>
---
 drivers/net/fm10k/Makefile              |    2 +
 drivers/net/fm10k/switch/fm10k_flow.c   |  872 +++++++++++
 drivers/net/fm10k/switch/fm10k_flow.h   |   26 +
 drivers/net/fm10k/switch/fm10k_switch.c | 2562 +++++++++++++++++++++++++++++++
 4 files changed, 3462 insertions(+)
 create mode 100644 drivers/net/fm10k/switch/fm10k_flow.c
 create mode 100644 drivers/net/fm10k/switch/fm10k_flow.h
 create mode 100644 drivers/net/fm10k/switch/fm10k_switch.c

diff --git a/drivers/net/fm10k/Makefile b/drivers/net/fm10k/Makefile
index ed73251..ab263c5 100644
--- a/drivers/net/fm10k/Makefile
+++ b/drivers/net/fm10k/Makefile
@@ -93,6 +93,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_spico_code.c
 SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_ffu.c
 SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_config.c
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_switch.c
+SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_flow.c
 endif
 
 SRCS-$(CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR) += fm10k_rxtx_vec.c
diff --git a/drivers/net/fm10k/switch/fm10k_flow.c b/drivers/net/fm10k/switch/fm10k_flow.c
new file mode 100644
index 0000000..353f021
--- /dev/null
+++ b/drivers/net/fm10k/switch/fm10k_flow.c
@@ -0,0 +1,872 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019   Silicom Ltd. Connectivity Solutions
+ */
+
+#include <sys/queue.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_eth_ctrl.h>
+#include <rte_tailq.h>
+#include <rte_flow_driver.h>
+
+#include "fm10k_flow.h"
+#include "fm10k_switch.h"
+#include "fm10k_ffu.h"
+#include "fm10k_config.h"
+
+
+static int fm10k_flow_validate(struct rte_eth_dev *dev,
+			      const struct rte_flow_attr *attr,
+			      const struct rte_flow_item pattern[],
+			      const struct rte_flow_action actions[],
+			      struct rte_flow_error *error);
+static struct rte_flow *fm10k_flow_create(struct rte_eth_dev *dev,
+					 const struct rte_flow_attr *attr,
+					 const struct rte_flow_item pattern[],
+					 const struct rte_flow_action actions[],
+					 struct rte_flow_error *error);
+static int fm10k_flow_destroy(struct rte_eth_dev *dev,
+			     struct rte_flow *flow,
+			     struct rte_flow_error *error);
+static int fm10k_flow_flush(struct rte_eth_dev *dev,
+			   struct rte_flow_error *error);
+static int fm10k_flow_parse_attr(const struct rte_flow_attr *attr,
+				struct rte_flow_error *error);
+
+const struct rte_flow_ops fm10k_flow_ops = {
+	.validate = fm10k_flow_validate,
+	.create = fm10k_flow_create,
+	.destroy = fm10k_flow_destroy,
+	.flush = fm10k_flow_flush,
+};
+
+union fm10k_filter_t cons_filter;
+enum rte_filter_type fm10k_cons_filter_type = RTE_ETH_FILTER_NONE;
+
+/**
+ * MPLS filter configuration.
+ */
+enum fm10k_mpls_type {
+	FM10K_MPLS_TYPE_UNI,
+	FM10K_MPLS_TYPE_MULTI,
+};
+
+enum fm10k_mpls_action {
+	FM10K_MPLS_ACTION_DROP,
+	FM10K_MPLS_ACTION_QUEUE,
+};
+
+struct fm10k_mpls_filter_conf {
+	enum fm10k_mpls_type mpls_type; /**< mandatory for MPLS */
+	uint32_t mpls_header;     /**< MPLS header */
+	uint32_t mpls_header_mask;      /**< MPLS header mask */
+	enum fm10k_mpls_action mpls_action;
+	uint16_t queue;
+	uint8_t ffu_id;
+	uint8_t ffu_prio;
+};
+
+/**
+ * VLAN filter configuration.
+ */
+struct fm10k_vlan_filter_conf {
+	int ffu_id;
+	uint8_t ffu_prio;
+	uint8_t is_ingress;
+	uint8_t port;
+	uint8_t in_ext_port;
+	uint8_t out_ext_port;
+	uint16_t in_vlan;
+	uint16_t out_vlan;
+};
+
+
+union fm10k_filter_t {
+	struct fm10k_mpls_filter_conf mpls_filter;
+	struct fm10k_vlan_filter_conf vlan_filter;
+};
+
+typedef int (*parse_filter_t)(struct rte_eth_dev *dev,
+			      const struct rte_flow_attr *attr,
+			      const struct rte_flow_item pattern[],
+			      const struct rte_flow_action actions[],
+			      struct rte_flow_error *error,
+			      union fm10k_filter_t *filter);
+
+struct fm10k_valid_pattern {
+	enum rte_flow_item_type *items;
+	parse_filter_t parse_filter;
+};
+
+static enum rte_flow_item_type pattern_mpls_1[] = {
+	RTE_FLOW_ITEM_TYPE_ETH,
+	RTE_FLOW_ITEM_TYPE_MPLS,
+	RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_vlan_1[] = {
+	RTE_FLOW_ITEM_TYPE_VLAN,
+	RTE_FLOW_ITEM_TYPE_PHY_PORT,
+	RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_vlan_2[] = {
+	RTE_FLOW_ITEM_TYPE_VLAN,
+	RTE_FLOW_ITEM_TYPE_PHY_PORT,
+	RTE_FLOW_ITEM_TYPE_PHY_PORT,
+	RTE_FLOW_ITEM_TYPE_END,
+};
+
+static int fm10k_flow_parse_mpls_filter(struct rte_eth_dev *dev,
+				    const struct rte_flow_attr *attr,
+				    const struct rte_flow_item pattern[],
+				    const struct rte_flow_action actions[],
+				    struct rte_flow_error *error,
+				    union fm10k_filter_t *filter);
+static int
+fm10k_flow_parse_vlan_filter(struct rte_eth_dev *dev,
+			    const struct rte_flow_attr *attr,
+			    const struct rte_flow_item pattern[],
+			    const struct rte_flow_action actions[],
+			    struct rte_flow_error *error,
+			    union fm10k_filter_t *filter);
+
+static struct fm10k_valid_pattern fm10k_supported_patterns[] = {
+	/* MPLS */
+	{ pattern_mpls_1, fm10k_flow_parse_mpls_filter },
+	/* VLAN */
+	{ pattern_vlan_1, fm10k_flow_parse_vlan_filter },
+	/* VLAN */
+	{ pattern_vlan_2, fm10k_flow_parse_vlan_filter },
+};
+
+static const struct rte_flow_action *
+fm10k_next_item_of_action(const struct rte_flow_action *actions,
+		uint32_t *index)
+{
+	static const struct rte_flow_action *act;
+
+	act = actions + *index;
+	while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {
+		(*index)++;
+		act = actions + *index;
+	}
+	return act;
+}
+
+/* Find the first VOID or non-VOID item pointer */
+static const struct rte_flow_item *
+fm10k_find_first_item(const struct rte_flow_item *item, bool is_void)
+{
+	bool is_find;
+
+	while (item->type != RTE_FLOW_ITEM_TYPE_END) {
+		if (is_void)
+			is_find = item->type == RTE_FLOW_ITEM_TYPE_VOID;
+		else
+			is_find = item->type != RTE_FLOW_ITEM_TYPE_VOID;
+		if (is_find)
+			break;
+		item++;
+	}
+	return item;
+}
+
+/* Skip all VOID items of the pattern */
+static void
+fm10k_pattern_skip_void_item(struct rte_flow_item *items,
+			    const struct rte_flow_item *pattern)
+{
+	uint32_t cpy_count = 0;
+	const struct rte_flow_item *pb = pattern, *pe = pattern;
+
+	for (;;) {
+		/* Find a non-void item first */
+		pb = fm10k_find_first_item(pb, false);
+		if (pb->type == RTE_FLOW_ITEM_TYPE_END) {
+			pe = pb;
+			break;
+		}
+
+		/* Find a void item */
+		pe = fm10k_find_first_item(pb + 1, true);
+
+		cpy_count = pe - pb;
+		rte_memcpy(items, pb, sizeof(struct rte_flow_item) * cpy_count);
+
+		items += cpy_count;
+
+		if (pe->type == RTE_FLOW_ITEM_TYPE_END) {
+			pb = pe;
+			break;
+		}
+
+		pb = pe + 1;
+	}
+	/* Copy the END item. */
+	rte_memcpy(items, pe, sizeof(struct rte_flow_item));
+}
+
+/* Check if the pattern matches a supported item type array */
+static bool
+fm10k_match_pattern(enum rte_flow_item_type *item_array,
+		   struct rte_flow_item *pattern)
+{
+	struct rte_flow_item *item = pattern;
+
+	while ((*item_array == item->type) &&
+	       (*item_array != RTE_FLOW_ITEM_TYPE_END)) {
+		item_array++;
+		item++;
+	}
+
+	return (*item_array == RTE_FLOW_ITEM_TYPE_END &&
+		item->type == RTE_FLOW_ITEM_TYPE_END);
+}
+
+/* Find if there's parse filter function matched */
+static parse_filter_t
+fm10k_find_parse_filter_func(struct rte_flow_item *pattern, uint32_t *idx)
+{
+	parse_filter_t parse_filter = NULL;
+	uint8_t i = *idx;
+
+	for (; i < RTE_DIM(fm10k_supported_patterns); i++) {
+		if (fm10k_match_pattern(fm10k_supported_patterns[i].items,
+					pattern)) {
+			parse_filter = fm10k_supported_patterns[i].parse_filter;
+			break;
+		}
+	}
+
+	*idx = ++i;
+
+	return parse_filter;
+}
+
+/* Parse attributes */
+static int
+fm10k_flow_parse_attr(const struct rte_flow_attr *attr,
+		     struct rte_flow_error *error)
+{
+	/* Not supported */
+	if (attr->group) {
+		rte_flow_error_set(error, EINVAL,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   attr, "Not support group.");
+		return -rte_errno;
+	}
+
+	return 0;
+}
+
+/*
+ * MPLS
+ */
+/* 1. Last in item should be NULL as range is not supported.
+ * 2. Supported filter types: MPLS label.
+ * 3. Mask of fields which need to be matched should be
+ *    filled with 1.
+ * 4. Mask of fields which needn't to be matched should be
+ *    filled with 0.
+ */
+static int
+fm10k_flow_parse_mpls_pattern(__rte_unused struct rte_eth_dev *dev,
+			     const struct rte_flow_item *pattern,
+			     struct rte_flow_error *error,
+			     struct fm10k_mpls_filter_conf *filter)
+{
+	const struct rte_flow_item *item = pattern;
+	const struct rte_flow_item_mpls *mpls_spec;
+	const struct rte_flow_item_mpls *mpls_mask;
+	const struct rte_flow_item_eth *eth_spec;
+	enum rte_flow_item_type item_type;
+	const uint8_t label_mask[3] = {0xFF, 0xFF, 0xF0};
+	uint32_t label_be = 0;
+	uint32_t be_mask = 0;
+
+	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+		if (item->last) {
+			rte_flow_error_set(error, EINVAL,
+					   RTE_FLOW_ERROR_TYPE_ITEM,
+					   item,
+					   "Not support range");
+			return -rte_errno;
+		}
+		item_type = item->type;
+		switch (item_type) {
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			if (!item->spec || item->mask) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   item,
+						   "Invalid ETH item");
+				return -rte_errno;
+			}
+			eth_spec =
+				(const struct rte_flow_item_eth *)item->spec;
+
+			if (rte_be_to_cpu_16(eth_spec->type) == 0x8847) {
+				filter->mpls_type = FM10K_MPLS_TYPE_UNI;
+			} else if (rte_be_to_cpu_16(eth_spec->type) == 0x8848) {
+				filter->mpls_type = FM10K_MPLS_TYPE_MULTI;
+			} else {
+				rte_flow_error_set(error, EINVAL,
+					RTE_FLOW_ERROR_TYPE_ITEM,
+					item, "Invalid ETH item");
+				return -rte_errno;
+			}
+			break;
+		case RTE_FLOW_ITEM_TYPE_MPLS:
+			mpls_spec =
+				(const struct rte_flow_item_mpls *)item->spec;
+			mpls_mask =
+				(const struct rte_flow_item_mpls *)item->mask;
+
+			if (!mpls_spec || !mpls_mask) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   item,
+						   "Invalid MPLS item");
+				return -rte_errno;
+			}
+
+			if (memcmp(mpls_mask->label_tc_s, label_mask, 3)) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   item,
+						   "Invalid MPLS label mask");
+				return -rte_errno;
+			}
+			rte_memcpy(((uint8_t *)&label_be + 1),
+					mpls_spec->label_tc_s, 3);
+			rte_memcpy(((uint8_t *)&be_mask + 1),
+					mpls_mask->label_tc_s, 3);
+			filter->mpls_header =
+					rte_be_to_cpu_32(label_be) >> 4;
+			filter->mpls_header_mask =
+					rte_be_to_cpu_32(be_mask) >> 4;
+
+			fm10k_cons_filter_type = RTE_ETH_FILTER_TUNNEL;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* MPLS action only supports QUEUE or DROP. */
+static int
+fm10k_flow_parse_mpls_action(const struct rte_flow_action *actions,
+				 struct rte_flow_error *error,
+				 struct fm10k_mpls_filter_conf *filter)
+{
+	const struct rte_flow_action *act;
+	const struct rte_flow_action_queue *act_q;
+	uint32_t index = 0;
+
+	/* Check if the first non-void action is QUEUE or DROP. */
+	act = fm10k_next_item_of_action(actions, &index);
+	if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
+	    act->type != RTE_FLOW_ACTION_TYPE_DROP) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   act, "Not supported action.");
+		return -rte_errno;
+	}
+
+	if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+		act_q = (const struct rte_flow_action_queue *)act->conf;
+		filter->mpls_action = FM10K_MPLS_ACTION_QUEUE;
+		filter->queue = act_q->index;
+	} else {
+		filter->mpls_action = FM10K_MPLS_ACTION_DROP;
+	}
+
+	/* Check if the next non-void item is END */
+	index++;
+	act = fm10k_next_item_of_action(actions, &index);
+	if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   act, "Not supported action.");
+		return -rte_errno;
+	}
+
+	return 0;
+}
+
+static int
+fm10k_flow_parse_mpls_filter(struct rte_eth_dev *dev,
+			    const struct rte_flow_attr *attr,
+			    const struct rte_flow_item pattern[],
+			    const struct rte_flow_action actions[],
+			    struct rte_flow_error *error,
+			    union fm10k_filter_t *filter)
+{
+	struct fm10k_mpls_filter_conf *mpls_filter =
+		&filter->mpls_filter;
+	int ret;
+
+	ret = fm10k_flow_parse_mpls_pattern(dev, pattern,
+					   error, mpls_filter);
+	if (ret)
+		return ret;
+
+	ret = fm10k_flow_parse_mpls_action(actions, error, mpls_filter);
+	if (ret)
+		return ret;
+
+	ret = fm10k_flow_parse_attr(attr, error);
+	return ret;
+}
+
+
+/*
+ * VLAN
+ */
+static int
+fm10k_flow_parse_vlan_pattern(__rte_unused struct rte_eth_dev *dev,
+			     const struct rte_flow_item *pattern,
+			     struct rte_flow_error *error,
+			     struct fm10k_vlan_filter_conf *filter)
+{
+	const struct rte_flow_item *item = pattern;
+	const struct rte_flow_item_vlan *vlan_spec;
+	const struct rte_flow_item_phy_port *pp_spec;
+	enum rte_flow_item_type item_type;
+
+	PMD_INIT_LOG(DEBUG, "Parse vlan pattern ffu id %d", filter->ffu_id);
+
+	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+		if (item->last) {
+			rte_flow_error_set(error, EINVAL,
+					   RTE_FLOW_ERROR_TYPE_ITEM,
+					   item,
+					   "Not support range");
+			return -rte_errno;
+		}
+		item_type = item->type;
+		switch (item_type) {
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			vlan_spec =
+				(const struct rte_flow_item_vlan *)item->spec;
+
+			if (!vlan_spec) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   item,
+						   "Invalid VLAN item");
+				return -rte_errno;
+			}
+			break;
+
+		case RTE_FLOW_ITEM_TYPE_PHY_PORT:
+			pp_spec =
+			(const struct rte_flow_item_phy_port *)item->spec;
+			if (!pp_spec) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   item,
+						   "Invalid PHY PORT item");
+				return -rte_errno;
+			}
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
+static int
+fm10k_flow_parse_vlan_action(struct rte_eth_dev *dev,
+				 const struct rte_flow_action *actions,
+				 struct rte_flow_error *error,
+				 struct fm10k_vlan_filter_conf *filter)
+{
+	const struct rte_flow_action *act;
+	uint32_t index = 0;
+
+	PMD_INIT_LOG(DEBUG, "Parse vlan action name %s ffu id %d",
+			dev->device->name, filter->ffu_id);
+
+	/* Check if the first non-void action is QUEUE or DROP. */
+	act = fm10k_next_item_of_action(actions, &index);
+	if (act->type != RTE_FLOW_ACTION_TYPE_MARK) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   act, "Not supported action.");
+		return -rte_errno;
+	}
+
+	index++;
+	act = fm10k_next_item_of_action(actions, &index);
+	if (act->type != RTE_FLOW_ACTION_TYPE_MARK &&
+		act->type != RTE_FLOW_ACTION_TYPE_END) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   act, "Not supported action.");
+		return -rte_errno;
+	}
+	if (act->type == RTE_FLOW_ACTION_TYPE_END)
+		return 0;
+
+	index++;
+	/* Check if the next non-void item is END */
+	act = fm10k_next_item_of_action(actions, &index);
+	if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   act, "Not supported action.");
+		return -rte_errno;
+	}
+
+	return 0;
+}
+
+static int
+fm10k_flow_parse_vlan_filter(struct rte_eth_dev *dev,
+			    const struct rte_flow_attr *attr,
+			    const struct rte_flow_item pattern[],
+			    const struct rte_flow_action actions[],
+			    struct rte_flow_error *error,
+			    union fm10k_filter_t *filter)
+{
+	int ret;
+	struct fm10k_vlan_filter_conf *vlan_filter =
+		&filter->vlan_filter;
+
+	ret = fm10k_flow_parse_vlan_pattern(dev, pattern,
+					   error, vlan_filter);
+	if (ret)
+		return ret;
+
+	ret = fm10k_flow_parse_vlan_action(dev, actions, error, vlan_filter);
+	if (ret)
+		return ret;
+
+	if (attr->ingress)
+		vlan_filter->is_ingress = 1;
+	else if (attr->egress)
+		vlan_filter->is_ingress = 0;
+	vlan_filter->ffu_prio = attr->priority;
+
+	ret = fm10k_flow_parse_attr(attr, error);
+	return ret;
+}
+
+/*
+ *
+ */
+static int
+fm10k_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item pattern[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct rte_flow_item *items; /* internal pattern w/o VOID items */
+	parse_filter_t parse_filter;
+	uint32_t item_num = 0; /* non-void item number of pattern*/
+	uint32_t i = 0;
+	bool flag = false;
+	int ret = -1;
+
+	if (!pattern) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+				   NULL, "NULL pattern.");
+		return -rte_errno;
+	}
+
+	if (!actions) {
+		rte_flow_error_set(error, EINVAL,
+				   RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+				   NULL, "NULL action.");
+		return -rte_errno;
+	}
+
+	if (!attr) {
+		rte_flow_error_set(error, EINVAL,
+				   RTE_FLOW_ERROR_TYPE_ATTR,
+				   NULL, "NULL attribute.");
+		return -rte_errno;
+	}
+
+	/* Get the non-void item number of pattern */
+	while ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_END) {
+		if ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_VOID)
+			item_num++;
+		i++;
+	}
+	item_num++;
+
+	items = rte_zmalloc("fm10k_pattern",
+			    item_num * sizeof(struct rte_flow_item), 0);
+	if (!items) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+				   NULL, "No memory for PMD internal items.");
+		return -ENOMEM;
+	}
+
+	fm10k_pattern_skip_void_item(items, pattern);
+
+	i = 0;
+	do {
+		parse_filter = fm10k_find_parse_filter_func(items, &i);
+		if (!parse_filter && !flag) {
+			rte_flow_error_set(error, EINVAL,
+					   RTE_FLOW_ERROR_TYPE_ITEM,
+					   pattern, "Unsupported pattern");
+			rte_free(items);
+			return -rte_errno;
+		}
+		if (parse_filter)
+			ret = parse_filter(dev, attr, items, actions,
+					   error, &cons_filter);
+		flag = true;
+	} while ((ret < 0) && (i < RTE_DIM(fm10k_supported_patterns)));
+
+	rte_free(items);
+
+	return ret;
+}
+
+static struct fm10k_cfg_flow *
+fm10k_flow_cfg_transfer(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item pattern[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	int i;
+	u8 port_id;
+	int set_port_num = 0, set_vlan_num = 0;
+	u16 fw_port_id = 0, bp_port_id = 0;
+	u16 filter_vlan_id = 0, fw_vlan_id = 0, bp_vlan_id = 0;
+	struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct fm10k_cfg_flow *cf;
+
+	cf = rte_zmalloc("fm10k_rule", sizeof(struct fm10k_cfg_flow), 0);
+	if (!cf) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+				   "Failed to allocate memory");
+		return NULL;
+	}
+	memset(cf, 0, sizeof(struct fm10k_cfg_flow));
+
+	port_id = fm10k_switch_dpdk_port_no_get(hw);
+	for (i = 0; i < 4; i++)	{
+		if (pattern[i].type == RTE_FLOW_ITEM_TYPE_VLAN) {
+			filter_vlan_id =
+				rte_be_to_cpu_16
+				(((const struct rte_flow_item_vlan *)
+				pattern[i].spec)->tci);
+		} else if (pattern[i].type == RTE_FLOW_ITEM_TYPE_PHY_PORT) {
+			if (set_port_num)
+				bp_port_id =
+					((const struct rte_flow_item_phy_port *)
+					pattern[i].spec)->index;
+			else
+				fw_port_id =
+					((const struct rte_flow_item_phy_port *)
+					pattern[i].spec)->index;
+			set_port_num++;
+		} else if (pattern[i].type == RTE_FLOW_ITEM_TYPE_END) {
+			break;
+		}
+	}
+
+	for (i = 0; i < 3; i++)	{
+		if (actions[i].type == RTE_FLOW_ACTION_TYPE_MARK) {
+			if (set_vlan_num)
+				bp_vlan_id =
+					((const struct rte_flow_action_mark *)
+					actions[i].conf)->id;
+			else
+				fw_vlan_id =
+					((const struct rte_flow_action_mark *)
+					actions[i].conf)->id;
+			set_vlan_num++;
+		} else if (actions[i].type == RTE_FLOW_ACTION_TYPE_END) {
+			break;
+		}
+	}
+
+	if (attr->ingress && !attr->egress)	{
+		/* this port is DPDK port and it is destination port */
+		cf->src_port.port_type = FM10K_CONFIG_FLOW_EXT_PORT;
+		cf->src_port.port_no = fw_port_id;
+		cf->src_port.vlan_id = filter_vlan_id;
+		cf->fw_port[0].port_type = FM10K_CONFIG_FLOW_DPDK_PORT;
+		cf->fw_port[0].port_no = port_id;
+		cf->fw_port[0].vlan_id = fw_vlan_id;
+	} else if (!attr->ingress && attr->egress) {
+		/* this port is DPDK port and it is source port */
+		cf->src_port.port_type = FM10K_CONFIG_FLOW_DPDK_PORT;
+		cf->src_port.port_no = port_id;
+		cf->src_port.vlan_id = filter_vlan_id;
+		cf->fw_port[0].port_type = FM10K_CONFIG_FLOW_EXT_PORT;
+		cf->fw_port[0].port_no = fw_port_id;
+		cf->fw_port[0].vlan_id = fw_vlan_id;
+	} else if (!attr->ingress && !attr->egress) {
+		/* two ports are external port */
+		cf->src_port.port_type = FM10K_CONFIG_FLOW_EXT_PORT;
+		cf->src_port.port_no = port_id;
+		cf->src_port.vlan_id = filter_vlan_id;
+		cf->fw_port[0].port_type = FM10K_CONFIG_FLOW_EXT_PORT;
+		cf->fw_port[0].port_no = fw_port_id;
+		cf->fw_port[0].vlan_id = fw_vlan_id;
+	} else {
+		/* two ports are DPDK port */
+		cf->src_port.port_type = FM10K_CONFIG_FLOW_DPDK_PORT;
+		cf->src_port.port_no = port_id;
+		cf->src_port.vlan_id = filter_vlan_id;
+		cf->fw_port[0].port_type = FM10K_CONFIG_FLOW_DPDK_PORT;
+		cf->fw_port[0].port_no = fw_port_id;
+		cf->fw_port[0].vlan_id = fw_vlan_id;
+	}
+
+	if (set_port_num == 2 && set_vlan_num == 2)	{
+		cf->fw_port[1].port_type = FM10K_CONFIG_FLOW_EXT_PORT;
+		cf->fw_port[1].port_no = bp_port_id;
+		cf->fw_port[1].vlan_id = bp_vlan_id;
+	}
+
+	return cf;
+}
+
+
+static struct rte_flow *
+fm10k_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item pattern[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_flow *flow;
+	struct fm10k_switch *sw = fm10k_switch_get();
+	struct fm10k_cfg_flow *cf;
+	int ret;
+
+	flow = rte_zmalloc("fm10k_flow", sizeof(struct rte_flow), 0);
+	if (!flow) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+				   "Failed to allocate memory");
+		return flow;
+	}
+
+	ret = fm10k_flow_validate(dev, attr, pattern, actions, error);
+	if (ret < 0)
+		return NULL;
+
+	cf = fm10k_flow_cfg_transfer(dev, attr, pattern, actions, error);
+	if (!cf)
+		goto free_flow;
+
+	flow->rule = cf;
+	fm10k_ffu_flow_enable(sw, cf);
+	fm10k_config_flow_list_add_tail(fm10k_config_flowset_current_get(), cf);
+
+	TAILQ_INSERT_TAIL((struct fm10k_flow_list *)
+			fm10k_switch_dpdk_port_flow_list_get(hw), flow, node);
+	return flow;
+
+free_flow:
+	rte_flow_error_set(error, -ret,
+			   RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+			   "Failed to create flow.");
+	rte_free(flow);
+	return NULL;
+}
+
+static int
+fm10k_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int ret = 0;
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	if (flow->rule)	{
+		fm10k_config_flow_list_delete
+			((struct fm10k_cfg_flow *)flow->rule);
+		fm10k_ffu_flow_disable(sw,
+			(struct fm10k_cfg_flow *)flow->rule);
+	}
+
+	if (!ret) {
+		TAILQ_REMOVE((struct fm10k_flow_list *)
+				fm10k_switch_dpdk_port_flow_list_get(hw),
+				flow, node);
+		rte_free(flow->rule);
+		rte_free(flow);
+	} else {
+		rte_flow_error_set(error, -ret,
+				   RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+				   "Failed to destroy flow.");
+	}
+	return ret;
+}
+
+
+static int
+fm10k_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_flow *flow;
+	void *temp;
+	int ret = 0;
+	struct fm10k_cfg_flow *cf;
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	/* Delete flows in flow list. */
+	TAILQ_FOREACH_SAFE(flow,
+			(struct fm10k_flow_list *)
+			fm10k_switch_dpdk_port_flow_list_get(hw),
+			node, temp) {
+		cf = flow->rule;
+		if (cf) {
+			fm10k_config_flow_list_delete(cf);
+			fm10k_ffu_flow_disable(sw, cf);
+		} else {
+			rte_flow_error_set(error, -ret,
+					   RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+					   "No such rule in flow.");
+		}
+		TAILQ_REMOVE((struct fm10k_flow_list *)
+				fm10k_switch_dpdk_port_flow_list_get(hw),
+				flow, node);
+		rte_free(flow);
+	}
+
+	return ret;
+}
+
+void
+fm10k_flow_list_init(void *flow_list)
+{
+	TAILQ_INIT((struct fm10k_flow_list *)flow_list);
+}
+
+/* Flow operations */
+const struct rte_flow_ops *
+fm10k_flow_ops_get(void)
+{
+	return &fm10k_flow_ops;
+}
+
+
diff --git a/drivers/net/fm10k/switch/fm10k_flow.h b/drivers/net/fm10k/switch/fm10k_flow.h
new file mode 100644
index 0000000..c538544
--- /dev/null
+++ b/drivers/net/fm10k/switch/fm10k_flow.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019   Silicom Ltd. Connectivity Solutions
+ */
+
+#ifndef _FM10K_SW_FLOW_H_
+#define _FM10K_SW_FLOW_H_
+
+#include <rte_time.h>
+#include <rte_kvargs.h>
+#include <rte_hash.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_tm_driver.h>
+
+/*
+ * Struct to store flow created.
+ */
+struct rte_flow {
+	TAILQ_ENTRY(rte_flow) node;
+	enum rte_filter_type filter_type;
+	void *rule;
+};
+
+TAILQ_HEAD(fm10k_flow_list, rte_flow);
+
+#endif /* _FM10K_SW_FLOW_H_ */
diff --git a/drivers/net/fm10k/switch/fm10k_switch.c b/drivers/net/fm10k/switch/fm10k_switch.c
new file mode 100644
index 0000000..c3887d0
--- /dev/null
+++ b/drivers/net/fm10k/switch/fm10k_switch.c
@@ -0,0 +1,2562 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019   Silicom Ltd. Connectivity Solutions
+ */
+
+#include <rte_ethdev.h>
+#include <rte_ethdev_pci.h>
+
+#include <rte_malloc.h>
+
+#include "../base/fm10k_type.h"
+#include "../base/fm10k_osdep.h"
+
+#include "../fm10k.h"
+#include "../fm10k_logs.h"
+#include "fm10k_debug.h"
+#include "fm10k_regs.h"
+#include "fm10k_switch.h"
+#include "fm10k_ext_port.h"
+#include "fm10k_serdes.h"
+#include "fm10k_i2c.h"
+#include "fm10k_ffu.h"
+#include "fm10k_stats.h"
+#include "fm10k_config.h"
+
+static struct fm10k_device_info fm10k_device_table[] = {
+	{   FM10K_SW_VENDOR_ID_SILICOM,	FM10K_SW_DEV_ID_PE3100G2DQIR_QXSL4,
+		"Silicom PE3100G2DQiR-QX4/QS4/QL4",		2, 100, 2, 2 },
+	{   FM10K_SW_VENDOR_ID_SILICOM,	FM10K_SW_DEV_ID_PE3100G2DQIRL_QXSL4,
+		"Silicom PE3100G2DQiRL-QX4/QS4/QL4",	2, 100, 2, 2 },
+	{   FM10K_SW_VENDOR_ID_SILICOM,	FM10K_SW_DEV_ID_PE3100G2DQIRM_QXSL4,
+		"Silicom PE3100G2DQiRM-QX4/QS4/QL4",	2, 100, 2, 4 },
+};
+
+
+static struct fm10k_switch fm10k_sw;
+
+#define FM10K_AM_TIMEOUT			16384
+#define FM10K_COMP_PPM_SCALE		1000000
+
+#define FM10K_LED_POLL_INTERVAL_MS	500
+#define FM10K_LED_BLINKS_PER_SECOND	2
+
+/*
+ * GLORT MAP
+ */
+#define FM10K_SW_EPLA_GLORT			0x10
+#define FM10K_SW_EPLB_GLORT			0x20
+#define FM10K_SW_PEP01_GLORT		0x30
+#define FM10K_SW_PEP23_GLORT		0x40
+#define FM10K_SW_PEP45_GLORT		0x50
+#define FM10K_SW_PEP67_GLORT		0x60
+
+/*
+ * logical port number
+ */
+#define FM10K_SW_EPLA_LOGICAL_PORT		1
+#define FM10K_SW_EPLB_LOGICAL_PORT		2
+
+#define FM10K_SW_PEP01_LOGICAL_PORT		3
+#define FM10K_SW_PEP23_LOGICAL_PORT		4
+#define FM10K_SW_PEP45_LOGICAL_PORT		5
+#define FM10K_SW_PEP67_LOGICAL_PORT		6
+
+/*
+ * physical port number
+ */
+#define FM10K_SW_EPLA_PHYSICAL_PORT		0
+#define FM10K_SW_EPLB_PHYSICAL_PORT		4
+
+#define FM10K_SW_PEP01_PHYSICAL_PORT	36
+#define FM10K_SW_PEP23_PHYSICAL_PORT	40
+#define FM10K_SW_PEP45_PHYSICAL_PORT	44
+#define FM10K_SW_PEP67_PHYSICAL_PORT	48
+
+static struct fm10k_sw_port_map fm10k_pep_port_map[FM10K_SW_PEPS_SUPPORTED] = {
+	{
+		FM10K_SW_PEP01_GLORT,
+		FM10K_SW_PEP01_LOGICAL_PORT,
+		FM10K_SW_PEP01_PHYSICAL_PORT
+	},
+	{
+		FM10K_SW_PEP23_GLORT,
+		FM10K_SW_PEP23_LOGICAL_PORT,
+		FM10K_SW_PEP23_PHYSICAL_PORT
+	},
+	{
+		FM10K_SW_PEP45_GLORT,
+		FM10K_SW_PEP45_LOGICAL_PORT,
+		FM10K_SW_PEP45_PHYSICAL_PORT
+	},
+	{
+		FM10K_SW_PEP67_GLORT,
+		FM10K_SW_PEP67_LOGICAL_PORT,
+		FM10K_SW_PEP67_PHYSICAL_PORT
+	},
+};
+
+static struct fm10k_sw_port_map fm10k_epl_port_map[FM10K_SW_EPLS_SUPPORTED] = {
+	{
+		FM10K_SW_EPLA_GLORT,
+		FM10K_SW_EPLA_LOGICAL_PORT,
+		FM10K_SW_EPLA_PHYSICAL_PORT
+	},
+	{
+		FM10K_SW_EPLB_GLORT,
+		FM10K_SW_EPLB_LOGICAL_PORT,
+		FM10K_SW_EPLB_PHYSICAL_PORT
+	},
+};
+
+/*
+ * use epl as external port map, only support QUAD_ON
+ */
+struct fm10k_sched_prog {
+	uint8_t idle;
+	uint8_t phys;	/* physical port */
+	uint8_t log;	/* logical port */
+	uint8_t quad;	/* now, only support QUAD_ON */
+/* convert entry to idle if EPL in quad port mode */
+#define FM10K_SW_QUAD_OFF		0
+/* always use quad port mode */
+#define FM10K_SW_QUAD_ON		1
+/* use quad port mode if link speed is 40/100G */
+#define FM10K_SW_QUAD_40_100	2
+};
+
+static struct fm10k_sched_prog
+	fm10k_sched_prog[FM10K_SW_PEPS_SUPPORTED + FM10K_SW_EPLS_SUPPORTED + 1];
+
+uint32_t
+fm10k_switch_pf_logical_get(uint8_t pf_no)
+{
+	return fm10k_pep_port_map[pf_no].logical_port;
+}
+
+uint32_t
+fm10k_switch_epl_logical_get(uint8_t epl_no)
+{
+	return fm10k_epl_port_map[epl_no].logical_port;
+}
+
+uint32_t
+fm10k_switch_vf_glort_get(uint8_t vf_no)
+{
+	return FM10K_SW_VF_GLORT_START + vf_no;
+}
+
+uint32_t
+fm10k_switch_pf_glort_get(uint8_t pf_no)
+{
+	return fm10k_pep_port_map[pf_no].glort;
+}
+
+uint32_t
+fm10k_switch_epl_glort_get(uint8_t epl_no)
+{
+	return fm10k_epl_port_map[epl_no].glort;
+}
+
+uint32_t
+fm10k_switch_pfs_glort_get(uint8_t pf1, uint8_t pf2)
+{
+	uint8_t idx;
+	if (pf1 > pf2)
+		idx = (pf2 & 0xf) | (pf1 << 4 & 0xf0);
+	else
+		idx = (pf1 & 0xf) | (pf2 << 4 & 0xf0);
+	return FM10K_SW_PFS_GLORT_START + idx;
+}
+
+struct fm10k_multi_glort {
+	uint8_t lport1;
+	uint8_t lport2;
+	uint16_t vlan1;
+	uint16_t vlan2;
+} fm10k_multi_glorts[FM10K_SW_FFU_RULE_MAX];
+
+uint32_t
+fm10k_switch_multi_glort_get(uint8_t lport1, uint8_t lport2,
+		uint16_t vlan1, uint16_t vlan2, bool *p_new)
+{
+	int i;
+
+	for (i = 0; i < FM10K_SW_FFU_RULE_MAX; i++) {
+		if (lport1 == fm10k_multi_glorts[i].lport1 &&
+		   lport2 == fm10k_multi_glorts[i].lport2 &&
+		   vlan1 == fm10k_multi_glorts[i].vlan1 &&
+		   vlan2 == fm10k_multi_glorts[i].vlan2) {
+			if (p_new != NULL)
+				*p_new = false;
+			return FM10K_SW_MULTI_GLORT_START + i;
+		}
+	}
+
+	for (i = 0; i < FM10K_SW_FFU_RULE_MAX; i++) {
+		if (fm10k_multi_glorts[i].lport1 == 0 &&
+		   fm10k_multi_glorts[i].lport2 == 0 &&
+		   fm10k_multi_glorts[i].vlan1 == 0 &&
+		   fm10k_multi_glorts[i].vlan2 == 0) {
+			fm10k_multi_glorts[i].lport1 = lport1;
+			fm10k_multi_glorts[i].lport2 = lport2;
+			fm10k_multi_glorts[i].vlan1 = vlan1;
+			fm10k_multi_glorts[i].vlan2 = vlan2;
+			if (p_new != NULL)
+				*p_new = true;
+			return FM10K_SW_MULTI_GLORT_START + i;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ * Note that for physical port numbers, the initial values used for
+ * the EPL entries assume EPL[A] is EPL[0] and EPL[B] is EPL[1].
+ * fm10k_switch_determine_epls() will update these physical port
+ * numbers based on the actual A and B indices.
+ */
+static void
+fm10k_switch_set_sched_prog(void)
+{
+	int i;
+	int start = 0;
+
+	for (i = 0; i < FM10K_SW_EPLS_SUPPORTED; i++) {
+		fm10k_sched_prog[i].idle = 0;
+		fm10k_sched_prog[i].phys = fm10k_epl_port_map[i].physical_port;
+		fm10k_sched_prog[i].log = fm10k_epl_port_map[i].logical_port;
+		fm10k_sched_prog[i].quad = FM10K_SW_QUAD_ON;
+	}
+	start += FM10K_SW_EPLS_SUPPORTED;
+	for (i = 0; i < FM10K_SW_PEPS_SUPPORTED; i++) {
+		fm10k_sched_prog[start + i].idle = 0;
+		fm10k_sched_prog[start + i].phys =
+				fm10k_pep_port_map[i].physical_port;
+		fm10k_sched_prog[start + i].log =
+				fm10k_pep_port_map[i].logical_port;
+		fm10k_sched_prog[start + i].quad = FM10K_SW_QUAD_40_100;
+	}
+	start += FM10K_SW_PEPS_SUPPORTED;
+	fm10k_sched_prog[start].idle = 1;
+}
+
+struct fm10k_device_info*
+fm10k_get_device_info(struct fm10k_hw *hw)
+{
+	unsigned int i;
+	struct fm10k_device_info *info;
+	uint16_t pci_vendor = hw->vendor_id;
+	uint16_t pci_device = hw->device_id;
+	uint16_t pci_subvendor = hw->subsystem_vendor_id;
+	uint16_t pci_subdevice = hw->subsystem_device_id;
+
+	if (pci_vendor != FM10K_SW_VENDOR_ID_INTEL ||
+	    pci_device != FM10K_SW_DEV_ID_FM10K)
+		return (NULL);
+
+	for (i = 0;
+			i < sizeof(fm10k_device_table) /
+				sizeof(fm10k_device_table[0]);
+			i++) {
+		info = &fm10k_device_table[i];
+		if (pci_subvendor == info->subvendor &&
+		    pci_subdevice == info->subdevice) {
+			return info;
+		}
+	}
+
+	return NULL;
+}
+
+
+static void
+fm10k_switch_determine_epls(struct fm10k_switch *sw)
+{
+	struct fm10k_device_info *cfg = sw->info;
+	unsigned int i;
+	uint8_t phys;
+
+	sw->epla_no = 0;
+	sw->eplb_no = 6;
+
+	switch (FM10K_SW_CARD_ID(cfg->subvendor, cfg->subdevice)) {
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4):
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4_REV_2):
+		sw->epla_no = 1;
+		sw->eplb_no = 6;
+		break;
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRL_QXSL4):
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRM_QXSL4):
+	case FM10K_SW_CARD(SILICOM, PE325G2DSIR):
+		sw->epla_no = 1;
+		sw->eplb_no = 7;
+		break;
+	}
+
+	for (i = 0;
+			i < sizeof(fm10k_sched_prog) /
+				sizeof(fm10k_sched_prog[0]);
+			i++) {
+		if (fm10k_sched_prog[i].idle)
+			continue;
+
+		phys = fm10k_sched_prog[i].phys;
+		if (phys <= 3) {
+			/* Substitute actual epla phys port number */
+			fm10k_sched_prog[i].phys = sw->epla_no * 4 + phys;
+		} else if (phys >= 4 && phys <= 7) {
+			/* Substitute actual eplb phys port number */
+			fm10k_sched_prog[i].phys = sw->eplb_no * 4 + (phys - 4);
+		}
+	}
+}
+
+static void
+fm10k_hw_eicr_disable_source(struct fm10k_switch *sw, unsigned int source)
+{
+	unsigned int shift;
+
+	switch (source) {
+	case FM10K_SW_EICR_PCA_FAULT:
+		shift = FM10K_SW_EICR_PCA_FAULT_SHIFT;
+		break;
+	case FM10K_SW_EICR_THI_FAULT:
+		shift = FM10K_SW_EICR_THI_FAULT_SHIFT;
+		break;
+	case FM10K_SW_EICR_FUM_FAULT:
+		shift = FM10K_SW_EICR_FUM_FAULT_SHIFT;
+		break;
+	case FM10K_SW_EICR_MAILBOX:
+		shift = FM10K_SW_EICR_MAILBOX_SHIFT;
+		break;
+	case FM10K_SW_EICR_SWITCH_READY:
+		shift = FM10K_SW_EICR_SWITCH_READY_SHIFT;
+		break;
+	case FM10K_SW_EICR_SWITCH_NREADY:
+		shift = FM10K_SW_EICR_SWITCH_NREADY_SHIFT;
+		break;
+	case FM10K_SW_EICR_SWITCH_INT:
+		shift = FM10K_SW_EICR_SWITCH_INT_SHIFT;
+		break;
+	case FM10K_SW_EICR_SRAM_ERROR:
+		shift = FM10K_SW_EICR_SRAM_ERROR_SHIFT;
+		break;
+	case FM10K_SW_EICR_VFLR:
+		shift = FM10K_SW_EICR_VFLR_SHIFT;
+		break;
+	case FM10K_SW_EICR_MAX_HOLD_TIME:
+		shift = FM10K_SW_EICR_MAX_HOLD_TIME_SHIFT;
+		break;
+	default:
+		return;
+	}
+
+	fm10k_write_switch_reg(sw,
+			FM10K_SW_EIMR, FM10K_SW_EIMR_DISABLE << (shift * 2));
+	fm10k_write_flush(sw);
+}
+
+
+unsigned int
+fm10k_switch_eplidx_to_eplno(struct fm10k_switch *sw, unsigned int eplidx)
+{
+	return eplidx ? sw->eplb_no : sw->epla_no;
+}
+
+
+static uint16_t
+fm10k_switch_ppm_to_tx_clk_compensation_timeout(uint32_t pcs, uint32_t num_ppm)
+{
+	unsigned int scale;
+	unsigned int ppm;
+	unsigned int timeout;
+	unsigned int am_ppm;
+
+	if (num_ppm == 0 || pcs == FM10K_SW_EPL_PCS_SEL_DISABLE) {
+		if (pcs == FM10K_SW_EPL_PCS_SEL_40GBASER ||
+		    pcs == FM10K_SW_EPL_PCS_SEL_100GBASER)
+			return (FM10K_AM_TIMEOUT / 2);
+		else
+			return (0);
+	}
+
+	if (pcs == FM10K_SW_EPL_PCS_SEL_40GBASER ||
+	    pcs == FM10K_SW_EPL_PCS_SEL_100GBASER) {
+		am_ppm = 1000000 / FM10K_AM_TIMEOUT;
+		scale = FM10K_COMP_PPM_SCALE / 2;
+	} else {
+		am_ppm = 0;
+		scale = FM10K_COMP_PPM_SCALE;
+	}
+
+	ppm = num_ppm + am_ppm;
+	timeout = scale / ppm;
+
+	if (timeout >= 0xffff)
+		return 0xffff;
+	else
+		return timeout;
+}
+
+
+static int
+fm10k_switch_configure_epls(struct fm10k_hw *hw, struct fm10k_switch *sw)
+{
+	struct fm10k_ext_ports *ext_ports = sw->ext_ports;
+	struct fm10k_ext_port *port;
+	struct fm10k_device_info *cfg = sw->info;
+	u32 mac_cfg[FM10K_SW_MAC_CFG_ARRAY_SIZE];
+	u32 data, pcs, qpl;
+	u32 pcstmp;
+	unsigned int i, j;
+	unsigned int dic_enable;
+	unsigned int anti_bubble_wm;
+	unsigned int rate_fifo_wm;
+	unsigned int rate_fifo_slow_inc, rate_fifo_fast_inc;
+	int error;
+	u16 timeout;
+
+	cfg = fm10k_get_device_info(hw);
+	if (cfg == NULL)
+		return -1;
+
+	/*
+	 * Assumptions:
+	 *   - All external interfaces are the same speed
+	 *   - 1G/10G ports are packed into the minimum number of EPLs
+	 *   - quad-mode EPLs use lane 0 as master
+	 *   - The lowest numbered PEPs are used
+	 *   - PEPs are always used in x8 mode.
+	 */
+	switch (cfg->ext_port_speed) {
+	case 10:
+		pcs = FM10K_SW_EPL_PCS_SEL_10GBASER;
+		qpl = FM10K_SW_EPL_QPL_MODE_L1_L1_L1_L1;
+		dic_enable = 1;
+		anti_bubble_wm = 5;
+		rate_fifo_wm = 3;
+		rate_fifo_slow_inc = 12;
+		rate_fifo_fast_inc = 13;
+		break;
+		/* XXX	what is the physical config for 25G? */
+	case 25:
+		pcs = FM10K_SW_EPL_PCS_SEL_100GBASER;
+		qpl = FM10K_SW_EPL_QPL_MODE_L1_L1_L1_L1;
+		dic_enable = 0;
+		anti_bubble_wm = 6;
+		rate_fifo_wm = 3;
+		rate_fifo_slow_inc = 32;
+		rate_fifo_fast_inc = 33;
+		break;
+	case 40:
+		pcs = FM10K_SW_EPL_PCS_SEL_40GBASER;
+		qpl = FM10K_SW_EPL_QPL_MODE_L4_XX_XX_XX;
+		dic_enable = 1;
+		anti_bubble_wm = 4;
+		rate_fifo_wm = 5;
+		rate_fifo_slow_inc = 51;
+		rate_fifo_fast_inc = 52;
+		break;
+	case 100:
+		pcs = FM10K_SW_EPL_PCS_SEL_100GBASER;
+		qpl = FM10K_SW_EPL_QPL_MODE_L4_XX_XX_XX;
+		dic_enable = 1;
+		anti_bubble_wm = 4;
+		rate_fifo_wm = 3;
+		rate_fifo_slow_inc = 129;
+		rate_fifo_fast_inc = 130;
+		break;
+	default:
+		error = -1;
+		goto done;
+	}
+
+	/*
+	 * EPL_CFG_A
+	 *
+	 * Mark all used lanes as active and all unused lanes as
+	 * inactive. Adjust timeout and skew tolerance values for EPLs that
+	 * are used.
+	 */
+	for (i = 0; i < FM10K_SW_EPLS_MAX; i++) {
+		data = fm10k_read_switch_reg(sw, FM10K_SW_EPL_CFG_A(i));
+		data &= ~FM10K_SW_EPL_CFG_A_ACTIVE_QUAD;
+		if (FM10K_SW_EXT_PORTS_EPL_USED(ext_ports, i)) {
+			for (j = 0; j < ext_ports->num_ports; j++) {
+				port = &ext_ports->ports[j];
+				if (port->eplno == i)
+					data |= port->is_quad ?
+					    FM10K_SW_EPL_CFG_A_ACTIVE_QUAD :
+					    FM10K_SW_EPL_CFG_A_ACTIVE
+						(port->first_lane);
+			}
+			FM10K_SW_REPLACE_REG_FIELD(data,
+					EPL_CFG_A_TIMEOUT, 19, data);
+			FM10K_SW_REPLACE_REG_FIELD(data,
+					EPL_CFG_A_SKEW_TOLERANCE, 38, data);
+		}
+		fm10k_write_switch_reg(sw, FM10K_SW_EPL_CFG_A(i), data);
+	}
+
+	/*
+	 * EPL_CFG_B
+	 *
+	 * Disable all unused lanes and configure all used ones
+	 * appropriately.  For EPLs used in quad port mode, the master lane
+	 * is the only one that gets configured.
+	 */
+	data = 0;
+	for (i = 0; i < FM10K_SW_EPLS_MAX; i++) {
+		if (FM10K_SW_EXT_PORTS_EPL_USED(ext_ports, i)) {
+			for (j = 0; j < FM10K_SW_EPL_LANES; j++) {
+				if (j < ext_ports->ports_per_epl)
+					pcstmp = pcs;
+				else
+					pcstmp = FM10K_SW_EPL_PCS_SEL_DISABLE;
+				data |=
+				    FM10K_SW_MAKE_REG_FIELD_IDX
+					(EPL_CFG_B_PCS_SEL, j, pcstmp, j, j);
+			}
+			data |=
+			    FM10K_SW_MAKE_REG_FIELD
+				(EPL_CFG_B_QPL_MODE, qpl);
+		} else {
+			for (j = 0; j < FM10K_SW_EPL_LANES; j++)
+				data |=
+				    FM10K_SW_MAKE_REG_FIELD_IDX
+					(EPL_CFG_B_PCS_SEL,	j,
+					FM10K_SW_EPL_PCS_SEL_DISABLE, j, j);
+			data |=
+			    FM10K_SW_MAKE_REG_FIELD
+				(EPL_CFG_B_QPL_MODE,
+				FM10K_SW_EPL_QPL_MODE_XX_XX_XX_XX);
+		}
+		fm10k_write_switch_reg(sw, FM10K_SW_EPL_CFG_B(i), data);
+	}
+
+	/*
+	 * MAC_CFG, LINK_RULES and PCS_ML_BASER_CFG
+	 *
+	 * Only EPLs/lanes that are being used are initialized. All others
+	 * are left at their defaults.
+	 */
+	for (i = 0; i < FM10K_SW_EPLS_MAX; i++) {
+		if (!FM10K_SW_EXT_PORTS_EPL_USED(ext_ports, i))
+			continue;
+		for (j = 0; j < ext_ports->ports_per_epl; j++) {
+			fm10k_read_switch_array(sw, FM10K_SW_MAC_CFG(i, j),
+			    mac_cfg, FM10K_SW_MAC_CFG_ARRAY_SIZE);
+
+			/* dic enable */
+			FM10K_SW_SET_ARRAY_BIT(mac_cfg,
+			    MAC_CFG_TX_IDLE_ENABLE_DIC,
+			    dic_enable, mac_cfg);
+
+			/* ifg */
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_IDLE_MIN_IFG_BYTES,
+			    12, mac_cfg);
+
+			/* tx pad size: (64 bytes + 8 preamble) / 4 */
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_MIN_COLUMNS,
+			    18, mac_cfg);
+
+			/* tx clock compensation */
+			timeout =
+			    fm10k_switch_ppm_to_tx_clk_compensation_timeout(pcs,
+				100);
+			FM10K_SW_SET_ARRAY_BIT(mac_cfg,
+			    MAC_CFG_TX_CLOCK_COMPENSATION_ENABLE,
+			    (timeout > 0), mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_CLOCK_COMPENSATION_TIMEOUT,
+			    timeout, mac_cfg);
+
+			FM10K_SW_SET_ARRAY_BIT(mac_cfg,
+			    MAC_CFG_PREAMBLE_MODE,
+			    0, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_RX_MIN_FRAME_LENGTH,
+			    64, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_RX_MAX_FRAME_LENGTH,
+			    FM10K_SW_PACKET_SIZE_MAX, mac_cfg);
+			FM10K_SW_SET_ARRAY_BIT(mac_cfg,
+			    MAC_CFG_RX_IGNORE_IFG_ERRORS,
+			    0, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_FCS_MODE,
+			    FM10K_SW_TX_MAX_FCS_MODE_REPLACE_NORMAL, mac_cfg);
+
+			FM10K_SW_SET_ARRAY_BIT(mac_cfg,
+			    MAC_CFG_IEEE_1588_ENABLE,
+			    0, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_DRAIN_MODE,
+			    FM10K_SW_TX_MAC_DRAIN_MODE_DRAIN_NORMAL, mac_cfg);
+
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_PC_ACT_TIMEOUT,
+			    100, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_PC_ACT_TIME_SCALE,
+			    3, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_LPI_TIMEOUT,
+			    180, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_LPI_TIME_SCALE,
+			    1, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_LPI_HOLD_TIMEOUT,
+			    20, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_LPI_HOLD_TIME_SCALE,
+			    1, mac_cfg);
+
+			FM10K_SW_SET_ARRAY_BIT(mac_cfg,
+			    MAC_CFG_TX_LPI_AUTOMATIC,
+			    1, mac_cfg);
+			FM10K_SW_SET_ARRAY_BIT(mac_cfg,
+			    MAC_CFG_TX_LP_IDLE_REQUEST,
+			    0, mac_cfg);
+
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_FAULT_MODE,
+			    FM10K_SW_TX_MAC_FAULT_MODE_NORMAL, mac_cfg);
+
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_ANTI_BUBBLE_WATERMARK,
+			    anti_bubble_wm, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_RATE_FIFO_WATERMARK,
+			    rate_fifo_wm, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_RATE_FIFO_FAST_INC,
+			    rate_fifo_fast_inc, mac_cfg);
+			FM10K_SW_REPLACE_ARRAY_FIELD(mac_cfg,
+			    MAC_CFG_TX_RATE_FIFO_SLOW_INC,
+			    rate_fifo_slow_inc, mac_cfg);
+
+			fm10k_write_switch_array(sw, FM10K_SW_MAC_CFG(i, j),
+			    mac_cfg, FM10K_SW_MAC_CFG_ARRAY_SIZE);
+
+			data =
+				fm10k_read_switch_reg(sw,
+					FM10K_SW_LINK_RULES(i, j));
+
+			/* link-up debounce params */
+			FM10K_SW_REPLACE_REG_FIELD(data,
+			    LINK_RULES_FAULT_TIME_SCALE_UP,
+			    4, data);
+			FM10K_SW_REPLACE_REG_FIELD(data,
+			    LINK_RULES_FAULT_TICKS_UP,
+			    30, data);
+
+			/* link-down debounce params */
+			FM10K_SW_REPLACE_REG_FIELD(data,
+			    LINK_RULES_FAULT_TIME_SCALE_DOWN,
+			    4, data);
+			FM10K_SW_REPLACE_REG_FIELD(data,
+			    LINK_RULES_FAULT_TICKS_DOWN,
+			    5, data);
+
+			FM10K_SW_REPLACE_REG_FIELD(data,
+			    LINK_RULES_HEARTBEAT_TIME_SCALE,
+			    cfg->ext_port_speed == 10 ? 4 : 0, data);
+			fm10k_write_switch_reg(sw,
+				FM10K_SW_LINK_RULES(i, j), data);
+
+			/* XXX add 10GBASER config */
+		}
+
+		if (cfg->ext_port_speed != 10)
+			fm10k_write_switch_reg(sw,
+				FM10K_SW_PCS_ML_BASER_CFG(i), 0x00003fff);
+	}
+
+
+	/*
+	 * LANE_CFG, LANE_SERDES_CFG, LANE_ENERGY_DETECT_CFG,
+	 * LANE_SIGNAL_DETECT_CFG, and EPL_FIFO_ERROR_STATUS
+	 *
+	 * Only EPLs/lanes that are being used are initialized. All others
+	 * are left at their defaults.
+	 */
+	for (i = 0; i < FM10K_SW_EPLS_MAX; i++) {
+		if (!FM10K_SW_EXT_PORTS_EPL_USED(ext_ports, i))
+			continue;
+		for (j = 0; j < ext_ports->ports_per_epl; j++) {
+			fm10k_write_switch_reg(sw,
+				FM10K_SW_LANE_CFG(i, j), 0);
+			fm10k_write_switch_reg(sw,
+				FM10K_SW_LANE_SERDES_CFG(i, j), 0x1106);
+
+			data = fm10k_read_switch_reg(sw,
+			    FM10K_SW_LANE_ENERGY_DETECT_CFG(i, j));
+
+			data |=
+			FM10K_SW_LANE_ENERGY_DETECT_CFG_ED_MASK_RX_SIGNAL_OK |
+			FM10K_SW_LANE_ENERGY_DETECT_CFG_ED_MASK_RX_RDY |
+			FM10K_SW_LANE_ENERGY_DETECT_CFG_ED_MASK_RX_ACTIVITY;
+
+			data &=
+			~FM10K_SW_LANE_ENERGY_DETECT_CFG_ED_MASK_ENERGY_DETECT;
+			fm10k_write_switch_reg(sw,
+			    FM10K_SW_LANE_ENERGY_DETECT_CFG(i, j), data);
+
+			data = fm10k_read_switch_reg(sw,
+			    FM10K_SW_LANE_SIGNAL_DETECT_CFG(i, j));
+
+			data &=
+			~(FM10K_SW_LANE_SIGNAL_DETECT_CFG_SD_MASK_RX_SIGNAL_OK |
+			FM10K_SW_LANE_SIGNAL_DETECT_CFG_SD_MASK_RX_RDY);
+
+			data |=
+			FM10K_SW_LANE_SIGNAL_DETECT_CFG_SD_MASK_RX_ACTIVITY |
+			FM10K_SW_LANE_SIGNAL_DETECT_CFG_SD_MASK_ENERGY_DETECT;
+
+			fm10k_write_switch_reg(sw,
+			    FM10K_SW_LANE_SIGNAL_DETECT_CFG(i, j), data);
+
+			data = 0;
+			data |= ((1 << j) << 4) | (1 << j);
+			fm10k_write_switch_reg(sw,
+			    FM10K_SW_EPL_FIFO_ERROR_STATUS(i), data);
+		}
+	}
+
+	error = fm10k_epl_serdes_reset_and_load_all(sw);
+	if (error)
+		goto done;
+
+	/*
+	 * EPL_FIFO_ERROR_STATUS LINK_IP
+	 */
+	for (i = 0; i < FM10K_SW_EPLS_MAX; i++) {
+		if (!FM10K_SW_EXT_PORTS_EPL_USED(ext_ports, i))
+			continue;
+		for (j = 0; j < ext_ports->ports_per_epl; j++) {
+			fm10k_write_switch_reg(sw,
+			    FM10K_SW_LINK_IP(i, j), FM10K_SW_MASK32(31, 0));
+		}
+
+		data = 0;
+		data |= ((1 << j) << 4) | (1 << j);
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_EPL_FIFO_ERROR_STATUS(i), data);
+	}
+done:
+	return (error);
+}
+
+int fm10k_switch_dpdk_port_no_get(struct fm10k_hw *hw);
+
+int
+fm10k_switch_mirror_set(struct fm10k_hw *hw, u16 dest_port, u16 vlan)
+{
+	struct fm10k_switch *sw = &fm10k_sw;
+	int src_port = fm10k_switch_dpdk_port_no_get(hw);
+
+	/* source port is external port number */
+	if (src_port < 0 || src_port == dest_port)
+		return -1;
+
+	return fm10k_ffu_mirror_set(sw, src_port, dest_port, vlan);
+}
+
+int fm10k_switch_mirror_reset(struct fm10k_hw *hw)
+{
+	struct fm10k_switch *sw = &fm10k_sw;
+	int src_port = fm10k_switch_dpdk_port_no_get(hw);
+
+	if (src_port < 0)
+		return -1;
+
+	return fm10k_ffu_mirror_reset(sw, src_port);
+}
+
+
+typedef struct {
+	u8 lport;
+	u8 has_ftag;
+} fm10k_sw_lport;
+
+static int
+fm10k_switch_init(struct fm10k_hw *hw, struct fm10k_switch *sw)
+{
+	u32 data;
+	unsigned int num_lports = sw->info->num_peps + FM10K_SW_EPLS_SUPPORTED;
+	fm10k_sw_lport all_lports[num_lports];
+	struct fm10k_device_info *cfg = sw->info;
+	unsigned int i;
+	unsigned int is_quad;
+	unsigned int table_idx;
+	int error;
+	u32 watermark;
+	u64 data64, data64_2;
+
+	/*
+	 * Build list of all logical ports that might appear in the
+	 * scheduler program.  Note that for any particular card
+	 * configuration, not all of these logical ports may be used.
+	 */
+	table_idx = 0;
+	for (i = 0; i < sw->info->num_peps; i++, table_idx++) {
+		all_lports[table_idx].lport = sw->pep_map[i].logical_port;
+		all_lports[table_idx].has_ftag = 1;
+	}
+	for (i = 0; i < FM10K_SW_EPLS_SUPPORTED; i++, table_idx++) {
+		all_lports[table_idx].lport = sw->epl_map[i].logical_port;
+		all_lports[table_idx].has_ftag = 0;
+	}
+
+	if (table_idx != num_lports) {
+		FM10K_SW_ERR("fm10k switch lport table construction error");
+		return -1;
+	}
+
+	/*
+	 * Reset the switch to get to the default state
+	 */
+	data = fm10k_read_switch_reg(sw, FM10K_SW_SOFT_RESET);
+	data &= ~FM10K_SW_SOFT_RESET_SWITCH_READY;
+	fm10k_write_switch_reg(sw, FM10K_SW_SOFT_RESET, data);
+	fm10k_write_flush(sw);
+
+	usec_delay(100);
+	data = fm10k_read_switch_reg(sw, FM10K_SW_SOFT_RESET);
+	data |= FM10K_SW_SOFT_RESET_SWITCH_RESET |
+			FM10K_SW_SOFT_RESET_EPL_RESET;
+	fm10k_write_switch_reg(sw, FM10K_SW_SOFT_RESET, data);
+	fm10k_write_flush(sw);
+	usec_delay(1000);
+
+	/* Clear memories */
+	fm10k_write_switch_reg64(sw, FM10K_SW_BIST_CTRL,
+	    FM10K_SW_BIST_CTRL_BIST_MODE_FABRIC |
+	    FM10K_SW_BIST_CTRL_BIST_MODE_TUNNEL |
+	    FM10K_SW_BIST_CTRL_BIST_MODE_EPL);
+	fm10k_write_flush(sw);
+
+	fm10k_write_switch_reg64(sw, FM10K_SW_BIST_CTRL,
+	    FM10K_SW_BIST_CTRL_BIST_MODE_FABRIC |
+	    FM10K_SW_BIST_CTRL_BIST_MODE_TUNNEL |
+	    FM10K_SW_BIST_CTRL_BIST_MODE_EPL |
+	    FM10K_SW_BIST_CTRL_BIST_RUN_FABRIC |
+	    FM10K_SW_BIST_CTRL_BIST_RUN_TUNNEL |
+	    FM10K_SW_BIST_CTRL_BIST_RUN_EPL);
+	fm10k_write_flush(sw);
+	usec_delay(800);
+
+	fm10k_write_switch_reg64(sw, FM10K_SW_BIST_CTRL, 0);
+	fm10k_write_flush(sw);
+
+	data = fm10k_read_switch_reg(sw, FM10K_SW_SOFT_RESET);
+	data &= ~(FM10K_SW_SOFT_RESET_SWITCH_RESET |
+			FM10K_SW_SOFT_RESET_EPL_RESET);
+	fm10k_write_switch_reg(sw, FM10K_SW_SOFT_RESET, data);
+	fm10k_write_flush(sw);
+	/* ensure switch reset is deasserted for at least 100ns */
+	usec_delay(1);
+
+	sw->epl_sbus = fm10k_sbus_attach(sw, "EPL", FM10K_SW_SBUS_EPL_CFG);
+	if (sw->epl_sbus == NULL) {
+		error = -1;
+		goto done;
+	}
+
+	/* Clear non-BIST accessible pause state memories */
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		fm10k_write_switch_reg64(sw,
+		    FM10K_SW_CM_EGRESS_PAUSE_COUNT(i, 0), 0);
+		fm10k_write_switch_reg64(sw,
+		    FM10K_SW_CM_EGRESS_PAUSE_COUNT(i, 1), 0);
+	}
+
+	/* Initialize RXQ_MCAST list */
+	for (i = 0; i < FM10K_SW_SCHED_RXQ_STORAGE_POINTERS_ENTRIES; i++) {
+		fm10k_write_switch_reg64(sw,
+		    FM10K_SW_SCHED_RXQ_STORAGE_POINTERS(i),
+		    FM10K_SW_MAKE_REG_FIELD
+			(SCHED_RXQ_STORAGE_POINTERS_HEAD_PAGE, i) |
+		    FM10K_SW_MAKE_REG_FIELD
+			(SCHED_RXQ_STORAGE_POINTERS_TAIL_PAGE, i));
+	}
+	for (i = 0;
+	     i < FM10K_SW_SCHED_RXQ_FREELIST_INIT_ENTRIES -
+		 FM10K_SW_SCHED_RXQ_STORAGE_POINTERS_ENTRIES; i++) {
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_SCHED_RXQ_FREELIST_INIT,
+		    FM10K_SW_MAKE_REG_FIELD
+			(SCHED_RXQ_FREELIST_INIT_ADDRESS,
+			i + FM10K_SW_SCHED_RXQ_STORAGE_POINTERS_ENTRIES));
+	}
+	/* Initialize TXQ list */
+	for (i = 0; i < FM10K_SW_SCHED_TXQ_HEAD_PERQ_ENTRIES; i++) {
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_SCHED_TXQ_HEAD_PERQ(i),
+		    FM10K_SW_MAKE_REG_FIELD(SCHED_TXQ_HEAD_PERQ_HEAD, i));
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_SCHED_TXQ_TAIL0_PERQ(i),
+		    FM10K_SW_MAKE_REG_FIELD(SCHED_TXQ_TAIL0_PERQ_TAIL, i));
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_SCHED_TXQ_TAIL1_PERQ(i),
+		    FM10K_SW_MAKE_REG_FIELD(SCHED_TXQ_TAIL1_PERQ_TAIL, i));
+	}
+	for (i = 0;
+	     i < FM10K_SW_SCHED_TXQ_FREELIST_INIT_ENTRIES -
+		 FM10K_SW_SCHED_TXQ_HEAD_PERQ_ENTRIES; i++) {
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_SCHED_TXQ_FREELIST_INIT,
+		    FM10K_SW_MAKE_REG_FIELD
+			(SCHED_TXQ_FREELIST_INIT_ADDRESS,
+			i + FM10K_SW_SCHED_TXQ_HEAD_PERQ_ENTRIES));
+	}
+	/* Initialize free segment list */
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_SCHED_SSCHED_RX_PERPORT(i),
+		    FM10K_SW_MAKE_REG_FIELD(SCHED_SSCHED_RX_PERPORT_NEXT, i));
+	}
+	for (i = 0;
+	     i < FM10K_SW_SCHED_FREELIST_INIT_ENTRIES -
+		 FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_SCHED_FREELIST_INIT,
+		    FM10K_SW_MAKE_REG_FIELD
+			(SCHED_FREELIST_INIT_ADDRESS,
+			i + FM10K_SW_LOGICAL_PORTS_MAX));
+	}
+	/* Disable switch scan chain */
+	fm10k_write_switch_reg(sw,
+		FM10K_SW_SCAN_DATA_IN,
+	    FM10K_SW_SCAN_DATA_IN_UPDATE_NODES |
+		FM10K_SW_SCAN_DATA_IN_PASSTHRU);
+
+	error = fm10k_switch_configure_epls(hw, sw);
+	if (error)
+		goto done;
+
+	/*
+	 * XXX for now configure store-and-forward between PEPs and external
+	 * ports regardless of relative speeds
+	 */
+	for (i = 0; i < num_lports; i++) {
+		fm10k_write_switch_reg64(sw,
+		    FM10K_SW_SAF_MATRIX(all_lports[i].lport),
+		    FM10K_SW_SAF_MATRIX_ENABLE_SNF_ALL_PORTS);
+	}
+
+	/* Disable MTU violation trap */
+	data = fm10k_read_switch_reg(sw, FM10K_SW_SYS_CFG_1);
+	data &= ~FM10K_SW_SYS_CFG_1_TRAP_MTU_VIOLATIONS;
+	fm10k_write_switch_reg(sw, FM10K_SW_SYS_CFG_1, data);
+
+	/* Disable ingress VLAN filtering and learning */
+	for (i = 0; i < num_lports; i++) {
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_PORT_CFG_3(all_lports[i].lport), 0);
+	}
+	/*
+	 * Make all ports members of every VLAN, and configure every VLAN to
+	 * use MST instance 0.
+	 */
+	data64 = 0;
+	for (i = 0; i < num_lports; i++)
+		data64 |=
+			FM10K_SW_INGRESS_VID_TABLE_MEMBERSHIP
+			(all_lports[i].lport);
+	for (i = 0; i < FM10K_SW_INGRESS_VID_TABLE_ENTRIES; i++) {
+		fm10k_write_switch_reg128(sw, FM10K_SW_INGRESS_VID_TABLE(i),
+		    FM10K_SW_INGRESS_VID_TABLE_REFLECT, data64);
+	}
+	data64 = 0;
+	for (i = 0; i < num_lports; i++)
+		data64 |=
+			FM10K_SW_EGRESS_VID_TABLE_MEMBERSHIP
+			(all_lports[i].lport);
+	for (i = 0; i < FM10K_SW_EGRESS_VID_TABLE_ENTRIES; i++)
+		fm10k_write_switch_reg128(sw,
+			FM10K_SW_EGRESS_VID_TABLE(i), 0, data64);
+
+	// Init MOD_VLAN_TAG_VID1_MAP
+	for (i = 0; i < FM10K_SW_INGRESS_VID_TABLE_ENTRIES; i++) {
+		data64 = i;
+		data64 = data64 << 48;
+		fm10k_write_switch_reg64(sw,
+			0xE80000 + 0x2 * i + 0x20000, data64);
+	}
+
+	/* Configure MST instance 0 to forward for all ports */
+	data64 = 0;
+	for (i = 0; i < num_lports; i++)
+		data64 |=
+			FM10K_SW_EGRESS_MST_TABLE_FORWARDING
+			(all_lports[i].lport);
+	fm10k_write_switch_reg64(sw,
+			FM10K_SW_EGRESS_MST_TABLE(0), data64);
+
+	data64 = 0;
+	data64_2 = 0;
+	for (i = 0; i < num_lports; i++) {
+		if (all_lports[i].lport <
+		    FM10K_SW_INGRESS_MST_TABLE_PORTS_PER_TABLE) {
+			data64 |=
+			    FM10K_SW_MAKE_REG_FIELD_IDX64
+				(INGRESS_MST_TABLE_STP_STATE,
+				all_lports[i].lport,
+				FM10K_SW_INGRESS_MST_TABLE_STP_STATE_FORWARD,
+				all_lports[i].lport, all_lports[i].lport);
+		} else {
+			data64_2 |=
+			    FM10K_SW_MAKE_REG_FIELD_IDX64
+				(INGRESS_MST_TABLE_STP_STATE,
+				all_lports[i].lport,
+				FM10K_SW_INGRESS_MST_TABLE_STP_STATE_FORWARD,
+				all_lports[i].lport, all_lports[i].lport);
+		}
+	}
+	fm10k_write_switch_reg64(sw,
+			FM10K_SW_INGRESS_MST_TABLE(0, 0), data64);
+	fm10k_write_switch_reg64(sw,
+			FM10K_SW_INGRESS_MST_TABLE(1, 0), data64_2);
+
+	for (i = 0; i < num_lports; i++) {
+		data64 = fm10k_read_switch_reg64(sw,
+		    FM10K_SW_PARSER_PORT_CFG_1(all_lports[i].lport));
+		data64 |= FM10K_SW_PARSER_PORT_CFG_1_VLAN1_TAG(0) |
+				  FM10K_SW_PARSER_PORT_CFG_1_VLAN2_TAG(0);
+		fm10k_write_switch_reg64(sw,
+		    FM10K_SW_PARSER_PORT_CFG_1(all_lports[i].lport), data64);
+
+		/*
+		 * Configure tags for f-tagged lports
+		 */
+		if (all_lports[i].has_ftag) {
+			data64 = fm10k_read_switch_reg64(sw,
+			    FM10K_SW_PARSER_PORT_CFG_1(all_lports[i].lport));
+			data64 |= FM10K_SW_PARSER_PORT_CFG_1_FTAG;
+			fm10k_write_switch_reg64(sw,
+			    FM10K_SW_PARSER_PORT_CFG_1(all_lports[i].lport),
+			    data64);
+
+			data64 = fm10k_read_switch_reg64(sw,
+			    FM10K_SW_MOD_PER_PORT_CFG_2(all_lports[i].lport));
+			data64 |= FM10K_SW_MOD_PER_PORT_CFG_2_FTAG;
+			fm10k_write_switch_reg64(sw,
+			    FM10K_SW_MOD_PER_PORT_CFG_2(all_lports[i].lport),
+			    data64);
+		}
+		data64 = fm10k_read_switch_reg64(sw,
+		    FM10K_SW_PARSER_PORT_CFG_2(all_lports[i].lport));
+		data64 |= FM10K_SW_PARSER_PORT_CFG_2_PARSE_L3;
+		data64 |= FM10K_SW_PARSER_PORT_CFG_2_PARSE_L4;
+		fm10k_write_switch_reg64(sw,
+		    FM10K_SW_PARSER_PORT_CFG_2(all_lports[i].lport), data64);
+	}
+
+	/*
+	 * Assign default SGLORTs for the EPL ports in the PARSER config.
+	 * This isn't necessary for the PEPs as for those, as all frames
+	 * ingressing from PEPs are tagged with an SGLORT that is derived
+	 * from a per-tx-queue setting.
+	 *
+	 * The PORT_CFG_ISL register offset is determined by logical port
+	 * number and the register contents determined by the corresponding
+	 * SGLORT.
+	 */
+	for (i = 0; i < FM10K_SW_EPLS_SUPPORTED; i++) {
+		fm10k_write_switch_reg(sw,
+			    FM10K_SW_PORT_CFG_ISL
+				(fm10k_sw.epl_map[i].logical_port),
+			    FM10K_SW_MAKE_REG_FIELD
+				(PORT_CFG_ISL_SGLORT,
+				fm10k_sw.epl_map[i].glort));
+	}
+
+	/*
+	 * FFU counter, 11.10.3.5 POLICER_CFG[0..3]
+	 */
+	fm10k_write_switch_reg64(sw,
+			0xE40000 + 0x2 * FM10K_SW_FFU_CNT_BANK + 0x11000,
+			FM10K_SW_POLICER_LAST);
+
+	/*
+	 * 11.10.3.1 POLICER_CFG_4K[0..1][0..4095]
+	 * 11.10.3.2 POLICER_CFG_512[0..1][0..511]
+	 */
+	for (i = 0; i < FM10K_SW_FFU_CNT_MAX; i++) {
+		if (FM10K_SW_FFU_CNT_BANK < 2) {
+			fm10k_read_switch_reg64(sw,
+				0xE40000 + 0x2000 * FM10K_SW_FFU_CNT_BANK +
+				0x2 * (i + FM10K_SW_FFU_CNT_START) + 0x0);
+		} else {
+			fm10k_read_switch_reg64(sw,
+				0xE40000 + 0x400 * (FM10K_SW_FFU_CNT_BANK - 2) +
+				0x2 * (i + FM10K_SW_FFU_CNT_START) + 0x4000);
+		}
+	}
+
+	/*
+	 * FFU
+	 *
+	 * The FFU is programmed to match on SGLORT and to set the DGLORT to
+	 * a unique value corresponding to the given SGLORT.  One TCAM entry
+	 * is required per host port per PEP and one per external interface.
+	 * Only slice 0 is used.
+	 */
+	if (fm10k_ffu_init(sw, sw->dpdk_cfg) < 0)
+		return -1;
+
+	/*
+	 * Program the segment scheduler (see tables at top)
+	 *
+	 * The TX and RX schedules are the same.  Page 0 is used.
+	 */
+	if (cfg->ext_port_speed == 40 || cfg->ext_port_speed == 100)
+		is_quad = 1;
+	else
+		is_quad = 0;
+	for (i = 0;
+			i < sizeof(fm10k_sched_prog) /
+				sizeof(fm10k_sched_prog[0]);
+			i++) {
+		/*
+		 * In addition to explicit idle cycles, non-quad port
+		 * entries are converted to idle cycles if the interfaces
+		 * are configured in quad-port mode.
+		 */
+		if (fm10k_sched_prog[i].idle ||
+		    (!fm10k_sched_prog[i].quad && is_quad))
+			data = FM10K_SW_SCHED_SCHEDULE_IDLE;
+		else
+			data = FM10K_SW_SCHED_SCHEDULE_ENTRY
+				(fm10k_sched_prog[i].phys,
+			    fm10k_sched_prog[i].log,
+			    (fm10k_sched_prog[i].quad == FM10K_SW_QUAD_40_100) ?
+			    is_quad : fm10k_sched_prog[i].quad);
+		fm10k_write_switch_reg(sw,
+			FM10K_SW_SCHED_RX_SCHEDULE(0, i), data);
+		fm10k_write_switch_reg(sw,
+			FM10K_SW_SCHED_TX_SCHEDULE(0, i), data);
+	}
+
+	fm10k_write_switch_reg(sw, FM10K_SW_SCHED_SCHEDULE_CTRL,
+	    FM10K_SW_SCHED_SCHEDULE_CTRL_RX_ENABLE |
+	    FM10K_SW_MAKE_REG_FIELD
+		(SCHED_SCHEDULE_CTRL_RX_MAX_INDEX,
+		(sizeof(fm10k_sched_prog) /
+		sizeof(fm10k_sched_prog[0])) - 1) |
+	    FM10K_SW_SCHED_SCHEDULE_CTRL_TX_ENABLE |
+	    FM10K_SW_MAKE_REG_FIELD
+		(SCHED_SCHEDULE_CTRL_TX_MAX_INDEX,
+		(sizeof(fm10k_sched_prog) /
+		sizeof(fm10k_sched_prog[0])) - 1));
+
+	/* Per 5.7.10.4 */
+	watermark = FM10K_SW_MEM_POOL_SEGS_MAX -
+	    ((sw->info->num_peps +
+	    FM10K_SW_EPLS_SUPPORTED * FM10K_SW_EPL_LANES) *
+	    FM10K_SW_HOWMANY(FM10K_SW_PACKET_SIZE_MAX,
+	    FM10K_SW_MEM_POOL_SEG_SIZE, FM10K_SW_MEM_POOL_SEG_SIZE)) -
+	    FM10K_SW_MEM_POOL_SEGS_RSVD;
+	fm10k_write_switch_reg(sw, FM10K_SW_CM_GLOBAL_WM,
+	    FM10K_SW_MAKE_REG_FIELD(CM_GLOBAL_WM_WATERMARK, watermark));
+	fm10k_write_switch_reg(sw, FM10K_SW_CM_GLOBAL_CFG,
+	    FM10K_SW_CM_GLOBAL_CFG_WM_SWEEP_EN |
+	    FM10K_SW_CM_GLOBAL_CFG_PAUSE_GEN_SWEEP_EN |
+	    FM10K_SW_CM_GLOBAL_CFG_PAUSE_REC_SWEEP_EN |
+	    FM10K_SW_MAKE_REG_FIELD
+		(CM_GLOBAL_CFG_NUM_SWEEPER_PORTS,
+		FM10K_SW_LOGICAL_PORTS_MAX));
+
+	/* Configure stats counters */
+	data = FM10K_SW_RX_STATS_CFG_ENABLE_ALL_BANKS;
+	data64 =
+	    FM10K_SW_MOD_STATS_CFG_ENABLE_GROUP_7 |
+	    FM10K_SW_MOD_STATS_CFG_ENABLE_GROUP_8;
+	for (i = 0; i < num_lports; i++) {
+		fm10k_write_switch_reg(sw,
+		    FM10K_SW_RX_STATS_CFG(all_lports[i].lport),
+		    data |
+		    ((all_lports[i].has_ftag) ?
+			FM10K_SW_MAKE_REG_FIELD
+				(RX_STATS_CFG_PER_FRAME_ADJUSTMENT,
+			    FM10K_SW_FTAG_SIZE) :
+			0));
+		fm10k_write_switch_reg64(sw,
+		    FM10K_SW_MOD_STATS_CFG(all_lports[i].lport),
+		    data64);
+	}
+
+	/* Transition switch to ready */
+	data = fm10k_read_switch_reg(sw, FM10K_SW_SOFT_RESET);
+	data |= FM10K_SW_SOFT_RESET_SWITCH_READY;
+	fm10k_write_switch_reg(sw, FM10K_SW_SOFT_RESET, data);
+
+ done:
+	return (error);
+}
+
+
+static void
+fm10k_switch_leds_init(struct fm10k_switch *sw)
+{
+	struct fm10k_device_info *cfg = sw->info;
+	unsigned int i;
+	uint8_t addr;
+	uint32_t data;
+	int max;
+
+	switch (FM10K_SW_CARD_ID(cfg->subvendor, cfg->subdevice)) {
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QS41):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QS41):
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QS43):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QS43):
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QL4):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/*
+		 * Set up the first PCA9545 mux so we can get at the PCA9635
+		 * that the LED control lines are connected to.
+		 */
+		fm10k_i2c_write8(sw->i2c, 0x70, 0x04);
+
+		/*
+		 * PCA9635 initialization
+		 * only differences from defaults are noted
+		 */
+
+		/* MODE1 - put into sleep mode to ensure it is brought out
+		 * of sleep without violating the oscillator startup wait
+		 */
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x00, 0x10);
+		fm10k_udelay(10);
+
+		/* MODE1 - normal mode, disable all call */
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x00, 0x00);
+
+		/* Wait for oscillator to stabilize after coming out of sleep */
+		fm10k_udelay(500);
+
+		/* MODE2 - group control is blinking, open drain outputs,
+		 * OE high -> LEDn = 0
+		 */
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x01, 0x20);
+
+		/* PWM0 - 100% duty cycle */
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x02, 0xff);
+
+		/* PWM1 - 100% duty cycle */
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x03, 0xff);
+
+		/* GRPPWM - 50% blink duty cycle */
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x12, 0x80);
+
+		/* GRPFREQ - FM10K_LED_BLINKS_PER_SECOND */
+		if (24 / FM10K_LED_BLINKS_PER_SECOND > 1)
+			max = 24 / FM10K_LED_BLINKS_PER_SECOND;
+		else
+			max = 1;
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x13, max - 1);
+
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4):
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4_REV_2):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/*
+		 * Set up the first PCA9545 mux so we can get at the PCA9635s
+		 * that the LED control lines are connected to.
+		 */
+		fm10k_i2c_write8(sw->i2c, 0x70, 0x01);
+
+		/*
+		 * PCA9635 initialization
+		 * only differences from defaults are noted
+		 */
+
+		addr = 0x6a;
+		for (i = 0; i < 2; i++) {
+			/* MODE1 - put into sleep mode to ensure it is
+			 * brought out of sleep without violating the
+			 * oscillator startup wait
+			 */
+			fm10k_i2c_write16(sw->i2c, addr, 0x00, 0x10);
+			fm10k_udelay(10);
+
+			/* MODE1 - normal mode, disable all call */
+			fm10k_i2c_write16(sw->i2c, addr, 0x00, 0x00);
+
+			/* MODE2 - group control is blinking, open drain
+			 * outputs, OE high -> LEDn = 0
+			 */
+			fm10k_i2c_write16(sw->i2c, addr, 0x01, 0x20);
+
+			/* Wait for oscillator to stabilize
+			 * after coming out of sleep
+			 */
+			fm10k_udelay(500);
+
+			/* PWM0 - 100% duty cycle */
+			fm10k_i2c_write16(sw->i2c, addr, 0x02, 0xff);
+
+			/* PWM3 - 100% duty cycle */
+			fm10k_i2c_write16(sw->i2c, addr, 0x05, 0xff);
+
+			/* GRPPWM - 50% blink duty cycle */
+			fm10k_i2c_write16(sw->i2c, addr, 0x12, 0x80);
+
+			/* GRPFREQ - FM10K_LED_BLINKS_PER_SECOND */
+			if (24 / FM10K_LED_BLINKS_PER_SECOND > 1)
+				max = 24 / FM10K_LED_BLINKS_PER_SECOND;
+			else
+				max = 1;
+			fm10k_i2c_write16(sw->i2c, addr, 0x13, max - 1);
+
+			addr = 0x69;
+		}
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRL_QXSL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		addr = 0x62;
+		fm10k_i2c_write16(sw->i2c, addr, 0x03, 0x88);
+		fm10k_i2c_write16(sw->i2c, addr, 0x01, 0x0);
+
+		data = fm10k_read_switch_reg(sw, 0xc2b);
+		data |= 1 << 24;
+		fm10k_write_switch_reg(sw, 0xc2b, data);
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRM_QXSL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/*
+		 * PCA9538 initialization
+		 * only differences from defaults are noted
+		 */
+		/*
+		 *	00000000: No link
+		 *	00000001: 10G Port 0
+		 *  00000010: 40G Port 0
+		 *  00000100: 25G Port 0
+		 *  00001000: 100G Port 0
+		 */
+		addr = 0x62;
+		fm10k_i2c_write16(sw->i2c, addr, 0x03, 0x0);
+		fm10k_i2c_write16(sw->i2c, addr, 0x01, 0x0);
+
+		addr = 0x65;
+		fm10k_i2c_write16(sw->i2c, addr, 0x03, 0xf0);
+		fm10k_i2c_write16(sw->i2c, addr, 0x01, 0x0);
+
+		addr = 0x66;
+		fm10k_i2c_write16(sw->i2c, addr, 0x03, 0x0);
+		fm10k_i2c_write16(sw->i2c, addr, 0x01, 0x0);
+
+		/* set LEC_CFG */
+		data = fm10k_read_switch_reg(sw, 0xc2b);
+		data |= 1 << 24;
+		fm10k_write_switch_reg(sw, 0xc2b, data);
+
+		/* port from rdifd, LED */
+		fm10k_gpio_output_set(sw, 4, 0);
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+
+	default:
+		FM10K_SW_ERR("don't know how to operate LEDs for this card "
+		    "(subvendor=0x%04x subdevice=0x%04x)",
+		    cfg->subvendor, cfg->subdevice);
+		break;
+	}
+}
+
+
+static unsigned int
+fm10k_switch_pca9635_led_bits(uint8_t led_flags)
+{
+	unsigned int bits;
+
+	if (led_flags & FM10K_SW_EXT_PORT_LED_FLAG_UP) {
+		if (led_flags & FM10K_SW_EXT_PORT_LED_FLAG_ACTIVE)
+			bits = 0x3; /* group blink */
+		else
+			bits = 0x1; /* full on */
+	} else {
+		bits = 0; /* off */
+	}
+	return (bits);
+}
+
+static void
+fm10k_switch_process_leds(void *ctx)
+{
+	struct fm10k_switch *sw = ctx;
+	struct fm10k_device_info *cfg = sw->info;
+	struct fm10k_ext_ports *ports = sw->ext_ports;
+	struct fm10k_ext_port *port;
+	unsigned int i;
+	unsigned int num_ports = ports->num_ports;
+	uint32_t data;
+	uint8_t update_port[num_ports];
+	uint8_t led_flags = 0, read;
+	uint8_t addr;
+
+	FM10K_SW_SWITCH_LOCK(sw);
+
+	if (sw->master_hw->sw_addr == NULL) {
+		FM10K_SW_SWITCH_UNLOCK(sw);
+		return;
+	}
+
+	for (i = 0; i < num_ports; i++) {
+		port = &ports->ports[i];
+		data = fm10k_read_switch_reg(sw,
+		    FM10K_SW_EPL_LED_STATUS(port->eplno));
+		led_flags =
+		    ((data & FM10K_SW_EPL_LED_STATUS_PORT_LINK_UP
+		    (port->first_lane)) ?
+			FM10K_SW_EXT_PORT_LED_FLAG_UP : 0) |
+		    ((data &
+			(FM10K_SW_EPL_LED_STATUS_PORT_TRANSMITTING
+			(port->first_lane) |
+			FM10K_SW_EPL_LED_STATUS_PORT_RECEIVING
+			(port->first_lane))) ?
+			FM10K_SW_EXT_PORT_LED_FLAG_ACTIVE : 0);
+		update_port[i] = (led_flags != port->last_led_flags);
+		port->last_led_flags = led_flags;
+	}
+	FM10K_SW_SWITCH_UNLOCK(sw);
+
+	switch (FM10K_SW_CARD_ID(cfg->subvendor, cfg->subdevice)) {
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QS41):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QS41):
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QS43):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QS43):
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QL4):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/*
+		 * Set up the first PCA9545 mux so we can get at the PCA9635
+		 * that the LED control lines are connected to.
+		 */
+		fm10k_i2c_write8(sw->i2c, 0x70, 0x04);
+
+		if (!(update_port[0] || update_port[1])) {
+			FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+			break;
+		}
+
+		led_flags =
+		    fm10k_switch_pca9635_led_bits
+			(ports->ports[0].last_led_flags) |
+		    (fm10k_switch_pca9635_led_bits
+		    (ports->ports[1].last_led_flags) << 2);
+
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x14, led_flags);
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4):
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4_REV_2):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/*
+		 * Set up the first PCA9545 mux so we can get at the PCA9635s
+		 * that the LED control lines are connected to.
+		 */
+		fm10k_i2c_write8(sw->i2c, 0x70, 0x01);
+
+		/* XXX will need to update for QSFPs operating
+		 * as four independent lanes/ports
+		 */
+		for (i = 0; i < 2; i++) {
+			if (update_port[i] == 0)
+				continue;
+
+			addr = (i == 0) ? 0x6a : 0x69;
+
+			port = &ports->ports[i];
+			led_flags =
+				fm10k_switch_pca9635_led_bits
+				(port->last_led_flags);
+
+			switch (port->lane_speed * port->num_lanes) {
+			case 100:
+				fm10k_i2c_write16(sw->i2c,
+					addr, 0x14, led_flags);
+				break;
+			case 40:
+				fm10k_i2c_write16(sw->i2c,
+					addr, 0x14, led_flags << 6);
+				break;
+			}
+		}
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRL_QXSL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		led_flags = 0;
+		addr = 0x62;
+		for (i = 0; i < 2; i++) {
+			if (!update_port[i])
+				continue;
+			port = &ports->ports[i];
+			if (port->last_led_flags &
+				FM10K_SW_EXT_PORT_LED_FLAG_UP) {
+				switch (port->lane_speed * port->num_lanes) {
+				case 100:
+				case 25:
+					led_flags |= 0x6 << (4 * i); /* 100G */
+					break;
+				case 40:
+				case 10:
+					led_flags |= 0x4 << (4 * i); /* 40G */
+					break;
+				default:
+					led_flags = 0;
+				}
+			} else {
+				led_flags = 0; /* off */
+			}
+		}
+
+		if (update_port[0] || update_port[1]) {
+			fm10k_i2c_read8_ext(sw->i2c, addr, 0x1, &read);
+			if (update_port[0])
+				led_flags |= read & 0xf0;
+			else
+				led_flags |= read & 0xf;
+			fm10k_i2c_write16(sw->i2c, addr, 0x1, led_flags);
+			fm10k_i2c_read8_ext(sw->i2c, addr, 0x1, &read);
+		}
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRM_QXSL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/* XXX will need to update for QSFPs
+		 * operating as four independent lanes/ports
+		 */
+		for (i = 0; i < 2; i++) {
+			if (!update_port[i])
+				continue;
+			/*
+			 *	00000000: No link
+			 *	00000001: 10G Port 0
+			 *  00000010: 40G Port 0
+			 *  00000100: 25G Port 0
+			 *  00001000: 100G Port 0
+			 */
+			addr = (i == 0) ? 0x62 : 0x66;
+			port = &ports->ports[i];
+			if (port->last_led_flags &
+				FM10K_SW_EXT_PORT_LED_FLAG_UP) {
+				switch (port->lane_speed * port->num_lanes) {
+				case 100:
+					led_flags = 0x08; /* 100G */
+					break;
+				case 40:
+					led_flags = 0x02; /* 40G */
+					break;
+				}
+			} else {
+				led_flags = 0; /* off */
+			}
+			fm10k_i2c_write16(sw->i2c,
+				addr, 0x1, led_flags);
+		}
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+	}
+}
+
+static void *
+fm10k_switch_leds_update(void *ctx)
+{
+	struct fm10k_switch *sw = ctx;
+
+	while (sw->detaching == 0) {
+		fm10k_switch_process_leds(ctx);
+		usec_delay(FM10K_LED_POLL_INTERVAL_MS * 1000);
+	}
+	return NULL;
+}
+
+static void
+fm10k_switch_leds_off(struct fm10k_switch *sw, struct fm10k_ext_ports *ports)
+{
+	struct fm10k_device_info *cfg = sw->info;
+	struct fm10k_ext_port *port;
+	unsigned int i;
+	uint8_t led_flags;
+	uint8_t addr;
+
+	switch (FM10K_SW_CARD_ID(cfg->subvendor, cfg->subdevice)) {
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QS41):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QS41):
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QS43):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QS43):
+	case FM10K_SW_CARD(SILICOM, PE340G2DBIR_QL4):
+	case FM10K_SW_CARD(SILICOM_RB, PE340G2DBIR_QL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/*
+		 * Set up the first PCA9545 mux so we can get at the PCA9635
+		 * that the LED control lines are connected to.
+		 */
+		fm10k_i2c_write8(sw->i2c, 0x70, 0x04);
+
+		led_flags =
+		    fm10k_switch_pca9635_led_bits(0) |
+		    (fm10k_switch_pca9635_led_bits(0) << 2);
+
+		fm10k_i2c_write16(sw->i2c, 0x6a, 0x14, led_flags);
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4):
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIR_QXSL4_REV_2):
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRL_QXSL4):
+	case FM10K_SW_CARD(SILICOM, PE3100G2DQIRM_QXSL4):
+		FM10K_SW_I2C_REQ_LOCK(sw->i2c);
+		/*
+		 * Set up the first PCA9545 mux so we can get at the PCA9635s
+		 * that the LED control lines are connected to.
+		 */
+		fm10k_i2c_write8(sw->i2c, 0x70, 0x01);
+
+		/* XXX will need to update for QSFPs
+		 * operating as four independent lanes/ports
+		 */
+		addr = 0x6a;
+		led_flags = fm10k_switch_pca9635_led_bits(0);
+		for (i = 0; i < 2; i++) {
+			port = &ports->ports[i];
+			switch (port->lane_speed * port->num_lanes) {
+			case 100:
+				fm10k_i2c_write16(sw->i2c,
+					addr, 0x14, led_flags);
+				break;
+			case 40:
+				fm10k_i2c_write16(sw->i2c,
+					addr, 0x14, led_flags << 6);
+				break;
+			}
+
+			addr = 0x69;
+		}
+		FM10K_SW_I2C_REQ_UNLOCK(sw->i2c);
+		break;
+	}
+}
+
+
+static void *
+fm10k_switch_process_intr(void *ctx)
+{
+	struct fm10k_switch *sw = ctx;
+	struct fm10k_ext_ports *ports;
+	uint64_t gid;
+	uint64_t reenable_mask;
+	uint16_t epl_mask;
+
+	while (1) {
+		sem_wait(&sw->intr_tq);
+		/*
+		 * Mask off all global interrupt detect interrupts
+		 * toward PCIe to ensure that the PEP sees the
+		 * interrupt condition clear so that all subsequent
+		 * events will be recognized.  The same result could
+		 * be achieved by masking off all block-internal
+		 * interrupt sources in all blocks prior to handling
+		 * any global interrupt detect interrupts, but the
+		 * approach taken here makes the interrupt processing
+		 * easier to decompose and increases the likelihood that
+		 * more work will be coalesced into fewer interrupts.
+		 */
+		FM10K_SW_TRACE("masking off PCIe gid interrupts");
+		FM10K_SW_SWITCH_LOCK(sw);
+		/*
+		 * sw->ext_ports will be set to NULL during detach
+		 * to indicate that we are to no longer process EPL
+		 * interrupts.
+		 */
+		ports = sw->ext_ports;
+		/*
+		 * sw->sm_mailbox_enabled will be set to 0 during
+		 * detach to indicate that we are to no longer process
+		 * PEP interrupts.
+		 */
+		fm10k_write_switch_reg64(sw,
+			FM10K_SW_INTERRUPT_MASK_PCIE(sw->pepno),
+			FM10K_SW_INTERRUPT_MASK_PCIE_ALL);
+		gid = fm10k_read_switch_reg64(sw,
+				FM10K_SW_GLOBAL_INTERRUPT_DETECT);
+		FM10K_SW_SWITCH_UNLOCK(sw);
+
+		reenable_mask = 0;
+		if (ports) {
+			epl_mask = fm10k_ext_ports_epl_intrs(ports, gid);
+			reenable_mask |=
+				FM10K_SW_MAKE_REG_FIELD64
+				(INTERRUPT_MASK_PCIE_EPL, epl_mask);
+		}
+
+		if (gid & FM10K_SW_GLOBAL_INTERRUPT_DETECT_I2C)
+			fm10k_i2c_intr(sw->i2c);
+
+		reenable_mask |= FM10K_SW_INTERRUPT_MASK_PCIE_I2C;
+
+		FM10K_SW_TRACE("re-enabling PCIe gid interrupts");
+		FM10K_SW_SWITCH_LOCK(sw);
+		fm10k_write_switch_reg64(sw,
+			FM10K_SW_INTERRUPT_MASK_PCIE(sw->pepno),
+			~reenable_mask);
+		FM10K_SW_SWITCH_UNLOCK(sw);
+	}
+	return NULL;
+}
+
+void
+fm10k_switch_intr(struct fm10k_hw *hw)
+{
+	if (hw)
+		sem_post(&fm10k_sw.intr_tq);
+}
+
+
+static void
+fm10k_switch_enable_interrupts(struct fm10k_switch *sw)
+{
+	uint64_t data64;
+	unsigned int i;
+
+	data64 = fm10k_read_switch_reg64(sw,
+			FM10K_SW_INTERRUPT_MASK_PCIE(sw->pepno));
+
+	/* enable interrupts from all EPLs in use */
+	FM10K_SW_REPLACE_REG_FIELD64(data64, INTERRUPT_MASK_PCIE_EPL,
+	    ~sw->ext_ports->epl_mask, data64);
+
+	/* enable interrupts from all non-master PEPs in use */
+	FM10K_SW_REPLACE_REG_FIELD64(data64, INTERRUPT_MASK_PCIE_PCIE,
+	    ~sw->pep_mask, data64);
+
+	/* enable I2C interrupt */
+	data64 &= ~FM10K_SW_INTERRUPT_MASK_PCIE_I2C;
+	fm10k_write_switch_reg64(sw,
+		FM10K_SW_INTERRUPT_MASK_PCIE(sw->pepno), data64);
+
+	/* enable outbound mailbox interrupts from all non-master PEPs */
+	for (i = 0; i < FM10K_SW_PEPS_MAX; i++) {
+		if (sw->pep_mask & (1 << i))
+			fm10k_write_switch_reg(sw, FM10K_SW_PCIE_GLOBAL(i, IM),
+			    ~FM10K_SW_IM_MAILBOX);
+	}
+	FM10K_WRITE_REG(sw->master_hw,
+		FM10K_SW_EIMR, FM10K_SW_EIMR_FIELD(SWITCH_INT, ENABLE));
+}
+
+
+static void
+fm10k_switch_detach(struct fm10k_hw *hw)
+{
+	struct fm10k_ext_ports *ports;
+	struct fm10k_switch *sw = &fm10k_sw;
+
+	if (hw == NULL || sw == NULL)
+		return;
+
+	FM10K_SW_SWITCH_LOCK(sw);
+	sw->detaching = 1;
+	FM10K_SW_SWITCH_UNLOCK(sw);
+
+	if (sw->ext_ports) {
+		ports = sw->ext_ports;
+		/*
+		 * Ensure that any further switch interrupts will not
+		 * process sw->ext_ports before detaching them.
+		 */
+		FM10K_SW_SWITCH_LOCK(sw);
+		sw->ext_ports = NULL;
+		FM10K_SW_SWITCH_UNLOCK(sw);
+		fm10k_switch_leds_off(sw, ports);
+		fm10k_ext_ports_detach(ports);
+	}
+
+	if (sw->epl_sbus)
+		fm10k_sbus_detach(sw->epl_sbus);
+
+	/*
+	 * Detach i2c after ext_ports so ext_ports can use i2c to disable
+	 * phys.
+	 */
+	if (sw->i2c)
+		fm10k_i2c_detach(sw->i2c);
+
+	fm10k_hw_eicr_disable_source(sw, FM10K_SW_EICR_SWITCH_INT);
+}
+
+
+static unsigned int
+fm10k_switch_get_fabric_clock(struct fm10k_switch *sw)
+{
+	uint32_t pll_fabric;
+	unsigned int freq;
+
+	pll_fabric = fm10k_read_switch_reg(sw, FM10K_SW_PLL_FABRIC_LOCK);
+
+	switch (FM10K_SW_REG_FIELD(pll_fabric, PLL_FABRIC_FREQSEL)) {
+	case FM10K_SW_PLL_FABRIC_FREQSEL_CTRL: {
+		uint32_t ctrl;
+		unsigned int refdiv, fbdiv4, fbdiv255;
+
+		ctrl = fm10k_read_switch_reg(sw, FM10K_SW_PLL_FABRIC_CTRL);
+		refdiv = FM10K_SW_REG_FIELD(ctrl, PLL_FABRIC_REFDIV);
+		fbdiv4 = FM10K_SW_REG_FIELD(ctrl, PLL_FABRIC_FBDIV4);
+		fbdiv255 = FM10K_SW_REG_FIELD(ctrl, PLL_FABRIC_FBDIV255);
+
+		freq = (15625 * 4 * fbdiv255 * (1 + fbdiv4)) / (refdiv * 100);
+		break;
+	}
+	case FM10K_SW_PLL_FABRIC_FREQSEL_F600:
+		freq = 600;
+		break;
+	case FM10K_SW_PLL_FABRIC_FREQSEL_F500:
+		freq = 500;
+		break;
+	case FM10K_SW_PLL_FABRIC_FREQSEL_F400:
+		freq = 400;
+		break;
+	case FM10K_SW_PLL_FABRIC_FREQSEL_F300:
+		freq = 300;
+		break;
+	default:
+		freq = 0;
+		break;
+	}
+
+	return freq;
+}
+
+static unsigned int
+fm10k_switch_get_sku(struct fm10k_switch *sw)
+{
+	uint32_t fuse_data;
+
+	fuse_data = fm10k_read_switch_reg(sw, FM10K_SW_FUSE_DATA_0);
+	return FM10K_SW_REG_FIELD(fuse_data, FUSE_SKU);
+}
+
+static const char *
+fm10k_switch_get_sku_name(unsigned int sku)
+{
+	const char *name;
+
+	switch (sku) {
+	case FM10K_SW_FUSE_SKU_FM10840:
+		name = "FM10840";
+		break;
+	case FM10K_SW_FUSE_SKU_FM10420:
+		name = "FM10420";
+		break;
+	case FM10K_SW_FUSE_SKU_FM10064:
+		name = "FM10064";
+		break;
+	default:
+		name = "FM10xxx-unknown";
+		break;
+	}
+
+	return name;
+}
+
+
+static void
+fm10k_switch_describe(struct fm10k_switch *sw)
+{
+	unsigned int i, pair;
+	unsigned int reset_state, active_state;
+	uint32_t device_cfg, resets;
+
+	if (!fm10k_config_check_debug(sw->dpdk_cfg, FM10K_CONFIG_DEBUG_CONFIG))
+		return;
+
+	FM10K_SW_INFO("switch: device is %s, fabric clock %u MHz",
+	    fm10k_switch_get_sku_name(fm10k_switch_get_sku(sw)),
+	    fm10k_switch_get_fabric_clock(sw));
+
+	device_cfg = fm10k_read_switch_reg(sw, FM10K_SW_DEVICE_CFG);
+	FM10K_SW_INFO("switch: 100G Ethernet is %s",
+	    (device_cfg & FM10K_SW_DEVICE_CFG_PCIE_100G_DIS) ?
+	    "disabled" : "enabled");
+	switch (FM10K_SW_REG_FIELD(device_cfg, DEVICE_CFG_FEATURE)) {
+	case FM10K_SW_DEVICE_CFG_PCIE_FULL:
+		FM10K_SW_INFO("switch: PEPs 0-7 support 2x4 and 1x8 "
+		    "modes, PEP 8 is x1");
+		break;
+	case FM10K_SW_DEVICE_CFG_PCIE_HALF:
+		FM10K_SW_INFO("switch: PEPs 0-3 support 2x4 and 1x8 "
+		    "modes, PEPs 4 and 6 are 1x4, PEP 8 is x1");
+		break;
+	case FM10K_SW_DEVICE_CFG_PCIE_BASIC:
+		FM10K_SW_INFO("switch: PEP 0 supports, PEP 8 is x1, "
+		    "only one can be used");
+		break;
+	}
+
+	resets = fm10k_read_switch_reg(sw, FM10K_SW_SOFT_RESET);
+	for (i = 0; i < FM10K_SW_PEPS_MAX; i++) {
+		if (!(device_cfg & FM10K_SW_DEVICE_CFG_PCIE_EN(i)))
+			continue;
+
+		pair = i / 2;
+		reset_state = !!(resets & FM10K_SW_SOFT_RESET_PCIE_RESET(i));
+		active_state = !!(resets & FM10K_SW_SOFT_RESET_PCIE_ACTIVE(i));
+		if (pair < FM10K_SW_DEVICE_CFG_PCIE_MODE_PAIRS &&
+		    !(device_cfg & FM10K_SW_DEVICE_CFG_PCIE_MODE_2X4(pair))) {
+			if (i % 2 == 0)
+				FM10K_SW_INFO("switch: PEP[%u,%u] is enabled in 1x8 "
+				    "mode (Reset=%u, Active=%u)", i, i + 1,
+				    reset_state, active_state);
+			else
+				FM10K_SW_INFO("switch: PEP[%u] unexpectedly enabled when "
+				    "PEP[%u,%u] is in 1x8 mode (Reset=%u, "
+				    "Active=%u)", i, i - 1, i, reset_state,
+				    active_state);
+		} else {
+			FM10K_SW_INFO("switch: PEP[%u] is enabled in 1x4 "
+			    "mode (Reset=%u, Active=%u)", i, reset_state,
+			    active_state);
+		}
+	}
+}
+
+
+static struct fm10k_switch *
+fm10k_switch_attach(struct fm10k_hw *hw, struct fm10k_switch *sw)
+{
+	int i;
+	int error = 0;
+
+	/*
+	 * XXX apparently no way to determine one's own PEP number.
+	 */
+	switch (sw->info->num_peps) {
+	case 1:
+		sw->pepno = 0;
+		break;
+
+	case 2:
+		/*
+		 * XXX assumption is using even numbered PEPs
+		 * starting with 0, and the highest numbered PEP
+		 * is the master
+		 */
+		sw->pepno = 2;
+		for (i = 0; i < sw->info->num_peps - 1; i++)
+			sw->pep_mask |= (1 << (i * 2));
+		break;
+
+	case 4:
+		sw->pepno = 6;
+		sw->pep_mask = 0x7f;
+		break;
+
+	default:
+		sw->pepno = 0;
+	}
+
+	/* When not zero, initialize serdes in 'near loopback' mode */
+	sw->serdes_loopback = 0;
+
+	pthread_mutex_init(&sw->lock, NULL);
+
+	fm10k_switch_determine_epls(sw);
+
+	sw->ext_ports = fm10k_ext_ports_attach(sw);
+	if (sw->ext_ports == NULL)
+		goto fail;
+
+	/* label all external ports with their logical port numbers */
+	for (i = 0; i < sw->ext_ports->num_ports; i++)
+		sw->ext_ports->ports[i].lport = sw->epl_map[i].logical_port;
+
+	/* interrupt */
+	sem_init(&sw->intr_tq, 0, 0);
+	pthread_create(&sw->intr_task, NULL, fm10k_switch_process_intr, sw);
+
+	fm10k_switch_enable_interrupts(sw);
+	fm10k_switch_describe(sw);
+
+	sw->i2c = fm10k_i2c_attach(sw);
+	if (sw->i2c == NULL)
+		goto fail;
+
+	error = fm10k_switch_init(hw, sw);
+	if (error)
+		goto fail;
+
+	for (i = 0; i < sw->ext_ports->num_ports; i++)
+		fm10k_ext_port_up(&sw->ext_ports->ports[i]);
+
+	usec_delay(10000);
+	fm10k_switch_leds_init(sw);
+
+	pthread_create(&sw->led_task, NULL, &fm10k_switch_leds_update, sw);
+	pthread_create(&sw->stats_task, NULL, &fm10k_switch_process_stats, sw);
+	sw->inited = 1;
+	return sw;
+fail:
+	if (sw != NULL)
+		fm10k_switch_detach(hw);
+	return NULL;
+}
+
+struct fm10k_switch*
+fm10k_switch_get(void)
+{
+	return &fm10k_sw;
+}
+
+static int
+fm10k_switch_dpdk_port_get(struct fm10k_switch *sw, int pf_no)
+{
+	int i;
+
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		if (sw->dpdk_cfg->ports[i].pf_no == pf_no)
+			return i;
+	}
+	return -1;
+}
+
+static int
+fm10k_switch_mapped_ext_port_get(struct fm10k_switch *sw, int pf_no)
+{
+	int i;
+	struct fm10k_cfg_port_pf_map *map;
+
+	for (i = 0; i < FM10K_SW_EXT_PORTS_MAX; i++) {
+		map = &sw->dpdk_cfg->ext_port_map[i];
+		if (map->type == FM10K_CONFIG_PORT_MAP_PF) {
+			if (map->map_no[0] == pf_no)
+				return i;
+		} else if (map->type == FM10K_CONFIG_PORT_MAP_PFS) {
+			if (map->map_no[0] == pf_no || map->map_no[1] == pf_no)
+				return i;
+		}
+	}
+	return -1;
+}
+
+static int
+fm10k_switch_start(struct fm10k_switch *sw,
+		struct fm10k_hw *master_hw, eth_fm10k_dev_init_half_func *func)
+{
+	int i, j;
+	struct fm10k_dpdk_port *port;
+	struct fm10k_cfg_port_pf_map *map;
+	struct fm10k_cfg_port_pf_map *dpdk_map;
+	int dpdk_port_no, ext_port_no = 0;
+	int pf_no;
+
+	sw->info = fm10k_get_device_info(master_hw);
+	sw->master_hw = master_hw;
+	sw->pep_map = fm10k_pep_port_map;
+	sw->epl_map = fm10k_epl_port_map;
+
+	sw->info->ext_port_speed = sw->dpdk_cfg->ext_port_speed;
+	fm10k_switch_set_sched_prog();
+	fm10k_switch_attach(master_hw, &fm10k_sw);
+
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		port = &sw->dpdk_cfg->ports[i];
+		map = &sw->dpdk_cfg->dpdk_port_map[i];
+		if (port->type != FM10K_CONFIG_DPDK_PF ||
+			map->type != FM10K_CONFIG_PORT_MAP_PFS)
+			continue;
+
+		for (j = 0; j < 2; j++) {
+			pf_no =	map->map_no[j];
+			dpdk_port_no =
+				fm10k_switch_dpdk_port_get(sw, pf_no);
+			dpdk_map =
+				&sw->dpdk_cfg->dpdk_port_map[dpdk_port_no];
+			if (map->type == FM10K_CONFIG_PORT_MAP_PF) {
+				dpdk_map->type = FM10K_CONFIG_PORT_MAP_PFSS;
+				dpdk_map->map_no[0] =
+				sw->dpdk_cfg->dpdk_port_map[i].map_no[0];
+				dpdk_map->map_no[1] =
+				sw->dpdk_cfg->dpdk_port_map[i].map_no[1];
+			}
+		}
+	}
+
+	/* do initialize all ports, after switch is ready */
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		uint32_t sglort;
+		struct fm10k_dpdk_port *port = &sw->dpdk_cfg->ports[i];
+		struct fm10k_hw *hw = port->hw;
+
+		if (hw == NULL)
+			break;
+
+		map = &sw->dpdk_cfg->ext_port_map[ext_port_no];
+		if (port->type == FM10K_CONFIG_DPDK_PF)	{
+			pf_no = fm10k_switch_dpdk_pf_no_get(hw);
+			if (map->type == FM10K_CONFIG_PORT_MAP_PF) {
+				sglort = fm10k_switch_pf_glort_get(pf_no);
+			} else if (map->type == FM10K_CONFIG_PORT_MAP_PFS) {
+				ext_port_no =
+					fm10k_switch_mapped_ext_port_get
+					(sw, pf_no);
+				sglort =
+					fm10k_switch_pfs_glort_get
+					(map->map_no[0], map->map_no[1]);
+			} else {
+				FM10K_SW_ERR("Unknown mapped port type %d!",
+						port->type);
+				return -1;
+			}
+			hw->mac.dglort_map = sglort | 0xffff0000;
+		}
+		func(hw);
+	}
+	return 0;
+}
+
+
+static int
+fm10k_switch_dpdk_port_reg(struct fm10k_switch *sw,
+		struct fm10k_hw *hw, void *rte_dev, uint8_t is_pf, bool master)
+{
+	int i;
+	struct fm10k_dpdk_port *port;
+	struct fm10k_cfg_port_pf_map *map;
+
+	if (sw->dpdk_cfg == NULL && is_pf) {
+		if (fm10k_config_init(sw, hw) < 0)
+			return -1;
+	}
+
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		port = &sw->dpdk_cfg->ports[i];
+		if (master && port->hw &&
+				port->type == FM10K_CONFIG_DPDK_PF) {
+			port->pf_no =
+				sw->dpdk_cfg->pf_max - 1 -
+				(hw->mac.addr[5] - port->hw->mac.addr[5]);
+			sw->dpdk_cfg->pf_hw[port->pf_no] = port->hw;
+		}
+
+		if (port->hw == NULL) {
+			port->hw = hw;
+			port->rte_dev = rte_dev;
+			fm10k_flow_list_init(&port->flow_list);
+			map = &sw->dpdk_cfg->dpdk_port_map[i];
+			if (is_pf) {
+				sw->dpdk_cfg->pf_bind++;
+				port->type = FM10K_CONFIG_DPDK_PF;
+				if (map->type == FM10K_CONFIG_PORT_MAP_NULL) {
+					map->type = FM10K_CONFIG_PORT_MAP_PF;
+					map->map_no[0] = i;
+				}
+			} else {
+				port->type = FM10K_CONFIG_DPDK_VF;
+			}
+			if (master)
+				sw->dpdk_cfg->master_hw = hw;
+			if (sw->dpdk_cfg->master_hw) {
+				port->pf_no =
+					sw->dpdk_cfg->pf_max - 1 -
+					(sw->dpdk_cfg->master_hw->mac.addr[5] -
+					hw->mac.addr[5]);
+				sw->dpdk_cfg->pf_hw[port->pf_no] = port->hw;
+			}
+
+			return 0;
+		}
+	}
+	return -1;
+}
+
+int
+fm10k_switch_dpdk_port_no_get(struct fm10k_hw *hw)
+{
+	int i;
+
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		if (fm10k_sw.dpdk_cfg->ports[i].hw == NULL)
+			break;
+
+		if (fm10k_sw.dpdk_cfg->ports[i].hw == hw)
+			return i;
+	}
+	return -1;
+}
+
+void *
+fm10k_switch_dpdk_port_rte_dev_get(struct fm10k_hw *hw)
+{
+	int port_no = fm10k_switch_dpdk_port_no_get(hw);
+
+	if (port_no < 0)
+		return NULL;
+
+	return fm10k_switch_get()->dpdk_cfg->ports[port_no].rte_dev;
+}
+
+struct fm10k_flow_list *
+fm10k_switch_dpdk_port_flow_list_get(struct fm10k_hw *hw)
+{
+	int port_no = fm10k_switch_dpdk_port_no_get(hw);
+
+	if (port_no < 0)
+		return NULL;
+
+	return fm10k_switch_get()->dpdk_cfg->ports[port_no].flow_list;
+}
+
+static int
+fm10k_switch_dpdk_cfg_check(struct fm10k_switch *sw)
+{
+	int i;
+	bool need_default = true;
+	struct fm10k_dpdk_port *port;
+	struct fm10k_cfg_port_pf_map *map;
+
+	if (sw->dpdk_cfg->master_hw == NULL) {
+		FM10K_SW_ERR("Master PF is not bound!!!");
+		return -1;
+	}
+
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		port = &sw->dpdk_cfg->ports[i];
+		map = &sw->dpdk_cfg->dpdk_port_map[i];
+		if (port->type == FM10K_CONFIG_DPDK_PF &&
+				map->type != FM10K_CONFIG_PORT_MAP_NULL) {
+			need_default = false;
+			break;
+		}
+	}
+
+	if (need_default) {
+		for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+			map = &sw->dpdk_cfg->dpdk_port_map[i];
+			if (port->type == FM10K_CONFIG_DPDK_PF) {
+				map->type = FM10K_CONFIG_PORT_MAP_PF;
+				map->map_no[0] = i;
+			}
+		}
+	}
+
+	return 0;
+}
+
+
+static void
+fm10k_switch_dpdk_cfg_describe(struct fm10k_switch *sw)
+{
+	int i;
+	struct fm10k_cfg_port_pf_map *map;
+
+	if (!fm10k_config_check_debug(sw->dpdk_cfg, FM10K_CONFIG_DEBUG_CONFIG))
+		return;
+
+	printf("--- FM10K DYNAMIC CONFIG ---\n");
+	printf("  PF Bind  : %d\n", sw->dpdk_cfg->pf_bind);
+
+	printf("--- PF ---\n");
+	for (i = 0; i < FM10K_SW_PEP_PORTS_MAX; i++)	{
+		uint8_t *mac;
+		struct rte_eth_dev *dev;
+		struct rte_pci_device *pdev;
+
+		if (sw->dpdk_cfg->pf_hw[i] == NULL)
+			continue;
+		dev = (struct rte_eth_dev *)
+			fm10k_switch_dpdk_port_rte_dev_get
+			(sw->dpdk_cfg->pf_hw[i]);
+		pdev = RTE_ETH_DEV_TO_PCI(dev);
+		mac = sw->dpdk_cfg->pf_hw[i]->mac.addr;
+		printf("  PF%d : Logical %d Glort %#x "
+				"PCI Addr %02x:%02x.%x "
+				"MAC Addr %02x:%02x:%02x:%02x:%02x:%02x\n",
+				i, fm10k_pep_port_map[i].logical_port,
+				fm10k_switch_pf_glort_get(i), pdev->addr.bus,
+				pdev->addr.devid,
+				pdev->addr.function,
+				mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+	}
+
+	printf("--- DPDK PORT ---\n");
+	for (i = 0; i < FM10K_SW_LOGICAL_PORTS_MAX; i++) {
+		if (sw->dpdk_cfg->ports[i].type == FM10K_CONFIG_DPDK_NULL)
+			break;
+		map = &sw->dpdk_cfg->dpdk_port_map[i];
+		if (map->type == FM10K_CONFIG_PORT_MAP_NULL)
+			break;
+		if (map->type == FM10K_CONFIG_PORT_MAP_PF) {
+			printf("  DPDK PORT%d : Map to PF%d\n",
+					i, map->map_no[0]);
+		} else {
+			printf("  DPDK PORT%d : Map to PF%d&PF%d Glort %#x\n",
+					i,
+					map->map_no[0],	map->map_no[1],
+					fm10k_switch_pfs_glort_get
+					(map->map_no[0], map->map_no[1]));
+		}
+	}
+
+	printf("--- EXT PORT ---\n");
+	for (i = 0; i < sw->dpdk_cfg->ext_port_num; i++) {
+		map = &sw->dpdk_cfg->ext_port_map[i];
+		printf("  EXT  PORT%d : Logical %d Glort %#x", i,
+				fm10k_epl_port_map[i].logical_port,
+				fm10k_switch_epl_glort_get(i));
+
+		if (map->type == FM10K_CONFIG_PORT_MAP_NULL)
+			printf("\n");
+		else if (map->type == FM10K_CONFIG_PORT_MAP_PF)
+			printf(" Map to PF%d\n", map->map_no[0]);
+		else
+			printf(" Map to PF%d&PF%d Glort %#x\n",
+					map->map_no[0], map->map_no[1],
+					fm10k_switch_pfs_glort_get
+					(map->map_no[0], map->map_no[1]));
+	}
+}
+
+
+int
+fm10k_switch_dpdk_port_start(struct fm10k_hw *hw, void *rte_dev,
+		uint8_t is_pf, bool master, eth_fm10k_dev_init_half_func *func)
+{
+	int ret;
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	if (fm10k_switch_dpdk_port_reg(sw, hw, rte_dev, is_pf, master) != 0) {
+		FM10K_SW_ERR("Register ports failed!!!");
+		return -1;
+	}
+
+	/*
+	 * After all pfs are started
+	 * start switch here
+	 */
+	if (fm10k_sw.dpdk_cfg->pf_max != 0 &&
+		fm10k_sw.dpdk_cfg->pf_bind == fm10k_sw.dpdk_cfg->pf_num) {
+		if (fm10k_switch_dpdk_cfg_check(&fm10k_sw) != 0)
+			return -1;
+
+		ret = fm10k_switch_start(&fm10k_sw,
+				fm10k_sw.dpdk_cfg->master_hw, func);
+		fm10k_switch_dpdk_cfg_describe(&fm10k_sw);
+		return ret;
+	}
+	return 0;
+}
+
+void fm10k_switch_dpdk_port_stop(struct fm10k_hw *hw)
+{
+	if (hw)
+		return;
+}
+
+
+/*
+ * for multi-host, only dpdk port 0 and 1 are real rx/tx packets
+ * so we need init/setup/start dpdk port queue for them.
+ * for dpdk port 2/3, we need init/setup except start.
+ * we map the pf queue to 0/1 dpdk queue first, then map
+ * the other pf queue to 2/3 dpdk queue.
+ */
+int
+fm10k_switch_dpdk_hw_queue_map(struct fm10k_hw *hw,
+		uint16_t queue, uint16_t max_queue,
+		struct fm10k_hw **map_hw, uint16_t *map_queue)
+{
+	int idx, pf_no;
+	int dpdk_port_no = fm10k_switch_dpdk_port_no_get(hw);
+	struct fm10k_dpdk_port *port;
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	if (dpdk_port_no < 0) {
+		FM10K_SW_ERR("Can not find the dpdk port!!!");
+		return -1;
+	}
+
+	port = &sw->dpdk_cfg->ports[dpdk_port_no];
+
+	if (port->type == FM10K_CONFIG_DPDK_VF) {
+		FM10K_SW_ERR("Not support yet!!!");
+		return -1;
+	}
+
+	if (port->type != FM10K_CONFIG_DPDK_PF) {
+		FM10K_SW_ERR("Can not be here!!!");
+		return -1;
+	}
+
+	if (sw->dpdk_cfg->dpdk_port_map[dpdk_port_no].type ==
+			FM10K_CONFIG_PORT_MAP_PF) {
+		pf_no = sw->dpdk_cfg->dpdk_port_map[dpdk_port_no].map_no[0];
+		*map_hw = sw->dpdk_cfg->pf_hw[pf_no];
+		*map_queue = queue;
+		return 1;
+	} else if (sw->dpdk_cfg->dpdk_port_map[dpdk_port_no].type ==
+			FM10K_CONFIG_PORT_MAP_PFS) {
+		idx = queue % 2;
+		pf_no = sw->dpdk_cfg->dpdk_port_map[dpdk_port_no].map_no[idx];
+		*map_hw = sw->dpdk_cfg->pf_hw[pf_no];
+		*map_queue = queue / 2;
+		return 1;
+	} else if (sw->dpdk_cfg->dpdk_port_map[dpdk_port_no].type ==
+			FM10K_CONFIG_PORT_MAP_PFSS) {
+		idx = queue % 2;
+		pf_no = sw->dpdk_cfg->dpdk_port_map[dpdk_port_no].map_no[idx];
+		*map_hw = sw->dpdk_cfg->pf_hw[pf_no];
+		*map_queue = (max_queue + 1) / 2 + queue / 2;
+		return 0;
+	} else if (sw->dpdk_cfg->dpdk_port_map[dpdk_port_no].type ==
+			FM10K_CONFIG_PORT_MAP_NULL) {
+		FM10K_SW_ERR("Unmapped dpdk port %d!!!", dpdk_port_no);
+		return -1;
+	}
+	FM10K_SW_ERR("Unknown mapped type!!!");
+	return -1;
+}
+
+int
+fm10k_switch_dpdk_mapped_hw_get(struct fm10k_hw *hw, struct fm10k_hw *hw_list[])
+{
+	int pf_no, pf_no_ext;
+	int dpdk_port_no = fm10k_switch_dpdk_port_no_get(hw);
+	struct fm10k_dpdk_port *port;
+	struct fm10k_cfg_port_pf_map *map;
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	if (dpdk_port_no < 0) {
+		FM10K_SW_ERR("Can not find the dpdk port!!!");
+		return -1;
+	}
+
+	port = &sw->dpdk_cfg->ports[dpdk_port_no];
+	map = &sw->dpdk_cfg->dpdk_port_map[dpdk_port_no];
+
+	if (port->type == FM10K_CONFIG_DPDK_VF) {
+		hw_list[0] = hw;
+		hw_list[1] = NULL;
+		return 1;
+	}
+
+	if (port->type != FM10K_CONFIG_DPDK_PF) {
+		FM10K_SW_ERR("Can not be here!!!");
+		return -1;
+	} else if (map->type ==	FM10K_CONFIG_PORT_MAP_PF) {
+		pf_no = map->map_no[0];
+		hw_list[0] = sw->dpdk_cfg->pf_hw[pf_no];
+		hw_list[1] = NULL;
+		return 1;
+	} else if (map->type ==	FM10K_CONFIG_PORT_MAP_PFS) {
+		pf_no = map->map_no[0];
+		hw_list[0] = sw->dpdk_cfg->pf_hw[pf_no];
+		pf_no_ext = map->map_no[1];
+		hw_list[1] = sw->dpdk_cfg->pf_hw[pf_no_ext];
+		return 2;
+	} else if (map->type == FM10K_CONFIG_PORT_MAP_PFSS) {
+		pf_no = map->map_no[0];
+		hw_list[0] = sw->dpdk_cfg->pf_hw[pf_no];
+		pf_no_ext = map->map_no[1];
+		hw_list[1] = sw->dpdk_cfg->pf_hw[pf_no_ext];
+		return 0;
+	}
+
+	hw_list[0] = NULL;
+	hw_list[1] = NULL;
+	return 0;
+}
+
+void
+fm10k_switch_dpdk_tx_queue_num_set(struct fm10k_hw *hw, uint8_t num)
+{
+	int port_no = fm10k_switch_dpdk_port_no_get(hw);
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	if (port_no < 0) {
+		FM10K_SW_ERR("Can not find the dpdk port!!!");
+		return;
+	}
+
+	sw->dpdk_cfg->ports[port_no].tx_queue_num = num;
+}
+
+void
+fm10k_switch_dpdk_rx_queue_num_set(struct fm10k_hw *hw, uint8_t num)
+{
+	int port_no = fm10k_switch_dpdk_port_no_get(hw);
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	if (port_no < 0) {
+		FM10K_SW_ERR("Can not find the dpdk port!!!");
+		return;
+	}
+
+	sw->dpdk_cfg->ports[port_no].rx_queue_num = num;
+}
+
+int
+fm10k_switch_dpdk_pf_no_get(struct fm10k_hw *hw)
+{
+	int port_no = fm10k_switch_dpdk_port_no_get(hw);
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	if (port_no < 0) {
+		FM10K_SW_ERR("Can not find the dpdk port!!!");
+		return -1;
+	}
+
+	return sw->dpdk_cfg->ports[port_no].pf_no;
+}
+
+
+void
+fm10k_switch_flowset_switchto(const char *name)
+{
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	fm10k_ffu_flowset_switch(sw, name);
+}
+
+void
+fm10k_switch_show_port(void)
+{
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	fm10k_stats_epl_port_print(sw);
+	fm10k_stats_dpdk_port_print(sw);
+}
+
+void
+fm10k_switch_show_ffu(void)
+{
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	fm10k_stats_ffu_count_print(sw);
+}
+
+void
+fm10k_switch_show_bank(void)
+{
+	struct fm10k_switch *sw = fm10k_switch_get();
+
+	fm10k_stats_port_bank_print(sw);
+}
-- 
1.8.3.1



More information about the dev mailing list