[dpdk-dev] [PATCH v6 4/4] kernel/linux/kni: add IOVA support in kni module
vattunuru at marvell.com
vattunuru at marvell.com
Tue Jun 25 05:57:00 CEST 2019
From: Kiran Kumar K <kirankumark at marvell.com>
This patch adds support for the kernel module to work in IOVA = VA mode.
The idea is to get the physical address from the IOVA using the
iommu_iova_to_phys API and then use the phys_to_virt API to
convert that physical address to a kernel virtual address.
When compared with IOVA = PA mode, there is no performance
drop with this approach.
This approach does not work with kernel versions older
than 4.4.0 because of API compatibility issues.
Signed-off-by: Kiran Kumar K <kirankumark at marvell.com>
Signed-off-by: Vamsi Attunuru <vattunuru at marvell.com>
---
kernel/linux/kni/kni_dev.h | 3 ++
kernel/linux/kni/kni_misc.c | 62 ++++++++++++++++++++++++++++++------
kernel/linux/kni/kni_net.c | 76 +++++++++++++++++++++++++++++++++++++--------
3 files changed, 119 insertions(+), 22 deletions(-)
diff --git a/kernel/linux/kni/kni_dev.h b/kernel/linux/kni/kni_dev.h
index d57bce6..6ad53c7 100644
--- a/kernel/linux/kni/kni_dev.h
+++ b/kernel/linux/kni/kni_dev.h
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/iommu.h>
#include <rte_kni_common.h>
#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
@@ -39,6 +40,8 @@ struct kni_dev {
/* kni list */
struct list_head list;
+ uint8_t iova_mode;
+ struct iommu_domain *domain;
struct net_device_stats stats;
int status;
uint16_t group_id; /* Group ID of a group of KNI devices */
diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c
index 1fc5eeb..b70c827 100644
--- a/kernel/linux/kni/kni_misc.c
+++ b/kernel/linux/kni/kni_misc.c
@@ -294,6 +294,9 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
struct rte_kni_device_info dev_info;
struct net_device *net_dev = NULL;
struct kni_dev *kni, *dev, *n;
+ struct pci_dev *pci = NULL;
+ struct iommu_domain *domain = NULL;
+ phys_addr_t phys_addr;
pr_info("Creating kni...\n");
/* Check the buffer size, to avoid warning */
@@ -351,15 +354,56 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
/* Translate user space info into kernel space info */
- kni->tx_q = phys_to_virt(dev_info.tx_phys);
- kni->rx_q = phys_to_virt(dev_info.rx_phys);
- kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
- kni->free_q = phys_to_virt(dev_info.free_phys);
-
- kni->req_q = phys_to_virt(dev_info.req_phys);
- kni->resp_q = phys_to_virt(dev_info.resp_phys);
- kni->sync_va = dev_info.sync_va;
- kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ if (dev_info.iova_mode) {
+#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE
+ (void)pci;
+ pr_err("Kernel version is not supported\n");
+ return -EINVAL;
+#else
+ pci = pci_get_device(dev_info.vendor_id,
+ dev_info.device_id, NULL);
+ while (pci) {
+ if ((pci->bus->number == dev_info.bus) &&
+ (PCI_SLOT(pci->devfn) == dev_info.devid) &&
+ (PCI_FUNC(pci->devfn) == dev_info.function)) {
+ domain = iommu_get_domain_for_dev(&pci->dev);
+ break;
+ }
+ pci = pci_get_device(dev_info.vendor_id,
+ dev_info.device_id, pci);
+ }
+#endif
+ kni->domain = domain;
+ phys_addr = iommu_iova_to_phys(domain, dev_info.tx_phys);
+ kni->tx_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.rx_phys);
+ kni->rx_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.alloc_phys);
+ kni->alloc_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.free_phys);
+ kni->free_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.req_phys);
+ kni->req_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.resp_phys);
+ kni->resp_q = phys_to_virt(phys_addr);
+ kni->sync_va = dev_info.sync_va;
+ phys_addr = iommu_iova_to_phys(domain, dev_info.sync_phys);
+ kni->sync_kva = phys_to_virt(phys_addr);
+ kni->iova_mode = 1;
+
+ } else {
+
+ kni->tx_q = phys_to_virt(dev_info.tx_phys);
+ kni->rx_q = phys_to_virt(dev_info.rx_phys);
+ kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+ kni->free_q = phys_to_virt(dev_info.free_phys);
+
+ kni->req_q = phys_to_virt(dev_info.req_phys);
+ kni->resp_q = phys_to_virt(dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ kni->iova_mode = 0;
+ }
kni->mbuf_size = dev_info.mbuf_size;
diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index ad83658..92d5991 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -35,6 +35,22 @@ static void kni_net_rx_normal(struct kni_dev *kni);
/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+/* iova to kernel virtual address */
+static void *
+iova2kva(struct kni_dev *kni, void *pa)
+{
+ return phys_to_virt(iommu_iova_to_phys(kni->domain,
+ (uintptr_t)pa));
+}
+
+static void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+ return phys_to_virt((iommu_iova_to_phys(kni->domain,
+ (uintptr_t)m->buf_physaddr) +
+ m->data_off));
+}
+
/* physical address to kernel virtual address */
static void *
pa2kva(void *pa)
@@ -186,7 +202,10 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
return;
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
+ if (likely(kni->iova_mode == 1))
+ kva = iova2kva(kni, kni->pa[i]);
+ else
+ kva = pa2kva(kni->pa[i]);
kni->va[i] = pa2va(kni->pa[i], kva);
}
@@ -263,8 +282,13 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev)
if (likely(ret == 1)) {
void *data_kva;
- pkt_kva = pa2kva(pkt_pa);
- data_kva = kva2data_kva(pkt_kva);
+ if (likely(kni->iova_mode == 1)) {
+ pkt_kva = iova2kva(kni, pkt_pa);
+ data_kva = iova2data_kva(kni, pkt_kva);
+ } else {
+ pkt_kva = pa2kva(pkt_pa);
+ data_kva = kva2data_kva(pkt_kva);
+ }
pkt_va = pa2va(pkt_pa, pkt_kva);
len = skb->len;
@@ -335,9 +359,14 @@ kni_net_rx_normal(struct kni_dev *kni)
/* Transfer received packets to netif */
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
@@ -434,13 +463,21 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
num = ret;
/* Copy mbufs */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ alloc_kva = iova2kva(kni, kni->alloc_pa[i]);
+ alloc_data_kva = iova2data_kva(kni, alloc_kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ alloc_kva = pa2kva(kni->alloc_pa[i]);
+ alloc_data_kva = kva2data_kva(alloc_kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
- alloc_kva = pa2kva(kni->alloc_pa[i]);
- alloc_data_kva = kva2data_kva(alloc_kva);
kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
memcpy(alloc_data_kva, data_kva, len);
@@ -507,9 +544,16 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
/* Copy mbufs to sk buffer and then call tx interface */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ }
+
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
@@ -545,8 +589,14 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
if (!kva->next)
break;
- kva = pa2kva(va2pa(kva->next, kva));
- data_kva = kva2data_kva(kva);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni,
+ va2pa(kva->next, kva));
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(va2pa(kva->next, kva));
+ data_kva = kva2data_kva(kva);
+ }
}
}
--
2.8.4
More information about the dev
mailing list