[PATCH v5 2/4] bus/pci: introduce the PCIe TLP Processing Hints API
Morten Brørup
mb at smartsharesystems.com
Tue Jun 3 10:11:08 CEST 2025
> From: Wathsala Vithanage [mailto:wathsala.vithanage at arm.com]
> Sent: Tuesday, 3 June 2025 00.38
Some nitpicking inline below.
[...]
> diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
> index 5e2e09d5a4..dff750c4d6 100644
> --- a/drivers/bus/pci/bsd/pci.c
> +++ b/drivers/bus/pci/bsd/pci.c
> @@ -650,3 +650,46 @@ rte_pci_ioport_unmap(struct rte_pci_ioport *p)
>
> return ret;
> }
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pci_tph_enable, 25.07)
> +int
> +rte_pci_tph_enable(struct rte_pci_device *dev, int mode)
> +{
> + RTE_SET_USED(dev);
> + RTE_SET_USED(mode);
> + /* This feature is not yet implemented for BSD */
> + return -1;
Either set rte_errno and return -1, or return -ENOTSUP.
If one of these two design patterns are already used in a module, please use the same design pattern for new functions.
Applies to all dummy functions, BSD and Windows.
[...]
> diff --git a/drivers/bus/pci/bus_pci_driver.h
> b/drivers/bus/pci/bus_pci_driver.h
> index 2cc1119072..b1c2829fc1 100644
> --- a/drivers/bus/pci/bus_pci_driver.h
> +++ b/drivers/bus/pci/bus_pci_driver.h
> @@ -46,6 +46,7 @@ struct rte_pci_device {
> char *bus_info; /**< PCI bus specific info */
> struct rte_intr_handle *vfio_req_intr_handle;
> /**< Handler of VFIO request interrupt */
> + uint8_t tph_enabled; /**< TPH enabled on this
> device */
> };
>
> /**
> @@ -194,6 +195,57 @@ struct rte_pci_ioport {
> uint64_t len; /* only filled for memory mapped ports */
> };
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change, or be removed, without
> prior
> + * notice
> + *
> + * This structure is passed into the TPH Steering-Tag set or get
> function as an
> + * argument by the caller. Return values are set in the same structure
> in st and
> + * ph_ignore fields by the calee.
> + *
> + * Refer to PCI-SIG ECN "Revised _DSM for Cache Locality TPH Features"
> for
> + * details.
> + */
> +struct rte_tph_info {
> + /* Input */
> + uint32_t cpu_id; /*Logical CPU id*/
> + uint32_t cache_level; /*Cache level relative to CPU.
> l1d=0,l2d=1,...*/
> + uint8_t flags; /*Memory type, procesisng hint etc.*/
> + uint16_t index; /*Index in vector table to store the ST*/
> +
> + /* Output */
I don't think these are Output fields when used in the set() function.
Please update the documentation of this structure accordingly.
> + uint16_t st; /*Steering tag returned by the platform*/
> + uint8_t ph_ignore; /*Platform ignores PH for the returned
> ST*/
> +};
> +
> +#define RTE_PCI_TPH_MEM_TYPE_MASK 0x1
> +#define RTE_PCI_TPH_MEM_TYPE_SHIFT 0
> +/** Request volatile memory ST */
> +#define RTE_PCI_TPH_MEM_TYPE_VMEM 0
> +/** Request persistent memory ST */
> +#define RTE_PCI_TPH_MEM_TYPE_PMEM 1
> +
> +/** TLP Processing Hints - PCIe 6.0 specification section 2.2.7.1.1 */
> +#define RTE_PCI_TPH_HINT_MASK 0x3
> +#define RTE_PCI_TPH_HINT_SHIFT 1
> +/** Host and device access data equally */
> +#define RTE_PCI_TPH_HINT_BIDIR 0
> +/** Device accesses data more frequently */
> +#define RTE_PCI_TPH_HINT_REQSTR (1 << RTE_PCI_TPH_HINT_SHIFT)
> +/** Host access data more frequently */
> +#define RTE_PCI_TPH_HINT_TARGET (2 << RTE_PCI_TPH_HINT_SHIFT)
> +/** Host access data more frequently with a high temporal locality */
> +#define RTE_PCI_TPH_HINT_TARGET_PRIO (3 << RTE_PCI_TPH_HINT_SHIFT)
> +
> +#define RTE_PCI_TPH_ST_MODE_MASK 0x3
> +/** TPH no ST mode */
> +#define RTE_PCI_TPH_ST_NS_MODE 0
> +/** TPH interrupt vector mode */
> +#define RTE_PCI_TPH_ST_IV_MODE 1
> +/** TPH device specific mode */
> +#define RTE_PCI_TPH_ST_DS_MODE 2
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index c20d159218..b5a8ba0a86 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -814,3 +814,103 @@ rte_pci_ioport_unmap(struct rte_pci_ioport *p)
>
> return ret;
> }
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pci_tph_enable, 25.07)
> +int
> +rte_pci_tph_enable(struct rte_pci_device *dev, int mode)
> +{
> + int ret = 0;
> +
> + switch (dev->kdrv) {
> +#ifdef VFIO_PRESENT
> + case RTE_PCI_KDRV_VFIO:
> + if (pci_vfio_is_enabled())
> + ret = pci_vfio_tph_enable(dev, mode);
Is it correct to return 0 when pci_vfio_is_enabled() returns false?
> + break;
> +#endif
> + case RTE_PCI_KDRV_IGB_UIO:
Please add "/* fall through */" or similar compiler hint.
Or even better, define a new __rte_fallthrough macro as __attribute__((fallthrough)) in rte_common.h.
Refer to [1].
[1]: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html
> + case RTE_PCI_KDRV_UIO_GENERIC:
> + default:
> + ret = -ENOTSUP;
> + break;
> + }
> +
> + if (!ret)
This looks wrong. "ret" is not Boolean, so please use "ret == 0" instead of "!ret".
Applies elsewhere in this patch too, e.g. comparing empty "count" in pci_vfio_tph_st_op().
> + dev->tph_enabled = 1;
> +
> + return ret;
> +}
> +
The above comments apply to all four functions.
[...]
> diff --git a/drivers/bus/pci/linux/pci_vfio.c
> b/drivers/bus/pci/linux/pci_vfio.c
> index 5317170231..bdbeb38658 100644
> --- a/drivers/bus/pci/linux/pci_vfio.c
> +++ b/drivers/bus/pci/linux/pci_vfio.c
> @@ -12,6 +12,7 @@
> #include <stdbool.h>
>
> #include <rte_log.h>
> +#include <eal_export.h>
> #include <rte_pci.h>
> #include <rte_bus_pci.h>
> #include <rte_eal_paging.h>
> @@ -1316,6 +1317,175 @@ pci_vfio_mmio_write(const struct rte_pci_device
> *dev, int bar,
> return pwrite(fd, buf, len, offset + offs);
> }
>
> +static int
> +pci_vfio_tph_ioctl(const struct rte_pci_device *dev, struct
> vfio_pci_tph *pci_tph)
> +{
> + const struct rte_intr_handle *intr_handle = dev->intr_handle;
> + int vfio_dev_fd = 0, ret = 0;
> +
> + vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
> + if (vfio_dev_fd < 0) {
> + ret = -EINVAL;
This should be ret = rte_errno.
Or even better: Other code in /drivers/bus/pci/linux/pci_vfio.c follows the design pattern of setting rte_errno and returning -1 on error. Please use the same design pattern.
Applies to all four functions.
> + goto out;
> + }
> +
> + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_PCI_TPH, pci_tph);
> +out:
> + return ret;
> +}
> +
> +static int
> +pci_vfio_tph_st_op(const struct rte_pci_device *dev,
> + struct rte_tph_info *info, size_t count,
> + enum rte_pci_st_op op)
> +{
> + int ret = 0;
> + size_t argsz = 0, i;
> + struct vfio_pci_tph *pci_tph = NULL;
> + uint8_t mem_type = 0, hint = 0;
> +
> + if (!count) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + argsz = sizeof(struct vfio_pci_tph) +
> + count * sizeof(struct vfio_pci_tph_entry);
> +
> + pci_tph = rte_zmalloc(NULL, argsz, 0);
> + if (!pci_tph) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + pci_tph->argsz = argsz;
> + pci_tph->count = count;
> +
> + switch (op) {
> + case RTE_PCI_TPH_ST_GET:
> + pci_tph->flags = VFIO_DEVICE_TPH_GET_ST;
> + break;
> + case RTE_PCI_TPH_ST_SET:
> + pci_tph->flags = VFIO_DEVICE_TPH_SET_ST;
> + break;
> + default:
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + for (i = 0; i < count; i++) {
> + pci_tph->ents[i].cpu_id = info[i].cpu_id;
> + pci_tph->ents[i].cache_level = info[i].cache_level;
> +
> + mem_type = info[i].flags & RTE_PCI_TPH_MEM_TYPE_MASK;
> + switch (mem_type) {
> + case RTE_PCI_TPH_MEM_TYPE_VMEM:
> + pci_tph->ents[i].flags |= VFIO_TPH_MEM_TYPE_VMEM;
> + break;
> + case RTE_PCI_TPH_MEM_TYPE_PMEM:
> + pci_tph->ents[i].flags |= VFIO_TPH_MEM_TYPE_PMEM;
> + break;
> + default:
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + hint = info[i].flags & RTE_PCI_TPH_HINT_MASK;
> + switch (hint) {
> + case RTE_PCI_TPH_HINT_BIDIR:
> + pci_tph->ents[i].flags |= VFIO_TPH_HINT_BIDIR;
> + break;
> + case RTE_PCI_TPH_HINT_REQSTR:
> + pci_tph->ents[i].flags |= VFIO_TPH_HINT_REQSTR;
> + break;
> + case RTE_PCI_TPH_HINT_TARGET:
> + pci_tph->ents[i].flags |= VFIO_TPH_HINT_TARGET;
> + break;
> + case RTE_PCI_TPH_HINT_TARGET_PRIO:
> + pci_tph->ents[i].flags |= VFIO_TPH_HINT_TARGET_PRIO;
> + break;
> + default:
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (op == RTE_PCI_TPH_ST_SET)
> + pci_tph->ents[i].index = info[i].index;
> + }
> +
> + ret = pci_vfio_tph_ioctl(dev, pci_tph);
> + if (ret)
> + goto out;
> +
> + /*
> + * Kernel returns steering-tag and ph-ignore bits for
> + * RTE_PCI_TPH_ST_SET too, therefore copy output for
> + * both RTE_PCI_TPH_ST_SET and RTE_PCI_TPH_ST_GET
> + * cases.
> + */
> + for (i = 0; i < count; i++) {
> + info[i].st = pci_tph->ents[i].st;
> + info[i].ph_ignore = pci_tph->ents[i].ph_ignore;
> + }
> +
> +out:
> + if (pci_tph)
> + rte_free(pci_tph);
> + return ret;
> +}
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(pci_vfio_tph_enable, 25.07)
> +int
> +pci_vfio_tph_enable(const struct rte_pci_device *dev, int mode)
> +{
> + int ret;
> +
> + if (!(mode ^ (mode & VFIO_TPH_ST_MODE_MASK))) {
> + ret = -EINVAL;
> + goto out;
> + } else
> + mode &= VFIO_TPH_ST_MODE_MASK;
> +
> + struct vfio_pci_tph pci_tph = {
> + .argsz = sizeof(struct vfio_pci_tph),
> + .flags = VFIO_DEVICE_TPH_ENABLE | mode,
> + .count = 0
> + };
> +
> + ret = pci_vfio_tph_ioctl(dev, &pci_tph);
> +out:
> + return ret;
> +}
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(pci_vfio_tph_disable, 25.07)
> +int
> +pci_vfio_tph_disable(const struct rte_pci_device *dev)
> +{
> + struct vfio_pci_tph pci_tph = {
> + .argsz = sizeof(struct vfio_pci_tph),
> + .flags = VFIO_DEVICE_TPH_DISABLE,
> + .count = 0
> + };
> +
> + return pci_vfio_tph_ioctl(dev, &pci_tph);
> +}
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(pci_vfio_tph_st_get, 25.07)
> +int
> +pci_vfio_tph_st_get(const struct rte_pci_device *dev,
> + struct rte_tph_info *info, size_t count)
> +{
> + return pci_vfio_tph_st_op(dev, info, count, RTE_PCI_TPH_ST_GET);
> +}
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(pci_vfio_tph_st_set, 25.07)
> +int
> +pci_vfio_tph_st_set(const struct rte_pci_device *dev,
> + struct rte_tph_info *info, size_t count)
> +{
> + return pci_vfio_tph_st_op(dev, info, count, RTE_PCI_TPH_ST_SET);
> +}
> +
> int
> pci_vfio_is_enabled(void)
> {
> diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
> index 38109844b9..d2ec370320 100644
> --- a/drivers/bus/pci/private.h
> +++ b/drivers/bus/pci/private.h
> @@ -335,4 +335,12 @@ rte_pci_dev_iterate(const void *start,
> int
> rte_pci_devargs_parse(struct rte_devargs *da);
>
> +/*
> + * TPH Steering-Tag operation types.
> + */
> +enum rte_pci_st_op {
> + RTE_PCI_TPH_ST_SET, /* Set TPH Steering - Tags */
> + RTE_PCI_TPH_ST_GET /* Get TPH Steering - Tags */
> +};
> +
> #endif /* _PCI_PRIVATE_H_ */
> diff --git a/drivers/bus/pci/rte_bus_pci.h
> b/drivers/bus/pci/rte_bus_pci.h
> index 19a7b15b99..e4d4780f54 100644
> --- a/drivers/bus/pci/rte_bus_pci.h
> +++ b/drivers/bus/pci/rte_bus_pci.h
> @@ -31,6 +31,7 @@ extern "C" {
> struct rte_pci_device;
> struct rte_pci_driver;
> struct rte_pci_ioport;
> +struct rte_tph_info;
>
> struct rte_devargs;
>
> @@ -312,6 +313,72 @@ void rte_pci_ioport_read(struct rte_pci_ioport *p,
> void rte_pci_ioport_write(struct rte_pci_ioport *p,
> const void *data, size_t len, off_t offset);
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Enable TLP Processing Hints (TPH) in the endpoint device.
> + *
> + * @param dev
> + * A pointer to a rte_pci_device structure describing the device
> + * to use.
> + * @param mode
> + * TPH mode the device must operate in.
The values of the "mode" parameter should be well defined in a DPDK public API.
I cannot find them.
The return value is missing from the documentation.
For all four functions here.
> + */
> +__rte_experimental
> +int rte_pci_tph_enable(struct rte_pci_device *dev, int mode);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Disable TLP Processing Hints (TPH) in the endpoint device.
> + *
> + * @param dev
> + * A pointer to a rte_pci_device structure describing the device
> + * to use.
> + */
> +__rte_experimental
> +int rte_pci_tph_disable(struct rte_pci_device *dev);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Get PCI Steering-Tags (STs) for a list of stashing targets.
> + *
> + * @param mode
> + * TPH mode the device must operate in.
> + * @param info
> + * An array of rte_tph_info objects, each describing the target
> + * cpu-id, cache-level, etc. Steering-tags for each target is
> + * eturned via info array.
> + * @param count
> + * The number of elements in the info array.
> + */
> +__rte_experimental
> +int rte_pci_tph_st_get(const struct rte_pci_device *dev,
> + struct rte_tph_info *info, size_t count);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Set PCI Steering-Tags (STs) for a list of stashing targets.
> + *
> + * @param mode
> + * TPH mode the device must operate in.
> + * @param info
> + * An array of rte_tph_info objects, each describing the target
> + * cpu-id, cache-level, etc. Steering-tags for each target is
> + * eturned via info array.
> + * @param count
> + * The number of elements in the info array.
> + */
> +__rte_experimental
> +int rte_pci_tph_st_set(const struct rte_pci_device *dev,
> + struct rte_tph_info *info, size_t count);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/drivers/bus/pci/windows/pci.c
> b/drivers/bus/pci/windows/pci.c
> index e7e449306e..218e667a5a 100644
> --- a/drivers/bus/pci/windows/pci.c
> +++ b/drivers/bus/pci/windows/pci.c
> @@ -511,3 +511,46 @@ rte_pci_scan(void)
>
> return ret;
> }
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pci_tph_enable, 25.07)
> +int
> +rte_pci_tph_enable(struct rte_pci_device *dev, int mode)
> +{
> + RTE_SET_USED(dev);
> + RTE_SET_USED(mode);
> + /* This feature is not yet implemented for windows */
> + return -1;
> +}
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pci_tph_disable, 25.07)
> +int
> +rte_pci_tph_disable(struct rte_pci_device *dev)
> +{
> + RTE_SET_USED(dev);
> + /* This feature is not yet implemented for windows */
> + return -1;
> +}
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pci_tph_st_get, 25.07)
> +int
> +rte_pci_tph_st_get(const struct rte_pci_device *dev,
> + struct rte_tph_info *info, size_t count)
> +{
> + RTE_SET_USED(dev);
> + RTE_SET_USED(info);
> + RTE_SET_USED(count);
> + /* This feature is not yet implemented for windows */
> + return -1;
> +}
> +
> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pci_tph_st_set, 25.07)
> +int
> +rte_pci_tph_st_set(const struct rte_pci_device *dev,
> + struct rte_tph_info *info, size_t count)
> +{
> + RTE_SET_USED(dev);
> + RTE_SET_USED(info);
> + RTE_SET_USED(count);
> + /* This feature is not yet implemented for windows */
> + return -1;
> +}
> diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
> index 9a50a12142..da9cd666bf 100644
> --- a/lib/pci/rte_pci.h
> +++ b/lib/pci/rte_pci.h
> @@ -137,6 +137,21 @@ extern "C" {
> /* Process Address Space ID (RTE_PCI_EXT_CAP_ID_PASID) */
> #define RTE_PCI_PASID_CTRL 0x06 /* PASID control register
> */
>
> +/* TPH Requester */
> +#define RTE_PCI_TPH_CAP 4 /* capability register */
> +#define RTE_PCI_TPH_CAP_ST_NS 0x00000001 /* No ST Mode Supported
> */
> +#define RTE_PCI_TPH_CAP_ST_IV 0x00000002 /* Interrupt Vector Mode
> Supported */
> +#define RTE_PCI_TPH_CAP_ST_DS 0x00000004 /* Device Specific Mode
> Supported */
> +#define RTE_PCI_TPH_CAP_EXT_TPH 0x00000100 /* Ext TPH Requester
> Supported */
> +#define RTE_PCI_TPH_CAP_LOC_MASK 0x00000600 /* ST Table Location */
> +#define RTE_PCI_TPH_LOC_NONE 0x00000000 /* Not present */
> +#define RTE_PCI_TPH_LOC_CAP 0x00000200 /* In capability */
> +#define RTE_PCI_TPH_LOC_MSIX 0x00000400 /* In MSI-X */
> +#define RTE_PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST Table Size */
> +#define RTE_PCI_TPH_CAP_ST_SHIFT 16 /* ST Table Size shift */
> +#define RTE_PCI_TPH_BASE_SIZEOF 0xc /* Size with no ST table */
> +
> +
> /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
> #define PCI_PRI_FMT "%.4" PRIx32 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
> #define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X")
> --
> 2.43.0
More information about the dev
mailing list