[PATCH v7 7/7] usertools/mbuf: parse mbuf history dump
Thomas Monjalon
thomas at monjalon.net
Thu Oct 16 22:34:45 CEST 2025
From: Shani Peretz <shperetz at nvidia.com>
Add a Python script that parses the history dump of mbufs
generated by rte_mbuf_history_dump() and related functions,
and presents it in a human-readable format.
If an operation ID is repeated, such as in the case of a double free,
it will be highlighted in red and listed at the end of the file.
Signed-off-by: Shani Peretz <shperetz at nvidia.com>
Acked-by: Robin Jarry <rjarry at redhat.com>
Acked-by: Morten Brørup <mb at smartsharesystems.com>
---
doc/guides/howto/debug_troubleshoot.rst | 3 +-
doc/guides/prog_guide/mbuf_lib.rst | 2 +
doc/guides/rel_notes/release_25_11.rst | 1 +
lib/mbuf/rte_mbuf_history.h | 3 +
usertools/dpdk-mbuf-history-parser.py | 165 ++++++++++++++++++++++++
5 files changed, 173 insertions(+), 1 deletion(-)
create mode 100755 usertools/dpdk-mbuf-history-parser.py
diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
index 16feeb1e54..c19aada6f8 100644
--- a/doc/guides/howto/debug_troubleshoot.rst
+++ b/doc/guides/howto/debug_troubleshoot.rst
@@ -221,7 +221,8 @@ Memory objects close to NUMA :numref:`dtg_mempool`.
by defining the compilation flag ``RTE_MBUF_HISTORY_DEBUG``.
Then the libraries will mark the mbufs,
and more marks can be added in the application.
- Some dump functions must be used to collect the history.
+ Some dump functions must be used to collect the history,
+ and a script can parse it.
#. Lower performance between the pipeline processing stages can be
diff --git a/doc/guides/prog_guide/mbuf_lib.rst b/doc/guides/prog_guide/mbuf_lib.rst
index ae72eecc61..382bfbdca4 100644
--- a/doc/guides/prog_guide/mbuf_lib.rst
+++ b/doc/guides/prog_guide/mbuf_lib.rst
@@ -279,6 +279,8 @@ More marks can be added by the application
by calling functions like ``rte_mbuf_history_mark_bulk()``.
Then the history can be stored in a file
by calling functions like ``rte_mbuf_history_dump_all()``.
+The dump file will be easier to read after being processed
+by the script ``dpdk-mbuf-history-parser.py``.
Use Cases
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index 4d6a02179a..95e64d5d57 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -76,6 +76,7 @@ New Features
Added history dynamic field in mbuf (disabled by default)
to store successive states of the mbuf lifecycle.
Some functions were added to dump statistics.
+ A script was added to parse mbuf tracking stored in a file.
* **Updated NXP DPAA2 ethernet driver.**
diff --git a/lib/mbuf/rte_mbuf_history.h b/lib/mbuf/rte_mbuf_history.h
index 56a975a484..daf3cda34c 100644
--- a/lib/mbuf/rte_mbuf_history.h
+++ b/lib/mbuf/rte_mbuf_history.h
@@ -19,6 +19,9 @@
* The history is stored as an atomic value (64-bit) in a dynamic field of the mbuf,
* with each event encoded in 4 bits, allowing up to 16 events to be tracked.
* Atomic operations ensure thread safety for cloned mbufs accessed by multiple lcores.
+ *
+ * After dumping the history in a file,
+ * the script dpdk-mbuf-history-parser.py can be used for parsing.
*/
#include <rte_common.h>
diff --git a/usertools/dpdk-mbuf-history-parser.py b/usertools/dpdk-mbuf-history-parser.py
new file mode 100755
index 0000000000..273adbcfae
--- /dev/null
+++ b/usertools/dpdk-mbuf-history-parser.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2023 NVIDIA Corporation & Affiliates
+
+"""
+Parse the mbuf history dump generated by rte_mbuf_history_dump()
+and related functions, and present it in a human-readable format.
+"""
+
+import argparse
+import enum
+import os
+import re
+
# ANSI escape sequences used to highlight repeated operations in the output.
RED = "\033[91m"
RESET = "\033[0m"

# Captures the text between the braces of ``enum rte_mbuf_history_op``.
ENUM_PATTERN = r"enum\s+rte_mbuf_history_op\s*{([^}]+)}"

# Captures one enumerator as (name, decimal value, optional /* comment */).
# Names may contain digits after the first character and the trailing comma
# is optional, so such enumerators are not silently dropped.  The ``\b``
# after the digits rejects non-decimal literals such as ``0x10`` instead of
# mis-reading them as ``0``.
VALUE_PATTERN = r"([A-Z_][A-Z0-9_]*)\s*=\s*(\d+)\b\s*,?\s*(?:/\*\s*(.*?)\s*\*/)?"

# The header is located relative to this script: the parent of the script's
# directory, then lib/mbuf/.  realpath() resolves symlinks first, so invoking
# the script through a symlink still finds the header next to the real file.
HEADER_FILE = os.path.join(
    os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
    "lib/mbuf/rte_mbuf_history.h",
)
+
+
def print_history_sequence(address: str, sequence: list[str]):
    """Print the operations of one mbuf, four per line, joined by arrows.

    Every entry is padded to the width of the widest entry so that columns
    line up.  Widths are measured on the visible text only: ANSI colour
    escapes embedded in an entry do not count towards its width.

    Args:
        address: mbuf address, printed as the prefix of every output line.
        sequence: operation names, optionally wrapped in ANSI colour codes.
    """
    if not sequence:
        # Nothing to align or chunk; still report the mbuf instead of
        # failing on max() of an empty sequence.
        print(f"mbuf {address}: (no history)")
        print()
        return

    ansi_escape = re.compile(r"\x1b\[[0-9;]*m")
    visible_widths = [len(ansi_escape.sub("", op)) for op in sequence]
    op_width = max(visible_widths)

    # Pad by hand: format-spec padding would count the escape characters
    # and leave highlighted entries too narrow.
    padded = [
        op + " " * (op_width - width)
        for op, width in zip(sequence, visible_widths)
    ]

    for start in range(0, len(padded), 4):
        line = " -> ".join(padded[start : start + 4])
        if start + 4 < len(padded):
            # More operations follow on the next line.
            line += " ->"
        print(f"mbuf {address}: " + line)
    print()
+
+
def match_field(match: re.Match) -> tuple[int, str]:
    """Convert one enumerator regex match into a ``(value, name)`` pair.

    Args:
        match: a match with exactly three groups: enumerator name, decimal
            value, and an optional comment (ignored here).

    Returns:
        The integer value and the name with a leading ``RTE_MBUF_`` removed.
    """
    name, value, _ = match.groups()
    # Strip the prefix only; an "RTE_MBUF_" occurring later in the name
    # is part of the identifier and is kept.
    return (int(value), name.removeprefix("RTE_MBUF_"))
+
+
class HistoryEnum:
    """Python enum built from ``enum rte_mbuf_history_op`` in a C header."""

    def __init__(self, ops: type[enum.Enum]):
        # Enum class; calling it with an integer returns the matching member.
        self.ops = ops

    @staticmethod
    def from_header(header_file: str) -> "HistoryEnum":
        """Build the enum by scanning *header_file* with regular expressions.

        Args:
            header_file: path of the header declaring the enum.

        Returns:
            A HistoryEnum whose ``ops`` class has one member per parsed
            enumerator.

        Raises:
            OSError: if the header cannot be opened.
            ValueError: if the header has no ``enum rte_mbuf_history_op``.
        """
        with open(header_file, "r", encoding="utf-8") as f:
            content = f.read()

        enum_match = re.search(ENUM_PATTERN, content, re.DOTALL)
        if enum_match is None:
            raise ValueError(
                f"enum rte_mbuf_history_op not found in {header_file}"
            )

        # match_field yields (value, name); enum.Enum wants name -> value.
        fields = map(match_field, re.finditer(VALUE_PATTERN, enum_match.group(1)))
        members = {name: value for value, name in fields}
        return HistoryEnum(enum.Enum("HistoryOps", members))
+
+
+class HistoryLine:
+ def __init__(self, address: str, ops: list):
+ self.address = address
+ self.ops = ops
+
+ def repeats(self) -> [list[str], str | None]:
+ repeated = None
+ sequence = []
+ for idx, op in enumerate(self.ops):
+ if idx > 0 and op == self.ops[idx - 1] and op.name != "NEVER":
+ sequence[-1] = f"{RED}{op.name}{RESET}"
+ sequence.append(f"{RED}{op.name}{RESET}")
+ repeated = op.name
+ else:
+ sequence.append(op.name)
+ return sequence, repeated
+
+
class HistoryMetrics:
    """Named integer counters taken from the metrics part of a dump."""

    def __init__(self, metrics: dict[str, int]):
        self.metrics = metrics

    def max_name_width(self) -> int:
        """Return the length of the longest metric name, or 0 if there are none."""
        return max((len(name) for name in self.metrics), default=0)
+
+
class HistoryParser:
    """Parse an mbuf history dump file into history lines and metrics."""

    def __init__(self):
        # The operation names come from the C header located by HEADER_FILE.
        self.history_enum = HistoryEnum.from_header(HEADER_FILE)

    def parse(self, dump_file: str) -> "tuple[list[HistoryLine], HistoryMetrics]":
        """Read *dump_file* and split it into mbuf histories and metrics.

        Blank lines are ignored.  The metrics section starts at the first
        line containing `` populated =``; everything from there to the end
        is parsed as ``key = value`` metrics.  The lines before it, minus
        the first three non-blank lines, are parsed as mbuf histories.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if no metrics line is found or a line is malformed.
        """
        with open(dump_file, "r", encoding="utf-8") as f:
            lines = [line for line in f if line.strip()]

        metrics_start = next(
            (idx for idx, line in enumerate(lines) if " populated =" in line),
            None,
        )
        if metrics_start is None:
            raise ValueError(f"no ' populated =' metrics line found in {dump_file}")

        # NOTE(review): the first three non-blank lines are skipped,
        # presumably a fixed dump header -- confirm against the dump format.
        history_lines = lines[3:metrics_start]
        metrics_lines = lines[metrics_start:]
        return (self._parse_history(history_lines), self._parse_metrics(metrics_lines))

    def _parse_metrics(self, lines: list[str]) -> "HistoryMetrics":
        """Parse ``key = value`` lines; keys are stored exactly as written."""
        metrics = {}
        for line in lines:
            key, value = line.split("=", 1)
            metrics[key] = int(value)
        return HistoryMetrics(metrics)

    def _parse_history(self, lines: list[str]) -> "list[HistoryLine]":
        """Parse lines of the form ``mbuf 0x1054b9980: 0000000000000065``.

        Raises:
            ValueError: if a line does not have that form.
        """
        history_lines = []
        for line in lines:
            try:
                prefix, history = line.split(":")[:2]
                address = prefix.split("mbuf ")[1]
                value = int(history, 16)
            except (IndexError, ValueError) as err:
                raise ValueError(
                    f"malformed mbuf history line: {line.strip()!r}"
                ) from err
            history_lines.append(HistoryLine(address=address, ops=self._parse(value)))
        return history_lines

    def _parse(self, history: int) -> list[enum.Enum]:
        """Decode a 64-bit history value into a list of enum members.

        Nibbles are read from the least significant one upward, stopping at
        the first zero nibble.  The list is then reversed, so it runs from
        the most significant decoded nibble to the least significant.

        Raises:
            ValueError: if a nibble is not a value of the enum.
        """
        ops = []
        for _ in range(16):  # 64 bits / 4 bits = 16 possible operations
            op = history & 0xF  # Extract lowest 4 bits
            if op == 0:
                break
            ops.append(self.history_enum.ops(op))
            history >>= 4

        ops.reverse()
        return ops
+
+
def print_history_lines(history_lines: list[HistoryLine]):
    """Print every mbuf history, then a summary of mbufs with repeated ops.

    Each history is rendered once through ``HistoryLine.repeats()``; the
    result feeds both the per-mbuf listing and the violations section.
    """
    reports = []
    for entry in history_lines:
        reports.append((entry.address, entry.repeats()))

    for address, report in reports:
        print_history_sequence(address, report[0])

    print("=== Violations ===")
    for address, (_, repeated) in reports:
        if not repeated:
            continue
        print(f"mbuf {address} has repeated ops: {RED}{repeated}{RESET}")
+
+
def print_metrics(metrics: "HistoryMetrics"):
    """Print the metrics as a two-column table under a summary header.

    Names are left-aligned in a column two characters wider than the
    longest name, followed by a space and the value.
    """
    print("=== Metrics Summary ===")
    if not metrics.metrics:
        # Nothing to align; also avoids asking for a width of no names.
        return

    # The column width is the same for every row: compute it once.
    column_width = metrics.max_name_width() + 2
    for name, value in metrics.metrics.items():
        print(f"{name:<{column_width}} {value}")
+
+
def main():
    """Parse the dump file named on the command line and print the report.

    The report is the per-mbuf histories with their violations section,
    a blank line, then the metrics summary.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument('history_file')
    history_file = arg_parser.parse_args().history_file

    history_lines, metrics = HistoryParser().parse(history_file)

    print_history_lines(history_lines)
    print()
    print_metrics(metrics)


if __name__ == "__main__":
    main()
--
2.51.0
More information about the dev
mailing list