[PATCH v6] devtools: add Python-based patch style checker
Stephen Hemminger
stephen at networkplumber.org
Tue Mar 24 15:48:57 CET 2026
Add dpdk-checkpatch.py as a standalone alternative to checkpatches.sh.
Unlike the existing shell script wrapper around checkpatch.pl, this
requires no Linux kernel source tree or Perl installation and is
significantly faster (~0.4s vs ~2m23s on a recent patch series).
Supports the same usage patterns: patch files, mbox bundles, git commit
ranges (-r), last N commits (-n), and stdin. Implements common style
checks (spacing, line length, spelling via codespell) plus
DPDK-specific forbidden token and tag checks.
Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
Acked-by: Bruce Richardson <bruce.richardson at intel.com>
---
v6 - cleanup several false positives
devtools/dpdk-checkpatch.py | 1829 +++++++++++++++++++++++++++++++++++
1 file changed, 1829 insertions(+)
create mode 100755 devtools/dpdk-checkpatch.py
diff --git a/devtools/dpdk-checkpatch.py b/devtools/dpdk-checkpatch.py
new file mode 100755
index 0000000000..64111c4eb7
--- /dev/null
+++ b/devtools/dpdk-checkpatch.py
@@ -0,0 +1,1829 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2026 Stephen Hemminger
+#
+# dpdk-checkpatch.py - Check patches for common style issues
+#
+# This is a standalone Python replacement for the DPDK checkpatches.sh
+# script that previously wrapped the Linux kernel's checkpatch.pl.
+#
+# Usage examples:
+# # Check patch files
+# dpdk-checkpatch.py *.patch
+#
+# # Check patches before applying
+# dpdk-checkpatch.py *.patch && git am *.patch
+#
+# # Check commits since origin/main
+# dpdk-checkpatch.py
+#
+# # Quiet mode for scripting
+# if dpdk-checkpatch.py -q "$patch"; then
+# echo "Clean, applying..."
+# git am "$patch"
+# else
+# echo "Issues found, skipping"
+# fi
+#
+# # Verbose output with context
+# dpdk-checkpatch.py -v my-feature.patch
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+VERSION = "1.0"
+
+# Default configuration
+DEFAULT_LINE_LENGTH = 100
+DEFAULT_CODESPELL_DICT = "/usr/share/codespell/dictionary.txt"
+
+
+ at dataclass
+class CheckResult:
+ """Result of a single check."""
+
+ level: str # ERROR, WARNING, CHECK
+ type_name: str
+ message: str
+ filename: str = ""
+ line_num: int = 0
+ line_content: str = ""
+
+
+ at dataclass
+class PatchInfo:
+ """Information extracted from a patch."""
+
+ subject: str = ""
+ author: str = ""
+ author_email: str = ""
+ signoffs: list = field(default_factory=list)
+ files: list = field(default_factory=list)
+ added_lines: dict = field(default_factory=dict) # filename -> [(line_num, content)]
+ context_before: dict = field(
+ default_factory=dict
+ ) # filename -> {line_num: context_line}
+ has_fixes_tag: bool = False
+ fixes_commits: list = field(default_factory=list)
+
+
+class CheckPatch:
+ """Main class for checking patches."""
+
+ def __init__(self, config: dict):
+ self.config = config
+ self.results: list[CheckResult] = []
+ self.errors = 0
+ self.warnings = 0
+ self.checks = 0
+ self.lines_checked = 0
+
+ # Load codespell dictionary if enabled
+ self.spelling_dict = {}
+ if config.get("codespell"):
+ self._load_codespell_dict()
+
+ # Forbidden token rules for DPDK
+ self.forbidden_rules = self._init_forbidden_rules()
+
+ def _load_codespell_dict(self) -> None:
+ """Load the codespell dictionary."""
+ dict_path = self.config.get("codespell_file")
+
+ if not dict_path:
+ # Search common locations for the dictionary
+ search_paths = [
+ DEFAULT_CODESPELL_DICT,
+ "/usr/local/lib/python3.12/dist-packages/codespell_lib/data/dictionary.txt",
+ "/usr/local/lib/python3.11/dist-packages/codespell_lib/data/dictionary.txt",
+ "/usr/local/lib/python3.10/dist-packages/codespell_lib/data/dictionary.txt",
+ "/usr/lib/python3/dist-packages/codespell_lib/data/dictionary.txt",
+ ]
+
+ # Also try to find it via codespell module
+ try:
+ import codespell_lib
+
+ module_path = os.path.join(
+ os.path.dirname(codespell_lib.__file__), "data", "dictionary.txt"
+ )
+ search_paths.insert(0, module_path)
+ except ImportError:
+ pass
+
+ for path in search_paths:
+ if os.path.exists(path):
+ dict_path = path
+ break
+
+ if not dict_path or not os.path.exists(dict_path):
+ return
+
+ try:
+ with open(dict_path, "r", encoding="utf-8", errors="ignore") as f:
+ for line in f:
+ line = line.strip()
+ if not line or line.startswith("#"):
+ continue
+ parts = line.split("->")
+ if len(parts) >= 2:
+ wrong = parts[0].strip().lower()
+ correct = parts[1].strip().split(",")[0].strip()
+ self.spelling_dict[wrong] = correct
+ except IOError:
+ pass
+
+ def _init_forbidden_rules(self) -> list:
+ """Initialize DPDK-specific forbidden token rules."""
+ return [
+ # Refrain from new calls to RTE_LOG in libraries
+ {
+ "folders": ["lib"],
+ "patterns": [r"RTE_LOG\("],
+ "message": "Prefer RTE_LOG_LINE",
+ },
+ # Refrain from new calls to RTE_LOG in drivers
+ {
+ "folders": ["drivers"],
+ "skip_files": [r".*osdep\.h$"],
+ "patterns": [r"RTE_LOG\(", r"RTE_LOG_DP\(", r"rte_log\("],
+ "message": "Prefer RTE_LOG_LINE/RTE_LOG_DP_LINE",
+ },
+ # No output on stdout or stderr
+ {
+ "folders": ["lib", "drivers"],
+ "patterns": [r"\bprintf\b", r"fprintf\(stdout,", r"fprintf\(stderr,"],
+ "message": "Writing to stdout or stderr",
+ },
+ # Refrain from rte_panic() and rte_exit()
+ {
+ "folders": ["lib", "drivers"],
+ "patterns": [r"rte_panic\(", r"rte_exit\("],
+ "message": "Using rte_panic/rte_exit",
+ },
+ # Don't call directly install_headers()
+ {
+ "folders": ["lib", "drivers"],
+ "patterns": [r"\binstall_headers\b"],
+ "message": "Using install_headers()",
+ },
+ # Refrain from using compiler attribute without common macro
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "skip_files": [r"lib/eal/include/rte_common\.h"],
+ "patterns": [r"__attribute__"],
+ "message": "Using compiler attribute directly",
+ },
+ # Check %l or %ll format specifier
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"%ll*[xud]"],
+ "message": "Using %l format, prefer %PRI*64 if type is [u]int64_t",
+ },
+ # Refrain from 16/32/64 bits rte_atomicNN_xxx()
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"rte_atomic[0-9][0-9]_.*\("],
+ "message": "Using rte_atomicNN_xxx",
+ },
+ # Refrain from rte_smp_[r/w]mb()
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"rte_smp_(r|w)?mb\("],
+ "message": "Using rte_smp_[r/w]mb",
+ },
+ # Refrain from __sync_xxx builtins
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"__sync_.*\("],
+ "message": "Using __sync_xxx builtins",
+ },
+ # Refrain from __rte_atomic_thread_fence()
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"__rte_atomic_thread_fence\("],
+ "message": "Using __rte_atomic_thread_fence, prefer rte_atomic_thread_fence",
+ },
+ # Refrain from __atomic_xxx builtins
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "skip_files": [r"drivers/common/cnxk/"],
+ "patterns": [
+ r"__atomic_.*\(",
+ r"__ATOMIC_(RELAXED|CONSUME|ACQUIRE|RELEASE|ACQ_REL|SEQ_CST)",
+ ],
+ "message": "Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx",
+ },
+ # Refrain from some pthread functions
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [
+ r"pthread_(create|join|detach|set(_?name_np|affinity_np)|attr_set(inheritsched|schedpolicy))\("
+ ],
+ "message": "Using pthread functions, prefer rte_thread",
+ },
+ # Forbid use of __reserved
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"\b__reserved\b"],
+ "message": "Using __reserved",
+ },
+ # Forbid use of __alignof__
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"\b__alignof__\b"],
+ "message": "Using __alignof__, prefer C11 alignof",
+ },
+ # Forbid use of __typeof__
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"\b__typeof__\b"],
+ "message": "Using __typeof__, prefer typeof",
+ },
+ # Forbid use of __builtin_*
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "skip_files": [
+ r"lib/eal/",
+ r"drivers/.*/base/",
+ r"drivers/.*osdep\.h$",
+ ],
+ "patterns": [r"\b__builtin_"],
+ "message": "Using __builtin helpers, prefer EAL macros",
+ },
+ # Forbid inclusion of linux/pci_regs.h
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"include.*linux/pci_regs\.h"],
+ "message": "Using linux/pci_regs.h, prefer rte_pci.h",
+ },
+ # Forbid variadic argument pack extension in macros
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"#\s*define.*[^(,\s]\.\.\.[\s]*\)"],
+ "message": "Do not use variadic argument pack in macros",
+ },
+ # Forbid __rte_packed_begin with enums
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "patterns": [r"enum.*__rte_packed_begin"],
+ "message": "Using __rte_packed_begin with enum is not allowed",
+ },
+ # Forbid use of #pragma
+ {
+ "folders": ["lib", "drivers", "app", "examples"],
+ "skip_files": [r"lib/eal/include/rte_common\.h"],
+ "patterns": [r"(#pragma|_Pragma)"],
+ "message": "Using compilers pragma is not allowed",
+ },
+ # Forbid experimental build flag except in examples
+ {
+ "folders": ["lib", "drivers", "app"],
+ "patterns": [r"-DALLOW_EXPERIMENTAL_API", r"allow_experimental_apis"],
+ "message": "Using experimental build flag for in-tree compilation",
+ },
+ # Refrain from using RTE_LOG_REGISTER for drivers and libs
+ {
+ "folders": ["lib", "drivers"],
+ "patterns": [r"\bRTE_LOG_REGISTER\b"],
+ "message": "Using RTE_LOG_REGISTER, prefer RTE_LOG_REGISTER_(DEFAULT|SUFFIX)",
+ },
+ # Forbid non-internal thread in drivers and libs
+ {
+ "folders": ["lib", "drivers"],
+ "patterns": [r"rte_thread_(set_name|create_control)\("],
+ "message": "Prefer rte_thread_(set_prefixed_name|create_internal_control)",
+ },
+ ]
+
+ def add_result(
+ self,
+ level: str,
+ type_name: str,
+ message: str,
+ filename: str = "",
+ line_num: int = 0,
+ line_content: str = "",
+ ) -> None:
+ """Add a check result."""
+ result = CheckResult(
+ level=level,
+ type_name=type_name,
+ message=message,
+ filename=filename,
+ line_num=line_num,
+ line_content=line_content,
+ )
+ self.results.append(result)
+
+ if level == "ERROR":
+ self.errors += 1
+ elif level == "WARNING":
+ self.warnings += 1
+ else:
+ self.checks += 1
+
+ def parse_patch(self, content: str) -> PatchInfo:
+ """Parse a patch and extract information."""
+ info = PatchInfo()
+ current_file = ""
+ in_diff = False
+ line_num_in_new = 0
+
+ lines = content.split("\n")
+ for i, line in enumerate(lines):
+ # Extract subject
+ if line.startswith("Subject:"):
+ subject = line[8:].strip()
+ # Handle multi-line subjects
+ j = i + 1
+ while j < len(lines) and lines[j].startswith(" "):
+ subject += " " + lines[j].strip()
+ j += 1
+ info.subject = subject
+
+ # Extract author
+ if line.startswith("From:"):
+ info.author = line[5:].strip()
+ match = re.search(r"<([^>]+)>", info.author)
+ if match:
+ info.author_email = match.group(1)
+
+ # Extract Signed-off-by
+ match = re.match(r"^Signed-off-by:\s*(.+)$", line, re.IGNORECASE)
+ if match:
+ info.signoffs.append(match.group(1).strip())
+
+ # Extract Fixes tag
+ match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+ if match:
+ info.has_fixes_tag = True
+ info.fixes_commits.append(match.group(1))
+
+ # Track files in diff
+ if line.startswith("diff --git"):
+ match = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+ if match:
+ current_file = match.group(2)
+ if current_file not in info.files:
+ info.files.append(current_file)
+ info.added_lines[current_file] = []
+ in_diff = True
+
+ # Track hunks
+ if line.startswith("@@"):
+ match = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
+ if match:
+ line_num_in_new = int(match.group(1))
+ continue
+
+ # Track added lines
+ if in_diff and current_file:
+ if line.startswith("+") and not line.startswith("+++"):
+ info.added_lines[current_file].append((line_num_in_new, line[1:]))
+ line_num_in_new += 1
+ elif line.startswith("-"):
+ pass # Deleted line, don't increment
+ elif not line.startswith("\\"):
+ # Context line - store it for reference by line number
+ if current_file not in info.context_before:
+ info.context_before[current_file] = {}
+ info.context_before[current_file][line_num_in_new] = (
+ line[1:] if line.startswith(" ") else line
+ )
+ line_num_in_new += 1
+
+ return info
+
+ def check_line_length(self, patch_info: PatchInfo) -> None:
+ """Check for lines exceeding maximum length."""
+ max_len = self.config.get("max_line_length", DEFAULT_LINE_LENGTH)
+
+ for filename, lines in patch_info.added_lines.items():
+ # Skip documentation files - they have tables and other content
+ # where long lines are acceptable
+ if filename.endswith((".rst", ".md", ".txt")) or "/doc/" in filename:
+ continue
+
+ for line_num, content in lines:
+ # Skip strings that span multiple lines
+ if len(content) > max_len:
+ # Don't warn about long strings or URLs
+ if '"' in content and content.count('"') >= 2:
+ continue
+ if "http://" in content or "https://" in content:
+ continue
+ # Check if it's a comment line
+ if (
+ content.strip().startswith("/*")
+ or content.strip().startswith("*")
+ or content.strip().startswith("//")
+ ):
+ self.add_result(
+ "WARNING",
+ "LONG_LINE_COMMENT",
+ f"line length of {len(content)} exceeds {max_len} columns",
+ filename,
+ line_num,
+ content,
+ )
+ else:
+ self.add_result(
+ "WARNING",
+ "LONG_LINE",
+ f"line length of {len(content)} exceeds {max_len} columns",
+ filename,
+ line_num,
+ content,
+ )
+
+ def check_trailing_whitespace(self, patch_info: PatchInfo) -> None:
+ """Check for trailing whitespace."""
+ for filename, lines in patch_info.added_lines.items():
+ for line_num, content in lines:
+ if content != content.rstrip():
+ self.add_result(
+ "WARNING",
+ "TRAILING_WHITESPACE",
+ "trailing whitespace",
+ filename,
+ line_num,
+ content,
+ )
+
+ def check_tabs_spaces(self, patch_info: PatchInfo) -> None:
+ """Check for space before tab and mixed indentation."""
+ for filename, lines in patch_info.added_lines.items():
+ for line_num, content in lines:
+ if " \t" in content:
+ self.add_result(
+ "WARNING",
+ "SPACE_BEFORE_TAB",
+ "space before tab in indent",
+ filename,
+ line_num,
+ content,
+ )
+
+ def check_signoff(self, patch_info: PatchInfo) -> None:
+ """Check for Signed-off-by line."""
+ if not patch_info.signoffs:
+ self.add_result(
+ "ERROR", "MISSING_SIGN_OFF", "Missing Signed-off-by: line(s)"
+ )
+
+ def check_coding_style(self, patch_info: PatchInfo) -> None:
+ """Check various coding style issues."""
+ for filename, lines in patch_info.added_lines.items():
+ # Skip non-C files for most checks
+ is_c_file = filename.endswith((".c", ".h"))
+ is_c_source = filename.endswith(".c")
+ is_header = filename.endswith(".h")
+
+ prev_line = ""
+ indent_stack = []
+ context_before = patch_info.context_before.get(filename, {})
+ for line_num, content in lines:
+ self.lines_checked += 1
+
+ # Check if the line immediately before this one (which may be
+ # a context line from the patch) ended with backslash continuation
+ prev_context = context_before.get(line_num - 1, "")
+ in_macro_continuation = prev_context.rstrip().endswith("\\")
+
+ if is_c_file:
+ # Check for extern function declarations in .c files
+ # Only flag functions (have parentheses), not data
+ if is_c_source and re.match(r"^\s*extern\b", content):
+ if re.search(r"\(", content):
+ self.add_result(
+ "WARNING",
+ "AVOID_EXTERNS",
+ "extern is not needed for function declarations",
+ filename,
+ line_num,
+ content,
+ )
+
+ # Check for unnecessary break after goto/return/continue
+ # Only flag if the previous statement is unconditional (not inside an if)
+ if re.match(r"^\s*break\s*;", content):
+ # Check if previous line is an unconditional return/goto/continue
+ # It's unconditional if it starts at the same or lower indentation as break
+ # or if it's a plain return/goto not inside an if block
+ prev_stripped = prev_line.strip() if prev_line else ""
+ if re.match(
+ r"^(goto\s+\w+|return\b|continue)\s*[^;]*;\s*$",
+ prev_stripped,
+ ):
+ # Check indentation - if prev line has same or less indentation, it's unconditional
+ break_indent = len(content) - len(content.lstrip())
+ prev_indent = (
+ len(prev_line) - len(prev_line.lstrip())
+ if prev_line
+ else 0
+ )
+ # Only flag if the return/goto is at the same indentation level
+ # (meaning it's not inside a nested if block)
+ if prev_indent <= break_indent:
+ self.add_result(
+ "WARNING",
+ "UNNECESSARY_BREAK",
+ "break is not useful after a goto or return",
+ filename,
+ line_num,
+ content,
+ )
+
+ # STRNCPY: should use strlcpy
+ if re.search(r"\bstrncpy\s*\(", content):
+ self.add_result(
+ "WARNING",
+ "STRNCPY",
+ "Prefer strlcpy over strncpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/",
+ filename,
+ line_num,
+ content,
+ )
+
+ # STRCPY: unsafe string copy
+ if re.search(r"\bstrcpy\s*\(", content):
+ self.add_result(
+ "ERROR",
+ "STRCPY",
+ "strcpy is unsafe - use strlcpy or snprintf",
+ filename,
+ line_num,
+ content,
+ )
+
+ # Check for complex macros without proper enclosure
+ # Note: Compound literal macros like (type[]){...} are valid C99
+ # and commonly used in DPDK, so we don't flag those.
+ # Only flag macros with multiple statements without do-while wrapping.
+ if re.match(r"^\s*#\s*define\s+\w+\s*\([^)]*\)\s+\{", content):
+ # Macro body starts with { but is not a compound literal
+ # Check if it's missing do { } while(0)
+ if not re.search(r"\bdo\s*\{", content):
+ self.add_result(
+ "ERROR",
+ "COMPLEX_MACRO",
+ "Macros with complex values should be enclosed in parentheses or do { } while(0)",
+ filename,
+ line_num,
+ content,
+ )
+
+ # SPACING: missing space before ( in control statements
+ if re.search(r"\b(if|while|for|switch)\(", content):
+ self.add_result(
+ "WARNING",
+ "SPACING",
+ "space required before the open parenthesis '('",
+ filename,
+ line_num,
+ content,
+ )
+
+ # SPACING: space prohibited after open square bracket
+ # Remove string contents first to avoid false positives
+ code_only = re.sub(r'"[^"]*"', '""', content)
+ if re.search(r"\[\s+[^\]]", code_only) and not re.search(
+ r"\[\s*\]", code_only
+ ):
+ self.add_result(
+ "WARNING",
+ "SPACING",
+ "space prohibited after that open square bracket '['",
+ filename,
+ line_num,
+ content,
+ )
+
+ # SPACING: space prohibited before close square bracket
+ if re.search(r"[^\[]\s+\]", code_only):
+ self.add_result(
+ "WARNING",
+ "SPACING",
+ "space prohibited before that close square bracket ']'",
+ filename,
+ line_num,
+ content,
+ )
+
+ # RETURN_PARENTHESES: return with parentheses wrapping the
+ # entire expression, e.g. "return (x + y);"
+ # Only flag when the outermost parens span the whole value,
+ # so "return (a) | (b);" is NOT flagged (parens are for
+ # sub-expression grouping, not wrapping the return value).
+ if re.search(r"\breturn\s*\([^;]+\)\s*;", content):
+ m = re.search(r"\breturn\s+(.*?)\s*;", content)
+ if m:
+ expr = m.group(1).strip()
+ if expr.startswith("(") and expr.endswith(")"):
+ # Walk to find the matching close paren for the first '('
+ depth = 0
+ match_pos = -1
+ for ci, ch in enumerate(expr):
+ if ch == "(":
+ depth += 1
+ elif ch == ")":
+ depth -= 1
+ if depth == 0:
+ match_pos = ci
+ break
+ # Only flag if the matching ')' is the last character,
+ # meaning the outer parens wrap the entire expression
+ if match_pos == len(expr) - 1:
+ inner = expr[1:-1].strip()
+ # Exclude casts: (type)expr and function calls
+ is_cast = re.match(
+ r"^[a-zA-Z_][\w\s\*]*\b\s*[\w(]", inner
+ )
+ is_func = re.search(r"\w\s*\(", inner)
+ if not is_cast and not is_func:
+ self.add_result(
+ "WARNING",
+ "RETURN_PARENTHESES",
+ "return is not a function, parentheses are not required",
+ filename,
+ line_num,
+ content,
+ )
+
+ # BRACES: single statement blocks that need braces
+ # Check for if/else/while/for without braces on multiline
+ if re.match(r"^\s*(if|else\s+if|while|for)\s*\([^{]*$", content):
+ # Control statement without opening brace - check next line
+ pass # Would need lookahead
+
+ # INITIALISED_STATIC: static initialized to 0/NULL
+ if re.match(
+ r"^\s*static\s+.*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content
+ ):
+ self.add_result(
+ "WARNING",
+ "INITIALISED_STATIC",
+ "do not initialise statics to 0 or NULL",
+ filename,
+ line_num,
+ content,
+ )
+
+ # GLOBAL_INITIALISERS: global initialized to 0/NULL
+ if re.match(
+ r"^[a-zA-Z_][a-zA-Z0-9_\s\*]*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;",
+ content,
+ ):
+ if not re.match(r"^\s*static\s+", content):
+ self.add_result(
+ "WARNING",
+ "GLOBAL_INITIALISERS",
+ "do not initialise globals to 0 or NULL",
+ filename,
+ line_num,
+ content,
+ )
+
+ # Note: DEEP_INDENTATION check removed - without full brace
+ # nesting tracking (as in checkpatch.pl), tab counting produces
+ # too many false positives in legitimate code like switch/case
+ # blocks and nested loops in driver transmit paths.
+
+ # TRAILING_STATEMENTS: code on same line as } OR control statement
+ # But allow struct/union member declarations: } name; or } name; /* comment */
+ # Skip macro definitions - they often have intentional one-line constructs
+ is_macro_line = content.rstrip().endswith("\\") or re.match(
+ r"^\s*#\s*define", content
+ )
+ if not is_macro_line:
+ if re.search(r"\}\s*[a-zA-Z_]", content) and not re.search(
+ r"\}\s*(else|while)\b", content
+ ):
+ # Check if this is a struct/union member declaration or
+ # named initializer list (e.g. } errata_vals[] = {)
+ # Pattern: } identifier; or } identifier[]; or } identifier[] = {
+ if not re.search(
+ r"\}\s*\w+\s*(\[\d*\])?\s*;\s*(/\*.*\*/|//.*)?\s*$",
+ content,
+ ) and not re.search(
+ r"\}\s*\w+\s*(\[\d*\])?\s*=\s*\{", content
+ ):
+ self.add_result(
+ "ERROR",
+ "TRAILING_STATEMENTS",
+ "trailing statements should be on next line",
+ filename,
+ line_num,
+ content,
+ )
+ # Also check for if/while with statement on same line (not opening brace)
+ # Pattern: if (cond) statement; or while (cond) statement;
+ # Note: 'for' is excluded because its header contains semicolons
+ # and nested parentheses (e.g. sizeof()) break simple regex matching
+ if re.search(
+ r"\b(if|while)\s*\([^)]+\)\s+(?![\s{])[^;]*;", content
+ ):
+ self.add_result(
+ "ERROR",
+ "TRAILING_STATEMENTS",
+ "trailing statements should be on next line",
+ filename,
+ line_num,
+ content,
+ )
+
+ # CONSTANT_COMPARISON: Yoda conditions (constant on left)
+ # Check for constants on left side of any comparison operator
+ # Strip comments and strings to avoid false positives
+ cmp_content = content.strip()
+ cmp_content = re.sub(r"/\*.*?\*/", "", cmp_content) # inline /* */
+ cmp_content = re.sub(r"//.*$", "", cmp_content) # trailing //
+ cmp_content = re.sub(r'"[^"]*"', '""', cmp_content) # strings
+ if cmp_content and not cmp_content.startswith("*"):
+ if re.search(
+ r"\b(NULL|true|false)\s*[!=<>]=?\s*[&*\w]", cmp_content
+ ) or re.search(r"[\s(]\s*0\s*[!=<>]=?\s*[&*\w]", cmp_content):
+ # Exclude static_assert - operand order doesn't matter
+ if not re.match(r"^\s*static_assert\s*\(", cmp_content):
+ self.add_result(
+ "WARNING",
+ "CONSTANT_COMPARISON",
+ "Comparisons should place the constant on the right side",
+ filename,
+ line_num,
+ content,
+ )
+
+ # BRACES: single statement block should not have braces (or vice versa)
+ # Check for if/else/while/for with single statement in braces
+ if re.match(r"^\s*(if|while|for)\s*\([^)]+\)\s*\{\s*$", prev_line):
+ if re.match(r"^\s*\w.*;\s*$", content) and not re.search(
+ r"^\s*(if|else|while|for|switch|case|default|return\s*;)",
+ content,
+ ):
+ # Check if next line is just closing brace - would need lookahead
+ pass
+
+ # ONE_SEMICOLON: double semicolon
+ if re.search(r";;", content) and not re.search(
+ r"for\s*\([^)]*;;", content
+ ):
+ self.add_result(
+ "WARNING",
+ "ONE_SEMICOLON",
+ "Statements terminations use 1 semicolon",
+ filename,
+ line_num,
+ content,
+ )
+
+ # CODE_INDENT/LEADING_SPACE: spaces used for indentation instead of tabs
+ if re.match(
+ r"^ +[^\s]", content
+ ) and not content.strip().startswith("*"):
+ # Line starts with spaces (not tabs) - but allow for alignment in comments
+ self.add_result(
+ "WARNING",
+ "CODE_INDENT",
+ "code indent should use tabs where possible",
+ filename,
+ line_num,
+ content,
+ )
+
+ # LEADING_SPACE: spaces at start of line (more general)
+ if re.match(r"^ +\t", content):
+ self.add_result(
+ "WARNING",
+ "LEADING_SPACE",
+ "please, no spaces at the start of a line",
+ filename,
+ line_num,
+ content,
+ )
+
+ # LINE_CONTINUATIONS: backslash continuation outside macros
+ # Check if this line has a backslash continuation
+ if content.rstrip().endswith("\\"):
+ # Only flag if not inside a macro definition
+ # A macro context means either:
+ # - This line starts a #define
+ # - The previous line (added or context) was a continuation
+ # - This line is a preprocessor directive
+ is_in_macro = (
+ re.match(r"^\s*#", content)
+ or (prev_line and prev_line.rstrip().endswith("\\"))
+ or in_macro_continuation
+ )
+ if not is_in_macro:
+ self.add_result(
+ "WARNING",
+ "LINE_CONTINUATIONS",
+ "Avoid unnecessary line continuations",
+ filename,
+ line_num,
+ content,
+ )
+
+ # OPEN_ENDED_LINE: lines should not end with '('
+ # This suggests arguments should start on the same line
+ # as the function name rather than wrapping immediately
+ # Skip macro continuation lines and preprocessor directives
+ stripped_end = content.rstrip()
+ if not is_macro_line and stripped_end.endswith("("):
+ # Don't flag control flow statements - their parens
+ # contain conditions, not argument lists
+ if not re.search(
+ r"\b(if|while|for|switch)\s*\($", stripped_end
+ ):
+ self.add_result(
+ "CHECK",
+ "OPEN_ENDED_LINE",
+ "Lines should not end with a '('",
+ filename,
+ line_num,
+ content,
+ )
+
+ # FUNCTION_WITHOUT_ARGS: empty parens instead of (void)
+ if is_header and re.search(r"\b\w+\s*\(\s*\)\s*;", content):
+ if not re.search(
+ r"\b(while|if|for|switch|return)\s*\(\s*\)", content
+ ):
+ self.add_result(
+ "ERROR",
+ "FUNCTION_WITHOUT_ARGS",
+ "Bad function definition - use (void) instead of ()",
+ filename,
+ line_num,
+ content,
+ )
+
+ # INLINE_LOCATION: inline should come after storage class
+ if re.match(r"^\s*inline\s+(static|extern)", content):
+ self.add_result(
+ "ERROR",
+ "INLINE_LOCATION",
+ "inline keyword should sit between storage class and type",
+ filename,
+ line_num,
+ content,
+ )
+
+ # STATIC_CONST: const should come after static
+ if re.match(r"^\s*const\s+static\b", content):
+ self.add_result(
+ "WARNING",
+ "STATIC_CONST",
+ "Move const after static - use 'static const'",
+ filename,
+ line_num,
+ content,
+ )
+ self.add_result(
+ "WARNING",
+ "STORAGE_CLASS",
+ "storage class should be at the beginning of the declaration",
+ filename,
+ line_num,
+ content,
+ )
+
+ # CONST_CONST: const used twice
+ if re.search(r"\bconst\s+\w+\s+const\b", content):
+ self.add_result(
+ "WARNING",
+ "CONST_CONST",
+ "const used twice - remove duplicate const",
+ filename,
+ line_num,
+ content,
+ )
+
+ # SELF_ASSIGNMENT: x = x (simple variable, not struct members)
+ # Match only simple identifiers, not struct/pointer member access
+ match = re.search(r"^\s*(\w+)\s*=\s*(\w+)\s*;", content)
+ if match and match.group(1) == match.group(2):
+ self.add_result(
+ "WARNING",
+ "SELF_ASSIGNMENT",
+ "Do not use self-assignments to avoid compiler warnings",
+ filename,
+ line_num,
+ content,
+ )
+
+ # PREFER_DEFINED_ATTRIBUTE_MACRO: prefer DPDK/kernel macros over __attribute__
+ attr_macros = {
+ "cold": "__rte_cold",
+ "hot": "__rte_hot",
+ "noinline": "__rte_noinline",
+ "always_inline": "__rte_always_inline",
+ "unused": "__rte_unused",
+ "packed": "__rte_packed",
+ "aligned": "__rte_aligned",
+ "weak": "__rte_weak",
+ "pure": "__rte_pure",
+ }
+ for attr, replacement in attr_macros.items():
+ if re.search(rf"__attribute__\s*\(\s*\(\s*{attr}\b", content):
+ self.add_result(
+ "WARNING",
+ "PREFER_DEFINED_ATTRIBUTE_MACRO",
+ f"Prefer {replacement} over __attribute__(({attr}))",
+ filename,
+ line_num,
+ content,
+ )
+
+ # POINTER_LOCATION: char* instead of char *
+ if re.search(
+ r"\b(char|int|void|short|long|float|double|unsigned|signed)\*\s+\w",
+ content,
+ ):
+ self.add_result(
+ "ERROR",
+ "POINTER_LOCATION",
+ '"foo* bar" should be "foo *bar"',
+ filename,
+ line_num,
+ content,
+ )
+
+ # MACRO_WITH_FLOW_CONTROL: macros with return/goto/break
+ if re.match(
+ r"^\s*#\s*define\s+\w+.*\b(return|goto|break|continue)\b",
+ content,
+ ):
+ self.add_result(
+ "WARNING",
+ "MACRO_WITH_FLOW_CONTROL",
+ "Macros with flow control statements should be avoided",
+ filename,
+ line_num,
+ content,
+ )
+
+ # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros with multiple statements
+ if re.match(
+ r"^\s*#\s*define\s+\w+\([^)]*\)\s+.*;\s*[^\\]", content
+ ):
+ if not re.search(r"do\s*\{", content):
+ self.add_result(
+ "WARNING",
+ "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+ "Macros with multiple statements should use do {} while(0)",
+ filename,
+ line_num,
+ content,
+ )
+
+ # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros starting with if
+ if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+if\s*\(", content):
+ self.add_result(
+ "ERROR",
+ "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+ "Macros starting with if should be enclosed by a do - while loop",
+ filename,
+ line_num,
+ content,
+ )
+
+ # Multiple statements on one line (skip comments and strings)
+ stripped_content = content.strip()
+ if re.search(r";\s*[a-zA-Z_]", content) and "for" not in content:
+ # Skip if line is a comment
+ if not (
+ stripped_content.startswith("/*")
+ or stripped_content.startswith("*")
+ or stripped_content.startswith("//")
+ ):
+ # Skip if the semicolon is inside a string or comment
+ # Remove strings and comments before checking
+ code_only = re.sub(
+ r'"[^"]*"', '""', content
+ ) # Remove string contents
+ code_only = re.sub(
+ r"/\*.*?\*/", "", code_only
+ ) # Remove /* */ comments
+ code_only = re.sub(
+ r"//.*$", "", code_only
+ ) # Remove // comments
+ if re.search(r";\s*[a-zA-Z_]", code_only):
+ self.add_result(
+ "CHECK",
+ "MULTIPLE_STATEMENTS",
+ "multiple statements on one line",
+ filename,
+ line_num,
+ content,
+ )
+
+ # Check for C99 comments in headers that should use C89
+ if is_header and "//" in content:
+ # Only flag if not in a string
+ stripped = re.sub(r'"[^"]*"', "", content)
+ if "//" in stripped:
+ self.add_result(
+ "CHECK",
+ "C99_COMMENTS",
+ "C99 // comments are acceptable but /* */ is preferred in headers",
+ filename,
+ line_num,
+ content,
+ )
+
+ # BLOCK_COMMENT_STYLE: block comments style issues
+ # Leading /* on its own line (but allow Doxygen /** style)
+ if re.match(r"^\s*/\*\*+\s*$", content):
+ # Allow /** (Doxygen) but not /*** or more
+ if not re.match(r"^\s*/\*\*\s*$", content):
+ self.add_result(
+ "WARNING",
+ "BLOCK_COMMENT_STYLE",
+ "Block comments should not use a leading /* on a line by itself",
+ filename,
+ line_num,
+ content,
+ )
+ # Trailing */ on separate line after block comment
+ if re.match(
+ r"^\s*\*+/\s*$", content
+ ) and prev_line.strip().startswith("*"):
+ pass # This is actually acceptable
+ # Block with trailing */ but content before it (like === */)
+ if re.search(r"\S\s*=+\s*\*/\s*$", content):
+ self.add_result(
+ "WARNING",
+ "BLOCK_COMMENT_STYLE",
+ "Block comments use a trailing */ on a separate line",
+ filename,
+ line_num,
+ content,
+ )
+
+ # REPEATED_WORD: check for repeated words (case-sensitive to
+ # avoid false positives like "--format FORMAT" in help text).
+ # Also skip "struct foo foo;" / "union foo foo;" where the
+ # type name and variable name are legitimately identical.
+ words = re.findall(r"\b(\w+)\s+\1\b", content)
+ for word in words:
+ word_lower = word.lower()
+ # Skip common valid repeated patterns
+ if word_lower in ("that", "had", "long", "int", "short"):
+ continue
+ # Skip struct/union type-name used as variable name:
+ # struct foo foo; or } foo foo;
+ if re.search(
+ r"\b(struct|union)\s+" + re.escape(word) + r"\s+" + re.escape(word) + r"\b",
+ content,
+ ):
+ continue
+ self.add_result(
+ "WARNING",
+ "REPEATED_WORD",
+ f"Possible repeated word: '{word}'",
+ filename,
+ line_num,
+ content,
+ )
+
+ # STRING_FRAGMENTS: unnecessary string concatenation like "foo" "bar"
+ # Must have closing quote, whitespace, opening quote pattern
+ if re.search(r'"\s*"\s*[^)]', content) and not re.search(
+ r"#\s*define", content
+ ):
+ # Verify it's actually two separate strings being concatenated
+ # by checking for the pattern: "..." "..."
+ if re.search(r'"[^"]*"\s+"[^"]*"', content):
+ self.add_result(
+ "CHECK",
+ "STRING_FRAGMENTS",
+ "Consecutive strings are generally better as a single string",
+ filename,
+ line_num,
+ content,
+ )
+
+ prev_line = content
+
+ def check_spelling(self, patch_info: PatchInfo) -> None:
+ """Check for spelling errors using codespell dictionary."""
+ for filename, lines in patch_info.added_lines.items():
+ for line_num, content in lines:
+ # REPEATED_WORD check for non-C files (C files handled in check_coding_style)
+ if not filename.endswith((".c", ".h")):
+ words = re.findall(r"\b(\w+)\s+\1\b", content)
+ for word in words:
+ word_lower = word.lower()
+ if word_lower in ("that", "had", "long", "int", "short"):
+ continue
+ if re.search(
+ r"\b(struct|union)\s+" + re.escape(word) + r"\s+" + re.escape(word) + r"\b",
+ content,
+ ):
+ continue
+ self.add_result(
+ "WARNING",
+ "REPEATED_WORD",
+ f"Possible repeated word: '{word}'",
+ filename,
+ line_num,
+ content,
+ )
+
+ # Spelling check
+ if self.spelling_dict:
+ # Common abbreviations that should not be flagged as typos
+ abbreviations = {
+ "nd",
+ "ns",
+ "na",
+ "ra",
+ "rs", # IPv6 Neighbor Discovery
+ "tx",
+ "rx",
+ "id",
+ "io",
+ "ip", # Common networking
+ "tcp",
+ "udp",
+ "arp",
+ "dns", # Protocols
+ "hw",
+ "sw",
+ "fw", # Hardware/Software/Firmware
+ "src",
+ "dst",
+ "ptr",
+ "buf", # Common code abbreviations
+ "cfg",
+ "ctx",
+ "idx",
+ "cnt", # Config/Context/Index/Count
+ "len",
+ "num",
+ "max",
+ "min", # Length/Number/Max/Min
+ "prev",
+ "next",
+ "curr", # Previous/Next/Current
+ "init",
+ "fini",
+ "deinit", # Initialize/Finish
+ "alloc",
+ "dealloc",
+ "realloc", # Memory
+ "endcode", # Doxygen tag
+ }
+ # Extract words, but skip contractions (don't, couldn't, etc.)
+ # by removing them before word extraction
+ spell_content = re.sub(r"[a-zA-Z]+n't\b", "", content)
+ spell_content = re.sub(r"[a-zA-Z]+'[a-zA-Z]+", "", spell_content)
+ words = re.findall(r"\b[a-zA-Z]+\b", spell_content)
+ for word in words:
+ lower_word = word.lower()
+ if (
+ lower_word in self.spelling_dict
+ and lower_word not in abbreviations
+ ):
+ self.add_result(
+ "WARNING",
+ "TYPO_SPELLING",
+ f"'{word}' may be misspelled - perhaps '{self.spelling_dict[lower_word]}'?",
+ filename,
+ line_num,
+ content,
+ )
+
+ def check_forbidden_tokens(self, patch_info: PatchInfo) -> None:
+ """Check for DPDK-specific forbidden tokens."""
+ for filename, lines in patch_info.added_lines.items():
+ for rule in self.forbidden_rules:
+ # Check if file is in one of the target folders
+ in_folder = False
+ for folder in rule["folders"]:
+ if filename.startswith(folder + "/") or filename.startswith(
+ "b/" + folder + "/"
+ ):
+ in_folder = True
+ break
+
+ if not in_folder:
+ continue
+
+ # Check if file should be skipped
+ skip = False
+ for skip_pattern in rule.get("skip_files", []):
+ if re.search(skip_pattern, filename):
+ skip = True
+ break
+
+ if skip:
+ continue
+
+ # Check each line for forbidden patterns
+ for line_num, content in lines:
+ for pattern in rule["patterns"]:
+ if re.search(pattern, content):
+ self.add_result(
+ "WARNING",
+ "FORBIDDEN_TOKEN",
+ rule["message"],
+ filename,
+ line_num,
+ content,
+ )
+ break
+
+ def check_experimental_tags(self, patch_info: PatchInfo) -> None:
+ """Check __rte_experimental tag placement."""
+ for filename, lines in patch_info.added_lines.items():
+ for line_num, content in lines:
+ if "__rte_experimental" in content:
+ # Should only be in headers
+ if filename.endswith(".c"):
+ self.add_result(
+ "WARNING",
+ "EXPERIMENTAL_TAG",
+ f"Please only put __rte_experimental tags in headers ({filename})",
+ filename,
+ line_num,
+ content,
+ )
+ # Should appear alone on the line
+ stripped = content.strip()
+ if stripped != "__rte_experimental":
+ self.add_result(
+ "WARNING",
+ "EXPERIMENTAL_TAG",
+ "__rte_experimental must appear alone on the line immediately preceding the return type of a function",
+ filename,
+ line_num,
+ content,
+ )
+
+ def check_internal_tags(self, patch_info: PatchInfo) -> None:
+ """Check __rte_internal tag placement."""
+ for filename, lines in patch_info.added_lines.items():
+ for line_num, content in lines:
+ if "__rte_internal" in content:
+ # Should only be in headers
+ if filename.endswith(".c"):
+ self.add_result(
+ "WARNING",
+ "INTERNAL_TAG",
+ f"Please only put __rte_internal tags in headers ({filename})",
+ filename,
+ line_num,
+ content,
+ )
+ # Should appear alone on the line
+ stripped = content.strip()
+ if stripped != "__rte_internal":
+ self.add_result(
+ "WARNING",
+ "INTERNAL_TAG",
+ "__rte_internal must appear alone on the line immediately preceding the return type of a function",
+ filename,
+ line_num,
+ content,
+ )
+
+ def check_aligned_attributes(self, patch_info: PatchInfo) -> None:
+ """Check alignment attribute usage."""
+ align_tokens = [
+ "__rte_aligned",
+ "__rte_cache_aligned",
+ "__rte_cache_min_aligned",
+ ]
+
+ for filename, lines in patch_info.added_lines.items():
+ for line_num, content in lines:
+ for token in align_tokens:
+ if re.search(rf"\b{token}\b", content):
+ # Should only be used with struct or union
+ if not re.search(rf"\b(struct|union)\s*{token}\b", content):
+ self.add_result(
+ "WARNING",
+ "ALIGNED_ATTRIBUTE",
+ f"Please use {token} only for struct or union types alignment",
+ filename,
+ line_num,
+ content,
+ )
+
+ def check_packed_attributes(self, patch_info: PatchInfo) -> None:
+ """Check packed attribute usage."""
+ begin_count = 0
+ end_count = 0
+
+ for filename, lines in patch_info.added_lines.items():
+ for line_num, content in lines:
+ if "__rte_packed_begin" in content:
+ begin_count += 1
+ # Should be after struct, union, or alignment attributes
+ if (
+ not re.search(
+ r"\b(struct|union)\s*__rte_packed_begin\b", content
+ )
+ and not re.search(
+ r"__rte_cache_aligned\s*__rte_packed_begin", content
+ )
+ and not re.search(
+ r"__rte_cache_min_aligned\s*__rte_packed_begin", content
+ )
+ and not re.search(
+ r"__rte_aligned\(.*\)\s*__rte_packed_begin", content
+ )
+ ):
+ self.add_result(
+ "WARNING",
+ "PACKED_ATTRIBUTE",
+ "Use __rte_packed_begin only after struct, union or alignment attributes",
+ filename,
+ line_num,
+ content,
+ )
+
+ if "__rte_packed_end" in content:
+ end_count += 1
+
+ if begin_count != end_count:
+ self.add_result(
+ "WARNING",
+ "PACKED_ATTRIBUTE",
+ "__rte_packed_begin and __rte_packed_end should always be used in pairs",
+ )
+
+ def check_patch(self, content: str, patch_file: str = None) -> bool:
+ """Run all checks on a patch."""
+ self.results = []
+ self.errors = 0
+ self.warnings = 0
+ self.checks = 0
+ self.lines_checked = 0
+
+ # Check patch format first
+ self.check_patch_format(content, patch_file)
+
+ patch_info = self.parse_patch(content)
+
+ # Run all checks
+ self.check_signoff(patch_info)
+ self.check_line_length(patch_info)
+ self.check_trailing_whitespace(patch_info)
+ self.check_tabs_spaces(patch_info)
+ self.check_coding_style(patch_info)
+ self.check_spelling(patch_info)
+ self.check_forbidden_tokens(patch_info)
+ self.check_experimental_tags(patch_info)
+ self.check_internal_tags(patch_info)
+ self.check_aligned_attributes(patch_info)
+ self.check_packed_attributes(patch_info)
+ self.check_commit_message(patch_info, content)
+
+ return self.errors == 0 and self.warnings == 0
+
+ def check_patch_format(self, content: str, patch_file: str = None) -> None:
+ """Check basic patch format for corruption."""
+ lines = content.split("\n")
+
+ # Track patch structure
+ has_diff = False
+ has_hunk = False
+ in_hunk = False
+ hunk_line = 0
+
+ for i, line in enumerate(lines, 1):
+ # Track diff headers
+ if line.startswith("diff --git"):
+ has_diff = True
+ in_hunk = False
+
+ # Parse hunk header
+ if line.startswith("@@"):
+ has_hunk = True
+ in_hunk = True
+ hunk_line = i
+ # Validate hunk header format
+ if not re.match(r"@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@", line):
+ self.add_result(
+ "ERROR",
+ "CORRUPTED_PATCH",
+ f"patch seems to be corrupt (malformed hunk header) at line {i}",
+ )
+
+ # End of patch content (signature separator)
+ elif line == "-- ":
+ in_hunk = False
+
+ # Check for lines that look like they should be in a hunk but aren't prefixed
+ elif (
+ in_hunk
+ and line
+ and not line.startswith(
+ (
+ "+",
+ "-",
+ " ",
+ "\\",
+ "diff ",
+ "@@",
+ "index ",
+ "--- ",
+ "+++ ",
+ "new file",
+ "deleted file",
+ "old mode",
+ "new mode",
+ "rename ",
+ "similarity",
+ "copy ",
+ )
+ )
+ ):
+ # This could be a wrapped line or corruption
+ # But be careful - empty lines and commit message lines are OK
+ if not line.startswith(
+ (
+ "From ",
+ "Subject:",
+ "Date:",
+ "Signed-off-by:",
+ "Acked-by:",
+ "Reviewed-by:",
+ "Tested-by:",
+ "Fixes:",
+ "Cc:",
+ "---",
+ "Message-Id:",
+ )
+ ):
+ # Likely a corrupted/wrapped line in the diff
+ self.add_result(
+ "ERROR",
+ "CORRUPTED_PATCH",
+ f"patch seems to be corrupt (line wrapped?) at line {i}",
+ )
+ in_hunk = False # Stop checking this hunk
+
+ if has_diff and not has_hunk:
+ self.add_result(
+ "ERROR",
+ "CORRUPTED_PATCH",
+ "Patch appears to be corrupted (has diff but no hunks)",
+ )
+
+ # Check for DOS line endings
+ if "\r\n" in content:
+ self.add_result(
+ "ERROR",
+ "DOS_LINE_ENDINGS",
+ "Patch has DOS line endings, should be UNIX line endings",
+ )
+
+ def check_commit_message(self, patch_info: PatchInfo, content: str) -> None:
+ """Check commit message for issues."""
+ lines = content.split("\n")
+
+ in_commit_msg = False
+ commit_msg_lines = []
+
+ for i, line in enumerate(lines):
+ if line.startswith("Subject:"):
+ in_commit_msg = True
+ continue
+ if line.startswith("---") or line.startswith("diff --git"):
+ in_commit_msg = False
+ continue
+ if in_commit_msg:
+ commit_msg_lines.append((i + 1, line))
+
+ for line_num, line in commit_msg_lines:
+ # UNKNOWN_COMMIT_ID: Fixes tag with short or invalid commit ID
+ match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+ if match:
+ commit_id = match.group(1)
+ if len(commit_id) < 12:
+ self.add_result(
+ "WARNING",
+ "UNKNOWN_COMMIT_ID",
+ f"Commit id '{commit_id}' is too short, use at least 12 characters",
+ line_num=line_num,
+ line_content=line,
+ )
+ # Check Fixes format: should be Fixes: <hash> ("commit subject")
+ if not re.match(r'^Fixes:\s+[0-9a-fA-F]{12,}\s+\("[^"]+"\)\s*$', line):
+ self.add_result(
+ "WARNING",
+ "BAD_FIXES_TAG",
+ 'Fixes: tag format should be: Fixes: <12+ char hash> ("commit subject")',
+ line_num=line_num,
+ line_content=line,
+ )
+
+ def format_results(self, show_types: bool = True) -> str:
+ """Format the results for output."""
+ output = []
+
+ for result in self.results:
+ if result.filename and result.line_num:
+ prefix = f"{result.filename}:{result.line_num}:"
+ elif result.filename:
+ prefix = f"{result.filename}:"
+ else:
+ prefix = ""
+
+ type_str = f" [{result.type_name}]" if show_types else ""
+ output.append(f"{result.level}:{type_str} {result.message}")
+
+ if prefix:
+ output.append(f"# {prefix}")
+ if result.line_content:
+ output.append(f"+ {result.line_content}")
+ output.append("")
+
+ return "\n".join(output)
+
+ def get_summary(self) -> str:
+ """Get a summary of the check results."""
+ return f"total: {self.errors} errors, {self.warnings} warnings, {self.checks} checks, {self.lines_checked} lines checked"
+
+
+def split_mbox(content: str) -> list[str]:
+ """Split an mbox file into individual messages.
+
+ Mbox format uses 'From ' at the start of a line as message separator.
+ """
+ messages = []
+ current = []
+
+ for line in content.split("\n"):
+ # Standard mbox separator: line starting with "From " followed by
+ # an address or identifier and a date
+ if line.startswith("From ") and current:
+ messages.append("\n".join(current))
+ current = [line]
+ else:
+ current.append(line)
+
+ if current:
+ messages.append("\n".join(current))
+
+ return messages
+
+
+def check_single_patch(
+ checker: CheckPatch,
+ patch_path: Optional[str],
+ commit: Optional[str],
+ verbose: bool,
+ quiet: bool,
+ pre_content: Optional[str] = None,
+) -> bool:
+ """Check a single patch file or commit."""
+ subject = ""
+ content = ""
+
+ if pre_content:
+ content = pre_content
+ elif patch_path:
+ try:
+ with open(patch_path, "r", encoding="utf-8", errors="replace") as f:
+ content = f.read()
+ except IOError as e:
+ print(f"Error reading {patch_path}: {e}", file=sys.stderr)
+ return False
+ elif commit:
+ try:
+ result = subprocess.run(
+ [
+ "git",
+ "format-patch",
+ "--find-renames",
+ "--no-stat",
+ "--stdout",
+ "-1",
+ commit,
+ ],
+ capture_output=True,
+ text=True,
+ )
+ if result.returncode != 0:
+ print(f"Error getting commit {commit}", file=sys.stderr)
+ return False
+ content = result.stdout
+ except (subprocess.CalledProcessError, FileNotFoundError) as e:
+ print(f"Error running git: {e}", file=sys.stderr)
+ return False
+ else:
+ content = sys.stdin.read()
+
+ # Extract subject
+ match = re.search(
+ r"^Subject:\s*(.+?)(?:\n(?=\S)|\n\n)", content, re.MULTILINE | re.DOTALL
+ )
+ if match:
+ subject = match.group(1).replace("\n ", " ").strip()
+
+ if verbose:
+ print(f"\n### {subject}\n")
+
+ is_clean = checker.check_patch(content, patch_path)
+ has_issues = checker.errors > 0 or checker.warnings > 0
+ has_any_results = has_issues or checker.checks > 0
+
+ if has_any_results or verbose:
+ if not verbose and subject:
+ print(f"\n### {subject}\n")
+ print(checker.format_results(show_types=True))
+ print(checker.get_summary())
+
+ return is_clean
+
+
+def parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Check patches for DPDK coding style and common issues",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Examples:
+ %(prog)s patch.diff Check a patch file
+ %(prog)s -n 3 Check last 3 commits
+ %(prog)s -r origin/main..HEAD Check commits in range
+ cat patch.diff | %(prog)s Check patch from stdin
+""",
+ )
+
+ parser.add_argument("patches", nargs="*", help="Patch files to check")
+ parser.add_argument("-n", type=int, metavar="NUM", help="Check last NUM commits")
+ parser.add_argument(
+ "-r",
+ "--range",
+ metavar="RANGE",
+ help="Check commits in git range (default: origin/main..)",
+ )
+ parser.add_argument(
+ "-q", "--quiet", action="store_true", help="Quiet mode - only show summary"
+ )
+ parser.add_argument(
+ "-v", "--verbose", action="store_true", help="Verbose mode - show all checks"
+ )
+ parser.add_argument(
+ "--max-line-length",
+ type=int,
+ default=DEFAULT_LINE_LENGTH,
+ help=f"Maximum line length (default: {DEFAULT_LINE_LENGTH})",
+ )
+ parser.add_argument(
+ "--codespell",
+ action="store_true",
+ default=True,
+ help="Enable spell checking (default: enabled)",
+ )
+ parser.add_argument(
+ "--no-codespell",
+ dest="codespell",
+ action="store_false",
+ help="Disable spell checking",
+ )
+ parser.add_argument(
+ "--codespellfile", metavar="FILE", help="Path to codespell dictionary"
+ )
+ parser.add_argument(
+ "--show-types",
+ action="store_true",
+ default=True,
+ help="Show message types (default: enabled)",
+ )
+ parser.add_argument(
+ "--no-show-types",
+ dest="show_types",
+ action="store_false",
+ help="Hide message types",
+ )
+
+ return parser.parse_args()
+
+
+def main():
+ """Main entry point."""
+ args = parse_args()
+
+ # Build configuration
+ config = {
+ "max_line_length": args.max_line_length,
+ "codespell": args.codespell,
+ "show_types": args.show_types,
+ }
+
+ if args.codespellfile:
+ config["codespell_file"] = args.codespellfile
+
+ checker = CheckPatch(config)
+
+ total = 0
+ failed = 0
+
+ if args.patches:
+ # Check specified patch files
+ for patch in args.patches:
+ try:
+ with open(patch, "r", encoding="utf-8", errors="replace") as f:
+ content = f.read()
+ except IOError as e:
+ print(f"Error reading {patch}: {e}", file=sys.stderr)
+ total += 1
+ failed += 1
+ continue
+
+ # Check if this is an mbox with multiple patches
+ messages = split_mbox(content)
+ if len(messages) > 1:
+ for msg in messages:
+ # Only process messages that contain diffs
+ if "diff --git" in msg or "---" in msg:
+ total += 1
+ if not check_single_patch(
+ checker, None, None, args.verbose, args.quiet, msg
+ ):
+ failed += 1
+ else:
+ total += 1
+ if not check_single_patch(
+ checker, patch, None, args.verbose, args.quiet
+ ):
+ failed += 1
+
+ elif args.n or args.range:
+ # Check git commits
+ if args.n:
+ result = subprocess.run(
+ ["git", "rev-list", "--reverse", f"--max-count={args.n}", "HEAD"],
+ capture_output=True,
+ text=True,
+ )
+ else:
+ git_range = args.range if args.range else "origin/main.."
+ result = subprocess.run(
+ ["git", "rev-list", "--reverse", git_range],
+ capture_output=True,
+ text=True,
+ )
+
+ if result.returncode != 0:
+ print("Error getting git commits", file=sys.stderr)
+ sys.exit(1)
+
+ commits = result.stdout.strip().split("\n")
+ for commit in commits:
+ if commit:
+ total += 1
+ if not check_single_patch(
+ checker, None, commit, args.verbose, args.quiet
+ ):
+ failed += 1
+
+ elif not sys.stdin.isatty():
+ # Read from stdin
+ total = 1
+ if not check_single_patch(checker, None, None, args.verbose, args.quiet):
+ failed += 1
+
+ else:
+ # Default to checking commits since origin/main
+ result = subprocess.run(
+ ["git", "rev-list", "--reverse", "origin/main.."],
+ capture_output=True,
+ text=True,
+ )
+
+ commits = result.stdout.strip().split("\n") if result.stdout.strip() else []
+ for commit in commits:
+ if commit:
+ total += 1
+ if not check_single_patch(
+ checker, None, commit, args.verbose, args.quiet
+ ):
+ failed += 1
+
+ # Print summary
+ passed = total - failed
+ if not args.quiet:
+ print(f"\n{passed}/{total} valid patch{'es' if passed != 1 else ''}")
+
+ sys.exit(0 if failed == 0 else 1)
+
+
+if __name__ == "__main__":
+ main()
--
2.53.0
More information about the dev
mailing list