[PATCH v9 01/23] devtools: add script to check for non-inclusive naming
Stephen Hemminger
stephen at networkplumber.org
Mon Feb 5 18:43:29 CET 2024
Add a new script to find words that should not be used.
It is a wrapper around the git grep command.
By default it prints matches but can also display counts.
It uses the word lists from the Inclusive Naming Initiative;
see https://inclusivenaming.org/word-lists/
Note: the JSON list has an extra comma at the end of its list of elements,
which is not valid in basic JSON but is allowed in the user-friendly JSON5
format (https://json5.org/). To handle this, the tool uses the json5 package
from PyPI to parse the list.
Examples:
$ ./devtools/check-inclusive-naming.py -c | head -5
app/test/test_common.c:1
app/test/test_eal_flags.c:8
app/test/test_hash.c:1
app/test/test_hash_readwrite_lf_perf.c:1
app/test/test_link_bonding_mode4.c:1
$ ./devtools/check-inclusive-naming.py lib/pcapng
lib/pcapng/rte_pcapng.c: /* sanity check that is really a pcapng mbuf */
$ ./devtools/check-inclusive-naming.py -l lib/eal
lib/eal/common/eal_common_memory.c
lib/eal/common/eal_common_proc.c
lib/eal/common/eal_common_trace.c
lib/eal/common/eal_memcfg.h
lib/eal/common/rte_malloc.c
lib/eal/freebsd/eal.c
lib/eal/linux/eal.c
lib/eal/windows/eal.c
Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
MAINTAINERS | 1 +
devtools/check-inclusive-naming.py | 135 +++++++++++++++++++++++++++++
2 files changed, 136 insertions(+)
create mode 100755 devtools/check-inclusive-naming.py
diff --git a/MAINTAINERS b/MAINTAINERS
index 5fb3a73f840e..dbf7ea2d916d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -88,6 +88,7 @@ F: devtools/check-doc-vs-code.sh
F: devtools/check-dup-includes.sh
F: devtools/check-maintainers.sh
F: devtools/check-forbidden-tokens.awk
+F: devtools/check-inclusive-naming.py
F: devtools/check-git-log.sh
F: devtools/check-spdx-tag.sh
F: devtools/check-symbol-change.sh
diff --git a/devtools/check-inclusive-naming.py b/devtools/check-inclusive-naming.py
new file mode 100755
index 000000000000..e8989c3c9b79
--- /dev/null
+++ b/devtools/check-inclusive-naming.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2023 Stephen Hemminger
+#
+# This script scans the source tree and creates a list of files
+# containing words that are recommended to be avoided by the
+# Inclusive Naming Initiative.
+# See: https://inclusivenaming.org/word-lists/
+
+import argparse
+import subprocess
+from urllib.request import urlopen
+
+# Need JSON5 to be able to handle extra comma
+import json5
+
+DEFAULT_URL = 'https://inclusivenaming.org/word-lists/index.json'
+
+# Paths excluded from the scan by default because they give false positives
+skip_files = [
+ 'doc/guides/rel_notes/', 'doc/guides/contributing/coding_style.rst',
+ 'doc/guides/prog_guide/glossary.rst'
+]
+
+# Terms from the word list that are allowed for now
+allow_words = ['abort']
+
+
+def args_parse():
+    """Parse the command line and return the argparse namespace to main."""
+
+    parser = argparse.ArgumentParser(
+        description="Identify word usage not aligned with inclusive naming")
+    parser.add_argument('-c',
+                        '--count',
+                        help="Show the number of lines that match",
+                        action='store_true')
+    parser.add_argument('-d',
+                        '--debug',
+                        help="Debug this script",
+                        action='store_true')
+    parser.add_argument('-l',
+                        '--files-with-matches',
+                        help="Show only names of files with hits",
+                        action='store_true')
+    # note: tier 0 is "ok to use"
+    parser.add_argument('-t',
+                        '--tier',
+                        type=int,
+                        choices=range(0, 4),
+                        action='append',
+                        help="Show non-conforming words of particular tier")
+    # With action='append', command-line values are added after the defaults
+    parser.add_argument('-x',
+                        '--exclude',
+                        default=skip_files,
+                        action='append',
+                        help="Exclude path from scan")
+    parser.add_argument('-a',
+                        '--allow',
+                        default=allow_words,
+                        action='append',
+                        help="Ignore these words")
+    parser.add_argument('--url',
+                        default=DEFAULT_URL,
+                        help="URL for the non-inclusive naming word list")
+    parser.add_argument('paths', nargs='*', help='files and directory to scan')
+
+    return parser.parse_args()
+
+
+def fetch_wordlist(url, tiers):
+    """Return lower-case grep patterns for word-list terms in given tiers."""
+
+    # The wordlist is returned as JSON like:
+    # {
+    #     "data" :
+    #     [
+    #         {
+    #             "term": "abort",
+    #             "tier" : "1",
+    #             "recommendation": "Replace when possible.",
+    #             ...
+    with urlopen(url) as response:
+        entries = json5.loads(response.read())['data']
+
+    wordlist = []
+    for item in entries:
+        term = item['term'].lower()
+        # Compare the lower-cased term itself, before it becomes a regex,
+        # so terms with capitals or hyphens are matched against allow_words.
+        if int(item['tier']) in tiers and term not in allow_words:
+            # convert minus sign to minus or space regex
+            wordlist.append(term.replace('-', '[- ]'))
+
+    return wordlist
+
+
+def process(args):
+    """Build a 'git grep' command for the selected words and run it."""
+
+    # Default to Tier 1, 2 and 3.
+    tiers = args.tier or list(range(1, 4))
+
+    # Honor --allow: fetch_wordlist() lower-cases each term and turns '-'
+    # into '[- ]', so normalize the allowed terms the same way first.
+    allowed = {term.lower().replace('-', '[- ]') for term in args.allow}
+    wordlist = [w for w in fetch_wordlist(args.url, tiers) if w not in allowed]
+    if args.debug:
+        print(f'Matching on {len(wordlist)} words')
+
+    cmd = ['git', 'grep', '-i']
+    if args.files_with_matches:
+        cmd.append('-l')
+    if args.count:
+        cmd.append('-c')
+    for word in wordlist:
+        cmd += ['-e', word]
+    cmd.append('--')
+    # Use args.exclude (defaults plus any -x options), not just skip_files.
+    for path in args.exclude:
+        cmd.append(f':^{path}')
+    cmd += args.paths
+    if args.debug:
+        print(cmd)
+    subprocess.run(cmd, check=False)
+
+
+def main():
+    """Program entry point: parse the arguments and run the scan."""
+    process(args_parse())
+
+
+if __name__ == "__main__":
+    main()
--
2.43.0
More information about the dev
mailing list