xref: /linux/tools/lib/python/abi/abi_regex.py (revision 72c395024dac5e215136cbff793455f065603b06)
#!/usr/bin/env python3
# xxpylint: disable=R0903
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Convert ABI "What" fields into regular expressions.
"""
90d5fd968SMauro Carvalho Chehab
100d5fd968SMauro Carvalho Chehabimport re
110d5fd968SMauro Carvalho Chehabimport sys
120d5fd968SMauro Carvalho Chehab
130d5fd968SMauro Carvalho Chehabfrom pprint import pformat
140d5fd968SMauro Carvalho Chehab
15992a9df4SJonathan Corbetfrom abi.abi_parser import AbiParser
16992a9df4SJonathan Corbetfrom abi.helpers import AbiDebug
170d5fd968SMauro Carvalho Chehab
class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    There are some optimizations here to allow a quick symbol search:
    instead of placing all symbols together and doing a linear search,
    which is very time consuming, create a tree with one depth level,
    grouping similar symbols together.

    Yet, sometimes a full search will be needed, so there is a special
    branch on such group tree where the other symbols are placed.
    """

    #: Escape only ASCII visible characters.
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    #: Special group for other nodes.
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    # parse_abi() applies them strictly in this order, so each rule may rely
    # on the placeholders (\xf4, \xf5, \xf6, \xf7, \xff) left by earlier ones.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."),  "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"),  "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"),  "\xff"),
        (re.compile(r"\[0:\d+\]"),  "\xff"),
        (re.compile(r"\[(\d+)\]"),  "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The backreferences must reach re.sub() as "\1" and "\2"; in a
        # non-raw string a bare "\1" would be the octal escape for chr(1).
        (re.compile(r"\[(\d)\-(\d)\]"),  "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        # NOTE: a repeated capture group keeps only its last match, so a
        # list with more than two options, like {a,b,c}, becomes (a|c).
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"),  "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        # What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        # NOTE(review): a run of two or more ".*" is removed entirely
        # rather than collapsed into a single ".*" - confirm this is intended.
        (re.compile(r"(?:\.\*){2,}"),  ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]

    #: Regex to check if the symbol name has a number on it.
    re_has_num = re.compile(r"\\d")

    #: Symbol name, after escaping, that is considered a devnode basename.
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    #: List of popular group names to be skipped to minimize regex group size.
    #: Use AbiDebug.SUBGROUP_SIZE to detect those.
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Store a regular expression inside a search group.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming
        exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The conversion to a regex subset is tricky, as we need something
        that can be easily obtained from the sysfs symbol and from the
        regular expression. So, we need to discard nodes that have
        wildcards.

        The group is the last path component of ``new`` that is a plain
        symbol name and is not listed at self.skip_names. If no component
        qualifies, the regular expression is placed inside a special
        group (self.leave_others).

        Arguments:
            what: original ABI "What" string (used on messages only).
            new:  regular expression string generated from ``what``.

        An expression that fails to compile is reported with a warning
        and is not stored.
        """

        # Fall back explicitly to the catch-all group instead of depending
        # on the path starting with "/" to leave an empty last component.
        search_group = self.leave_others

        for node in reversed(new.split("/")):
            if not node or node in self.skip_names:
                continue
            if self.re_symbol_name.match(node):
                search_group = node
                break

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # Compile first, so a bad expression doesn't leave an empty group.
        # re.error exists on all Python versions (re.PatternError is its
        # new name since Python 3.13).
        try:
            compiled = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             "           '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(compiled)

        if self.search_string and self.search_string in what:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        The groups are looked up from the last path component to the
        first one; the regular expressions from the special
        self.leave_others group are always placed at the end.
        """

        # NOTE(review): groups are keyed by escaped regex components (for
        # instance "foo\-bar"), while this lookup uses the components of
        # ``what`` as-is - presumably callers pass names where both agree;
        # verify against the callers.
        candidates = what.split("/")
        candidates.reverse()
        candidates.append(self.leave_others)

        re_list = []
        for search_group in candidates:
            if search_group in self.regex_group:
                re_list += self.regex_group[search_group]

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to get the search_string argument.

        The optional ``search_string`` keyword is removed from kwargs
        before calling the parent's init. When it is not empty, it is
        also compiled into self.re_string.

        Raises ValueError if search_string is not a valid regular
        expression.
        """

        self.regex_group = None
        self.re_string = None
        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def parse_abi(self, *args, **kwargs):
        """
        Parse the ABI and convert its "What" fields into regular expressions.

        After running the parent's parse_abi(), every entry of self.data
        whose type is not "File" gets a "regex" list with one regular
        expression string for each "what" that starts with "/sys",
        produced by applying self.re_whats in order. Each expression is
        also stored into self.regex_group via regex_append().

        Raises re.error if one of the substitutions fails, with the
        failing substitution appended to the message.
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = what
                for r, s in self.re_whats:
                    try:
                        new = r.sub(s, new)
                    except re.error as e:
                        # Help debugging troubles with new regexes
                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggest_keys = sorted(self.regex_group.keys(),
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
249