xref: /linux/scripts/lib/abi/abi_regex.py (revision 0d5fd96880d9135a4b35fb5523896b21b13dde78)
1#!/usr/bin/env python3
2# xxpylint: disable=R0903
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Convert ABI what into regular expressions
8"""
9
10import re
11import sys
12
13from pprint import pformat
14
15from abi_parser import AbiParser
16from helpers import AbiDebug
17
class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    Every "What" entry starting with /sys is rewritten, by applying the
    re_whats substitution table in order, into a regular expression.
    The compiled expressions are stored at regex_group, keyed by one of the
    path components, so that get_regexes() only has to return the subset
    of expressions that may match a given devnode.
    """

    # Escape only ASCII visible characters
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    # Name of the sub-group that receives regexes without a usable key
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    #
    # The order matters: characters in the \xf4-\xf7 and \xff range are used
    # as temporary placeholders, protecting parts of the expression from the
    # "escape all other symbols" step. They're converted back at the end.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."),  "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"),  "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"),  "\xff"),
        (re.compile(r"\[0:\d+\]"),  "\xff"),
        (re.compile(r"\[(\d+)\]"),  "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns
        #
        # The back-references need a double backslash here: as this is not
        # a raw string, "\1" and "\2" would be parsed by Python as the
        # U+0001 and U+0002 control characters.
        (re.compile(r"\[(\d)\-(\d)\]"),  "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        #
        # NOTE(review): a repeated group keeps only its last capture, so
        # with three or more alternatives just the first and the last ones
        # end up in the regex.
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"),  "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        # What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which a parenthesis.
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        (re.compile(r"(?:\.\*){2,}"),  ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]
    re_has_num = re.compile(r"\\d")

    # Symbol name after escape_chars that are considered a devnode basename
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    # List of popular group names to be skipped to minimize regex group size
    # Use AbiDebug.SUBGROUP_SIZE to detect those
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Compile a regular expression and store it inside a search group.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming
        exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The conversion to a regex subset is tricky, as we need something
        that can be easily obtained from the sysfs symbol and from the
        regular expression. So, we need to discard nodes that have
        wildcards.

        The group name is the last path component of the regular expression
        that is a plain symbol name and is not listed at self.skip_names.
        If it can't obtain a subgroup, place the regular expression inside
        a special group (self.leave_others).

        what is the original ABI "What" string, used on messages; new is
        the regular expression produced from it. An expression that fails
        to compile is reported with a warning and ignored.
        """

        search_group = self.leave_others

        for name in reversed(new.split("/")):
            if not name or name in self.skip_names:
                continue

            if self.re_symbol_name.match(name):
                # A matching name has backslashes only in front of ".", "-"
                # and ":". Drop them, as get_regexes() looks up the group
                # using components of a real (unescaped) path.
                search_group = name.replace("\\", "")
                break

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # re.error is used instead of re.PatternError, as the latter only
        # exists on Python 3.13 and newer, where both name the same class
        try:
            regex = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             "           '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(regex)

        if self.search_string and self.search_string in what:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        The path components are checked from the last one to the first,
        followed by the special self.leave_others group.
        """

        candidates = what.split("/")
        candidates.reverse()
        candidates.append(self.leave_others)

        re_list = []
        for search_group in candidates:
            re_list.extend(self.regex_group.get(search_group, ()))

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to handle the optional search_string keyword.

        search_string is removed from kwargs before the remaining arguments
        are passed to the parent class. When not empty, it is also compiled
        and stored at self.re_string.

        Raises ValueError if search_string is not a valid regular
        expression.
        """

        # Filled by parse_abi()
        self.regex_group = None

        self.re_string = None
        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            # re.error also works on Python versions older than 3.13
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def parse_abi(self, *args, **kwargs):
        """
        Parse ABI and convert its What fields into regular expressions.

        After running the parent class parser, each entry of self.data
        whose type is not "File" gets a "regex" list with the expressions
        produced for its "what" values that start with /sys. The same
        expressions are compiled and grouped via regex_append().

        Raises re.error if one of the re_whats substitutions fails.
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda item: item[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = what
                for r, s in self.re_whats:
                    try:
                        new = r.sub(s, new)
                    except re.error as e:
                        # Help debugging troubles with new regexes
                        msg = f"{e}\nwhile re.sub('{r.pattern}', {s}, str)"
                        raise re.error(msg) from e

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggest_keys = sorted(self.regex_group,
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements",
                      file=sys.stderr)
235