#!/usr/bin/env python3
# xxpylint: disable=R0903
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Convert ABI what into regular expressions
"""

import re
import sys

from pprint import pformat

from abi_parser import AbiParser
from helpers import AbiDebug


class AbiRegex(AbiParser):
    """Extends AbiParser to search ABI nodes with regular expressions"""

    # Escape only ASCII visible characters
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    # Name of the fallback subgroup used when no path component qualifies
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    # They are applied in order by parse_abi(); several rules park
    # characters on placeholder code points (\xf4 \xf5 \xf6 \xf7 \xff) so
    # that later rules don't touch them, and recover them further down.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."), "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"), "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"), "\xff"),
        (re.compile(r"\[0:\d+\]"), "\xff"),
        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The replacement is a non-raw string (needed for the \xf4/\xf5
        # placeholders), so the group references must be spelled \\1 and
        # \\2: a bare \1 would be parsed by Python as an octal escape
        # (chr(1)) and the digits of the range would be lost.
        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"), "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        #   What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        # Undo the escaping of "-" between digits (ranges such as [1-8])
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        (re.compile(r"(?:\.\*){2,}"), ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]
    re_has_num = re.compile(r"\\d")

    # Symbol name after escape_chars that are considered a devnode basename
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    # List of popular group names to be skipped to minimize regex group size
    # Use AbiDebug.SUBGROUP_SIZE to detect those
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Get a search group for a subset of regular expressions.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming
        exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The conversion to a regex subset is tricky, as we need something
        that can be easily obtained from the sysfs symbol and from the
        regular expression. So, we need to discard nodes that have
        wildcards.

        If it can't obtain a subgroup, place the regular expression inside
        a special group (self.leave_others).

        Arguments:
            what: original ABI "What" string, used for logging and for
                  the optional search_string report.
            new:  regular expression (as a string) produced from ``what``.

        An expression that fails to compile is logged as a warning and
        ignored; no exception is propagated.
        """

        # Walk the path from the basename backwards and pick the first
        # component that is a plain symbol name (no wildcards) and is not
        # one of the overly popular names.
        for search_group in reversed(new.split("/")):
            if not search_group or search_group in self.skip_names:
                continue
            if self.re_symbol_name.match(search_group):
                break
        else:
            # Nothing qualified. Don't reuse whatever component the loop
            # visited last: get_regexes() would never look it up.
            search_group = self.leave_others

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        group = self.regex_group.setdefault(search_group, [])

        # re.error is used instead of re.PatternError because the latter
        # only exists on Python >= 3.13 (where both names are the same
        # class); on older versions the except clause itself would fail.
        try:
            compiled = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             " '%s'", what, new)
            return

        group.append(compiled)

        if self.search_string and self.search_string in what:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append()
        """

        # Same order used by regex_append(): basename first, and the
        # fallback group always checked last.
        candidates = what.split("/")
        candidates.reverse()
        candidates.append(self.leave_others)

        re_list = []
        for search_group in candidates:
            if search_group in self.regex_group:
                re_list += self.regex_group[search_group]

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to get the optional search_string argument.

        ``search_string`` is removed from kwargs before the remaining
        arguments are forwarded to the parent class. When it is set, it
        must be a valid regular expression; otherwise ValueError is raised.
        """

        # Filled by parse_abi()
        self.regex_group = None
        self.re_string = None

        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def parse_abi(self, *args, **kwargs):
        """
        Parse the ABI using the parent class, then convert each "what"
        entry that starts with /sys into a regular expression.

        The resulting expressions are stored as strings on the "regex"
        key of each non-"File" entry of self.data, and as compiled
        patterns inside self.regex_group (see regex_append()).
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = what
                for r, s in self.re_whats:
                    try:
                        new = r.sub(s, new)
                    except re.error as e:
                        # Help debugging troubles with new regexes
                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggest_keys = sorted(self.regex_group.keys(),
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)