#!/usr/bin/env python3
# xxpylint: disable=R0903
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Convert ABI what into regular expressions
"""

import re
import sys

from pprint import pformat

from abi.abi_parser import AbiParser
from abi.helpers import AbiDebug

class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    There are some optimizations here to allow a quick symbol search:
    instead of trying to place all symbols altogether and doing a linear
    search, which is very time consuming, create a tree with one depth,
    grouping similar symbols altogether.

    Yet, sometimes a full search will be needed, so we have a special branch
    on such group tree where other symbols are placed.
    """

    #: Escape only ASCII visible characters.
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    #: Special group for other nodes.
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    # They are applied in order by parse_abi(); several rules park characters
    # on temporary placeholders (\xf4, \xf5, \xf6, \xf7, \xff) so that later
    # rules (notably the symbol escaping one) don't touch them.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."), "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"), "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"), "\xff"),
        (re.compile(r"\[0:\d+\]"), "\xff"),
        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The group references must be written as "\\1" and "\\2": in a
        # non-raw string, "\1" and "\2" are octal escapes for control
        # characters, not references to the captured digits.
        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"), "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        #    What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        (re.compile(r"(?:\.\*){2,}"), ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]

    #: Regex to check if the symbol name has a number on it.
    re_has_num = re.compile(r"\\d")

    #: Symbol name after escape_chars that are considered a devnode basename.
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    #: List of popular group names to be skipped to minimize regex group size
    #: Use AbiDebug.SUBGROUP_SIZE to detect those.
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Get a search group for a subset of regular expressions.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The conversion to a regex subset is tricky, as we need something
        that can be easily obtained from the sysfs symbol and from the
        regular expression. So, we need to discard nodes that have
        wildcards.

        If it can't obtain a subgroup, place the regular expression inside
        a special group (self.leave_others).

        ``what`` is the original ABI What string (used for messages only);
        ``new`` is the regular expression string generated from it. If
        ``new`` is not a valid regular expression, a warning is logged and
        nothing is stored.
        """

        search_group = None

        # Walk the path components from the basename towards the root and
        # stop at the first one that looks like a plain symbol name.
        for search_group in reversed(new.split("/")):
            if not search_group or search_group in self.skip_names:
                continue
            if self.re_symbol_name.match(search_group):
                break

        # When nothing matched, the loop ends on the last visited component,
        # which is the empty string that precedes the leading "/".
        if not search_group:
            search_group = self.leave_others

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # re.error is available on every Python version; re.PatternError
        # is just its newer name (Python 3.13+).
        try:
            compiled = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             " '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(compiled)

        if self.search_string and self.search_string in what:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        Groups are looked up for each path component, from the basename
        towards the root, and finally for the special self.leave_others
        group.
        """

        re_list = []

        candidates = what.split("/")
        candidates.reverse()
        candidates.append(self.leave_others)

        for search_group in candidates:
            re_list += self.regex_group.get(search_group, [])

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to get the optional ``search_string`` keyword
        argument. All other arguments are passed to the parent class.

        Raises ValueError if ``search_string`` is not a valid regular
        expression.
        """

        self.regex_group = None
        self.re_string = None

        # Consume the argument here, as it is not forwarded to the parent
        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def parse_abi(self, *args, **kwargs):
        """
        Parse the ABI via the parent class and then convert each What field
        that starts with /sys into a regular expression.

        The generated expressions are stored as strings at the "regex" key
        of each non-File entry and, compiled, at self.regex_group.
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda item: item[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = what
                for r, s in self.re_whats:
                    try:
                        new = r.sub(s, new)
                    except re.error as e:
                        # Help debugging troubles with new regexes
                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggest_keys = sorted(self.regex_group.keys(),
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)