xref: /linux/tools/lib/python/kdoc/c_lex.py (revision 5181afcdf99527dd92a88f80fc4d0d8013e1b510)
1df50e848SMauro Carvalho Chehab#!/usr/bin/env python3
2df50e848SMauro Carvalho Chehab# SPDX-License-Identifier: GPL-2.0
3df50e848SMauro Carvalho Chehab# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4df50e848SMauro Carvalho Chehab
5df50e848SMauro Carvalho Chehab"""
6df50e848SMauro Carvalho ChehabRegular expression ancillary classes.
7df50e848SMauro Carvalho Chehab
8df50e848SMauro Carvalho ChehabThose help caching regular expressions and do matching for kernel-doc.
9df50e848SMauro Carvalho Chehab
Please notice that the code here may raise exceptions to indicate bad
usage inside kdoc, e.g. problems at the replace pattern.
12df50e848SMauro Carvalho Chehab
13df50e848SMauro Carvalho ChehabOther errors are logged via log instance.
14df50e848SMauro Carvalho Chehab"""
15df50e848SMauro Carvalho Chehab
16df50e848SMauro Carvalho Chehabimport logging
17df50e848SMauro Carvalho Chehabimport re
18df50e848SMauro Carvalho Chehab
199aaeb817SMauro Carvalho Chehabfrom copy import copy
209aaeb817SMauro Carvalho Chehab
21df50e848SMauro Carvalho Chehabfrom .kdoc_re import KernRe
22df50e848SMauro Carvalho Chehab
23df50e848SMauro Carvalho Chehablog = logging.getLogger(__name__)
24df50e848SMauro Carvalho Chehab
def tokenizer_set_log(logger, prefix=""):
    """
    Replace the module-level logger with a LoggerAdapter that
    prepends *prefix* to every message.
    """
    global log

    class PrefixAdapter(logging.LoggerAdapter):
        """Ancillary class to set a prefix on all message logs."""

        def process(self, msg, kwargs):
            # Prepend the captured prefix to whatever gets logged
            return prefix + str(msg), kwargs

    # Rebind the module logger to the wrapped one
    log = PrefixAdapter(logger, {"prefix": prefix})
41df50e848SMauro Carvalho Chehab
class CToken():
    """
    Data class to define a C token.

    Each instance stores the token kind (one of the enum-like constants
    below), its text value, its position in the scanned string, and the
    (bracket, paren, brace) nesting level where it was found.
    """

    # Tokens that can be used by the parser. Works like a C enum.

    COMMENT = 0     #: A standard C or C99 comment, including delimiter.
    STRING = 1      #: A string, including quotation marks.
    CHAR = 2        #: A character, including apostrophes.
    NUMBER = 3      #: A number.
    PUNC = 4        #: A punctuation mark: ``,`` / ``.``.
    BEGIN = 5       #: A begin character: ``{`` / ``[`` / ``(``.
    END = 6         #: An end character: ``}`` / ``]`` / ``)``.
    CPP = 7         #: A preprocessor macro.
    HASH = 8        #: The hash character - useful to handle other macros.
    OP = 9          #: A C operator (add, subtract, ...).
    STRUCT = 10     #: A ``struct`` keyword.
    UNION = 11      #: A ``union`` keyword.
    ENUM = 12       #: An ``enum`` keyword.
    TYPEDEF = 13    #: A ``typedef`` keyword.
    NAME = 14       #: A name. Can be an ID or a type.
    SPACE = 15      #: Any space characters, including new lines.
    ENDSTMT = 16    #: End of a statement (``;``).

    BACKREF = 17    #: Not a valid C sequence, but used at sub regex patterns.

    MISMATCH = 255  #: An error indicator: should never happen in practice.

    # Dict to convert from an enum integer into a string.
    # vars() here sees only the class attributes defined above; the
    # isinstance filter drops dunder strings like __module__.
    _name_by_val = {v: k for k, v in dict(vars()).items() if isinstance(v, int)}

    # Dict to convert from string to an enum-like integer value.
    _name_to_val = {k: v for v, k in _name_by_val.items()}

    @staticmethod
    def to_name(val):
        """Convert from an integer value from CToken enum into a string"""

        return CToken._name_by_val.get(val, f"UNKNOWN({val})")

    @staticmethod
    def from_name(name):
        """Convert a string into a CToken enum value (MISMATCH if unknown)"""
        if name in CToken._name_to_val:
            return CToken._name_to_val[name]

        return CToken.MISMATCH


    def __init__(self, kind, value=None, pos=0,
                 brace_level=0, paren_level=0, bracket_level=0):
        """
        Store kind, value and position, packing the three nesting
        counters into a ``(bracket, paren, brace)`` tuple.
        """
        self.kind = kind
        self.value = value
        self.pos = pos
        self.level = (bracket_level, paren_level, brace_level)

    def __repr__(self):
        """Debug representation of the token."""
        # NOTE(review): the printed form is not a valid constructor call -
        # the level tuple is shown where brace_level would be expected.
        name = self.to_name(self.kind)
        if isinstance(self.value, str):
            value = '"' + self.value + '"'
        else:
            value = self.value

        return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})"
107df50e848SMauro Carvalho Chehab
#: Regexes to parse C code, transforming it into tokens.
#: Order matters: the alternatives are tried in sequence, so the more
#: specific patterns (comments, strings, multi-character operators,
#: keywords) must come before the generic NAME and catch-all MISMATCH.
RE_SCANNER_LIST = [
    #
    # Note that \s\S is different than .*, as it also catches \n
    #
    (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"),

    (CToken.STRING,  r'"(?:\\.|[^"\\])*"'),
    # NOTE(review): matches one (possibly escaped) character; multi-char
    # escapes such as '\x41' would not match - confirm whether needed.
    (CToken.CHAR,    r"'(?:\\.|[^'\\])'"),

    # Hex, octal, then decimal/float with optional exponent and suffixes
    (CToken.NUMBER,  r"0[xX][\da-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|"
                     r"\d+(?:\.\d*)?(?:[eE][+-]?\d+)?[fFlL]*"),

    # Optionally swallows the whitespace that precedes the ";"
    (CToken.ENDSTMT, r"(?:\s+;|;)"),

    (CToken.PUNC,    r"[,\.]"),

    (CToken.BEGIN,   r"[\[\(\{]"),

    (CToken.END,     r"[\]\)\}]"),

    (CToken.CPP,     r"#\s*(?:define|include|ifdef|ifndef|if|else|elif|endif|undef|pragma)\b"),

    # Any "#" not followed by a known preprocessor directive
    (CToken.HASH,    r"#"),

    # Multi-character operators first, then single-character ones
    (CToken.OP,      r"\+\+|\-\-|\->|==|\!=|<=|>=|&&|\|\||<<|>>|\+=|\-=|\*=|/=|%="
                     r"|&=|\|=|\^=|[=\+\-\*/%<>&\|\^~!\?\:]"),

    (CToken.STRUCT,  r"\bstruct\b"),
    (CToken.UNION,   r"\bunion\b"),
    (CToken.ENUM,    r"\benum\b"),
    (CToken.TYPEDEF, r"\btypedef\b"),

    (CToken.NAME,    r"[A-Za-z_]\w*"),

    (CToken.SPACE,   r"\s+"),

    # Not valid C: backrefs like \1, used by substitution patterns
    (CToken.BACKREF, r"\\\d+"),

    # Catch-all: any single character not matched above
    (CToken.MISMATCH,r"."),
]
149df50e848SMauro Carvalho Chehab
def fill_re_scanner(token_list):
    """Ancillary routine to convert RE_SCANNER_LIST into a finditer regex"""

    # One named group per token kind, so that match.lastgroup directly
    # yields the CToken name.
    named_groups = (f"(?P<{CToken.to_name(kind)}>{pattern})"
                    for kind, pattern in token_list)

    return KernRe("|".join(named_groups), re.MULTILINE | re.DOTALL)
159df50e848SMauro Carvalho Chehab
#: Handle C continuation lines (backslash-newline) by removing them.
RE_CONT = KernRe(r"\\\n")

#: Matches the opening of a C comment, including any trailing whitespace.
RE_COMMENT_START = KernRe(r'/\*\s*')

#: Tokenizer regex, built once at module load time from RE_SCANNER_LIST.
RE_SCANNER = fill_re_scanner(RE_SCANNER_LIST)
167df50e848SMauro Carvalho Chehab
168df50e848SMauro Carvalho Chehab
class CTokenizer():
    """
    Scan C statements and definitions and produce tokens.

    When converted to string, it drops comments and handles public/private
    markers, respecting depth.
    """

    # This class is inspired and follows the basic concepts of:
    #   https://docs.python.org/3/library/re.html#writing-a-tokenizer

    def __init__(self, source=None):
        """
        Build the token list for ``source`` using the module-level
        RE_SCANNER regex.

        ``source`` may be:
        - None or empty: creates an empty tokenizer;
        - a list: used directly as the (already built) token list;
        - a string: parsed into CToken objects.
        """

        #
        # Store logger to allow parser classes to re-use it
        #
        self.log = log

        self.tokens = []

        if not source:
            return

        if isinstance(source, list):
            self.tokens = source
            return

        #
        # While we could just use _tokenize directly via iterator,
        # as we'll need to use the tokenizer several times inside kernel-doc
        # to handle macro transforms, cache the results on a list, as
        # re-using it is cheaper than having to parse every time.
        #
        self.tokens = list(self._tokenize(source))

    def _tokenize(self, source):
        """
        Iterator that parses ``source``, splitting it into tokens, as
        defined at ``RE_SCANNER_LIST``.

        Yields CToken objects, tracking the brace/paren/bracket nesting
        level as it goes.
        """

        # Handle continuation lines. Note that kdoc_parser already has a
        # logic to do that. Still, let's keep it for completeness, as we might
        # end re-using this tokenizer outside kernel-doc some day - or we may
        # eventually remove it from there as a future cleanup.
        source = RE_CONT.sub("", source)

        brace_level = 0
        paren_level = 0
        bracket_level = 0

        for match in RE_SCANNER.finditer(source):
            kind = CToken.from_name(match.lastgroup)
            pos = match.start()
            value = match.group()

            if kind == CToken.MISMATCH:
                # Lazy formatting: message is only built if actually emitted
                log.error("Unexpected token '%s' on pos %d:\n\t'%s'",
                          value, pos, source)
            elif kind == CToken.BEGIN:
                if value == '(':
                    paren_level += 1
                elif value == '[':
                    bracket_level += 1
                else:  # value == '{'
                    brace_level += 1

            elif kind == CToken.END:
                #
                # Each close delimiter only pops its own counter; a stray
                # ")" or "]" at level zero must not decrement brace_level
                # (the previous elif chain let it fall through and did so).
                # Counters never go negative for unbalanced input.
                #
                if value == ')':
                    if paren_level > 0:
                        paren_level -= 1
                elif value == ']':
                    if bracket_level > 0:
                        bracket_level -= 1
                elif brace_level > 0:    # value == '}'
                    brace_level -= 1

            yield CToken(kind, value, pos,
                         brace_level, paren_level, bracket_level)

    def __str__(self):
        """
        Rebuild a C-like string from the tokens, dropping comments and
        honoring ``private:`` / ``public:`` comment markers per nesting
        depth. Also drops whitespace before ";" and repeated ";".
        """
        out = ""
        show_stack = [True]

        for i, tok in enumerate(self.tokens):
            if tok.kind == CToken.BEGIN:
                # Inner scopes inherit the visibility of the outer one
                show_stack.append(show_stack[-1])

            elif tok.kind == CToken.END:
                prev = show_stack[-1]
                if len(show_stack) > 1:
                    show_stack.pop()

                #
                # Closing a hidden scope inside a visible one: emit the
                # end delimiter so the visible output stays balanced.
                #
                if not prev and show_stack[-1]:
                    #
                    # Try to preserve indent
                    #
                    out += "\t" * (len(show_stack) - 1)

                    out += str(tok.value)
                    continue

            elif tok.kind == CToken.COMMENT:
                comment = RE_COMMENT_START.sub("", tok.value)

                if comment.startswith("private:"):
                    show_stack[-1] = False
                elif comment.startswith("public:"):
                    show_stack[-1] = True

                # Comments themselves are never emitted
                continue

            if not show_stack[-1]:
                continue

            if i < len(self.tokens) - 1:
                next_tok = self.tokens[i + 1]

                # Do some cleanups before ";"

                if tok.kind == CToken.SPACE and next_tok.kind == CToken.ENDSTMT:
                    continue

                if tok.kind == CToken.ENDSTMT and next_tok.kind == tok.kind:
                    continue

            out += str(tok.value)

        return out
309f1cf9f7cSMauro Carvalho Chehab
310f1cf9f7cSMauro Carvalho Chehab
class CTokenArgs:
    """
    Ancillary class to help using backrefs from sub matches.

    If the highest backref contains a "+" at the last element,
    the logic will be greedy, picking all other delims.

    This is needed to parse struct_group macros which end with ``MEMBERS...``.
    """
    def __init__(self, sub_str):
        r"""
        Parse the backref patterns (``\0``, ``\1``, ..., optionally with a
        trailing ``+`` on the highest one) present in ``sub_str`` and
        pre-tokenize the substitution string.

        Raises ValueError when more than one greedy pattern is present or
        when the greedy pattern is not the highest backref.
        """
        self.sub_groups = set()
        self.max_group = -1
        self.greedy = None

        for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str):
            group = int(m.group(1))
            if m.group(2) == "+":
                # Compare against None: backref group number 0 is valid
                if self.greedy is not None and self.greedy != group:
                    raise ValueError("There are multiple greedy patterns!")
                self.greedy = group

            self.sub_groups.add(group)
            self.max_group = max(self.max_group, group)

        if self.greedy is not None:
            if self.greedy != self.max_group:
                raise ValueError("Greedy pattern is not the last one!")

            # Strip the "+" marker, leaving plain backrefs for the tokenizer
            sub_str = KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str)

        self.sub_str = sub_str
        # NOTE(review): attribute name has a typo ("tokeninzer"); kept
        # as-is to avoid breaking possible external users.
        self.sub_tokeninzer = CTokenizer(sub_str)

    def groups(self, new_tokenizer):
        r"""
        Create replacement arguments for backrefs like:

        ``\0``, ``\1``, ``\2``, ... ``\{number}``

        It also accepts a ``+`` character to the highest backref, like
        ``\4+``. When used, the backref will be greedy, picking all other
        arguments afterwards.

        The logic is smart enough to only go up to the maximum required
        argument, even if there are more.

        If there are fewer arguments than backrefs, an error is logged.
        Please notice that, on C, square brackets don't have any separator
        on them, so trying to use ``\1``..``\n`` for brackets also logs an
        error.

        Returns a ``(level, groups_list)`` tuple, where ``level`` is the
        nesting level of the first BEGIN token and ``groups_list[i]``
        holds the tokens captured for backref ``\i``.
        """

        level = (0, 0, 0)

        if self.max_group < 0:
            return level, []

        tokens = new_tokenizer.tokens

        #
        # Fill \0 with the full token contents
        #
        groups_list = [[]]

        if 0 in self.sub_groups:
            inner_level = 0

            for tok in tokens:
                if tok.kind == CToken.BEGIN:
                    inner_level += 1

                    #
                    # Discard first begin
                    #
                    if not groups_list[0]:
                        continue
                elif tok.kind == CToken.END:
                    inner_level -= 1
                    if inner_level < 0:
                        break

                if inner_level:
                    groups_list[0].append(tok)

        if not self.max_group:
            return level, groups_list

        delim = None

        #
        # Ignore everything before BEGIN. The value of begin gives the
        # delimiter to be used for the matches.
        #
        # begin_idx defaults past-the-end so that, with no BEGIN token
        # (or an empty token list), the argument scan below is skipped
        # instead of raising a NameError.
        #
        begin_idx = len(tokens)
        for i, tok in enumerate(tokens):
            if tok.kind == CToken.BEGIN:
                if tok.value == "{":
                    delim = ";"
                elif tok.value == "(":
                    delim = ","
                else:
                    # Square brackets have no argument separator in C.
                    # Use the module logger: CTokenArgs has no self.log.
                    log.error(r"Can't handle \1..\n on %s", self.sub_str)

                level = tok.level
                begin_idx = i
                break

        pos = 1
        groups_list.append([])

        inner_level = 0
        for tok in tokens[begin_idx + 1:]:
            if tok.kind == CToken.BEGIN:
                inner_level += 1
            if tok.kind == CToken.END:
                inner_level -= 1
                if inner_level < 0:
                    break

            if tok.kind in [CToken.PUNC, CToken.ENDSTMT] and delim == tok.value:
                pos += 1
                if self.greedy is not None and pos > self.max_group:
                    # Greedy: keep accumulating into the last group,
                    # including the delimiter token itself
                    pos -= 1
                else:
                    groups_list.append([])

                    if pos > self.max_group:
                        break

                    continue

            groups_list[pos].append(tok)

        if pos < self.max_group:
            log.error("%s groups are up to %d instead of %d",
                      self.sub_str, pos, self.max_group)

        return level, groups_list

    def tokens(self, new_tokenizer):
        """
        Apply the substitution: return a new token list where each BACKREF
        token of the substitution string is replaced by the tokens captured
        for that group, with their nesting levels shifted by the level of
        the match.
        """
        level, groups = self.groups(new_tokenizer)

        new = CTokenizer()

        for tok in self.sub_tokeninzer.tokens:
            if tok.kind == CToken.BACKREF:
                group = int(tok.value[1:])

                for group_tok in groups[group]:
                    new_tok = copy(group_tok)

                    # Shift the captured token level by the match level
                    new_tok.level = tuple(lvl + base for lvl, base
                                          in zip(new_tok.level, level))

                    new.tokens.append(new_tok)
            else:
                new.tokens.append(tok)

        return new.tokens
4769aaeb817SMauro Carvalho Chehab
4779aaeb817SMauro Carvalho Chehab
478f1cf9f7cSMauro Carvalho Chehabclass CMatch:
479f1cf9f7cSMauro Carvalho Chehab    """
480f1cf9f7cSMauro Carvalho Chehab    Finding nested delimiters is hard with regular expressions. It is
481f1cf9f7cSMauro Carvalho Chehab    even harder on Python with its normal re module, as there are several
482f1cf9f7cSMauro Carvalho Chehab    advanced regular expressions that are missing.
483f1cf9f7cSMauro Carvalho Chehab
484f1cf9f7cSMauro Carvalho Chehab    This is the case of this pattern::
485f1cf9f7cSMauro Carvalho Chehab
486f1cf9f7cSMauro Carvalho Chehab            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
487f1cf9f7cSMauro Carvalho Chehab
488f1cf9f7cSMauro Carvalho Chehab    which is used to properly match open/close parentheses of the
489f1cf9f7cSMauro Carvalho Chehab    string search STRUCT_GROUP(),
490f1cf9f7cSMauro Carvalho Chehab
491f1cf9f7cSMauro Carvalho Chehab    Add a class that counts pairs of delimiters, using it to match and
492f1cf9f7cSMauro Carvalho Chehab    replace nested expressions.
493f1cf9f7cSMauro Carvalho Chehab
494f1cf9f7cSMauro Carvalho Chehab    The original approach was suggested by:
495f1cf9f7cSMauro Carvalho Chehab
496f1cf9f7cSMauro Carvalho Chehab        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
497f1cf9f7cSMauro Carvalho Chehab
498f1cf9f7cSMauro Carvalho Chehab    Although I re-implemented it to make it more generic and match 3 types
499f1cf9f7cSMauro Carvalho Chehab    of delimiters. The logic checks if delimiters are paired. If not, it
500f1cf9f7cSMauro Carvalho Chehab    will ignore the search string.
501f1cf9f7cSMauro Carvalho Chehab    """
502f1cf9f7cSMauro Carvalho Chehab
503f1cf9f7cSMauro Carvalho Chehab
5049aaeb817SMauro Carvalho Chehab    def __init__(self, regex, delim="("):
5059aaeb817SMauro Carvalho Chehab        self.regex = KernRe("^" + regex + r"\b")
5069aaeb817SMauro Carvalho Chehab        self.start_delim = delim
507f1cf9f7cSMauro Carvalho Chehab
508f1cf9f7cSMauro Carvalho Chehab    def _search(self, tokenizer):
509f1cf9f7cSMauro Carvalho Chehab        """
510f1cf9f7cSMauro Carvalho Chehab        Finds paired blocks for a regex that ends with a delimiter.
511f1cf9f7cSMauro Carvalho Chehab
512f1cf9f7cSMauro Carvalho Chehab        The suggestion of using finditer to match pairs came from:
513f1cf9f7cSMauro Carvalho Chehab        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
514f1cf9f7cSMauro Carvalho Chehab        but I ended using a different implementation to align all three types
515f1cf9f7cSMauro Carvalho Chehab        of delimiters and seek for an initial regular expression.
516f1cf9f7cSMauro Carvalho Chehab
517f1cf9f7cSMauro Carvalho Chehab        The algorithm seeks for open/close paired delimiters and places them
518f1cf9f7cSMauro Carvalho Chehab        into a stack, yielding a start/stop position of each match when the
519f1cf9f7cSMauro Carvalho Chehab        stack is zeroed.
520f1cf9f7cSMauro Carvalho Chehab
521f1cf9f7cSMauro Carvalho Chehab        The algorithm should work fine for properly paired lines, but will
522f1cf9f7cSMauro Carvalho Chehab        silently ignore end delimiters that precede a start delimiter.
523f1cf9f7cSMauro Carvalho Chehab        This should be OK for kernel-doc parser, as unaligned delimiters
524f1cf9f7cSMauro Carvalho Chehab        would cause compilation errors. So, we don't need to raise exceptions
525f1cf9f7cSMauro Carvalho Chehab        to cover such issues.
526f1cf9f7cSMauro Carvalho Chehab        """
527f1cf9f7cSMauro Carvalho Chehab
528f1cf9f7cSMauro Carvalho Chehab        start = None
529f1cf9f7cSMauro Carvalho Chehab        started = False
530f1cf9f7cSMauro Carvalho Chehab
531f1cf9f7cSMauro Carvalho Chehab        import sys
532f1cf9f7cSMauro Carvalho Chehab
533f1cf9f7cSMauro Carvalho Chehab        stack = []
534f1cf9f7cSMauro Carvalho Chehab
535f1cf9f7cSMauro Carvalho Chehab        for i, tok in enumerate(tokenizer.tokens):
536f1cf9f7cSMauro Carvalho Chehab            if start is None:
537f1cf9f7cSMauro Carvalho Chehab                if tok.kind == CToken.NAME and self.regex.match(tok.value):
538f1cf9f7cSMauro Carvalho Chehab                    start = i
539f1cf9f7cSMauro Carvalho Chehab                    stack.append((start, tok.level))
540f1cf9f7cSMauro Carvalho Chehab                    started = False
541f1cf9f7cSMauro Carvalho Chehab
542f1cf9f7cSMauro Carvalho Chehab                continue
543f1cf9f7cSMauro Carvalho Chehab
5449aaeb817SMauro Carvalho Chehab            if not started:
5459aaeb817SMauro Carvalho Chehab                if tok.kind == CToken.SPACE:
5469aaeb817SMauro Carvalho Chehab                    continue
5479aaeb817SMauro Carvalho Chehab
5489aaeb817SMauro Carvalho Chehab                if tok.kind == CToken.BEGIN and tok.value == self.start_delim:
549f1cf9f7cSMauro Carvalho Chehab                    started = True
550f1cf9f7cSMauro Carvalho Chehab                    continue
551f1cf9f7cSMauro Carvalho Chehab
5529aaeb817SMauro Carvalho Chehab                # Name only token without BEGIN/END
5539aaeb817SMauro Carvalho Chehab                if i > start:
5549aaeb817SMauro Carvalho Chehab                    i -= 1
5559aaeb817SMauro Carvalho Chehab                yield start, i
5569aaeb817SMauro Carvalho Chehab                start = None
5579aaeb817SMauro Carvalho Chehab
558f1cf9f7cSMauro Carvalho Chehab            if tok.kind == CToken.END and tok.level == stack[-1][1]:
559f1cf9f7cSMauro Carvalho Chehab                start, level = stack.pop()
560f1cf9f7cSMauro Carvalho Chehab
5619aaeb817SMauro Carvalho Chehab                yield start, i
562f1cf9f7cSMauro Carvalho Chehab                start = None
563f1cf9f7cSMauro Carvalho Chehab
564f1cf9f7cSMauro Carvalho Chehab        #
565f1cf9f7cSMauro Carvalho Chehab        # If an END zeroing levels is not there, return remaining stuff
566f1cf9f7cSMauro Carvalho Chehab        # This is meant to solve cases where the caller logic might be
567f1cf9f7cSMauro Carvalho Chehab        # picking an incomplete block.
568f1cf9f7cSMauro Carvalho Chehab        #
5699aaeb817SMauro Carvalho Chehab        if start and stack:
5709aaeb817SMauro Carvalho Chehab            if started:
5719aaeb817SMauro Carvalho Chehab                s = str(tokenizer)
5729aaeb817SMauro Carvalho Chehab                log.warning(f"can't find a final end at {s}")
5739aaeb817SMauro Carvalho Chehab
5749aaeb817SMauro Carvalho Chehab            yield start, len(tokenizer.tokens)
575f1cf9f7cSMauro Carvalho Chehab
576f1cf9f7cSMauro Carvalho Chehab    def search(self, source):
577f1cf9f7cSMauro Carvalho Chehab        """
578f1cf9f7cSMauro Carvalho Chehab        This is similar to re.search:
579f1cf9f7cSMauro Carvalho Chehab
580f1cf9f7cSMauro Carvalho Chehab        It matches a regex that it is followed by a delimiter,
581f1cf9f7cSMauro Carvalho Chehab        returning occurrences only if all delimiters are paired.
582f1cf9f7cSMauro Carvalho Chehab        """
583f1cf9f7cSMauro Carvalho Chehab
584f1cf9f7cSMauro Carvalho Chehab        if isinstance(source, CTokenizer):
585f1cf9f7cSMauro Carvalho Chehab            tokenizer = source
586f1cf9f7cSMauro Carvalho Chehab            is_token = True
587f1cf9f7cSMauro Carvalho Chehab        else:
588f1cf9f7cSMauro Carvalho Chehab            tokenizer = CTokenizer(source)
589f1cf9f7cSMauro Carvalho Chehab            is_token = False
590f1cf9f7cSMauro Carvalho Chehab
5919aaeb817SMauro Carvalho Chehab        for start, end in self._search(tokenizer):
5929aaeb817SMauro Carvalho Chehab            new_tokenizer = CTokenizer(tokenizer.tokens[start:end + 1])
5939aaeb817SMauro Carvalho Chehab
594f1cf9f7cSMauro Carvalho Chehab            if is_token:
595f1cf9f7cSMauro Carvalho Chehab                yield new_tokenizer
596f1cf9f7cSMauro Carvalho Chehab            else:
597f1cf9f7cSMauro Carvalho Chehab                yield str(new_tokenizer)
5989aaeb817SMauro Carvalho Chehab
5999aaeb817SMauro Carvalho Chehab    def sub(self, sub_str, source, count=0):
6009aaeb817SMauro Carvalho Chehab        """
6019aaeb817SMauro Carvalho Chehab        This is similar to re.sub:
6029aaeb817SMauro Carvalho Chehab
6039aaeb817SMauro Carvalho Chehab        It matches a regex that it is followed by a delimiter,
6049aaeb817SMauro Carvalho Chehab        replacing occurrences only if all delimiters are paired.
6059aaeb817SMauro Carvalho Chehab
6069aaeb817SMauro Carvalho Chehab        if the sub argument contains::
6079aaeb817SMauro Carvalho Chehab
6089aaeb817SMauro Carvalho Chehab            r'\0'
6099aaeb817SMauro Carvalho Chehab
6109aaeb817SMauro Carvalho Chehab        it will work just like re: it places there the matched paired data
6119aaeb817SMauro Carvalho Chehab        with the delimiter stripped.
6129aaeb817SMauro Carvalho Chehab
6139aaeb817SMauro Carvalho Chehab        If count is different than zero, it will replace at most count
6149aaeb817SMauro Carvalho Chehab        items.
6159aaeb817SMauro Carvalho Chehab        """
6169aaeb817SMauro Carvalho Chehab        if isinstance(source, CTokenizer):
6179aaeb817SMauro Carvalho Chehab            is_token = True
6189aaeb817SMauro Carvalho Chehab            tokenizer = source
6199aaeb817SMauro Carvalho Chehab        else:
6209aaeb817SMauro Carvalho Chehab            is_token = False
6219aaeb817SMauro Carvalho Chehab            tokenizer = CTokenizer(source)
6229aaeb817SMauro Carvalho Chehab
6239aaeb817SMauro Carvalho Chehab        # Detect if sub_str contains sub arguments
6249aaeb817SMauro Carvalho Chehab
6259aaeb817SMauro Carvalho Chehab        args_match = CTokenArgs(sub_str)
6269aaeb817SMauro Carvalho Chehab
6279aaeb817SMauro Carvalho Chehab        new_tokenizer = CTokenizer()
6289aaeb817SMauro Carvalho Chehab        pos = 0
6299aaeb817SMauro Carvalho Chehab        n = 0
6309aaeb817SMauro Carvalho Chehab
6319aaeb817SMauro Carvalho Chehab        #
6329aaeb817SMauro Carvalho Chehab        # NOTE: the code below doesn't consider overlays at sub.
6339aaeb817SMauro Carvalho Chehab        # We may need to add some extra unit tests to check if those
6349aaeb817SMauro Carvalho Chehab        # would cause problems. When replacing by "", this should not
6359aaeb817SMauro Carvalho Chehab        # be a problem, but other transformations could be problematic
6369aaeb817SMauro Carvalho Chehab        #
6379aaeb817SMauro Carvalho Chehab        for start, end in self._search(tokenizer):
6389aaeb817SMauro Carvalho Chehab            new_tokenizer.tokens += tokenizer.tokens[pos:start]
6399aaeb817SMauro Carvalho Chehab
6409aaeb817SMauro Carvalho Chehab            new = CTokenizer(tokenizer.tokens[start:end + 1])
6419aaeb817SMauro Carvalho Chehab
6429aaeb817SMauro Carvalho Chehab            new_tokenizer.tokens += args_match.tokens(new)
6439aaeb817SMauro Carvalho Chehab
6449aaeb817SMauro Carvalho Chehab            pos = end + 1
6459aaeb817SMauro Carvalho Chehab
6469aaeb817SMauro Carvalho Chehab            n += 1
6479aaeb817SMauro Carvalho Chehab            if count and n >= count:
6489aaeb817SMauro Carvalho Chehab                break
6499aaeb817SMauro Carvalho Chehab
6509aaeb817SMauro Carvalho Chehab        new_tokenizer.tokens += tokenizer.tokens[pos:]
6519aaeb817SMauro Carvalho Chehab
6529aaeb817SMauro Carvalho Chehab        if not is_token:
6539aaeb817SMauro Carvalho Chehab            return str(new_tokenizer)
6549aaeb817SMauro Carvalho Chehab
6559aaeb817SMauro Carvalho Chehab        return new_tokenizer
6569aaeb817SMauro Carvalho Chehab
6579aaeb817SMauro Carvalho Chehab    def __repr__(self):
6589aaeb817SMauro Carvalho Chehab        """
6599aaeb817SMauro Carvalho Chehab        Returns a displayable version of the class init.
6609aaeb817SMauro Carvalho Chehab        """
6619aaeb817SMauro Carvalho Chehab
6629aaeb817SMauro Carvalho Chehab        return f'CMatch("{self.regex.regex.pattern}")'
663