1#!/usr/bin/env python3
2# pylint: disable=R0902,R0912,R0914,R0915,R1702
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Parse ABI documentation and produce results from it.
8"""
9
10import os
11import re
12import sys
13
14from concurrent import futures
15from datetime import datetime
16from random import shuffle
17
18from helpers import AbiDebug
19
class SystemSymbols:
    """
    Map the files found under sysfs and check them against the ABI docs.

    The tree is kept at self.root as nested dictionaries keyed by path
    component. Every node below the root has a "__name" list: its first
    element is the full path of the node and the remaining ones are the
    links recorded for it.
    """

    def __init__(self, abi, sysfs="/sys", hints=False):
        """
        Initialize internal variables and get a list of all files inside
        sysfs that can currently be parsed.

        Please notice that there are several entries on sysfs that aren't
        documented as ABI. Ignore those.

        The real paths will be stored under self.files. Aliases will be
        stored separately, as self.aliases.

        Arguments:
            abi:   object providing the ABI parser interface used by this
                   class (log, debug, re_string, get_regexes(), parse_abi()).
            sysfs: sysfs mounting point.
            hints: when true, check_file() lists the regexes that were
                   tested for each file that was not found.
        """

        self.abi = abi
        self.log = abi.log

        # self.sysfs is only set for a non-default mounting point. _walk()
        # uses it to rewrite every path so that it starts with /sys.
        if sysfs != "/sys":
            self.sysfs = sysfs.rstrip("/")
        else:
            self.sysfs = None

        self.hints = hints

        self.root = {}
        self.aliases = {}
        self.files = set()

        # The expressions below are matched against paths that _walk()
        # already rewrote to start with /sys, so they are anchored there
        # and not at the mounting point received as argument.
        dont_walk = [
            # Those require root access and aren't documented at ABI
            "^/sys/kernel/debug",
            "^/sys/kernel/tracing",
            "^/sys/fs/pstore",
            "^/sys/fs/bpf",
            "^/sys/fs/fuse",

            # This is not documented at ABI
            "^/sys/module",

            "^/sys/fs/cgroup",  # this is big and has zero docs under ABI
            "^/sys/firmware",   # documented elsewhere: ACPI, DT bindings
            "sections|notes",   # aren't actually part of ABI

            # kernel-parameters.txt - not easy to parse
            "parameters",
        ]

        self.re_ignore = re.compile("|".join(dont_walk))

        print(f"Reading {sysfs} directory contents...", file=sys.stderr)
        self._walk(sysfs)

    def graph_add_file(self, path, link=None):
        """
        Add a file path to the sysfs graph stored at self.root.

        One node is created per path component that doesn't exist yet.
        If link is given, it is appended to the "__name" list of the node
        that corresponds to the last component of path.

        NOTE(review): a path that is already at self.files returns before
        link is looked at, so only links that arrive together with the
        first occurrence of a path are recorded. Confirm this is intended.
        """

        if path in self.files:
            return

        node = self.root
        parts = path.split("/")
        for depth, part in enumerate(parts, start=1):
            if part not in node:
                # Full path up to this component, without trailing slashes
                full = "/".join(parts[:depth]).rstrip("/")
                node[part] = {"__name": [full]}

            node = node[part]

        if link and link not in node["__name"]:
            node["__name"].append(link.rstrip("/"))

        self.files.add(path)

    def print_graph(self, root_prefix="", root=None, level=0):
        """
        Prints a reference tree graph using UTF-8 characters.

        Arguments:
            root_prefix: text printed before each entry of this level.
            root:        node to print. When empty, starts from self.root.
            level:       current depth. Nothing deeper than 5 is printed.
        """

        if not root:
            root = self.root
            level = 0

        # Prevent endless traverse
        if level > 5:
            return

        if level > 0:
            prefix = "├──"
            last_prefix = "└──"
        else:
            prefix = ""
            last_prefix = ""

        # "__name" is node metadata, not a child
        children = [(k, v) for k, v in root.items() if k != "__name"]
        last = len(children) - 1

        for pos, (k, edge) in enumerate(children):
            if not k:
                k = "/"

            # Show the links that belong to the entry being printed
            names = edge.get("__name", [])
            if len(names) > 1:
                k += " links: " + ",".join(names[1:])

            if pos == last:
                print(root_prefix + last_prefix + k)
                p = root_prefix
                if level > 0:
                    p += "   "
            else:
                print(root_prefix + prefix + k)
                p = root_prefix + "│   "

            self.print_graph(p, edge, level + 1)

    def _walk(self, root):
        """
        Walk through sysfs to get all devnodes that aren't ignored.

        By default, uses /sys as sysfs mounting point. If another
        directory is used, it is replaced by /sys at the stored paths.
        """

        with os.scandir(root) as obj:
            for entry in obj:
                path = os.path.join(root, entry.name)

                # The replace count is positional: str.replace() only
                # accepts it as a keyword on newer Python versions.
                if self.sysfs:
                    p = path.replace(self.sysfs, "/sys", 1)
                else:
                    p = path

                # Skip just this entry: its siblings still need scanning
                if self.re_ignore.search(p):
                    continue

                # Handle link first to avoid directory recursion
                if entry.is_symlink():
                    real = os.path.realpath(path)
                    if self.sysfs:
                        real = real.replace(self.sysfs, "/sys", 1)
                    else:
                        # Aliases are only stored for the default mount
                        self.aliases[path] = real

                    # Add the link target to the graph, together with the
                    # link name, unless the target is ignored
                    if not self.re_ignore.search(real):
                        self.graph_add_file(real, p)

                elif entry.is_file():
                    self.graph_add_file(p)

                elif entry.is_dir():
                    self._walk(path)

    def check_file(self, refs, found):
        """
        Check missing ABI symbols for a group of sysfs files.

        Arguments:
            refs:  list of name lists. The first name of each list is the
                   file path; the other ones are alternative names for it.
            found: when true, a message is added for each match.

        Returns a list with one dictionary per entry of refs, with keys
        "found", "fname" and "msg". If interrupted with Ctrl-C, returns
        what was checked so far.
        """

        res_list = []

        # Only look at the debug flag when some debugging is enabled
        debug = self.abi.debug
        trace = bool(debug and debug & AbiDebug.UNDEFINED)

        try:
            for names in refs:
                fname = names[0]

                res = {
                    "found": False,
                    "fname": fname,
                    "msg": "",
                }
                res_list.append(res)

                re_what = self.abi.get_regexes(fname)
                if not re_what:
                    self.abi.log.warning("missing rules for %s", fname)
                    continue

                for name in names:
                    for r in re_what:
                        if trace:
                            self.log.debug("check if %s matches '%s'",
                                           name, r.pattern)
                        if not r.match(name):
                            continue

                        res["found"] = True

                        # One match is enough, except when each match
                        # has to be reported
                        if not found:
                            break

                        # NOTE(review): the pattern itself isn't part of
                        # the message. Confirm if that is intended.
                        res["msg"] += f"  {fname}: regex:\n\t"

                    if res["found"] and not found:
                        break

                if self.hints and not res["found"]:
                    res["msg"] += f"  {fname} not found. Tested regexes:\n"
                    for r in re_what:
                        res["msg"] += "    " + r.pattern + "\n"

        except KeyboardInterrupt:
            # Best effort: hand back the partial results
            pass

        return res_list

    def _ref_interactor(self, root):
        """
        Recursive generator to iterate over the sysfs tree.

        Yields the nodes below root, children before their parent. The
        direct children of self.root are not yielded. When
        self.abi.re_string is set, only nodes whose full path matches it
        are yielded.
        """

        # Loop invariants
        at_top = root is self.root
        re_string = self.abi.re_string

        for k, v in root.items():
            if isinstance(v, dict):
                yield from self._ref_interactor(v)

            if at_top or k == "__name":
                continue

            if re_string and not re_string.search(v["__name"][0]):
                continue

            yield v

    def get_fileref(self, all_refs, chunk_size):
        """
        Generator to group refs into chunks.

        Yields lists with chunk_size elements each, followed by a last
        list with the remaining elements, which may be empty.
        """

        chunk = []

        for ref in all_refs:
            chunk.append(ref)

            if len(chunk) >= chunk_size:
                yield chunk
                chunk = []

        yield chunk

    def check_undefined_symbols(self, max_workers=None, chunk_size=50,
                                found=None, dry_run=None):
        """
        Search ABI for sysfs symbols missing documentation.

        Arguments:
            max_workers: maximum number of workers. It is limited to the
                         number of CPUs, which is also the default.
            chunk_size:  maximum number of files checked by each job.
            found:       passed to check_file() to also report matches.
            dry_run:     just print what would be checked.

        Prints the files that were not found on stdout. Progress and
        status messages go to stderr.
        """

        self.abi.parse_abi()

        # Only look at the debug flag when some debugging is enabled
        debug = self.abi.debug
        if debug and debug & AbiDebug.GRAPH:
            self.print_graph()

        all_refs = [ref["__name"] for ref in self._ref_interactor(self.root)]

        if dry_run:
            print("Would check", file=sys.stderr)
            for ref in all_refs:
                print(", ".join(ref))

            return

        print("Starting to search symbols (it may take several minutes):",
              file=sys.stderr)
        start = datetime.now()
        old_elapsed = None

        # Python threads don't run in parallel due to limitations on its
        # global lock (GIL). While Python 3.13 finally made GIL optional,
        # there are still issues related to it. Also, we want to have
        # backward compatibility with older versions of Python.
        #
        # So, use multiprocess instead. However, Python is very slow passing
        # data from/to multiple processes. Also, it may consume lots of memory
        # if the data to be shared is not small.  So, we need to group workload
        # in chunks that are big enough to generate performance gains while
        # not being so big that would cause out-of-memory.

        num_refs = len(all_refs)
        print(f"Number of references to parse: {num_refs}", file=sys.stderr)

        # os.cpu_count() returns None when it can't detect the CPUs
        cpus = os.cpu_count() or 1
        if not max_workers or max_workers > cpus:
            max_workers = cpus

        max_workers = max(max_workers, 1)

        # Don't create chunks bigger than what is needed to feed all workers
        max_chunk_size = (num_refs + max_workers - 1) // max_workers
        chunk_size = max(1, min(chunk_size, max_chunk_size))

        if max_workers > 1:
            executor = futures.ProcessPoolExecutor

            # Place references in a random order. This may help improving
            # performance, by mixing complex/simple expressions when creating
            # chunks
            shuffle(all_refs)
        else:
            # Python has a high overhead with processes. When there's just
            # one worker, it is faster to not create a new process.
            # Yet, User still deserves to have a progress print. So, use
            # python's "thread", which is actually a single process, using
            # an internal schedule to switch between tasks. No performance
            # gains for non-IO tasks, but still it can be quickly interrupted
            # from time to time to display progress.
            executor = futures.ThreadPoolExecutor

        not_found = []
        f_list = []

        def cancel_pending():
            """Drop queued jobs, so leaving the executor won't run them"""
            for pending in f_list:
                pending.cancel()

        with executor(max_workers=max_workers) as exe:
            try:
                for refs in self.get_fileref(all_refs, chunk_size):
                    if refs:
                        f_list.append(exe.submit(self.check_file,
                                                 refs, found))
            except KeyboardInterrupt:
                cancel_pending()
                return

            total = len(f_list)

            if not total:
                if self.abi.re_string:
                    print(f"No ABI symbol matches {self.abi.search_string}")
                else:
                    self.abi.log.warning("No ABI symbols found")
                return

            print(f"{total:6d} jobs queued on {max_workers} workers",
                  file=sys.stderr)

            while f_list:
                try:
                    # The timeout lets the progress line be refreshed
                    done, _ = futures.wait(f_list, timeout=1,
                                           return_when=futures.FIRST_COMPLETED)

                    for fut in done:
                        for res in fut.result():
                            if not res["found"]:
                                not_found.append(res["fname"])
                            if res["msg"]:
                                print(res["msg"])

                        f_list.remove(fut)

                except KeyboardInterrupt:
                    cancel_pending()
                    return

                except RuntimeError as e:
                    self.abi.log.warning(f"Future: {e}")
                    cancel_pending()
                    break

                if sys.stderr.isatty():
                    elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
                    if len(f_list) < total:
                        elapsed += f" ({total - len(f_list)}/{total} jobs completed).  "
                    if elapsed != old_elapsed:
                        print(elapsed + "\r", end="", flush=True,
                              file=sys.stderr)
                        old_elapsed = elapsed

        elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
        print(elapsed, file=sys.stderr)

        for f in sorted(not_found):
            print(f"{f} not found.")
379