#!/usr/bin/env python3
# pylint: disable=R0902,R0912,R0914,R0915,R1702
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Parse ABI documentation and produce results from it.
"""

import os
import re
import sys

from concurrent import futures
from datetime import datetime
from random import shuffle

from helpers import AbiDebug


class SystemSymbols:
    """
    Build a graph with the files found under sysfs and check them against
    the regular expressions provided by the ABI parser object.

    The graph is a nested dict stored at self.root. Each node is keyed by
    a path component and carries a "__name" list: its first element is the
    node's full path and the remaining ones are symlinks pointing to it.
    """

    def graph_add_file(self, path, link=None):
        """
        Add a file path to the sysfs graph stored at self.root.

        If link is given, it is recorded as an extra name (alias) of the
        node that corresponds to path. Paths already present in
        self.files are ignored.
        """

        if path in self.files:
            return

        name = ""
        ref = self.root
        for edge in path.split("/"):
            name += edge + "/"
            if edge not in ref:
                ref[edge] = {"__name": [name.rstrip("/")]}

            ref = ref[edge]

        if link:
            # Normalize before testing, so that "foo" and "foo/" are not
            # stored as two different aliases
            link = link.rstrip("/")
            if link not in ref["__name"]:
                ref["__name"].append(link)

        self.files.add(path)

    def print_graph(self, root_prefix="", root=None, level=0):
        """
        Print a reference tree graph using UTF-8 characters.

        root_prefix is the text printed before each entry, root is the
        graph node to start from (self.root when empty) and level is the
        current recursion depth.
        """

        if not root:
            root = self.root
            level = 0

        # Prevent endless traverse
        if level > 5:
            return

        if level > 0:
            prefix = "├──"
            last_prefix = "└──"
        else:
            prefix = ""
            last_prefix = ""

        items = list(root.items())
        if not items:
            return

        last_edge = items[-1][1]

        # NOTE(review): those are the names of the node being traversed,
        # so its links are appended to the label of every child. It looks
        # like the child's own "__name" was intended - confirm.
        names = root.get("__name", [])
        for k, edge in items:
            if k == "__name":
                continue

            if not k:
                k = "/"

            if len(names) > 1:
                k += " links: " + ",".join(names[1:])

            # Identity is enough to detect the last entry and avoids a
            # deep comparison between sub-trees
            if edge is last_edge:
                print(root_prefix + last_prefix + k)
                p = root_prefix
                if level > 0:
                    p += " "
                self.print_graph(p, edge, level + 1)
            else:
                print(root_prefix + prefix + k)
                p = root_prefix + "│ "
                self.print_graph(p, edge, level + 1)

    def _to_sys_path(self, path):
        """Replace a custom sysfs mounting point at path with /sys."""

        if not self.sysfs:
            return path

        # The count is passed as a positional argument: str.replace() only
        # accepts it as a keyword on Python 3.13 and newer.
        return path.replace(self.sysfs, "/sys", 1)

    def _walk(self, root):
        """
        Walk through sysfs to get all devnodes that aren't ignored.

        By default, uses /sys as sysfs mounting point. If another
        directory is used, it is replaced by /sys at the stored paths.
        """

        with os.scandir(root) as obj:
            for entry in obj:
                path = os.path.join(root, entry.name)
                p = self._to_sys_path(path)

                # Skip only the ignored entry: the other entries of this
                # directory still need to be visited.
                if self.re_ignore.search(p):
                    continue

                # Handle link first to avoid directory recursion
                if entry.is_symlink():
                    real = os.path.realpath(path)
                    if not self.sysfs:
                        self.aliases[path] = real
                    else:
                        real = self._to_sys_path(real)

                    # Add absfile location to graph if it doesn't exist
                    if not self.re_ignore.search(real):
                        # Add link to the graph
                        self.graph_add_file(real, p)

                elif entry.is_file():
                    self.graph_add_file(p)

                elif entry.is_dir():
                    self._walk(path)

    def __init__(self, abi, sysfs="/sys", hints=False):
        """
        Initialize internal variables and get a list of all files inside
        sysfs that can currently be parsed.

        Please notice that there are several entries on sysfs that aren't
        documented as ABI. Ignore those.

        The real paths will be stored under self.files. Aliases will be
        stored separately, at self.aliases.

        abi is the ABI parser object, sysfs is the directory where sysfs
        is mounted and hints enables printing the regular expressions
        tested for the symbols that were not found.
        """

        self.abi = abi
        self.log = abi.log

        if sysfs != "/sys":
            self.sysfs = sysfs.rstrip("/")
        else:
            self.sysfs = None

        self.hints = hints

        self.root = {}
        self.aliases = {}
        self.files = set()

        # The paths checked against those rules have the mounting point
        # already replaced by /sys, so the rules are anchored at /sys no
        # matter what directory is being walked.
        dont_walk = [
            # Those require root access and aren't documented at ABI
            "^/sys/kernel/debug",
            "^/sys/kernel/tracing",
            "^/sys/fs/pstore",
            "^/sys/fs/bpf",
            "^/sys/fs/fuse",

            # This is not documented at ABI
            "^/sys/module",

            "^/sys/fs/cgroup",  # this is big and has zero docs under ABI
            "^/sys/firmware",   # documented elsewhere: ACPI, DT bindings
            "sections|notes",   # aren't actually part of ABI

            # kernel-parameters.txt - not easy to parse
            "parameters",
        ]

        self.re_ignore = re.compile("|".join(dont_walk))

        print(f"Reading {sysfs} directory contents...", file=sys.stderr)
        self._walk(sysfs)

    def check_file(self, refs, found):
        """
        Check missing ABI symbols for a chunk of sysfs files.

        refs is a list where each element is the list of names of a graph
        node (real path first, followed by its links). When found is set,
        a message is also produced for the symbols that matched.

        Returns a list with one dict per element of refs, with the keys
        "found", "fname" and "msg".
        """

        res_list = []
        debug = self.abi.debug & AbiDebug.UNDEFINED

        try:
            for names in refs:
                fname = names[0]

                res = {
                    "found": False,
                    "fname": fname,
                    "msg": "",
                }
                res_list.append(res)

                re_what = self.abi.get_regexes(fname)
                if not re_what:
                    self.abi.log.warning("missing rules for %s", fname)
                    continue

                for name in names:
                    for r in re_what:
                        if debug:
                            self.log.debug("check if %s matches '%s'",
                                           name, r.pattern)
                        if r.match(name):
                            res["found"] = True
                            if found:
                                res["msg"] += f" {fname}: regex:\n\t"

                if self.hints and not res["found"]:
                    res["msg"] += f" {fname} not found. Tested regexes:\n"
                    for r in re_what:
                        res["msg"] += " " + r.pattern + "\n"

        except KeyboardInterrupt:
            # Best effort: when interrupted, return what was already
            # checked instead of raising inside a worker.
            pass

        return res_list

    def _ref_interactor(self, root):
        """
        Recursive generator to iterate over the sysfs tree.

        Yields the graph nodes below root, children first. The nodes
        directly under self.root are not yielded. When self.abi.re_string
        is set, only nodes whose real path matches it are yielded.
        """

        is_top = root is self.root

        for k, v in root.items():
            if isinstance(v, dict):
                yield from self._ref_interactor(v)

            if is_top or k == "__name":
                continue

            if self.abi.re_string:
                fname = v["__name"][0]
                if self.abi.re_string.search(fname):
                    yield v
            else:
                yield v

    def get_fileref(self, all_refs, chunk_size):
        """
        Iterator to group refs into chunks with up to chunk_size elements.

        The last chunk has the remaining elements and may be empty.
        """

        n = 0
        refs = []

        for ref in all_refs:
            refs.append(ref)

            n += 1
            if n >= chunk_size:
                yield refs
                n = 0
                refs = []

        yield refs

    @staticmethod
    def _cancel_pending(f_list):
        """Cancel the futures at f_list that didn't start running yet."""

        for fut in f_list:
            fut.cancel()

    def check_undefined_symbols(self, max_workers=None, chunk_size=50,
                                found=None, dry_run=None):
        """
        Search ABI for sysfs symbols missing documentation.

        max_workers limits the number of parallel workers (defaults to
        the number of CPUs, which is also its upper limit). chunk_size is
        the maximum number of references handled per job. When found is
        set, matched symbols are reported too. With dry_run, only the
        references that would be checked are printed.
        """

        self.abi.parse_abi()

        if self.abi.debug & AbiDebug.GRAPH:
            self.print_graph()

        all_refs = [ref["__name"] for ref in self._ref_interactor(self.root)]

        if dry_run:
            print("Would check", file=sys.stderr)
            for ref in all_refs:
                print(", ".join(ref))

            return

        print("Starting to search symbols (it may take several minutes):",
              file=sys.stderr)
        start = datetime.now()
        old_elapsed = None

        # Python threads don't run CPU-bound code in parallel, due to
        # limitations on its global lock (GIL). While Python 3.13 finally
        # made GIL optional, there are still issues related to it. Also,
        # we want to have backward compatibility with older versions of
        # Python.
        #
        # So, use instead multiprocess. However, Python is very slow passing
        # data from/to multiple processes. Also, it may consume lots of memory
        # if the data to be shared is not small. So, we need to group workload
        # in chunks that are big enough to generate performance gains while
        # not being so big that would cause out-of-memory.

        num_refs = len(all_refs)
        print(f"Number of references to parse: {num_refs}", file=sys.stderr)

        # os.cpu_count() returns None when the number can't be determined
        cpus = os.cpu_count() or 1

        if not max_workers or max_workers > cpus:
            max_workers = cpus

        max_workers = max(max_workers, 1)

        # Ceiling division, so that every worker can get at least one chunk
        max_chunk_size = (num_refs + max_workers - 1) // max_workers
        chunk_size = min(chunk_size, max_chunk_size)
        chunk_size = max(1, chunk_size)

        if max_workers > 1:
            executor = futures.ProcessPoolExecutor

            # Place references in a random order. This may help improving
            # performance, by mixing complex/simple expressions when creating
            # chunks
            shuffle(all_refs)
        else:
            # Python has a high overhead with processes. When there's just
            # one worker, it is faster to not create a new process.
            # Yet, User still deserves to have a progress print. So, use
            # python's "thread", which is actually a single process, using
            # an internal schedule to switch between tasks. No performance
            # gains for non-IO tasks, but still it can be quickly interrupted
            # from time to time to display progress.
            executor = futures.ThreadPoolExecutor

        not_found = []
        f_list = []
        with executor(max_workers=max_workers) as exe:
            try:
                for refs in self.get_fileref(all_refs, chunk_size):
                    if refs:
                        f_list.append(exe.submit(self.check_file, refs,
                                                 found))

            except KeyboardInterrupt:
                # Don't let queued jobs run while the executor shuts down
                self._cancel_pending(f_list)
                return

            total = len(f_list)

            if not total:
                if self.abi.re_string:
                    print(f"No ABI symbol matches {self.abi.search_string}")
                else:
                    self.abi.log.warning("No ABI symbols found")
                return

            print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
                  file=sys.stderr)

            while f_list:
                try:
                    # The timeout allows refreshing the progress line even
                    # when no job has finished
                    t = futures.wait(f_list, timeout=1,
                                     return_when=futures.FIRST_COMPLETED)

                    done = t[0]

                    for fut in done:
                        res_list = fut.result()

                        for res in res_list:
                            if not res["found"]:
                                not_found.append(res["fname"])
                            if res["msg"]:
                                print(res["msg"])

                        f_list.remove(fut)
                except KeyboardInterrupt:
                    # Don't let queued jobs run while the executor shuts down
                    self._cancel_pending(f_list)
                    return

                except RuntimeError as e:
                    self.abi.log.warning(f"Future: {e}")
                    break

                if sys.stderr.isatty():
                    elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
                    if len(f_list) < total:
                        elapsed += f" ({total - len(f_list)}/{total} jobs completed). "
                    if elapsed != old_elapsed:
                        print(elapsed + "\r", end="", flush=True,
                              file=sys.stderr)
                        old_elapsed = elapsed

        elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
        print(elapsed, file=sys.stderr)

        for f in sorted(not_found):
            print(f"{f} not found.")