#!/usr/bin/env python3
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Parse ABI documentation and produce results from it.
"""

from argparse import Namespace
import logging
import os
import re

from pprint import pformat
from random import randrange, seed

# Import Python modules

from helpers import AbiDebug, ABI_DIR


class AbiParser:
    """
    Main class to parse ABI files.

    After parse_abi() runs, the instance holds:

    - ``data``: key -> dict describing either a symbol group or a file
      ("what", "file", "path", "line_no", plus one entry per parsed tag);
    - ``what_symbols``: 'What' string -> {"file": {fname: [lines]}, "xref"};
    - ``file_refs``: file name -> ReST label for that file;
    - ``what_refs``: 'What' string -> key inside ``data``.
    """

    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """
        Store the arguments and initialize the parser state.

        ``directory`` is the default root used by parse_abi(). When
        ``logger`` is not given, the "get_abi" logger is used.
        ``show_warnings`` gates warn(); ``debug`` is tested as a bitmask
        against the AbiDebug constants.
        """

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        if not logger:
            self.log = logging.getLogger("get_abi")
        else:
            self.log = logger

        self.data = {}
        self.what_symbols = {}
        self.file_refs = {}
        self.what_refs = {}

        # Ignore files that contain such suffixes
        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

        # Regular expressions used by the parser
        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = re.compile(self.TAGS)
        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
        self.re_whitespace = re.compile(r"^\s+")

        # Regular expressions used when printing
        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = re.compile(self.XREF)

    def warn(self, fdata, msg, extra=None):
        """
        Log a parse warning prefixed with "<file>:<line>: ".

        Does nothing when warnings are disabled. ``extra``, when given, is
        appended on a new, tab-indented line.
        """

        if not self.show_warnings:
            return

        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
        if extra:
            msg += "\n\t\t" + extra

        self.log.warning(msg)

    def add_symbol(self, what, fname, ln=None, xref=None):
        """
        Record that symbol ``what`` is defined at ``fname`` (line ``ln``).

        Line numbers are stored once per file; ``xref``, when given,
        replaces any previously stored cross-reference key for the symbol.
        """

        entry = self.what_symbols.setdefault(what, {"file": {}})
        lines = entry["file"].setdefault(fname, [])

        if ln and ln not in lines:
            lines.append(ln)

        if xref:
            entry["xref"] = xref

    def _parse_line(self, fdata, line):
        """
        Parse a single line of an ABI file.

        This is one step of a per-file state machine: ``fdata`` carries the
        current tag, key, 'What' list and description indentation between
        calls, and the results are accumulated in ``self.data``.
        """

        new_what = False
        new_tag = False
        content = None

        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    # NOTE(review): this reports the current tag, not the
                    # unrecognized one found on this line ('new') - confirm
                    # whether that is intended before changing the message.
                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive "What:" lines belong to the same entry
                    fdata.what.append(content.strip("\n"))
                else:
                    # A new entry starts here: flush the previous one first
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    while fdata.key in self.data:
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "path": fdata.ftype,
                            "line_no": fdata.ln,
                        }

                    # From now on fdata.what aliases the stored list
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            if fdata.tag and new_tag:
                fdata.tag = new_tag

                if new_what:
                    fdata.label = ""

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    # debug is a bitmask, as in the other AbiDebug checks
                    if self.debug & AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        sep = sep.replace(":", " ")

                        # Rebuild the line with the tag replaced by spaces, so
                        # the text keeps the column it had in the file
                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        if fdata.tag == "description":
            content = line.expandtabs()

            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]

                else:
                    fdata.space = ""

            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += content
            return

        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)

    def parse_readme(self, nametag, fname):
        """
        Parse the ABI README file into ``nametag["description"]``.

        Lines that look like a valid ABI tag are prefixed with "\\n:" before
        being appended; all other lines are appended unchanged.
        """

        nametag["what"] = ["Introduction"]
        nametag["path"] = "README"
        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                match = self.re_tag.match(line)
                if match and self.re_valid.search(match.group(1).lower()):
                    nametag["description"] += "\n:" + line
                    continue

                nametag["description"] += line

    def parse_file(self, fname, path, basename):
        """
        Parse a single ABI file.

        ``fname`` is the file to open, ``path`` its directory relative to
        the ABI root and ``basename`` its name. A per-file entry is always
        added to ``self.data``; symbol entries are added by _parse_line().
        """

        ref = f"abi_file_{path}_{basename}"
        ref = self.re_unprintable.sub("_", ref).strip("_")

        # Store per-file state into a namespace variable. This will be used
        # by the per-line parser state machine and by the warning function.
        # It must be an instance: assigning to the Namespace class itself
        # would leak this state into every other argparse.Namespace.
        fdata = Namespace()

        fdata.fname = fname
        fdata.name = basename

        pos = fname.find(ABI_DIR)
        if pos > 0:
            f = fname[pos:]
        else:
            f = fname

        fdata.file_ref = (f, ref)
        self.file_refs[f] = ref

        fdata.ln = 0
        fdata.what_ln = 0
        fdata.tag = ""
        fdata.label = ""
        fdata.what = []
        fdata.key = None
        fdata.xrefs = None
        fdata.space = None
        fdata.ftype = path.split("/")[0]

        fdata.nametag = {}
        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
        fdata.nametag["type"] = "File"
        fdata.nametag["path"] = fdata.ftype
        fdata.nametag["file"] = [fdata.file_ref]
        fdata.nametag["line_no"] = 1
        fdata.nametag["description"] = ""
        fdata.nametag["symbols"] = []

        self.data[ref] = fdata.nametag

        if self.debug & AbiDebug.WHAT_OPEN:
            self.log.debug("Opening file %s", fname)

        if basename == "README":
            self.parse_readme(fdata.nametag, fname)
            return

        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                fdata.ln += 1

                self._parse_line(fdata, line)

        if "description" in fdata.nametag:
            fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

        # Flush the last entry of the file
        if fdata.key:
            if "description" not in self.data.get(fdata.key, {}):
                self.warn(fdata, f"{fdata.key} doesn't have a description")

            for w in fdata.what:
                self.add_symbol(what=w, fname=fname, xref=fdata.key)

    def _parse_abi(self, root=None):
        """
        Internal function to parse documentation ABI recursively.

        Hidden files and files ending with one of ``ignore_suffixes`` are
        skipped. ``root`` defaults to the directory given at construction.
        """

        if not root:
            root = self.directory

        with os.scandir(root) as obj:
            for entry in obj:
                name = os.path.join(root, entry.name)

                if entry.is_dir():
                    self._parse_abi(name)
                    continue

                if not entry.is_file():
                    continue

                basename = os.path.basename(name)

                if basename.startswith("."):
                    continue

                if basename.endswith(self.ignore_suffixes):
                    continue

                path = self.re_abi_dir.sub("", os.path.dirname(name))

                self.parse_file(name, path, basename)

    def parse_abi(self, root=None):
        """Parse documentation ABI, optionally dumping the parsed data"""

        self._parse_abi(root)

        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
            self.log.debug(pformat(self.data))

    def desc_txt(self, desc):
        """Return the description as found inside ABI files, trimmed"""

        desc = desc.strip(" \t\n")

        return desc + "\n\n"

    def xref(self, fname):
        """
        Converts a Documentation/ABI + basename into a ReST cross-reference.

        Returns None when the file is unknown.
        """

        return self.file_refs.get(fname) or None

    @staticmethod
    def _replace_symbol(text, symbol, replacement):
        """
        Replace ``symbol``, delimited by word boundaries, by ``replacement``.

        Both are handled as literal text: the symbol is regex-escaped and
        the replacement is not processed as a regex template.
        """

        pattern = r"\b" + re.escape(symbol) + r"\b"

        return re.sub(pattern, lambda _match: replacement, text)

    def desc_rst(self, desc):
        """
        Enrich ReST output by creating cross-references.

        Returns the description with title markups removed and with
        references to documentation files, ABI files and known ABI symbols
        converted into ReST roles.
        """

        # Remove title markups from the description
        # Having titles inside ABI files will only work if extra
        # care would be taken in order to strictly follow the same
        # level order for each markup.
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
        desc = desc.rstrip(" \t\n").lstrip("\n")

        # Python's regex performance for non-compiled expressions is a lot
        # worse than Perl's, as Perl automatically caches them at their
        # first usage. Here, we need to do the same, as otherwise the
        # performance penalty would be high

        out = []
        for d in desc.split("\n"):
            if d == "":
                out.append("\n")
                continue

            # Use cross-references for doc files where needed
            d = self.re_doc.sub(r":doc:`/\1`", d)

            # Use cross-references for ABI generated docs where needed
            for m in self.re_abi.findall(d):
                abi = m[0] + m[1]

                xref = self.file_refs.get(abi)
                if not xref:
                    # This may happen if ABI is on a separate directory,
                    # like parsing ABI testing and symbol is at stable.
                    # The proper solution is to move this part of the code
                    # for it to be inside sphinx/kernel_abi.py
                    self.log.info("Didn't find ABI reference for '%s'", abi)
                else:
                    new = self.re_escape.sub(r"\\\1", m[1])
                    d = self._replace_symbol(d, abi, f":ref:`{new} <{xref}>`")

            # Seek for cross reference symbols like /sys/...
            # Need to be careful to avoid doing it on a code block
            if d[0] not in [" ", "\t"]:
                for m in self.re_xref_node.findall(d):
                    # Finding ABI here is more complex due to wildcards
                    xref = self.what_refs.get(m)
                    if xref:
                        new = self.re_escape.sub(r"\\\1", m)
                        # NOTE(review): symbols start with "/", so the
                        # leading word boundary only matches when the
                        # symbol follows a word character. Confirm whether
                        # a looser delimiter is wanted here.
                        d = self._replace_symbol(d, m, f":ref:`{new} <{xref}>`")

            out.append(d + "\n")

        return "".join(out) + "\n\n"

    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
            filter_path=None):
        """
        Generate the ABI documentation.

        Yields one ``(text, file name, line number)`` tuple per entry of
        ``self.data``, sorted by type and then by 'What'. Entries can be
        filtered by kind (``show_symbols``/``show_file``) and by
        ``filter_path``. Descriptions are emitted verbatim when
        ``output_in_txt`` is set, or enriched with cross-references
        otherwise.
        """

        part = None
        for key, v in sorted(self.data.items(),
                             key=lambda x: (x[1].get("type", ""),
                                            x[1].get("what"))):

            wtype = v.get("type", "Symbol")
            file_ref = v.get("file")
            names = v.get("what", [""])

            if wtype == "File":
                if not show_file:
                    continue
            else:
                if not show_symbols:
                    continue

            if filter_path:
                if v.get("path") != filter_path:
                    continue

            msg = ""

            if wtype != "File":
                cur_part = names[0]
                if cur_part.find("/") >= 0:
                    match = self.re_what.match(cur_part)
                    if match:
                        symbol = match.group(1).rstrip("/")
                        cur_part = "Symbols under " + symbol

                if cur_part and cur_part != part:
                    part = cur_part
                    msg += part + "\n" + "-" * len(part) + "\n\n"

                msg += f".. _{key}:\n\n"

                # Format a copy: the list stored at self.data must keep the
                # raw names, as it is reused by later calls and by searches
                names = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                         for name in names]
                max_len = max((len(name) for name in names), default=0)

                # One-column ReST grid table, with one row per name
                border = "+-" + "-" * max_len + "-+\n"

                msg += border
                for name in names:
                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                    msg += border
                msg += "\n"

            for ref in file_ref:
                if wtype == "File":
                    msg += f".. _{ref[1]}:\n\n"
                else:
                    base = os.path.basename(ref[0])
                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

            if wtype == "File":
                msg += names[0] + "\n" + "-" * len(names[0]) + "\n\n"

            desc = v.get("description")
            if not desc and wtype != "File":
                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

            if desc:
                if output_in_txt:
                    msg += self.desc_txt(desc)
                else:
                    msg += self.desc_rst(desc)

            symbols = v.get("symbols")
            if symbols:
                msg += "Has the following ABI:\n\n"

                for w, label in symbols:
                    # Escape special chars from content
                    content = self.re_escape.sub(r"\\\1", w)

                    msg += f"- :ref:`{content} <{label}>`\n\n"

            users = v.get("users")
            if users and users.strip(" \t\n"):
                users = users.strip("\n").replace('\n', '\n\t')
                msg += f"Users:\n\t{users}\n\n"

            ln = v.get("line_no", 1)

            yield (msg, file_ref[0][0], ln)

    def check_issues(self):
        """Warn about duplicated ABI entries"""

        for what, v in self.what_symbols.items():
            files = v.get("file")
            if not files:
                # Should never happen if the parser works properly
                self.log.warning("%s doesn't have a file associated", what)
                continue

            if len(files) == 1:
                continue

            f = []
            for fname, lines in sorted(files.items()):
                if not lines:
                    f.append(f"{fname}")
                elif len(lines) == 1:
                    f.append(f"{fname}:{lines[0]}")
                else:
                    f.append(f"{fname} lines " + ", ".join(str(x) for x in lines))

            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

    def search_symbols(self, expr):
        """
        Search for ABI symbols.

        Prints, sorted by key, every symbol whose 'What' matches the
        case-insensitive regular expression ``expr``, or a "not found"
        message when nothing matches.
        """

        regex = re.compile(expr, re.I)

        found_keys = 0
        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            wtype = v.get("type", "")
            if wtype == "File":
                continue

            for what in v.get("what", [""]):
                if not regex.search(what):
                    continue

                found_keys += 1

                kernelversion = v.get("kernelversion", "").strip(" \t\n")
                date = v.get("date", "").strip(" \t\n")
                contact = v.get("contact", "").strip(" \t\n")
                users = v.get("users", "").strip(" \t\n")
                desc = v.get("description", "").strip(" \t\n")

                files = [f[0] for f in v.get("file", ())]

                title = str(found_keys) + ". " + what
                title_tag = "-" * len(title)

                print(f"\n{title}\n{title_tag}\n")

                if kernelversion:
                    print(f"Kernel version:\t\t{kernelversion}")

                if date:
                    print(f"Date:\t\t\t{date}")

                if contact:
                    print(f"Contact:\t\t{contact}")

                if users:
                    print(f"Users:\t\t\t{users}")

                print("Defined on file(s):\t" + ", ".join(files))

                if desc:
                    desc = desc.strip("\n")
                    print(f"\n{desc}\n")

        if not found_keys:
            print(f"Regular expression /{expr}/ not found.")