#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917

"""
docdiff - Check differences between kernel-doc output between two different
commits.

Examples
--------

Compare the kernel-doc output between the v6.18 and v6.19 releases::

    $ kdoc_diff v6.18..v6.19

Both outputs are cached.

Force a complete documentation scan and clean any previous cache from
v6.19 to the current HEAD::

    $ kdoc_diff v6.19.. --full --clean

Check differences only on a single driver since origin/main::

    $ kdoc_diff origin/main drivers/media

Generate a YAML file and use it to check for regressions::

    $ kdoc_diff HEAD~ drivers/media --regression
"""

import os
import sys
import argparse
import subprocess
import shutil
import re
import signal

from glob import iglob


SRC_DIR = os.path.dirname(os.path.realpath(__file__))
WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))

KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")

CACHE_DIR = ".doc_diff_cache"
YAML_NAME = "out.yaml"

# Cache sub-directories, relative to the work dir.
DIR_NAME = {
    "full": os.path.join(CACHE_DIR, "full"),
    "partial": os.path.join(CACHE_DIR, "partial"),
    "no-cache": os.path.join(CACHE_DIR, "no_cache"),
    "tmp": os.path.join(CACHE_DIR, "__tmp__"),
}


class GitHelper:
    """Handles all Git operations"""

    def __init__(self, work_dir=None):
        # Directory where every git command is executed (None: current dir)
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Check if we're inside a Git repository"""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)
        except subprocess.CalledProcessError:
            return False

        return output.strip() == "true"

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        # --verify makes git check that the object really exists, and
        # ^{commit} peels annotated tags down to the commit they point to.
        result = subprocess.run(["git", "rev-parse", "--verify", "--quiet",
                                 f"{commit_hash}^{{commit}}"],
                                cwd=self.work_dir,
                                stdout=subprocess.DEVNULL,
                                stderr=subprocess.DEVNULL,
                                check=False)
        return result.returncode == 0

    def get_short_hash(self, commit_hash):
        """
        Get the short hash of the commit a ref points to.

        Returns an empty string if the ref can't be resolved to a commit.
        """
        try:
            # stderr is discarded: git warnings (e.g. ambiguous ref names)
            # must not end up inside the returned hash. ^{commit} ensures
            # that annotated tags resolve to the commit, not to the tag
            # object, so the result is comparable with HEAD.
            return subprocess.check_output(["git", "rev-parse", "--verify",
                                            "--short",
                                            f"{commit_hash}^{{commit}}"],
                                           cwd=self.work_dir,
                                           stderr=subprocess.DEVNULL,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Check for uncommitted changes"""
        try:
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            return True

        return False

    def get_current_branch(self):
        """
        Get current branch name.

        On a detached HEAD there is no branch name: the full commit hash
        is returned instead, so the result can always be checked out again.
        """
        branch = subprocess.check_output(["git", "branch", "--show-current"],
                                         cwd=self.work_dir,
                                         universal_newlines=True).strip()
        if branch:
            return branch

        return subprocess.check_output(["git", "rev-parse", "HEAD"],
                                       cwd=self.work_dir,
                                       universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """
        Checkout a commit safely.

        Returns True on success and False if git checkout fails. Raises
        RuntimeError if, after a successful checkout, HEAD is not at the
        requested commit.
        """
        # Resolve the target before moving HEAD: relative refs such as
        # HEAD~ would mean something else afterwards.
        expected = self.get_short_hash(commit_hash)

        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)

        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False

        # Double-check if HEAD actually moved to the requested commit
        current = self.get_short_hash("HEAD")
        if not expected or current != expected:
            raise RuntimeError(f"Branch changed to '{current}' instead of '{commit_hash}'")

        return True


class CacheManager:
    """Manages persistent cache directories"""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Create cache directories under work_dir if they don't exist"""
        for dir_path in DIR_NAME.values():
            os.makedirs(os.path.join(self.work_dir, dir_path),
                        exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """
        Generate cache path for a commit.

        The directory name is the short hash of the commit, falling back
        to commit_hash itself when it can't be resolved.
        """
        hash_short = GitHelper(self.work_dir).get_short_hash(commit_hash)
        if not hash_short:
            hash_short = commit_hash

        return os.path.join(path, hash_short)


class KernelDocRunner:
    """Runs kernel-doc documentation generator"""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir
        self.kdoc_binary = kdoc_binary
        # Result of find_kdoc_references(); None until the first scan
        self.kdoc_files = None

    def find_kdoc_references(self):
        """
        Find all files marked with kernel-doc:: directives.

        Scans Documentation/**/*.rst under work_dir and returns a sorted
        list with the first argument of each directive. The result is
        kept, so further calls don't rescan the tree.
        """
        if self.kdoc_files is not None:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        kdoc_files = set()

        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))

            except OSError:
                # Unreadable files are skipped
                continue

        # Sort it: set ordering changes between runs, and the order of the
        # files passed to kernel-doc must be the same for cached and newly
        # generated outputs, as otherwise the diff would be meaningless.
        self.kdoc_files = sorted(kdoc_files)

        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """
        Runs kernel-doc to generate a yaml file with man and rst.

        Returns True if kernel-doc exits with success, False otherwise.
        """
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """
        Run unit tests with the generated yaml file.

        When the test program fails, a hint about how to re-run it is
        printed.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir, check=False)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k <regex> to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        # NOTE(review): True is returned even when the tests fail, so a
        # regression doesn't change the exit status of the tool. Confirm
        # this is intentional.
        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """
        Generate man, rst and errors, storing them at tmp_dir.

        On success, tmp_dir is moved to output_dir (if not empty/None).
        Returns False if kernel-doc fails.
        """
        # Start from an empty directory: leftovers from a previous run
        # (like the yaml from a regression check) would otherwise be
        # moved into the cache and show up at the diff.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        os.makedirs(tmp_dir, exist_ok=True)

        try:
            with open(os.path.join(tmp_dir, "man.log"), "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(os.path.join(tmp_dir, "rst.log"), "w", encoding="utf-8") as out:
                with open(os.path.join(tmp_dir, "err.log"), "w", encoding="utf-8") as err:
                    subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                          cwd=self.work_dir,
                                          stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            parent = os.path.dirname(output_dir)
            if parent:
                os.makedirs(parent, exist_ok=True)

            # os.replace() can't overwrite a non-empty directory
            if os.path.isdir(output_dir):
                shutil.rmtree(output_dir)

            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """
        Run kernel-doc on its several ways.

        After checking out commit_hash:

        - without is_regression, man/rst/error logs are generated;
        - with is_regression, the yaml file is generated for the first
          commit (is_end False) and the unit tests are run against it for
          the last one (is_end True).

        Raises RuntimeError if there are no files to parse or if the
        commit can't be checked out.
        """
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if not is_end:
            return self.gen_yaml(yaml_file, kdoc_files)

        return self.run_unittest(yaml_file)


class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        self.diff_args = diff_args or ["-u0", "-w"]

    @staticmethod
    def _list_files(top):
        """Return a set with all files under top, relative to it."""
        found = set()
        for root, _, files in os.walk(top):
            for file in files:
                found.add(os.path.relpath(os.path.join(root, file), top))

        return found

    def diff_directories(self, dir1, dir2):
        """
        Compare two directories using an external diff.

        Files present on both directories are diffed; files present on
        just one of them are listed. Exits if the diff tool isn't found.
        """
        print(f"\nDiffing {dir1} and {dir2}:")

        dir1_files = self._list_files(dir1)
        dir2_files = self._list_files(dir2)
        has_diff = False

        for file in sorted(dir1_files & dir2_files):
            f1 = os.path.join(dir1, file)
            f2 = os.path.join(dir2, file)

            cmd = [self.diff_tool] + self.diff_args + [f1, f2]
            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True, check=False
                )
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)

            if result.stdout:
                has_diff = True
                print(f"\n{file}")
                print(result.stdout, end="")

            # diff exits with a status above 1 when it hits trouble:
            # don't hide what it reported.
            if result.returncode > 1 and result.stderr:
                print(result.stderr, end="", file=sys.stderr)

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f"  - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f"  + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")


class SignalHandler:
    """
    Context manager that checks out the original HEAD again at the end
    of the with block or when SIGINT/SIGTERM is received.
    """

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        # Signal handlers in place before __enter__, indexed by signal
        self.old_handler = {}
        self.restored = False

    def restore(self, force_exit=False):
        """
        Restore original HEAD state and the previous signal handlers.

        Does nothing if already called. With force_exit, the program
        exits with status 1 afterwards.
        """
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to restore: {e}", file=sys.stderr)

        for sig, handler in self.old_handler.items():
            signal.signal(sig, handler)

        self.restored = True

        if force_exit:
            sys.exit(1)

    def signal_handler(self, sig, _):
        """Handle interrupt signals."""
        print(f"\nSignal {sig} received. Restoring original state...")

        self.restore(force_exit=True)

    def __enter__(self):
        """Allow using it via with command."""
        for sig in [signal.SIGINT, signal.SIGTERM]:
            self.old_handler[sig] = signal.getsignal(sig)
            signal.signal(sig, self.signal_handler)

        return self

    def __exit__(self, *args):
        """Restore HEAD and signals at the end of with block."""
        self.restore()


def parse_commit_range(value):
    """
    Handle a commit range.

    Accepts "begin..end", "begin.." or just "begin". When the end is
    omitted, HEAD is used. Returns a (begin, end) tuple and raises
    argparse.ArgumentTypeError if begin is empty.
    """
    if ".." not in value:
        begin = value
        end = "HEAD"
    else:
        begin, _, end = value.partition("..")
        if not end:
            end = "HEAD"

    if not begin:
        raise argparse.ArgumentTypeError("Need a commit beginning")

    print(f"Range: {begin} to {end}")

    return begin, end


def main():
    """Main code"""
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process – can't be combined with --full")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    # parser.error() is the proper way to report usage problems:
    # argparse.ArgumentError expects an argparse Action, not a parsed value.
    if args.files and args.full:
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)
    if not os.path.isdir(work_dir):
        parser.error(f"work dir '{work_dir}' is not a directory")

    # Validate git repository before creating anything inside it
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate commits, reporting the name the user typed on errors
    commits = []
    for ref in args.commits:
        short = ""
        if git_helper.is_valid_commit(ref):
            short = git_helper.get_short_hash(ref)
        if not short:
            raise RuntimeError(f"Commit '{ref}' does not exist")
        commits.append(short)

    old_commit, new_commit = commits

    # Check for uncommitted changes
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    # The cache lives under work_dir, no matter from where the tool is
    # called: that's where CacheManager creates it and where kernel-doc
    # and the unit tests are executed.
    tmp_dir = os.path.join(work_dir, DIR_NAME["tmp"])
    out_path = os.path.join(work_dir, DIR_NAME[diff_type])

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice
    if not kdoc_files:
        parser.error("No kernel-doc references found")

    original_head = git_helper.get_current_branch()

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)
        else:
            new_success = True

    if not (old_success and new_success):
        raise RuntimeError("Failed to generate documentation")

    if not args.regression:
        diff_manager = DiffManager()
        diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()