xref: /linux/tools/docs/kdoc_diff (revision 5181afcdf99527dd92a88f80fc4d0d8013e1b510)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917
6
7"""
kdoc_diff - Check differences in kernel-doc output between two different
commits.
10
11Examples
12--------
13
Compare the kernel-doc output between two kernel releases::
15
16    $ kdoc_diff v6.18..v6.19
17
Both outputs are cached.
19
20Force a complete documentation scan and clean any previous cache from
216.19 to the current HEAD::
22
23    $ kdoc_diff 6.19.. --full --clean
24
25Check differences only on a single driver since origin/main::
26
27    $ kdoc_diff origin/main drivers/media
28
Generate a YAML file and use it to check for regressions::
30
31    $ kdoc_diff HEAD~ drivers/media --regression
32
33
34"""
35
36import os
37import sys
38import argparse
39import subprocess
40import shutil
41import re
42import signal
43
44from glob import iglob
45
46
47SRC_DIR = os.path.dirname(os.path.realpath(__file__))
48WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))
49
50KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
51KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")
52
53CACHE_DIR = ".doc_diff_cache"
54YAML_NAME = "out.yaml"
55
56DIR_NAME = {
57    "full": os.path.join(CACHE_DIR, "full"),
58    "partial": os.path.join(CACHE_DIR, "partial"),
59    "no-cache": os.path.join(CACHE_DIR, "no_cache"),
60    "tmp": os.path.join(CACHE_DIR, "__tmp__"),
61}
62
class GitHelper:
    """Thin wrapper around git subcommands, all run inside work_dir."""

    def __init__(self, work_dir=None):
        # Directory passed as cwd to every git invocation (None = CWD).
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Return True when work_dir is inside a Git work tree."""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)

            return output.strip() == "true"
        except subprocess.CalledProcessError:
            return False

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        try:
            subprocess.check_output(["git", "rev-parse", commit_hash],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return True
        except subprocess.CalledProcessError:
            return False

    def get_short_hash(self, commit_hash):
        """Return the abbreviated hash for a ref, or "" if unresolvable."""
        try:
            return subprocess.check_output(["git", "rev-parse", "--short",
                                            commit_hash],
                                           cwd=self.work_dir,
                                           stderr=subprocess.STDOUT,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Return True when tracked files differ from HEAD."""
        try:
            # diff-index --quiet exits non-zero when there are changes
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return False
        except subprocess.CalledProcessError:
            return True

    def get_current_branch(self):
        """Return the current branch name ("" when HEAD is detached)."""
        return subprocess.check_output(["git", "branch", "--show-current"],
                                        cwd=self.work_dir,
                                        universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """Force-checkout a ref and verify HEAD actually moved there.

        Returns True on success, False when git itself failed; raises
        RuntimeError if the checkout silently ended up somewhere else.
        """
        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)
        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)

            # BUGFIX: the previous check compared the requested ref text
            # (possibly a branch/tag name or full hash) against HEAD's
            # *short* hash, so it raised for every spelling other than a
            # short hash.  Resolve the ref to its short hash first.
            expected = self.get_short_hash(commit_hash)
            head = self.get_short_hash("HEAD")
            if expected and head != expected:
                raise RuntimeError(f"Branch changed to '{head}' instead of '{commit_hash}'")

            return True
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False
143
144
class CacheManager:
    """Manages persistent cache directories"""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Create every cache directory that is not already present."""
        for rel_path in DIR_NAME.values():
            target = os.path.join(self.work_dir, rel_path)
            if os.path.exists(target):
                continue
            os.makedirs(target, exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """Return the cache path for a commit, keyed by its short hash.

        Falls back to the ref text itself when it cannot be resolved.
        """
        short = GitHelper(self.work_dir).get_short_hash(commit_hash)
        return os.path.join(path, short or commit_hash)
165
class KernelDocRunner:
    """Runs the kernel-doc documentation generator for a checked-out commit."""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir        # kernel tree the commands run in
        self.kdoc_binary = kdoc_binary  # path to the kernel-doc script
        self.kdoc_files = None          # memoized find_kdoc_references() result

    def find_kdoc_references(self):
        """Find all source files referenced by kernel-doc:: directives.

        Scans Documentation/**/*.rst once and memoizes the result so
        repeated calls (one per commit) don't rescan the tree.
        Returns a list of file paths (order unspecified).
        """
        # BUGFIX: test against None instead of truthiness so an empty scan
        # result is cached too, instead of triggering a full rescan.
        if self.kdoc_files is not None:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        kdoc_files = set()

        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))

            except OSError:
                # Unreadable file: skip it, this is a best-effort scan
                continue

        self.kdoc_files = list(kdoc_files)

        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """Run kernel-doc to generate a YAML file with man and rst output.

        Returns True on success, False when kernel-doc failed.
        """
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """Run the parser unit tests against a previously generated YAML file.

        NOTE(review): test failures are advisory -- this prints how to
        re-run with more verbosity but still returns True, so a failing
        regression never aborts the caller.  Confirm that is intended.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir, check=False)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k <regex> to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """Generate man, rst and error logs, staging them in tmp_dir.

        On success the staging directory is renamed to output_dir (when
        given), so a partially written run never becomes a cache entry.
        Returns True on success, False when kernel-doc failed.
        """
        os.makedirs(tmp_dir, exist_ok=True)

        try:
            with open(os.path.join(tmp_dir, "man.log"), "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(os.path.join(tmp_dir, "rst.log"), "w", encoding="utf-8") as out, \
                 open(os.path.join(tmp_dir, "err.log"), "w", encoding="utf-8") as err:
                subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            # os.replace() requires output_dir to be absent or empty;
            # callers delete stale cache entries beforehand.
            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """Check out commit_hash and produce its documentation output.

        Normal mode writes man/rst/err logs.  Regression mode either
        generates the reference YAML (is_end=False, old commit) or runs
        the unit tests against it (is_end=True, new commit).

        Returns True on success; raises RuntimeError when there is
        nothing to parse or the checkout fails.
        """
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if not is_end:
            return self.gen_yaml(yaml_file, kdoc_files)

        return self.run_unittest(yaml_file)
273
class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        self.diff_args = diff_args if diff_args else ["-u0", "-w"]

    def diff_directories(self, dir1, dir2):
        """Diff the common files of two trees and list files unique to each."""
        print(f"\nDiffing {dir1} and {dir2}:")

        def collect(base):
            # Relative paths of every regular file below base
            found = set()
            for root, _, names in os.walk(base):
                for name in names:
                    found.add(os.path.relpath(os.path.join(root, name), base))
            return found

        dir1_files = collect(dir1)
        dir2_files = collect(dir2)
        has_diff = False

        for file in sorted(dir1_files & dir2_files):
            cmd = [self.diff_tool, *self.diff_args,
                   os.path.join(dir1, file), os.path.join(dir2, file)]
            try:
                proc = subprocess.run(cmd, capture_output=True, text=True,
                                      check=False)
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)
            if proc.stdout:
                has_diff = True
                print(f"\n{file}")
                print(proc.stdout, end="")

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f"  - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f"  + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")
327
328
class SignalHandler():
    """Restore the original git HEAD on exit or on SIGINT/SIGTERM."""

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        self.old_handler = {}   # original signal dispositions, saved on entry
        self.restored = False   # guards against restoring twice

    def restore(self, force_exit=False):
        """Restore original HEAD state."""
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to restore: {e}", file=sys.stderr)

        # Put back the signal dispositions we replaced in __enter__()
        for signum, previous in self.old_handler.items():
            signal.signal(signum, previous)

        self.restored = True

        if force_exit:
            sys.exit(1)

    def signal_handler(self, sig, _):
        """Handle interrupt signals."""
        print(f"\nSignal {sig} received. Restoring original state...")
        self.restore(force_exit=True)

    def __enter__(self):
        """Install our handler for SIGINT/SIGTERM, saving the old ones."""
        for signum in (signal.SIGINT, signal.SIGTERM):
            self.old_handler[signum] = signal.getsignal(signum)
            signal.signal(signum, self.signal_handler)
        return self

    def __exit__(self, *args):
        """Restore signals at the end of with block."""
        self.restore()
378
def parse_commit_range(value):
    """Parse a "begin[..end]" commit range into a (begin, end) tuple.

    A missing or empty end defaults to "HEAD".  Raises
    argparse.ArgumentTypeError when the begin part is empty, so argparse
    reports it as a usage error.
    """
    if ".." not in value:
        begin = value
        end = "HEAD"
    else:
        begin, _, end = value.partition("..")
        if not end:
            end = "HEAD"

    if not begin:
        # BUGFIX: typo in the user-visible message ("begginning")
        raise argparse.ArgumentTypeError("Need a commit beginning")

    print(f"Range: {begin} to {end}")

    return begin, end
396
397
def main():
    """Parse arguments, generate kernel-doc output for both commits, diff it."""
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process – if supplied the --full flag is ignored")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    if args.files and args.full:
        # BUGFIX: raising argparse.ArgumentError with a plain bool crashed
        # with AttributeError (it expects an argparse Action).  parser.error()
        # prints usage and exits with status 2 instead.
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    old_ref, new_ref = args.commits

    # BUGFIX: validate the refs as typed *before* shortening them;
    # get_short_hash() returns "" on failure, which previously produced
    # the useless message "Commit '' does not exist".
    for ref in (old_ref, new_ref):
        if not git_helper.is_valid_commit(ref):
            raise RuntimeError(f"Commit '{ref}' does not exist")

    old_commit = git_helper.get_short_hash(old_ref)
    new_commit = git_helper.get_short_hash(new_ref)

    # Check for uncommitted changes
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        # BUGFIX: this was a plain "if", which always overrode --full with
        # a partial scan whenever no explicit file list was given.
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    tmp_dir = DIR_NAME["tmp"]
    out_path = DIR_NAME[diff_type]

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice (ArgumentError with a
    # list also crashed; a plain RuntimeError matches the rest of main())
    if not kdoc_files:
        raise RuntimeError("No kernel-doc references found")

    # NOTE(review): "" when HEAD is detached -- restore would then fail
    original_head = git_helper.get_current_branch()

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        if args.clean or diff_type == "no-cache":
            for cache_dir in (old_cache, new_cache):
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        old_success = True
        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)

        new_success = True
        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)

    if not (old_success and new_success):
        raise RuntimeError("Failed to generate documentation")

    if not args.regression:
        diff_manager = DiffManager()
        diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()
509