xref: /linux/tools/docs/checktransupdate.py (revision d37366cac4ccfb71c77e9620f63e3a6fcdf3816c)
150c0fa7fSDongliang Mu#!/usr/bin/env python3
250c0fa7fSDongliang Mu# SPDX-License-Identifier: GPL-2.0
350c0fa7fSDongliang Mu
"""
This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit as of the translation commit
(ordered by author date) and the latest English commit from HEAD. If
the two differ, it reports the file and the commits that need to be resolved.

The usage is as follows:
- tools/docs/checktransupdate.py -l zh_CN
This will print all the files that need to be updated or translated in the zh_CN locale.
- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
This will only print the status of the specified file.

The output is something like:
Documentation/dev-tools/kfence.rst
No translation in the locale of zh_CN

Documentation/translations/zh_CN/dev-tools/testing-overview.rst
commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
1 commits needs resolving in total
"""
2550c0fa7fSDongliang Mu
import logging
import os
import re
import subprocess
import time
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime
3250c0fa7fSDongliang Mu
3350c0fa7fSDongliang Mu
def get_origin_path(file_path):
    """Get the origin path from the translation path.

    The "translations" path component and the single component that follows
    it are removed; everything else is kept in order and re-joined with "/".
    A path without a "translations" component makes ``list.index`` raise
    ``ValueError``.
    """
    components = file_path.split("/")
    marker = components.index("translations")
    # drop "translations" together with the component right after it
    del components[marker : marker + 2]
    return "/".join(components)
4150c0fa7fSDongliang Mu
4250c0fa7fSDongliang Mu
def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file.

    Runs ``git log -1`` starting at ``commit`` and limited to ``file_path``.

    Args:
        file_path: path of the file, as it should be handed to git.
        commit: revision to start from (a hash, ``HEAD``, ``<hash>^``, ...).

    Returns:
        A dict with the keys ``hash``, ``author_date``, ``commit_date`` (both
        parsed into timezone-aware ``datetime`` objects) and ``message`` (the
        commit message as a list of lines), or ``None`` when git produced no
        usable output or could not be started.
    """
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact, and subprocess.run() waits for git and closes
    # the pipe, which the os.popen() based version never did.
    command = [
        "git",
        "log",
        "--pretty=format:%H%n%aD%n%cD%n%n%B",
        commit,
        "-1",
        "--",
        file_path,
    ]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",  # a badly encoded commit message must not abort the run
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return None

    result = proc.stdout.split("\n")
    # hash, author date and commit date are all indexed below
    if len(result) < 3:
        return None

    logging.debug("Result: %s", result[0])

    date_format = "%a, %d %b %Y %H:%M:%S %z"
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], date_format),
        "commit_date": datetime.strptime(result[2], date_format),
        # result[3] is the blank line the format string puts before the body
        "message": result[4:],
    }
6150c0fa7fSDongliang Mu
6250c0fa7fSDongliang Mu
def get_origin_from_trans(origin_path, t_from_head):
    """Get the latest origin commit from the translation commit.

    Starts with the latest commit of ``origin_path`` reachable from the
    translation commit, then keeps stepping to the parent of the current
    candidate while the candidate's author date is later than the author date
    of the translation commit. Returns the commit dict found, or ``None``
    when the lookup runs out of commits.
    """
    candidate = get_latest_commit_from(origin_path, t_from_head["hash"])
    while candidate is not None:
        if not candidate["author_date"] > t_from_head["author_date"]:
            # not authored after the translation: this is the tracked commit
            logging.debug("tracked origin commit id: %s", candidate["hash"])
            break
        candidate = get_latest_commit_from(origin_path, candidate["hash"] + "^")
    return candidate
7150c0fa7fSDongliang Mu
7250c0fa7fSDongliang Mu
def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Get the latest origin commit from the formatted translation commit:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    The message lines are scanned in order; on each line form (1) is tried
    before form (2), and the first hit wins. Returns ``None`` when no line
    carries such a reference, otherwise whatever the lookup of that hash for
    ``origin_path`` yields.
    """
    # capture group for a 12-character abbreviated commit hash
    hash_group = r'([0-9a-f]{12})'
    patterns = (
        # form 1: contains "update to commit HASH"
        re.compile(rf'update to commit {hash_group}'),
        # form 2: contains "Update the translation through commit HASH"
        re.compile(rf'Update the translation through commit {hash_group}'),
    )

    # lazily walk (line, pattern) pairs and stop at the first match
    hits = (
        found.group(1)
        for line in t_from_head["message"]
        for found in (pattern.search(line) for pattern in patterns)
        if found
    )
    tracked_hash = next(hits, None)
    if tracked_hash is None:
        return None

    origin_commit = get_latest_commit_from(origin_path, tracked_hash)
    if origin_commit is not None:
        logging.debug("tracked origin commit id: %s", origin_commit["hash"])
    return origin_commit
10335293ebbSZhiyu Zhang
10435293ebbSZhiyu Zhang
def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file.

    Despite its name, this returns the commits themselves rather than a count.

    Args:
        opath: path of the file, as it should be handed to git.
        commit1: first revision of the range.
        commit2: second revision of the range.

    Returns:
        The list of full commit hashes printed by
        ``git log commit1...commit2 -- opath`` (empty lines removed); an empty
        list when git prints nothing or could not be started.
    """
    # An argument list (no shell) keeps unusual paths intact, and
    # subprocess.run() closes the pipe that os.popen() used to leave open.
    command = [
        "git",
        "log",
        "--pretty=format:%H",
        f"{commit1}...{commit2}",
        "--",
        opath,
    ]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return []
    # filter out empty lines
    return [line for line in proc.stdout.split("\n") if line != ""]
11450c0fa7fSDongliang Mu
11550c0fa7fSDongliang Mu
def pretty_output(commit):
    """Pretty print the commit message.

    Args:
        commit: revision to describe.

    Returns:
        The text printed by git for the format ``%h ("%s")``, or an empty
        string when git prints nothing or could not be started.
    """
    # No shell is involved, so the quotes of the format need no escaping and
    # subprocess.run() closes the pipe that os.popen() used to leave open.
    command = ["git", "log", '--pretty=format:%h ("%s")', "-1", commit]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",  # a badly encoded subject must not abort the run
            check=False,
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return ""
    return proc.stdout
12250c0fa7fSDongliang Mu
12350c0fa7fSDongliang Mu
def valid_commit(commit):
    """Check if the commit is valid or not.

    A commit is rejected when its pretty-printed form contains the text
    "Merge tag"; every other commit is accepted.
    """
    summary = pretty_output(commit)
    if "Merge tag" in summary:
        return False
    return True
12863e96ce0SDongliang Mu
def check_per_file(file_path):
    """Check the translation status for the specified file.

    Looks up the latest commit of the origin document and of the translation,
    works out which origin commit the translation corresponds to, and logs
    every origin commit in between that still needs resolving. Problems are
    reported through ``logging.error`` and end the check for this file.

    Args:
        file_path: path of the translated file; it must contain a
            "translations" component so the origin path can be derived.
    """
    try:
        opath = get_origin_path(file_path)
    except ValueError:
        # no "translations" component: report it instead of a traceback
        logging.error("Cannot find the origin path for %s", file_path)
        return

    if not os.path.isfile(opath):
        # the path used to be logged as the literal text "{file_path}"
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
    if o_from_t is None:
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(
        opath, o_from_t["hash"], o_from_head["hash"]
    )
    count = 0
    for commit in commits:
        # commits rejected by valid_commit() are neither listed nor counted
        if valid_commit(commit):
            logging.info("commit %s", pretty_output(commit))
            count += 1
    logging.info("%d commits needs resolving in total\n", count)
16663e96ce0SDongliang Mu
16763e96ce0SDongliang Mu
def valid_locales(locale):
    """Check if the locale is valid or not.

    Used as an argparse ``type=`` callable: a locale is valid when
    ``Documentation/translations/<locale>`` is a directory, looked up two
    levels above the directory containing this script.

    Args:
        locale: locale name given on the command line, e.g. ``zh_CN``.

    Returns:
        The locale, unchanged.

    Raises:
        ArgumentTypeError: the locale is empty or has no such directory.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")
    # an empty locale would resolve to the translations directory itself
    if not locale or not os.path.isdir(
        f"{linux_path}/Documentation/translations/{locale}"
    ):
        # the message used to contain the literal text "{locale}"
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
17563e96ce0SDongliang Mu
17663e96ce0SDongliang Mu
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """List all files with the specified suffix in the folder and its subfolders.

    Directories whose base name appears in ``exclude_folders`` are not
    entered (this applies to ``folder`` itself as well). A non-directory
    entry is kept when its joined path ends with ``include_suffix``.
    Returns the list of joined paths.
    """
    matched = []
    pending = [folder]

    while pending:
        current = pending.pop()
        # skip excluded directories entirely
        if os.path.basename(current) in exclude_folders:
            continue
        for name in os.listdir(current):
            entry = os.path.join(current, name)
            if os.path.isdir(entry):
                # visit sub-directories later
                pending.append(entry)
                continue
            if entry.endswith(include_suffix):
                matched.append(entry)

    return matched
19763e96ce0SDongliang Mu
19863e96ce0SDongliang Mu
class DmesgFormatter(logging.Formatter):
    """Custom dmesg logging formatter"""

    def format(self, record):
        """Return the record's message prefixed with a bracketed timestamp.

        The timestamp is ``time.time()`` taken when the record is formatted,
        printed with six decimals.
        """
        return f"[{time.time():>10.6f}] {record.getMessage()}"
20663e96ce0SDongliang Mu
20763e96ce0SDongliang Mu
def config_logging(log_level, log_file="checktransupdate.log"):
    """configure logging based on the log level

    Sets ``log_level`` on the root logger and attaches two handlers to it, a
    console handler and a file handler writing to ``log_file``. Both handlers
    use the same level and share one DmesgFormatter instance.
    """
    # everything is attached to the root logger
    root = logging.getLogger()
    root.setLevel(log_level)

    # console first, then the log file
    handlers = (logging.StreamHandler(), logging.FileHandler(log_file))

    shared_formatter = DmesgFormatter()
    for handler in handlers:
        handler.setLevel(log_level)
        handler.setFormatter(shared_formatter)
        root.addHandler(handler)
23050c0fa7fSDongliang Mu
23150c0fa7fSDongliang Mu
def main():
    """Main function of the script.

    Parses the command line, configures logging, builds the list of
    translated files to inspect (the ones given on the command line, or every
    existing translation of a ``.rst`` document for the selected locale) and
    checks each of them from the root of the source tree.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log argument
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    # pass --logfile on: it used to be parsed and then silently ignored
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        doc_root = os.path.join(linux_path, "Documentation")
        # ".rst" with the dot, so names that merely end in "rst" are skipped
        official_files = list_files_with_excluding_folders(
            doc_root, ["translations", "output"], ".rst"
        )

        for file in official_files:
            # Rebuild the path relative to the Documentation root instead of
            # searching the absolute path for a "Documentation" component,
            # which picks the wrong one when a parent directory has that name.
            rel_path = os.path.relpath(file, doc_root)
            new_file = os.path.join(doc_root, "translations", args.locale, rel_path)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)

    # make every path relative to the linux root before changing directory
    files = [os.path.relpath(os.path.abspath(x), linux_path) for x in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
30450c0fa7fSDongliang Mu
30550c0fa7fSDongliang Mu
# Run the check only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
308