xref: /linux/scripts/checktransupdate.py (revision b1cce98493a095925fb51be045ccf6e08edb4aa0)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit as of the translation commit
(ordered by author date) and the latest English commit from HEAD. If
they differ, it reports the file and the commits that need to be updated.
10
11The usage is as follows:
12- ./scripts/checktransupdate.py -l zh_CN
13This will print all the files that need to be updated or translated in the zh_CN locale.
14- ./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
15This will only print the status of the specified file.
16
17The output is something like:
18Documentation/dev-tools/kfence.rst
19No translation in the locale of zh_CN
20
21Documentation/translations/zh_CN/dev-tools/testing-overview.rst
22commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
231 commits needs resolving in total
24"""
25
import logging
import os
import re
import subprocess
import time
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime
32
33
def get_origin_path(file_path):
    """Map a translation path to the path of the document it translates.

    The "translations" component and the component right after it (the
    locale directory) are removed, so
    ``Documentation/translations/zh_CN/a/b.rst`` becomes
    ``Documentation/a/b.rst``.

    Raises ValueError when file_path has no "translations" component.
    """
    parts = file_path.split("/")
    marker = parts.index("translations")
    # Drop "translations" together with the locale directory following it.
    del parts[marker : marker + 2]
    return "/".join(parts)
41
42
def get_latest_commit_from(file_path, commit):
    """Get the latest commit reachable from ``commit`` that touches ``file_path``.

    Runs ``git log -1`` in the current working directory and parses its
    output.

    Returns None when git prints nothing usable (for example unknown
    revision, no commit touching the file, or git cannot be started).
    Otherwise returns a dict with the keys:

    - "hash": full commit hash
    - "author_date": author date as a timezone-aware datetime
    - "commit_date": committer date as a timezone-aware datetime
    - "message": the commit message as a list of lines
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact, and subprocess.run() reaps the child and closes
    # the pipe, which the former os.popen() call never did.
    command = [
        "git",
        "log",
        "--pretty=format:%H%n%aD%n%cD%n%n%B",
        commit,
        "-1",
        "--",
        file_path,
    ]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command, stdout=subprocess.PIPE, text=True, check=False
        )
    except OSError as err:
        # git is missing or not executable: report it as "no commit found",
        # which is what an empty pipe produced before.
        logging.error("Cannot run git: %s", err)
        return None

    result = proc.stdout.split("\n")
    # The format yields hash, author date and committer date on the first
    # three lines; anything shorter cannot be parsed.
    if len(result) < 3:
        return None

    logging.debug("Result: %s", result[0])

    date_format = "%a, %d %b %Y %H:%M:%S %z"
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], date_format),
        "commit_date": datetime.strptime(result[2], date_format),
        # result[3] is the blank separator line emitted by the format.
        "message": result[4:],
    }
61
62
def get_origin_from_trans(origin_path, t_from_head):
    """Find the origin commit that the translation commit was based on.

    Starting from the translation commit, walk back through the history of
    origin_path until a commit is found whose author date is not later than
    the author date of the translation commit. Returns that commit (as
    returned by get_latest_commit_from()), or None when the history is
    exhausted first.
    """
    translation_date = t_from_head["author_date"]
    candidate = get_latest_commit_from(origin_path, t_from_head["hash"])
    while candidate is not None:
        if candidate["author_date"] <= translation_date:
            logging.debug("tracked origin commit id: %s", candidate["hash"])
            break
        # Authored after the translation: step to the parent and retry.
        candidate = get_latest_commit_from(origin_path, candidate["hash"] + "^")
    return candidate
71
72
def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Find the origin commit named in the translation commit message.

    The message lines are scanned for one of these phrases:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    The first line containing either phrase decides the hash; within one
    line phrase (1) is tried before phrase (2). Returns None when no line
    matches, otherwise whatever get_latest_commit_from() returns for
    origin_path at that hash.
    """
    # HASH is captured as exactly 12 lowercase hexadecimal characters.
    hash_group = r'([0-9a-f]{12})'
    patterns = (
        re.compile(rf'update to commit {hash_group}'),
        re.compile(rf'Update the translation through commit {hash_group}'),
    )

    def find_hash(line):
        # Try the phrases in order and return the hash of the first hit.
        for pattern in patterns:
            found = pattern.search(line)
            if found:
                return found.group(1)
        return None

    candidates = (find_hash(line) for line in t_from_head["message"])
    origin_commit_hash = next((h for h in candidates if h is not None), None)
    if origin_commit_hash is None:
        return None

    tracked = get_latest_commit_from(origin_path, origin_commit_hash)
    if tracked is not None:
        logging.debug("tracked origin commit id: %s", tracked["hash"])
    return tracked
103
104
def get_commits_count_between(opath, commit1, commit2):
    """List the commits touching ``opath`` in the range ``commit1...commit2``.

    Despite the name, this returns the list of full commit hashes printed by
    ``git log`` (one per output line, empty lines dropped), not a number.
    An empty list is returned when git prints nothing or cannot be started.
    """
    # An argument list (no shell) keeps unusual path names intact, and
    # subprocess.run() closes the pipe that os.popen() used to leak.
    command = [
        "git",
        "log",
        "--pretty=format:%H",
        f"{commit1}...{commit2}",
        "--",
        opath,
    ]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command, stdout=subprocess.PIPE, text=True, check=False
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return []
    # filter out empty lines
    return [line for line in proc.stdout.split("\n") if line != ""]
114
115
def pretty_output(commit):
    """Return the one-line summary of ``commit`` as ``<short hash> ("<subject>")``.

    The text is whatever ``git log -1`` prints for the commit; an empty
    string is returned when git prints nothing or cannot be started.
    """
    # No shell is involved, so the format needs no extra quoting, and
    # subprocess.run() closes the pipe that os.popen() used to leak.
    command = ["git", "log", '--pretty=format:%h ("%s")', "-1", commit]
    logging.debug("%s", " ".join(command))
    try:
        proc = subprocess.run(
            command, stdout=subprocess.PIPE, text=True, check=False
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return ""
    return proc.stdout
122
123
def valid_commit(commit):
    """Tell whether the commit should be reported.

    A commit is rejected when its pretty_output() summary contains the text
    "Merge tag"; every other commit is accepted.
    """
    summary = pretty_output(commit)
    if "Merge tag" in summary:
        return False
    return True
128
def check_per_file(file_path):
    """Check the translation status for the specified file.

    file_path is the path of a translated document, relative to the current
    working directory. The function logs (at INFO level) every origin commit
    that was made after the origin commit the translation is based on,
    followed by a total. Problems are logged at ERROR level and end the
    check for this file; nothing is returned or raised.
    """
    try:
        opath = get_origin_path(file_path)
    except ValueError:
        # file_path has no "translations" component, so it is not a
        # translation and has no origin document.
        logging.error("Cannot find the origin path for %s", file_path)
        return

    if not os.path.isfile(opath):
        # Use lazy %-formatting; the previous message was a plain string
        # and printed the placeholder "{file_path}" literally.
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
    if o_from_t is None:
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(
        opath, o_from_t["hash"], o_from_head["hash"]
    )
    count = 0
    for commit in commits:
        # Commits rejected by valid_commit() are neither listed nor counted.
        if valid_commit(commit):
            logging.info("commit %s", pretty_output(commit))
            count += 1
    logging.info("%d commits needs resolving in total\n", count)
166
167
def valid_locales(locale):
    """Validate a locale name for use as an argparse ``type=`` callable.

    A locale is valid when ``Documentation/translations/<locale>`` exists as
    a directory in the tree that contains this script's parent directory.

    Returns the locale unchanged when valid; raises ArgumentTypeError naming
    the rejected locale otherwise.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")
    locale_dir = os.path.join(linux_path, "Documentation", "translations", locale)
    if not os.path.isdir(locale_dir):
        # f-string so the offending value is shown; the message used to
        # print the placeholder "{locale}" literally.
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
175
176
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """Collect the files under folder whose path ends with include_suffix.

    The tree is walked iteratively. Any directory (including folder itself)
    whose base name appears in exclude_folders is skipped together with
    everything below it. Returns the matching paths, each built by joining
    onto folder.
    """
    matched = []
    pending = [folder]

    while pending:
        current = pending.pop()
        # Excluded directories are not listed at all.
        if os.path.basename(current) in exclude_folders:
            continue
        for name in os.listdir(current):
            full_path = os.path.join(current, name)
            if os.path.isdir(full_path):
                # Visit sub-directories later.
                pending.append(full_path)
            elif full_path.endswith(include_suffix):
                matched.append(full_path)

    return matched
197
198
class DmesgFormatter(logging.Formatter):
    """Logging formatter producing dmesg-like "[timestamp] message" lines"""

    def format(self, record):
        """Return the record's message prefixed with a bracketed timestamp.

        The timestamp is time.time() read while formatting, printed with six
        decimals and right-aligned to a minimum width of ten characters.
        """
        now = time.time()
        return f"[{now:>10.6f}] {record.getMessage()}"
206
207
def config_logging(log_level, log_file="checktransupdate.log"):
    """Set up the root logger to write to the console and to log_file.

    The root logger and both handlers are set to log_level, and both
    handlers share one DmesgFormatter. Handlers are added to the root
    logger; existing handlers are left in place.
    """
    root = logging.getLogger()
    root.setLevel(log_level)

    # Console handler first, then the file handler, sharing one formatter.
    handlers = (logging.StreamHandler(), logging.FileHandler(log_file))
    formatter = DmesgFormatter()
    for handler in handlers:
        handler.setLevel(log_level)
        handler.setFormatter(formatter)
    for handler in handlers:
        root.addHandler(handler)
230
231
def main():
    """Main function of the script.

    Parses the command line, configures logging, builds the list of
    translation files to check (either the files given on the command line
    or every translation found for the selected locale), then changes into
    the kernel tree root and checks each file with check_per_file().
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments.
    # --logfile used to be parsed but never passed on, so it had no effect.
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        doc_root = os.path.join(linux_path, "Documentation")
        # Match the ".rst" extension; the bare suffix "rst" also matched
        # any file whose name merely ends in those three letters.
        official_files = list_files_with_excluding_folders(
            doc_root, ["translations", "output"], ".rst"
        )

        for file in official_files:
            # Mirror the document's location below
            # Documentation/translations/<locale>/. Working relative to
            # doc_root avoids picking a parent directory that happens to be
            # named "Documentation" as well.
            rel_to_doc = os.path.relpath(file, doc_root)
            new_file = os.path.join(
                doc_root, "translations", args.locale, rel_to_doc
            )
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)

    # Paths are resolved against the invocation directory before the chdir
    # below, then expressed relative to the kernel tree root.
    files = [os.path.relpath(os.path.abspath(x), linux_path) for x in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
304
305
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
308