#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0

"""
This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit as of the translation commit
(ordered by author date) and the latest English commit from HEAD. If
they differ, it reports the file and the commits that need to be updated.

The usage is as follows:
- ./scripts/checktransupdate.py -l zh_CN
This will print all the files that need to be updated or translated in the zh_CN locale.
- ./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
This will only print the status of the specified file.

The output is something like:
Documentation/dev-tools/kfence.rst
No translation in the locale of zh_CN

Documentation/translations/zh_CN/dev-tools/testing-overview.rst
commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
1 commits needs resolving in total
"""

import os
import re
import subprocess
import time
import logging
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime


# Date layout produced by git's %aD / %cD (RFC 2822 style) placeholders.
_GIT_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %z"

# Translation commit messages may name the origin commit they were synced to
# using a 12-character abbreviated hash, in one of two conventional phrasings.
_HASH = r'([0-9a-f]{12})'
_ORIGIN_HASH_PATTERNS = (
    # pattern 1: contains "update to commit HASH"
    re.compile(rf'update to commit {_HASH}'),
    # pattern 2: contains "Update the translation through commit HASH"
    re.compile(rf'Update the translation through commit {_HASH}'),
)


def _run_git(args):
    """Run ``git`` with the argument list *args* and return its stdout as text.

    The command is executed without a shell, so file paths and revisions are
    passed through verbatim (no quoting or injection problems). A non-zero
    exit status is not an error here: callers interpret empty output as
    "nothing found". If git cannot be started at all, the error is logged
    and an empty string is returned.
    """
    command = ["git", *args]
    logging.debug(" ".join(command))
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            check=False,
            encoding="utf-8",
            errors="replace",
        )
    except OSError as err:
        logging.error("Cannot run git: %s", err)
        return ""
    return completed.stdout


def get_origin_path(file_path):
    """Get the origin path from the translation path

    Drops the ``translations/<locale>`` pair of components from *file_path*.
    Raises ValueError if *file_path* has no ``translations`` component.
    """
    paths = file_path.split("/")
    tidx = paths.index("translations")
    # skip "translations" and the locale directory that follows it
    return "/".join(paths[:tidx] + paths[tidx + 2:])


def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file

    Returns a dict with the keys ``hash``, ``author_date``, ``commit_date``
    (timezone-aware datetimes) and ``message`` (list of body lines), or None
    when git reports no commit.
    """
    output = _run_git(
        ["log", "--pretty=format:%H%n%aD%n%cD%n%n%B", commit, "-1", "--", file_path]
    )
    result = output.split("\n")
    if len(result) <= 1:
        return None

    logging.debug("Result: %s", result[0])

    # Layout: hash, author date, commit date, blank line, then the body.
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], _GIT_DATE_FORMAT),
        "commit_date": datetime.strptime(result[2], _GIT_DATE_FORMAT),
        "message": result[4:],
    }


def get_origin_from_trans(origin_path, t_from_head):
    """Get the latest origin commit from the translation commit

    Starts at the translation commit and walks back through the history of
    *origin_path* until it finds a commit whose author date is not later
    than the translation's author date. Returns that commit dict or None.
    """
    o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"])
    while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]:
        o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^")
    if o_from_t is not None:
        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
    return o_from_t


def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Get the latest origin commit from the formatted translation commit:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    Returns the origin commit dict, or None when the translation commit
    message names no hash or git finds no matching commit.
    """
    origin_commit_hash = None
    for line in t_from_head["message"]:
        for pattern in _ORIGIN_HASH_PATTERNS:
            match = pattern.search(line)
            if match:
                origin_commit_hash = match.group(1)
                break
        if origin_commit_hash is not None:
            # the first line that matches wins
            break
    if origin_commit_hash is None:
        return None
    o_from_t = get_latest_commit_from(origin_path, origin_commit_hash)
    if o_from_t is not None:
        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
    return o_from_t


def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file

    Despite the name, this returns the list of full commit hashes reported
    by ``git log commit1...commit2 -- opath`` (empty lines removed), not a
    number.
    """
    output = _run_git(
        ["log", "--pretty=format:%H", f"{commit1}...{commit2}", "--", opath]
    )
    # filter out empty lines
    return [line for line in output.split("\n") if line != ""]


def pretty_output(commit):
    """Pretty print the commit message

    Returns a string of the form ``<abbrev-hash> ("<subject>")``.
    """
    return _run_git(["log", '--pretty=format:%h ("%s")', "-1", commit])


def valid_commit(commit):
    """Check if the commit is valid or not

    A commit is ignored when its one-line summary contains "Merge tag".
    """
    msg = pretty_output(commit)
    return "Merge tag" not in msg


def check_per_file(file_path):
    """Check the translation status for the specified file

    Logs the origin commits that the translation at *file_path* has not
    caught up with yet. Problems are reported through logging; nothing is
    returned.
    """
    opath = get_origin_path(file_path)

    if not os.path.isfile(opath):
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
    if o_from_t is None:
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(
        opath, o_from_t["hash"], o_from_head["hash"]
    )
    count = 0
    for commit in commits:
        if valid_commit(commit):
            logging.info("commit %s", pretty_output(commit))
            count += 1
    logging.info("%d commits needs resolving in total\n", count)


def valid_locales(locale):
    """Check if the locale is valid or not

    Used as an argparse ``type=`` callback: returns *locale* unchanged when
    Documentation/translations/<locale> exists next to this script's parent
    directory, otherwise raises ArgumentTypeError.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale


def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """List all files with the specified suffix in the folder and its subfolders

    Any directory whose base name is in *exclude_folders* (including
    *folder* itself) is skipped together with everything below it.
    """
    files = []
    stack = [folder]

    while stack:
        pwd = stack.pop()
        # filter out the exclude folders
        if os.path.basename(pwd) in exclude_folders:
            continue
        # list all files and folders
        for item in os.listdir(pwd):
            ab_item = os.path.join(pwd, item)
            if os.path.isdir(ab_item):
                stack.append(ab_item)
            elif ab_item.endswith(include_suffix):
                files.append(ab_item)

    return files


class DmesgFormatter(logging.Formatter):
    """Custom dmesg logging formatter"""
    def format(self, record):
        """Return the record's message prefixed with ``[<seconds>]``."""
        timestamp = time.time()
        formatted_time = f"[{timestamp:>10.6f}]"
        log_message = f"{formatted_time} {record.getMessage()}"
        return log_message


def config_logging(log_level, log_file="checktransupdate.log"):
    """configure logging based on the log level

    Attaches a console handler and a file handler (writing to *log_file*)
    to the root logger, both using DmesgFormatter at *log_level*.
    """
    # set up the root logger
    logger = logging.getLogger()
    logger.setLevel(log_level)

    formatter = DmesgFormatter()
    # console handler first, then file handler
    for handler in (logging.StreamHandler(), logging.FileHandler(log_file)):
        handler.setLevel(log_level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)


def main():
    """Main function of the script"""
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        official_files = list_files_with_excluding_folders(
            os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst"
        )

        for file in official_files:
            # split the path into parts
            path_parts = file.split(os.sep)
            # find the index of the "Documentation" directory
            kindex = path_parts.index("Documentation")
            # insert the translations and locale after the Documentation directory
            new_path_parts = (
                path_parts[:kindex + 1]
                + ["translations", args.locale]
                + path_parts[kindex + 1:]
            )
            # join the path parts back together
            new_file = os.sep.join(new_path_parts)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)

    # Resolve against the current directory before changing it below.
    files = [os.path.relpath(os.path.abspath(x), linux_path) for x in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)


if __name__ == "__main__":
    main()