#!/usr/bin/env python3

# This tool reads a disk image in any format and converts it to qcow2,
# writing the result directly to stdout.
#
# Copyright (C) 2024 Igalia, S.L.
#
# Authors: Alberto Garcia <berto@igalia.com>
#          Madeeha Javed <javed@igalia.com>
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# qcow2 files produced by this script are always arranged like this:
#
# - qcow2 header
# - refcount table
# - refcount blocks
# - L1 table
# - L2 tables
# - Data clusters
#
# A note about variable names: in qcow2 there is one refcount table
# and one (active) L1 table, although each can occupy several
# clusters. For the sake of simplicity the code sometimes talks about
# refcount tables and L1 tables when referring to those clusters.

import argparse
import errno
import math
import os
import signal
import struct
import subprocess
import sys
import tempfile
import time
from contextlib import contextmanager

QCOW2_DEFAULT_CLUSTER_SIZE = 65536
QCOW2_DEFAULT_REFCOUNT_BITS = 16
QCOW2_FEATURE_NAME_TABLE = 0x6803F857
QCOW2_DATA_FILE_NAME_STRING = 0x44415441
QCOW2_V3_HEADER_LENGTH = 112  # Header length in QEMU 9.0. Must be a multiple of 8
QCOW2_INCOMPAT_DATA_FILE_BIT = 2
QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1
QCOW_OFLAG_COPIED = 1 << 63
QEMU_STORAGE_DAEMON = "qemu-storage-daemon"


def bitmap_set(bitmap, idx):
    """Set bit number `idx` in `bitmap` (a mutable byte sequence, LSB first)."""
    bitmap[idx // 8] |= 1 << (idx % 8)


def bitmap_is_set(bitmap, idx):
    """Return True if bit number `idx` is set in `bitmap`."""
    return (bitmap[idx // 8] & (1 << (idx % 8))) != 0


def bitmap_iterator(bitmap, length):
    """Yield, in increasing order, the indexes below `length` whose bit is set."""
    for idx in range(length):
        if bitmap_is_set(bitmap, idx):
            yield idx


def _ceil_div(num, d):
    """Return ceil(num / d) using integer arithmetic only.

    Going through float division (math.ceil(num / d)) silently loses
    precision once the operands no longer fit in a double's mantissa.
    """
    return -(-num // d)


def align_up(num, d):
    """Return the smallest multiple of `d` that is greater than or equal to `num`."""
    return d * _ceil_div(num, d)


def _escape_option_value(value):
    """Escape `value` for use inside a QEMU "key=value,key=value" option string.

    A literal comma has to be doubled, otherwise it would be parsed as
    the separator that starts the next key.
    """
    return value.replace(",", ",,")


# Holes in the input file contain only zeroes so we can skip them and
# save time. This function returns the indexes of the clusters that
# are known to contain data. Those are the ones that we need to read.
def clusters_with_data(fd, cluster_size):
    """Yield the index of every cluster of `fd` that overlaps a data extent.

    Extents are discovered with lseek(SEEK_DATA) / lseek(SEEK_HOLE).
    The end of each extent is rounded up to a cluster boundary, so a
    cluster is never yielded twice. ENXIO from lseek() marks the end of
    the file; any other OSError is propagated.
    """
    data_to = 0
    while True:
        try:
            data_from = os.lseek(fd, data_to, os.SEEK_DATA)
            data_to = align_up(os.lseek(fd, data_from, os.SEEK_HOLE), cluster_size)
        except OSError as err:
            if err.errno == errno.ENXIO:  # End of file reached
                break
            raise
        yield from range(data_from // cluster_size, data_to // cluster_size)


# write_qcow2_content() expects a raw input file. If we have a different
# format we can use qemu-storage-daemon to make it appear as raw.
@contextmanager
def get_input_as_raw_file(input_file, input_format):
    """Context manager yielding the path of a raw view of `input_file`.

    For raw input the original path is yielded unchanged. For any other
    format a qemu-storage-daemon is started that exports the image
    through FUSE on a temporary file, and the path of that file is
    yielded. On exit the daemon is terminated and the temporary files
    are removed. The program exits with an error message if the daemon
    cannot be started.
    """
    if input_format == "raw":
        yield input_file
        return

    # Create the temporary names before entering the try block so that
    # the cleanup code below never refers to an undefined variable.
    temp_dir = tempfile.mkdtemp()
    pid_file = os.path.join(temp_dir, "pid")
    raw_file = os.path.join(temp_dir, "raw")
    try:
        # The FUSE export needs an existing regular file as its mount point
        open(raw_file, "wb").close()
        input_opt = _escape_option_value(input_file)
        mountpoint_opt = _escape_option_value(raw_file)
        ret = subprocess.run(
            [
                QEMU_STORAGE_DAEMON,
                "--daemonize",
                "--pidfile", pid_file,
                "--blockdev", f"driver=file,node-name=file0,filename={input_opt},read-only=on",
                "--blockdev", f"driver={input_format},node-name=disk0,file=file0,read-only=on",
                "--export", f"type=fuse,id=export0,node-name=disk0,mountpoint={mountpoint_opt},writable=off",
            ],
            capture_output=True,
        )
        if ret.returncode != 0:
            sys.exit("[Error] Could not start the qemu-storage-daemon:\n" +
                     ret.stderr.decode().rstrip('\n'))
        yield raw_file
    finally:
        # Kill the storage daemon on exit
        # and remove all temporary files
        if os.path.exists(pid_file):
            with open(pid_file, "r") as f:
                pid = int(f.readline())
            os.kill(pid, signal.SIGTERM)
            # Wait until the pid file disappears before removing the mount point
            while os.path.exists(pid_file):
                time.sleep(0.1)
        if os.path.exists(raw_file):
            os.unlink(raw_file)
        os.rmdir(temp_dir)


def write_features(cluster, offset, data_file_name):
    """Write the qcow2 header extensions into `cluster` starting at `offset`.

    If `data_file_name` is not None an external data file name
    extension is written first (name encoded as UTF-8 and zero-padded
    to a multiple of 8 bytes). The feature name table extension is
    always written after it. `cluster` must be a writable buffer large
    enough to hold both extensions.
    """
    if data_file_name is not None:
        encoded_name = data_file_name.encode("utf-8")
        padded_name_len = align_up(len(encoded_name), 8)
        # The "s" format pads the name with zero bytes up to padded_name_len
        struct.pack_into(f">II{padded_name_len}s", cluster, offset,
                         QCOW2_DATA_FILE_NAME_STRING,
                         len(encoded_name),
                         encoded_name)
        offset += 8 + padded_name_len

    # Each entry is (feature type, bit number, feature name)
    qcow2_features = [
        # Incompatible
        (0, 0, "dirty bit"),
        (0, 1, "corrupt bit"),
        (0, 2, "external data file"),
        (0, 3, "compression type"),
        (0, 4, "extended L2 entries"),
        # Compatible
        (1, 0, "lazy refcounts"),
        # Autoclear
        (2, 0, "bitmaps"),
        (2, 1, "raw external data"),
    ]
    struct.pack_into(">I", cluster, offset, QCOW2_FEATURE_NAME_TABLE)
    struct.pack_into(">I", cluster, offset + 4, len(qcow2_features) * 48)
    offset += 8
    # Every table entry is 48 bytes: type, bit number and a 46 byte padded name
    for feature_type, feature_bit, feature_name in qcow2_features:
        struct.pack_into(">BB46s", cluster, offset,
                         feature_type, feature_bit, feature_name.encode("ascii"))
        offset += 48


def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw):
    """Convert the raw image `input_file` to qcow2 and write it to stdout.

    input_file:     path of the raw input image.
    cluster_size:   qcow2 cluster size in bytes.
    refcount_bits:  width in bits of each refcount entry (1, 2, 4, ..., 64).
    data_file_name: name of the external data file to record in the
                    header, or None to embed the data clusters in the
                    generated image.
    data_file_raw:  if True, mark every cluster as allocated and set the
                    "raw external data" autoclear bit. The input file
                    contents are not read in this mode.

    Raises ValueError if `data_file_raw` is set without `data_file_name`.
    """
    if data_file_raw:
        if data_file_name is None:
            # Without an external data file the data clusters would have
            # to be copied from the input, which is never opened here.
            raise ValueError("data_file_raw requires an external data file name")
        _write_qcow2_image(None, input_file, cluster_size, refcount_bits,
                           data_file_name, data_file_raw)
        return

    # Open the input file for reading and make sure that the
    # descriptor is closed even if the conversion fails midway.
    fd = os.open(input_file, os.O_RDONLY)
    try:
        _write_qcow2_image(fd, input_file, cluster_size, refcount_bits,
                           data_file_name, data_file_raw)
    finally:
        os.close(fd)


def _write_qcow2_image(fd, input_file, cluster_size, refcount_bits, data_file_name, data_file_raw):
    """Compute the image layout and write all of its clusters to stdout.

    `fd` is a descriptor open for reading on `input_file`. It may be
    None when `data_file_raw` is set, because the input contents are
    not needed in that case.
    """
    # Some basic values
    l1_entries_per_table = cluster_size // 8
    l2_entries_per_table = cluster_size // 8
    refcounts_per_table = cluster_size // 8
    refcounts_per_block = cluster_size * 8 // refcount_bits

    # Virtual disk size, number of data clusters and L1 entries
    disk_size = align_up(os.path.getsize(input_file), 512)
    total_data_clusters = _ceil_div(disk_size, cluster_size)
    l1_entries = _ceil_div(total_data_clusters, l2_entries_per_table)
    allocated_l1_tables = _ceil_div(l1_entries, l1_entries_per_table)

    # Max L1 table size is 32 MB (QCOW_MAX_L1_SIZE in block/qcow2.h)
    if (l1_entries * 8) > (32 * 1024 * 1024):
        sys.exit("[Error] The image size is too large. Try using a larger cluster size.")

    # Two bitmaps indicating which L1 and L2 entries are set
    l1_bitmap = bytearray(allocated_l1_tables * l1_entries_per_table // 8)
    l2_bitmap = bytearray(l1_entries * l2_entries_per_table // 8)
    allocated_l2_tables = 0
    allocated_data_clusters = 0

    if data_file_raw:
        # If data_file_raw is set then all clusters are allocated and
        # we don't need to read the input file at all.
        allocated_l2_tables = l1_entries
        for idx in range(l1_entries):
            bitmap_set(l1_bitmap, idx)
        for idx in range(total_data_clusters):
            bitmap_set(l2_bitmap, idx)
    else:
        zero_cluster = bytes(cluster_size)
        # Read all the clusters that contain data
        for idx in clusters_with_data(fd, cluster_size):
            cluster = os.pread(fd, cluster_size, cluster_size * idx)
            # If the last cluster is smaller than cluster_size pad it with zeroes
            if len(cluster) < cluster_size:
                cluster += bytes(cluster_size - len(cluster))
            # If a cluster has non-zero data then it must be allocated
            # in the output file and its L2 entry must be set
            if cluster != zero_cluster:
                bitmap_set(l2_bitmap, idx)
                allocated_data_clusters += 1
                # Allocated data clusters also need their corresponding L1 entry and L2 table
                l1_idx = idx // l2_entries_per_table
                if not bitmap_is_set(l1_bitmap, l1_idx):
                    bitmap_set(l1_bitmap, l1_idx)
                    allocated_l2_tables += 1

    # Total amount of allocated clusters excluding the refcount blocks and table
    total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables
    if data_file_name is None:
        total_allocated_clusters += allocated_data_clusters

    # Clusters allocated for the refcount blocks and table
    allocated_refcount_blocks = _ceil_div(total_allocated_clusters, refcounts_per_block)
    allocated_refcount_tables = _ceil_div(allocated_refcount_blocks, refcounts_per_table)

    # Now we have a problem because allocated_refcount_blocks and allocated_refcount_tables...
    # (a) increase total_allocated_clusters, and
    # (b) need to be recalculated when total_allocated_clusters is increased
    # So we need to repeat the calculation as long as the numbers change
    while True:
        new_total_allocated_clusters = (total_allocated_clusters +
                                        allocated_refcount_tables +
                                        allocated_refcount_blocks)
        new_allocated_refcount_blocks = _ceil_div(new_total_allocated_clusters,
                                                  refcounts_per_block)
        if new_allocated_refcount_blocks > allocated_refcount_blocks:
            allocated_refcount_blocks = new_allocated_refcount_blocks
            allocated_refcount_tables = _ceil_div(allocated_refcount_blocks,
                                                  refcounts_per_table)
        else:
            break

    # Now that we have the final numbers we can update total_allocated_clusters
    total_allocated_clusters += allocated_refcount_tables + allocated_refcount_blocks

    # At this point we have the exact number of clusters that the output
    # image is going to use so we can calculate all the offsets.
    # Cluster 0 holds the header, everything else follows it.
    current_cluster_idx = 1

    refcount_table_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_refcount_tables

    refcount_block_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_refcount_blocks

    l1_table_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_l1_tables

    l2_table_offset = current_cluster_idx * cluster_size
    current_cluster_idx += allocated_l2_tables

    data_clusters_offset = current_cluster_idx * cluster_size

    # Calculate some values used in the qcow2 header
    if allocated_l1_tables == 0:
        l1_table_offset = 0

    # The header stores both values as base-2 logarithms
    hdr_cluster_bits = int(math.log2(cluster_size))
    hdr_refcount_bits = int(math.log2(refcount_bits))
    hdr_length = QCOW2_V3_HEADER_LENGTH
    hdr_incompat_features = 0
    if data_file_name is not None:
        hdr_incompat_features |= 1 << QCOW2_INCOMPAT_DATA_FILE_BIT
    hdr_autoclear_features = 0
    if data_file_raw:
        hdr_autoclear_features |= 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT

    ### Write qcow2 header
    cluster = bytearray(cluster_size)
    struct.pack_into(">4sIQIIQIIQQIIQQQQII", cluster, 0,
        b"QFI\xfb",            # QCOW magic string
        3,                     # version
        0,                     # backing file offset
        0,                     # backing file sizes
        hdr_cluster_bits,
        disk_size,
        0,                     # encryption method
        l1_entries,
        l1_table_offset,
        refcount_table_offset,
        allocated_refcount_tables,
        0,                     # number of snapshots
        0,                     # snapshot table offset
        hdr_incompat_features,
        0,                     # compatible features
        hdr_autoclear_features,
        hdr_refcount_bits,
        hdr_length,
    )

    write_features(cluster, hdr_length, data_file_name)

    sys.stdout.buffer.write(cluster)

    ### Write refcount table
    cur_offset = refcount_block_offset
    remaining_refcount_table_entries = allocated_refcount_blocks  # Each entry is a pointer to a refcount block
    while remaining_refcount_table_entries > 0:
        cluster = bytearray(cluster_size)
        to_write = min(remaining_refcount_table_entries, refcounts_per_table)
        remaining_refcount_table_entries -= to_write
        for idx in range(to_write):
            struct.pack_into(">Q", cluster, idx * 8, cur_offset)
            cur_offset += cluster_size
        sys.stdout.buffer.write(cluster)

    ### Write refcount blocks
    remaining_refcount_block_entries = total_allocated_clusters  # One entry for each allocated cluster
    for _ in range(allocated_refcount_blocks):
        cluster = bytearray(cluster_size)
        to_write = min(remaining_refcount_block_entries, refcounts_per_block)
        remaining_refcount_block_entries -= to_write
        # All refcount entries contain the number 1. The only difference
        # is their bit width, defined when the image is created.
        for idx in range(to_write):
            if refcount_bits == 64:
                struct.pack_into(">Q", cluster, idx * 8, 1)
            elif refcount_bits == 32:
                struct.pack_into(">L", cluster, idx * 4, 1)
            elif refcount_bits == 16:
                struct.pack_into(">H", cluster, idx * 2, 1)
            elif refcount_bits == 8:
                cluster[idx] = 1
            elif refcount_bits == 4:
                cluster[idx // 2] |= 1 << ((idx % 2) * 4)
            elif refcount_bits == 2:
                cluster[idx // 4] |= 1 << ((idx % 4) * 2)
            elif refcount_bits == 1:
                cluster[idx // 8] |= 1 << (idx % 8)
        sys.stdout.buffer.write(cluster)

    ### Write L1 table
    cur_offset = l2_table_offset
    for tbl in range(allocated_l1_tables):
        cluster = bytearray(cluster_size)
        for idx in range(l1_entries_per_table):
            l1_idx = tbl * l1_entries_per_table + idx
            if bitmap_is_set(l1_bitmap, l1_idx):
                struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
                cur_offset += cluster_size
        sys.stdout.buffer.write(cluster)

    ### Write L2 tables
    cur_offset = data_clusters_offset
    for tbl in range(l1_entries):
        # Skip the empty L2 tables. We can identify them because
        # there is no L1 entry pointing at them.
        if bitmap_is_set(l1_bitmap, tbl):
            cluster = bytearray(cluster_size)
            for idx in range(l2_entries_per_table):
                l2_idx = tbl * l2_entries_per_table + idx
                if bitmap_is_set(l2_bitmap, l2_idx):
                    if data_file_name is None:
                        # Data clusters are stored back to back in this image
                        struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
                        cur_offset += cluster_size
                    else:
                        # With an external data file the host offset
                        # written here equals the guest offset
                        struct.pack_into(">Q", cluster, idx * 8,
                                         (l2_idx * cluster_size) | QCOW_OFLAG_COPIED)
            sys.stdout.buffer.write(cluster)

    ### Write data clusters
    if data_file_name is None:
        for idx in bitmap_iterator(l2_bitmap, total_data_clusters):
            cluster = os.pread(fd, cluster_size, cluster_size * idx)
            # If the last cluster is smaller than cluster_size pad it with zeroes
            if len(cluster) < cluster_size:
                cluster += bytes(cluster_size - len(cluster))
            sys.stdout.buffer.write(cluster)


def main():
    """Parse the command line, validate it and run the conversion."""
    # Command-line arguments
    parser = argparse.ArgumentParser(
        description="This program converts a QEMU disk image to qcow2 "
        "and writes it to the standard output"
    )
    parser.add_argument("input_file", help="name of the input file")
    parser.add_argument(
        "-f",
        dest="input_format",
        metavar="input_format",
        help="format of the input file (default: raw)",
        default="raw",
    )
    parser.add_argument(
        "-c",
        dest="cluster_size",
        metavar="cluster_size",
        help=f"qcow2 cluster size (default: {QCOW2_DEFAULT_CLUSTER_SIZE})",
        default=QCOW2_DEFAULT_CLUSTER_SIZE,
        type=int,
        choices=[1 << x for x in range(9, 22)],
    )
    parser.add_argument(
        "-r",
        dest="refcount_bits",
        metavar="refcount_bits",
        help=f"width of the reference count entries (default: {QCOW2_DEFAULT_REFCOUNT_BITS})",
        default=QCOW2_DEFAULT_REFCOUNT_BITS,
        type=int,
        choices=[1 << x for x in range(7)],
    )
    parser.add_argument(
        "-d",
        dest="data_file",
        help="create an image with input_file as an external data file",
        action="store_true",
    )
    parser.add_argument(
        "-R",
        dest="data_file_raw",
        help="enable data_file_raw on the generated image (implies -d)",
        action="store_true",
    )
    args = parser.parse_args()

    if args.data_file_raw:
        args.data_file = True

    if not os.path.isfile(args.input_file):
        sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.")

    if args.data_file and args.input_format != "raw":
        sys.exit("[Error] External data files can only be used with raw input images")

    # A 512 byte header is too small for the data file name extension
    if args.data_file and args.cluster_size == 512:
        sys.exit("[Error] External data files require a larger cluster size")

    if sys.stdout.isatty():
        sys.exit("[Error] Refusing to write to a tty. Try redirecting stdout.")

    if args.data_file:
        data_file_name = args.input_file
    else:
        data_file_name = None

    with get_input_as_raw_file(args.input_file, args.input_format) as raw_file:
        write_qcow2_content(
            raw_file,
            args.cluster_size,
            args.refcount_bits,
            data_file_name,
            args.data_file_raw,
        )


if __name__ == "__main__":
    main()