1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright 2023 Red Hat
4 */
5
6 #include "encodings.h"
7
8 #include <linux/log2.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "permassert.h"
13
14 #include "constants.h"
15 #include "status-codes.h"
16 #include "types.h"
17
/** The maximum logical space is 4 petabytes, which is 1 terablock. */
static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024;

/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */
static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64;

/*
 * The fixed prefix of the on-disk geometry block: the magic string, the
 * encoded header, and the trailing CRC-32 checksum. The volume geometry
 * itself is encoded between the header and the checksum (see
 * vdo_parse_geometry_block()); this struct exists for size accounting.
 */
struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;
29
/* Header for geometry version 5.0, which added the bio_offset field. */
static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 5,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
};

/* Header for the older geometry version 4.0 (no bio_offset field). */
static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 4,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
};

/* The +1 leaves room for the string's NUL terminator; only the first
 * VDO_GEOMETRY_MAGIC_NUMBER_SIZE bytes are compared on disk. */
const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";

/* Expected packed size of struct block_map_page_header (checked at build time
 * in vdo_validate_block_map_page()). */
#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)

/* The on-disk version of an individual block map page. */
static const struct version_number BLOCK_MAP_4_1 = {
	.major_version = 4,
	.minor_version = 1,
};
64
/* Header for the version 2.0 block map component state. */
const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
	.id = VDO_BLOCK_MAP,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct block_map_state_2_0),
};

/* Header for the version 7.0 recovery journal component state. */
const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
	.id = VDO_RECOVERY_JOURNAL,
	.version = {
		.major_version = 7,
		.minor_version = 0,
	},
	.size = sizeof(struct recovery_journal_state_7_0),
};

/* Header for the version 2.0 slab depot component state. */
const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
	.id = VDO_SLAB_DEPOT,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct slab_depot_state_2_0),
};

/* Header for the version 3.0 layout: the layout state plus one encoded
 * partition record per partition. */
static const struct header VDO_LAYOUT_HEADER_3_0 = {
	.id = VDO_LAYOUT,
	.version = {
		.major_version = 3,
		.minor_version = 0,
	},
	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
};

/* Every valid layout must contain each of these partitions. */
static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};

/*
 * The current version for the data encoded in the super block. This must be changed any time there
 * is a change to encoding of the component data of any VDO component.
 */
static const struct version_number VDO_COMPONENT_DATA_41_0 = {
	.major_version = 41,
	.minor_version = 0,
};

/* The current version of the overall volume format. */
const struct version_number VDO_VOLUME_VERSION_67_0 = {
	.major_version = 67,
	.minor_version = 0,
};

static const struct header SUPER_BLOCK_HEADER_12_0 = {
	.id = VDO_SUPER_BLOCK,
	.version = {
		.major_version = 12,
		.minor_version = 0,
	},

	/* This is the minimum size, if the super block contains no components. */
	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
};
132
133 /**
134 * validate_version() - Check whether a version matches an expected version.
135 * @expected_version: The expected version.
136 * @actual_version: The version being validated.
137 * @component_name: The name of the component or the calling function (for error logging).
138 *
139 * Logs an error describing a mismatch.
140 *
141 * Return: VDO_SUCCESS if the versions are the same,
142 * VDO_UNSUPPORTED_VERSION if the versions don't match.
143 */
validate_version(struct version_number expected_version,struct version_number actual_version,const char * component_name)144 static int __must_check validate_version(struct version_number expected_version,
145 struct version_number actual_version,
146 const char *component_name)
147 {
148 if (!vdo_are_same_version(expected_version, actual_version)) {
149 return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
150 "%s version mismatch, expected %d.%d, got %d.%d",
151 component_name,
152 expected_version.major_version,
153 expected_version.minor_version,
154 actual_version.major_version,
155 actual_version.minor_version);
156 }
157
158 return VDO_SUCCESS;
159 }
160
161 /**
162 * vdo_validate_header() - Check whether a header matches expectations.
163 * @expected_header: The expected header.
164 * @actual_header: The header being validated.
165 * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
166 * required that actual_header.size >= expected_header.size.
167 * @name: The name of the component or the calling function (for error logging).
168 *
169 * Logs an error describing the first mismatch found.
170 *
171 * Return: VDO_SUCCESS if the header meets expectations,
172 * VDO_INCORRECT_COMPONENT if the component ids don't match,
173 * VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
174 */
vdo_validate_header(const struct header * expected_header,const struct header * actual_header,bool exact_size,const char * name)175 static int vdo_validate_header(const struct header *expected_header,
176 const struct header *actual_header,
177 bool exact_size, const char *name)
178 {
179 int result;
180
181 if (expected_header->id != actual_header->id) {
182 return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
183 "%s ID mismatch, expected %d, got %d",
184 name, expected_header->id,
185 actual_header->id);
186 }
187
188 result = validate_version(expected_header->version, actual_header->version,
189 name);
190 if (result != VDO_SUCCESS)
191 return result;
192
193 if ((expected_header->size > actual_header->size) ||
194 (exact_size && (expected_header->size < actual_header->size))) {
195 return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
196 "%s size mismatch, expected %zu, got %zu",
197 name, expected_header->size,
198 actual_header->size);
199 }
200
201 return VDO_SUCCESS;
202 }
203
/* Pack a version number and append it to the buffer, advancing *offset. */
static void encode_version_number(u8 *buffer, size_t *offset,
				  struct version_number version)
{
	size_t position = *offset;
	struct packed_version_number packed = vdo_pack_version_number(version);

	memcpy(buffer + position, &packed, sizeof(packed));
	*offset = position + sizeof(packed);
}
212
/* Pack a header and append it to the buffer, advancing *offset. */
static void vdo_encode_header(u8 *buffer, size_t *offset,
			      const struct header *header)
{
	size_t position = *offset;
	struct packed_header packed = vdo_pack_header(header);

	memcpy(buffer + position, &packed, sizeof(packed));
	*offset = position + sizeof(packed);
}
221
/* Read a packed version number from the buffer, advancing *offset, and
 * unpack it into *version. */
static void decode_version_number(u8 *buffer, size_t *offset,
				  struct version_number *version)
{
	struct packed_version_number packed;
	size_t position = *offset;

	memcpy(&packed, buffer + position, sizeof(packed));
	*offset = position + sizeof(packed);
	*version = vdo_unpack_version_number(packed);
}
231
/* Read a packed header from the buffer, advancing *offset, and unpack it
 * into *header. */
static void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
{
	struct packed_header packed;
	size_t position = *offset;

	memcpy(&packed, buffer + position, sizeof(packed));
	*offset = position + sizeof(packed);
	*header = vdo_unpack_header(&packed);
}
241
/**
 * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
 * @buffer: A buffer to decode from.
 * @offset: The offset in the buffer at which to decode.
 * @geometry: The structure to receive the decoded fields.
 * @version: The geometry block version to decode.
 */
static void decode_volume_geometry(u8 *buffer, size_t *offset,
				   struct volume_geometry *geometry, u32 version)
{
	u32 unused, mem;
	enum volume_region_id id;
	nonce_t nonce;
	block_count_t bio_offset = 0;
	bool sparse;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, offset, &unused);
	geometry->unused = unused;

	decode_u64_le(buffer, offset, &nonce);
	geometry->nonce = nonce;

	/* The UUID is stored as raw bytes, not in any endian-converted form. */
	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
	*offset += sizeof(uuid_t);

	/* The bio_offset field only exists in geometry versions after 4;
	 * older encodings imply an offset of zero. */
	if (version > 4)
		decode_u64_le(buffer, offset, &bio_offset);
	geometry->bio_offset = bio_offset;

	/* Each region record is a 32-bit saved id followed by a 64-bit start
	 * block, in region-id order. */
	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
		physical_block_number_t start_block;
		enum volume_region_id saved_id;

		decode_u32_le(buffer, offset, &saved_id);
		decode_u64_le(buffer, offset, &start_block);

		geometry->regions[id] = (struct volume_region) {
			.id = saved_id,
			.start_block = start_block,
		};
	}

	decode_u32_le(buffer, offset, &mem);
	/* Skip an unused 32-bit field of the encoded index config
	 * (NOTE(review): presumably an obsolete setting retained for layout
	 * compatibility — confirm against the on-disk format spec). */
	*offset += sizeof(u32);
	sparse = buffer[(*offset)++];

	geometry->index_config = (struct index_config) {
		.mem = mem,
		.sparse = sparse,
	};
}
294
/**
 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
 * @block: The encoded geometry block.
 * @geometry: The structure to receive the decoded fields.
 *
 * Return: VDO_SUCCESS on success; VDO_BAD_MAGIC if the magic number is wrong;
 *         VDO_CHECKSUM_MISMATCH if the checksum does not verify; or an error
 *         from header validation.
 */
int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
{
	u32 checksum, saved_checksum;
	struct header header;
	size_t offset = 0;
	int result;

	/* The block must begin with the well-known magic string. */
	if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
		return VDO_BAD_MAGIC;
	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;

	/* Versions 4 and earlier use the 4.0 header; later ones use 5.0. */
	vdo_decode_header(block, &offset, &header);
	if (header.version.major_version <= 4) {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
					     true, __func__);
	} else {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
					     true, __func__);
	}
	if (result != VDO_SUCCESS)
		return result;

	decode_volume_geometry(block, &offset, geometry, header.version.major_version);

	/* Only the checksum should remain after decoding the geometry. */
	result = VDO_ASSERT(header.size == offset + sizeof(u32),
			    "should have decoded up to the geometry checksum");
	if (result != VDO_SUCCESS)
		return result;

	/* Decode and verify the checksum. */
	checksum = vdo_crc32(block, offset);
	decode_u32_le(block, &offset, &saved_checksum);

	return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
}
335
/*
 * Initialize a buffer as an empty block map page: zero the whole block, then
 * fill in the page version and header fields. Returns the buffer cast as a
 * block map page for convenience.
 */
struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
						 physical_block_number_t pbn,
						 bool initialized)
{
	struct block_map_page *page = buffer;

	memset(buffer, 0, VDO_BLOCK_SIZE);
	page->header.pbn = __cpu_to_le64(pbn);
	page->header.nonce = __cpu_to_le64(nonce);
	page->header.initialized = initialized;
	page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
	return page;
}
349
/*
 * Check that a block map page has the expected version, is marked
 * initialized, and carries the expected nonce and pbn. A wrong version,
 * uninitialized page, or wrong nonce makes the page INVALID; a wrong pbn
 * alone makes it BAD.
 */
enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
							 nonce_t nonce,
							 physical_block_number_t pbn)
{
	struct version_number version;

	BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);

	version = vdo_unpack_version_number(page->version);
	if (!vdo_are_same_version(BLOCK_MAP_4_1, version))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (!page->header.initialized)
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (nonce != __le64_to_cpu(page->header.nonce))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	return ((pbn == vdo_get_block_map_page_pbn(page)) ?
		VDO_BLOCK_MAP_PAGE_VALID : VDO_BLOCK_MAP_PAGE_BAD);
}
366
/*
 * Decode version 2.0 block map component state from a buffer.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
				      struct block_map_state_2_0 *state)
{
	size_t initial_offset;
	block_count_t flat_page_count, root_count;
	physical_block_number_t flat_page_origin, root_origin;
	struct header header;
	int result;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;

	decode_u64_le(buffer, offset, &flat_page_origin);
	/*
	 * Log the just-decoded local value: *state is an output parameter and
	 * is still uninitialized at this point, so reading state->fields here
	 * (as a previous revision did) would report garbage.
	 */
	result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    "Flat page origin must be %u (recorded as %llu)",
			    VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    (unsigned long long) flat_page_origin);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &flat_page_count);
	result = VDO_ASSERT(flat_page_count == 0,
			    "Flat page count must be 0 (recorded as %llu)",
			    (unsigned long long) flat_page_count);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &root_origin);
	decode_u64_le(buffer, offset, &root_count);

	result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
			    "decoded block map component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct block_map_state_2_0) {
		.flat_page_origin = flat_page_origin,
		.flat_page_count = flat_page_count,
		.root_origin = root_origin,
		.root_count = root_count,
	};

	return VDO_SUCCESS;
}
415
/*
 * Encode version 2.0 block map component state, preceded by its header,
 * into a buffer.
 */
static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
				       struct block_map_state_2_0 state)
{
	size_t start;

	vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
	start = *offset;

	encode_u64_le(buffer, offset, state.flat_page_origin);
	encode_u64_le(buffer, offset, state.flat_page_count);
	encode_u64_le(buffer, offset, state.root_origin);
	encode_u64_le(buffer, offset, state.root_count);

	/* The payload written must be exactly the size recorded in the header. */
	VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - start,
			    "encoded block map component size must match header size");
}
432
/**
 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
 *                                  level in order to grow the forest to a new number of entries.
 * @root_count: The number of block map roots.
 * @old_sizes: The sizes of the old tree segments.
 * @entries: The new number of entries the block map must address.
 * @new_sizes: The sizes of the new tree segments.
 *
 * Return: The total number of non-leaf pages required.
 */
block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
					   struct boundary *old_sizes,
					   block_count_t entries,
					   struct boundary *new_sizes)
{
	/* A forest always covers at least one leaf page, even for 0 entries. */
	page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
	/* Leaf pages are divided evenly (rounding up) among the roots. */
	page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
	block_count_t total_pages = 0;
	height_t height;

	for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
		block_count_t new_pages;

		/* Each interior level needs one page per ENTRIES_PER_PAGE
		 * pages of the level below it. */
		level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
		new_sizes->levels[height] = level_size;
		new_pages = level_size;
		/* When growing an existing forest, only count the pages
		 * beyond what the old segments already provide. */
		if (old_sizes != NULL)
			new_pages -= old_sizes->levels[height];
		total_pages += (new_pages * root_count);
	}

	return total_pages;
}
466
467 /**
468 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal.
469 * @buffer: A buffer to store the encoding.
470 * @offset: The offset in the buffer at which to encode.
471 * @state: The recovery journal state to encode.
472 *
473 * Return: VDO_SUCCESS or an error code.
474 */
encode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 state)475 static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
476 struct recovery_journal_state_7_0 state)
477 {
478 size_t initial_offset;
479
480 vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);
481
482 initial_offset = *offset;
483 encode_u64_le(buffer, offset, state.journal_start);
484 encode_u64_le(buffer, offset, state.logical_blocks_used);
485 encode_u64_le(buffer, offset, state.block_map_data_blocks);
486
487 VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
488 "encoded recovery journal component size must match header size");
489 }
490
491 /**
492 * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
493 * @buffer: The buffer containing the saved state.
494 * @offset: The offset to start decoding from.
495 * @state: A pointer to a recovery journal state to hold the result of a successful decode.
496 *
497 * Return: VDO_SUCCESS or an error code.
498 */
decode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 * state)499 static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
500 struct recovery_journal_state_7_0 *state)
501 {
502 struct header header;
503 int result;
504 size_t initial_offset;
505 sequence_number_t journal_start;
506 block_count_t logical_blocks_used, block_map_data_blocks;
507
508 vdo_decode_header(buffer, offset, &header);
509 result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
510 __func__);
511 if (result != VDO_SUCCESS)
512 return result;
513
514 initial_offset = *offset;
515 decode_u64_le(buffer, offset, &journal_start);
516 decode_u64_le(buffer, offset, &logical_blocks_used);
517 decode_u64_le(buffer, offset, &block_map_data_blocks);
518
519 result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
520 "decoded recovery journal component size must match header size");
521 if (result != VDO_SUCCESS)
522 return result;
523
524 *state = (struct recovery_journal_state_7_0) {
525 .journal_start = journal_start,
526 .logical_blocks_used = logical_blocks_used,
527 .block_map_data_blocks = block_map_data_blocks,
528 };
529
530 return VDO_SUCCESS;
531 }
532
533 /**
534 * vdo_get_journal_operation_name() - Get the name of a journal operation.
535 * @operation: The operation to name.
536 *
537 * Return: The name of the operation.
538 */
vdo_get_journal_operation_name(enum journal_operation operation)539 const char *vdo_get_journal_operation_name(enum journal_operation operation)
540 {
541 switch (operation) {
542 case VDO_JOURNAL_DATA_REMAPPING:
543 return "data remapping";
544
545 case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
546 return "block map remapping";
547
548 default:
549 return "unknown journal operation";
550 }
551 }
552
553 /**
554 * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
555 * @buffer: A buffer to store the encoding.
556 * @offset: The offset in the buffer at which to encode.
557 * @state: The slab depot state to encode.
558 */
encode_slab_depot_state_2_0(u8 * buffer,size_t * offset,struct slab_depot_state_2_0 state)559 static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
560 struct slab_depot_state_2_0 state)
561 {
562 size_t initial_offset;
563
564 vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
565
566 initial_offset = *offset;
567 encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
568 encode_u64_le(buffer, offset, state.slab_config.data_blocks);
569 encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
570 encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
571 encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
572 encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
573 encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
574 encode_u64_le(buffer, offset, state.first_block);
575 encode_u64_le(buffer, offset, state.last_block);
576 buffer[(*offset)++] = state.zone_count;
577
578 VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
579 "encoded block map component size must match header size");
580 }
581
582 /**
583 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
584 * @buffer: The buffer being decoded.
585 * @offset: The offset to start decoding from.
586 * @state: A pointer to a slab depot state to hold the decoded result.
587 *
588 * Return: VDO_SUCCESS or an error code.
589 */
decode_slab_depot_state_2_0(u8 * buffer,size_t * offset,struct slab_depot_state_2_0 * state)590 static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
591 struct slab_depot_state_2_0 *state)
592 {
593 struct header header;
594 int result;
595 size_t initial_offset;
596 struct slab_config slab_config;
597 block_count_t count;
598 physical_block_number_t first_block, last_block;
599 zone_count_t zone_count;
600
601 vdo_decode_header(buffer, offset, &header);
602 result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
603 __func__);
604 if (result != VDO_SUCCESS)
605 return result;
606
607 initial_offset = *offset;
608 decode_u64_le(buffer, offset, &count);
609 slab_config.slab_blocks = count;
610
611 decode_u64_le(buffer, offset, &count);
612 slab_config.data_blocks = count;
613
614 decode_u64_le(buffer, offset, &count);
615 slab_config.reference_count_blocks = count;
616
617 decode_u64_le(buffer, offset, &count);
618 slab_config.slab_journal_blocks = count;
619
620 decode_u64_le(buffer, offset, &count);
621 slab_config.slab_journal_flushing_threshold = count;
622
623 decode_u64_le(buffer, offset, &count);
624 slab_config.slab_journal_blocking_threshold = count;
625
626 decode_u64_le(buffer, offset, &count);
627 slab_config.slab_journal_scrubbing_threshold = count;
628
629 decode_u64_le(buffer, offset, &first_block);
630 decode_u64_le(buffer, offset, &last_block);
631 zone_count = buffer[(*offset)++];
632
633 result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
634 "decoded slab depot component size must match header size");
635 if (result != VDO_SUCCESS)
636 return result;
637
638 *state = (struct slab_depot_state_2_0) {
639 .slab_config = slab_config,
640 .first_block = first_block,
641 .last_block = last_block,
642 .zone_count = zone_count,
643 };
644
645 return VDO_SUCCESS;
646 }
647
/**
 * vdo_configure_slab_depot() - Configure the slab depot.
 * @partition: The slab depot partition
 * @slab_config: The configuration of a single slab.
 * @zone_count: The number of zones the depot will use.
 * @state: The state structure to be configured.
 *
 * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
 * that will fit and still leave room for the depot metadata, then return the saved state for that
 * configuration.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_configure_slab_depot(const struct partition *partition,
			     struct slab_config slab_config, zone_count_t zone_count,
			     struct slab_depot_state_2_0 *state)
{
	block_count_t total_slab_blocks, total_data_blocks;
	size_t slab_count;
	physical_block_number_t last_block;
	block_count_t slab_size = slab_config.slab_blocks;

	vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
		      __func__, (unsigned long long) partition->count,
		      (unsigned long long) partition->offset,
		      (unsigned long long) slab_size, zone_count);

	/* We do not allow runt slabs, so we waste up to a slab's worth. */
	slab_count = (partition->count / slab_size);
	if (slab_count == 0)
		return VDO_NO_SPACE;

	if (slab_count > MAX_VDO_SLABS)
		return VDO_TOO_MANY_SLABS;

	total_slab_blocks = slab_count * slab_config.slab_blocks;
	total_data_blocks = slab_count * slab_config.data_blocks;
	/* The depot occupies whole slabs starting at the partition offset. */
	last_block = partition->offset + total_slab_blocks;

	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = partition->offset,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
		      (unsigned long long) last_block,
		      (unsigned long long) total_data_blocks, slab_count,
		      (unsigned long long) (partition->count - (last_block - partition->offset)));

	return VDO_SUCCESS;
}
701
/**
 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
 * @slab_size: The number of blocks per slab.
 * @slab_journal_blocks: The number of blocks for the slab journal.
 * @slab_config: The slab configuration to initialize.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
		       struct slab_config *slab_config)
{
	block_count_t ref_blocks, meta_blocks, data_blocks;
	block_count_t flushing_threshold, remaining, blocking_threshold;
	block_count_t minimal_extra_space, scrubbing_threshold;

	/* The journal must leave at least one block for everything else. */
	if (slab_journal_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * This calculation should technically be a recurrence, but the total number of metadata
	 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
	 * data block in each slab with more iteration.
	 */
	ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
	meta_blocks = (ref_blocks + slab_journal_blocks);

	/* Make sure configured slabs are not too small. */
	if (meta_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	data_blocks = slab_size - meta_blocks;

	/*
	 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
	 * production, or 3/4ths, so we use this ratio for all sizes.
	 */
	flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
	/*
	 * The blocking threshold should be far enough from the flushing threshold to not produce
	 * delays, but far enough from the end of the journal to allow multiple successive recovery
	 * failures.
	 */
	remaining = slab_journal_blocks - flushing_threshold;
	blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
	/* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
	minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
	scrubbing_threshold = blocking_threshold;
	if (slab_journal_blocks > minimal_extra_space)
		scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
	/* The blocking threshold must never exceed the scrubbing threshold. */
	if (blocking_threshold > scrubbing_threshold)
		blocking_threshold = scrubbing_threshold;

	*slab_config = (struct slab_config) {
		.slab_blocks = slab_size,
		.data_blocks = data_blocks,
		.reference_count_blocks = ref_blocks,
		.slab_journal_blocks = slab_journal_blocks,
		.slab_journal_flushing_threshold = flushing_threshold,
		.slab_journal_blocking_threshold = blocking_threshold,
		.slab_journal_scrubbing_threshold = scrubbing_threshold};
	return VDO_SUCCESS;
}
764
765 /**
766 * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
767 * @block: The journal block holding the entry.
768 * @entry_count: The number of the entry.
769 *
770 * Return: The decoded entry.
771 */
vdo_decode_slab_journal_entry(struct packed_slab_journal_block * block,journal_entry_count_t entry_count)772 struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
773 journal_entry_count_t entry_count)
774 {
775 struct slab_journal_entry entry =
776 vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);
777
778 if (block->header.has_block_map_increments &&
779 ((block->payload.full_entries.entry_types[entry_count / 8] &
780 ((u8) 1 << (entry_count % 8))) != 0))
781 entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
782
783 return entry;
784 }
785
786 /**
787 * allocate_partition() - Allocate a partition and add it to a layout.
788 * @layout: The layout containing the partition.
789 * @id: The id of the partition.
790 * @offset: The offset into the layout at which the partition begins.
791 * @size: The size of the partition in blocks.
792 *
793 * Return: VDO_SUCCESS or an error.
794 */
allocate_partition(struct layout * layout,u8 id,physical_block_number_t offset,block_count_t size)795 static int allocate_partition(struct layout *layout, u8 id,
796 physical_block_number_t offset, block_count_t size)
797 {
798 struct partition *partition;
799 int result;
800
801 result = vdo_allocate(1, struct partition, __func__, &partition);
802 if (result != VDO_SUCCESS)
803 return result;
804
805 partition->id = id;
806 partition->offset = offset;
807 partition->count = size;
808 partition->next = layout->head;
809 layout->head = partition;
810
811 return VDO_SUCCESS;
812 }
813
814 /**
815 * make_partition() - Create a new partition from the beginning or end of the unused space in a
816 * layout.
817 * @layout: The layout.
818 * @id: The id of the partition to make.
819 * @size: The number of blocks to carve out; if 0, all remaining space will be used.
820 * @beginning: True if the partition should start at the beginning of the unused space.
821 *
822 * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
823 * remaining.
824 */
make_partition(struct layout * layout,enum partition_id id,block_count_t size,bool beginning)825 static int __must_check make_partition(struct layout *layout, enum partition_id id,
826 block_count_t size, bool beginning)
827 {
828 int result;
829 physical_block_number_t offset;
830 block_count_t free_blocks = layout->last_free - layout->first_free;
831
832 if (size == 0) {
833 if (free_blocks == 0)
834 return VDO_NO_SPACE;
835 size = free_blocks;
836 } else if (size > free_blocks) {
837 return VDO_NO_SPACE;
838 }
839
840 result = vdo_get_partition(layout, id, NULL);
841 if (result != VDO_UNKNOWN_PARTITION)
842 return VDO_PARTITION_EXISTS;
843
844 offset = beginning ? layout->first_free : (layout->last_free - size);
845
846 result = allocate_partition(layout, id, offset, size);
847 if (result != VDO_SUCCESS)
848 return result;
849
850 layout->num_partitions++;
851 if (beginning)
852 layout->first_free += size;
853 else
854 layout->last_free = layout->last_free - size;
855
856 return VDO_SUCCESS;
857 }
858
859 /**
860 * vdo_initialize_layout() - Lay out the partitions of a vdo.
861 * @size: The entire size of the vdo.
862 * @offset: The start of the layout on the underlying storage in blocks.
863 * @block_map_blocks: The size of the block map partition.
864 * @journal_blocks: The size of the journal partition.
865 * @summary_blocks: The size of the slab summary partition.
866 * @layout: The layout to initialize.
867 *
868 * Return: VDO_SUCCESS or an error.
869 */
vdo_initialize_layout(block_count_t size,physical_block_number_t offset,block_count_t block_map_blocks,block_count_t journal_blocks,block_count_t summary_blocks,struct layout * layout)870 int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
871 block_count_t block_map_blocks, block_count_t journal_blocks,
872 block_count_t summary_blocks, struct layout *layout)
873 {
874 int result;
875 block_count_t necessary_size =
876 (offset + block_map_blocks + journal_blocks + summary_blocks);
877
878 if (necessary_size > size)
879 return vdo_log_error_strerror(VDO_NO_SPACE,
880 "Not enough space to make a VDO");
881
882 *layout = (struct layout) {
883 .start = offset,
884 .size = size,
885 .first_free = offset,
886 .last_free = size,
887 .num_partitions = 0,
888 .head = NULL,
889 };
890
891 result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
892 if (result != VDO_SUCCESS) {
893 vdo_uninitialize_layout(layout);
894 return result;
895 }
896
897 result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
898 false);
899 if (result != VDO_SUCCESS) {
900 vdo_uninitialize_layout(layout);
901 return result;
902 }
903
904 result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
905 false);
906 if (result != VDO_SUCCESS) {
907 vdo_uninitialize_layout(layout);
908 return result;
909 }
910
911 result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
912 if (result != VDO_SUCCESS)
913 vdo_uninitialize_layout(layout);
914
915 return result;
916 }
917
918 /**
919 * vdo_uninitialize_layout() - Clean up a layout.
920 * @layout: The layout to clean up.
921 *
922 * All partitions created by this layout become invalid pointers.
923 */
vdo_uninitialize_layout(struct layout * layout)924 void vdo_uninitialize_layout(struct layout *layout)
925 {
926 while (layout->head != NULL) {
927 struct partition *part = layout->head;
928
929 layout->head = part->next;
930 vdo_free(part);
931 }
932
933 memset(layout, 0, sizeof(struct layout));
934 }
935
936 /**
937 * vdo_get_partition() - Get a partition by id.
938 * @layout: The layout from which to get a partition.
939 * @id: The id of the partition.
940 * @partition_ptr: A pointer to hold the partition.
941 *
942 * Return: VDO_SUCCESS or an error.
943 */
vdo_get_partition(struct layout * layout,enum partition_id id,struct partition ** partition_ptr)944 int vdo_get_partition(struct layout *layout, enum partition_id id,
945 struct partition **partition_ptr)
946 {
947 struct partition *partition;
948
949 for (partition = layout->head; partition != NULL; partition = partition->next) {
950 if (partition->id == id) {
951 if (partition_ptr != NULL)
952 *partition_ptr = partition;
953 return VDO_SUCCESS;
954 }
955 }
956
957 return VDO_UNKNOWN_PARTITION;
958 }
959
960 /**
961 * vdo_get_known_partition() - Get a partition by id from a validated layout.
962 * @layout: The layout from which to get a partition.
963 * @id: The id of the partition.
964 *
965 * Return: the partition
966 */
vdo_get_known_partition(struct layout * layout,enum partition_id id)967 struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
968 {
969 struct partition *partition;
970 int result = vdo_get_partition(layout, id, &partition);
971
972 VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
973
974 return partition;
975 }
976
/* Encode a layout and its partition table into @buffer at *@offset. */
static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
{
	const struct partition *entry;
	size_t start_offset;
	struct header header = VDO_LAYOUT_HEADER_3_0;

	/* Partition ids and the partition count are stored as single bytes. */
	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
	VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
			    "layout partition count must fit in a byte");

	vdo_encode_header(buffer, offset, &header);

	start_offset = *offset;
	encode_u64_le(buffer, offset, layout->first_free);
	encode_u64_le(buffer, offset, layout->last_free);
	buffer[(*offset)++] = layout->num_partitions;
	VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - start_offset,
			    "encoded size of a layout header must match structure");

	for (entry = layout->head; entry != NULL; entry = entry->next) {
		buffer[(*offset)++] = entry->id;
		encode_u64_le(buffer, offset, entry->offset);
		/* This field only exists for backwards compatibility */
		encode_u64_le(buffer, offset, 0);
		encode_u64_le(buffer, offset, entry->count);
	}

	VDO_ASSERT_LOG_ONLY(header.size == *offset - start_offset,
			    "encoded size of a layout must match header size");
}
1008
/**
 * decode_layout() - Decode a layout and its partition table from a buffer.
 * @buffer: The buffer being decoded.
 * @offset: The offset to start decoding from; advanced past the decoded layout.
 * @start: The first block of the region the partitions must cover.
 * @size: The block number at which the partitions must end.
 *
 * On any failure after partitions have been allocated, the layout is uninitialized before
 * returning, so the caller need not clean up a partially decoded layout.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct layout_3_0 layout_header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[(*offset)++];
	layout_header = (struct layout_3_0) {
		.first_free = first_free,
		.last_free = last_free,
		.partition_count = partition_count,
	};

	result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = layout_header.first_free;
	layout->last_free = layout_header.last_free;
	layout->num_partitions = layout_header.partition_count;

	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	for (i = 0; i < layout->num_partitions; i++) {
		u8 id;
		u64 partition_offset, count;

		id = buffer[(*offset)++];
		decode_u64_le(buffer, offset, &partition_offset);
		/* Skip the obsolete field that exists only for backwards compatibility. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return vdo_log_error_strerror(result,
						      "layout is missing required partition %u",
						      REQUIRED_PARTITIONS[i]);
		}

		/* Accumulate the required partitions' sizes on top of the region start... */
		start += partition->count;
	}

	/* ...so the sum must land exactly on @size if the partitions tile the region. */
	if (start != size) {
		vdo_uninitialize_layout(layout);
		return vdo_log_error_strerror(UDS_BAD_STATE,
					      "partitions do not cover the layout");
	}

	return VDO_SUCCESS;
}
1090
1091 /**
1092 * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1093 * @config: The vdo config to convert.
1094 *
1095 * Return: The platform-independent representation of the config.
1096 */
pack_vdo_config(struct vdo_config config)1097 static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1098 {
1099 return (struct packed_vdo_config) {
1100 .logical_blocks = __cpu_to_le64(config.logical_blocks),
1101 .physical_blocks = __cpu_to_le64(config.physical_blocks),
1102 .slab_size = __cpu_to_le64(config.slab_size),
1103 .recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1104 .slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1105 };
1106 }
1107
1108 /**
1109 * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1110 * @component: The VDO component data to convert.
1111 *
1112 * Return: The platform-independent representation of the component.
1113 */
pack_vdo_component(const struct vdo_component component)1114 static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1115 {
1116 return (struct packed_vdo_component_41_0) {
1117 .state = __cpu_to_le32(component.state),
1118 .complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1119 .read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1120 .config = pack_vdo_config(component.config),
1121 .nonce = __cpu_to_le64(component.nonce),
1122 };
1123 }
1124
/* Encode the vdo component data: a version number followed by the packed component. */
static void encode_vdo_component(u8 *buffer, size_t *offset,
				 struct vdo_component component)
{
	struct packed_vdo_component_41_0 packed = pack_vdo_component(component);

	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}
1135
1136 /**
1137 * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1138 * @config: The packed vdo config to convert.
1139 *
1140 * Return: The native in-memory representation of the vdo config.
1141 */
unpack_vdo_config(struct packed_vdo_config config)1142 static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1143 {
1144 return (struct vdo_config) {
1145 .logical_blocks = __le64_to_cpu(config.logical_blocks),
1146 .physical_blocks = __le64_to_cpu(config.physical_blocks),
1147 .slab_size = __le64_to_cpu(config.slab_size),
1148 .recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1149 .slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1150 };
1151 }
1152
1153 /**
1154 * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1155 * representation.
1156 * @component: The packed vdo component data to convert.
1157 *
1158 * Return: The native in-memory representation of the component.
1159 */
unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)1160 static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1161 {
1162 return (struct vdo_component) {
1163 .state = __le32_to_cpu(component.state),
1164 .complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1165 .read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1166 .config = unpack_vdo_config(component.config),
1167 .nonce = __le64_to_cpu(component.nonce),
1168 };
1169 }
1170
1171 /**
1172 * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
1173 * @buffer: The buffer being decoded.
1174 * @offset: The offset to start decoding from.
1175 * @component: The vdo component structure to decode into.
1176 *
1177 * Return: VDO_SUCCESS or an error.
1178 */
decode_vdo_component(u8 * buffer,size_t * offset,struct vdo_component * component)1179 static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
1180 {
1181 struct version_number version;
1182 struct packed_vdo_component_41_0 packed;
1183 int result;
1184
1185 decode_version_number(buffer, offset, &version);
1186 result = validate_version(version, VDO_COMPONENT_DATA_41_0,
1187 "VDO component data");
1188 if (result != VDO_SUCCESS)
1189 return result;
1190
1191 memcpy(&packed, buffer + *offset, sizeof(packed));
1192 *offset += sizeof(packed);
1193 *component = unpack_vdo_component_41_0(packed);
1194 return VDO_SUCCESS;
1195 }
1196
/**
 * vdo_validate_config() - Validate constraints on a VDO config.
 * @config: The VDO config.
 * @physical_block_count: The minimum block count of the underlying storage.
 * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
 *                       unspecified.
 *
 * Return: A success or error code.
 */
int vdo_validate_config(const struct vdo_config *config,
			block_count_t physical_block_count,
			block_count_t logical_block_count)
{
	struct slab_config slab_config;
	int result;

	/* Slab geometry: non-zero, power-of-two size, bounded, with a journal that fits. */
	result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->slab_size),
			    "slab size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
			    "slab size must be less than or equal to 2^%d",
			    MAX_VDO_SLAB_BITS);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
			    "slab journal size is within expected bound");
	if (result != VDO_SUCCESS)
		return result;

	/* The slab metadata must leave room for at least one data block. */
	result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
				    &slab_config);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT((slab_config.data_blocks >= 1),
			    "slab must be able to hold at least one block");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
			    "physical block count %llu exceeds maximum %llu",
			    (unsigned long long) config->physical_blocks,
			    (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
	if (result != VDO_SUCCESS)
		/*
		 * Deliberately returns VDO_OUT_OF_RANGE instead of the assertion
		 * result, unlike the other checks in this function.
		 */
		return VDO_OUT_OF_RANGE;

	/* The configured physical size must match the actual storage exactly. */
	if (physical_block_count != config->physical_blocks) {
		vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
			      (unsigned long long) physical_block_count,
			      (unsigned long long) config->physical_blocks);
		return VDO_PARAMETER_MISMATCH;
	}

	/* The logical size is only checked when the caller specified one. */
	if (logical_block_count > 0) {
		result = VDO_ASSERT((config->logical_blocks > 0),
				    "logical blocks unspecified");
		if (result != VDO_SUCCESS)
			return result;

		if (logical_block_count != config->logical_blocks) {
			vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				      (unsigned long long) logical_block_count,
				      (unsigned long long) config->logical_blocks);
			return VDO_PARAMETER_MISMATCH;
		}
	}

	result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
			    "logical blocks too large");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->recovery_journal_size > 0,
			    "recovery journal size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
			    "recovery journal size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	return result;
}
1292
1293 /**
1294 * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1295 * @states: The component states to destroy.
1296 */
vdo_destroy_component_states(struct vdo_component_states * states)1297 void vdo_destroy_component_states(struct vdo_component_states *states)
1298 {
1299 if (states == NULL)
1300 return;
1301
1302 vdo_uninitialize_layout(&states->layout);
1303 }
1304
1305 /**
1306 * decode_components() - Decode the components now that we know the component data is a version we
1307 * understand.
1308 * @buffer: The buffer being decoded.
1309 * @offset: The offset to start decoding from.
1310 * @geometry: The vdo geometry.
1311 * @states: An object to hold the successfully decoded state.
1312 *
1313 * Return: VDO_SUCCESS or an error.
1314 */
decode_components(u8 * buffer,size_t * offset,struct volume_geometry * geometry,struct vdo_component_states * states)1315 static int __must_check decode_components(u8 *buffer, size_t *offset,
1316 struct volume_geometry *geometry,
1317 struct vdo_component_states *states)
1318 {
1319 int result;
1320
1321 decode_vdo_component(buffer, offset, &states->vdo);
1322
1323 result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1324 states->vdo.config.physical_blocks, &states->layout);
1325 if (result != VDO_SUCCESS)
1326 return result;
1327
1328 result = decode_recovery_journal_state_7_0(buffer, offset,
1329 &states->recovery_journal);
1330 if (result != VDO_SUCCESS)
1331 return result;
1332
1333 result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1334 if (result != VDO_SUCCESS)
1335 return result;
1336
1337 result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1338 if (result != VDO_SUCCESS)
1339 return result;
1340
1341 VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1342 "All decoded component data was used");
1343 return VDO_SUCCESS;
1344 }
1345
1346 /**
1347 * vdo_decode_component_states() - Decode the payload of a super block.
1348 * @buffer: The buffer containing the encoded super block contents.
1349 * @geometry: The vdo geometry.
1350 * @states: A pointer to hold the decoded states.
1351 *
1352 * Return: VDO_SUCCESS or an error.
1353 */
vdo_decode_component_states(u8 * buffer,struct volume_geometry * geometry,struct vdo_component_states * states)1354 int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
1355 struct vdo_component_states *states)
1356 {
1357 int result;
1358 size_t offset = VDO_COMPONENT_DATA_OFFSET;
1359
1360 /* This is for backwards compatibility. */
1361 decode_u32_le(buffer, &offset, &states->unused);
1362
1363 /* Check the VDO volume version */
1364 decode_version_number(buffer, &offset, &states->volume_version);
1365 result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
1366 "volume");
1367 if (result != VDO_SUCCESS)
1368 return result;
1369
1370 result = decode_components(buffer, &offset, geometry, states);
1371 if (result != VDO_SUCCESS)
1372 vdo_uninitialize_layout(&states->layout);
1373
1374 return result;
1375 }
1376
1377 /**
1378 * vdo_validate_component_states() - Validate the decoded super block configuration.
1379 * @states: The state decoded from the super block.
1380 * @geometry_nonce: The nonce from the geometry block.
1381 * @physical_size: The minimum block count of the underlying storage.
1382 * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1383 * unspecified.
1384 *
1385 * Return: VDO_SUCCESS or an error if the configuration is invalid.
1386 */
vdo_validate_component_states(struct vdo_component_states * states,nonce_t geometry_nonce,block_count_t physical_size,block_count_t logical_size)1387 int vdo_validate_component_states(struct vdo_component_states *states,
1388 nonce_t geometry_nonce, block_count_t physical_size,
1389 block_count_t logical_size)
1390 {
1391 if (geometry_nonce != states->vdo.nonce) {
1392 return vdo_log_error_strerror(VDO_BAD_NONCE,
1393 "Geometry nonce %llu does not match superblock nonce %llu",
1394 (unsigned long long) geometry_nonce,
1395 (unsigned long long) states->vdo.nonce);
1396 }
1397
1398 return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
1399 }
1400
/**
 * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
 * @buffer: A buffer to store the encoding.
 * @offset: The offset into the buffer to start the encoding.
 * @states: The component states to encode.
 *
 * The encode order here must mirror the decode order in vdo_decode_component_states() and
 * decode_components().
 */
static void vdo_encode_component_states(u8 *buffer, size_t *offset,
					const struct vdo_component_states *states)
{
	/* This is for backwards compatibility. */
	encode_u32_le(buffer, offset, states->unused);
	encode_version_number(buffer, offset, states->volume_version);
	encode_vdo_component(buffer, offset, states->vdo);
	encode_layout(buffer, offset, &states->layout);
	encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
	encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
	encode_block_map_state_2_0(buffer, offset, states->block_map);

	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
			    "All super block component data was encoded");
}
1422
1423 /**
1424 * vdo_encode_super_block() - Encode a super block into its on-disk representation.
1425 * @buffer: A buffer to store the encoding.
1426 * @states: The component states to encode.
1427 */
vdo_encode_super_block(u8 * buffer,struct vdo_component_states * states)1428 void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
1429 {
1430 u32 checksum;
1431 struct header header = SUPER_BLOCK_HEADER_12_0;
1432 size_t offset = 0;
1433
1434 header.size += VDO_COMPONENT_DATA_SIZE;
1435 vdo_encode_header(buffer, &offset, &header);
1436 vdo_encode_component_states(buffer, &offset, states);
1437
1438 checksum = vdo_crc32(buffer, offset);
1439 encode_u32_le(buffer, &offset, checksum);
1440
1441 /*
1442 * Even though the buffer is a full block, to avoid the potential corruption from a torn
1443 * write, the entire encoding must fit in the first sector.
1444 */
1445 VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
1446 "entire superblock must fit in one sector");
1447 }
1448
1449 /**
1450 * vdo_decode_super_block() - Decode a super block from its on-disk representation.
1451 * @buffer: The buffer to decode from.
1452 */
vdo_decode_super_block(u8 * buffer)1453 int vdo_decode_super_block(u8 *buffer)
1454 {
1455 struct header header;
1456 int result;
1457 u32 checksum, saved_checksum;
1458 size_t offset = 0;
1459
1460 /* Decode and validate the header. */
1461 vdo_decode_header(buffer, &offset, &header);
1462 result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
1463 if (result != VDO_SUCCESS)
1464 return result;
1465
1466 if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
1467 /*
1468 * We can't check release version or checksum until we know the content size, so we
1469 * have to assume a version mismatch on unexpected values.
1470 */
1471 return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
1472 "super block contents too large: %zu",
1473 header.size);
1474 }
1475
1476 /* Skip past the component data for now, to verify the checksum. */
1477 offset += VDO_COMPONENT_DATA_SIZE;
1478
1479 checksum = vdo_crc32(buffer, offset);
1480 decode_u32_le(buffer, &offset, &saved_checksum);
1481
1482 result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
1483 "must have decoded entire superblock payload");
1484 if (result != VDO_SUCCESS)
1485 return result;
1486
1487 return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
1488 }
1489