1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright 2023 Red Hat
4 */
5
6 #include "encodings.h"
7
8 #include <linux/log2.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "permassert.h"
13
14 #include "constants.h"
15 #include "indexer.h"
16 #include "status-codes.h"
17 #include "types.h"
18
/*
 * The fixed fields of an on-disk geometry block. The encoded volume geometry
 * payload sits between the header and the trailing checksum, which is why the
 * GEOMETRY_BLOCK_HEADER_*_0 sizes below add sizeof(struct volume_geometry*).
 */
struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;
24
/* Version 5.0 geometry blocks include the bio_offset field (see decode_volume_geometry()). */
static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 5,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
};

/* Version 4.0 geometry blocks predate the bio_offset field. */
static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 4,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
};
50
/* The magic bytes at the start of every geometry block (extra byte for the NUL). */
const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";

/*
 * The packed on-disk size of struct block_map_page_header; checked against the
 * struct by a BUILD_BUG_ON in vdo_validate_block_map_page().
 */
#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)

/* The version stamped into every block map page by vdo_format_block_map_page(). */
static const struct version_number BLOCK_MAP_4_1 = {
	.major_version = 4,
	.minor_version = 1,
};
59
/* Header for the block map component state (see *_block_map_state_2_0()). */
const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
	.id = VDO_BLOCK_MAP,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct block_map_state_2_0),
};

/* Header for the recovery journal component state (see *_recovery_journal_state_7_0()). */
const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
	.id = VDO_RECOVERY_JOURNAL,
	.version = {
		.major_version = 7,
		.minor_version = 0,
	},
	.size = sizeof(struct recovery_journal_state_7_0),
};

/* Header for the slab depot component state (see *_slab_depot_state_2_0()). */
const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
	.id = VDO_SLAB_DEPOT,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct slab_depot_state_2_0),
};

/* Header for the layout; the payload is one partition record per partition. */
static const struct header VDO_LAYOUT_HEADER_3_0 = {
	.id = VDO_LAYOUT,
	.version = {
		.major_version = 3,
		.minor_version = 0,
	},
	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
};
95
/* The partitions that every VDO layout is expected to contain. */
static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};

/*
 * The current version for the data encoded in the super block. This must be changed any time there
 * is a change to encoding of the component data of any VDO component.
 */
static const struct version_number VDO_COMPONENT_DATA_41_0 = {
	.major_version = 41,
	.minor_version = 0,
};

/* The version number of the VDO volume format. */
const struct version_number VDO_VOLUME_VERSION_67_0 = {
	.major_version = 67,
	.minor_version = 0,
};

/* Header for the super block itself. */
static const struct header SUPER_BLOCK_HEADER_12_0 = {
	.id = VDO_SUPER_BLOCK,
	.version = {
		.major_version = 12,
		.minor_version = 0,
	},

	/* This is the minimum size, if the super block contains no components. */
	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
};
127
128 /**
129 * validate_version() - Check whether a version matches an expected version.
130 * @expected_version: The expected version.
131 * @actual_version: The version being validated.
132 * @component_name: The name of the component or the calling function (for error logging).
133 *
134 * Logs an error describing a mismatch.
135 *
136 * Return: VDO_SUCCESS if the versions are the same,
137 * VDO_UNSUPPORTED_VERSION if the versions don't match.
138 */
validate_version(struct version_number expected_version,struct version_number actual_version,const char * component_name)139 static int __must_check validate_version(struct version_number expected_version,
140 struct version_number actual_version,
141 const char *component_name)
142 {
143 if (!vdo_are_same_version(expected_version, actual_version)) {
144 return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
145 "%s version mismatch, expected %d.%d, got %d.%d",
146 component_name,
147 expected_version.major_version,
148 expected_version.minor_version,
149 actual_version.major_version,
150 actual_version.minor_version);
151 }
152
153 return VDO_SUCCESS;
154 }
155
156 /**
157 * vdo_validate_header() - Check whether a header matches expectations.
158 * @expected_header: The expected header.
159 * @actual_header: The header being validated.
160 * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
161 * required that actual_header.size >= expected_header.size.
162 * @name: The name of the component or the calling function (for error logging).
163 *
164 * Logs an error describing the first mismatch found.
165 *
166 * Return: VDO_SUCCESS if the header meets expectations,
167 * VDO_INCORRECT_COMPONENT if the component ids don't match,
168 * VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
169 */
static int vdo_validate_header(const struct header *expected_header,
			       const struct header *actual_header,
			       bool exact_size, const char *name)
{
	size_t expected_size = expected_header->size;
	size_t actual_size = actual_header->size;
	int result;

	/* First, the component ids must agree. */
	if (expected_header->id != actual_header->id) {
		return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
					      "%s ID mismatch, expected %d, got %d",
					      name, expected_header->id,
					      actual_header->id);
	}

	/* Next, the versions must match exactly. */
	result = validate_version(expected_header->version,
				  actual_header->version, name);
	if (result != VDO_SUCCESS)
		return result;

	/* Finally, the size must be large enough, or equal when exact_size is set. */
	if ((actual_size < expected_size) ||
	    (exact_size && (actual_size != expected_size))) {
		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
					      "%s size mismatch, expected %zu, got %zu",
					      name, expected_size, actual_size);
	}

	return VDO_SUCCESS;
}
198
/* Append a packed version number to the buffer, advancing the offset. */
static void encode_version_number(u8 *buffer, size_t *offset,
				  struct version_number version)
{
	struct packed_version_number packed_version = vdo_pack_version_number(version);

	memcpy(&buffer[*offset], &packed_version, sizeof(packed_version));
	*offset += sizeof(packed_version);
}
207
/* Append a packed header to the buffer, advancing the offset. */
static void vdo_encode_header(u8 *buffer, size_t *offset,
			      const struct header *header)
{
	struct packed_header packed_header = vdo_pack_header(header);

	memcpy(&buffer[*offset], &packed_header, sizeof(packed_header));
	*offset += sizeof(packed_header);
}
216
/* Read a packed version number from the buffer, advancing the offset. */
static void decode_version_number(u8 *buffer, size_t *offset,
				  struct version_number *version)
{
	struct packed_version_number packed_version;

	memcpy(&packed_version, &buffer[*offset], sizeof(packed_version));
	*offset += sizeof(packed_version);
	*version = vdo_unpack_version_number(packed_version);
}
226
/* Read a packed header from the buffer, advancing the offset. */
static void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
{
	struct packed_header packed_header;

	memcpy(&packed_header, &buffer[*offset], sizeof(packed_header));
	*offset += sizeof(packed_header);
	*header = vdo_unpack_header(&packed_header);
}
236
237 /**
238 * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
239 * @buffer: A buffer to decode from.
240 * @offset: The offset in the buffer at which to decode.
241 * @geometry: The structure to receive the decoded fields.
242 * @version: The geometry block version to decode.
243 */
static void decode_volume_geometry(u8 *buffer, size_t *offset,
				   struct volume_geometry *geometry, u32 version)
{
	u32 unused, mem;
	enum volume_region_id id;
	nonce_t nonce;
	block_count_t bio_offset = 0;
	bool sparse;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, offset, &unused);
	geometry->unused = unused;

	decode_u64_le(buffer, offset, &nonce);
	geometry->nonce = nonce;

	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
	*offset += sizeof(uuid_t);

	/* The bio_offset field only exists in version 5 and later geometry. */
	if (version > 4)
		decode_u64_le(buffer, offset, &bio_offset);
	geometry->bio_offset = bio_offset;

	/* Each region record is a u32 id followed by a u64 start block. */
	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
		physical_block_number_t start_block;
		enum volume_region_id saved_id;

		decode_u32_le(buffer, offset, &saved_id);
		decode_u64_le(buffer, offset, &start_block);

		geometry->regions[id] = (struct volume_region) {
			.id = saved_id,
			.start_block = start_block,
		};
	}

	/*
	 * The index config is the memory size, a u32 which the encoder always
	 * writes as zero (skipped here), and a one-byte sparse flag; this must
	 * mirror the field order in vdo_encode_volume_geometry().
	 */
	decode_u32_le(buffer, offset, &mem);
	*offset += sizeof(u32);
	sparse = buffer[(*offset)++];

	geometry->index_config = (struct index_config) {
		.mem = mem,
		.sparse = sparse,
	};
}
289
290 /**
291 * vdo_encode_volume_geometry() - Encode the on-disk representation of a volume geometry into a buffer.
292 * @buffer: A buffer to store the encoding.
293 * @geometry: The geometry to encode.
294 * @version: The geometry block version to encode.
295 *
296 * Return: VDO_SUCCESS or an error.
297 */
int vdo_encode_volume_geometry(u8 *buffer, const struct volume_geometry *geometry,
			       u32 version)
{
	int result;
	enum volume_region_id id;
	u32 checksum;
	size_t offset = 0;
	const struct header *header;

	memcpy(buffer, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE);
	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;

	/* Versions 5 and later include the bio_offset field. */
	header = (version > 4) ? &GEOMETRY_BLOCK_HEADER_5_0 : &GEOMETRY_BLOCK_HEADER_4_0;
	vdo_encode_header(buffer, &offset, header);

	/* This is for backwards compatibility */
	encode_u32_le(buffer, &offset, geometry->unused);
	encode_u64_le(buffer, &offset, geometry->nonce);
	memcpy(buffer + offset, (unsigned char *) &geometry->uuid, sizeof(uuid_t));
	offset += sizeof(uuid_t);

	if (version > 4)
		encode_u64_le(buffer, &offset, geometry->bio_offset);

	/* Each region record is a u32 id followed by a u64 start block. */
	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
		encode_u32_le(buffer, &offset, geometry->regions[id].id);
		encode_u64_le(buffer, &offset, geometry->regions[id].start_block);
	}

	/* Index config: the memory size, an always-zero u32, then the sparse flag. */
	encode_u32_le(buffer, &offset, geometry->index_config.mem);
	encode_u32_le(buffer, &offset, 0);

	if (geometry->index_config.sparse)
		buffer[offset++] = 1;
	else
		buffer[offset++] = 0;

	/* Everything except the trailing checksum must now be encoded. */
	result = VDO_ASSERT(header->size == offset + sizeof(u32),
			    "should have encoded up to the geometry checksum");
	if (result != VDO_SUCCESS)
		return result;

	/* The checksum covers everything encoded so far. */
	checksum = vdo_crc32(buffer, offset);
	encode_u32_le(buffer, &offset, checksum);

	return VDO_SUCCESS;
}
345
346 /**
347 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
348 * @block: The encoded geometry block.
349 * @geometry: The structure to receive the decoded fields.
350 */
vdo_parse_geometry_block(u8 * block,struct volume_geometry * geometry)351 int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
352 {
353 u32 checksum, saved_checksum;
354 struct header header;
355 size_t offset = 0;
356 int result;
357
358 if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
359 return VDO_BAD_MAGIC;
360 offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;
361
362 vdo_decode_header(block, &offset, &header);
363 if (header.version.major_version <= 4) {
364 result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
365 true, __func__);
366 } else {
367 result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
368 true, __func__);
369 }
370 if (result != VDO_SUCCESS)
371 return result;
372
373 decode_volume_geometry(block, &offset, geometry, header.version.major_version);
374
375 result = VDO_ASSERT(header.size == offset + sizeof(u32),
376 "should have decoded up to the geometry checksum");
377 if (result != VDO_SUCCESS)
378 return result;
379
380 /* Decode and verify the checksum. */
381 checksum = vdo_crc32(block, offset);
382 decode_u32_le(block, &offset, &saved_checksum);
383
384 return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
385 }
386
/* Format a buffer as an empty block map page with the given identity. */
struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
						 physical_block_number_t pbn,
						 bool initialized)
{
	struct block_map_page *map_page = buffer;

	/* Start from an all-zero page, then fill in the header fields. */
	memset(buffer, 0, VDO_BLOCK_SIZE);
	map_page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
	map_page->header.nonce = __cpu_to_le64(nonce);
	map_page->header.pbn = __cpu_to_le64(pbn);
	map_page->header.initialized = initialized;
	return map_page;
}
400
/* Check a block map page's version, initialization flag, nonce, and pbn. */
enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
							 nonce_t nonce,
							 physical_block_number_t pbn)
{
	BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);

	if (!vdo_are_same_version(BLOCK_MAP_4_1,
				  vdo_unpack_version_number(page->version)))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (!page->header.initialized)
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (nonce != __le64_to_cpu(page->header.nonce))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	/* A valid page recorded under the wrong pbn is "bad", not "invalid". */
	return ((pbn == vdo_get_block_map_page_pbn(page)) ?
		VDO_BLOCK_MAP_PAGE_VALID : VDO_BLOCK_MAP_PAGE_BAD);
}
417
/**
 * decode_block_map_state_2_0() - Decode block map component state version 2.0 from a buffer.
 * @buffer: The buffer being decoded.
 * @offset: The offset to start decoding from.
 * @state: A pointer to a block map state to hold the decoded result.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
				      struct block_map_state_2_0 *state)
{
	size_t initial_offset;
	block_count_t flat_page_count, root_count;
	physical_block_number_t flat_page_origin, root_origin;
	struct header header;
	int result;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;

	decode_u64_le(buffer, offset, &flat_page_origin);
	/*
	 * Log the decoded local value; *state has not been filled in yet, so
	 * its fields must not be read here.
	 */
	result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    "Flat page origin must be %u (recorded as %llu)",
			    VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    (unsigned long long) flat_page_origin);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &flat_page_count);
	result = VDO_ASSERT(flat_page_count == 0,
			    "Flat page count must be 0 (recorded as %llu)",
			    (unsigned long long) flat_page_count);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &root_origin);
	decode_u64_le(buffer, offset, &root_count);

	result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
			    "decoded block map component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct block_map_state_2_0) {
		.flat_page_origin = flat_page_origin,
		.flat_page_count = flat_page_count,
		.root_origin = root_origin,
		.root_count = root_count,
	};

	return VDO_SUCCESS;
}
466
/**
 * encode_block_map_state_2_0() - Encode block map state version 2.0 into a buffer.
 * @buffer: A buffer to store the encoding.
 * @offset: The offset in the buffer at which to encode.
 * @state: The block map state to encode.
 */
static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
				       struct block_map_state_2_0 state)
{
	size_t payload_start;

	vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
	payload_start = *offset;

	encode_u64_le(buffer, offset, state.flat_page_origin);
	encode_u64_le(buffer, offset, state.flat_page_count);
	encode_u64_le(buffer, offset, state.root_origin);
	encode_u64_le(buffer, offset, state.root_count);

	VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - payload_start,
			    "encoded block map component size must match header size");
}
483
484 /**
485 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
486 * level in order to grow the forest to a new number of entries.
487 * @root_count: The number of block map roots.
488 * @old_sizes: The sizes of the old tree segments.
489 * @entries: The new number of entries the block map must address.
490 * @new_sizes: The sizes of the new tree segments.
491 *
492 * Return: The total number of non-leaf pages required.
493 */
block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
					   struct boundary *old_sizes,
					   block_count_t entries,
					   struct boundary *new_sizes)
{
	page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
	page_count_t pages_at_level = DIV_ROUND_UP(leaf_pages, root_count);
	block_count_t total = 0;
	height_t level;

	for (level = 0; level < VDO_BLOCK_MAP_TREE_HEIGHT; level++) {
		block_count_t added_pages;

		/* Each interior level is 1/ENTRIES_PER_PAGE the size of the one below. */
		pages_at_level = DIV_ROUND_UP(pages_at_level,
					      VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
		new_sizes->levels[level] = pages_at_level;

		/* Only count pages beyond what the old trees already have. */
		added_pages = pages_at_level;
		if (old_sizes != NULL)
			added_pages -= old_sizes->levels[level];
		total += (added_pages * root_count);
	}

	return total;
}
517
518 /**
519 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal.
520 * @buffer: A buffer to store the encoding.
521 * @offset: The offset in the buffer at which to encode.
522 * @state: The recovery journal state to encode.
523 *
524 * Return: VDO_SUCCESS or an error code.
525 */
static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
					      struct recovery_journal_state_7_0 state)
{
	size_t payload_start;

	vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);
	payload_start = *offset;

	encode_u64_le(buffer, offset, state.journal_start);
	encode_u64_le(buffer, offset, state.logical_blocks_used);
	encode_u64_le(buffer, offset, state.block_map_data_blocks);

	VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - payload_start,
			    "encoded recovery journal component size must match header size");
}
541
542 /**
543 * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
544 * @buffer: The buffer containing the saved state.
545 * @offset: The offset to start decoding from.
546 * @state: A pointer to a recovery journal state to hold the result of a successful decode.
547 *
548 * Return: VDO_SUCCESS or an error code.
549 */
decode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 * state)550 static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
551 struct recovery_journal_state_7_0 *state)
552 {
553 struct header header;
554 int result;
555 size_t initial_offset;
556 sequence_number_t journal_start;
557 block_count_t logical_blocks_used, block_map_data_blocks;
558
559 vdo_decode_header(buffer, offset, &header);
560 result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
561 __func__);
562 if (result != VDO_SUCCESS)
563 return result;
564
565 initial_offset = *offset;
566 decode_u64_le(buffer, offset, &journal_start);
567 decode_u64_le(buffer, offset, &logical_blocks_used);
568 decode_u64_le(buffer, offset, &block_map_data_blocks);
569
570 result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
571 "decoded recovery journal component size must match header size");
572 if (result != VDO_SUCCESS)
573 return result;
574
575 *state = (struct recovery_journal_state_7_0) {
576 .journal_start = journal_start,
577 .logical_blocks_used = logical_blocks_used,
578 .block_map_data_blocks = block_map_data_blocks,
579 };
580
581 return VDO_SUCCESS;
582 }
583
584 /**
585 * vdo_get_journal_operation_name() - Get the name of a journal operation.
586 * @operation: The operation to name.
587 *
588 * Return: The name of the operation.
589 */
vdo_get_journal_operation_name(enum journal_operation operation)590 const char *vdo_get_journal_operation_name(enum journal_operation operation)
591 {
592 switch (operation) {
593 case VDO_JOURNAL_DATA_REMAPPING:
594 return "data remapping";
595
596 case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
597 return "block map remapping";
598
599 default:
600 return "unknown journal operation";
601 }
602 }
603
604 /**
605 * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
606 * @buffer: A buffer to store the encoding.
607 * @offset: The offset in the buffer at which to encode.
608 * @state: The slab depot state to encode.
609 */
static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
					struct slab_depot_state_2_0 state)
{
	size_t initial_offset;

	vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);

	/* Field order must mirror decode_slab_depot_state_2_0(). */
	initial_offset = *offset;
	encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
	encode_u64_le(buffer, offset, state.slab_config.data_blocks);
	encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
	encode_u64_le(buffer, offset, state.first_block);
	encode_u64_le(buffer, offset, state.last_block);
	buffer[(*offset)++] = state.zone_count;

	/* Fixed copy-paste in the message: this is the slab depot, not the block map. */
	VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
			    "encoded slab depot component size must match header size");
}
632
633 /**
634 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
635 * @buffer: The buffer being decoded.
636 * @offset: The offset to start decoding from.
637 * @state: A pointer to a slab depot state to hold the decoded result.
638 *
639 * Return: VDO_SUCCESS or an error code.
640 */
static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
				       struct slab_depot_state_2_0 *state)
{
	struct slab_config slab_config;
	physical_block_number_t first_block, last_block;
	zone_count_t zone_count;
	block_count_t decoded;
	size_t payload_start;
	struct header header;
	int result;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
				     __func__);
	if (result != VDO_SUCCESS)
		return result;

	/* Field order must mirror encode_slab_depot_state_2_0(). */
	payload_start = *offset;
	decode_u64_le(buffer, offset, &decoded);
	slab_config.slab_blocks = decoded;

	decode_u64_le(buffer, offset, &decoded);
	slab_config.data_blocks = decoded;

	decode_u64_le(buffer, offset, &decoded);
	slab_config.reference_count_blocks = decoded;

	decode_u64_le(buffer, offset, &decoded);
	slab_config.slab_journal_blocks = decoded;

	decode_u64_le(buffer, offset, &decoded);
	slab_config.slab_journal_flushing_threshold = decoded;

	decode_u64_le(buffer, offset, &decoded);
	slab_config.slab_journal_blocking_threshold = decoded;

	decode_u64_le(buffer, offset, &decoded);
	slab_config.slab_journal_scrubbing_threshold = decoded;

	decode_u64_le(buffer, offset, &first_block);
	decode_u64_le(buffer, offset, &last_block);
	zone_count = buffer[(*offset)++];

	result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - payload_start,
			    "decoded slab depot component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	/* Only fill in the result once everything has decoded cleanly. */
	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = first_block,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	return VDO_SUCCESS;
}
698
699 /**
700 * vdo_configure_slab_depot() - Configure the slab depot.
701 * @partition: The slab depot partition
702 * @slab_config: The configuration of a single slab.
703 * @zone_count: The number of zones the depot will use.
704 * @state: The state structure to be configured.
705 *
706 * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
707 * that will fit and still leave room for the depot metadata, then return the saved state for that
708 * configuration.
709 *
710 * Return: VDO_SUCCESS or an error code.
711 */
int vdo_configure_slab_depot(const struct partition *partition,
			     struct slab_config slab_config, zone_count_t zone_count,
			     struct slab_depot_state_2_0 *state)
{
	block_count_t blocks_per_slab = slab_config.slab_blocks;
	block_count_t blocks_in_slabs, data_block_total;
	physical_block_number_t depot_end;
	size_t slab_count;

	vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
		      __func__, (unsigned long long) partition->count,
		      (unsigned long long) partition->offset,
		      (unsigned long long) blocks_per_slab, zone_count);

	/* We do not allow runt slabs, so we waste up to a slab's worth. */
	slab_count = (partition->count / blocks_per_slab);
	if (slab_count == 0)
		return VDO_NO_SPACE;

	if (slab_count > MAX_VDO_SLABS)
		return VDO_TOO_MANY_SLABS;

	blocks_in_slabs = slab_count * slab_config.slab_blocks;
	data_block_total = slab_count * slab_config.data_blocks;
	depot_end = partition->offset + blocks_in_slabs;

	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = partition->offset,
		.last_block = depot_end,
		.zone_count = zone_count,
	};

	vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
		      (unsigned long long) depot_end,
		      (unsigned long long) data_block_total, slab_count,
		      (unsigned long long) (partition->count - (depot_end - partition->offset)));

	return VDO_SUCCESS;
}
752
753 /**
754 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
755 * @slab_size: The number of blocks per slab.
756 * @slab_journal_blocks: The number of blocks for the slab journal.
757 * @slab_config: The slab configuration to initialize.
758 *
759 * Return: VDO_SUCCESS or an error code.
760 */
int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
		       struct slab_config *slab_config)
{
	block_count_t reference_blocks, metadata_blocks, data_blocks;
	block_count_t flushing_threshold, blocking_threshold, scrubbing_threshold;
	block_count_t remaining, minimal_extra_space;

	if (slab_journal_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * This calculation should technically be a recurrence, but the total number of metadata
	 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
	 * data block in each slab with more iteration.
	 */
	reference_blocks =
		vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
	metadata_blocks = (reference_blocks + slab_journal_blocks);

	/* Make sure configured slabs are not too small. */
	if (metadata_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	data_blocks = slab_size - metadata_blocks;

	/*
	 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
	 * production, or 3/4ths, so we use this ratio for all sizes.
	 */
	flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;

	/*
	 * The blocking threshold should be far enough from the flushing threshold to not produce
	 * delays, but far enough from the end of the journal to allow multiple successive recovery
	 * failures.
	 */
	remaining = slab_journal_blocks - flushing_threshold;
	blocking_threshold = flushing_threshold + ((remaining * 5) / 7);

	/* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
	minimal_extra_space =
		1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
	scrubbing_threshold = ((slab_journal_blocks > minimal_extra_space) ?
			       (slab_journal_blocks - minimal_extra_space) :
			       blocking_threshold);
	if (blocking_threshold > scrubbing_threshold)
		blocking_threshold = scrubbing_threshold;

	*slab_config = (struct slab_config) {
		.slab_blocks = slab_size,
		.data_blocks = data_blocks,
		.reference_count_blocks = reference_blocks,
		.slab_journal_blocks = slab_journal_blocks,
		.slab_journal_flushing_threshold = flushing_threshold,
		.slab_journal_blocking_threshold = blocking_threshold,
		.slab_journal_scrubbing_threshold = scrubbing_threshold,
	};
	return VDO_SUCCESS;
}
815
816 /**
817 * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
818 * @block: The journal block holding the entry.
819 * @entry_count: The number of the entry.
820 *
821 * Return: The decoded entry.
822 */
struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
							journal_entry_count_t entry_count)
{
	struct slab_journal_entry entry =
		vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);

	/*
	 * When the block contains block map increments, entry_types is a
	 * bitmap (one bit per entry, eight entries per byte) marking which
	 * entries are block map remappings; patch the operation for those.
	 */
	if (block->header.has_block_map_increments &&
	    ((block->payload.full_entries.entry_types[entry_count / 8] &
	      ((u8) 1 << (entry_count % 8))) != 0))
		entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;

	return entry;
}
836
837 /**
838 * allocate_partition() - Allocate a partition and add it to a layout.
839 * @layout: The layout containing the partition.
840 * @id: The id of the partition.
841 * @offset: The offset into the layout at which the partition begins.
842 * @size: The size of the partition in blocks.
843 *
844 * Return: VDO_SUCCESS or an error.
845 */
static int allocate_partition(struct layout *layout, u8 id,
			      physical_block_number_t offset, block_count_t size)
{
	struct partition *new_partition;
	int result;

	result = vdo_allocate(1, __func__, &new_partition);
	if (result != VDO_SUCCESS)
		return result;

	/* Fill in the partition and push it onto the layout's list. */
	new_partition->id = id;
	new_partition->offset = offset;
	new_partition->count = size;
	new_partition->next = layout->head;
	layout->head = new_partition;

	return VDO_SUCCESS;
}
864
865 /**
866 * make_partition() - Create a new partition from the beginning or end of the unused space in a
867 * layout.
868 * @layout: The layout.
869 * @id: The id of the partition to make.
870 * @size: The number of blocks to carve out; if 0, all remaining space will be used.
871 * @beginning: True if the partition should start at the beginning of the unused space.
872 *
873 * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
874 * remaining.
875 */
make_partition(struct layout * layout,enum partition_id id,block_count_t size,bool beginning)876 static int __must_check make_partition(struct layout *layout, enum partition_id id,
877 block_count_t size, bool beginning)
878 {
879 int result;
880 physical_block_number_t offset;
881 block_count_t free_blocks = layout->last_free - layout->first_free;
882
883 if (size == 0) {
884 if (free_blocks == 0)
885 return VDO_NO_SPACE;
886 size = free_blocks;
887 } else if (size > free_blocks) {
888 return VDO_NO_SPACE;
889 }
890
891 result = vdo_get_partition(layout, id, NULL);
892 if (result != VDO_UNKNOWN_PARTITION)
893 return VDO_PARTITION_EXISTS;
894
895 offset = beginning ? layout->first_free : (layout->last_free - size);
896
897 result = allocate_partition(layout, id, offset, size);
898 if (result != VDO_SUCCESS)
899 return result;
900
901 layout->num_partitions++;
902 if (beginning)
903 layout->first_free += size;
904 else
905 layout->last_free = layout->last_free - size;
906
907 return VDO_SUCCESS;
908 }
909
910 /**
911 * vdo_initialize_layout() - Lay out the partitions of a vdo.
912 * @size: The entire size of the vdo.
913 * @offset: The start of the layout on the underlying storage in blocks.
914 * @block_map_blocks: The size of the block map partition.
915 * @journal_blocks: The size of the journal partition.
916 * @summary_blocks: The size of the slab summary partition.
917 * @layout: The layout to initialize.
918 *
919 * Return: VDO_SUCCESS or an error.
920 */
vdo_initialize_layout(block_count_t size,physical_block_number_t offset,block_count_t block_map_blocks,block_count_t journal_blocks,block_count_t summary_blocks,struct layout * layout)921 int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
922 block_count_t block_map_blocks, block_count_t journal_blocks,
923 block_count_t summary_blocks, struct layout *layout)
924 {
925 int result;
926 block_count_t necessary_size =
927 (offset + block_map_blocks + journal_blocks + summary_blocks);
928
929 if (necessary_size > size)
930 return vdo_log_error_strerror(VDO_NO_SPACE,
931 "Not enough space to make a VDO");
932
933 *layout = (struct layout) {
934 .start = offset,
935 .size = size,
936 .first_free = offset,
937 .last_free = size,
938 .num_partitions = 0,
939 .head = NULL,
940 };
941
942 result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
943 if (result != VDO_SUCCESS) {
944 vdo_uninitialize_layout(layout);
945 return result;
946 }
947
948 result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
949 false);
950 if (result != VDO_SUCCESS) {
951 vdo_uninitialize_layout(layout);
952 return result;
953 }
954
955 result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
956 false);
957 if (result != VDO_SUCCESS) {
958 vdo_uninitialize_layout(layout);
959 return result;
960 }
961
962 result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
963 if (result != VDO_SUCCESS)
964 vdo_uninitialize_layout(layout);
965
966 return result;
967 }
968
969 /**
970 * vdo_uninitialize_layout() - Clean up a layout.
971 * @layout: The layout to clean up.
972 *
973 * All partitions created by this layout become invalid pointers.
974 */
vdo_uninitialize_layout(struct layout * layout)975 void vdo_uninitialize_layout(struct layout *layout)
976 {
977 while (layout->head != NULL) {
978 struct partition *part = layout->head;
979
980 layout->head = part->next;
981 vdo_free(part);
982 }
983
984 memset(layout, 0, sizeof(struct layout));
985 }
986
987 /**
988 * vdo_get_partition() - Get a partition by id.
989 * @layout: The layout from which to get a partition.
990 * @id: The id of the partition.
991 * @partition_ptr: A pointer to hold the partition.
992 *
993 * Return: VDO_SUCCESS or an error.
994 */
vdo_get_partition(struct layout * layout,enum partition_id id,struct partition ** partition_ptr)995 int vdo_get_partition(struct layout *layout, enum partition_id id,
996 struct partition **partition_ptr)
997 {
998 struct partition *partition;
999
1000 for (partition = layout->head; partition != NULL; partition = partition->next) {
1001 if (partition->id == id) {
1002 if (partition_ptr != NULL)
1003 *partition_ptr = partition;
1004 return VDO_SUCCESS;
1005 }
1006 }
1007
1008 return VDO_UNKNOWN_PARTITION;
1009 }
1010
1011 /**
1012 * vdo_get_known_partition() - Get a partition by id from a validated layout.
1013 * @layout: The layout from which to get a partition.
1014 * @id: The id of the partition.
1015 *
1016 * Return: the partition
1017 */
vdo_get_known_partition(struct layout * layout,enum partition_id id)1018 struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
1019 {
1020 struct partition *partition;
1021 int result = vdo_get_partition(layout, id, &partition);
1022
1023 VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
1024
1025 return partition;
1026 }
1027
/**
 * encode_layout() - Encode a layout into a buffer in the 3.0 on-disk format.
 * @buffer: The buffer to encode into.
 * @offset: A pointer to the offset at which to encode; advanced past the encoding.
 * @layout: The layout to encode.
 */
static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
{
	const struct partition *partition;
	size_t initial_offset;
	struct header header = VDO_LAYOUT_HEADER_3_0;

	/* Both the partition id and the partition count are encoded as single bytes below. */
	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
	VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
			    "layout partition count must fit in a byte");

	vdo_encode_header(buffer, offset, &header);

	/* Record where the payload starts so the encoded size can be checked below. */
	initial_offset = *offset;
	encode_u64_le(buffer, offset, layout->first_free);
	encode_u64_le(buffer, offset, layout->last_free);
	buffer[(*offset)++] = layout->num_partitions;

	VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "encoded size of a layout header must match structure");

	/* Each partition is encoded as: id byte, offset, a zero placeholder, count. */
	for (partition = layout->head; partition != NULL; partition = partition->next) {
		buffer[(*offset)++] = partition->id;
		encode_u64_le(buffer, offset, partition->offset);
		/* This field only exists for backwards compatibility */
		encode_u64_le(buffer, offset, 0);
		encode_u64_le(buffer, offset, partition->count);
	}

	VDO_ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
			    "encoded size of a layout must match header size");
}
1059
/**
 * decode_layout() - Decode a layout from a buffer and validate its partitions.
 * @buffer: The buffer to decode from.
 * @offset: A pointer to the offset at which to decode; advanced past the decoding.
 * @start: The expected start of the layout on the underlying storage.
 * @size: The expected size of the layout in blocks.
 * @layout: The layout to populate.
 *
 * On any failure after partitions have been allocated, the layout is uninitialized
 * before returning, so the caller need not clean it up.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct layout_3_0 layout_header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[(*offset)++];
	layout_header = (struct layout_3_0) {
		.first_free = first_free,
		.last_free = last_free,
		.partition_count = partition_count,
	};

	result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = layout_header.first_free;
	layout->last_free = layout_header.last_free;
	layout->num_partitions = layout_header.partition_count;

	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	for (i = 0; i < layout->num_partitions; i++) {
		u8 id;
		u64 partition_offset, count;

		id = buffer[(*offset)++];
		decode_u64_le(buffer, offset, &partition_offset);
		/* Skip the zero field written by encode_layout() for backwards compatibility. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return vdo_log_error_strerror(result,
						      "layout is missing required partition %u",
						      REQUIRED_PARTITIONS[i]);
		}

		/* Accumulate partition sizes onto start to check coverage below. */
		start += partition->count;
	}

	/* The required partitions must exactly account for every block up to size. */
	if (start != size) {
		vdo_uninitialize_layout(layout);
		return vdo_log_error_strerror(UDS_BAD_STATE,
					      "partitions do not cover the layout");
	}

	return VDO_SUCCESS;
}
1141
1142 /**
1143 * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1144 * @config: The vdo config to convert.
1145 *
1146 * Return: The platform-independent representation of the config.
1147 */
pack_vdo_config(struct vdo_config config)1148 static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1149 {
1150 return (struct packed_vdo_config) {
1151 .logical_blocks = __cpu_to_le64(config.logical_blocks),
1152 .physical_blocks = __cpu_to_le64(config.physical_blocks),
1153 .slab_size = __cpu_to_le64(config.slab_size),
1154 .recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1155 .slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1156 };
1157 }
1158
1159 /**
1160 * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1161 * @component: The VDO component data to convert.
1162 *
1163 * Return: The platform-independent representation of the component.
1164 */
pack_vdo_component(const struct vdo_component component)1165 static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1166 {
1167 return (struct packed_vdo_component_41_0) {
1168 .state = __cpu_to_le32(component.state),
1169 .complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1170 .read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1171 .config = pack_vdo_config(component.config),
1172 .nonce = __cpu_to_le64(component.nonce),
1173 };
1174 }
1175
/**
 * encode_vdo_component() - Encode the vdo component data into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: A pointer to the offset at which to encode; advanced past the encoding.
 * @component: The component data to encode.
 */
static void encode_vdo_component(u8 *buffer, size_t *offset,
				 struct vdo_component component)
{
	struct packed_vdo_component_41_0 packed = pack_vdo_component(component);

	/* The version number precedes the packed component on disk. */
	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}
1186
1187 /**
1188 * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1189 * @config: The packed vdo config to convert.
1190 *
1191 * Return: The native in-memory representation of the vdo config.
1192 */
unpack_vdo_config(struct packed_vdo_config config)1193 static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1194 {
1195 return (struct vdo_config) {
1196 .logical_blocks = __le64_to_cpu(config.logical_blocks),
1197 .physical_blocks = __le64_to_cpu(config.physical_blocks),
1198 .slab_size = __le64_to_cpu(config.slab_size),
1199 .recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1200 .slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1201 };
1202 }
1203
1204 /**
1205 * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1206 * representation.
1207 * @component: The packed vdo component data to convert.
1208 *
1209 * Return: The native in-memory representation of the component.
1210 */
unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)1211 static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1212 {
1213 return (struct vdo_component) {
1214 .state = __le32_to_cpu(component.state),
1215 .complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1216 .read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1217 .config = unpack_vdo_config(component.config),
1218 .nonce = __le64_to_cpu(component.nonce),
1219 };
1220 }
1221
1222 /**
1223 * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
1224 * @buffer: The buffer being decoded.
1225 * @offset: The offset to start decoding from.
1226 * @component: The vdo component structure to decode into.
1227 *
1228 * Return: VDO_SUCCESS or an error.
1229 */
decode_vdo_component(u8 * buffer,size_t * offset,struct vdo_component * component)1230 static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
1231 {
1232 struct version_number version;
1233 struct packed_vdo_component_41_0 packed;
1234 int result;
1235
1236 decode_version_number(buffer, offset, &version);
1237 result = validate_version(version, VDO_COMPONENT_DATA_41_0,
1238 "VDO component data");
1239 if (result != VDO_SUCCESS)
1240 return result;
1241
1242 memcpy(&packed, buffer + *offset, sizeof(packed));
1243 *offset += sizeof(packed);
1244 *component = unpack_vdo_component_41_0(packed);
1245 return VDO_SUCCESS;
1246 }
1247
1248 /**
1249 * vdo_validate_config() - Validate constraints on a VDO config.
1250 * @config: The VDO config.
1251 * @physical_block_count: The minimum block count of the underlying storage.
1252 * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
1253 * unspecified.
1254 *
1255 * Return: A success or error code.
1256 */
vdo_validate_config(const struct vdo_config * config,block_count_t physical_block_count,block_count_t logical_block_count)1257 int vdo_validate_config(const struct vdo_config *config,
1258 block_count_t physical_block_count,
1259 block_count_t logical_block_count)
1260 {
1261 struct slab_config slab_config;
1262 int result;
1263
1264 result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
1265 if (result != VDO_SUCCESS)
1266 return result;
1267
1268 result = VDO_ASSERT(is_power_of_2(config->slab_size),
1269 "slab size must be a power of two");
1270 if (result != VDO_SUCCESS)
1271 return result;
1272
1273 result = VDO_ASSERT(config->slab_size <= MAX_VDO_SLAB_BLOCKS,
1274 "slab size must be a power of two less than or equal to %d",
1275 MAX_VDO_SLAB_BLOCKS);
1276 if (result != VDO_SUCCESS)
1277 return result;
1278
1279 result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
1280 "slab journal size is within expected bound");
1281 if (result != VDO_SUCCESS)
1282 return result;
1283
1284 result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
1285 &slab_config);
1286 if (result != VDO_SUCCESS)
1287 return result;
1288
1289 result = VDO_ASSERT((slab_config.data_blocks >= 1),
1290 "slab must be able to hold at least one block");
1291 if (result != VDO_SUCCESS)
1292 return result;
1293
1294 result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
1295 if (result != VDO_SUCCESS)
1296 return result;
1297
1298 result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
1299 "physical block count %llu exceeds maximum %llu",
1300 (unsigned long long) config->physical_blocks,
1301 (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
1302 if (result != VDO_SUCCESS)
1303 return VDO_OUT_OF_RANGE;
1304
1305 if (physical_block_count != config->physical_blocks) {
1306 vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
1307 (unsigned long long) physical_block_count,
1308 (unsigned long long) config->physical_blocks);
1309 return VDO_PARAMETER_MISMATCH;
1310 }
1311
1312 if (logical_block_count > 0) {
1313 result = VDO_ASSERT((config->logical_blocks > 0),
1314 "logical blocks unspecified");
1315 if (result != VDO_SUCCESS)
1316 return result;
1317
1318 if (logical_block_count != config->logical_blocks) {
1319 vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
1320 (unsigned long long) logical_block_count,
1321 (unsigned long long) config->logical_blocks);
1322 return VDO_PARAMETER_MISMATCH;
1323 }
1324 }
1325
1326 result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
1327 "logical blocks too large");
1328 if (result != VDO_SUCCESS)
1329 return result;
1330
1331 result = VDO_ASSERT(config->recovery_journal_size > 0,
1332 "recovery journal size unspecified");
1333 if (result != VDO_SUCCESS)
1334 return result;
1335
1336 result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
1337 "recovery journal size must be a power of two");
1338 if (result != VDO_SUCCESS)
1339 return result;
1340
1341 return result;
1342 }
1343
1344 /**
1345 * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1346 * @states: The component states to destroy.
1347 */
vdo_destroy_component_states(struct vdo_component_states * states)1348 void vdo_destroy_component_states(struct vdo_component_states *states)
1349 {
1350 if (states == NULL)
1351 return;
1352
1353 vdo_uninitialize_layout(&states->layout);
1354 }
1355
1356 /**
1357 * decode_components() - Decode the components now that we know the component data is a version we
1358 * understand.
1359 * @buffer: The buffer being decoded.
1360 * @offset: The offset to start decoding from.
1361 * @geometry: The vdo geometry.
1362 * @states: An object to hold the successfully decoded state.
1363 *
1364 * Return: VDO_SUCCESS or an error.
1365 */
decode_components(u8 * buffer,size_t * offset,struct volume_geometry * geometry,struct vdo_component_states * states)1366 static int __must_check decode_components(u8 *buffer, size_t *offset,
1367 struct volume_geometry *geometry,
1368 struct vdo_component_states *states)
1369 {
1370 int result;
1371
1372 decode_vdo_component(buffer, offset, &states->vdo);
1373
1374 result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1375 states->vdo.config.physical_blocks, &states->layout);
1376 if (result != VDO_SUCCESS)
1377 return result;
1378
1379 result = decode_recovery_journal_state_7_0(buffer, offset,
1380 &states->recovery_journal);
1381 if (result != VDO_SUCCESS)
1382 return result;
1383
1384 result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1385 if (result != VDO_SUCCESS)
1386 return result;
1387
1388 result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1389 if (result != VDO_SUCCESS)
1390 return result;
1391
1392 VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1393 "All decoded component data was used");
1394 return VDO_SUCCESS;
1395 }
1396
1397 /**
1398 * vdo_decode_component_states() - Decode the payload of a super block.
1399 * @buffer: The buffer containing the encoded super block contents.
1400 * @geometry: The vdo geometry.
1401 * @states: A pointer to hold the decoded states.
1402 *
1403 * Return: VDO_SUCCESS or an error.
1404 */
vdo_decode_component_states(u8 * buffer,struct volume_geometry * geometry,struct vdo_component_states * states)1405 int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
1406 struct vdo_component_states *states)
1407 {
1408 int result;
1409 size_t offset = VDO_COMPONENT_DATA_OFFSET;
1410
1411 /* This is for backwards compatibility. */
1412 decode_u32_le(buffer, &offset, &states->unused);
1413
1414 /* Check the VDO volume version */
1415 decode_version_number(buffer, &offset, &states->volume_version);
1416 result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
1417 "volume");
1418 if (result != VDO_SUCCESS)
1419 return result;
1420
1421 result = decode_components(buffer, &offset, geometry, states);
1422 if (result != VDO_SUCCESS)
1423 vdo_uninitialize_layout(&states->layout);
1424
1425 return result;
1426 }
1427
1428 /**
1429 * vdo_validate_component_states() - Validate the decoded super block configuration.
1430 * @states: The state decoded from the super block.
1431 * @geometry_nonce: The nonce from the geometry block.
1432 * @physical_size: The minimum block count of the underlying storage.
1433 * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1434 * unspecified.
1435 *
1436 * Return: VDO_SUCCESS or an error if the configuration is invalid.
1437 */
vdo_validate_component_states(struct vdo_component_states * states,nonce_t geometry_nonce,block_count_t physical_size,block_count_t logical_size)1438 int vdo_validate_component_states(struct vdo_component_states *states,
1439 nonce_t geometry_nonce, block_count_t physical_size,
1440 block_count_t logical_size)
1441 {
1442 if (geometry_nonce != states->vdo.nonce) {
1443 return vdo_log_error_strerror(VDO_BAD_NONCE,
1444 "Geometry nonce %llu does not match superblock nonce %llu",
1445 (unsigned long long) geometry_nonce,
1446 (unsigned long long) states->vdo.nonce);
1447 }
1448
1449 return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
1450 }
1451
1452 /**
1453 * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
1454 * @buffer: A buffer to store the encoding.
1455 * @offset: The offset into the buffer to start the encoding.
1456 * @states: The component states to encode.
1457 */
vdo_encode_component_states(u8 * buffer,size_t * offset,const struct vdo_component_states * states)1458 static void vdo_encode_component_states(u8 *buffer, size_t *offset,
1459 const struct vdo_component_states *states)
1460 {
1461 /* This is for backwards compatibility. */
1462 encode_u32_le(buffer, offset, states->unused);
1463 encode_version_number(buffer, offset, states->volume_version);
1464 encode_vdo_component(buffer, offset, states->vdo);
1465 encode_layout(buffer, offset, &states->layout);
1466 encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
1467 encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
1468 encode_block_map_state_2_0(buffer, offset, states->block_map);
1469
1470 VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1471 "All super block component data was encoded");
1472 }
1473
1474 /**
1475 * vdo_encode_super_block() - Encode a super block into its on-disk representation.
1476 * @buffer: A buffer to store the encoding.
1477 * @states: The component states to encode.
1478 */
vdo_encode_super_block(u8 * buffer,struct vdo_component_states * states)1479 void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
1480 {
1481 u32 checksum;
1482 struct header header = SUPER_BLOCK_HEADER_12_0;
1483 size_t offset = 0;
1484
1485 header.size += VDO_COMPONENT_DATA_SIZE;
1486 vdo_encode_header(buffer, &offset, &header);
1487 vdo_encode_component_states(buffer, &offset, states);
1488
1489 checksum = vdo_crc32(buffer, offset);
1490 encode_u32_le(buffer, &offset, checksum);
1491
1492 /*
1493 * Even though the buffer is a full block, to avoid the potential corruption from a torn
1494 * write, the entire encoding must fit in the first sector.
1495 */
1496 VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
1497 "entire superblock must fit in one sector");
1498 }
1499
1500 /**
1501 * vdo_decode_super_block() - Decode a super block from its on-disk representation.
1502 * @buffer: The buffer to decode from.
1503 */
vdo_decode_super_block(u8 * buffer)1504 int vdo_decode_super_block(u8 *buffer)
1505 {
1506 struct header header;
1507 int result;
1508 u32 checksum, saved_checksum;
1509 size_t offset = 0;
1510
1511 /* Decode and validate the header. */
1512 vdo_decode_header(buffer, &offset, &header);
1513 result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
1514 if (result != VDO_SUCCESS)
1515 return result;
1516
1517 if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
1518 /*
1519 * We can't check release version or checksum until we know the content size, so we
1520 * have to assume a version mismatch on unexpected values.
1521 */
1522 return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
1523 "super block contents too large: %zu",
1524 header.size);
1525 }
1526
1527 /* Skip past the component data for now, to verify the checksum. */
1528 offset += VDO_COMPONENT_DATA_SIZE;
1529
1530 checksum = vdo_crc32(buffer, offset);
1531 decode_u32_le(buffer, &offset, &saved_checksum);
1532
1533 result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
1534 "must have decoded entire superblock payload");
1535 if (result != VDO_SUCCESS)
1536 return result;
1537
1538 return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
1539 }
1540
1541 /**
1542 * vdo_initialize_component_states() - Initialize the components so they can be written out.
1543 * @vdo_config: The config used for component state initialization.
1544 * @geometry: The volume geometry used to calculate the data region offset.
1545 * @nonce: The nonce to use to identify the vdo.
1546 * @states: The component states to initialize.
1547 *
1548 * Return: VDO_SUCCESS or an error code.
1549 */
vdo_initialize_component_states(const struct vdo_config * vdo_config,const struct volume_geometry * geometry,nonce_t nonce,struct vdo_component_states * states)1550 int vdo_initialize_component_states(const struct vdo_config *vdo_config,
1551 const struct volume_geometry *geometry,
1552 nonce_t nonce,
1553 struct vdo_component_states *states)
1554 {
1555 int result;
1556 struct slab_config slab_config;
1557 struct partition *partition;
1558
1559 states->vdo.config = *vdo_config;
1560 states->vdo.nonce = nonce;
1561 states->volume_version = VDO_VOLUME_VERSION_67_0;
1562
1563 states->recovery_journal = (struct recovery_journal_state_7_0) {
1564 .journal_start = RECOVERY_JOURNAL_STARTING_SEQUENCE_NUMBER,
1565 .logical_blocks_used = 0,
1566 .block_map_data_blocks = 0,
1567 };
1568
1569 /*
1570 * The layout starts 1 block past the beginning of the data region, as the
1571 * data region contains the super block but the layout does not.
1572 */
1573 result = vdo_initialize_layout(vdo_config->physical_blocks,
1574 vdo_get_data_region_start(*geometry) + 1,
1575 DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT,
1576 vdo_config->recovery_journal_size,
1577 VDO_SLAB_SUMMARY_BLOCKS,
1578 &states->layout);
1579 if (result != VDO_SUCCESS)
1580 return result;
1581
1582 result = vdo_configure_slab(vdo_config->slab_size,
1583 vdo_config->slab_journal_blocks,
1584 &slab_config);
1585 if (result != VDO_SUCCESS) {
1586 vdo_uninitialize_layout(&states->layout);
1587 return result;
1588 }
1589
1590 result = vdo_get_partition(&states->layout, VDO_SLAB_DEPOT_PARTITION,
1591 &partition);
1592 if (result != VDO_SUCCESS) {
1593 vdo_uninitialize_layout(&states->layout);
1594 return result;
1595 }
1596
1597 result = vdo_configure_slab_depot(partition, slab_config, 0,
1598 &states->slab_depot);
1599 if (result != VDO_SUCCESS) {
1600 vdo_uninitialize_layout(&states->layout);
1601 return result;
1602 }
1603
1604 result = vdo_get_partition(&states->layout, VDO_BLOCK_MAP_PARTITION,
1605 &partition);
1606 if (result != VDO_SUCCESS) {
1607 vdo_uninitialize_layout(&states->layout);
1608 return result;
1609 }
1610
1611 states->block_map = (struct block_map_state_2_0) {
1612 .flat_page_origin = VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
1613 .flat_page_count = 0,
1614 .root_origin = partition->offset,
1615 .root_count = DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT,
1616 };
1617
1618 states->vdo.state = VDO_NEW;
1619
1620 return VDO_SUCCESS;
1621 }
1622
1623 /**
1624 * vdo_compute_index_blocks() - Compute the number of blocks that the indexer will use.
1625 * @config: The index config from which the blocks are calculated.
1626 * @index_blocks_ptr: The number of blocks the index will use.
1627 *
1628 * Return: VDO_SUCCESS or an error code.
1629 */
vdo_compute_index_blocks(const struct index_config * config,block_count_t * index_blocks_ptr)1630 static int vdo_compute_index_blocks(const struct index_config *config,
1631 block_count_t *index_blocks_ptr)
1632 {
1633 int result;
1634 u64 index_bytes;
1635 struct uds_parameters uds_parameters = {
1636 .memory_size = config->mem,
1637 .sparse = config->sparse,
1638 };
1639
1640 result = uds_compute_index_size(&uds_parameters, &index_bytes);
1641 if (result != UDS_SUCCESS)
1642 return vdo_log_error_strerror(result, "error computing index size");
1643
1644 *index_blocks_ptr = index_bytes / VDO_BLOCK_SIZE;
1645 return VDO_SUCCESS;
1646 }
1647
1648 /**
1649 * vdo_initialize_volume_geometry() - Initialize the volume geometry so it can be written out.
1650 * @nonce: The nonce to use to identify the vdo.
1651 * @uuid: The uuid to use to identify the vdo.
1652 * @index_config: The config used for structure initialization.
1653 * @geometry: The volume geometry to initialize.
1654 *
1655 * Return: VDO_SUCCESS or an error code.
1656 */
vdo_initialize_volume_geometry(nonce_t nonce,uuid_t * uuid,const struct index_config * index_config,struct volume_geometry * geometry)1657 int vdo_initialize_volume_geometry(nonce_t nonce, uuid_t *uuid,
1658 const struct index_config *index_config,
1659 struct volume_geometry *geometry)
1660 {
1661 int result;
1662 block_count_t index_blocks = 0;
1663
1664 result = vdo_compute_index_blocks(index_config, &index_blocks);
1665 if (result != VDO_SUCCESS)
1666 return result;
1667
1668 *geometry = (struct volume_geometry) {
1669 /* This is for backwards compatibility. */
1670 .unused = 0,
1671 .nonce = nonce,
1672 .bio_offset = 0,
1673 .regions = {
1674 [VDO_INDEX_REGION] = {
1675 .id = VDO_INDEX_REGION,
1676 .start_block = 1,
1677 },
1678 [VDO_DATA_REGION] = {
1679 .id = VDO_DATA_REGION,
1680 .start_block = 1 + index_blocks,
1681 }
1682 }
1683 };
1684
1685 memcpy(&(geometry->uuid), uuid, sizeof(uuid_t));
1686 memcpy(&geometry->index_config, index_config, sizeof(struct index_config));
1687
1688 return VDO_SUCCESS;
1689 }
1690