1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2016, 2017 by Delphix. All rights reserved. 25 */ 26 27 #ifndef _SYS_UBERBLOCK_IMPL_H 28 #define _SYS_UBERBLOCK_IMPL_H 29 30 #include <sys/uberblock.h> 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 /* 37 * The uberblock version is incremented whenever an incompatible on-disk 38 * format change is made to the SPA, DMU, or ZAP. 39 * 40 * Note: the first two fields should never be moved. When a storage pool 41 * is opened, the uberblock must be read off the disk before the version 42 * can be checked. If the ub_version field is moved, we may not detect 43 * version mismatch. If the ub_magic field is moved, applications that 44 * expect the magic number in the first word won't work. 45 */ 46 #define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */ 47 #define UBERBLOCK_SHIFT 10 /* up to 1K */ 48 #define MMP_MAGIC 0xa11cea11 /* all-see-all */ 49 50 #define MMP_INTERVAL_VALID_BIT 0x01 51 #define MMP_SEQ_VALID_BIT 0x02 52 #define MMP_FAIL_INT_VALID_BIT 0x04 53 54 #define MMP_INTERVAL_MASK 0x00000000FFFFFF00 55 #define MMP_SEQ_MASK 0x0000FFFF00000000 56 #define MMP_FAIL_INT_MASK 0xFFFF000000000000 57 58 #define MMP_SEQ_MAX UINT16_MAX 59 60 #define MMP_VALID(ubp) ((ubp)->ub_magic == UBERBLOCK_MAGIC && \ 61 (ubp)->ub_mmp_magic == MMP_MAGIC) 62 #define MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \ 63 MMP_INTERVAL_VALID_BIT)) 64 #define MMP_SEQ_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \ 65 MMP_SEQ_VALID_BIT)) 66 #define MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \ 67 MMP_FAIL_INT_VALID_BIT)) 68 69 #define MMP_INTERVAL(ubp) (((ubp)->ub_mmp_config & MMP_INTERVAL_MASK) \ 70 >> 8) 71 #define MMP_SEQ(ubp) (((ubp)->ub_mmp_config & MMP_SEQ_MASK) \ 72 >> 32) 73 #define MMP_FAIL_INT(ubp) (((ubp)->ub_mmp_config & MMP_FAIL_INT_MASK) \ 74 >> 48) 75 76 #define MMP_INTERVAL_SET(write) \ 77 (((uint64_t)((write) & 0xFFFFFF) << 8) | MMP_INTERVAL_VALID_BIT) 78 79 #define MMP_SEQ_SET(seq) \ 80 (((uint64_t)((seq) & 0xFFFF) << 32) | MMP_SEQ_VALID_BIT) 81 82 #define MMP_FAIL_INT_SET(fail) \ 83 (((uint64_t)((fail) & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT) 84 85 86 #define MMP_SEQ_CLEAR(ubp) \ 87 ((ubp)->ub_mmp_config &= ~(MMP_SEQ_MASK | MMP_SEQ_VALID_BIT)) 88 89 /* 90 * RAIDZ expansion reflow information. 91 * 92 * 64 56 48 40 32 24 16 8 0 93 * +-------+-------+-------+-------+-------+-------+-------+-------+ 94 * |Scratch | Reflow | 95 * | State | Offset | 96 * +-------+-------+-------+-------+-------+-------+-------+-------+ 97 */ 98 typedef enum raidz_reflow_scratch_state { 99 RRSS_SCRATCH_NOT_IN_USE = 0, 100 RRSS_SCRATCH_VALID, 101 RRSS_SCRATCH_INVALID_SYNCED, 102 RRSS_SCRATCH_INVALID_SYNCED_ON_IMPORT, 103 RRSS_SCRATCH_INVALID_SYNCED_REFLOW 104 } raidz_reflow_scratch_state_t; 105 106 #define RRSS_GET_OFFSET(ub) \ 107 BF64_GET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0) 108 #define RRSS_SET_OFFSET(ub, x) \ 109 BF64_SET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0, x) 110 111 #define RRSS_GET_STATE(ub) \ 112 BF64_GET((ub)->ub_raidz_reflow_info, 55, 9) 113 #define RRSS_SET_STATE(ub, x) \ 114 BF64_SET((ub)->ub_raidz_reflow_info, 55, 9, x) 115 116 #define RAIDZ_REFLOW_SET(ub, state, offset) do { \ 117 (ub)->ub_raidz_reflow_info = 0; \ 118 RRSS_SET_OFFSET(ub, offset); \ 119 RRSS_SET_STATE(ub, state); \ 120 } while (0) 121 122 struct uberblock { 123 uint64_t ub_magic; /* UBERBLOCK_MAGIC */ 124 uint64_t ub_version; /* SPA_VERSION */ 125 uint64_t ub_txg; /* txg of last sync */ 126 uint64_t ub_guid_sum; /* sum of all vdev guids */ 127 uint64_t ub_timestamp; /* UTC time of last sync */ 128 blkptr_t ub_rootbp; /* MOS objset_phys_t */ 129 130 /* highest SPA_VERSION supported by software that wrote this txg */ 131 uint64_t ub_software_version; 132 133 /* Maybe missing in uberblocks we read, but always written */ 134 uint64_t ub_mmp_magic; /* MMP_MAGIC */ 135 /* 136 * If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off. 137 * Otherwise, nanosec since last MMP write. 138 */ 139 uint64_t ub_mmp_delay; 140 141 /* 142 * The ub_mmp_config contains the multihost write interval, multihost 143 * fail intervals, sequence number for sub-second granularity, and 144 * valid bit mask. This layout is as follows: 145 * 146 * 64 56 48 40 32 24 16 8 0 147 * +-------+-------+-------+-------+-------+-------+-------+-------+ 148 * 0 | Fail Intervals| Seq | Write Interval (ms) | VALID | 149 * +-------+-------+-------+-------+-------+-------+-------+-------+ 150 * 151 * This allows a write_interval of (2^24/1000)s, over 4.5 hours 152 * 153 * VALID Bits: 154 * - 0x01 - Write Interval (ms) 155 * - 0x02 - Sequence number exists 156 * - 0x04 - Fail Intervals 157 * - 0xf8 - Reserved 158 */ 159 uint64_t ub_mmp_config; 160 161 /* 162 * ub_checkpoint_txg indicates two things about the current uberblock: 163 * 164 * 1] If it is not zero then this uberblock is a checkpoint. If it is 165 * zero, then this uberblock is not a checkpoint. 166 * 167 * 2] On checkpointed uberblocks, the value of ub_checkpoint_txg is 168 * the ub_txg that the uberblock had at the time we moved it to 169 * the MOS config. 170 * 171 * The field is set when we checkpoint the uberblock and continues to 172 * hold that value even after we've rewound (unlike the ub_txg that 173 * is reset to a higher value). 174 * 175 * Besides checks used to determine whether we are reopening the 176 * pool from a checkpointed uberblock [see spa_ld_select_uberblock()], 177 * the value of the field is used to determine which ZIL blocks have 178 * been allocated according to the ms_sm when we are rewinding to a 179 * checkpoint. Specifically, if logical birth > ub_checkpoint_txg,then 180 * the ZIL block is not allocated [see uses of spa_min_claim_txg()]. 181 */ 182 uint64_t ub_checkpoint_txg; 183 184 uint64_t ub_raidz_reflow_info; 185 }; 186 187 #ifdef __cplusplus 188 } 189 #endif 190 191 #endif /* _SYS_UBERBLOCK_IMPL_H */ 192