xref: /src/sys/contrib/openzfs/include/sys/uberblock_impl.h (revision 8a62a2a5659d1839d8799b4274c04469d7f17c78)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
25  */
26 
27 #ifndef _SYS_UBERBLOCK_IMPL_H
28 #define	_SYS_UBERBLOCK_IMPL_H
29 
30 #include <sys/uberblock.h>
31 
32 #ifdef	__cplusplus
33 extern "C" {
34 #endif
35 
36 /*
37  * The uberblock version is incremented whenever an incompatible on-disk
38  * format change is made to the SPA, DMU, or ZAP.
39  *
40  * Note: the first two fields should never be moved.  When a storage pool
41  * is opened, the uberblock must be read off the disk before the version
42  * can be checked.  If the ub_version field is moved, we may not detect
43  * version mismatch.  If the ub_magic field is moved, applications that
44  * expect the magic number in the first word won't work.
45  */
/* Magic numbers that identify an uberblock and its MMP fields. */
#define	UBERBLOCK_MAGIC		0x00bab10c		/* oo-ba-bloc!	*/
#define	UBERBLOCK_SHIFT		10			/* up to 1K	*/
#define	MMP_MAGIC		0xa11cea11		/* all-see-all	*/

/*
 * Flag bits kept in the low byte of ub_mmp_config.  Each one marks the
 * corresponding field of ub_mmp_config as holding a meaningful value.
 */
#define	MMP_INTERVAL_VALID_BIT	0x01
#define	MMP_SEQ_VALID_BIT	0x02
#define	MMP_FAIL_INT_VALID_BIT	0x04

/*
 * Field masks within the 64-bit ub_mmp_config word.
 *
 * The masks are explicitly uint64_t so that all three share one width and
 * signedness.  An unsuffixed 0x00000000FFFFFF00 is a 32-bit unsigned int;
 * complementing it yields 0x000000FF, which would also clear bits 32-63
 * when and-ed into ub_mmp_config.
 */
#define	MMP_INTERVAL_MASK	((uint64_t)0x00000000FFFFFF00ULL)
#define	MMP_SEQ_MASK		((uint64_t)0x0000FFFF00000000ULL)
#define	MMP_FAIL_INT_MASK	((uint64_t)0xFFFF000000000000ULL)

/* Largest value representable in the 16-bit sequence field. */
#define	MMP_SEQ_MAX		UINT16_MAX

/*
 * MMP_VALID() is true when both the uberblock magic and the MMP magic match.
 * The MMP_*_VALID() variants additionally require the field's valid bit to
 * be set in ub_mmp_config.  All of them evaluate (ubp) more than once.
 */
#define	MMP_VALID(ubp)		((ubp)->ub_magic == UBERBLOCK_MAGIC && \
				    (ubp)->ub_mmp_magic == MMP_MAGIC)
#define	MMP_INTERVAL_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
				    MMP_INTERVAL_VALID_BIT))
#define	MMP_SEQ_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
				    MMP_SEQ_VALID_BIT))
#define	MMP_FAIL_INT_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
				    MMP_FAIL_INT_VALID_BIT))

/*
 * Extract a field from ub_mmp_config.  These do not look at the valid bits;
 * check the matching MMP_*_VALID() macro before trusting the result.
 */
#define	MMP_INTERVAL(ubp)	(((ubp)->ub_mmp_config & MMP_INTERVAL_MASK) \
				    >> 8)
#define	MMP_SEQ(ubp)		(((ubp)->ub_mmp_config & MMP_SEQ_MASK) \
				    >> 32)
#define	MMP_FAIL_INT(ubp)	(((ubp)->ub_mmp_config & MMP_FAIL_INT_MASK) \
				    >> 48)

/*
 * Encode one field for ub_mmp_config: the value is truncated to the field
 * width, shifted into position, and combined with the field's valid bit.
 * The results of the three macros can be or-ed together.
 */
#define	MMP_INTERVAL_SET(write) \
	    (((uint64_t)((write) & 0xFFFFFF) << 8) | MMP_INTERVAL_VALID_BIT)

#define	MMP_SEQ_SET(seq) \
	    (((uint64_t)((seq) & 0xFFFF) << 32) | MMP_SEQ_VALID_BIT)

#define	MMP_FAIL_INT_SET(fail) \
	    (((uint64_t)((fail) & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)

/*
 * Clear the sequence field and its valid bit in place, leaving every other
 * bit of ub_mmp_config untouched.
 */
#define	MMP_SEQ_CLEAR(ubp) \
	    ((ubp)->ub_mmp_config &= ~(MMP_SEQ_MASK | MMP_SEQ_VALID_BIT))
88 
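/*
 * RAIDZ expansion reflow information (ub_raidz_reflow_info).
 *
 *	64      56      48      40      32      24      16      8       0
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 *	|Scratch |                    Reflow                            |
 *	| State  |                    Offset                            |
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * As encoded by the RRSS_* accessors below, the scratch state occupies the
 * top 9 bits (55-63) and the reflow offset the low 55 bits (0-54).
 */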
/*
 * Values for the "Scratch State" field of ub_raidz_reflow_info, which is
 * read and written with RRSS_GET_STATE() / RRSS_SET_STATE().
 *
 * NOTE(review): these values appear to be persisted in the uberblock, so the
 * numbers are spelled out explicitly; existing values should presumably
 * never be renumbered -- confirm before changing.
 */
typedef enum raidz_reflow_scratch_state {
	RRSS_SCRATCH_NOT_IN_USE			= 0,
	RRSS_SCRATCH_VALID			= 1,
	RRSS_SCRATCH_INVALID_SYNCED		= 2,
	RRSS_SCRATCH_INVALID_SYNCED_ON_IMPORT	= 3,
	RRSS_SCRATCH_INVALID_SYNCED_REFLOW	= 4
} raidz_reflow_scratch_state_t;
105 
/*
 * Read / write the "Reflow Offset" field of ub_raidz_reflow_info through the
 * BF64_*_SB helpers.
 *
 * NOTE(review): the helper arguments are assumed to be (low bit, length,
 * shift, bias), i.e. a 55-bit field at bit 0 scaled by SPA_MINBLOCKSHIFT --
 * confirm against the BF64_GET_SB()/BF64_SET_SB() definitions.
 */
#define	RRSS_GET_OFFSET(ubp)	\
	BF64_GET_SB((ubp)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0)
#define	RRSS_SET_OFFSET(ubp, off)	\
	BF64_SET_SB((ubp)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0, \
	    off)

/*
 * Read / write the "Scratch State" field of ub_raidz_reflow_info through the
 * BF64_* helpers (arguments 55 and 9).
 */
#define	RRSS_GET_STATE(ubp)	\
	BF64_GET((ubp)->ub_raidz_reflow_info, 55, 9)
#define	RRSS_SET_STATE(ubp, st)	\
	BF64_SET((ubp)->ub_raidz_reflow_info, 55, 9, st)

/*
 * Reset ub_raidz_reflow_info to zero, then store the offset followed by the
 * state.  The uberblock argument is evaluated more than once, so it must not
 * have side effects.
 */
#define	RAIDZ_REFLOW_SET(ubp, st, off)	\
	do {	\
		(ubp)->ub_raidz_reflow_info = 0;	\
		RRSS_SET_OFFSET(ubp, off);	\
		RRSS_SET_STATE(ubp, st);	\
	} while (0)
121 
122 struct uberblock {
123 	uint64_t	ub_magic;	/* UBERBLOCK_MAGIC		*/
124 	uint64_t	ub_version;	/* SPA_VERSION			*/
125 	uint64_t	ub_txg;		/* txg of last sync		*/
126 	uint64_t	ub_guid_sum;	/* sum of all vdev guids	*/
127 	uint64_t	ub_timestamp;	/* UTC time of last sync	*/
128 	blkptr_t	ub_rootbp;	/* MOS objset_phys_t		*/
129 
130 	/* highest SPA_VERSION supported by software that wrote this txg */
131 	uint64_t	ub_software_version;
132 
133 	/* Maybe missing in uberblocks we read, but always written */
134 	uint64_t	ub_mmp_magic;	/* MMP_MAGIC			*/
135 	/*
136 	 * If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off.
137 	 * Otherwise, nanosec since last MMP write.
138 	 */
139 	uint64_t	ub_mmp_delay;
140 
141 	/*
142 	 * The ub_mmp_config contains the multihost write interval, multihost
143 	 * fail intervals, sequence number for sub-second granularity, and
144 	 * valid bit mask.  This layout is as follows:
145 	 *
146 	 *   64      56      48      40      32      24      16      8       0
147 	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
148 	 * 0 | Fail Intervals|      Seq      |   Write Interval (ms) | VALID |
149 	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
150 	 *
151 	 * This allows a write_interval of (2^24/1000)s, over 4.5 hours
152 	 *
153 	 * VALID Bits:
154 	 * - 0x01 - Write Interval (ms)
155 	 * - 0x02 - Sequence number exists
156 	 * - 0x04 - Fail Intervals
157 	 * - 0xf8 - Reserved
158 	 */
159 	uint64_t	ub_mmp_config;
160 
161 	/*
162 	 * ub_checkpoint_txg indicates two things about the current uberblock:
163 	 *
164 	 * 1] If it is not zero then this uberblock is a checkpoint. If it is
165 	 *    zero, then this uberblock is not a checkpoint.
166 	 *
167 	 * 2] On checkpointed uberblocks, the value of ub_checkpoint_txg is
168 	 *    the ub_txg that the uberblock had at the time we moved it to
169 	 *    the MOS config.
170 	 *
171 	 * The field is set when we checkpoint the uberblock and continues to
172 	 * hold that value even after we've rewound (unlike the ub_txg that
173 	 * is reset to a higher value).
174 	 *
175 	 * Besides checks used to determine whether we are reopening the
176 	 * pool from a checkpointed uberblock [see spa_ld_select_uberblock()],
177 	 * the value of the field is used to determine which ZIL blocks have
178 	 * been allocated according to the ms_sm when we are rewinding to a
179 	 * checkpoint. Specifically, if logical birth > ub_checkpoint_txg,then
180 	 * the ZIL block is not allocated [see uses of spa_min_claim_txg()].
181 	 */
182 	uint64_t	ub_checkpoint_txg;
183 
184 	uint64_t	ub_raidz_reflow_info;
185 };
186 
187 #ifdef	__cplusplus
188 }
189 #endif
190 
191 #endif	/* _SYS_UBERBLOCK_IMPL_H */
192