xref: /src/sys/contrib/openzfs/include/sys/ddt_impl.h (revision 546d3d08e5993cbe2d6141b256e8c2ebad5aa102)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2016 by Delphix. All rights reserved.
25  * Copyright (c) 2023, Klara Inc.
26  */
27 
28 #ifndef _SYS_DDT_IMPL_H
29 #define	_SYS_DDT_IMPL_H
30 
31 #include <sys/ddt.h>
32 #include <sys/bitops.h>
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
38 /* DDT version numbers (small integers, counted up from zero) */
39 #define	DDT_VERSION_LEGACY		(0)
40 #define	DDT_VERSION_FDT			(1)
41 
42 /* Dummy version (UINT64_MAX, distinct from the real versions above) to signal that configure is still necessary */
43 #define	DDT_VERSION_UNCONFIGURED	(UINT64_MAX)
44 
45 /* Names (string keys) of interesting objects in the DDT root dir */
46 #define	DDT_DIR_VERSION		"version"
47 #define	DDT_DIR_FLAGS		"flags"
48 
49 /* Zero *ddlwe, then fill it (key, type, class, phys) from a live entry. */
50 #define	DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, ddlwe) do {			\
51 	memset((ddlwe), 0, sizeof (*(ddlwe)));				\
52 	(ddlwe)->ddlwe_key = (dde)->dde_key;				\
53 	(ddlwe)->ddlwe_type = (dde)->dde_type;				\
54 	(ddlwe)->ddlwe_class = (dde)->dde_class;			\
55 	memcpy(&(ddlwe)->ddlwe_phys, (dde)->dde_phys, DDT_PHYS_SIZE(ddt)); \
56 } while (0)
57 /* Zero *ddlwe, then fill it (key, type, class, phys) from a log entry. */
58 #define	DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe) do {             \
59 	memset((ddlwe), 0, sizeof (*(ddlwe)));                          \
60 	(ddlwe)->ddlwe_key = (ddle)->ddle_key;                          \
61 	(ddlwe)->ddlwe_type = (ddle)->ddle_type;                        \
62 	(ddlwe)->ddlwe_class = (ddle)->ddle_class;                      \
63 	memcpy(&(ddlwe)->ddlwe_phys, (ddle)->ddle_phys, DDT_PHYS_SIZE(ddt)); \
64 } while (0)
65 
66 /*
67  * An entry on the log tree. These are "frozen", and a record of what's in
68  * the on-disk log. They can't be used in place, but can be "loaded" back into
69  * the live tree.
70  */
71 typedef struct {
72 	ddt_key_t	ddle_key;	/* ddl_tree key */
73 	avl_node_t	ddle_node;	/* ddl_tree node (AVL linkage) */
74 
75 	ddt_type_t	ddle_type;	/* storage type */
76 	ddt_class_t	ddle_class;	/* storage class */
77 
78 	/* flexible array member: extra allocation for flat/trad phys */
79 	ddt_univ_phys_t	ddle_phys[];
80 } ddt_log_entry_t;
81 
82 /* On-disk log record types; stored in the 8-bit type field of dlr_info. */
83 typedef enum {
84 	DLR_INVALID	= 0,	/* end of block marker */
85 	DLR_ENTRY	= 1,	/* an entry to add or replace in the log tree */
86 } ddt_log_record_type_t;
87 
88 /* On-disk log record header. */
89 typedef struct {
90 	/*
91 	 * dlr_info is a packed u64, use the DLR_GET/DLR_SET macros below to
92 	 * access it.
93 	 *
94 	 * bits 0-7:    record type (ddt_log_record_type_t)
95 	 * bits 8-23:   length of record header+payload (16 bits, see RECLEN)
96 	 * bits 24-47:  reserved, all zero
97 	 * bits 48-55:  if type==DLR_ENTRY, storage type (ddt_type)
98 	 *                otherwise all zero
99 	 * bits 56-63:  if type==DLR_ENTRY, storage class (ddt_class)
100 	 *                otherwise all zero
101 	 */
102 	uint64_t	dlr_info;
103 	uint8_t		dlr_payload[];	/* flexible array: the record body */
104 } ddt_log_record_t;
105 /* dlr_info accessors; the trailing arguments are (low bit, width in bits). */
106 #define	DLR_GET_TYPE(dlr)		BF64_GET((dlr)->dlr_info, 0, 8)
107 #define	DLR_SET_TYPE(dlr, v)		BF64_SET((dlr)->dlr_info, 0, 8, v)
108 #define	DLR_GET_RECLEN(dlr)		BF64_GET((dlr)->dlr_info, 8, 16)
109 #define	DLR_SET_RECLEN(dlr, v)		BF64_SET((dlr)->dlr_info, 8, 16, v)
110 #define	DLR_GET_ENTRY_TYPE(dlr)		BF64_GET((dlr)->dlr_info, 48, 8)
111 #define	DLR_SET_ENTRY_TYPE(dlr, v)	BF64_SET((dlr)->dlr_info, 48, 8, v)
112 #define	DLR_GET_ENTRY_CLASS(dlr)	BF64_GET((dlr)->dlr_info, 56, 8)
113 #define	DLR_SET_ENTRY_CLASS(dlr, v)	BF64_SET((dlr)->dlr_info, 56, 8, v)
114 
115 /* Payload for DLR_ENTRY: a key followed by variable-length phys data. */
116 typedef struct {
117 	ddt_key_t	dlre_key;	/* key of the logged entry */
118 	ddt_univ_phys_t	dlre_phys[];	/* flexible array member */
119 } ddt_log_record_entry_t;
120 
121 /* Log flags (ddl_flags, dlh_flags); dlh_info reserves 8 bits for them */
122 #define	DDL_FLAG_FLUSHING	(1 << 0)	/* this log is being flushed */
123 #define	DDL_FLAG_CHECKPOINT	(1 << 1)	/* header has a checkpoint */
124 
125 /* On-disk log header, stored in the bonus buffer. */
126 typedef struct {
127 	/*
128 	 * dlh_info is a packed u64, use the DLH_GET/DLH_SET macros below to
129 	 * access it.
130 	 *
131 	 * bits 0-7:   log version
132 	 * bits 8-15:  log flags (DDL_FLAG_*)
133 	 * bits 16-63: reserved, all zero
134 	 */
135 	uint64_t	dlh_info;
136 
137 	uint64_t	dlh_length;	/* log size in bytes */
138 	uint64_t	dlh_first_txg;	/* txg this log went active */
139 	ddt_key_t	dlh_checkpoint;	/* last checkpoint (see DDL_FLAG_CHECKPOINT) */
140 } ddt_log_header_t;
141 /* dlh_info accessors; the trailing arguments are (low bit, width in bits). */
142 #define	DLH_GET_VERSION(dlh)	BF64_GET((dlh)->dlh_info, 0, 8)
143 #define	DLH_SET_VERSION(dlh, v)	BF64_SET((dlh)->dlh_info, 0, 8, v)
144 #define	DLH_GET_FLAGS(dlh)	BF64_GET((dlh)->dlh_info, 8, 8)
145 #define	DLH_SET_FLAGS(dlh, v)	BF64_SET((dlh)->dlh_info, 8, 8, v)
146 
147 /* DDT log update state, passed to ddt_log_begin/_entry/_commit */
148 typedef struct {
149 	dmu_tx_t	*dlu_tx;	/* tx the update is being applied to */
150 	dnode_t		*dlu_dn;	/* log object dnode */
151 	dmu_buf_t	**dlu_dbp;	/* array of block buffer pointers */
152 	int		dlu_ndbp;	/* number of block buffer pointers */
153 	uint16_t	dlu_reclen;	/* cached length of record (16 bits, as DLR_*_RECLEN) */
154 	uint64_t	dlu_block;	/* block for next entry */
155 	uint64_t	dlu_offset;	/* offset for next entry */
156 } ddt_log_update_t;
157 
158 /*
159  * Ops vector to access a specific DDT object type (e.g. ddt_zap_ops below).
160  */
161 typedef struct {
162 	char ddt_op_name[32];	/* NOTE(review): presumably the type's name -- confirm */
163 	int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
164 	    boolean_t prehash);
165 	int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
166 	int (*ddt_op_lookup)(dnode_t *dn, const ddt_key_t *ddk,
167 	    void *phys, size_t psize);	/* phys is non-const here */
168 	int (*ddt_op_contains)(dnode_t *dn, const ddt_key_t *ddk);
169 	void (*ddt_op_prefetch)(dnode_t *dn, const ddt_key_t *ddk);
170 	void (*ddt_op_prefetch_all)(dnode_t *dn);
171 	int (*ddt_op_update)(dnode_t *dn, const ddt_key_t *ddk,
172 	    const void *phys, size_t psize, dmu_tx_t *tx);	/* phys is const here */
173 	int (*ddt_op_remove)(dnode_t *dn, const ddt_key_t *ddk,
174 	    dmu_tx_t *tx);
175 	int (*ddt_op_walk)(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,
176 	    void *phys, size_t psize);	/* ddk and phys are non-const here */
177 	int (*ddt_op_count)(dnode_t *dn, uint64_t *count);
178 } ddt_ops_t;
179 
180 extern const ddt_ops_t ddt_zap_ops;	/* declared only; defined outside this header */
181 
182 /* Dedup log API. begin/entry/commit all take the same ddt_log_update_t. */
183 extern void ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx,
184     ddt_log_update_t *dlu);
185 extern void ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
186     ddt_log_update_t *dlu);
187 extern void ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu);
188 
189 extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,
190     ddt_lightweight_entry_t *ddlwe);
191 
192 extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
193     ddt_lightweight_entry_t *ddlwe, boolean_t *from_flushing);
194 extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,
195     const ddt_key_t *ddk);
196 
197 extern void ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
198     dmu_tx_t *tx);
199 extern void ddt_log_truncate(ddt_t *ddt, dmu_tx_t *tx);
200 
201 extern boolean_t ddt_log_swap(ddt_t *ddt, dmu_tx_t *tx);
202 
203 extern void ddt_log_destroy(ddt_t *ddt, dmu_tx_t *tx);
204 
205 extern int ddt_log_load(ddt_t *ddt);
206 extern void ddt_log_alloc(ddt_t *ddt);
207 extern void ddt_log_free(ddt_t *ddt);
208 
209 extern void ddt_log_init(void);
210 extern void ddt_log_fini(void);
211 
212 /*
213  * These are only exposed so that zdb can access them. Try not to use them
214  * outside of the DDT implementation proper, and if you do, consider moving
215  * them up.
216  */
217 
218 /*
219  * We use a histogram to convert a percentage request into a
220  * cutoff value where entries older than the cutoff get pruned.
221  *
222  * The histogram bins represent hours in power-of-two increments.
223  * 16 bins cover up to roughly four years (2^15 hours).
224  */
225 #define	HIST_BINS 16
226 
227 typedef struct ddt_age_histo {
228 	uint64_t dah_entries;	/* total entries; the percentage denominator */
229 	uint64_t dah_age_histo[HIST_BINS];	/* entry count per age bin */
230 } ddt_age_histo_t;
231 
232 void ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram);
233 
234 #if defined(_KERNEL) || !defined(ZFS_DEBUG)
235 #define	ddt_dump_age_histogram(histo, cutoff)	((void)0)
236 #else	/* !_KERNEL && ZFS_DEBUG: print the histogram with printf() */
237 static inline void
238 ddt_dump_age_histogram(ddt_age_histo_t *histogram, uint64_t cutoff)
239 {
240 	if (histogram->dah_entries == 0)	/* also guards the division below */
241 		return;
242 
243 	(void) printf("DDT prune unique class age, %llu hour cutoff\n",
244 	    (u_longlong_t)(gethrestime_sec() - cutoff)/3600);
245 	(void) printf("%5s  %9s  %4s\n", "age", "blocks", "amnt");
246 	(void) printf("%5s  %9s  %4s\n", "-----", "---------", "----");
247 	for (int i = 0; i < HIST_BINS; i++) {	/* one row per bin */
248 		(void) printf("%5d  %9llu %4d%%\n", 1<<i,
249 		    (u_longlong_t)histogram->dah_age_histo[i],
250 		    (int)((histogram->dah_age_histo[i] * 100) /
251 		    histogram->dah_entries));
252 	}
253 }
254 #endif	/* _KERNEL || !ZFS_DEBUG */
255 
256 /*
257  * Enough room to expand DMU_POOL_DDT format for all possible DDT
258  * checksum/class/type combinations.
259  */
260 #define	DDT_NAMELEN	32	/* NOTE(review): presumably sizes ddt_object_name()'s buffer -- confirm */
261 
262 extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt,
263     const ddt_univ_phys_t *ddp);
264 
265 extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
266 /* Per-object helpers: each takes the ddt plus a (type, clazz) pair. */
267 extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
268     char *name);
269 extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
270     uint64_t *walk, ddt_lightweight_entry_t *ddlwe);
271 extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
272     uint64_t *count);
273 extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
274     dmu_object_info_t *doi);
275 
276 #ifdef	__cplusplus
277 }
278 #endif
279 
280 #endif	/* _SYS_DDT_IMPL_H */
281