xref: /src/sys/contrib/openzfs/include/sys/arc_impl.h (revision 80aae8a3f8aa70712930664572be9e6885dc0be7)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, Delphix. All rights reserved.
25  * Copyright (c) 2013, Saso Kiselkov. All rights reserved.
26  * Copyright (c) 2013, Nexenta Systems, Inc.  All rights reserved.
27  * Copyright (c) 2020, George Amanakis. All rights reserved.
28  */
29 
30 #ifndef _SYS_ARC_IMPL_H
31 #define	_SYS_ARC_IMPL_H
32 
33 #include <sys/arc.h>
34 #include <sys/multilist.h>
35 #include <sys/zio_crypt.h>
36 #include <sys/zthr.h>
37 #include <sys/aggsum.h>
38 #include <sys/wmsum.h>
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 /*
45  * We can feed L2ARC from two states of ARC buffers, mru and mfu,
46  * and each of the states has two types: data and metadata.
47  */
48 #define	L2ARC_FEED_TYPES	4	/* feed types: 2 states x 2 types */
49 #define	L2ARC_MFU_META		0	/* mfu state, metadata */
50 #define	L2ARC_MRU_META		1	/* mru state, metadata */
51 #define	L2ARC_MFU_DATA		2	/* mfu state, data */
52 #define	L2ARC_MRU_DATA		3	/* mru state, data */
53 
54 /*
55  * L2ARC state and statistics for persistent marker management.
56  */
57 typedef struct l2arc_info {
58 	arc_buf_hdr_t	**l2arc_markers[L2ARC_FEED_TYPES]; /* per feed type */
59 	uint64_t	l2arc_total_writes;	/* total writes for reset */
60 	uint64_t	l2arc_total_capacity;	/* total L2ARC capacity */
61 	uint64_t	l2arc_smallest_capacity; /* smallest device capacity */
62 	/*
63 	 * Per-device thread coordination for sublist processing, one flag
64 	 * array per feed type.  reset: flags sublist marker for lazy reset to tail.
65 	 */
66 	boolean_t	*l2arc_sublist_busy[L2ARC_FEED_TYPES]; /* NOTE(review): presumably "sublist in use" -- confirm */
67 	boolean_t	*l2arc_sublist_reset[L2ARC_FEED_TYPES];
68 	kmutex_t	l2arc_sublist_lock;	/* protects busy/reset flags */
69 	/*
70 	 * Cumulative bytes scanned per pass since marker reset.
71 	 * Limits how far persistent markers advance from tail
72 	 * before resetting, based on % of state size.
73 	 */
74 	uint64_t	l2arc_ext_scanned[L2ARC_FEED_TYPES];
75 	int		l2arc_next_sublist[L2ARC_FEED_TYPES]; /* round-robin, per feed type */
76 } l2arc_info_t;
77 
78 /*
79  * Note that buffers can be in one of 7 states:
80  *	ARC_anon	- anonymous (discussed below)
81  *	ARC_mru		- recently used, currently cached
82  *	ARC_mru_ghost	- recently used, no longer in cache
83  *	ARC_mfu		- frequently used, currently cached
84  *	ARC_mfu_ghost	- frequently used, no longer in cache
85  *	ARC_uncached	- uncacheable prefetch, to be evicted
86  *	ARC_l2c_only	- exists in L2ARC but not other states
87  * When there are no active references to the buffer, they are
88  * linked onto a list in one of these arc states.  These are
89  * the only buffers that can be evicted or deleted.  Within each
90  * state there are multiple lists, one for meta-data and one for
91  * non-meta-data.  Meta-data (indirect blocks, blocks of dnodes,
92  * etc.) is tracked separately so that it can be managed more
93  * explicitly: favored over data, limited explicitly.
94  *
95  * Anonymous buffers are buffers that are not associated with
96  * a DVA.  These are buffers that hold dirty block copies
97  * before they are written to stable storage.  By definition,
98  * they are "ref'd" and are considered part of arc_mru
99  * that cannot be freed.  Generally, they will acquire a DVA
100  * as they are written and migrate onto the arc_mru list.
101  *
102  * The ARC_l2c_only state is for buffers that are in the second
103  * level ARC but no longer in any of the ARC_m* lists.  The second
104  * level ARC itself may also contain buffers that are in any of
105  * the ARC_m* states - meaning that a buffer can exist in two
106  * places.  The reason for the ARC_l2c_only state is to keep the
107  * buffer header in the hash table, so that reads that hit the
108  * second level ARC benefit from these fast lookups.
109  */
110 
111 typedef struct arc_state {
112 	/*
113 	 * Lists of evictable buffers, one multilist per buffer type.
114 	 */
115 	multilist_t arcs_list[ARC_BUFC_NUMTYPES];
116 	/*
117 	 * State type identifier; supports the "dbufs" kstat.
118 	 */
119 	arc_state_type_t arcs_state;
120 	/*
121 	 * Total amount of data in this state, per buffer type.
122 	 */
123 	zfs_refcount_t arcs_size[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
124 	/*
125 	 * Total amount of evictable data in this state, per buffer type.
126 	 */
127 	zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
128 	/*
129 	 * Hit bytes for this state, per buffer type (counted only for ghost states).
130 	 */
131 	wmsum_t arcs_hits[ARC_BUFC_NUMTYPES];
132 } arc_state_t;
133 
134 typedef struct arc_callback arc_callback_t;
135 
136 struct arc_callback {	/* read callback record; see b_acb in l1arc_buf_hdr_t */
137 	void			*acb_private;	/* opaque pointer (presumably for acb_done) */
138 	arc_read_done_func_t	*acb_done;	/* read-done callback */
139 	arc_buf_t		*acb_buf;
140 	boolean_t		acb_encrypted;
141 	boolean_t		acb_compressed;
142 	boolean_t		acb_noauth;
143 	boolean_t		acb_nobuf;
144 	boolean_t		acb_wait;
145 	int			acb_wait_error;
146 	kmutex_t		acb_wait_lock;	/* NOTE(review): presumably pairs with acb_wait_cv */
147 	kcondvar_t		acb_wait_cv;
148 	zbookmark_phys_t	acb_zb;
149 	zio_t			*acb_zio_dummy;
150 	zio_t			*acb_zio_head;
151 	arc_callback_t		*acb_prev;	/* previous callback link */
152 	arc_callback_t		*acb_next;	/* next callback link */
153 };
154 
155 typedef struct arc_write_callback arc_write_callback_t;
156 
157 struct arc_write_callback {	/* three arc_write_done_func_t hooks and a buffer */
158 	void			*awcb_private;	/* opaque pointer (presumably for the hooks) */
159 	arc_write_done_func_t	*awcb_ready;
160 	arc_write_done_func_t	*awcb_children_ready;
161 	arc_write_done_func_t	*awcb_done;
162 	arc_buf_t		*awcb_buf;
163 };
164 
165 /*
166  * ARC buffers are separated into multiple structs as a memory saving measure:
167  *   - Common fields struct, always defined, and embedded within it:
168  *       - L2-only fields, always allocated but undefined when not in L2ARC
169  *       - L1-only fields, only allocated when in L1ARC
170  *
171  *           Buffer in L1                     Buffer only in L2
172  *    +------------------------+          +------------------------+
173  *    | arc_buf_hdr_t          |          | arc_buf_hdr_t          |
174  *    |                        |          |                        |
175  *    |                        |          |                        |
176  *    |                        |          |                        |
177  *    +------------------------+          +------------------------+
178  *    | l2arc_buf_hdr_t        |          | l2arc_buf_hdr_t        |
179  *    | (undefined if L1-only) |          |                        |
180  *    +------------------------+          +------------------------+
181  *    | l1arc_buf_hdr_t        |
182  *    |                        |
183  *    |                        |
184  *    |                        |
185  *    |                        |
186  *    +------------------------+
187  *
188  * Because it's possible for the L2ARC to become extremely large, we can wind
189  * up eating a lot of memory in L2ARC buffer headers, so the size of a header
190  * is minimized by only allocating the fields necessary for an L1-cached buffer
191  * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
192  * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
193  * words in pointers. arc_hdr_realloc() is used to switch a header between
194  * these two allocation states.
195  */
196 typedef struct l1arc_buf_hdr {	/* embedded in arc_buf_hdr as b_l1hdr */
197 	/* protected by arc state mutex */
198 	arc_state_t		*b_state;
199 	multilist_node_t	b_arc_node;	/* NOTE(review): presumably links into arcs_list */
200 
201 	/* protected by hash lock */
202 	clock_t			b_arc_access;
203 	uint32_t		b_mru_hits;
204 	uint32_t		b_mru_ghost_hits;
205 	uint32_t		b_mfu_hits;
206 	uint32_t		b_mfu_ghost_hits;
207 	uint8_t			b_byteswap;
208 	arc_buf_t		*b_buf;
209 
210 	/* self protecting */
211 	zfs_refcount_t		b_refcnt;
212 
213 	arc_callback_t		*b_acb;		/* read callback record(s) */
214 	abd_t			*b_pabd;
215 
216 #ifdef ZFS_DEBUG
217 	zio_cksum_t		*b_freeze_cksum;	/* present only with ZFS_DEBUG */
218 	kmutex_t		b_freeze_lock;		/* present only with ZFS_DEBUG */
219 #endif
220 } l1arc_buf_hdr_t;
221 
222 typedef enum l2arc_dev_hdr_flags_t {	/* flag bits for dh_flags */
223 	L2ARC_DEV_HDR_EVICT_FIRST = (1 << 0)	/* mirror of l2ad_first */
224 } l2arc_dev_hdr_flags_t;
225 
226 /*
227  * Pointer used in persistent L2ARC (for pointing to log blocks).
228  */
229 typedef struct l2arc_log_blkptr {
230 	/*
231 	 * Offset of log block within the device, in bytes.
232 	 */
233 	uint64_t	lbp_daddr;
234 	/*
235 	 * Aligned payload size (in bytes) of the log block.
236 	 */
237 	uint64_t	lbp_payload_asize;
238 	/*
239 	 * Offset in bytes of the first buffer in the payload.
240 	 */
241 	uint64_t	lbp_payload_start;
242 	/*
243 	 * lbp_prop has the following format (see the L2BLK_* macros):
244 	 *	* logical size (in bytes)
245 	 *	* aligned (after compression) size (in bytes)
246 	 *	* compression algorithm (we always LZ4-compress l2arc logs)
247 	 *	* checksum algorithm (used for lbp_cksum)
248 	 */
249 	uint64_t	lbp_prop;
250 	zio_cksum_t	lbp_cksum;	/* checksum of log */
251 } l2arc_log_blkptr_t;
252 
253 /*
254  * The persistent L2ARC device header.
255  * Byte order of magic determines whether 64-bit bswap of fields is necessary.
256  */
257 typedef struct l2arc_dev_hdr_phys {
258 	uint64_t	dh_magic;	/* L2ARC_DEV_HDR_MAGIC */
259 	uint64_t	dh_version;	/* Persistent L2ARC version */
260 
261 	/*
262 	 * Global L2ARC device state and metadata.
263 	 */
264 	uint64_t	dh_spa_guid;
265 	uint64_t	dh_vdev_guid;
266 	uint64_t	dh_log_entries;		/* mirror of l2ad_log_entries */
267 	uint64_t	dh_evict;		/* evicted offset in bytes */
268 	uint64_t	dh_flags;		/* l2arc_dev_hdr_flags_t */
269 	/*
270 	 * Used in zdb.c for determining if a log block is valid, in the same
271 	 * way that l2arc_rebuild() does.
272 	 */
273 	uint64_t	dh_start;		/* mirror of l2ad_start */
274 	uint64_t	dh_end;			/* mirror of l2ad_end */
275 	/*
276 	 * Start of log block chain. [0] -> newest log, [1] -> one older (used
277 	 * for initiating prefetch).
278 	 */
279 	l2arc_log_blkptr_t	dh_start_lbps[2];
280 	/*
281 	 * Aligned size of all log blocks as accounted by vdev_space_update().
282 	 */
283 	uint64_t	dh_lb_asize;		/* mirror of l2ad_lb_asize */
284 	uint64_t	dh_lb_count;		/* mirror of l2ad_lb_count */
285 	/*
286 	 * Mirrors of vdev_trim_action_time and vdev_trim_state, used to
287 	 * display when the cache device was fully trimmed for the last
288 	 * time.
289 	 */
290 	uint64_t		dh_trim_action_time;
291 	uint64_t		dh_trim_state;
292 	const uint64_t		dh_pad[30];	/* pad to 512 bytes (asserted below) */
293 	zio_eck_t		dh_tail;	/* NOTE(review): presumably embedded checksum -- confirm */
294 } l2arc_dev_hdr_phys_t;
295 _Static_assert(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE,
296 	"l2arc_dev_hdr_phys_t wrong size");
297 
298 /*
299  * A single ARC buffer header entry in a l2arc_log_blk_phys_t.
300  */
301 typedef struct l2arc_log_ent_phys {
302 	dva_t			le_dva;		/* dva of buffer */
303 	uint64_t		le_birth;	/* birth txg of buffer */
304 	/*
305 	 * le_prop has the following format (see the L2BLK_* macros):
306 	 *	* logical size (in bytes)
307 	 *	* physical (compressed) size (in bytes)
308 	 *	* compression algorithm
309 	 *	* object type (used to restore arc_buf_contents_t)
310 	 *	* protected status (used for encryption)
311 	 *	* prefetch status (used in l2arc_read_done())
312 	 */
313 	uint64_t		le_prop;
314 	uint64_t		le_daddr;	/* buf location on l2dev */
315 	uint64_t		le_complevel;	/* NOTE(review): presumably compression level */
316 	/*
317 	 * We pad the size of each entry to a power of 2 so that the size of
318 	 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
319 	 * because of the L2BLK_SET_*SIZE macros.
320 	 */
321 	const uint64_t		le_pad[2];	/* pad to 64 bytes */
322 } l2arc_log_ent_phys_t;
323 
324 #define	L2ARC_LOG_BLK_MAX_ENTRIES	(1022)
325 
326 /*
327  * A log block of up to L2ARC_LOG_BLK_MAX_ENTRIES (1022) ARC buffer log
328  * entries, chained into the persistent L2ARC metadata linked list. Byte
329  * order of magic determines whether 64-bit bswap of fields is necessary.
330  */
331 typedef struct l2arc_log_blk_phys {
332 	uint64_t		lb_magic;	/* L2ARC_LOG_BLK_MAGIC */
333 	/*
334 	 * There are 2 chains (headed by dh_start_lbps[2]), and this field
335 	 * points back to the previous block in this chain. We alternate
336 	 * which chain we append to, so they are time-wise and offset-wise
337 	 * interleaved, but that is an optimization rather than for
338 	 * correctness.
339 	 */
340 	l2arc_log_blkptr_t	lb_prev_lbp;	/* pointer to prev log block */
341 	/*
342 	 * Pad header section (everything before lb_entries) to 128 bytes.
343 	 */
344 	uint64_t		lb_pad[7];
345 	/* Payload: fixed-size array of 64-byte log entries. */
346 	l2arc_log_ent_phys_t	lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES];
347 } l2arc_log_blk_phys_t;				/* 64K total: 128 + 1022 * 64 */
348 
349 /*
350  * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with
351  * SPA_MINBLOCKSHIFT (L2BLK_SET_*SIZE macros) and within SPA_{MIN,MAX}BLOCKSIZE.
352  */
353 _Static_assert(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t),
354     1ULL << SPA_MINBLOCKSHIFT), "l2arc_log_blk_phys_t misaligned");
355 _Static_assert(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE,
356 	"l2arc_log_blk_phys_t too small");
357 _Static_assert(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE,
358 	"l2arc_log_blk_phys_t too big");
359 
360 /*
361  * These structures hold in-flight abd buffers for log blocks as they're being
362  * written to the L2ARC device.
363  */
364 typedef struct l2arc_lb_abd_buf {
365 	abd_t		*abd;	/* in-flight log block buffer */
366 	list_node_t	node;	/* list linkage (presumably l2wcb_abd_list -- confirm) */
367 } l2arc_lb_abd_buf_t;
368 
369 /*
370  * These structures hold pointers to log blocks present on the L2ARC device.
371  */
372 typedef struct l2arc_lb_ptr_buf {
373 	l2arc_log_blkptr_t	*lb_ptr;	/* pointer to a log block on the device */
374 	list_node_t		node;		/* list linkage (presumably l2ad_lbptr_list -- confirm) */
375 } l2arc_lb_ptr_buf_t;
376 
377 /* Accessors (get/set) for the bit-fields packed into le_prop and lbp_prop */
378 #define	L2BLK_GET_LSIZE(field)	\
379 	BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
380 #define	L2BLK_SET_LSIZE(field, x)	\
381 	BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
382 #define	L2BLK_GET_PSIZE(field)	\
383 	BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
384 #define	L2BLK_SET_PSIZE(field, x)	\
385 	BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
386 #define	L2BLK_GET_COMPRESS(field)	\
387 	BF64_GET((field), 32, SPA_COMPRESSBITS)
388 #define	L2BLK_SET_COMPRESS(field, x)	\
389 	BF64_SET((field), 32, SPA_COMPRESSBITS, x)
390 #define	L2BLK_GET_PREFETCH(field)	BF64_GET((field), 39, 1)
391 #define	L2BLK_SET_PREFETCH(field, x)	BF64_SET((field), 39, 1, x)
392 #define	L2BLK_GET_CHECKSUM(field)	BF64_GET((field), 40, 8)
393 #define	L2BLK_SET_CHECKSUM(field, x)	BF64_SET((field), 40, 8, x)
394 /* The +/- 1 keeps stored values compatible after ARC_BUFC_INVALID removal. */
395 #define	L2BLK_GET_TYPE(field)		(BF64_GET((field), 48, 8) - 1)
396 #define	L2BLK_SET_TYPE(field, x)	BF64_SET((field), 48, 8, (x) + 1)
397 #define	L2BLK_GET_PROTECTED(field)	BF64_GET((field), 56, 1)
398 #define	L2BLK_SET_PROTECTED(field, x)	BF64_SET((field), 56, 1, x)
399 #define	L2BLK_GET_STATE(field)		BF64_GET((field), 57, 4)
400 #define	L2BLK_SET_STATE(field, x)	BF64_SET((field), 57, 4, x)
401 
402 #define	PTR_SWAP(x, y)		/* swap two pointer lvalues via a void * */ \
403 	do {			\
404 		void *tmp = (x);	/* NOTE(review): captures a caller's `tmp` */ \
405 		x = y;		/* NOTE(review): x, y unparenthesized, used twice */ \
406 		y = tmp;	\
407 	} while (0)
408 
409 #define	L2ARC_DEV_HDR_MAGIC	0x5a46534341434845LLU	/* dh_magic; ASCII: "ZFSCACHE" */
410 #define	L2ARC_LOG_BLK_MAGIC	0x4c4f47424c4b4844LLU	/* lb_magic; ASCII: "LOGBLKHD" */
411 
412 /*
413  * L2ARC Internals
414  */
415 typedef struct l2arc_dev {
416 	vdev_t			*l2ad_vdev;	/* can be NULL during remove */
417 	spa_t			*l2ad_spa;	/* can be NULL during remove */
418 	uint64_t		l2ad_hand;	/* next write location */
419 	uint64_t		l2ad_start;	/* first addr on device */
420 	uint64_t		l2ad_end;	/* last addr on device */
421 	boolean_t		l2ad_first;	/* first sweep through */
422 	boolean_t		l2ad_writing;	/* currently writing */
423 	kmutex_t		l2ad_mtx;	/* lock for buffer list */
424 	list_t			l2ad_buflist;	/* buffer list */
425 	list_node_t		l2ad_node;	/* device list node */
426 	zfs_refcount_t		l2ad_alloc;	/* allocated bytes */
427 	/*
428 	 * Persistent L2ARC state.
429 	 */
430 	l2arc_dev_hdr_phys_t	*l2ad_dev_hdr;	/* persistent device header */
431 	uint64_t		l2ad_dev_hdr_asize; /* aligned hdr size */
432 	l2arc_log_blk_phys_t	l2ad_log_blk;	/* currently open log block (embedded) */
433 	int			l2ad_log_ent_idx; /* index into cur log blk */
434 	/* Number of bytes in current log block's payload */
435 	uint64_t		l2ad_log_blk_payload_asize;
436 	/*
437 	 * Offset (in bytes) of the first buffer in current log block's
438 	 * payload.
439 	 */
440 	uint64_t		l2ad_log_blk_payload_start;
441 	/* Flag indicating whether a rebuild is scheduled or is going on */
442 	boolean_t		l2ad_rebuild;
443 	boolean_t		l2ad_rebuild_cancel;	/* NOTE(review): presumably a cancel request -- confirm */
444 	boolean_t		l2ad_rebuild_began;	/* NOTE(review): presumably set once rebuild starts */
445 	uint64_t		l2ad_log_entries;   /* entries per log blk  */
446 	uint64_t		l2ad_evict;	 /* evicted offset in bytes */
447 	/* List of pointers to log blocks present in the L2ARC device */
448 	list_t			l2ad_lbptr_list;
449 	/*
450 	 * Aligned size of all log blocks as accounted by vdev_space_update().
451 	 */
452 	zfs_refcount_t		l2ad_lb_asize;
453 	/*
454 	 * Number of log blocks present on the device.
455 	 */
456 	zfs_refcount_t		l2ad_lb_count;
457 	boolean_t		l2ad_trim_all; /* TRIM whole device */
458 	/*
459 	 * DWPD (presumably "drive writes per day") tracking with daily reset
460 	 */
461 	uint64_t		l2ad_dwpd_writes;	/* 24h bytes written */
462 	uint64_t		l2ad_dwpd_start;	/* 24h period start */
463 	uint64_t		l2ad_dwpd_accumulated;	/* Accumulated */
464 	uint64_t		l2ad_dwpd_bump;		/* Reset trigger */
465 	/*
466 	 * Per-device feed thread for parallel L2ARC writes
467 	 */
468 	kthread_t		*l2ad_feed_thread;	/* feed thread handle */
469 	boolean_t		l2ad_thread_exit;	/* signal thread exit */
470 	kmutex_t		l2ad_feed_thr_lock;	/* thread sleep/wake */
471 	kcondvar_t		l2ad_feed_cv;		/* thread wakeup cv */
472 	/*
473 	 * Consecutive cycles where metadata filled write budget
474 	 * while data passes got nothing written. Used to detect
475 	 * monopolization and skip metadata to give data a chance.
476 	 */
477 	uint64_t		l2ad_meta_cycles;
478 } l2arc_dev_t;
479 
480 /*
481  * Encrypted blocks will need to be stored encrypted on the L2ARC
482  * disk as they appear in the main pool. In order for this to work we
483  * need to pass around the encryption parameters so they can be used
484  * to write data to the L2ARC. This struct is only defined in the
485  * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
486  * flag set.
487  */
488 typedef struct arc_buf_hdr_crypt {	/* embedded in arc_buf_hdr as b_crypt_hdr */
489 	abd_t			*b_rabd;	/* raw encrypted data */
490 
491 	/* dsobj for looking up encryption key for l2arc encryption */
492 	uint64_t		b_dsobj;
493 
494 	dmu_object_type_t	b_ot;		/* object type */
495 
496 	/* encryption parameters: salt and IV */
497 	uint8_t			b_salt[ZIO_DATA_SALT_LEN];
498 	uint8_t			b_iv[ZIO_DATA_IV_LEN];
499 
500 	/*
501 	 * Technically this could be removed since we will always be able to
502 	 * get the mac from the bp when we need it. However, it is inconvenient
503 	 * for callers of arc code to have to pass a bp in all the time. This
504 	 * also allows us to assert that L2ARC data is properly encrypted to
505 	 * match the data in the main storage pool.
506 	 */
507 	uint8_t			b_mac[ZIO_DATA_MAC_LEN];
508 } arc_buf_hdr_crypt_t;
509 
510 typedef struct l2arc_buf_hdr {	/* embedded in arc_buf_hdr as b_l2hdr */
511 	/* protected by arc_buf_hdr mutex */
512 	l2arc_dev_t		*b_dev;		/* L2ARC device */
513 	uint64_t		b_daddr;	/* disk address, offset byte */
514 	uint32_t		b_hits;
515 	arc_state_type_t	b_arcs_state;	/* NOTE(review): presumably ARC state when cached -- confirm */
516 	list_node_t		b_l2node;	/* NOTE(review): presumably links into l2ad_buflist */
517 } l2arc_buf_hdr_t;
518 
519 typedef struct l2arc_write_callback {
520 	l2arc_dev_t	*l2wcb_dev;		/* device info */
521 	arc_buf_hdr_t	*l2wcb_head;		/* head of write buflist */
522 	/* in-flight list of log blocks (presumably l2arc_lb_abd_buf_t -- confirm) */
523 	list_t		l2wcb_abd_list;
524 } l2arc_write_callback_t;
525 
526 struct arc_buf_hdr {	/* common fields, with L2 and L1 sub-headers embedded */
527 	/* protected by hash lock */
528 	dva_t			b_dva;
529 	uint64_t		b_birth;
530 
531 	arc_buf_contents_t	b_type;
532 	uint8_t			b_complevel;
533 	uint8_t			b_reserved1;	/* used for 4 byte alignment */
534 	uint16_t		b_l2size;	/* alignment or L2-only size */
535 	arc_buf_hdr_t		*b_hash_next;	/* next header link (presumably hash chain) */
536 	arc_flags_t		b_flags;
537 
538 	/*
539 	 * This field stores the size of the data buffer after
540 	 * compression, and is set in the arc's zio completion handlers.
541 	 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
542 	 *
543 	 * While the block pointers can store up to 32MB in their psize
544 	 * field, we can only store up to 32MB minus 512B. This is due
545 	 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
546 	 * a field of zeros represents 512B in the bp). We can't use a
547 	 * bias of 1 since we need to reserve a psize of zero, here, to
548 	 * represent holes and embedded blocks.
549 	 *
550 	 * This isn't a problem in practice, since the maximum size of a
551 	 * buffer is limited to 16MB, so we never need to store 32MB in
552 	 * this field. Even in the upstream illumos code base, the
553 	 * maximum size of a buffer is limited to 16MB.
554 	 */
555 	uint16_t		b_psize;
556 
557 	/*
558 	 * This field stores the size of the data buffer before
559 	 * compression, and cannot change once set. It is in units
560 	 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes).
561 	 */
562 	uint16_t		b_lsize;	/* immutable */
563 	uint64_t		b_spa;		/* immutable */
564 
565 	/* L2ARC fields. Undefined when not in L2ARC. */
566 	l2arc_buf_hdr_t		b_l2hdr;
567 	/* L1ARC fields. Undefined when in l2arc_only state. */
568 	l1arc_buf_hdr_t		b_l1hdr;
569 	/*
570 	 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
571 	 * is set and the L1 header exists.
572 	 */
573 	arc_buf_hdr_crypt_t b_crypt_hdr;
574 };
575 
576 typedef struct arc_stats {
577 	/* Number of requests that were satisfied without I/O. */
578 	kstat_named_t arcstat_hits;
579 	/* Number of requests for which I/O was already running. */
580 	kstat_named_t arcstat_iohits;
581 	/* Number of requests for which I/O has to be issued. */
582 	kstat_named_t arcstat_misses;
583 	/* Same three, but specifically for demand data. */
584 	kstat_named_t arcstat_demand_data_hits;
585 	kstat_named_t arcstat_demand_data_iohits;
586 	kstat_named_t arcstat_demand_data_misses;
587 	/* Same three, but specifically for demand metadata. */
588 	kstat_named_t arcstat_demand_metadata_hits;
589 	kstat_named_t arcstat_demand_metadata_iohits;
590 	kstat_named_t arcstat_demand_metadata_misses;
591 	/* Same three, but specifically for prefetch data. */
592 	kstat_named_t arcstat_prefetch_data_hits;
593 	kstat_named_t arcstat_prefetch_data_iohits;
594 	kstat_named_t arcstat_prefetch_data_misses;
595 	/* Same three, but specifically for prefetch metadata. */
596 	kstat_named_t arcstat_prefetch_metadata_hits;
597 	kstat_named_t arcstat_prefetch_metadata_iohits;
598 	kstat_named_t arcstat_prefetch_metadata_misses;
599 	kstat_named_t arcstat_mru_hits;
600 	kstat_named_t arcstat_mru_ghost_hits;
601 	kstat_named_t arcstat_mfu_hits;
602 	kstat_named_t arcstat_mfu_ghost_hits;
603 	kstat_named_t arcstat_uncached_hits;
604 	kstat_named_t arcstat_deleted;
605 	/*
606 	 * Number of buffers that could not be evicted because the hash lock
607 	 * was held by another thread.  The lock may not necessarily be held
608 	 * by something using the same buffer, since hash locks are shared
609 	 * by multiple buffers.
610 	 */
611 	kstat_named_t arcstat_mutex_miss;
612 	/*
613 	 * Number of buffers skipped when updating the access state due to the
614 	 * header having already been released after acquiring the hash lock.
615 	 */
616 	kstat_named_t arcstat_access_skip;
617 	/*
618 	 * Number of buffers skipped because they have I/O in progress, are
619 	 * indirect prefetch buffers that have not lived long enough, or are
620 	 * not from the spa we're trying to evict from.
621 	 */
622 	kstat_named_t arcstat_evict_skip;
623 	/*
624 	 * Number of times arc_evict_state() was unable to evict enough
625 	 * buffers to reach its target amount.
626 	 */
627 	kstat_named_t arcstat_evict_not_enough;
628 	kstat_named_t arcstat_evict_l2_cached;
629 	kstat_named_t arcstat_evict_l2_eligible;
630 	kstat_named_t arcstat_evict_l2_eligible_mfu;
631 	kstat_named_t arcstat_evict_l2_eligible_mru;
632 	kstat_named_t arcstat_evict_l2_ineligible;
633 	kstat_named_t arcstat_evict_l2_skip;
634 	kstat_named_t arcstat_hash_elements;
635 	kstat_named_t arcstat_hash_elements_max;
636 	kstat_named_t arcstat_hash_collisions;
637 	kstat_named_t arcstat_hash_chains;
638 	kstat_named_t arcstat_hash_chain_max;
639 	kstat_named_t arcstat_meta;
640 	kstat_named_t arcstat_pd;
641 	kstat_named_t arcstat_pm;
642 	kstat_named_t arcstat_c;
643 	kstat_named_t arcstat_c_min;
644 	kstat_named_t arcstat_c_max;
645 	kstat_named_t arcstat_size;
646 	/*
647 	 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
648 	 * Note that the compressed bytes may match the uncompressed bytes
649 	 * if the block is either not compressed or compressed arc is disabled.
650 	 */
651 	kstat_named_t arcstat_compressed_size;
652 	/*
653 	 * Uncompressed size of the data stored in b_pabd. If compressed
654 	 * arc is disabled then this value will be identical to the stat
655 	 * above.
656 	 */
657 	kstat_named_t arcstat_uncompressed_size;
658 	/*
659 	 * Number of bytes stored in all the arc_buf_t's. This is classified
660 	 * as "overhead" since this data is typically short-lived and will
661 	 * be evicted from the arc when it becomes unreferenced unless the
662 	 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
663 	 * values have been set (see comment in dbuf.c for more information).
664 	 */
665 	kstat_named_t arcstat_overhead_size;
666 	/*
667 	 * Number of bytes consumed by internal ARC structures necessary
668 	 * for tracking purposes; these structures are not actually
669 	 * backed by ARC buffers. This includes arc_buf_hdr_t structures
670 	 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
671 	 * caches), and arc_buf_t structures (allocated via arc_buf_t
672 	 * cache).
673 	 */
674 	kstat_named_t arcstat_hdr_size;
675 	/*
676 	 * Number of bytes consumed by ARC buffers of type equal to
677 	 * ARC_BUFC_DATA. This is generally consumed by buffers backing
678 	 * on disk user data (e.g. plain file contents).
679 	 */
680 	kstat_named_t arcstat_data_size;
681 	/*
682 	 * Number of bytes consumed by ARC buffers of type equal to
683 	 * ARC_BUFC_METADATA. This is generally consumed by buffers
684 	 * backing on disk data that is used for internal ZFS
685 	 * structures (e.g. ZAP, dnode, indirect blocks, etc).
686 	 */
687 	kstat_named_t arcstat_metadata_size;
688 	/*
689 	 * Number of bytes consumed by dmu_buf_impl_t objects.
690 	 */
691 	kstat_named_t arcstat_dbuf_size;
692 	/*
693 	 * Number of bytes consumed by dnode_t objects.
694 	 */
695 	kstat_named_t arcstat_dnode_size;
696 	/*
697 	 * Number of bytes consumed by bonus buffers.
698 	 */
699 	kstat_named_t arcstat_bonus_size;
700 #if defined(COMPAT_FREEBSD11)
701 	/*
702 	 * Sum of the previous three counters, provided for compatibility.
703 	 */
704 	kstat_named_t arcstat_other_size;
705 #endif
706 
707 	/*
708 	 * Total number of bytes consumed by ARC buffers residing in the
709 	 * arc_anon state. This includes *all* buffers in the arc_anon
710 	 * state; e.g. data, metadata, evictable, and unevictable buffers
711 	 * are all included in this value.
712 	 */
713 	kstat_named_t arcstat_anon_size;
714 	kstat_named_t arcstat_anon_data;
715 	kstat_named_t arcstat_anon_metadata;
716 	/*
717 	 * Number of bytes consumed by ARC buffers that meet the
718 	 * following criteria: backing buffers of type ARC_BUFC_DATA,
719 	 * residing in the arc_anon state, and are eligible for eviction
720 	 * (e.g. have no outstanding holds on the buffer).
721 	 */
722 	kstat_named_t arcstat_anon_evictable_data;
723 	/*
724 	 * Number of bytes consumed by ARC buffers that meet the
725 	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
726 	 * residing in the arc_anon state, and are eligible for eviction
727 	 * (e.g. have no outstanding holds on the buffer).
728 	 */
729 	kstat_named_t arcstat_anon_evictable_metadata;
730 	/*
731 	 * Total number of bytes consumed by ARC buffers residing in the
732 	 * arc_mru state. This includes *all* buffers in the arc_mru
733 	 * state; e.g. data, metadata, evictable, and unevictable buffers
734 	 * are all included in this value.
735 	 */
736 	kstat_named_t arcstat_mru_size;
737 	kstat_named_t arcstat_mru_data;
738 	kstat_named_t arcstat_mru_metadata;
739 	/*
740 	 * Number of bytes consumed by ARC buffers that meet the
741 	 * following criteria: backing buffers of type ARC_BUFC_DATA,
742 	 * residing in the arc_mru state, and are eligible for eviction
743 	 * (e.g. have no outstanding holds on the buffer).
744 	 */
745 	kstat_named_t arcstat_mru_evictable_data;
746 	/*
747 	 * Number of bytes consumed by ARC buffers that meet the
748 	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
749 	 * residing in the arc_mru state, and are eligible for eviction
750 	 * (e.g. have no outstanding holds on the buffer).
751 	 */
752 	kstat_named_t arcstat_mru_evictable_metadata;
753 	/*
754 	 * Total number of bytes that *would have been* consumed by ARC
755 	 * buffers in the arc_mru_ghost state. The key thing to note
756 	 * here, is the fact that this size doesn't actually indicate
757 	 * RAM consumption. The ghost lists only consist of headers and
758 	 * don't actually have ARC buffers linked off of these headers.
759 	 * Thus, *if* the headers had associated ARC buffers, these
760 	 * buffers *would have* consumed this number of bytes.
761 	 */
762 	kstat_named_t arcstat_mru_ghost_size;
763 	kstat_named_t arcstat_mru_ghost_data;
764 	kstat_named_t arcstat_mru_ghost_metadata;
765 	/*
766 	 * Number of bytes that *would have been* consumed by ARC
767 	 * buffers that are eligible for eviction, of type
768 	 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
769 	 */
770 	kstat_named_t arcstat_mru_ghost_evictable_data;
771 	/*
772 	 * Number of bytes that *would have been* consumed by ARC
773 	 * buffers that are eligible for eviction, of type
774 	 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
775 	 */
776 	kstat_named_t arcstat_mru_ghost_evictable_metadata;
777 	/*
778 	 * Total number of bytes consumed by ARC buffers residing in the
779 	 * arc_mfu state. This includes *all* buffers in the arc_mfu
780 	 * state; e.g. data, metadata, evictable, and unevictable buffers
781 	 * are all included in this value.
782 	 */
783 	kstat_named_t arcstat_mfu_size;
784 	kstat_named_t arcstat_mfu_data;
785 	kstat_named_t arcstat_mfu_metadata;
786 	/*
787 	 * Number of bytes consumed by ARC buffers that are eligible for
788 	 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
789 	 * state.
790 	 */
791 	kstat_named_t arcstat_mfu_evictable_data;
792 	/*
793 	 * Number of bytes consumed by ARC buffers that are eligible for
794 	 * eviction, of type ARC_BUFC_METADATA, and reside in the
795 	 * arc_mfu state.
796 	 */
797 	kstat_named_t arcstat_mfu_evictable_metadata;
798 	/*
799 	 * Total number of bytes that *would have been* consumed by ARC
800 	 * buffers in the arc_mfu_ghost state. See the comment above
801 	 * arcstat_mru_ghost_size for more details.
802 	 */
803 	kstat_named_t arcstat_mfu_ghost_size;
804 	kstat_named_t arcstat_mfu_ghost_data;
805 	kstat_named_t arcstat_mfu_ghost_metadata;
806 	/*
807 	 * Number of bytes that *would have been* consumed by ARC
808 	 * buffers that are eligible for eviction, of type
809 	 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
810 	 */
811 	kstat_named_t arcstat_mfu_ghost_evictable_data;
812 	/*
813 	 * Number of bytes that *would have been* consumed by ARC
814 	 * buffers that are eligible for eviction, of type
815 	 * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state.
816 	 */
817 	kstat_named_t arcstat_mfu_ghost_evictable_metadata;
818 	/*
819 	 * Total number of bytes that are going to be evicted from ARC due to
820 	 * ARC_FLAG_UNCACHED being set.
821 	 */
822 	kstat_named_t arcstat_uncached_size;
823 	kstat_named_t arcstat_uncached_data;
824 	kstat_named_t arcstat_uncached_metadata;
825 	/*
826 	 * Number of data bytes that are going to be evicted from ARC due to
827 	 * ARC_FLAG_UNCACHED being set.
828 	 */
829 	kstat_named_t arcstat_uncached_evictable_data;
830 	/*
831 	 * Number of metadata bytes that are going to be evicted from ARC
832 	 * due to ARC_FLAG_UNCACHED being set.
833 	 */
834 	kstat_named_t arcstat_uncached_evictable_metadata;
835 	kstat_named_t arcstat_l2_hits;
836 	kstat_named_t arcstat_l2_misses;
837 	/*
838 	 * Allocated size (in bytes) of L2ARC cached buffers by ARC state.
839 	 */
840 	kstat_named_t arcstat_l2_prefetch_asize;
841 	kstat_named_t arcstat_l2_mru_asize;
842 	kstat_named_t arcstat_l2_mfu_asize;
843 	/*
844 	 * Allocated size (in bytes) of L2ARC cached buffers by buffer content
845 	 * type.
846 	 */
847 	kstat_named_t arcstat_l2_bufc_data_asize;
848 	kstat_named_t arcstat_l2_bufc_metadata_asize;
849 	kstat_named_t arcstat_l2_feeds;
850 	kstat_named_t arcstat_l2_rw_clash;
851 	kstat_named_t arcstat_l2_read_bytes;
852 	kstat_named_t arcstat_l2_write_bytes;
853 	kstat_named_t arcstat_l2_writes_sent;
854 	kstat_named_t arcstat_l2_writes_done;
855 	kstat_named_t arcstat_l2_writes_error;
856 	kstat_named_t arcstat_l2_writes_lock_retry;
857 	kstat_named_t arcstat_l2_evict_lock_retry;
858 	kstat_named_t arcstat_l2_evict_reading;
859 	kstat_named_t arcstat_l2_evict_l1cached;
860 	kstat_named_t arcstat_l2_free_on_write;
861 	kstat_named_t arcstat_l2_abort_lowmem;
862 	kstat_named_t arcstat_l2_cksum_bad;
863 	kstat_named_t arcstat_l2_io_error;
864 	kstat_named_t arcstat_l2_lsize;
865 	kstat_named_t arcstat_l2_psize;
866 	kstat_named_t arcstat_l2_hdr_size;
867 	/*
868 	 * Number of L2ARC log blocks written. These are used for restoring the
869 	 * L2ARC. Updated during writing of L2ARC log blocks.
870 	 */
871 	kstat_named_t arcstat_l2_log_blk_writes;
872 	/*
873 	 * Moving average of the aligned size of the L2ARC log blocks, in
874 	 * bytes. Updated during L2ARC rebuild and during writing of L2ARC
875 	 * log blocks.
876 	 */
877 	kstat_named_t arcstat_l2_log_blk_avg_asize;
878 	/* Aligned size of L2ARC log blocks on L2ARC devices. */
879 	kstat_named_t arcstat_l2_log_blk_asize;
880 	/* Number of L2ARC log blocks present on L2ARC devices. */
881 	kstat_named_t arcstat_l2_log_blk_count;
882 	/*
883 	 * Moving average of the ratio of the aligned size of L2ARC restored
884 	 * data to the aligned size of their metadata in L2ARC, both in bytes.
885 	 * Updated during L2ARC rebuild and during writing of L2ARC log blocks.
886 	 */
887 	kstat_named_t arcstat_l2_data_to_meta_ratio;
888 	/*
889 	 * Number of times the L2ARC rebuild was successful for an L2ARC device.
890 	 */
891 	kstat_named_t arcstat_l2_rebuild_success;
892 	/*
893 	 * Number of times the L2ARC rebuild failed because the device header
894 	 * was in an unsupported format or corrupted.
895 	 */
896 	kstat_named_t arcstat_l2_rebuild_abort_unsupported;
897 	/*
898 	 * Number of times the L2ARC rebuild failed because of IO errors
899 	 * while reading a log block.
900 	 */
901 	kstat_named_t arcstat_l2_rebuild_abort_io_errors;
902 	/*
903 	 * Number of times the L2ARC rebuild failed because of IO errors when
904 	 * reading the device header.
905 	 */
906 	kstat_named_t arcstat_l2_rebuild_abort_dh_errors;
907 	/*
908 	 * Number of L2ARC log blocks which failed to be restored due to
909 	 * checksum errors.
910 	 */
911 	kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors;
912 	/*
913 	 * Number of times the L2ARC rebuild was aborted due to low system
914 	 * memory.
915 	 */
916 	kstat_named_t arcstat_l2_rebuild_abort_lowmem;
917 	/* Logical size of L2ARC restored data, in bytes. */
918 	kstat_named_t arcstat_l2_rebuild_size;
919 	/* Aligned size of L2ARC restored data, in bytes. */
920 	kstat_named_t arcstat_l2_rebuild_asize;
921 	/*
922 	 * Number of L2ARC log entries (buffers) that were successfully
923 	 * restored in ARC.
924 	 */
925 	kstat_named_t arcstat_l2_rebuild_bufs;
926 	/*
927 	 * Number of L2ARC log entries (buffers) already cached in ARC. These
928 	 * were not restored again.
929 	 */
930 	kstat_named_t arcstat_l2_rebuild_bufs_precached;
931 	/*
932 	 * Number of L2ARC log blocks that were restored successfully. Each
933 	 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
934 	 */
935 	kstat_named_t arcstat_l2_rebuild_log_blks;
936 	kstat_named_t arcstat_memory_throttle_count;
937 	kstat_named_t arcstat_memory_direct_count;
938 	kstat_named_t arcstat_memory_indirect_count;
939 	kstat_named_t arcstat_memory_all_bytes;
940 	kstat_named_t arcstat_memory_free_bytes;
941 	kstat_named_t arcstat_memory_available_bytes;
942 	kstat_named_t arcstat_no_grow;
943 	kstat_named_t arcstat_tempreserve;
944 	kstat_named_t arcstat_loaned_bytes;
945 	kstat_named_t arcstat_prune;
946 	kstat_named_t arcstat_meta_used;
947 	kstat_named_t arcstat_dnode_limit;
948 	kstat_named_t arcstat_async_upgrade_sync;
949 	/* Number of predictive prefetch requests. */
950 	kstat_named_t arcstat_predictive_prefetch;
951 	/* Number of requests for which predictive prefetch has completed. */
952 	kstat_named_t arcstat_demand_hit_predictive_prefetch;
953 	/* Number of requests for which predictive prefetch was running. */
954 	kstat_named_t arcstat_demand_iohit_predictive_prefetch;
955 	/* Number of prescient prefetch requests. */
956 	kstat_named_t arcstat_prescient_prefetch;
957 	/* Number of requests for which prescient prefetch has completed. */
958 	kstat_named_t arcstat_demand_hit_prescient_prefetch;
959 	/* Number of requests for which prescient prefetch was running. */
960 	kstat_named_t arcstat_demand_iohit_prescient_prefetch;
961 	kstat_named_t arcstat_need_free;
962 	kstat_named_t arcstat_sys_free;
963 	kstat_named_t arcstat_raw_size;
964 	kstat_named_t arcstat_cached_only_in_progress;
965 	kstat_named_t arcstat_abd_chunk_waste_size;
966 } arc_stats_t;
967 
/*
 * Running counters for ARC statistics, one member per counted statistic.
 *
 * Member names mirror kstat_named_t members of arc_stats_t, which lets the
 * ARCSTAT_INCR(), ARCSTAT_BUMP() and ARCSTAT_BUMPDOWN() macros select a
 * counter by statistic name (they expand to an access of arc_sums.<stat>).
 *
 * Most members are wmsum_t.  Three of them -- arcstat_size,
 * arcstat_dnode_size and arcstat_l2_hdr_size -- are aggsum_t instead.
 * Because ARCSTAT_INCR() expands to wmsum_add(&arc_sums.<stat>, ...), the
 * ARCSTAT_* update macros are only type-correct for the wmsum_t members.
 * NOTE(review): the aggsum_t members are presumably updated through the
 * aggsum API directly -- confirm against arc.c.
 *
 * Not every arc_stats_t member has a counterpart here; for example
 * arcstat_no_grow, arcstat_memory_all_bytes and
 * arcstat_l2_log_blk_avg_asize exist only in arc_stats_t.
 * NOTE(review): presumably those values are stored or computed directly in
 * arc_stats rather than accumulated -- confirm.
 */
968 typedef struct arc_sums {
969 	wmsum_t arcstat_hits;
970 	wmsum_t arcstat_iohits;
971 	wmsum_t arcstat_misses;
972 	wmsum_t arcstat_demand_data_hits;
973 	wmsum_t arcstat_demand_data_iohits;
974 	wmsum_t arcstat_demand_data_misses;
975 	wmsum_t arcstat_demand_metadata_hits;
976 	wmsum_t arcstat_demand_metadata_iohits;
977 	wmsum_t arcstat_demand_metadata_misses;
978 	wmsum_t arcstat_prefetch_data_hits;
979 	wmsum_t arcstat_prefetch_data_iohits;
980 	wmsum_t arcstat_prefetch_data_misses;
981 	wmsum_t arcstat_prefetch_metadata_hits;
982 	wmsum_t arcstat_prefetch_metadata_iohits;
983 	wmsum_t arcstat_prefetch_metadata_misses;
984 	wmsum_t arcstat_mru_hits;
985 	wmsum_t arcstat_mru_ghost_hits;
986 	wmsum_t arcstat_mfu_hits;
987 	wmsum_t arcstat_mfu_ghost_hits;
988 	wmsum_t arcstat_uncached_hits;
989 	wmsum_t arcstat_deleted;
990 	wmsum_t arcstat_mutex_miss;
991 	wmsum_t arcstat_access_skip;
992 	wmsum_t arcstat_evict_skip;
993 	wmsum_t arcstat_evict_not_enough;
994 	wmsum_t arcstat_evict_l2_cached;
995 	wmsum_t arcstat_evict_l2_eligible;
996 	wmsum_t arcstat_evict_l2_eligible_mfu;
997 	wmsum_t arcstat_evict_l2_eligible_mru;
998 	wmsum_t arcstat_evict_l2_ineligible;
999 	wmsum_t arcstat_evict_l2_skip;
1000 	wmsum_t arcstat_hash_elements;
1001 	wmsum_t arcstat_hash_collisions;
1002 	wmsum_t arcstat_hash_chains;
1003 	aggsum_t arcstat_size;
1004 	wmsum_t arcstat_compressed_size;
1005 	wmsum_t arcstat_uncompressed_size;
1006 	wmsum_t arcstat_overhead_size;
1007 	wmsum_t arcstat_hdr_size;
1008 	wmsum_t arcstat_data_size;
1009 	wmsum_t arcstat_metadata_size;
1010 	wmsum_t arcstat_dbuf_size;
1011 	aggsum_t arcstat_dnode_size;
1012 	wmsum_t arcstat_bonus_size;
1013 	wmsum_t arcstat_l2_hits;
1014 	wmsum_t arcstat_l2_misses;
1015 	wmsum_t arcstat_l2_prefetch_asize;
1016 	wmsum_t arcstat_l2_mru_asize;
1017 	wmsum_t arcstat_l2_mfu_asize;
1018 	wmsum_t arcstat_l2_bufc_data_asize;
1019 	wmsum_t arcstat_l2_bufc_metadata_asize;
1020 	wmsum_t arcstat_l2_feeds;
1021 	wmsum_t arcstat_l2_rw_clash;
1022 	wmsum_t arcstat_l2_read_bytes;
1023 	wmsum_t arcstat_l2_write_bytes;
1024 	wmsum_t arcstat_l2_writes_sent;
1025 	wmsum_t arcstat_l2_writes_done;
1026 	wmsum_t arcstat_l2_writes_error;
1027 	wmsum_t arcstat_l2_writes_lock_retry;
1028 	wmsum_t arcstat_l2_evict_lock_retry;
1029 	wmsum_t arcstat_l2_evict_reading;
1030 	wmsum_t arcstat_l2_evict_l1cached;
1031 	wmsum_t arcstat_l2_free_on_write;
1032 	wmsum_t arcstat_l2_abort_lowmem;
1033 	wmsum_t arcstat_l2_cksum_bad;
1034 	wmsum_t arcstat_l2_io_error;
1035 	wmsum_t arcstat_l2_lsize;
1036 	wmsum_t arcstat_l2_psize;
1037 	aggsum_t arcstat_l2_hdr_size;
1038 	wmsum_t arcstat_l2_log_blk_writes;
1039 	wmsum_t arcstat_l2_log_blk_asize;
1040 	wmsum_t arcstat_l2_log_blk_count;
1041 	wmsum_t arcstat_l2_rebuild_success;
1042 	wmsum_t arcstat_l2_rebuild_abort_unsupported;
1043 	wmsum_t arcstat_l2_rebuild_abort_io_errors;
1044 	wmsum_t arcstat_l2_rebuild_abort_dh_errors;
1045 	wmsum_t arcstat_l2_rebuild_abort_cksum_lb_errors;
1046 	wmsum_t arcstat_l2_rebuild_abort_lowmem;
1047 	wmsum_t arcstat_l2_rebuild_size;
1048 	wmsum_t arcstat_l2_rebuild_asize;
1049 	wmsum_t arcstat_l2_rebuild_bufs;
1050 	wmsum_t arcstat_l2_rebuild_bufs_precached;
1051 	wmsum_t arcstat_l2_rebuild_log_blks;
1052 	wmsum_t arcstat_memory_throttle_count;
1053 	wmsum_t arcstat_memory_direct_count;
1054 	wmsum_t arcstat_memory_indirect_count;
1055 	wmsum_t arcstat_prune;
1056 	wmsum_t arcstat_meta_used;
1057 	wmsum_t arcstat_async_upgrade_sync;
1058 	wmsum_t arcstat_predictive_prefetch;
1059 	wmsum_t arcstat_demand_hit_predictive_prefetch;
1060 	wmsum_t arcstat_demand_iohit_predictive_prefetch;
1061 	wmsum_t arcstat_prescient_prefetch;
1062 	wmsum_t arcstat_demand_hit_prescient_prefetch;
1063 	wmsum_t arcstat_demand_iohit_prescient_prefetch;
1064 	wmsum_t arcstat_raw_size;
1065 	wmsum_t arcstat_cached_only_in_progress;
1066 	wmsum_t arcstat_abd_chunk_waste_size;
1067 } arc_sums_t;
1068 
/*
 * One waiter record: list linkage (aew_node), a condition variable
 * (aew_cv) and a 64-bit count (aew_count).
 *
 * NOTE(review): presumably one record is queued per thread blocked in
 * arc_wait_for_eviction(), with aew_count holding the eviction count at
 * which the thread is woken through aew_cv -- confirm against arc.c.
 */
1069 typedef struct arc_evict_waiter {
1070 	list_node_t aew_node;
1071 	kcondvar_t aew_cv;
1072 	uint64_t aew_count;
1073 } arc_evict_waiter_t;
1074 
/*
 * Statistic access helpers.  In each macro, "stat" is a member name shared
 * by arc_stats and/or arc_sums.
 *
 * ARCSTAT(stat)
 *	Expands to the ui64 value of arc_stats.<stat>; usable as an lvalue.
 * ARCSTAT_INCR(stat, val)
 *	Adds val to the arc_sums.<stat> counter through wmsum_add().
 * ARCSTAT_BUMP(stat) / ARCSTAT_BUMPDOWN(stat)
 *	Add 1 / -1 to the arc_sums.<stat> counter, likewise via wmsum_add().
 */
#define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)
#define	ARCSTAT_INCR(stat, val)	wmsum_add(&arc_sums.stat, (val))
#define	ARCSTAT_BUMP(stat)	wmsum_add(&arc_sums.stat, (1))
#define	ARCSTAT_BUMPDOWN(stat)	wmsum_add(&arc_sums.stat, (-1))
1082 
/*
 * Short names for values kept directly in arc_stats.  Each one expands,
 * through ARCSTAT(), to the ui64 value of the named arc_stats member, so it
 * can be both read and assigned like an ordinary variable.
 */
1083 #define	arc_no_grow	ARCSTAT(arcstat_no_grow) /* do not grow cache size */
1084 #define	arc_meta	ARCSTAT(arcstat_meta)	/* target frac of metadata */
1085 #define	arc_pd		ARCSTAT(arcstat_pd)	/* target frac of data MRU */
1086 #define	arc_pm		ARCSTAT(arcstat_pm)	/* target frac of meta MRU */
1087 #define	arc_c		ARCSTAT(arcstat_c)	/* target size of cache */
1088 #define	arc_c_min	ARCSTAT(arcstat_c_min)	/* min target cache size */
1089 #define	arc_c_max	ARCSTAT(arcstat_c_max)	/* max target cache size */
1090 #define	arc_sys_free	ARCSTAT(arcstat_sys_free) /* target system free bytes */
1091 
/*
 * Pointer shorthands: each arc_<state> expands to the address of the
 * matching ARC_<state> object.
 *
 * NOTE(review): of these objects only ARC_mfu and ARC_mru are declared
 * extern in the nearby declarations of this header; the other ARC_* objects
 * must be declared elsewhere for the remaining macros to be usable --
 * confirm where before using them outside the defining file.
 */
1092 #define	arc_anon	(&ARC_anon)
1093 #define	arc_mru		(&ARC_mru)
1094 #define	arc_mru_ghost	(&ARC_mru_ghost)
1095 #define	arc_mfu		(&ARC_mfu)
1096 #define	arc_mfu_ghost	(&ARC_mfu_ghost)
1097 #define	arc_l2c_only	(&ARC_l2c_only)
1098 #define	arc_uncached	(&ARC_uncached)
1099 
/*
 * ARC global variables exported through this header.  These are
 * declarations only; the definitions live in a source file.
 * NOTE(review): presumably arc.c -- confirm.
 *
 * arc_stats and arc_sums are the objects the ARCSTAT*() macros operate on.
 */
1100 extern taskq_t *arc_prune_taskq;
1101 extern arc_stats_t arc_stats;
1102 extern arc_sums_t arc_sums;
1103 extern hrtime_t arc_growtime;
1104 extern boolean_t arc_warm;
1105 extern uint_t arc_grow_retry;
1106 extern uint_t arc_no_grow_shift;
1107 extern uint_t arc_shrink_shift;
1108 extern kmutex_t arc_prune_mtx;
1109 extern list_t arc_prune_list;
/* State objects behind the arc_mfu and arc_mru pointer macros. */
1110 extern arc_state_t	ARC_mfu;
1111 extern arc_state_t	ARC_mru;
/*
 * NOTE(review): the zfs_ and l2arc_ prefixed variables below look like
 * module tunables (compare the param_set_*() setters declared later in this
 * header) -- confirm against their definitions.
 */
1112 extern uint_t zfs_arc_pc_percent;
1113 extern uint_t arc_lotsfree_percent;
1114 extern uint64_t zfs_arc_min;
1115 extern uint64_t zfs_arc_max;
1116 extern uint64_t l2arc_dwpd_limit;
1117 
/*
 * ARC sizing and reclaim entry points.
 *
 * arc_wait_for_eviction() leaves its parameters unnamed in this prototype:
 * a uint64_t followed by two boolean_t flags; see the definition for their
 * meaning.
 * NOTE(review): arc_reduce_target_size() takes a byte count to free and
 * returns a uint64_t, presumably the amount actually reduced -- confirm.
 */
1118 extern uint64_t arc_reduce_target_size(uint64_t to_free);
1119 extern boolean_t arc_reclaim_needed(void);
1120 extern void arc_kmem_reap_soon(void);
1121 extern void arc_wait_for_eviction(uint64_t, boolean_t, boolean_t);
1122 
/*
 * Memory-pressure hooks and queries.  The init/fini and
 * register/unregister functions come in pairs.
 *
 * Note the differing return types: arc_free_memory() is unsigned
 * (uint64_t) while arc_available_memory() is signed (int64_t).
 * NOTE(review): a negative arc_available_memory() result presumably
 * signals a memory deficit -- confirm against the definition.
 * NOTE(review): these are presumably implemented per platform -- confirm.
 */
1123 extern void arc_lowmem_init(void);
1124 extern void arc_lowmem_fini(void);
1125 extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
1126 extern uint64_t arc_free_memory(void);
1127 extern int64_t arc_available_memory(void);
1128 extern void arc_tuning_update(boolean_t);
1129 extern void arc_register_hotplug(void);
1130 extern void arc_unregister_hotplug(void);
1131 
/*
 * Setter callbacks that all share one argument list, supplied by the
 * ZFS_MODULE_PARAM_ARGS macro (defined outside this header), and return
 * int.
 * NOTE(review): presumably these are module-parameter handlers for the ARC
 * tunables, returning 0 or an errno value -- confirm.
 */
1132 extern int param_set_arc_u64(ZFS_MODULE_PARAM_ARGS);
1133 extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
1134 extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);
1135 extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);
1136 extern int param_set_l2arc_dwpd_limit(ZFS_MODULE_PARAM_ARGS);
1137 extern void l2arc_dwpd_bump_reset(void);
1138 
/*
 * L2ARC helpers needed by other source files (the users are named in the
 * per-declaration comments).  These prototypes omit "extern", unlike the
 * ones earlier in this header; for function declarations that makes no
 * difference.
 */
1139 /* used in zdb.c */
/* Returns a boolean_t; the log block pointer is const, the device is not. */
1140 boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
1141     const l2arc_log_blkptr_t *lbp);
1142 
1143 /* used in vdev_trim.c */
1144 void l2arc_dev_hdr_update(l2arc_dev_t *dev);
1145 l2arc_dev_t *l2arc_vdev_get(vdev_t *vd);
1146 
1147 #ifdef __cplusplus
1148 }
1149 #endif
1150 
1151 #endif /* _SYS_ARC_IMPL_H */
1152