xref: /linux/fs/btrfs/zstd.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2016-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  */
7 
8 #include <linux/bio.h>
9 #include <linux/bitmap.h>
10 #include <linux/err.h>
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/mm.h>
14 #include <linux/sched/mm.h>
15 #include <linux/pagemap.h>
16 #include <linux/refcount.h>
17 #include <linux/sched.h>
18 #include <linux/slab.h>
19 #include <linux/zstd.h>
20 #include "misc.h"
21 #include "fs.h"
22 #include "btrfs_inode.h"
23 #include "compression.h"
24 #include "super.h"
25 
/* Upper limit applied to the window log that zstd suggests for a level. */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/* Largest input (in bytes) expected per stream: 1 << 17, i.e. 128 KiB. */
#define ZSTD_BTRFS_MAX_INPUT (1U << ZSTD_BTRFS_MAX_WINDOWLOG)
/* Level range and default exported through btrfs_zstd_compress. */
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
#define ZSTD_BTRFS_MIN_LEVEL -15
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
33 
zstd_get_btrfs_parameters(int level,size_t src_len)34 static zstd_parameters zstd_get_btrfs_parameters(int level,
35 						 size_t src_len)
36 {
37 	zstd_parameters params = zstd_get_params(level, src_len);
38 
39 	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
40 		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
41 	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
42 	return params;
43 }
44 
45 struct workspace {
46 	void *mem;
47 	size_t size;
48 	char *buf;
49 	int level;
50 	int req_level;
51 	unsigned long last_used; /* jiffies */
52 	struct list_head list;
53 	struct list_head lru_list;
54 	zstd_in_buffer in_buf;
55 	zstd_out_buffer out_buf;
56 	zstd_parameters params;
57 };
58 
59 /*
60  * Zstd Workspace Management
61  *
62  * Zstd workspaces have different memory requirements depending on the level.
63  * The zstd workspaces are managed by having individual lists for each level
64  * and a global lru.  Forward progress is maintained by protecting a max level
65  * workspace.
66  *
67  * Getting a workspace is done by using the bitmap to identify the levels that
68  * have available workspaces and scans up.  This lets us recycle higher level
69  * workspaces because of the monotonic memory guarantee.  A workspace's
70  * last_used is only updated if it is being used by the corresponding memory
71  * level.  Putting a workspace involves adding it back to the appropriate places
72  * and adding it back to the lru if necessary.
73  *
74  * A timer is used to reclaim workspaces if they have not been used for
75  * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces around.
76  * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
77  */
78 
79 struct zstd_workspace_manager {
80 	const struct btrfs_compress_op *ops;
81 	spinlock_t lock;
82 	struct list_head lru_list;
83 	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
84 	unsigned long active_map;
85 	wait_queue_head_t wait;
86 	struct timer_list timer;
87 };
88 
89 static struct zstd_workspace_manager wsm;
90 
91 static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
92 
list_to_workspace(struct list_head * list)93 static inline struct workspace *list_to_workspace(struct list_head *list)
94 {
95 	return container_of(list, struct workspace, list);
96 }
97 
/*
 * Convert a compression level into an index for the per-level arrays.
 *
 * Levels above 1 map to level - 1; level 1, level 0 and every negative
 * level map to index 0.
 */
static inline int clip_level(int level)
{
	if (level <= 1)
		return 0;

	return level - 1;
}
102 
103 /*
104  * Timer callback to free unused workspaces.
105  *
106  * @t: timer
107  *
108  * This scans the lru_list and attempts to reclaim any workspace that hasn't
109  * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
110  *
111  * The context is softirq and does not need the _bh locking primitives.
112  */
zstd_reclaim_timer_fn(struct timer_list * timer)113 static void zstd_reclaim_timer_fn(struct timer_list *timer)
114 {
115 	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
116 	struct list_head *pos, *next;
117 
118 	ASSERT(timer == &wsm.timer);
119 
120 	spin_lock(&wsm.lock);
121 
122 	if (list_empty(&wsm.lru_list)) {
123 		spin_unlock(&wsm.lock);
124 		return;
125 	}
126 
127 	list_for_each_prev_safe(pos, next, &wsm.lru_list) {
128 		struct workspace *victim = container_of(pos, struct workspace,
129 							lru_list);
130 		int level;
131 
132 		if (time_after(victim->last_used, reclaim_threshold))
133 			break;
134 
135 		/* workspace is in use */
136 		if (victim->req_level)
137 			continue;
138 
139 		level = victim->level;
140 		list_del(&victim->lru_list);
141 		list_del(&victim->list);
142 		zstd_free_workspace(&victim->list);
143 
144 		if (list_empty(&wsm.idle_ws[level]))
145 			clear_bit(level, &wsm.active_map);
146 
147 	}
148 
149 	if (!list_empty(&wsm.lru_list))
150 		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
151 
152 	spin_unlock(&wsm.lock);
153 }
154 
155 /*
156  * Calculate monotonic memory bounds.
157  *
158  * It is possible based on the level configurations that a higher level
159  * workspace uses less memory than a lower level workspace.  In order to reuse
160  * workspaces, this must be made a monotonic relationship.  This precomputes
161  * the required memory for each level and enforces the monotonicity between
162  * level and memory required.
163  */
zstd_calc_ws_mem_sizes(void)164 static void zstd_calc_ws_mem_sizes(void)
165 {
166 	size_t max_size = 0;
167 	int level;
168 
169 	for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
170 		if (level == 0)
171 			continue;
172 		zstd_parameters params =
173 			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
174 		size_t level_size =
175 			max_t(size_t,
176 			      zstd_cstream_workspace_bound(&params.cParams),
177 			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
178 
179 		max_size = max_t(size_t, max_size, level_size);
180 		/* Use level 1 workspace size for all the fast mode negative levels. */
181 		zstd_ws_mem_sizes[clip_level(level)] = max_size;
182 	}
183 }
184 
zstd_init_workspace_manager(void)185 void zstd_init_workspace_manager(void)
186 {
187 	struct list_head *ws;
188 	int i;
189 
190 	zstd_calc_ws_mem_sizes();
191 
192 	wsm.ops = &btrfs_zstd_compress;
193 	spin_lock_init(&wsm.lock);
194 	init_waitqueue_head(&wsm.wait);
195 	timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);
196 
197 	INIT_LIST_HEAD(&wsm.lru_list);
198 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
199 		INIT_LIST_HEAD(&wsm.idle_ws[i]);
200 
201 	ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
202 	if (IS_ERR(ws)) {
203 		btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
204 	} else {
205 		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
206 		list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
207 	}
208 }
209 
zstd_cleanup_workspace_manager(void)210 void zstd_cleanup_workspace_manager(void)
211 {
212 	struct workspace *workspace;
213 	int i;
214 
215 	spin_lock_bh(&wsm.lock);
216 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
217 		while (!list_empty(&wsm.idle_ws[i])) {
218 			workspace = container_of(wsm.idle_ws[i].next,
219 						 struct workspace, list);
220 			list_del(&workspace->list);
221 			list_del(&workspace->lru_list);
222 			zstd_free_workspace(&workspace->list);
223 		}
224 	}
225 	spin_unlock_bh(&wsm.lock);
226 
227 	timer_delete_sync(&wsm.timer);
228 }
229 
230 /*
231  * Find workspace for given level.
232  *
233  * @level: compression level
234  *
235  * This iterates over the set bits in the active_map beginning at the requested
236  * compression level.  This lets us utilize already allocated workspaces before
237  * allocating a new one.  If the workspace is of a larger size, it is used, but
238  * the place in the lru_list and last_used times are not updated.  This is to
239  * offer the opportunity to reclaim the workspace in favor of allocating an
240  * appropriately sized one in the future.
241  */
zstd_find_workspace(int level)242 static struct list_head *zstd_find_workspace(int level)
243 {
244 	struct list_head *ws;
245 	struct workspace *workspace;
246 	int i = clip_level(level);
247 
248 	spin_lock_bh(&wsm.lock);
249 	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
250 		if (!list_empty(&wsm.idle_ws[i])) {
251 			ws = wsm.idle_ws[i].next;
252 			workspace = list_to_workspace(ws);
253 			list_del_init(ws);
254 			/* keep its place if it's a lower level using this */
255 			workspace->req_level = level;
256 			if (clip_level(level) == workspace->level)
257 				list_del(&workspace->lru_list);
258 			if (list_empty(&wsm.idle_ws[i]))
259 				clear_bit(i, &wsm.active_map);
260 			spin_unlock_bh(&wsm.lock);
261 			return ws;
262 		}
263 	}
264 	spin_unlock_bh(&wsm.lock);
265 
266 	return NULL;
267 }
268 
269 /*
270  * Zstd get_workspace for level.
271  *
272  * @level: compression level
273  *
274  * If @level is 0, then any compression level can be used.  Therefore, we begin
275  * scanning from 1.  We first scan through possible workspaces and then after
276  * attempt to allocate a new workspace.  If we fail to allocate one due to
277  * memory pressure, go to sleep waiting for the max level workspace to free up.
278  */
zstd_get_workspace(int level)279 struct list_head *zstd_get_workspace(int level)
280 {
281 	struct list_head *ws;
282 	unsigned int nofs_flag;
283 
284 	/* level == 0 means we can use any workspace */
285 	if (!level)
286 		level = 1;
287 
288 again:
289 	ws = zstd_find_workspace(level);
290 	if (ws)
291 		return ws;
292 
293 	nofs_flag = memalloc_nofs_save();
294 	ws = zstd_alloc_workspace(level);
295 	memalloc_nofs_restore(nofs_flag);
296 
297 	if (IS_ERR(ws)) {
298 		DEFINE_WAIT(wait);
299 
300 		prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
301 		schedule();
302 		finish_wait(&wsm.wait, &wait);
303 
304 		goto again;
305 	}
306 
307 	return ws;
308 }
309 
310 /*
311  * Zstd put_workspace.
312  *
313  * @ws: list_head for the workspace
314  *
315  * When putting back a workspace, we only need to update the LRU if we are of
316  * the requested compression level.  Here is where we continue to protect the
317  * max level workspace or update last_used accordingly.  If the reclaim timer
318  * isn't set, it is also set here.  Only the max level workspace tries and wakes
319  * up waiting workspaces.
320  */
zstd_put_workspace(struct list_head * ws)321 void zstd_put_workspace(struct list_head *ws)
322 {
323 	struct workspace *workspace = list_to_workspace(ws);
324 
325 	spin_lock_bh(&wsm.lock);
326 
327 	/* A node is only taken off the lru if we are the corresponding level */
328 	if (clip_level(workspace->req_level) == workspace->level) {
329 		/* Hide a max level workspace from reclaim */
330 		if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
331 			INIT_LIST_HEAD(&workspace->lru_list);
332 		} else {
333 			workspace->last_used = jiffies;
334 			list_add(&workspace->lru_list, &wsm.lru_list);
335 			if (!timer_pending(&wsm.timer))
336 				mod_timer(&wsm.timer,
337 					  jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
338 		}
339 	}
340 
341 	set_bit(workspace->level, &wsm.active_map);
342 	list_add(&workspace->list, &wsm.idle_ws[workspace->level]);
343 	workspace->req_level = 0;
344 
345 	spin_unlock_bh(&wsm.lock);
346 
347 	if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL))
348 		cond_wake_up(&wsm.wait);
349 }
350 
zstd_free_workspace(struct list_head * ws)351 void zstd_free_workspace(struct list_head *ws)
352 {
353 	struct workspace *workspace = list_entry(ws, struct workspace, list);
354 
355 	kvfree(workspace->mem);
356 	kfree(workspace->buf);
357 	kfree(workspace);
358 }
359 
zstd_alloc_workspace(int level)360 struct list_head *zstd_alloc_workspace(int level)
361 {
362 	struct workspace *workspace;
363 
364 	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
365 	if (!workspace)
366 		return ERR_PTR(-ENOMEM);
367 
368 	/* Use level 1 workspace size for all the fast mode negative levels. */
369 	workspace->size = zstd_ws_mem_sizes[clip_level(level)];
370 	workspace->level = clip_level(level);
371 	workspace->req_level = level;
372 	workspace->last_used = jiffies;
373 	workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
374 	workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
375 	if (!workspace->mem || !workspace->buf)
376 		goto fail;
377 
378 	INIT_LIST_HEAD(&workspace->list);
379 	INIT_LIST_HEAD(&workspace->lru_list);
380 
381 	return &workspace->list;
382 fail:
383 	zstd_free_workspace(&workspace->list);
384 	return ERR_PTR(-ENOMEM);
385 }
386 
/*
 * Compress a file range into newly allocated folios.
 *
 * @ws:         workspace handle
 * @mapping:    address space the input folios are read from
 * @start:      file offset of the first input byte
 * @folios:     array that receives the allocated output folios
 * @out_folios: in: capacity of @folios; out: number of folios stored in
 *              @folios (set on failure as well)
 * @total_in:   out: number of input bytes consumed (0 on failure)
 * @total_out:  in: number of input bytes to compress; out: number of
 *              compressed bytes produced (0 on failure)
 *
 * Return: 0 on success, -E2BIG if the output does not fit or is not smaller
 * than the input, -ENOMEM if an output folio cannot be allocated, -EIO on a
 * zstd failure, or the error returned when looking up an input folio.
 */
int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	zstd_cstream *stream;
	int ret = 0;
	int nr_folios = 0;
	struct folio *in_folio = NULL;  /* The current folio to read. */
	struct folio *out_folio = NULL; /* The current folio to write to. */
	unsigned long tot_in = 0;
	unsigned long tot_out = 0;
	unsigned long len = *total_out;
	const unsigned long nr_dest_folios = *out_folios;
	const u64 orig_end = start + len;
	unsigned long max_out = nr_dest_folios * PAGE_SIZE;
	unsigned int cur_len;

	workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len);
	*out_folios = 0;
	*total_out = 0;
	*total_in = 0;

	/*
	 * The workspace is reused between calls and in_buf.src may still hold
	 * a pointer from an earlier user.  The cleanup at 'out' unmaps
	 * whatever it finds there, so start from a known-unmapped state in
	 * case we fail before the first input folio is mapped.
	 */
	workspace->in_buf.src = NULL;

	/* Initialize the stream */
	stream = zstd_init_cstream(&workspace->params, len, workspace->mem,
			workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = BTRFS_I(mapping->host);

		btrfs_err(inode->root->fs_info,
	"zstd compression init level %d failed, root %llu inode %llu offset %llu",
			  workspace->req_level, btrfs_root_id(inode->root),
			  btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	/* map in the first page of input data */
	ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
	if (ret < 0)
		goto out;
	cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
	workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_folio(in_folio, start));
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = cur_len;

	/* Allocate and map in the output buffer */
	out_folio = btrfs_alloc_compr_folio();
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	folios[nr_folios++] = out_folio;
	workspace->out_buf.dst = folio_address(out_folio);
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);

	/* Feed the input through the compressor, one folio at a time. */
	while (1) {
		size_t ret2;

		ret2 = zstd_compress_stream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (unlikely(zstd_is_error(ret2))) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			btrfs_warn(inode->root->fs_info,
"zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
				   workspace->req_level, zstd_get_error_code(ret2),
				   btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			ret = -EIO;
			goto out;
		}

		/* Check to see if we are making it bigger */
		if (tot_in + workspace->in_buf.pos > 8192 &&
				tot_in + workspace->in_buf.pos <
				tot_out + workspace->out_buf.pos) {
			ret = -E2BIG;
			goto out;
		}

		/* We've reached the end of our output range */
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		/* Check if we need more output space */
		if (workspace->out_buf.pos == workspace->out_buf.size) {
			tot_out += PAGE_SIZE;
			max_out -= PAGE_SIZE;
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio();
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			folios[nr_folios++] = out_folio;
			workspace->out_buf.dst = folio_address(out_folio);
			workspace->out_buf.pos = 0;
			workspace->out_buf.size = min_t(size_t, max_out,
							PAGE_SIZE);
		}

		/* We've reached the end of the input */
		if (workspace->in_buf.pos >= len) {
			tot_in += workspace->in_buf.pos;
			break;
		}

		/* Check if we need more input */
		if (workspace->in_buf.pos == workspace->in_buf.size) {
			tot_in += workspace->in_buf.size;
			kunmap_local(workspace->in_buf.src);
			/* Nothing is mapped until the next folio is found. */
			workspace->in_buf.src = NULL;
			folio_put(in_folio);
			start += cur_len;
			len -= cur_len;
			ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
			if (ret < 0)
				goto out;
			cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
			workspace->in_buf.src = kmap_local_folio(in_folio,
							 offset_in_folio(in_folio, start));
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = cur_len;
		}
	}
	/* All input consumed: flush what zstd still holds and end the frame. */
	while (1) {
		size_t ret2;

		ret2 = zstd_end_stream(stream, &workspace->out_buf);
		if (unlikely(zstd_is_error(ret2))) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			btrfs_err(inode->root->fs_info,
"zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
				  workspace->req_level, zstd_get_error_code(ret2),
				  btrfs_root_id(inode->root), btrfs_ino(inode),
				  start);
			ret = -EIO;
			goto out;
		}
		if (ret2 == 0) {
			tot_out += workspace->out_buf.pos;
			break;
		}
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		tot_out += PAGE_SIZE;
		max_out -= PAGE_SIZE;
		if (nr_folios == nr_dest_folios) {
			ret = -E2BIG;
			goto out;
		}
		out_folio = btrfs_alloc_compr_folio();
		if (out_folio == NULL) {
			ret = -ENOMEM;
			goto out;
		}
		folios[nr_folios++] = out_folio;
		workspace->out_buf.dst = folio_address(out_folio);
		workspace->out_buf.pos = 0;
		workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
	}

	/* Compression that does not shrink the data is reported as too big. */
	if (tot_out >= tot_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_in = tot_in;
	*total_out = tot_out;
out:
	*out_folios = nr_folios;
	if (workspace->in_buf.src) {
		kunmap_local(workspace->in_buf.src);
		/* Do not leave a dangling mapping pointer in the workspace. */
		workspace->in_buf.src = NULL;
		folio_put(in_folio);
	}
	return ret;
}
578 
zstd_decompress_bio(struct list_head * ws,struct compressed_bio * cb)579 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
580 {
581 	struct workspace *workspace = list_entry(ws, struct workspace, list);
582 	struct folio **folios_in = cb->compressed_folios;
583 	size_t srclen = cb->compressed_len;
584 	zstd_dstream *stream;
585 	int ret = 0;
586 	unsigned long folio_in_index = 0;
587 	unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
588 	unsigned long buf_start;
589 	unsigned long total_out = 0;
590 
591 	stream = zstd_init_dstream(
592 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
593 	if (unlikely(!stream)) {
594 		struct btrfs_inode *inode = cb->bbio.inode;
595 
596 		btrfs_err(inode->root->fs_info,
597 		"zstd decompression init failed, root %llu inode %llu offset %llu",
598 			  btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
599 		ret = -EIO;
600 		goto done;
601 	}
602 
603 	workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0);
604 	workspace->in_buf.pos = 0;
605 	workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
606 
607 	workspace->out_buf.dst = workspace->buf;
608 	workspace->out_buf.pos = 0;
609 	workspace->out_buf.size = PAGE_SIZE;
610 
611 	while (1) {
612 		size_t ret2;
613 
614 		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
615 				&workspace->in_buf);
616 		if (unlikely(zstd_is_error(ret2))) {
617 			struct btrfs_inode *inode = cb->bbio.inode;
618 
619 			btrfs_err(inode->root->fs_info,
620 		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
621 				  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
622 				  btrfs_ino(inode), cb->start);
623 			ret = -EIO;
624 			goto done;
625 		}
626 		buf_start = total_out;
627 		total_out += workspace->out_buf.pos;
628 		workspace->out_buf.pos = 0;
629 
630 		ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
631 				total_out - buf_start, cb, buf_start);
632 		if (ret == 0)
633 			break;
634 
635 		if (workspace->in_buf.pos >= srclen)
636 			break;
637 
638 		/* Check if we've hit the end of a frame */
639 		if (ret2 == 0)
640 			break;
641 
642 		if (workspace->in_buf.pos == workspace->in_buf.size) {
643 			kunmap_local(workspace->in_buf.src);
644 			folio_in_index++;
645 			if (folio_in_index >= total_folios_in) {
646 				workspace->in_buf.src = NULL;
647 				ret = -EIO;
648 				goto done;
649 			}
650 			srclen -= PAGE_SIZE;
651 			workspace->in_buf.src =
652 				kmap_local_folio(folios_in[folio_in_index], 0);
653 			workspace->in_buf.pos = 0;
654 			workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
655 		}
656 	}
657 	ret = 0;
658 done:
659 	if (workspace->in_buf.src)
660 		kunmap_local(workspace->in_buf.src);
661 	return ret;
662 }
663 
/*
 * Decompress a single buffer into part of a folio.
 *
 * @ws:         workspace handle
 * @data_in:    compressed input
 * @dest_folio: folio receiving the decompressed bytes
 * @dest_pgoff: offset inside @dest_folio where the output is placed
 * @srclen:     length of @data_in in bytes
 * @destlen:    number of decompressed bytes the caller expects
 *
 * The output is produced with one zstd call into the workspace bounce
 * buffer, limited to one sector, and then copied into @dest_folio.  If fewer
 * than @destlen bytes were produced, the remainder of the destination range
 * is zeroed and the call fails.
 *
 * Return: 0 on success, -EIO on a zstd failure or short output.  The raw
 * zstd return value is never passed back to the caller.
 */
int zstd_decompress(struct list_head *ws, const u8 *data_in,
		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
		size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb);
	const u32 sectorsize = fs_info->sectorsize;
	zstd_dstream *stream;
	size_t ret2;
	int ret = 0;
	unsigned long to_copy = 0;

	stream = zstd_init_dstream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (unlikely(!stream)) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression init failed, root %llu inode %llu offset %llu",
			  btrfs_root_id(inode->root), btrfs_ino(inode),
			  folio_pos(dest_folio));
		ret = -EIO;
		goto finish;
	}

	workspace->in_buf.src = data_in;
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = srclen;

	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = sectorsize;

	/*
	 * Since both input and output buffers should not exceed one sector,
	 * one call should end the decompression.
	 *
	 * zstd returns a size_t that is either an error value or a hint about
	 * remaining input.  Keep it out of 'ret' so that neither leaks to the
	 * caller as a return code.
	 */
	ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
	if (unlikely(zstd_is_error(ret2))) {
		struct btrfs_inode *inode = folio_to_inode(dest_folio);

		btrfs_err(inode->root->fs_info,
		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
			  zstd_get_error_code(ret2), btrfs_root_id(inode->root),
			  btrfs_ino(inode), folio_pos(dest_folio));
		ret = -EIO;
		goto finish;
	}
	to_copy = workspace->out_buf.pos;
	memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy);
finish:
	/*
	 * The workspace is shared scratch state.  Other users of it treat a
	 * non-NULL in_buf.src as a live mapping to undo, so do not leave the
	 * caller's buffer pointer behind.
	 */
	workspace->in_buf.src = NULL;

	/* Error or early end. */
	if (unlikely(to_copy < destlen)) {
		ret = -EIO;
		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
	}
	return ret;
}
720 
721 const struct btrfs_compress_op btrfs_zstd_compress = {
722 	/* ZSTD uses own workspace manager */
723 	.workspace_manager = NULL,
724 	.min_level	= ZSTD_BTRFS_MIN_LEVEL,
725 	.max_level	= ZSTD_BTRFS_MAX_LEVEL,
726 	.default_level	= ZSTD_BTRFS_DEFAULT_LEVEL,
727 };
728