1 /*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/backing-dev.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32 #include <linux/sysfs.h>
33 #include <linux/debugfs.h>
34 #include <linux/cpuhotplug.h>
35 #include <linux/part_stat.h>
36 #include <linux/kernel_read_file.h>
37
38 #include "zram_drv.h"
39
40 static DEFINE_IDR(zram_index_idr);
41 /* idr index must be protected */
42 static DEFINE_MUTEX(zram_index_mutex);
43
44 static int zram_major;
45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
46
47 #define ZRAM_MAX_ALGO_NAME_SZ 128
48
49 /* Module params (documentation at end) */
50 static unsigned int num_devices = 1;
51 /*
52 * Pages that compress to sizes equal to or greater than this are stored
53 * uncompressed in memory.
54 */
55 static size_t huge_class_size;
56
57 static const struct block_device_operations zram_devops;
58
59 static void zram_free_page(struct zram *zram, size_t index);
60 static int zram_read_from_zspool(struct zram *zram, struct page *page,
61 u32 index);
62
63 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
64
65 static void zram_slot_lock_init(struct zram *zram, u32 index)
66 {
67 static struct lock_class_key __key;
68
69 lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
70 &__key, 0);
71 }
72
73 /*
74 * entry locking rules (an illustrative usage sketch follows zram_slot_unlock()):
75 *
76 * 1) Lock is exclusive
77 *
78 * 2) lock() function can sleep waiting for the lock
79 *
80 * 3) Lock owner can sleep
81 *
82 * 4) Use the TRY lock variant when in atomic context
83 * - must check the return value and handle locking failures
84 */
85 static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
86 {
87 unsigned long *lock = &zram->table[index].flags;
88
89 if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
90 mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
91 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
92 return true;
93 }
94
95 return false;
96 }
97
98 static void zram_slot_lock(struct zram *zram, u32 index)
99 {
100 unsigned long *lock = &zram->table[index].flags;
101
102 mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
103 wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
104 lock_acquired(slot_dep_map(zram, index), _RET_IP_);
105 }
106
107 static void zram_slot_unlock(struct zram *zram, u32 index)
108 {
109 unsigned long *lock = &zram->table[index].flags;
110
111 mutex_release(slot_dep_map(zram, index), _RET_IP_);
112 clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
113 }
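/*
 * Illustrative usage sketch (added for clarity, not part of the original
 * driver): a typical caller takes the slot lock, inspects or updates
 * per-slot state, and drops the lock before doing anything that must not
 * hold it:
 *
 *	zram_slot_lock(zram, index);
 *	if (zram_test_flag(zram, index, ZRAM_SAME))
 *		handle = zram_get_handle(zram, index);
 *	zram_slot_unlock(zram, index);
 *
 * In atomic context only the trylock variant may be used, and the caller
 * must check the return value and bail out (hypothetical error path) when
 * the lock cannot be taken:
 *
 *	if (!zram_slot_trylock(zram, index))
 *		return -EAGAIN;
 *	...
 *	zram_slot_unlock(zram, index);
 */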
114
115 static inline bool init_done(struct zram *zram)
116 {
117 return zram->disksize;
118 }
119
120 static inline struct zram *dev_to_zram(struct device *dev)
121 {
122 return (struct zram *)dev_to_disk(dev)->private_data;
123 }
124
125 static unsigned long zram_get_handle(struct zram *zram, u32 index)
126 {
127 return zram->table[index].handle;
128 }
129
130 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
131 {
132 zram->table[index].handle = handle;
133 }
134
135 static bool zram_test_flag(struct zram *zram, u32 index,
136 enum zram_pageflags flag)
137 {
138 return zram->table[index].flags & BIT(flag);
139 }
140
141 static void zram_set_flag(struct zram *zram, u32 index,
142 enum zram_pageflags flag)
143 {
144 zram->table[index].flags |= BIT(flag);
145 }
146
147 static void zram_clear_flag(struct zram *zram, u32 index,
148 enum zram_pageflags flag)
149 {
150 zram->table[index].flags &= ~BIT(flag);
151 }
152
153 static size_t zram_get_obj_size(struct zram *zram, u32 index)
154 {
155 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
156 }
157
158 static void zram_set_obj_size(struct zram *zram,
159 u32 index, size_t size)
160 {
161 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
162
163 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
164 }
165
166 static inline bool zram_allocated(struct zram *zram, u32 index)
167 {
168 return zram_get_obj_size(zram, index) ||
169 zram_test_flag(zram, index, ZRAM_SAME) ||
170 zram_test_flag(zram, index, ZRAM_WB);
171 }
172
173 static inline void update_used_max(struct zram *zram, const unsigned long pages)
174 {
175 unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
176
177 do {
178 if (cur_max >= pages)
179 return;
180 } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
181 &cur_max, pages));
182 }
183
184 static bool zram_can_store_page(struct zram *zram)
185 {
186 unsigned long alloced_pages;
187
188 alloced_pages = zs_get_total_pages(zram->mem_pool);
189 update_used_max(zram, alloced_pages);
190
191 return !zram->limit_pages || alloced_pages <= zram->limit_pages;
192 }
193
194 #if PAGE_SIZE != 4096
195 static inline bool is_partial_io(struct bio_vec *bvec)
196 {
197 return bvec->bv_len != PAGE_SIZE;
198 }
199 #define ZRAM_PARTIAL_IO 1
200 #else
201 static inline bool is_partial_io(struct bio_vec *bvec)
202 {
203 return false;
204 }
205 #endif
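/*
 * Note (added for clarity): a bio_vec is "partial" when it covers less than
 * a full page, e.g. a 4K write arriving on a kernel built with a larger
 * PAGE_SIZE. On 4K-page kernels every zram IO spans exactly one page, so
 * is_partial_io() is constant-false and the partial-IO handling is
 * optimized away.
 */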
206
207 static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
208 {
209 prio &= ZRAM_COMP_PRIORITY_MASK;
210 /*
211 * Clear the previous priority value first, in case we are
212 * recompressing an already recompressed page.
213 */
214 zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
215 ZRAM_COMP_PRIORITY_BIT1);
216 zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
217 }
218
219 static inline u32 zram_get_priority(struct zram *zram, u32 index)
220 {
221 u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
222
223 return prio & ZRAM_COMP_PRIORITY_MASK;
224 }
225
226 static void zram_accessed(struct zram *zram, u32 index)
227 {
228 zram_clear_flag(zram, index, ZRAM_IDLE);
229 zram_clear_flag(zram, index, ZRAM_PP_SLOT);
230 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
231 zram->table[index].ac_time = ktime_get_boottime();
232 #endif
233 }
234
235 #if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
236 struct zram_pp_slot {
237 unsigned long index;
238 struct list_head entry;
239 };
240
241 /*
242 * A post-processing bucket is, essentially, a size class. This defines
243 * the range (in bytes) of pp-slot sizes that fall into a particular bucket.
244 */
245 #define PP_BUCKET_SIZE_RANGE 64
246 #define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
247
248 struct zram_pp_ctl {
249 struct list_head pp_buckets[NUM_PP_BUCKETS];
250 };
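/*
 * Illustrative example (added for clarity, assuming PAGE_SIZE == 4096):
 * with PP_BUCKET_SIZE_RANGE == 64, a slot whose compressed object is
 * 100 bytes goes into bucket 100 / 64 == 1, while a 3000-byte object
 * goes into bucket 3000 / 64 == 46. select_pp_slot() scans buckets from
 * the highest index down, so larger (less compressible) objects are
 * post-processed first.
 */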
251
252 static struct zram_pp_ctl *init_pp_ctl(void)
253 {
254 struct zram_pp_ctl *ctl;
255 u32 idx;
256
257 ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
258 if (!ctl)
259 return NULL;
260
261 for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
262 INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
263 return ctl;
264 }
265
266 static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
267 {
268 list_del_init(&pps->entry);
269
270 zram_slot_lock(zram, pps->index);
271 zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
272 zram_slot_unlock(zram, pps->index);
273
274 kfree(pps);
275 }
276
277 static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
278 {
279 u32 idx;
280
281 if (!ctl)
282 return;
283
284 for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
285 while (!list_empty(&ctl->pp_buckets[idx])) {
286 struct zram_pp_slot *pps;
287
288 pps = list_first_entry(&ctl->pp_buckets[idx],
289 struct zram_pp_slot,
290 entry);
291 release_pp_slot(zram, pps);
292 }
293 }
294
295 kfree(ctl);
296 }
297
298 static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
299 u32 index)
300 {
301 struct zram_pp_slot *pps;
302 u32 bid;
303
304 pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN);
305 if (!pps)
306 return false;
307
308 INIT_LIST_HEAD(&pps->entry);
309 pps->index = index;
310
311 bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
312 list_add(&pps->entry, &ctl->pp_buckets[bid]);
313
314 zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
315 return true;
316 }
317
318 static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
319 {
320 struct zram_pp_slot *pps = NULL;
321 s32 idx = NUM_PP_BUCKETS - 1;
322
323 /* The higher the bucket id the more optimal slot post-processing is */
324 while (idx >= 0) {
325 pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
326 struct zram_pp_slot,
327 entry);
328 if (pps)
329 break;
330
331 idx--;
332 }
333 return pps;
334 }
335 #endif
336
337 static inline void zram_fill_page(void *ptr, unsigned long len,
338 unsigned long value)
339 {
340 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
341 memset_l(ptr, value, len / sizeof(unsigned long));
342 }
343
344 static bool page_same_filled(void *ptr, unsigned long *element)
345 {
346 unsigned long *page;
347 unsigned long val;
348 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
349
350 page = (unsigned long *)ptr;
351 val = page[0];
352
353 if (val != page[last_pos])
354 return false;
355
356 for (pos = 1; pos < last_pos; pos++) {
357 if (val != page[pos])
358 return false;
359 }
360
361 *element = val;
362
363 return true;
364 }
365
366 static ssize_t initstate_show(struct device *dev,
367 struct device_attribute *attr, char *buf)
368 {
369 u32 val;
370 struct zram *zram = dev_to_zram(dev);
371
372 down_read(&zram->init_lock);
373 val = init_done(zram);
374 up_read(&zram->init_lock);
375
376 return sysfs_emit(buf, "%u\n", val);
377 }
378
379 static ssize_t disksize_show(struct device *dev,
380 struct device_attribute *attr, char *buf)
381 {
382 struct zram *zram = dev_to_zram(dev);
383
384 return sysfs_emit(buf, "%llu\n", zram->disksize);
385 }
386
387 static ssize_t mem_limit_store(struct device *dev,
388 struct device_attribute *attr, const char *buf, size_t len)
389 {
390 u64 limit;
391 char *tmp;
392 struct zram *zram = dev_to_zram(dev);
393
394 limit = memparse(buf, &tmp);
395 if (buf == tmp) /* no chars parsed, invalid input */
396 return -EINVAL;
397
398 down_write(&zram->init_lock);
399 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
400 up_write(&zram->init_lock);
401
402 return len;
403 }
404
405 static ssize_t mem_used_max_store(struct device *dev,
406 struct device_attribute *attr, const char *buf, size_t len)
407 {
408 int err;
409 unsigned long val;
410 struct zram *zram = dev_to_zram(dev);
411
412 err = kstrtoul(buf, 10, &val);
413 if (err || val != 0)
414 return -EINVAL;
415
416 down_read(&zram->init_lock);
417 if (init_done(zram)) {
418 atomic_long_set(&zram->stats.max_used_pages,
419 zs_get_total_pages(zram->mem_pool));
420 }
421 up_read(&zram->init_lock);
422
423 return len;
424 }
425
426 /*
427 * Mark all pages which are older than or equal to cutoff as IDLE.
428 * Callers should hold the zram init lock in read mode.
429 */
430 static void mark_idle(struct zram *zram, ktime_t cutoff)
431 {
432 int is_idle = 1;
433 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
434 int index;
435
436 for (index = 0; index < nr_pages; index++) {
437 /*
438 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
439 * post-processing (recompress, writeback) happens to the
440 * ZRAM_SAME slot.
441 *
442 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
443 */
444 zram_slot_lock(zram, index);
445 if (!zram_allocated(zram, index) ||
446 zram_test_flag(zram, index, ZRAM_WB) ||
447 zram_test_flag(zram, index, ZRAM_SAME)) {
448 zram_slot_unlock(zram, index);
449 continue;
450 }
451
452 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
453 is_idle = !cutoff ||
454 ktime_after(cutoff, zram->table[index].ac_time);
455 #endif
456 if (is_idle)
457 zram_set_flag(zram, index, ZRAM_IDLE);
458 else
459 zram_clear_flag(zram, index, ZRAM_IDLE);
460 zram_slot_unlock(zram, index);
461 }
462 }
463
464 static ssize_t idle_store(struct device *dev,
465 struct device_attribute *attr, const char *buf, size_t len)
466 {
467 struct zram *zram = dev_to_zram(dev);
468 ktime_t cutoff_time = 0;
469 ssize_t rv = -EINVAL;
470
471 if (!sysfs_streq(buf, "all")) {
472 /*
473 * If it did not parse as 'all', try to treat it as an integer
474 * when memory tracking is enabled.
475 */
476 u64 age_sec;
477
478 if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec))
479 cutoff_time = ktime_sub(ktime_get_boottime(),
480 ns_to_ktime(age_sec * NSEC_PER_SEC));
481 else
482 goto out;
483 }
484
485 down_read(&zram->init_lock);
486 if (!init_done(zram))
487 goto out_unlock;
488
489 /*
490 * A cutoff_time of 0 marks everything as idle; this is the
491 * "all" behavior.
492 */
493 mark_idle(zram, cutoff_time);
494 rv = len;
495
496 out_unlock:
497 up_read(&zram->init_lock);
498 out:
499 return rv;
500 }
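/*
 * Example usage from user-space (illustrative; the zram0 device name is an
 * assumption):
 *
 *	echo all > /sys/block/zram0/idle
 *	echo 300 > /sys/block/zram0/idle
 *
 * The first form marks every eligible slot idle; the second marks slots
 * that have not been accessed in the last 300 seconds and requires
 * CONFIG_ZRAM_TRACK_ENTRY_ACTIME.
 */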
501
502 #ifdef CONFIG_ZRAM_WRITEBACK
503 static ssize_t writeback_limit_enable_store(struct device *dev,
504 struct device_attribute *attr, const char *buf, size_t len)
505 {
506 struct zram *zram = dev_to_zram(dev);
507 u64 val;
508 ssize_t ret = -EINVAL;
509
510 if (kstrtoull(buf, 10, &val))
511 return ret;
512
513 down_read(&zram->init_lock);
514 spin_lock(&zram->wb_limit_lock);
515 zram->wb_limit_enable = val;
516 spin_unlock(&zram->wb_limit_lock);
517 up_read(&zram->init_lock);
518 ret = len;
519
520 return ret;
521 }
522
523 static ssize_t writeback_limit_enable_show(struct device *dev,
524 struct device_attribute *attr, char *buf)
525 {
526 bool val;
527 struct zram *zram = dev_to_zram(dev);
528
529 down_read(&zram->init_lock);
530 spin_lock(&zram->wb_limit_lock);
531 val = zram->wb_limit_enable;
532 spin_unlock(&zram->wb_limit_lock);
533 up_read(&zram->init_lock);
534
535 return sysfs_emit(buf, "%d\n", val);
536 }
537
538 static ssize_t writeback_limit_store(struct device *dev,
539 struct device_attribute *attr, const char *buf, size_t len)
540 {
541 struct zram *zram = dev_to_zram(dev);
542 u64 val;
543 ssize_t ret = -EINVAL;
544
545 if (kstrtoull(buf, 10, &val))
546 return ret;
547
548 down_read(&zram->init_lock);
549 spin_lock(&zram->wb_limit_lock);
550 zram->bd_wb_limit = val;
551 spin_unlock(&zram->wb_limit_lock);
552 up_read(&zram->init_lock);
553 ret = len;
554
555 return ret;
556 }
557
558 static ssize_t writeback_limit_show(struct device *dev,
559 struct device_attribute *attr, char *buf)
560 {
561 u64 val;
562 struct zram *zram = dev_to_zram(dev);
563
564 down_read(&zram->init_lock);
565 spin_lock(&zram->wb_limit_lock);
566 val = zram->bd_wb_limit;
567 spin_unlock(&zram->wb_limit_lock);
568 up_read(&zram->init_lock);
569
570 return sysfs_emit(buf, "%llu\n", val);
571 }
572
573 static void reset_bdev(struct zram *zram)
574 {
575 if (!zram->backing_dev)
576 return;
577
578 /* hope filp_close flushes all outstanding IO */
579 filp_close(zram->backing_dev, NULL);
580 zram->backing_dev = NULL;
581 zram->bdev = NULL;
582 zram->disk->fops = &zram_devops;
583 kvfree(zram->bitmap);
584 zram->bitmap = NULL;
585 }
586
587 static ssize_t backing_dev_show(struct device *dev,
588 struct device_attribute *attr, char *buf)
589 {
590 struct file *file;
591 struct zram *zram = dev_to_zram(dev);
592 char *p;
593 ssize_t ret;
594
595 down_read(&zram->init_lock);
596 file = zram->backing_dev;
597 if (!file) {
598 memcpy(buf, "none\n", 5);
599 up_read(&zram->init_lock);
600 return 5;
601 }
602
603 p = file_path(file, buf, PAGE_SIZE - 1);
604 if (IS_ERR(p)) {
605 ret = PTR_ERR(p);
606 goto out;
607 }
608
609 ret = strlen(p);
610 memmove(buf, p, ret);
611 buf[ret++] = '\n';
612 out:
613 up_read(&zram->init_lock);
614 return ret;
615 }
616
617 static ssize_t backing_dev_store(struct device *dev,
618 struct device_attribute *attr, const char *buf, size_t len)
619 {
620 char *file_name;
621 size_t sz;
622 struct file *backing_dev = NULL;
623 struct inode *inode;
624 unsigned int bitmap_sz;
625 unsigned long nr_pages, *bitmap = NULL;
626 int err;
627 struct zram *zram = dev_to_zram(dev);
628
629 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
630 if (!file_name)
631 return -ENOMEM;
632
633 down_write(&zram->init_lock);
634 if (init_done(zram)) {
635 pr_info("Can't setup backing device for initialized device\n");
636 err = -EBUSY;
637 goto out;
638 }
639
640 strscpy(file_name, buf, PATH_MAX);
641 /* ignore trailing newline */
642 sz = strlen(file_name);
643 if (sz > 0 && file_name[sz - 1] == '\n')
644 file_name[sz - 1] = 0x00;
645
646 backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
647 if (IS_ERR(backing_dev)) {
648 err = PTR_ERR(backing_dev);
649 backing_dev = NULL;
650 goto out;
651 }
652
653 inode = backing_dev->f_mapping->host;
654
655 /* Only block devices are supported at the moment */
656 if (!S_ISBLK(inode->i_mode)) {
657 err = -ENOTBLK;
658 goto out;
659 }
660
661 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
662 /* Refuse to use zero sized device (also prevents self reference) */
663 if (!nr_pages) {
664 err = -EINVAL;
665 goto out;
666 }
667
668 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
669 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
670 if (!bitmap) {
671 err = -ENOMEM;
672 goto out;
673 }
674
675 reset_bdev(zram);
676
677 zram->bdev = I_BDEV(inode);
678 zram->backing_dev = backing_dev;
679 zram->bitmap = bitmap;
680 zram->nr_pages = nr_pages;
681 up_write(&zram->init_lock);
682
683 pr_info("setup backing device %s\n", file_name);
684 kfree(file_name);
685
686 return len;
687 out:
688 kvfree(bitmap);
689
690 if (backing_dev)
691 filp_close(backing_dev, NULL);
692
693 up_write(&zram->init_lock);
694
695 kfree(file_name);
696
697 return err;
698 }
699
700 static unsigned long alloc_block_bdev(struct zram *zram)
701 {
702 unsigned long blk_idx = 1;
703 retry:
704 /* skip bit 0 so a block index is never confused with zram.handle == 0 */
705 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
706 if (blk_idx == zram->nr_pages)
707 return 0;
708
709 if (test_and_set_bit(blk_idx, zram->bitmap))
710 goto retry;
711
712 atomic64_inc(&zram->stats.bd_count);
713 return blk_idx;
714 }
715
716 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
717 {
718 int was_set;
719
720 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
721 WARN_ON_ONCE(!was_set);
722 atomic64_dec(&zram->stats.bd_count);
723 }
724
725 static void read_from_bdev_async(struct zram *zram, struct page *page,
726 unsigned long entry, struct bio *parent)
727 {
728 struct bio *bio;
729
730 bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
731 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
732 __bio_add_page(bio, page, PAGE_SIZE, 0);
733 bio_chain(bio, parent);
734 submit_bio(bio);
735 }
736
737 static int zram_writeback_slots(struct zram *zram, struct zram_pp_ctl *ctl)
738 {
739 unsigned long blk_idx = 0;
740 struct page *page = NULL;
741 struct zram_pp_slot *pps;
742 struct bio_vec bio_vec;
743 struct bio bio;
744 int ret = 0, err;
745 u32 index;
746
747 page = alloc_page(GFP_KERNEL);
748 if (!page)
749 return -ENOMEM;
750
751 while ((pps = select_pp_slot(ctl))) {
752 spin_lock(&zram->wb_limit_lock);
753 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
754 spin_unlock(&zram->wb_limit_lock);
755 ret = -EIO;
756 break;
757 }
758 spin_unlock(&zram->wb_limit_lock);
759
760 if (!blk_idx) {
761 blk_idx = alloc_block_bdev(zram);
762 if (!blk_idx) {
763 ret = -ENOSPC;
764 break;
765 }
766 }
767
768 index = pps->index;
769 zram_slot_lock(zram, index);
770 /*
771 * scan_slots() sets ZRAM_PP_SLOT and releases the slot lock, so
772 * slots can change in the meantime. If slots are accessed or
773 * freed they lose the ZRAM_PP_SLOT flag and hence we don't
774 * post-process them.
775 */
776 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
777 goto next;
778 if (zram_read_from_zspool(zram, page, index))
779 goto next;
780 zram_slot_unlock(zram, index);
781
782 bio_init(&bio, zram->bdev, &bio_vec, 1,
783 REQ_OP_WRITE | REQ_SYNC);
784 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
785 __bio_add_page(&bio, page, PAGE_SIZE, 0);
786
787 /*
788 * XXX: Single-page IO is inefficient for writes, but it is
789 * not a bad starting point.
790 */
791 err = submit_bio_wait(&bio);
792 if (err) {
793 release_pp_slot(zram, pps);
794 /*
795 * BIO errors are not fatal, we continue and simply
796 * attempt to writeback the remaining objects (pages).
797 * At the same time we need to signal user-space that
798 * some writes (at least one, but also could be all of
799 * them) were not successful and we do so by returning
800 * the most recent BIO error.
801 */
802 ret = err;
803 continue;
804 }
805
806 atomic64_inc(&zram->stats.bd_writes);
807 zram_slot_lock(zram, index);
808 /*
809 * Same as above: we release the slot lock during writeback, so
810 * the slot can change under us. It may be freed (slot_free()),
811 * or freed and then reallocated (zram_write_page()). In both
812 * cases the slot loses the ZRAM_PP_SLOT flag. No concurrent
813 * post-processing can set ZRAM_PP_SLOT on such slots until the
814 * current post-processing finishes.
815 */
816 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
817 goto next;
818
819 zram_free_page(zram, index);
820 zram_set_flag(zram, index, ZRAM_WB);
821 zram_set_handle(zram, index, blk_idx);
822 blk_idx = 0;
823 atomic64_inc(&zram->stats.pages_stored);
824 spin_lock(&zram->wb_limit_lock);
825 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
826 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
827 spin_unlock(&zram->wb_limit_lock);
828 next:
829 zram_slot_unlock(zram, index);
830 release_pp_slot(zram, pps);
831
832 cond_resched();
833 }
834
835 if (blk_idx)
836 free_block_bdev(zram, blk_idx);
837 if (page)
838 __free_page(page);
839
840 return ret;
841 }
842
843 #define PAGE_WRITEBACK 0
844 #define HUGE_WRITEBACK (1 << 0)
845 #define IDLE_WRITEBACK (1 << 1)
846 #define INCOMPRESSIBLE_WRITEBACK (1 << 2)
847
848 static int parse_page_index(char *val, unsigned long nr_pages,
849 unsigned long *lo, unsigned long *hi)
850 {
851 int ret;
852
853 ret = kstrtoul(val, 10, lo);
854 if (ret)
855 return ret;
856 if (*lo >= nr_pages)
857 return -ERANGE;
858 *hi = *lo + 1;
859 return 0;
860 }
861
862 static int parse_page_indexes(char *val, unsigned long nr_pages,
863 unsigned long *lo, unsigned long *hi)
864 {
865 char *delim;
866 int ret;
867
868 delim = strchr(val, '-');
869 if (!delim)
870 return -EINVAL;
871
872 *delim = 0x00;
873 ret = kstrtoul(val, 10, lo);
874 if (ret)
875 return ret;
876 if (*lo >= nr_pages)
877 return -ERANGE;
878
879 ret = kstrtoul(delim + 1, 10, hi);
880 if (ret)
881 return ret;
882 if (*hi >= nr_pages || *lo > *hi)
883 return -ERANGE;
884 *hi += 1;
885 return 0;
886 }
887
888 static int parse_mode(char *val, u32 *mode)
889 {
890 *mode = 0;
891
892 if (!strcmp(val, "idle"))
893 *mode = IDLE_WRITEBACK;
894 if (!strcmp(val, "huge"))
895 *mode = HUGE_WRITEBACK;
896 if (!strcmp(val, "huge_idle"))
897 *mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
898 if (!strcmp(val, "incompressible"))
899 *mode = INCOMPRESSIBLE_WRITEBACK;
900
901 if (*mode == 0)
902 return -EINVAL;
903 return 0;
904 }
905
906 static int scan_slots_for_writeback(struct zram *zram, u32 mode,
907 unsigned long lo, unsigned long hi,
908 struct zram_pp_ctl *ctl)
909 {
910 u32 index = lo;
911
912 while (index < hi) {
913 bool ok = true;
914
915 zram_slot_lock(zram, index);
916 if (!zram_allocated(zram, index))
917 goto next;
918
919 if (zram_test_flag(zram, index, ZRAM_WB) ||
920 zram_test_flag(zram, index, ZRAM_SAME))
921 goto next;
922
923 if (mode & IDLE_WRITEBACK &&
924 !zram_test_flag(zram, index, ZRAM_IDLE))
925 goto next;
926 if (mode & HUGE_WRITEBACK &&
927 !zram_test_flag(zram, index, ZRAM_HUGE))
928 goto next;
929 if (mode & INCOMPRESSIBLE_WRITEBACK &&
930 !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
931 goto next;
932
933 ok = place_pp_slot(zram, ctl, index);
934 next:
935 zram_slot_unlock(zram, index);
936 if (!ok)
937 break;
938 index++;
939 }
940
941 return 0;
942 }
943
944 static ssize_t writeback_store(struct device *dev,
945 struct device_attribute *attr,
946 const char *buf, size_t len)
947 {
948 struct zram *zram = dev_to_zram(dev);
949 u64 nr_pages = zram->disksize >> PAGE_SHIFT;
950 unsigned long lo = 0, hi = nr_pages;
951 struct zram_pp_ctl *ctl = NULL;
952 char *args, *param, *val;
953 ssize_t ret = len;
954 int err, mode = 0;
955
956 down_read(&zram->init_lock);
957 if (!init_done(zram)) {
958 up_read(&zram->init_lock);
959 return -EINVAL;
960 }
961
962 /* Do not permit concurrent post-processing actions. */
963 if (atomic_xchg(&zram->pp_in_progress, 1)) {
964 up_read(&zram->init_lock);
965 return -EAGAIN;
966 }
967
968 if (!zram->backing_dev) {
969 ret = -ENODEV;
970 goto release_init_lock;
971 }
972
973 ctl = init_pp_ctl();
974 if (!ctl) {
975 ret = -ENOMEM;
976 goto release_init_lock;
977 }
978
979 args = skip_spaces(buf);
980 while (*args) {
981 args = next_arg(args, &param, &val);
982
983 /*
984 * Workaround to support the old writeback interface.
985 *
986 * The old writeback interface has a minor inconsistency and
987 * requires key=value only for page_index parameter, while the
988 * writeback mode is a valueless parameter.
989 *
990 * This is not the case anymore and now all parameters are
991 * required to have values, however, we need to support the
992 * legacy writeback interface format so we check if we can
993 * recognize a valueless parameter as the (legacy) writeback
994 * mode.
995 */
996 if (!val || !*val) {
997 err = parse_mode(param, &mode);
998 if (err) {
999 ret = err;
1000 goto release_init_lock;
1001 }
1002
1003 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1004 break;
1005 }
1006
1007 if (!strcmp(param, "type")) {
1008 err = parse_mode(val, &mode);
1009 if (err) {
1010 ret = err;
1011 goto release_init_lock;
1012 }
1013
1014 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1015 break;
1016 }
1017
1018 if (!strcmp(param, "page_index")) {
1019 err = parse_page_index(val, nr_pages, &lo, &hi);
1020 if (err) {
1021 ret = err;
1022 goto release_init_lock;
1023 }
1024
1025 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1026 continue;
1027 }
1028
1029 if (!strcmp(param, "page_indexes")) {
1030 err = parse_page_indexes(val, nr_pages, &lo, &hi);
1031 if (err) {
1032 ret = err;
1033 goto release_init_lock;
1034 }
1035
1036 scan_slots_for_writeback(zram, mode, lo, hi, ctl);
1037 continue;
1038 }
1039 }
1040
1041 err = zram_writeback_slots(zram, ctl);
1042 if (err)
1043 ret = err;
1044
1045 release_init_lock:
1046 release_pp_ctl(zram, ctl);
1047 atomic_set(&zram->pp_in_progress, 0);
1048 up_read(&zram->init_lock);
1049
1050 return ret;
1051 }
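/*
 * Example usage from user-space (illustrative; the zram0 device name is an
 * assumption):
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo type=huge_idle > /sys/block/zram0/writeback
 *	echo page_indexes=1-20 > /sys/block/zram0/writeback
 *
 * Parameters are parsed left to right: page_index=/page_indexes= scan the
 * given range immediately, while type= (or the legacy valueless mode, as in
 * the first line) scans the currently selected range (the whole device by
 * default) and stops further parsing.
 */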
1052
1053 struct zram_work {
1054 struct work_struct work;
1055 struct zram *zram;
1056 unsigned long entry;
1057 struct page *page;
1058 int error;
1059 };
1060
1061 static void zram_sync_read(struct work_struct *work)
1062 {
1063 struct zram_work *zw = container_of(work, struct zram_work, work);
1064 struct bio_vec bv;
1065 struct bio bio;
1066
1067 bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
1068 bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
1069 __bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
1070 zw->error = submit_bio_wait(&bio);
1071 }
1072
1073 /*
1074 * The block layer wants only one ->submit_bio to be active at a time, so
1075 * issuing chained IO with the parent IO in the same context can deadlock.
1076 * To avoid that, use a worker thread context.
1077 */
1078 static int read_from_bdev_sync(struct zram *zram, struct page *page,
1079 unsigned long entry)
1080 {
1081 struct zram_work work;
1082
1083 work.page = page;
1084 work.zram = zram;
1085 work.entry = entry;
1086
1087 INIT_WORK_ONSTACK(&work.work, zram_sync_read);
1088 queue_work(system_unbound_wq, &work.work);
1089 flush_work(&work.work);
1090 destroy_work_on_stack(&work.work);
1091
1092 return work.error;
1093 }
1094
1095 static int read_from_bdev(struct zram *zram, struct page *page,
1096 unsigned long entry, struct bio *parent)
1097 {
1098 atomic64_inc(&zram->stats.bd_reads);
1099 if (!parent) {
1100 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
1101 return -EIO;
1102 return read_from_bdev_sync(zram, page, entry);
1103 }
1104 read_from_bdev_async(zram, page, entry, parent);
1105 return 0;
1106 }
1107 #else
1108 static inline void reset_bdev(struct zram *zram) {};
1109 static int read_from_bdev(struct zram *zram, struct page *page,
1110 unsigned long entry, struct bio *parent)
1111 {
1112 return -EIO;
1113 }
1114
1115 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
1116 #endif
1117
1118 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1119
1120 static struct dentry *zram_debugfs_root;
1121
1122 static void zram_debugfs_create(void)
1123 {
1124 zram_debugfs_root = debugfs_create_dir("zram", NULL);
1125 }
1126
1127 static void zram_debugfs_destroy(void)
1128 {
1129 debugfs_remove_recursive(zram_debugfs_root);
1130 }
1131
1132 static ssize_t read_block_state(struct file *file, char __user *buf,
1133 size_t count, loff_t *ppos)
1134 {
1135 char *kbuf;
1136 ssize_t index, written = 0;
1137 struct zram *zram = file->private_data;
1138 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1139 struct timespec64 ts;
1140
1141 kbuf = kvmalloc(count, GFP_KERNEL);
1142 if (!kbuf)
1143 return -ENOMEM;
1144
1145 down_read(&zram->init_lock);
1146 if (!init_done(zram)) {
1147 up_read(&zram->init_lock);
1148 kvfree(kbuf);
1149 return -EINVAL;
1150 }
1151
1152 for (index = *ppos; index < nr_pages; index++) {
1153 int copied;
1154
1155 zram_slot_lock(zram, index);
1156 if (!zram_allocated(zram, index))
1157 goto next;
1158
1159 ts = ktime_to_timespec64(zram->table[index].ac_time);
1160 copied = snprintf(kbuf + written, count,
1161 "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
1162 index, (s64)ts.tv_sec,
1163 ts.tv_nsec / NSEC_PER_USEC,
1164 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
1165 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
1166 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
1167 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
1168 zram_get_priority(zram, index) ? 'r' : '.',
1169 zram_test_flag(zram, index,
1170 ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
1171
1172 if (count <= copied) {
1173 zram_slot_unlock(zram, index);
1174 break;
1175 }
1176 written += copied;
1177 count -= copied;
1178 next:
1179 zram_slot_unlock(zram, index);
1180 *ppos += 1;
1181 }
1182
1183 up_read(&zram->init_lock);
1184 if (copy_to_user(buf, kbuf, written))
1185 written = -EFAULT;
1186 kvfree(kbuf);
1187
1188 return written;
1189 }
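/*
 * Illustrative block_state output (the values below are hypothetical): one
 * line per allocated slot with its index, last access time and the
 * same/writeback/huge/idle/recompressed/incompressible flags:
 *
 *	297   432.763278 ..h...
 *	298   432.763284 s.....
 *	299   432.763291 ...i..
 */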
1190
1191 static const struct file_operations proc_zram_block_state_op = {
1192 .open = simple_open,
1193 .read = read_block_state,
1194 .llseek = default_llseek,
1195 };
1196
1197 static void zram_debugfs_register(struct zram *zram)
1198 {
1199 if (!zram_debugfs_root)
1200 return;
1201
1202 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
1203 zram_debugfs_root);
1204 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
1205 zram, &proc_zram_block_state_op);
1206 }
1207
1208 static void zram_debugfs_unregister(struct zram *zram)
1209 {
1210 debugfs_remove_recursive(zram->debugfs_dir);
1211 }
1212 #else
1213 static void zram_debugfs_create(void) {};
1214 static void zram_debugfs_destroy(void) {};
1215 static void zram_debugfs_register(struct zram *zram) {};
1216 static void zram_debugfs_unregister(struct zram *zram) {};
1217 #endif
1218
1219 static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
1220 {
1221 /* Do not free statically defined compression algorithms */
1222 if (zram->comp_algs[prio] != default_compressor)
1223 kfree(zram->comp_algs[prio]);
1224
1225 zram->comp_algs[prio] = alg;
1226 }
1227
1228 static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio,
1229 char *buf, ssize_t at)
1230 {
1231 ssize_t sz;
1232
1233 down_read(&zram->init_lock);
1234 sz = zcomp_available_show(zram->comp_algs[prio], buf, at);
1235 up_read(&zram->init_lock);
1236
1237 return sz;
1238 }
1239
1240 static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
1241 {
1242 char *compressor;
1243 size_t sz;
1244
1245 sz = strlen(buf);
1246 if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
1247 return -E2BIG;
1248
1249 compressor = kstrdup(buf, GFP_KERNEL);
1250 if (!compressor)
1251 return -ENOMEM;
1252
1253 /* ignore trailing newline */
1254 if (sz > 0 && compressor[sz - 1] == '\n')
1255 compressor[sz - 1] = 0x00;
1256
1257 if (!zcomp_available_algorithm(compressor)) {
1258 kfree(compressor);
1259 return -EINVAL;
1260 }
1261
1262 down_write(&zram->init_lock);
1263 if (init_done(zram)) {
1264 up_write(&zram->init_lock);
1265 kfree(compressor);
1266 pr_info("Can't change algorithm for initialized device\n");
1267 return -EBUSY;
1268 }
1269
1270 comp_algorithm_set(zram, prio, compressor);
1271 up_write(&zram->init_lock);
1272 return 0;
1273 }
1274
1275 static void comp_params_reset(struct zram *zram, u32 prio)
1276 {
1277 struct zcomp_params *params = &zram->params[prio];
1278
1279 vfree(params->dict);
1280 params->level = ZCOMP_PARAM_NOT_SET;
1281 params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
1282 params->dict_sz = 0;
1283 params->dict = NULL;
1284 }
1285
1286 static int comp_params_store(struct zram *zram, u32 prio, s32 level,
1287 const char *dict_path,
1288 struct deflate_params *deflate_params)
1289 {
1290 ssize_t sz = 0;
1291
1292 comp_params_reset(zram, prio);
1293
1294 if (dict_path) {
1295 sz = kernel_read_file_from_path(dict_path, 0,
1296 &zram->params[prio].dict,
1297 INT_MAX,
1298 NULL,
1299 READING_POLICY);
1300 if (sz < 0)
1301 return -EINVAL;
1302 }
1303
1304 zram->params[prio].dict_sz = sz;
1305 zram->params[prio].level = level;
1306 zram->params[prio].deflate.winbits = deflate_params->winbits;
1307 return 0;
1308 }
1309
1310 static ssize_t algorithm_params_store(struct device *dev,
1311 struct device_attribute *attr,
1312 const char *buf,
1313 size_t len)
1314 {
1315 s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
1316 char *args, *param, *val, *algo = NULL, *dict_path = NULL;
1317 struct deflate_params deflate_params;
1318 struct zram *zram = dev_to_zram(dev);
1319 int ret;
1320
1321 deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
1322
1323 args = skip_spaces(buf);
1324 while (*args) {
1325 args = next_arg(args, &param, &val);
1326
1327 if (!val || !*val)
1328 return -EINVAL;
1329
1330 if (!strcmp(param, "priority")) {
1331 ret = kstrtoint(val, 10, &prio);
1332 if (ret)
1333 return ret;
1334 continue;
1335 }
1336
1337 if (!strcmp(param, "level")) {
1338 ret = kstrtoint(val, 10, &level);
1339 if (ret)
1340 return ret;
1341 continue;
1342 }
1343
1344 if (!strcmp(param, "algo")) {
1345 algo = val;
1346 continue;
1347 }
1348
1349 if (!strcmp(param, "dict")) {
1350 dict_path = val;
1351 continue;
1352 }
1353
1354 if (!strcmp(param, "deflate.winbits")) {
1355 ret = kstrtoint(val, 10, &deflate_params.winbits);
1356 if (ret)
1357 return ret;
1358 continue;
1359 }
1360 }
1361
1362 /* Look up the priority by algorithm name */
1363 if (algo) {
1364 s32 p;
1365
1366 prio = -EINVAL;
1367 for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
1368 if (!zram->comp_algs[p])
1369 continue;
1370
1371 if (!strcmp(zram->comp_algs[p], algo)) {
1372 prio = p;
1373 break;
1374 }
1375 }
1376 }
1377
1378 if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
1379 return -EINVAL;
1380
1381 ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
1382 return ret ? ret : len;
1383 }
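/*
 * Example usage from user-space (illustrative; the device name and
 * dictionary path are assumptions):
 *
 *	echo "priority=0 level=3" > /sys/block/zram0/algorithm_params
 *	echo "algo=zstd level=9 dict=/etc/zstd.dict" > /sys/block/zram0/algorithm_params
 *	echo "algo=deflate deflate.winbits=15" > /sys/block/zram0/algorithm_params
 *
 * Either priority= or algo= selects the compression slot whose parameters
 * are being configured; algo= takes precedence when both are given.
 */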
1384
1385 static ssize_t comp_algorithm_show(struct device *dev,
1386 struct device_attribute *attr,
1387 char *buf)
1388 {
1389 struct zram *zram = dev_to_zram(dev);
1390
1391 return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf, 0);
1392 }
1393
1394 static ssize_t comp_algorithm_store(struct device *dev,
1395 struct device_attribute *attr,
1396 const char *buf,
1397 size_t len)
1398 {
1399 struct zram *zram = dev_to_zram(dev);
1400 int ret;
1401
1402 ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1403 return ret ? ret : len;
1404 }
1405
1406 #ifdef CONFIG_ZRAM_MULTI_COMP
1407 static ssize_t recomp_algorithm_show(struct device *dev,
1408 struct device_attribute *attr,
1409 char *buf)
1410 {
1411 struct zram *zram = dev_to_zram(dev);
1412 ssize_t sz = 0;
1413 u32 prio;
1414
1415 for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
1416 if (!zram->comp_algs[prio])
1417 continue;
1418
1419 sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
1420 sz += __comp_algorithm_show(zram, prio, buf, sz);
1421 }
1422
1423 return sz;
1424 }
1425
1426 static ssize_t recomp_algorithm_store(struct device *dev,
1427 struct device_attribute *attr,
1428 const char *buf,
1429 size_t len)
1430 {
1431 struct zram *zram = dev_to_zram(dev);
1432 int prio = ZRAM_SECONDARY_COMP;
1433 char *args, *param, *val;
1434 char *alg = NULL;
1435 int ret;
1436
1437 args = skip_spaces(buf);
1438 while (*args) {
1439 args = next_arg(args, &param, &val);
1440
1441 if (!val || !*val)
1442 return -EINVAL;
1443
1444 if (!strcmp(param, "algo")) {
1445 alg = val;
1446 continue;
1447 }
1448
1449 if (!strcmp(param, "priority")) {
1450 ret = kstrtoint(val, 10, &prio);
1451 if (ret)
1452 return ret;
1453 continue;
1454 }
1455 }
1456
1457 if (!alg)
1458 return -EINVAL;
1459
1460 if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
1461 return -EINVAL;
1462
1463 ret = __comp_algorithm_store(zram, prio, alg);
1464 return ret ? ret : len;
1465 }
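/*
 * Example usage from user-space (illustrative; zram0 is an assumption):
 *
 *	echo "algo=zstd priority=1" > /sys/block/zram0/recomp_algorithm
 *
 * This registers zstd as a recompression algorithm; the priority must fall
 * in the [ZRAM_SECONDARY_COMP, ZRAM_MAX_COMPS) range.
 */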
1466 #endif
1467
1468 static ssize_t compact_store(struct device *dev,
1469 struct device_attribute *attr, const char *buf, size_t len)
1470 {
1471 struct zram *zram = dev_to_zram(dev);
1472
1473 down_read(&zram->init_lock);
1474 if (!init_done(zram)) {
1475 up_read(&zram->init_lock);
1476 return -EINVAL;
1477 }
1478
1479 zs_compact(zram->mem_pool);
1480 up_read(&zram->init_lock);
1481
1482 return len;
1483 }
1484
1485 static ssize_t io_stat_show(struct device *dev,
1486 struct device_attribute *attr, char *buf)
1487 {
1488 struct zram *zram = dev_to_zram(dev);
1489 ssize_t ret;
1490
1491 down_read(&zram->init_lock);
1492 ret = sysfs_emit(buf,
1493 "%8llu %8llu 0 %8llu\n",
1494 (u64)atomic64_read(&zram->stats.failed_reads),
1495 (u64)atomic64_read(&zram->stats.failed_writes),
1496 (u64)atomic64_read(&zram->stats.notify_free));
1497 up_read(&zram->init_lock);
1498
1499 return ret;
1500 }
1501
1502 static ssize_t mm_stat_show(struct device *dev,
1503 struct device_attribute *attr, char *buf)
1504 {
1505 struct zram *zram = dev_to_zram(dev);
1506 struct zs_pool_stats pool_stats;
1507 u64 orig_size, mem_used = 0;
1508 long max_used;
1509 ssize_t ret;
1510
1511 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1512
1513 down_read(&zram->init_lock);
1514 if (init_done(zram)) {
1515 mem_used = zs_get_total_pages(zram->mem_pool);
1516 zs_pool_stats(zram->mem_pool, &pool_stats);
1517 }
1518
1519 orig_size = atomic64_read(&zram->stats.pages_stored);
1520 max_used = atomic_long_read(&zram->stats.max_used_pages);
1521
1522 ret = sysfs_emit(buf,
1523 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1524 orig_size << PAGE_SHIFT,
1525 (u64)atomic64_read(&zram->stats.compr_data_size),
1526 mem_used << PAGE_SHIFT,
1527 zram->limit_pages << PAGE_SHIFT,
1528 max_used << PAGE_SHIFT,
1529 (u64)atomic64_read(&zram->stats.same_pages),
1530 atomic_long_read(&pool_stats.pages_compacted),
1531 (u64)atomic64_read(&zram->stats.huge_pages),
1532 (u64)atomic64_read(&zram->stats.huge_pages_since));
1533 up_read(&zram->init_lock);
1534
1535 return ret;
1536 }
1537
1538 #ifdef CONFIG_ZRAM_WRITEBACK
1539 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1540 static ssize_t bd_stat_show(struct device *dev,
1541 struct device_attribute *attr, char *buf)
1542 {
1543 struct zram *zram = dev_to_zram(dev);
1544 ssize_t ret;
1545
1546 down_read(&zram->init_lock);
1547 ret = sysfs_emit(buf,
1548 "%8llu %8llu %8llu\n",
1549 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1550 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1551 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1552 up_read(&zram->init_lock);
1553
1554 return ret;
1555 }
1556 #endif
1557
1558 static ssize_t debug_stat_show(struct device *dev,
1559 struct device_attribute *attr, char *buf)
1560 {
1561 int version = 1;
1562 struct zram *zram = dev_to_zram(dev);
1563 ssize_t ret;
1564
1565 down_read(&zram->init_lock);
1566 ret = sysfs_emit(buf,
1567 "version: %d\n0 %8llu\n",
1568 version,
1569 (u64)atomic64_read(&zram->stats.miss_free));
1570 up_read(&zram->init_lock);
1571
1572 return ret;
1573 }
1574
1575 static DEVICE_ATTR_RO(io_stat);
1576 static DEVICE_ATTR_RO(mm_stat);
1577 #ifdef CONFIG_ZRAM_WRITEBACK
1578 static DEVICE_ATTR_RO(bd_stat);
1579 #endif
1580 static DEVICE_ATTR_RO(debug_stat);
1581
1582 static void zram_meta_free(struct zram *zram, u64 disksize)
1583 {
1584 size_t num_pages = disksize >> PAGE_SHIFT;
1585 size_t index;
1586
1587 if (!zram->table)
1588 return;
1589
1590 /* Free all pages that are still in this zram device */
1591 for (index = 0; index < num_pages; index++)
1592 zram_free_page(zram, index);
1593
1594 zs_destroy_pool(zram->mem_pool);
1595 vfree(zram->table);
1596 zram->table = NULL;
1597 }
1598
1599 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1600 {
1601 size_t num_pages, index;
1602
1603 num_pages = disksize >> PAGE_SHIFT;
1604 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1605 if (!zram->table)
1606 return false;
1607
1608 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1609 if (!zram->mem_pool) {
1610 vfree(zram->table);
1611 zram->table = NULL;
1612 return false;
1613 }
1614
1615 if (!huge_class_size)
1616 huge_class_size = zs_huge_class_size(zram->mem_pool);
1617
1618 for (index = 0; index < num_pages; index++)
1619 zram_slot_lock_init(zram, index);
1620
1621 return true;
1622 }
1623
1624 static void zram_free_page(struct zram *zram, size_t index)
1625 {
1626 unsigned long handle;
1627
1628 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
1629 zram->table[index].ac_time = 0;
1630 #endif
1631
1632 zram_clear_flag(zram, index, ZRAM_IDLE);
1633 zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
1634 zram_clear_flag(zram, index, ZRAM_PP_SLOT);
1635 zram_set_priority(zram, index, 0);
1636
1637 if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1638 zram_clear_flag(zram, index, ZRAM_HUGE);
1639 atomic64_dec(&zram->stats.huge_pages);
1640 }
1641
1642 if (zram_test_flag(zram, index, ZRAM_WB)) {
1643 zram_clear_flag(zram, index, ZRAM_WB);
1644 free_block_bdev(zram, zram_get_handle(zram, index));
1645 goto out;
1646 }
1647
1648 /*
1649 * No memory is allocated for same element filled pages.
1650 * Simply clear same page flag.
1651 */
1652 if (zram_test_flag(zram, index, ZRAM_SAME)) {
1653 zram_clear_flag(zram, index, ZRAM_SAME);
1654 atomic64_dec(&zram->stats.same_pages);
1655 goto out;
1656 }
1657
1658 handle = zram_get_handle(zram, index);
1659 if (!handle)
1660 return;
1661
1662 zs_free(zram->mem_pool, handle);
1663
1664 atomic64_sub(zram_get_obj_size(zram, index),
1665 &zram->stats.compr_data_size);
1666 out:
1667 atomic64_dec(&zram->stats.pages_stored);
1668 zram_set_handle(zram, index, 0);
1669 zram_set_obj_size(zram, index, 0);
1670 }
1671
1672 static int read_same_filled_page(struct zram *zram, struct page *page,
1673 u32 index)
1674 {
1675 void *mem;
1676
1677 mem = kmap_local_page(page);
1678 zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index));
1679 kunmap_local(mem);
1680 return 0;
1681 }
1682
1683 static int read_incompressible_page(struct zram *zram, struct page *page,
1684 u32 index)
1685 {
1686 unsigned long handle;
1687 void *src, *dst;
1688
1689 handle = zram_get_handle(zram, index);
1690 src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
1691 dst = kmap_local_page(page);
1692 copy_page(dst, src);
1693 kunmap_local(dst);
1694 zs_obj_read_end(zram->mem_pool, handle, src);
1695
1696 return 0;
1697 }
1698
1699 static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
1700 {
1701 struct zcomp_strm *zstrm;
1702 unsigned long handle;
1703 unsigned int size;
1704 void *src, *dst;
1705 int ret, prio;
1706
1707 handle = zram_get_handle(zram, index);
1708 size = zram_get_obj_size(zram, index);
1709 prio = zram_get_priority(zram, index);
1710
1711 zstrm = zcomp_stream_get(zram->comps[prio]);
1712 src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
1713 dst = kmap_local_page(page);
1714 ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
1715 kunmap_local(dst);
1716 zs_obj_read_end(zram->mem_pool, handle, src);
1717 zcomp_stream_put(zstrm);
1718
1719 return ret;
1720 }
1721
1722 /*
1723 * Reads (decompresses if needed) a page from zspool (zsmalloc).
1724 * Corresponding ZRAM slot should be locked.
1725 */
1726 static int zram_read_from_zspool(struct zram *zram, struct page *page,
1727 u32 index)
1728 {
1729 if (zram_test_flag(zram, index, ZRAM_SAME) ||
1730 !zram_get_handle(zram, index))
1731 return read_same_filled_page(zram, page, index);
1732
1733 if (!zram_test_flag(zram, index, ZRAM_HUGE))
1734 return read_compressed_page(zram, page, index);
1735 else
1736 return read_incompressible_page(zram, page, index);
1737 }
1738
1739 static int zram_read_page(struct zram *zram, struct page *page, u32 index,
1740 struct bio *parent)
1741 {
1742 int ret;
1743
1744 zram_slot_lock(zram, index);
1745 if (!zram_test_flag(zram, index, ZRAM_WB)) {
1746 /* Slot should be locked throughout the function call */
1747 ret = zram_read_from_zspool(zram, page, index);
1748 zram_slot_unlock(zram, index);
1749 } else {
1750 /*
1751 * The slot should be unlocked before reading from the backing
1752 * device.
1753 */
1754 zram_slot_unlock(zram, index);
1755
1756 ret = read_from_bdev(zram, page, zram_get_handle(zram, index),
1757 parent);
1758 }
1759
1760 /* Should NEVER happen. Return bio error if it does. */
1761 if (WARN_ON(ret < 0))
1762 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1763
1764 return ret;
1765 }
1766
1767 /*
1768 * Use a temporary buffer to decompress the page, as the decompressor
1769 * always expects a full page for the output.
1770 */
1771 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
1772 u32 index, int offset)
1773 {
1774 struct page *page = alloc_page(GFP_NOIO);
1775 int ret;
1776
1777 if (!page)
1778 return -ENOMEM;
1779 ret = zram_read_page(zram, page, index, NULL);
1780 if (likely(!ret))
1781 memcpy_to_bvec(bvec, page_address(page) + offset);
1782 __free_page(page);
1783 return ret;
1784 }
1785
1786 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1787 u32 index, int offset, struct bio *bio)
1788 {
1789 if (is_partial_io(bvec))
1790 return zram_bvec_read_partial(zram, bvec, index, offset);
1791 return zram_read_page(zram, bvec->bv_page, index, bio);
1792 }
1793
1794 static int write_same_filled_page(struct zram *zram, unsigned long fill,
1795 u32 index)
1796 {
1797 zram_slot_lock(zram, index);
1798 zram_set_flag(zram, index, ZRAM_SAME);
1799 zram_set_handle(zram, index, fill);
1800 zram_slot_unlock(zram, index);
1801
1802 atomic64_inc(&zram->stats.same_pages);
1803 atomic64_inc(&zram->stats.pages_stored);
1804
1805 return 0;
1806 }
1807
1808 static int write_incompressible_page(struct zram *zram, struct page *page,
1809 u32 index)
1810 {
1811 unsigned long handle;
1812 void *src;
1813
1814 /*
1815 * This function is called from a preemptible context, so there is no
1816 * need for an optimistic allocation with a pessimistic fallback, as is
1817 * done for compressible pages.
1818 */
1819 handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
1820 GFP_NOIO | __GFP_NOWARN |
1821 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
1822 if (IS_ERR_VALUE(handle))
1823 return PTR_ERR((void *)handle);
1824
1825 if (!zram_can_store_page(zram)) {
1826 zs_free(zram->mem_pool, handle);
1827 return -ENOMEM;
1828 }
1829
1830 src = kmap_local_page(page);
1831 zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
1832 kunmap_local(src);
1833
1834 zram_slot_lock(zram, index);
1835 zram_set_flag(zram, index, ZRAM_HUGE);
1836 zram_set_handle(zram, index, handle);
1837 zram_set_obj_size(zram, index, PAGE_SIZE);
1838 zram_slot_unlock(zram, index);
1839
1840 atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
1841 atomic64_inc(&zram->stats.huge_pages);
1842 atomic64_inc(&zram->stats.huge_pages_since);
1843 atomic64_inc(&zram->stats.pages_stored);
1844
1845 return 0;
1846 }
1847
1848 static int zram_write_page(struct zram *zram, struct page *page, u32 index)
1849 {
1850 int ret = 0;
1851 unsigned long handle;
1852 unsigned int comp_len;
1853 void *mem;
1854 struct zcomp_strm *zstrm;
1855 unsigned long element;
1856 bool same_filled;
1857
1858 /* First, free memory allocated to this slot (if any) */
1859 zram_slot_lock(zram, index);
1860 zram_free_page(zram, index);
1861 zram_slot_unlock(zram, index);
1862
1863 mem = kmap_local_page(page);
1864 same_filled = page_same_filled(mem, &element);
1865 kunmap_local(mem);
1866 if (same_filled)
1867 return write_same_filled_page(zram, element, index);
1868
1869 zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
1870 mem = kmap_local_page(page);
1871 ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
1872 mem, &comp_len);
1873 kunmap_local(mem);
1874
1875 if (unlikely(ret)) {
1876 zcomp_stream_put(zstrm);
1877 pr_err("Compression failed! err=%d\n", ret);
1878 return ret;
1879 }
1880
1881 if (comp_len >= huge_class_size) {
1882 zcomp_stream_put(zstrm);
1883 return write_incompressible_page(zram, page, index);
1884 }
1885
1886 handle = zs_malloc(zram->mem_pool, comp_len,
1887 GFP_NOIO | __GFP_NOWARN |
1888 __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
1889 if (IS_ERR_VALUE(handle)) {
1890 zcomp_stream_put(zstrm);
1891 return PTR_ERR((void *)handle);
1892 }
1893
1894 if (!zram_can_store_page(zram)) {
1895 zcomp_stream_put(zstrm);
1896 zs_free(zram->mem_pool, handle);
1897 return -ENOMEM;
1898 }
1899
1900 zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
1901 zcomp_stream_put(zstrm);
1902
1903 zram_slot_lock(zram, index);
1904 zram_set_handle(zram, index, handle);
1905 zram_set_obj_size(zram, index, comp_len);
1906 zram_slot_unlock(zram, index);
1907
1908 /* Update stats */
1909 atomic64_inc(&zram->stats.pages_stored);
1910 atomic64_add(comp_len, &zram->stats.compr_data_size);
1911
1912 return ret;
1913 }
1914
1915 /*
1916 * This is a partial IO. Read the full page before writing the changes.
1917 */
1918 static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
1919 u32 index, int offset, struct bio *bio)
1920 {
1921 struct page *page = alloc_page(GFP_NOIO);
1922 int ret;
1923
1924 if (!page)
1925 return -ENOMEM;
1926
1927 ret = zram_read_page(zram, page, index, bio);
1928 if (!ret) {
1929 memcpy_from_bvec(page_address(page) + offset, bvec);
1930 ret = zram_write_page(zram, page, index);
1931 }
1932 __free_page(page);
1933 return ret;
1934 }
1935
zram_bvec_write(struct zram * zram,struct bio_vec * bvec,u32 index,int offset,struct bio * bio)1936 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1937 u32 index, int offset, struct bio *bio)
1938 {
1939 if (is_partial_io(bvec))
1940 return zram_bvec_write_partial(zram, bvec, index, offset, bio);
1941 return zram_write_page(zram, bvec->bv_page, index);
1942 }
1943
1944 #ifdef CONFIG_ZRAM_MULTI_COMP
1945 #define RECOMPRESS_IDLE (1 << 0)
1946 #define RECOMPRESS_HUGE (1 << 1)
1947
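/*
 * Walk all slots and queue recompression candidates into the
 * post-processing control structure: a slot must be allocated, match the
 * requested mode (idle and/or huge), must not be written back,
 * same-filled or already marked incompressible, and must not already be
 * compressed with the target (or a higher) priority algorithm.
 */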
static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
				     struct zram_pp_ctl *ctl)
{
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index;

	for (index = 0; index < nr_pages; index++) {
		bool ok = true;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (mode & RECOMPRESS_IDLE &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;

		if (mode & RECOMPRESS_HUGE &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME) ||
		    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		/* Already compressed with the same or a higher priority */
		if (zram_get_priority(zram, index) + 1 >= prio_max)
			goto next;

		ok = place_pp_slot(zram, ctl, index);
next:
		zram_slot_unlock(zram, index);
		if (!ok)
			break;
	}

	return 0;
}

/*
 * This function will decompress (unless it's ZRAM_HUGE) the page and then
 * attempt to compress it using the provided compression algorithm priority
 * (which is potentially more effective).
 *
 * The corresponding ZRAM slot should be locked.
 */
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
			   u64 *num_recomp_pages, u32 threshold, u32 prio,
			   u32 prio_max)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	void *src;
	int ret = 0;

	handle_old = zram_get_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = zram_get_obj_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = zram_read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	/*
	 * We touched this entry so mark it as non-IDLE. This makes sure that
	 * we don't preserve the IDLE flag and don't incorrectly pick this
	 * entry for a different post-processing type (e.g. writeback).
	 */
	zram_clear_flag(zram, index, ZRAM_IDLE);

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);

	prio = max(prio, zram_get_priority(zram, index) + 1);
	/*
	 * The recompression slot scan should not select slots that are
	 * already compressed with a higher priority algorithm, but check
	 * just in case.
	 */
	if (prio >= prio_max)
		return 0;

	/*
	 * Iterate the secondary comp algorithms list (in order of priority)
	 * and try to recompress the page.
	 */
	for (; prio < prio_max; prio++) {
		if (!zram->comps[prio])
			continue;

		zstrm = zcomp_stream_get(zram->comps[prio]);
		src = kmap_local_page(page);
		ret = zcomp_compress(zram->comps[prio], zstrm,
				     src, &comp_len_new);
		kunmap_local(src);

		if (ret) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			break;
		}

		class_index_new = zs_lookup_class_index(zram->mem_pool,
							comp_len_new);

		/* Continue until we make progress */
		if (class_index_new >= class_index_old ||
		    (threshold && comp_len_new >= threshold)) {
			zcomp_stream_put(zstrm);
			zstrm = NULL;
			continue;
		}

		/* Recompression was successful so break out */
		break;
	}

	/*
	 * Decrement the limit (if set) on pages we can recompress, even
	 * when the current recompression was unsuccessful or did not
	 * compress the page below the threshold, because we still spent
	 * resources on it.
	 */
	if (*num_recomp_pages)
		*num_recomp_pages -= 1;

	/* Compression error */
	if (ret)
		return ret;

	if (!zstrm) {
		/*
		 * The secondary algorithms failed to re-compress the page
		 * in a way that would save memory.
		 *
		 * Mark the object incompressible only if the max-priority
		 * algorithm couldn't re-compress it.
		 */
		if (prio < zram->num_active_comps)
			return 0;
		zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/*
	 * We are holding the per-CPU stream mutex and the entry lock, so we
	 * had better avoid direct reclaim. An allocation error is not fatal
	 * since we still have the old object in the mem_pool.
	 *
	 * XXX: technically, the node we really want here is the node that
	 * holds the original compressed data. But that would require us to
	 * modify the zsmalloc API to return this information. For now, we
	 * will make do with the node of the page allocated for recompression.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       GFP_NOIO | __GFP_NOWARN |
			       __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zstrm);
		return PTR_ERR((void *)handle_new);
	}

	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zstrm);

	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle_new);
	zram_set_obj_size(zram, index, comp_len_new);
	zram_set_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

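/*
 * Example (illustrative values; see
 * Documentation/admin-guide/blockdev/zram.rst for the full syntax):
 *
 *	echo "type=huge_idle threshold=3000 max_pages=1000 algo=zstd" > \
 *		/sys/block/zram0/recompress
 *
 * Recognized parameters, as parsed below: type= (idle, huge, huge_idle),
 * max_pages=, threshold= (in bytes), algo= (a configured secondary
 * algorithm name) and priority= (an explicit secondary priority).
 */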
static ssize_t recompress_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char *args, *param, *val, *algo = NULL;
	u64 num_recomp_pages = ULLONG_MAX;
	struct zram_pp_ctl *ctl = NULL;
	struct zram_pp_slot *pps;
	u32 mode = 0, threshold = 0;
	u32 prio, prio_max;
	struct page *page = NULL;
	ssize_t ret;

	prio = ZRAM_SECONDARY_COMP;
	prio_max = zram->num_active_comps;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "type")) {
			if (!strcmp(val, "idle"))
				mode = RECOMPRESS_IDLE;
			if (!strcmp(val, "huge"))
				mode = RECOMPRESS_HUGE;
			if (!strcmp(val, "huge_idle"))
				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
			continue;
		}

		if (!strcmp(param, "max_pages")) {
			/*
			 * Limit the number of entries (pages) we attempt to
			 * recompress.
			 */
			ret = kstrtoull(val, 10, &num_recomp_pages);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "threshold")) {
			/*
			 * Only re-compress objects whose size is equal to or
			 * greater than this watermark.
			 */
			ret = kstrtouint(val, 10, &threshold);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtouint(val, 10, &prio);
			if (ret)
				return ret;

			if (prio == ZRAM_PRIMARY_COMP)
				prio = ZRAM_SECONDARY_COMP;

			prio_max = prio + 1;
			continue;
		}
	}

	if (threshold >= huge_class_size)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	/* Do not permit concurrent post-processing actions. */
	if (atomic_xchg(&zram->pp_in_progress, 1)) {
		up_read(&zram->init_lock);
		return -EAGAIN;
	}

	if (algo) {
		bool found = false;

		for (; prio < ZRAM_MAX_COMPS; prio++) {
			if (!zram->comp_algs[prio])
				continue;

			if (!strcmp(zram->comp_algs[prio], algo)) {
				prio_max = prio + 1;
				found = true;
				break;
			}
		}

		if (!found) {
			ret = -EINVAL;
			goto release_init_lock;
		}
	}

	prio_max = min(prio_max, (u32)zram->num_active_comps);
	if (prio >= prio_max) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	ctl = init_pp_ctl();
	if (!ctl) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	scan_slots_for_recompress(zram, mode, prio_max, ctl);

	ret = len;
	while ((pps = select_pp_slot(ctl))) {
		int err = 0;

		if (!num_recomp_pages)
			break;

		zram_slot_lock(zram, pps->index);
		if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
			goto next;

		err = recompress_slot(zram, pps->index, page,
				      &num_recomp_pages, threshold,
				      prio, prio_max);
next:
		zram_slot_unlock(zram, pps->index);
		release_pp_slot(zram, pps);

		if (err) {
			ret = err;
			break;
		}

		cond_resched();
	}

release_init_lock:
	if (page)
		__free_page(page);
	release_pp_ctl(zram, ctl);
	atomic_set(&zram->pp_in_progress, 0);
	up_read(&zram->init_lock);
	return ret;
}
#endif

static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical block size units. Because the logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block and
	 * decompressing, partially zeroing, re-compressing and then
	 * re-storing it, this isn't reasonable because our intent with a
	 * discard request is to save memory. So skipping this logical block
	 * is appropriate here.
	 */
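	/*
	 * For example (assuming PAGE_SIZE == 4096): a 2048-byte discard
	 * starting at byte offset 1024 of a page covers no full page, so
	 * the whole request is skipped; a 12 KiB discard starting at the
	 * same offset frees exactly the two fully covered pages.
	 */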
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}

	bio_endio(bio);
}

static void zram_bio_read(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_reads);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}
		flush_dcache_page(bv.bv_page);

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

static void zram_bio_write(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_writes);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		zram_bio_read(zram, bio);
		break;
	case REQ_OP_WRITE:
		zram_bio_write(zram, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, bio);
		break;
	default:
		WARN_ON_ONCE(1);
		bio_endio(bio);
	}
}

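/*
 * Called by the swap layer when a swap slot is freed. This may run in
 * atomic context, so only try-lock the slot (see the entry locking rules
 * above); if the slot is busy, the free is skipped and accounted in
 * miss_free, and the object is released later, typically when the slot
 * is rewritten.
 */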
static void zram_slot_free_notify(struct block_device *bdev,
				  unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}

static void zram_comp_params_reset(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		comp_params_reset(zram, prio);
	}
}

static void zram_destroy_comps(struct zram *zram)
{
	u32 prio;

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		struct zcomp *comp = zram->comps[prio];

		zram->comps[prio] = NULL;
		if (!comp)
			continue;
		zcomp_destroy(comp);
		zram->num_active_comps--;
	}

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		/* Do not free statically defined compression algorithms */
		if (zram->comp_algs[prio] != default_compressor)
			kfree(zram->comp_algs[prio]);
		zram->comp_algs[prio] = NULL;
	}

	zram_comp_params_reset(zram);
}

static void zram_reset_device(struct zram *zram)
{
	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	/* All pending I/O on all CPUs has completed, so it is safe to free */
	zram_meta_free(zram, zram->disksize);
	zram->disksize = 0;
	zram_destroy_comps(zram);
	memset(&zram->stats, 0, sizeof(zram->stats));
	atomic_set(&zram->pp_in_progress, 0);
	reset_bdev(zram);

	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
	up_write(&zram->init_lock);
}

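/*
 * Example (illustrative): set the device size before first use, e.g.
 *
 *	echo 1G > /sys/block/zram0/disksize
 *
 * memparse() accepts plain byte counts as well as K/M/G suffixes; the
 * value is rounded up to a multiple of PAGE_SIZE.
 */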
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;
	u32 prio;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		comp = zcomp_create(zram->comp_algs[prio],
				    &zram->params[prio]);
		if (IS_ERR(comp)) {
			pr_err("Cannot initialise %s compressing backend\n",
			       zram->comp_algs[prio]);
			err = PTR_ERR(comp);
			goto out_free_comps;
		}

		zram->comps[prio] = comp;
		zram->num_active_comps++;
	}
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_comps:
	zram_destroy_comps(zram);
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

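/*
 * Example (illustrative): writing any non-zero value resets the device,
 * freeing all stored data and returning it to the uninitialized state:
 *
 *	echo 1 > /sys/block/zram0/reset
 */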
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct gendisk *disk;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	disk = zram->disk;

	mutex_lock(&disk->open_mutex);
	/* Do not reset an active or claimed device */
	if (disk_openers(disk) || zram->claim) {
		mutex_unlock(&disk->open_mutex);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&disk->open_mutex);

	/* Make sure all the pending I/O is finished */
	sync_blockdev(disk->part0);
	zram_reset_device(zram);

	mutex_lock(&disk->open_mutex);
	zram->claim = false;
	mutex_unlock(&disk->open_mutex);

	return len;
}

static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zram *zram = disk->private_data;

	WARN_ON(!mutex_is_locked(&disk->open_mutex));

	/* zram was claimed for reset, so the open request fails */
	if (zram->claim)
		return -EBUSY;
	return 0;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
static DEVICE_ATTR_WO(algorithm_params);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
	&dev_attr_recomp_algorithm.attr,
	&dev_attr_recompress.attr,
#endif
	&dev_attr_algorithm_params.attr,
	NULL,
};

ATTRIBUTE_GROUPS(zram_disk);

/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct queue_limits lim = {
		.logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
		/*
		 * To ensure that we always get PAGE_SIZE aligned and
		 * n*PAGE_SIZE sized I/O requests.
		 */
		.physical_block_size = PAGE_SIZE,
		.io_min = PAGE_SIZE,
		.io_opt = PAGE_SIZE,
		.max_hw_discard_sectors = UINT_MAX,
		/*
		 * zram_bio_discard() will clear all logical blocks if the
		 * logical block size is identical to the physical block size
		 * (PAGE_SIZE). But if it is different, we will skip discarding
		 * some parts of logical blocks in the part of the request
		 * range which isn't aligned to the physical block size. So we
		 * can't ensure that all discarded logical blocks are zeroed.
		 */
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
		.max_write_zeroes_sectors = UINT_MAX,
#endif
		.features = BLK_FEAT_STABLE_WRITES |
			    BLK_FEAT_SYNCHRONOUS,
	};
	struct zram *zram;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif

	/* gendisk structure */
	zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
	if (IS_ERR(zram->disk)) {
		pr_err("Error allocating disk structure for device %d\n",
		       device_id);
		ret = PTR_ERR(zram->disk);
		goto out_free_idr;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->minors = 1;
	zram->disk->flags |= GENHD_FL_NO_PART;
	zram->disk->fops = &zram_devops;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
	atomic_set(&zram->pp_in_progress, 0);
	zram_comp_params_reset(zram);
	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);

	/* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
	if (ret)
		goto out_cleanup_disk;

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_cleanup_disk:
	put_disk(zram->disk);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	bool claimed;

	mutex_lock(&zram->disk->open_mutex);
	if (disk_openers(zram->disk)) {
		mutex_unlock(&zram->disk->open_mutex);
		return -EBUSY;
	}

	claimed = zram->claim;
	if (!claimed)
		zram->claim = true;
	mutex_unlock(&zram->disk->open_mutex);

	zram_debugfs_unregister(zram);

	if (claimed) {
		/*
		 * If we were claimed by reset_store(), del_gendisk() will
		 * wait until reset_store() is done, so there is nothing to
		 * do here.
		 */
		;
	} else {
		/* Make sure all the pending I/O is finished */
		sync_blockdev(zram->disk->part0);
		zram_reset_device(zram);
	}

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);

	/* del_gendisk drains pending reset_store */
	WARN_ON_ONCE(claimed && zram->claim);

	/*
	 * disksize_store() may be called in between zram_reset_device()
	 * and del_gendisk(), so run the last reset to avoid leaking
	 * anything allocated with disksize_store()
	 */
	zram_reset_device(zram);

	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of the
 * system -- it creates a new un-initialized zram device and returns back
 * this device's device_id (or an error code if it fails to create a new
 * device).
 */
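/*
 * Example (illustrative):
 *
 *	cat /sys/class/zram-control/hot_add	(prints the new device id)
 *	echo 4 > /sys/class/zram-control/hot_remove
 *
 * hot_remove takes the numeric device id of a device with no openers.
 */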
static ssize_t hot_add_show(const struct class *class,
			    const struct class_attribute *attr,
			    char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return sysfs_emit(buf, "%d\n", ret);
}

/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);

static ssize_t hot_remove_store(const struct class *class,
				const struct class_attribute *attr,
				const char *buf,
				size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name = "zram-control",
	.class_groups = zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	WARN_ON_ONCE(zram_remove(ptr));
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	struct zram_table_entry zram_te;
	int ret;

	BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
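/* Example (illustrative): "modprobe zram num_devices=2" pre-creates zram0 and zram1. */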

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");