1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2017-2018 HUAWEI, Inc.
4 * https://www.huawei.com/
5 * Copyright (C) 2021, Alibaba Cloud
6 */
7 #include "internal.h"
8 #include <linux/sched/mm.h>
9 #include <trace/events/erofs.h>
10
erofs_unmap_metabuf(struct erofs_buf * buf)11 void erofs_unmap_metabuf(struct erofs_buf *buf)
12 {
13 if (!buf->base)
14 return;
15 kunmap_local(buf->base);
16 buf->base = NULL;
17 }
18
erofs_put_metabuf(struct erofs_buf * buf)19 void erofs_put_metabuf(struct erofs_buf *buf)
20 {
21 if (!buf->page)
22 return;
23 erofs_unmap_metabuf(buf);
24 folio_put(page_folio(buf->page));
25 buf->page = NULL;
26 }
27
/*
 * Make @buf refer to the page of buf->mapping that covers byte @offset.
 *
 * The folio already held by @buf is reused when it contains the wanted
 * page index; otherwise it is put and a new folio is read through
 * read_mapping_folio().
 *
 * Return: the error pointer from read_mapping_folio() if the read failed,
 * NULL if @need_kmap is false, otherwise a pointer to @offset inside the
 * locally mapped page.
 */
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
{
	const pgoff_t pgidx = offset >> PAGE_SHIFT;
	struct folio *cur = buf->page ? page_folio(buf->page) : NULL;

	/* a different page is wanted, so the current mapping is stale */
	if (cur && folio_file_page(cur, pgidx) != buf->page)
		erofs_unmap_metabuf(buf);

	if (!cur || !folio_contains(cur, pgidx)) {
		erofs_put_metabuf(buf);
		cur = read_mapping_folio(buf->mapping, pgidx, buf->file);
		if (IS_ERR(cur))
			return cur;
	}

	buf->page = folio_file_page(cur, pgidx);
	if (!need_kmap)
		return NULL;

	/* keep an existing mapping; it was dropped above if the page changed */
	if (!buf->base)
		buf->base = kmap_local_page(buf->page);
	return buf->base + (offset & ~PAGE_MASK);
}
51
erofs_init_metabuf(struct erofs_buf * buf,struct super_block * sb)52 void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
53 {
54 struct erofs_sb_info *sbi = EROFS_SB(sb);
55
56 buf->file = NULL;
57 if (erofs_is_fileio_mode(sbi)) {
58 buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
59 buf->mapping = buf->file->f_mapping;
60 } else if (erofs_is_fscache_mode(sb))
61 buf->mapping = sbi->dif0.fscache->inode->i_mapping;
62 else
63 buf->mapping = sb->s_bdev->bd_mapping;
64 }
65
/*
 * Convenience wrapper: bind @buf to the metadata mapping of @sb and then
 * read the page covering @offset.
 *
 * Return: whatever erofs_bread() returns for (@buf, @offset, @need_kmap).
 */
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			 erofs_off_t offset, bool need_kmap)
{
	erofs_init_metabuf(buf, sb);

	return erofs_bread(buf, offset, need_kmap);
}
72
erofs_map_blocks(struct inode * inode,struct erofs_map_blocks * map)73 int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
74 {
75 struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
76 struct super_block *sb = inode->i_sb;
77 unsigned int unit, blksz = sb->s_blocksize;
78 struct erofs_inode *vi = EROFS_I(inode);
79 struct erofs_inode_chunk_index *idx;
80 erofs_blk_t startblk, addrmask;
81 bool tailpacking;
82 erofs_off_t pos;
83 u64 chunknr;
84 int err = 0;
85
86 trace_erofs_map_blocks_enter(inode, map, 0);
87 map->m_deviceid = 0;
88 map->m_flags = 0;
89 if (map->m_la >= inode->i_size)
90 goto out;
91
92 if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
93 tailpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
94 if (!tailpacking && vi->startblk == EROFS_NULL_ADDR)
95 goto out;
96 pos = erofs_pos(sb, erofs_iblks(inode) - tailpacking);
97
98 map->m_flags = EROFS_MAP_MAPPED;
99 if (map->m_la < pos) {
100 map->m_pa = erofs_pos(sb, vi->startblk) + map->m_la;
101 map->m_llen = pos - map->m_la;
102 } else {
103 map->m_pa = erofs_iloc(inode) + vi->inode_isize +
104 vi->xattr_isize + erofs_blkoff(sb, map->m_la);
105 map->m_llen = inode->i_size - map->m_la;
106 map->m_flags |= EROFS_MAP_META;
107 }
108 goto out;
109 }
110
111 if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
112 unit = sizeof(*idx); /* chunk index */
113 else
114 unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */
115
116 chunknr = map->m_la >> vi->chunkbits;
117 pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
118 vi->xattr_isize, unit) + unit * chunknr;
119
120 idx = erofs_read_metabuf(&buf, sb, pos, true);
121 if (IS_ERR(idx)) {
122 err = PTR_ERR(idx);
123 goto out;
124 }
125 map->m_la = chunknr << vi->chunkbits;
126 map->m_llen = min_t(erofs_off_t, 1UL << vi->chunkbits,
127 round_up(inode->i_size - map->m_la, blksz));
128 if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) {
129 addrmask = (vi->chunkformat & EROFS_CHUNK_FORMAT_48BIT) ?
130 BIT_ULL(48) - 1 : BIT_ULL(32) - 1;
131 startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) |
132 le32_to_cpu(idx->startblk_lo)) & addrmask;
133 if ((startblk ^ EROFS_NULL_ADDR) & addrmask) {
134 map->m_deviceid = le16_to_cpu(idx->device_id) &
135 EROFS_SB(sb)->device_id_mask;
136 map->m_pa = erofs_pos(sb, startblk);
137 map->m_flags = EROFS_MAP_MAPPED;
138 }
139 } else {
140 startblk = le32_to_cpu(*(__le32 *)idx);
141 if (startblk != (u32)EROFS_NULL_ADDR) {
142 map->m_pa = erofs_pos(sb, startblk);
143 map->m_flags = EROFS_MAP_MAPPED;
144 }
145 }
146 erofs_put_metabuf(&buf);
147 out:
148 if (!err) {
149 map->m_plen = map->m_llen;
150 /* inline data should be located in the same meta block */
151 if ((map->m_flags & EROFS_MAP_META) &&
152 erofs_blkoff(sb, map->m_pa) + map->m_plen > blksz) {
153 erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
154 DBG_BUGON(1);
155 return -EFSCORRUPTED;
156 }
157 }
158 trace_erofs_map_blocks_exit(inode, map, 0, err);
159 return err;
160 }
161
erofs_fill_from_devinfo(struct erofs_map_dev * map,struct super_block * sb,struct erofs_device_info * dif)162 static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
163 struct super_block *sb, struct erofs_device_info *dif)
164 {
165 map->m_sb = sb;
166 map->m_dif = dif;
167 map->m_bdev = NULL;
168 if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
169 map->m_bdev = file_bdev(dif->file);
170 }
171
erofs_map_dev(struct super_block * sb,struct erofs_map_dev * map)172 int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
173 {
174 struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
175 struct erofs_device_info *dif;
176 erofs_off_t startoff;
177 int id;
178
179 erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
180 map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */
181 if (map->m_deviceid) {
182 down_read(&devs->rwsem);
183 dif = idr_find(&devs->tree, map->m_deviceid - 1);
184 if (!dif) {
185 up_read(&devs->rwsem);
186 return -ENODEV;
187 }
188 if (devs->flatdev) {
189 map->m_pa += erofs_pos(sb, dif->uniaddr);
190 up_read(&devs->rwsem);
191 return 0;
192 }
193 erofs_fill_from_devinfo(map, sb, dif);
194 up_read(&devs->rwsem);
195 } else if (devs->extra_devices && !devs->flatdev) {
196 down_read(&devs->rwsem);
197 idr_for_each_entry(&devs->tree, dif, id) {
198 if (!dif->uniaddr)
199 continue;
200
201 startoff = erofs_pos(sb, dif->uniaddr);
202 if (map->m_pa >= startoff &&
203 map->m_pa < startoff + erofs_pos(sb, dif->blocks)) {
204 map->m_pa -= startoff;
205 erofs_fill_from_devinfo(map, sb, dif);
206 break;
207 }
208 }
209 up_read(&devs->rwsem);
210 }
211 return 0;
212 }
213
/*
 * Layout of the per-folio counter kept in folio->private while a read is
 * in flight:
 *
 *   bit 30:     an I/O error occurred on this folio
 *   bit 0 - 29: remaining parts to complete this folio
 */
#define EROFS_ONLINEFOLIO_EIO	(1 << 30)
219
erofs_onlinefolio_init(struct folio * folio)220 void erofs_onlinefolio_init(struct folio *folio)
221 {
222 union {
223 atomic_t o;
224 void *v;
225 } u = { .o = ATOMIC_INIT(1) };
226
227 folio->private = u.v; /* valid only if file-backed folio is locked */
228 }
229
erofs_onlinefolio_split(struct folio * folio)230 void erofs_onlinefolio_split(struct folio *folio)
231 {
232 atomic_inc((atomic_t *)&folio->private);
233 }
234
erofs_onlinefolio_end(struct folio * folio,int err)235 void erofs_onlinefolio_end(struct folio *folio, int err)
236 {
237 int orig, v;
238
239 do {
240 orig = atomic_read((atomic_t *)&folio->private);
241 v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
242 } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
243
244 if (v & ~EROFS_ONLINEFOLIO_EIO)
245 return;
246 folio->private = 0;
247 folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
248 }
249
/*
 * erofs_iomap_begin - iomap ->iomap_begin for uncompressed EROFS data
 * @inode:  inode being accessed
 * @offset: logical start of the request
 * @length: length of the request
 * @flags:  IOMAP_* flags; only IOMAP_DAX is examined here
 * @iomap:  mapping to fill in
 * @srcmap: unused
 *
 * Maps @offset with erofs_map_blocks(), resolves the backing device with
 * erofs_map_dev() and translates the result into @iomap:
 *
 * - unmapped extents become IOMAP_HOLE (falling back to @length when the
 *   mapped length is zero);
 * - EROFS_MAP_META extents become IOMAP_INLINE, with the metadata page
 *   kept mapped and its base address stashed in iomap->private so that
 *   erofs_iomap_end() can release it;
 * - everything else becomes IOMAP_MAPPED at the physical address.
 *
 * Return: 0 on success or a negative errno from the mapping helpers or
 * the metadata read.
 */
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	/*
	 * Zero everything that is not set explicitly: erofs_map_blocks()
	 * leaves m_pa unwritten for holes, yet m_pa is copied into mdev and
	 * compared against device ranges by erofs_map_dev() below.
	 */
	struct erofs_map_blocks map = {
		.m_la = offset,
		.m_llen = length,
	};
	struct erofs_map_dev mdev;

	ret = erofs_map_blocks(inode, &map);
	if (ret < 0)
		return ret;

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	iomap->offset = map.m_la;
	if (flags & IOMAP_DAX)
		iomap->dax_dev = mdev.m_dif->dax_dev;
	else
		iomap->bdev = mdev.m_bdev;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		if (!iomap->length)
			iomap->length = length;
		return 0;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr;
		/* page base address; lets ->iomap_end find and put the page */
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = mdev.m_pa;
		if (flags & IOMAP_DAX)
			iomap->addr += mdev.m_dif->dax_part_off;
	}
	return 0;
}
308
/*
 * iomap ->iomap_end: release the metadata page kept mapped for an inline
 * extent.
 *
 * iomap->private, when set, is the mapped base address of that page; the
 * page is recovered from it with kmap_to_page() and both the mapping and
 * the folio reference are dropped through erofs_put_metabuf().
 *
 * Return: @written, unchanged.
 */
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *base = iomap->private;
	struct erofs_buf buf = { .base = base };

	if (!base) {
		/* only inline extents carry private data */
		DBG_BUGON(iomap->type == IOMAP_INLINE);
		return written;
	}

	buf.page = kmap_to_page(base);
	DBG_BUGON(iomap->type != IOMAP_INLINE);
	erofs_put_metabuf(&buf);
	return written;
}
327
328 static const struct iomap_ops erofs_iomap_ops = {
329 .iomap_begin = erofs_iomap_begin,
330 .iomap_end = erofs_iomap_end,
331 };
332
/*
 * Report the extent layout of @inode through iomap_fiemap().
 *
 * Compressed inodes use z_erofs_iomap_report_ops when CONFIG_EROFS_FS_ZIP
 * is enabled and fail with -EOPNOTSUPP otherwise; all other inodes use
 * erofs_iomap_ops.
 */
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	const struct iomap_ops *ops = &erofs_iomap_ops;

	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		ops = &z_erofs_iomap_report_ops;
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, ops);
}
346
347 /*
348 * since we dont have write or truncate flows, so no inode
349 * locking needs to be held at the moment.
350 */
erofs_read_folio(struct file * file,struct folio * folio)351 static int erofs_read_folio(struct file *file, struct folio *folio)
352 {
353 return iomap_read_folio(folio, &erofs_iomap_ops);
354 }
355
erofs_readahead(struct readahead_control * rac)356 static void erofs_readahead(struct readahead_control *rac)
357 {
358 return iomap_readahead(rac, &erofs_iomap_ops);
359 }
360
/* ->bmap: translate file block @block of @mapping via iomap_bmap() */
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	const struct iomap_ops *ops = &erofs_iomap_ops;

	return iomap_bmap(mapping, block, ops);
}
365
erofs_file_read_iter(struct kiocb * iocb,struct iov_iter * to)366 static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
367 {
368 struct inode *inode = file_inode(iocb->ki_filp);
369
370 /* no need taking (shared) inode lock since it's a ro filesystem */
371 if (!iov_iter_count(to))
372 return 0;
373
374 #ifdef CONFIG_FS_DAX
375 if (IS_DAX(inode))
376 return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
377 #endif
378 if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
379 return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
380 NULL, 0, NULL, 0);
381 return filemap_read(iocb, to, 0);
382 }
383
384 /* for uncompressed (aligned) files and raw access for other files */
385 const struct address_space_operations erofs_aops = {
386 .read_folio = erofs_read_folio,
387 .readahead = erofs_readahead,
388 .bmap = erofs_bmap,
389 .direct_IO = noop_direct_IO,
390 .release_folio = iomap_release_folio,
391 .invalidate_folio = iomap_invalidate_folio,
392 };
393
394 #ifdef CONFIG_FS_DAX
erofs_dax_huge_fault(struct vm_fault * vmf,unsigned int order)395 static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
396 unsigned int order)
397 {
398 return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
399 }
400
erofs_dax_fault(struct vm_fault * vmf)401 static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
402 {
403 return erofs_dax_huge_fault(vmf, 0);
404 }
405
406 static const struct vm_operations_struct erofs_dax_vm_ops = {
407 .fault = erofs_dax_fault,
408 .huge_fault = erofs_dax_huge_fault,
409 };
410
erofs_file_mmap(struct file * file,struct vm_area_struct * vma)411 static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
412 {
413 if (!IS_DAX(file_inode(file)))
414 return generic_file_readonly_mmap(file, vma);
415
416 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
417 return -EINVAL;
418
419 vma->vm_ops = &erofs_dax_vm_ops;
420 vm_flags_set(vma, VM_HUGEPAGE);
421 return 0;
422 }
423 #else
424 #define erofs_file_mmap generic_file_readonly_mmap
425 #endif
426
/*
 * ->llseek with SEEK_HOLE / SEEK_DATA support.
 *
 * SEEK_HOLE and SEEK_DATA are answered by iomap, using the compressed
 * report ops for compressed inodes when CONFIG_EROFS_FS_ZIP is enabled.
 * Every other @whence, and compressed inodes without CONFIG_EROFS_FS_ZIP,
 * are handled by generic_file_llseek().
 *
 * Return: the new file position or a negative errno.
 */
static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	const struct iomap_ops *ops = &erofs_iomap_ops;

	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		ops = &z_erofs_iomap_report_ops;
#else
		return generic_file_llseek(file, offset, whence);
#endif
	}

	switch (whence) {
	case SEEK_HOLE:
		offset = iomap_seek_hole(inode, offset, ops);
		break;
	case SEEK_DATA:
		offset = iomap_seek_data(inode, offset, ops);
		break;
	default:
		return generic_file_llseek(file, offset, whence);
	}

	/* a negative result is an errno from the iomap seek helper */
	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
450
451 const struct file_operations erofs_file_fops = {
452 .llseek = erofs_file_llseek,
453 .read_iter = erofs_file_read_iter,
454 .mmap = erofs_file_mmap,
455 .get_unmapped_area = thp_get_unmapped_area,
456 .splice_read = filemap_splice_read,
457 };
458