// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets. The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file to
 * store our staging data. This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xmbuf must be freed with xmbuf_free.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken. Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace. Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set it up.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem folios to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;
	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	trace_xmbuf_create(btp);

	*btpp = btp;
	return 0;

out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}
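/*
 * Usage sketch (illustrative, not part of the file proper): a minimal xmbuf
 * lifecycle as a repair-time caller might drive it. The function name
 * xmbuf_example_lifecycle and the description string are hypothetical; only
 * xmbuf_alloc and xmbuf_free are real APIs from this file.
 */
static int __maybe_unused
xmbuf_example_lifecycle(
	struct xfs_mount	*mp)
{
	struct xfs_buftarg	*btp;
	int			error;

	/* Create a buftarg backed by an unlinked, kernel-private shmem file. */
	error = xmbuf_alloc(mp, "xmbuf usage example", &btp);
	if (error)
		return error;

	/* ... stage ephemeral btree records against btp here ... */

	/* Tear down the buffer cache and drop the backing shmem file. */
	xmbuf_free(btp);
	return 0;
}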
/* Free a buffer cache target for a memory-backed file. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}

/* Directly map a shmem folio into the buffer cache. */
int
xmbuf_map_backing_mem(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	struct folio		*folio = NULL;
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
	int			error;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	if (bp->b_map_count != 1)
		return -ENOMEM;
	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
		return -ENOMEM;
	if (offset_in_page(pos) != 0) {
		/* the assert must be able to fire, so test for zero here */
		ASSERT(offset_in_page(pos) == 0);
		return -ENOMEM;
	}

	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
	if (error)
		return error;

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	/*
	 * Mark the folio dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xfs_buf_free.
	 */
	folio_set_dirty(folio);
	folio_unlock(folio);

	bp->b_addr = folio_address(folio);
	return 0;
}

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct inode		*inode = file_inode(btp->bt_file);

	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}

/* Discard the folio backing this buffer. */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	pos = BBTOB(xfs_buf_daddr(bp));
	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing folio if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	if (bp->b_flags & XBF_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}

/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}
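/*
 * Illustrative sketch (not part of the file proper): the daddr-to-byte
 * translation that xmbuf_map_backing_mem and xmbuf_verify_daddr rely on.
 * daddrs count 512-byte basic blocks, so BBTOB() shifts left by BBSHIFT (9)
 * to produce a byte offset in the backing shmem file; a mappable xmbuf block
 * must start on a page boundary and span exactly one XMBUF_BLOCKSIZE
 * (PAGE_SIZE) block. The helper name is hypothetical.
 */
static bool __maybe_unused
xmbuf_example_daddr_ok(
	struct xfs_buftarg	*btp,
	struct xfs_buf		*bp)
{
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));

	return xmbuf_verify_daddr(btp, xfs_buf_daddr(bp)) &&
	       offset_in_page(pos) == 0 &&
	       BBTOB(bp->b_length) == XMBUF_BLOCKSIZE;
}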