1 // SPDX-License-Identifier: BSD-2-Clause 2 /* 3 * Copyright (c) 2020 iXsystems, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/dmu.h> 32 #include <sys/dmu_impl.h> 33 #include <sys/dmu_tx.h> 34 #include <sys/dbuf.h> 35 #include <sys/dnode.h> 36 #include <sys/zfs_context.h> 37 #include <sys/dmu_objset.h> 38 #include <sys/dmu_traverse.h> 39 #include <sys/dsl_dataset.h> 40 #include <sys/dsl_dir.h> 41 #include <sys/dsl_pool.h> 42 #include <sys/dsl_synctask.h> 43 #include <sys/dsl_prop.h> 44 #include <sys/zfs_ioctl.h> 45 #include <sys/zap.h> 46 #include <sys/zio_checksum.h> 47 #include <sys/zio_compress.h> 48 #include <sys/sa.h> 49 #include <sys/zfeature.h> 50 #include <sys/abd.h> 51 #include <sys/zfs_rlock.h> 52 #include <sys/racct.h> 53 #include <sys/vm.h> 54 #include <sys/zfs_znode.h> 55 #include <sys/zfs_vnops.h> 56 57 #include <sys/ccompat.h> 58 59 #ifndef IDX_TO_OFF 60 #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) 61 #endif 62 63 #define VM_ALLOC_BUSY_FLAGS VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY 64 65 int 66 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 67 vm_page_t *ma, dmu_tx_t *tx) 68 { 69 dmu_buf_t **dbp; 70 struct sf_buf *sf; 71 int numbufs, i; 72 int err; 73 dmu_flags_t flags = 0; 74 75 if (size == 0) 76 return (0); 77 78 err = dmu_buf_hold_array(os, object, offset, size, 79 FALSE, FTAG, &numbufs, &dbp); 80 if (err) 81 return (err); 82 83 for (i = 0; i < numbufs; i++) { 84 int tocpy, copied, thiscpy; 85 int bufoff; 86 dmu_buf_t *db = dbp[i]; 87 caddr_t va; 88 89 ASSERT3U(size, >, 0); 90 ASSERT3U(db->db_size, >=, PAGESIZE); 91 92 bufoff = offset - db->db_offset; 93 tocpy = (int)MIN(db->db_size - bufoff, size); 94 95 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 96 97 if (tocpy == db->db_size) { 98 dmu_buf_will_fill(db, tx, B_FALSE); 99 } else { 100 if (i == numbufs - 1 && bufoff + tocpy < db->db_size) { 101 if (bufoff == 0) 102 flags |= DMU_PARTIAL_FIRST; 103 else 104 flags |= DMU_PARTIAL_MORE; 105 } 106 dmu_buf_will_dirty_flags(db, tx, flags); 107 } 108 109 for (copied = 0; copied < tocpy; copied += PAGESIZE) { 110 ASSERT3U(ptoa((*ma)->pindex), ==, 111 db->db_offset + bufoff); 112 thiscpy = MIN(PAGESIZE, tocpy - copied); 113 va = zfs_map_page(*ma, &sf); 114 ASSERT(db->db_data != NULL); 115 memcpy((char *)db->db_data + bufoff, va, thiscpy); 116 zfs_unmap_page(sf); 117 ma += 1; 118 bufoff += PAGESIZE; 119 } 120 121 if (tocpy == db->db_size) 122 dmu_buf_fill_done(db, tx, B_FALSE); 123 124 offset += tocpy; 125 size -= tocpy; 126 } 127 dmu_buf_rele_array(dbp, numbufs, FTAG); 128 return (err); 129 } 130 131 int 132 dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, 133 int *rbehind, int *rahead, int last_size) 134 { 135 struct sf_buf *sf; 136 vm_object_t vmobj; 137 vm_page_t m; 138 dmu_buf_t **dbp; 139 dmu_buf_t *db; 140 caddr_t va; 141 int numbufs, i; 142 int bufoff, pgoff, tocpy; 143 int mi, di; 144 int err; 145 146 ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); 147 ASSERT3S(last_size, <=, PAGE_SIZE); 148 149 err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), 150 IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); 151 if (err != 0) 152 return (err); 153 154 #ifdef ZFS_DEBUG 155 IMPLY(last_size < PAGE_SIZE, *rahead == 0); 156 if (dbp[0]->db_offset != 0 || numbufs > 1) { 157 for (i = 0; i < numbufs; i++) { 158 ASSERT(ISP2(dbp[i]->db_size)); 159 ASSERT0((dbp[i]->db_offset % dbp[i]->db_size)); 160 ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); 161 } 162 } 163 #endif 164 165 vmobj = ma[0]->object; 166 167 db = dbp[0]; 168 for (i = 0; i < *rbehind; i++) { 169 m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i, 170 VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 171 if (m == NULL) 172 break; 173 if (!vm_page_none_valid(m)) { 174 ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 175 vm_page_sunbusy(m); 176 break; 177 } 178 ASSERT0(m->dirty); 179 ASSERT(!pmap_page_is_write_mapped(m)); 180 181 ASSERT3U(db->db_size, >, PAGE_SIZE); 182 bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 183 va = zfs_map_page(m, &sf); 184 ASSERT(db->db_data != NULL); 185 memcpy(va, (char *)db->db_data + bufoff, PAGESIZE); 186 zfs_unmap_page(sf); 187 vm_page_valid(m); 188 if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 189 vm_page_activate(m); 190 else 191 vm_page_deactivate(m); 192 vm_page_sunbusy(m); 193 } 194 *rbehind = i; 195 196 bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; 197 pgoff = 0; 198 for (mi = 0, di = 0; mi < count && di < numbufs; ) { 199 if (pgoff == 0) { 200 m = ma[mi]; 201 if (m != bogus_page) { 202 vm_page_assert_xbusied(m); 203 ASSERT(vm_page_none_valid(m)); 204 ASSERT0(m->dirty); 205 ASSERT(!pmap_page_is_write_mapped(m)); 206 va = zfs_map_page(m, &sf); 207 } 208 } 209 if (bufoff == 0) 210 db = dbp[di]; 211 212 if (m != bogus_page) { 213 ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, 214 db->db_offset + bufoff); 215 } 216 217 /* 218 * We do not need to clamp the copy size by the file 219 * size as the last block is zero-filled beyond the 220 * end of file anyway. 221 */ 222 tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); 223 ASSERT3S(tocpy, >=, 0); 224 if (m != bogus_page) { 225 ASSERT(db->db_data != NULL); 226 memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy); 227 } 228 229 pgoff += tocpy; 230 ASSERT3S(pgoff, >=, 0); 231 ASSERT3S(pgoff, <=, PAGESIZE); 232 if (pgoff == PAGESIZE) { 233 if (m != bogus_page) { 234 zfs_unmap_page(sf); 235 vm_page_valid(m); 236 } 237 ASSERT3S(mi, <, count); 238 mi++; 239 pgoff = 0; 240 } 241 242 bufoff += tocpy; 243 ASSERT3S(bufoff, >=, 0); 244 ASSERT3S(bufoff, <=, db->db_size); 245 if (bufoff == db->db_size) { 246 ASSERT3S(di, <, numbufs); 247 di++; 248 bufoff = 0; 249 } 250 } 251 252 #ifdef ZFS_DEBUG 253 /* 254 * Three possibilities: 255 * - last requested page ends at a buffer boundary and , thus, 256 * all pages and buffers have been iterated; 257 * - all requested pages are filled, but the last buffer 258 * has not been exhausted; 259 * the read-ahead is possible only in this case; 260 * - all buffers have been read, but the last page has not been 261 * fully filled; 262 * this is only possible if the file has only a single buffer 263 * with a size that is not a multiple of the page size. 264 */ 265 if (mi == count) { 266 ASSERT3S(di, >=, numbufs - 1); 267 IMPLY(*rahead != 0, di == numbufs - 1); 268 IMPLY(*rahead != 0, bufoff != 0); 269 ASSERT0(pgoff); 270 } 271 if (di == numbufs) { 272 ASSERT3S(mi, >=, count - 1); 273 ASSERT0(*rahead); 274 IMPLY(pgoff == 0, mi == count); 275 if (pgoff != 0) { 276 ASSERT3S(mi, ==, count - 1); 277 ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0); 278 } 279 } 280 #endif 281 if (pgoff != 0) { 282 ASSERT3P(m, !=, bogus_page); 283 memset(va + pgoff, 0, PAGESIZE - pgoff); 284 zfs_unmap_page(sf); 285 vm_page_valid(m); 286 } 287 288 for (i = 0; i < *rahead; i++) { 289 m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i, 290 VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); 291 if (m == NULL) 292 break; 293 if (!vm_page_none_valid(m)) { 294 ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); 295 vm_page_sunbusy(m); 296 break; 297 } 298 ASSERT0(m->dirty); 299 ASSERT(!pmap_page_is_write_mapped(m)); 300 301 ASSERT3U(db->db_size, >, PAGE_SIZE); 302 bufoff = IDX_TO_OFF(m->pindex) % db->db_size; 303 tocpy = MIN(db->db_size - bufoff, PAGESIZE); 304 va = zfs_map_page(m, &sf); 305 ASSERT(db->db_data != NULL); 306 memcpy(va, (char *)db->db_data + bufoff, tocpy); 307 if (tocpy < PAGESIZE) { 308 ASSERT3S(i, ==, *rahead - 1); 309 ASSERT3U((db->db_size & PAGE_MASK), !=, 0); 310 memset(va + tocpy, 0, PAGESIZE - tocpy); 311 } 312 zfs_unmap_page(sf); 313 vm_page_valid(m); 314 if ((m->busy_lock & VPB_BIT_WAITERS) != 0) 315 vm_page_activate(m); 316 else 317 vm_page_deactivate(m); 318 vm_page_sunbusy(m); 319 } 320 *rahead = i; 321 322 dmu_buf_rele_array(dbp, numbufs, FTAG); 323 return (0); 324 } 325