1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * Copyright (c) 2012 Taobao. 4 * Written by Tao Ma <boyu.mt@taobao.com> 5 */ 6 7 #include <linux/iomap.h> 8 #include <linux/fiemap.h> 9 #include <linux/namei.h> 10 #include <linux/iversion.h> 11 #include <linux/sched/mm.h> 12 13 #include "ext4_jbd2.h" 14 #include "ext4.h" 15 #include "xattr.h" 16 #include "truncate.h" 17 18 #define EXT4_XATTR_SYSTEM_DATA "data" 19 #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) 20 #define EXT4_INLINE_DOTDOT_OFFSET 2 21 #define EXT4_INLINE_DOTDOT_SIZE 4 22 23 24 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 25 struct inode *inode, 26 void **fsdata); 27 28 static int ext4_get_inline_size(struct inode *inode) 29 { 30 if (EXT4_I(inode)->i_inline_off) 31 return EXT4_I(inode)->i_inline_size; 32 33 return 0; 34 } 35 36 static int get_max_inline_xattr_value_size(struct inode *inode, 37 struct ext4_iloc *iloc) 38 { 39 struct ext4_xattr_ibody_header *header; 40 struct ext4_xattr_entry *entry; 41 struct ext4_inode *raw_inode; 42 void *end; 43 int free, min_offs; 44 45 if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) 46 return 0; 47 48 min_offs = EXT4_SB(inode->i_sb)->s_inode_size - 49 EXT4_GOOD_OLD_INODE_SIZE - 50 EXT4_I(inode)->i_extra_isize - 51 sizeof(struct ext4_xattr_ibody_header); 52 53 /* 54 * We need to subtract another sizeof(__u32) since an in-inode xattr 55 * needs an empty 4 bytes to indicate the gap between the xattr entry 56 * and the name/value pair. 57 */ 58 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 59 return EXT4_XATTR_SIZE(min_offs - 60 EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) - 61 EXT4_XATTR_ROUND - sizeof(__u32)); 62 63 raw_inode = ext4_raw_inode(iloc); 64 header = IHDR(inode, raw_inode); 65 entry = IFIRST(header); 66 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 67 68 /* Compute min_offs. 
*/ 69 while (!IS_LAST_ENTRY(entry)) { 70 void *next = EXT4_XATTR_NEXT(entry); 71 72 if (next >= end) { 73 EXT4_ERROR_INODE(inode, 74 "corrupt xattr in inline inode"); 75 return 0; 76 } 77 if (!entry->e_value_inum && entry->e_value_size) { 78 size_t offs = le16_to_cpu(entry->e_value_offs); 79 if (offs < min_offs) 80 min_offs = offs; 81 } 82 entry = next; 83 } 84 free = min_offs - 85 ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); 86 87 if (EXT4_I(inode)->i_inline_off) { 88 entry = (struct ext4_xattr_entry *) 89 ((void *)raw_inode + EXT4_I(inode)->i_inline_off); 90 91 free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); 92 goto out; 93 } 94 95 free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)); 96 97 if (free > EXT4_XATTR_ROUND) 98 free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND); 99 else 100 free = 0; 101 102 out: 103 return free; 104 } 105 106 /* 107 * Get the maximum size we now can store in an inode. 108 * If we can't find the space for a xattr entry, don't use the space 109 * of the extents since we have no space to indicate the inline data. 
110 */ 111 int ext4_get_max_inline_size(struct inode *inode) 112 { 113 int error, max_inline_size; 114 struct ext4_iloc iloc; 115 116 if (EXT4_I(inode)->i_extra_isize == 0) 117 return 0; 118 119 error = ext4_get_inode_loc(inode, &iloc); 120 if (error) { 121 ext4_error_inode_err(inode, __func__, __LINE__, 0, -error, 122 "can't get inode location %lu", 123 inode->i_ino); 124 return 0; 125 } 126 127 down_read(&EXT4_I(inode)->xattr_sem); 128 max_inline_size = get_max_inline_xattr_value_size(inode, &iloc); 129 up_read(&EXT4_I(inode)->xattr_sem); 130 131 brelse(iloc.bh); 132 133 if (!max_inline_size) 134 return 0; 135 136 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; 137 } 138 139 /* 140 * this function does not take xattr_sem, which is OK because it is 141 * currently only used in a code path coming form ext4_iget, before 142 * the new inode has been unlocked 143 */ 144 int ext4_find_inline_data_nolock(struct inode *inode) 145 { 146 struct ext4_xattr_ibody_find is = { 147 .s = { .not_found = -ENODATA, }, 148 }; 149 struct ext4_xattr_info i = { 150 .name_index = EXT4_XATTR_INDEX_SYSTEM, 151 .name = EXT4_XATTR_SYSTEM_DATA, 152 }; 153 int error; 154 155 if (EXT4_I(inode)->i_extra_isize == 0) 156 return 0; 157 158 error = ext4_get_inode_loc(inode, &is.iloc); 159 if (error) 160 return error; 161 162 error = ext4_xattr_ibody_find(inode, &i, &is); 163 if (error) 164 goto out; 165 166 if (!is.s.not_found) { 167 if (is.s.here->e_value_inum) { 168 EXT4_ERROR_INODE(inode, "inline data xattr refers " 169 "to an external xattr inode"); 170 error = -EFSCORRUPTED; 171 goto out; 172 } 173 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - 174 (void *)ext4_raw_inode(&is.iloc)); 175 EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + 176 le32_to_cpu(is.s.here->e_value_size); 177 } 178 out: 179 brelse(is.iloc.bh); 180 return error; 181 } 182 183 static int ext4_read_inline_data(struct inode *inode, void *buffer, 184 unsigned int len, 185 struct ext4_iloc *iloc) 186 { 187 
struct ext4_xattr_entry *entry; 188 struct ext4_xattr_ibody_header *header; 189 int cp_len = 0; 190 struct ext4_inode *raw_inode; 191 192 if (!len) 193 return 0; 194 195 BUG_ON(len > EXT4_I(inode)->i_inline_size); 196 197 cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE); 198 199 raw_inode = ext4_raw_inode(iloc); 200 memcpy(buffer, (void *)(raw_inode->i_block), cp_len); 201 202 len -= cp_len; 203 buffer += cp_len; 204 205 if (!len) 206 goto out; 207 208 header = IHDR(inode, raw_inode); 209 entry = (struct ext4_xattr_entry *)((void *)raw_inode + 210 EXT4_I(inode)->i_inline_off); 211 len = min_t(unsigned int, len, 212 (unsigned int)le32_to_cpu(entry->e_value_size)); 213 214 memcpy(buffer, 215 (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len); 216 cp_len += len; 217 218 out: 219 return cp_len; 220 } 221 222 /* 223 * write the buffer to the inline inode. 224 * If 'create' is set, we don't need to do the extra copy in the xattr 225 * value since it is already handled by ext4_xattr_ibody_set. 226 * That saves us one memcpy. 227 */ 228 static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, 229 void *buffer, loff_t pos, unsigned int len) 230 { 231 struct ext4_xattr_entry *entry; 232 struct ext4_xattr_ibody_header *header; 233 struct ext4_inode *raw_inode; 234 int cp_len = 0; 235 236 if (unlikely(ext4_emergency_state(inode->i_sb))) 237 return; 238 239 BUG_ON(!EXT4_I(inode)->i_inline_off); 240 BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); 241 242 raw_inode = ext4_raw_inode(iloc); 243 buffer += pos; 244 245 if (pos < EXT4_MIN_INLINE_DATA_SIZE) { 246 cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ? 
247 EXT4_MIN_INLINE_DATA_SIZE - pos : len; 248 memcpy((void *)raw_inode->i_block + pos, buffer, cp_len); 249 250 len -= cp_len; 251 buffer += cp_len; 252 pos += cp_len; 253 } 254 255 if (!len) 256 return; 257 258 pos -= EXT4_MIN_INLINE_DATA_SIZE; 259 header = IHDR(inode, raw_inode); 260 entry = (struct ext4_xattr_entry *)((void *)raw_inode + 261 EXT4_I(inode)->i_inline_off); 262 263 memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos, 264 buffer, len); 265 } 266 267 static int ext4_create_inline_data(handle_t *handle, 268 struct inode *inode, unsigned len) 269 { 270 int error; 271 void *value = NULL; 272 struct ext4_xattr_ibody_find is = { 273 .s = { .not_found = -ENODATA, }, 274 }; 275 struct ext4_xattr_info i = { 276 .name_index = EXT4_XATTR_INDEX_SYSTEM, 277 .name = EXT4_XATTR_SYSTEM_DATA, 278 }; 279 280 error = ext4_get_inode_loc(inode, &is.iloc); 281 if (error) 282 return error; 283 284 BUFFER_TRACE(is.iloc.bh, "get_write_access"); 285 error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, 286 EXT4_JTR_NONE); 287 if (error) 288 goto out; 289 290 if (len > EXT4_MIN_INLINE_DATA_SIZE) { 291 value = EXT4_ZERO_XATTR_VALUE; 292 len -= EXT4_MIN_INLINE_DATA_SIZE; 293 } else { 294 value = ""; 295 len = 0; 296 } 297 298 /* Insert the xttr entry. 
*/ 299 i.value = value; 300 i.value_len = len; 301 302 error = ext4_xattr_ibody_find(inode, &i, &is); 303 if (error) 304 goto out; 305 306 BUG_ON(!is.s.not_found); 307 308 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 309 if (error) { 310 if (error == -ENOSPC) 311 ext4_clear_inode_state(inode, 312 EXT4_STATE_MAY_INLINE_DATA); 313 goto out; 314 } 315 316 memset((void *)ext4_raw_inode(&is.iloc)->i_block, 317 0, EXT4_MIN_INLINE_DATA_SIZE); 318 319 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - 320 (void *)ext4_raw_inode(&is.iloc)); 321 EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE; 322 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 323 ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); 324 get_bh(is.iloc.bh); 325 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 326 327 out: 328 brelse(is.iloc.bh); 329 return error; 330 } 331 332 static int ext4_update_inline_data(handle_t *handle, struct inode *inode, 333 unsigned int len) 334 { 335 int error; 336 void *value = NULL; 337 struct ext4_xattr_ibody_find is = { 338 .s = { .not_found = -ENODATA, }, 339 }; 340 struct ext4_xattr_info i = { 341 .name_index = EXT4_XATTR_INDEX_SYSTEM, 342 .name = EXT4_XATTR_SYSTEM_DATA, 343 }; 344 345 /* If the old space is ok, write the data directly. 
*/ 346 if (len <= EXT4_I(inode)->i_inline_size) 347 return 0; 348 349 error = ext4_get_inode_loc(inode, &is.iloc); 350 if (error) 351 return error; 352 353 error = ext4_xattr_ibody_find(inode, &i, &is); 354 if (error) 355 goto out; 356 357 BUG_ON(is.s.not_found); 358 359 len -= EXT4_MIN_INLINE_DATA_SIZE; 360 value = kzalloc(len, GFP_NOFS); 361 if (!value) { 362 error = -ENOMEM; 363 goto out; 364 } 365 366 error = ext4_xattr_ibody_get(inode, i.name_index, i.name, 367 value, len); 368 if (error < 0) 369 goto out; 370 371 BUFFER_TRACE(is.iloc.bh, "get_write_access"); 372 error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, 373 EXT4_JTR_NONE); 374 if (error) 375 goto out; 376 377 /* Update the xattr entry. */ 378 i.value = value; 379 i.value_len = len; 380 381 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 382 if (error) 383 goto out; 384 385 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - 386 (void *)ext4_raw_inode(&is.iloc)); 387 EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + 388 le32_to_cpu(is.s.here->e_value_size); 389 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 390 get_bh(is.iloc.bh); 391 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 392 393 out: 394 kfree(value); 395 brelse(is.iloc.bh); 396 return error; 397 } 398 399 static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, 400 loff_t len) 401 { 402 int ret, size, no_expand; 403 struct ext4_inode_info *ei = EXT4_I(inode); 404 405 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 406 return -ENOSPC; 407 408 size = ext4_get_max_inline_size(inode); 409 if (size < len) 410 return -ENOSPC; 411 412 ext4_write_lock_xattr(inode, &no_expand); 413 414 if (ei->i_inline_off) 415 ret = ext4_update_inline_data(handle, inode, len); 416 else 417 ret = ext4_create_inline_data(handle, inode, len); 418 419 ext4_write_unlock_xattr(inode, &no_expand); 420 return ret; 421 } 422 423 static int ext4_destroy_inline_data_nolock(handle_t 
*handle, 424 struct inode *inode) 425 { 426 struct ext4_inode_info *ei = EXT4_I(inode); 427 struct ext4_xattr_ibody_find is = { 428 .s = { .not_found = 0, }, 429 }; 430 struct ext4_xattr_info i = { 431 .name_index = EXT4_XATTR_INDEX_SYSTEM, 432 .name = EXT4_XATTR_SYSTEM_DATA, 433 .value = NULL, 434 .value_len = 0, 435 }; 436 int error; 437 438 if (!ei->i_inline_off) 439 return 0; 440 441 error = ext4_get_inode_loc(inode, &is.iloc); 442 if (error) 443 return error; 444 445 error = ext4_xattr_ibody_find(inode, &i, &is); 446 if (error) 447 goto out; 448 449 BUFFER_TRACE(is.iloc.bh, "get_write_access"); 450 error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, 451 EXT4_JTR_NONE); 452 if (error) 453 goto out; 454 455 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 456 if (error) 457 goto out; 458 459 memset((void *)ext4_raw_inode(&is.iloc)->i_block, 460 0, EXT4_MIN_INLINE_DATA_SIZE); 461 memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); 462 463 if (ext4_has_feature_extents(inode->i_sb)) { 464 if (S_ISDIR(inode->i_mode) || 465 S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { 466 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); 467 ext4_ext_tree_init(handle, inode); 468 } 469 } 470 ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA); 471 472 get_bh(is.iloc.bh); 473 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 474 475 EXT4_I(inode)->i_inline_off = 0; 476 EXT4_I(inode)->i_inline_size = 0; 477 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 478 out: 479 brelse(is.iloc.bh); 480 if (error == -ENODATA) 481 error = 0; 482 return error; 483 } 484 485 static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) 486 { 487 void *kaddr; 488 int ret = 0; 489 size_t len; 490 struct ext4_iloc iloc; 491 492 BUG_ON(!folio_test_locked(folio)); 493 BUG_ON(!ext4_has_inline_data(inode)); 494 BUG_ON(folio->index); 495 496 if (!EXT4_I(inode)->i_inline_off) { 497 ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.", 498 
inode->i_ino); 499 goto out; 500 } 501 502 ret = ext4_get_inode_loc(inode, &iloc); 503 if (ret) 504 goto out; 505 506 len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); 507 BUG_ON(len > PAGE_SIZE); 508 kaddr = kmap_local_folio(folio, 0); 509 ret = ext4_read_inline_data(inode, kaddr, len, &iloc); 510 kaddr = folio_zero_tail(folio, len, kaddr + len); 511 kunmap_local(kaddr); 512 folio_mark_uptodate(folio); 513 brelse(iloc.bh); 514 515 out: 516 return ret; 517 } 518 519 int ext4_readpage_inline(struct inode *inode, struct folio *folio) 520 { 521 int ret = 0; 522 523 down_read(&EXT4_I(inode)->xattr_sem); 524 if (!ext4_has_inline_data(inode)) { 525 up_read(&EXT4_I(inode)->xattr_sem); 526 return -EAGAIN; 527 } 528 529 /* 530 * Current inline data can only exist in the 1st page, 531 * So for all the other pages, just set them uptodate. 532 */ 533 if (!folio->index) 534 ret = ext4_read_inline_folio(inode, folio); 535 else if (!folio_test_uptodate(folio)) { 536 folio_zero_segment(folio, 0, folio_size(folio)); 537 folio_mark_uptodate(folio); 538 } 539 540 up_read(&EXT4_I(inode)->xattr_sem); 541 542 folio_unlock(folio); 543 return ret >= 0 ? 0 : ret; 544 } 545 546 static int ext4_convert_inline_data_to_extent(struct address_space *mapping, 547 struct inode *inode) 548 { 549 int ret, needed_blocks, no_expand; 550 handle_t *handle = NULL; 551 int retries = 0, sem_held = 0; 552 struct folio *folio = NULL; 553 unsigned from, to; 554 struct ext4_iloc iloc; 555 556 if (!ext4_has_inline_data(inode)) { 557 /* 558 * clear the flag so that no new write 559 * will trap here again. 
560 */ 561 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 562 return 0; 563 } 564 565 needed_blocks = ext4_writepage_trans_blocks(inode); 566 567 ret = ext4_get_inode_loc(inode, &iloc); 568 if (ret) 569 return ret; 570 571 retry: 572 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 573 if (IS_ERR(handle)) { 574 ret = PTR_ERR(handle); 575 handle = NULL; 576 goto out; 577 } 578 579 /* We cannot recurse into the filesystem as the transaction is already 580 * started */ 581 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, 582 mapping_gfp_mask(mapping)); 583 if (IS_ERR(folio)) { 584 ret = PTR_ERR(folio); 585 goto out_nofolio; 586 } 587 588 ext4_write_lock_xattr(inode, &no_expand); 589 sem_held = 1; 590 /* If some one has already done this for us, just exit. */ 591 if (!ext4_has_inline_data(inode)) { 592 ret = 0; 593 goto out; 594 } 595 596 from = 0; 597 to = ext4_get_inline_size(inode); 598 if (!folio_test_uptodate(folio)) { 599 ret = ext4_read_inline_folio(inode, folio); 600 if (ret < 0) 601 goto out; 602 } 603 604 ext4_fc_track_inode(handle, inode); 605 ret = ext4_destroy_inline_data_nolock(handle, inode); 606 if (ret) 607 goto out; 608 609 if (ext4_should_dioread_nolock(inode)) { 610 ret = ext4_block_write_begin(handle, folio, from, to, 611 ext4_get_block_unwritten); 612 } else 613 ret = ext4_block_write_begin(handle, folio, from, to, 614 ext4_get_block); 615 616 if (!ret && ext4_should_journal_data(inode)) { 617 ret = ext4_walk_page_buffers(handle, inode, 618 folio_buffers(folio), from, to, 619 NULL, do_journal_get_write_access); 620 } 621 622 if (ret) { 623 folio_unlock(folio); 624 folio_put(folio); 625 folio = NULL; 626 ext4_orphan_add(handle, inode); 627 ext4_write_unlock_xattr(inode, &no_expand); 628 sem_held = 0; 629 ext4_journal_stop(handle); 630 handle = NULL; 631 ext4_truncate_failed_write(inode); 632 /* 633 * If truncate failed early the inode might 634 * still be on the orphan list; we need to 635 * make 
sure the inode is removed from the 636 * orphan list in that case. 637 */ 638 if (inode->i_nlink) 639 ext4_orphan_del(NULL, inode); 640 } 641 642 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 643 goto retry; 644 645 if (folio) 646 block_commit_write(folio, from, to); 647 out: 648 if (folio) { 649 folio_unlock(folio); 650 folio_put(folio); 651 } 652 out_nofolio: 653 if (sem_held) 654 ext4_write_unlock_xattr(inode, &no_expand); 655 if (handle) 656 ext4_journal_stop(handle); 657 brelse(iloc.bh); 658 return ret; 659 } 660 661 /* 662 * Prepare the write for the inline data. 663 * If the data can be written into the inode, we just read 664 * the page and make it uptodate, and start the journal. 665 * Otherwise read the page, makes it dirty so that it can be 666 * handle in writepages(the i_disksize update is left to the 667 * normal ext4_da_write_end). 668 */ 669 int ext4_generic_write_inline_data(struct address_space *mapping, 670 struct inode *inode, 671 loff_t pos, unsigned len, 672 struct folio **foliop, 673 void **fsdata, bool da) 674 { 675 int ret; 676 handle_t *handle; 677 struct folio *folio; 678 struct ext4_iloc iloc; 679 int retries = 0; 680 681 ret = ext4_get_inode_loc(inode, &iloc); 682 if (ret) 683 return ret; 684 685 retry_journal: 686 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); 687 if (IS_ERR(handle)) { 688 ret = PTR_ERR(handle); 689 goto out_release_bh; 690 } 691 692 ret = ext4_prepare_inline_data(handle, inode, pos + len); 693 if (ret && ret != -ENOSPC) 694 goto out_stop_journal; 695 696 if (ret == -ENOSPC) { 697 ext4_journal_stop(handle); 698 if (!da) { 699 brelse(iloc.bh); 700 /* Retry inside */ 701 return ext4_convert_inline_data_to_extent(mapping, inode); 702 } 703 704 ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata); 705 if (ret == -ENOSPC && 706 ext4_should_retry_alloc(inode->i_sb, &retries)) 707 goto retry_journal; 708 goto out_release_bh; 709 } 710 711 folio = __filemap_get_folio(mapping, 0, 
FGP_WRITEBEGIN | FGP_NOFS, 712 mapping_gfp_mask(mapping)); 713 if (IS_ERR(folio)) { 714 ret = PTR_ERR(folio); 715 goto out_stop_journal; 716 } 717 718 down_read(&EXT4_I(inode)->xattr_sem); 719 /* Someone else had converted it to extent */ 720 if (!ext4_has_inline_data(inode)) { 721 ret = 0; 722 goto out_release_folio; 723 } 724 725 if (!folio_test_uptodate(folio)) { 726 ret = ext4_read_inline_folio(inode, folio); 727 if (ret < 0) 728 goto out_release_folio; 729 } 730 731 ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE); 732 if (ret) 733 goto out_release_folio; 734 *foliop = folio; 735 up_read(&EXT4_I(inode)->xattr_sem); 736 brelse(iloc.bh); 737 return 1; 738 739 out_release_folio: 740 up_read(&EXT4_I(inode)->xattr_sem); 741 folio_unlock(folio); 742 folio_put(folio); 743 out_stop_journal: 744 ext4_journal_stop(handle); 745 out_release_bh: 746 brelse(iloc.bh); 747 return ret; 748 } 749 750 /* 751 * Try to write data in the inode. 752 * If the inode has inline data, check whether the new write can be 753 * in the inode also. If not, create the page the handle, move the data 754 * to the page make it update and let the later codes create extent for it. 
*/
int ext4_try_to_write_inline_data(struct address_space *mapping,
				  struct inode *inode,
				  loff_t pos, unsigned len,
				  struct folio **foliop)
{
	/* The write would end beyond the inline capacity: convert right away. */
	if (pos + len > ext4_get_max_inline_size(inode))
		return ext4_convert_inline_data_to_extent(mapping, inode);
	/* Non-delalloc path: no fsdata, da == false. */
	return ext4_generic_write_inline_data(mapping, inode, pos, len,
					      foliop, NULL, false);
}

/*
 * Finish a write into an inline-data inode: copy @copied bytes at @pos
 * from @folio into the inode's inline area, update the inode size, drop
 * the folio and stop the current journal handle.
 *
 * Returns the number of bytes copied, or a negative errno.
 */
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
			       unsigned copied, struct folio *folio)
{
	handle_t *handle = ext4_journal_current_handle();
	int no_expand;
	void *kaddr;
	struct ext4_iloc iloc;
	int ret = 0, ret2;

	/* A short copy into a folio that is not uptodate is discarded. */
	if (unlikely(copied < len) && !folio_test_uptodate(folio))
		copied = 0;

	if (likely(copied)) {
		ret = ext4_get_inode_loc(inode, &iloc);
		if (ret) {
			folio_unlock(folio);
			folio_put(folio);
			ext4_std_error(inode->i_sb, ret);
			goto out;
		}
		ext4_write_lock_xattr(inode, &no_expand);
		BUG_ON(!ext4_has_inline_data(inode));

		/*
		 * ei->i_inline_off may have changed since
		 * ext4_write_begin() called
		 * ext4_try_to_write_inline_data()
		 */
		(void) ext4_find_inline_data_nolock(inode);

		kaddr = kmap_local_folio(folio, 0);
		ext4_write_inline_data(inode, &iloc, kaddr, pos, copied);
		kunmap_local(kaddr);
		folio_mark_uptodate(folio);
		/* clear dirty flag so that writepages wouldn't work for us. */
		folio_clear_dirty(folio);

		ext4_write_unlock_xattr(inode, &no_expand);
		brelse(iloc.bh);

		/*
		 * It's important to update i_size while still holding folio
		 * lock: page writeout could otherwise come in and zero
		 * beyond i_size.
		 */
		ext4_update_inode_size(inode, pos + copied);
	}
	folio_unlock(folio);
	folio_put(folio);

	/*
	 * Don't mark the inode dirty under folio lock. First, it unnecessarily
	 * makes the holding time of folio lock longer. Second, it forces lock
	 * ordering of folio lock and transaction start for journaling
	 * filesystems.
	 */
	if (likely(copied))
		mark_inode_dirty(inode);
out:
	/*
	 * If we didn't copy as much data as expected, we need to trim back
	 * size of xattr containing inline data.
	 */
	if (pos + len > inode->i_size && ext4_can_truncate(inode))
		ext4_orphan_add(handle, inode);

	/* Keep the first error; the journal stop result is used otherwise. */
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	if (pos + len > inode->i_size) {
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}
	return ret ? ret : copied;
}

/*
 * Try to make the page cache and handle ready for the inline data case.
 * We can call this function in 2 cases:
 * 1. The inode is created and the first write exceeds inline size. We can
 *    clear the inode state safely.
 * 2. The inode has inline data, then we need to read the data, make it
 *    update and dirty so that ext4_da_writepages can handle it. We don't
 *    need to start the journal since the file's metadata isn't changed now.
857 */ 858 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 859 struct inode *inode, 860 void **fsdata) 861 { 862 int ret = 0, inline_size; 863 struct folio *folio; 864 865 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN, 866 mapping_gfp_mask(mapping)); 867 if (IS_ERR(folio)) 868 return PTR_ERR(folio); 869 870 down_read(&EXT4_I(inode)->xattr_sem); 871 if (!ext4_has_inline_data(inode)) { 872 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 873 goto out; 874 } 875 876 inline_size = ext4_get_inline_size(inode); 877 878 if (!folio_test_uptodate(folio)) { 879 ret = ext4_read_inline_folio(inode, folio); 880 if (ret < 0) 881 goto out; 882 } 883 884 ret = ext4_block_write_begin(NULL, folio, 0, inline_size, 885 ext4_da_get_block_prep); 886 if (ret) { 887 up_read(&EXT4_I(inode)->xattr_sem); 888 folio_unlock(folio); 889 folio_put(folio); 890 ext4_truncate_failed_write(inode); 891 return ret; 892 } 893 894 folio_mark_dirty(folio); 895 folio_mark_uptodate(folio); 896 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 897 *fsdata = (void *)CONVERT_INLINE_DATA; 898 899 out: 900 up_read(&EXT4_I(inode)->xattr_sem); 901 if (folio) { 902 folio_unlock(folio); 903 folio_put(folio); 904 } 905 return ret; 906 } 907 908 #ifdef INLINE_DIR_DEBUG 909 void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, 910 void *inline_start, int inline_size) 911 { 912 int offset; 913 unsigned short de_len; 914 struct ext4_dir_entry_2 *de = inline_start; 915 void *dlimit = inline_start + inline_size; 916 917 trace_printk("inode %lu\n", dir->i_ino); 918 offset = 0; 919 while ((void *)de < dlimit) { 920 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); 921 trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n", 922 offset, de_len, de->name_len, de->name, 923 de->name_len, le32_to_cpu(de->inode)); 924 if (ext4_check_dir_entry(dir, NULL, de, bh, 925 inline_start, inline_size, offset)) 926 BUG(); 927 928 offset += de_len; 929 
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); 930 } 931 } 932 #else 933 #define ext4_show_inline_dir(dir, bh, inline_start, inline_size) 934 #endif 935 936 /* 937 * Add a new entry into a inline dir. 938 * It will return -ENOSPC if no space is available, and -EIO 939 * and -EEXIST if directory entry already exists. 940 */ 941 static int ext4_add_dirent_to_inline(handle_t *handle, 942 struct ext4_filename *fname, 943 struct inode *dir, 944 struct inode *inode, 945 struct ext4_iloc *iloc, 946 void *inline_start, int inline_size) 947 { 948 int err; 949 struct ext4_dir_entry_2 *de; 950 951 err = ext4_find_dest_de(dir, iloc->bh, inline_start, 952 inline_size, fname, &de); 953 if (err) 954 return err; 955 956 BUFFER_TRACE(iloc->bh, "get_write_access"); 957 err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh, 958 EXT4_JTR_NONE); 959 if (err) 960 return err; 961 ext4_insert_dentry(dir, inode, de, inline_size, fname); 962 963 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); 964 965 /* 966 * XXX shouldn't update any times until successful 967 * completion of syscall, but too many callers depend 968 * on this. 969 * 970 * XXX similarly, too many callers depend on 971 * ext4_new_inode() setting the times, but error 972 * recovery deletes the inode, so the worst that can 973 * happen is that the times are slightly out of date 974 * and/or different from the directory change time. 
975 */ 976 inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); 977 ext4_update_dx_flag(dir); 978 inode_inc_iversion(dir); 979 return 1; 980 } 981 982 static void *ext4_get_inline_xattr_pos(struct inode *inode, 983 struct ext4_iloc *iloc) 984 { 985 struct ext4_xattr_entry *entry; 986 struct ext4_xattr_ibody_header *header; 987 988 BUG_ON(!EXT4_I(inode)->i_inline_off); 989 990 header = IHDR(inode, ext4_raw_inode(iloc)); 991 entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) + 992 EXT4_I(inode)->i_inline_off); 993 994 return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs); 995 } 996 997 /* Set the final de to cover the whole block. */ 998 static void ext4_update_final_de(void *de_buf, int old_size, int new_size) 999 { 1000 struct ext4_dir_entry_2 *de, *prev_de; 1001 void *limit; 1002 int de_len; 1003 1004 de = de_buf; 1005 if (old_size) { 1006 limit = de_buf + old_size; 1007 do { 1008 prev_de = de; 1009 de_len = ext4_rec_len_from_disk(de->rec_len, old_size); 1010 de_buf += de_len; 1011 de = de_buf; 1012 } while (de_buf < limit); 1013 1014 prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - 1015 old_size, new_size); 1016 } else { 1017 /* this is just created, so create an empty entry. 
*/ 1018 de->inode = 0; 1019 de->rec_len = ext4_rec_len_to_disk(new_size, new_size); 1020 } 1021 } 1022 1023 static int ext4_update_inline_dir(handle_t *handle, struct inode *dir, 1024 struct ext4_iloc *iloc) 1025 { 1026 int ret; 1027 int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; 1028 int new_size = get_max_inline_xattr_value_size(dir, iloc); 1029 1030 if (new_size - old_size <= ext4_dir_rec_len(1, NULL)) 1031 return -ENOSPC; 1032 1033 ret = ext4_update_inline_data(handle, dir, 1034 new_size + EXT4_MIN_INLINE_DATA_SIZE); 1035 if (ret) 1036 return ret; 1037 1038 ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size, 1039 EXT4_I(dir)->i_inline_size - 1040 EXT4_MIN_INLINE_DATA_SIZE); 1041 dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size; 1042 return 0; 1043 } 1044 1045 static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, 1046 struct ext4_iloc *iloc, 1047 void *buf, int inline_size) 1048 { 1049 int ret; 1050 1051 ret = ext4_create_inline_data(handle, inode, inline_size); 1052 if (ret) { 1053 ext4_msg(inode->i_sb, KERN_EMERG, 1054 "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", 1055 inode->i_ino, ret); 1056 return; 1057 } 1058 ext4_write_inline_data(inode, iloc, buf, 0, inline_size); 1059 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1060 } 1061 1062 static int ext4_finish_convert_inline_dir(handle_t *handle, 1063 struct inode *inode, 1064 struct buffer_head *dir_block, 1065 void *buf, 1066 int inline_size) 1067 { 1068 int err, csum_size = 0, header_size = 0; 1069 struct ext4_dir_entry_2 *de; 1070 void *target = dir_block->b_data; 1071 1072 /* 1073 * First create "." and ".." and then copy the dir information 1074 * back to the block. 
1075 */ 1076 de = target; 1077 de = ext4_init_dot_dotdot(inode, de, 1078 inode->i_sb->s_blocksize, csum_size, 1079 le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1); 1080 header_size = (void *)de - target; 1081 1082 memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, 1083 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1084 1085 if (ext4_has_feature_metadata_csum(inode->i_sb)) 1086 csum_size = sizeof(struct ext4_dir_entry_tail); 1087 1088 inode->i_size = inode->i_sb->s_blocksize; 1089 i_size_write(inode, inode->i_sb->s_blocksize); 1090 EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1091 ext4_update_final_de(dir_block->b_data, 1092 inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size, 1093 inode->i_sb->s_blocksize - csum_size); 1094 1095 if (csum_size) 1096 ext4_initialize_dirent_tail(dir_block, 1097 inode->i_sb->s_blocksize); 1098 set_buffer_uptodate(dir_block); 1099 unlock_buffer(dir_block); 1100 err = ext4_handle_dirty_dirblock(handle, inode, dir_block); 1101 if (err) 1102 return err; 1103 set_buffer_verified(dir_block); 1104 return ext4_mark_inode_dirty(handle, inode); 1105 } 1106 1107 static int ext4_convert_inline_data_nolock(handle_t *handle, 1108 struct inode *inode, 1109 struct ext4_iloc *iloc) 1110 { 1111 int error; 1112 void *buf = NULL; 1113 struct buffer_head *data_bh = NULL; 1114 struct ext4_map_blocks map; 1115 int inline_size; 1116 1117 inline_size = ext4_get_inline_size(inode); 1118 buf = kmalloc(inline_size, GFP_NOFS); 1119 if (!buf) { 1120 error = -ENOMEM; 1121 goto out; 1122 } 1123 1124 error = ext4_read_inline_data(inode, buf, inline_size, iloc); 1125 if (error < 0) 1126 goto out; 1127 1128 /* 1129 * Make sure the inline directory entries pass checks before we try to 1130 * convert them, so that we avoid touching stuff that needs fsck. 
1131 */ 1132 if (S_ISDIR(inode->i_mode)) { 1133 error = ext4_check_all_de(inode, iloc->bh, 1134 buf + EXT4_INLINE_DOTDOT_SIZE, 1135 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1136 if (error) 1137 goto out; 1138 } 1139 1140 error = ext4_destroy_inline_data_nolock(handle, inode); 1141 if (error) 1142 goto out; 1143 1144 map.m_lblk = 0; 1145 map.m_len = 1; 1146 map.m_flags = 0; 1147 error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE); 1148 if (error < 0) 1149 goto out_restore; 1150 if (!(map.m_flags & EXT4_MAP_MAPPED)) { 1151 error = -EIO; 1152 goto out_restore; 1153 } 1154 1155 data_bh = sb_getblk(inode->i_sb, map.m_pblk); 1156 if (!data_bh) { 1157 error = -ENOMEM; 1158 goto out_restore; 1159 } 1160 1161 lock_buffer(data_bh); 1162 error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh, 1163 EXT4_JTR_NONE); 1164 if (error) { 1165 unlock_buffer(data_bh); 1166 error = -EIO; 1167 goto out_restore; 1168 } 1169 memset(data_bh->b_data, 0, inode->i_sb->s_blocksize); 1170 1171 if (!S_ISDIR(inode->i_mode)) { 1172 memcpy(data_bh->b_data, buf, inline_size); 1173 set_buffer_uptodate(data_bh); 1174 unlock_buffer(data_bh); 1175 error = ext4_handle_dirty_metadata(handle, 1176 inode, data_bh); 1177 } else { 1178 error = ext4_finish_convert_inline_dir(handle, inode, data_bh, 1179 buf, inline_size); 1180 } 1181 1182 out_restore: 1183 if (error) 1184 ext4_restore_inline_data(handle, inode, iloc, buf, inline_size); 1185 1186 out: 1187 brelse(data_bh); 1188 kfree(buf); 1189 return error; 1190 } 1191 1192 /* 1193 * Try to add the new entry to the inline data. 1194 * If succeeds, return 0. If not, extended the inline dir and copied data to 1195 * the new created block. 
1196 */ 1197 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, 1198 struct inode *dir, struct inode *inode) 1199 { 1200 int ret, ret2, inline_size, no_expand; 1201 void *inline_start; 1202 struct ext4_iloc iloc; 1203 1204 ret = ext4_get_inode_loc(dir, &iloc); 1205 if (ret) 1206 return ret; 1207 1208 ext4_write_lock_xattr(dir, &no_expand); 1209 if (!ext4_has_inline_data(dir)) 1210 goto out; 1211 1212 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1213 EXT4_INLINE_DOTDOT_SIZE; 1214 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1215 1216 ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, 1217 inline_start, inline_size); 1218 if (ret != -ENOSPC) 1219 goto out; 1220 1221 /* check whether it can be inserted to inline xattr space. */ 1222 inline_size = EXT4_I(dir)->i_inline_size - 1223 EXT4_MIN_INLINE_DATA_SIZE; 1224 if (!inline_size) { 1225 /* Try to use the xattr space.*/ 1226 ret = ext4_update_inline_dir(handle, dir, &iloc); 1227 if (ret && ret != -ENOSPC) 1228 goto out; 1229 1230 inline_size = EXT4_I(dir)->i_inline_size - 1231 EXT4_MIN_INLINE_DATA_SIZE; 1232 } 1233 1234 if (inline_size) { 1235 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1236 1237 ret = ext4_add_dirent_to_inline(handle, fname, dir, 1238 inode, &iloc, inline_start, 1239 inline_size); 1240 1241 if (ret != -ENOSPC) 1242 goto out; 1243 } 1244 1245 /* 1246 * The inline space is filled up, so create a new block for it. 1247 * As the extent tree will be created, we have to save the inline 1248 * dir first. 1249 */ 1250 ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); 1251 1252 out: 1253 ext4_write_unlock_xattr(dir, &no_expand); 1254 ret2 = ext4_mark_inode_dirty(handle, dir); 1255 if (unlikely(ret2 && !ret)) 1256 ret = ret2; 1257 brelse(iloc.bh); 1258 return ret; 1259 } 1260 1261 /* 1262 * This function fills a red-black tree with information from an 1263 * inlined dir. 
It returns the number directory entries loaded 1264 * into the tree. If there is an error it is returned in err. 1265 */ 1266 int ext4_inlinedir_to_tree(struct file *dir_file, 1267 struct inode *dir, ext4_lblk_t block, 1268 struct dx_hash_info *hinfo, 1269 __u32 start_hash, __u32 start_minor_hash, 1270 int *has_inline_data) 1271 { 1272 int err = 0, count = 0; 1273 unsigned int parent_ino; 1274 int pos; 1275 struct ext4_dir_entry_2 *de; 1276 struct inode *inode = file_inode(dir_file); 1277 int ret, inline_size = 0; 1278 struct ext4_iloc iloc; 1279 void *dir_buf = NULL; 1280 struct ext4_dir_entry_2 fake; 1281 struct fscrypt_str tmp_str; 1282 1283 ret = ext4_get_inode_loc(inode, &iloc); 1284 if (ret) 1285 return ret; 1286 1287 down_read(&EXT4_I(inode)->xattr_sem); 1288 if (!ext4_has_inline_data(inode)) { 1289 up_read(&EXT4_I(inode)->xattr_sem); 1290 *has_inline_data = 0; 1291 goto out; 1292 } 1293 1294 inline_size = ext4_get_inline_size(inode); 1295 dir_buf = kmalloc(inline_size, GFP_NOFS); 1296 if (!dir_buf) { 1297 ret = -ENOMEM; 1298 up_read(&EXT4_I(inode)->xattr_sem); 1299 goto out; 1300 } 1301 1302 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1303 up_read(&EXT4_I(inode)->xattr_sem); 1304 if (ret < 0) 1305 goto out; 1306 1307 pos = 0; 1308 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1309 while (pos < inline_size) { 1310 /* 1311 * As inlined dir doesn't store any information about '.' and 1312 * only the inode number of '..' is stored, we have to handle 1313 * them differently. 
1314 */ 1315 if (pos == 0) { 1316 fake.inode = cpu_to_le32(inode->i_ino); 1317 fake.name_len = 1; 1318 strcpy(fake.name, "."); 1319 fake.rec_len = ext4_rec_len_to_disk( 1320 ext4_dir_rec_len(fake.name_len, NULL), 1321 inline_size); 1322 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1323 de = &fake; 1324 pos = EXT4_INLINE_DOTDOT_OFFSET; 1325 } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) { 1326 fake.inode = cpu_to_le32(parent_ino); 1327 fake.name_len = 2; 1328 strcpy(fake.name, ".."); 1329 fake.rec_len = ext4_rec_len_to_disk( 1330 ext4_dir_rec_len(fake.name_len, NULL), 1331 inline_size); 1332 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1333 de = &fake; 1334 pos = EXT4_INLINE_DOTDOT_SIZE; 1335 } else { 1336 de = (struct ext4_dir_entry_2 *)(dir_buf + pos); 1337 pos += ext4_rec_len_from_disk(de->rec_len, inline_size); 1338 if (ext4_check_dir_entry(inode, dir_file, de, 1339 iloc.bh, dir_buf, 1340 inline_size, pos)) { 1341 ret = count; 1342 goto out; 1343 } 1344 } 1345 1346 if (ext4_hash_in_dirent(dir)) { 1347 hinfo->hash = EXT4_DIRENT_HASH(de); 1348 hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); 1349 } else { 1350 err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); 1351 if (err) { 1352 ret = err; 1353 goto out; 1354 } 1355 } 1356 if ((hinfo->hash < start_hash) || 1357 ((hinfo->hash == start_hash) && 1358 (hinfo->minor_hash < start_minor_hash))) 1359 continue; 1360 if (de->inode == 0) 1361 continue; 1362 tmp_str.name = de->name; 1363 tmp_str.len = de->name_len; 1364 err = ext4_htree_store_dirent(dir_file, hinfo->hash, 1365 hinfo->minor_hash, de, &tmp_str); 1366 if (err) { 1367 ret = err; 1368 goto out; 1369 } 1370 count++; 1371 } 1372 ret = count; 1373 out: 1374 kfree(dir_buf); 1375 brelse(iloc.bh); 1376 return ret; 1377 } 1378 1379 /* 1380 * So this function is called when the volume is mkfsed with 1381 * dir_index disabled. 
In order to keep f_pos persistent 1382 * after we convert from an inlined dir to a blocked based, 1383 * we just pretend that we are a normal dir and return the 1384 * offset as if '.' and '..' really take place. 1385 * 1386 */ 1387 int ext4_read_inline_dir(struct file *file, 1388 struct dir_context *ctx, 1389 int *has_inline_data) 1390 { 1391 unsigned int offset, parent_ino; 1392 int i; 1393 struct ext4_dir_entry_2 *de; 1394 struct super_block *sb; 1395 struct inode *inode = file_inode(file); 1396 int ret, inline_size = 0; 1397 struct ext4_iloc iloc; 1398 void *dir_buf = NULL; 1399 int dotdot_offset, dotdot_size, extra_offset, extra_size; 1400 struct dir_private_info *info = file->private_data; 1401 1402 ret = ext4_get_inode_loc(inode, &iloc); 1403 if (ret) 1404 return ret; 1405 1406 down_read(&EXT4_I(inode)->xattr_sem); 1407 if (!ext4_has_inline_data(inode)) { 1408 up_read(&EXT4_I(inode)->xattr_sem); 1409 *has_inline_data = 0; 1410 goto out; 1411 } 1412 1413 inline_size = ext4_get_inline_size(inode); 1414 dir_buf = kmalloc(inline_size, GFP_NOFS); 1415 if (!dir_buf) { 1416 ret = -ENOMEM; 1417 up_read(&EXT4_I(inode)->xattr_sem); 1418 goto out; 1419 } 1420 1421 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1422 up_read(&EXT4_I(inode)->xattr_sem); 1423 if (ret < 0) 1424 goto out; 1425 1426 ret = 0; 1427 sb = inode->i_sb; 1428 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1429 offset = ctx->pos; 1430 1431 /* 1432 * dotdot_offset and dotdot_size is the real offset and 1433 * size for ".." and "." if the dir is block based while 1434 * the real size for them are only EXT4_INLINE_DOTDOT_SIZE. 1435 * So we will use extra_offset and extra_size to indicate them 1436 * during the inline dir iteration. 
1437 */ 1438 dotdot_offset = ext4_dir_rec_len(1, NULL); 1439 dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL); 1440 extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; 1441 extra_size = extra_offset + inline_size; 1442 1443 /* 1444 * If the cookie has changed since the last call to 1445 * readdir(2), then we might be pointing to an invalid 1446 * dirent right now. Scan from the start of the inline 1447 * dir to make sure. 1448 */ 1449 if (!inode_eq_iversion(inode, info->cookie)) { 1450 for (i = 0; i < extra_size && i < offset;) { 1451 /* 1452 * "." is with offset 0 and 1453 * ".." is dotdot_offset. 1454 */ 1455 if (!i) { 1456 i = dotdot_offset; 1457 continue; 1458 } else if (i == dotdot_offset) { 1459 i = dotdot_size; 1460 continue; 1461 } 1462 /* for other entry, the real offset in 1463 * the buf has to be tuned accordingly. 1464 */ 1465 de = (struct ext4_dir_entry_2 *) 1466 (dir_buf + i - extra_offset); 1467 /* It's too expensive to do a full 1468 * dirent test each time round this 1469 * loop, but we do have to test at 1470 * least that it is non-zero. A 1471 * failure will be detected in the 1472 * dirent test below. 
*/ 1473 if (ext4_rec_len_from_disk(de->rec_len, extra_size) 1474 < ext4_dir_rec_len(1, NULL)) 1475 break; 1476 i += ext4_rec_len_from_disk(de->rec_len, 1477 extra_size); 1478 } 1479 offset = i; 1480 ctx->pos = offset; 1481 info->cookie = inode_query_iversion(inode); 1482 } 1483 1484 while (ctx->pos < extra_size) { 1485 if (ctx->pos == 0) { 1486 if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) 1487 goto out; 1488 ctx->pos = dotdot_offset; 1489 continue; 1490 } 1491 1492 if (ctx->pos == dotdot_offset) { 1493 if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) 1494 goto out; 1495 ctx->pos = dotdot_size; 1496 continue; 1497 } 1498 1499 de = (struct ext4_dir_entry_2 *) 1500 (dir_buf + ctx->pos - extra_offset); 1501 if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, 1502 extra_size, ctx->pos)) 1503 goto out; 1504 if (le32_to_cpu(de->inode)) { 1505 if (!dir_emit(ctx, de->name, de->name_len, 1506 le32_to_cpu(de->inode), 1507 get_dtype(sb, de->file_type))) 1508 goto out; 1509 } 1510 ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); 1511 } 1512 out: 1513 kfree(dir_buf); 1514 brelse(iloc.bh); 1515 return ret; 1516 } 1517 1518 void *ext4_read_inline_link(struct inode *inode) 1519 { 1520 struct ext4_iloc iloc; 1521 int ret, inline_size; 1522 void *link; 1523 1524 ret = ext4_get_inode_loc(inode, &iloc); 1525 if (ret) 1526 return ERR_PTR(ret); 1527 1528 ret = -ENOMEM; 1529 inline_size = ext4_get_inline_size(inode); 1530 link = kmalloc(inline_size + 1, GFP_NOFS); 1531 if (!link) 1532 goto out; 1533 1534 ret = ext4_read_inline_data(inode, link, inline_size, &iloc); 1535 if (ret < 0) { 1536 kfree(link); 1537 goto out; 1538 } 1539 nd_terminate_link(link, inode->i_size, ret); 1540 out: 1541 if (ret < 0) 1542 link = ERR_PTR(ret); 1543 brelse(iloc.bh); 1544 return link; 1545 } 1546 1547 struct buffer_head *ext4_get_first_inline_block(struct inode *inode, 1548 struct ext4_dir_entry_2 **parent_de, 1549 int *retval) 1550 { 1551 struct ext4_iloc iloc; 1552 1553 *retval = 
ext4_get_inode_loc(inode, &iloc); 1554 if (*retval) 1555 return NULL; 1556 1557 *parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1558 1559 return iloc.bh; 1560 } 1561 1562 /* 1563 * Try to create the inline data for the new dir. 1564 * If it succeeds, return 0, otherwise return the error. 1565 * In case of ENOSPC, the caller should create the normal disk layout dir. 1566 */ 1567 int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, 1568 struct inode *inode) 1569 { 1570 int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1571 struct ext4_iloc iloc; 1572 struct ext4_dir_entry_2 *de; 1573 1574 ret = ext4_get_inode_loc(inode, &iloc); 1575 if (ret) 1576 return ret; 1577 1578 ret = ext4_prepare_inline_data(handle, inode, inline_size); 1579 if (ret) 1580 goto out; 1581 1582 /* 1583 * For inline dir, we only save the inode information for the ".." 1584 * and create a fake dentry to cover the left space. 1585 */ 1586 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1587 de->inode = cpu_to_le32(parent->i_ino); 1588 de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE); 1589 de->inode = 0; 1590 de->rec_len = ext4_rec_len_to_disk( 1591 inline_size - EXT4_INLINE_DOTDOT_SIZE, 1592 inline_size); 1593 set_nlink(inode, 2); 1594 inode->i_size = EXT4_I(inode)->i_disksize = inline_size; 1595 out: 1596 brelse(iloc.bh); 1597 return ret; 1598 } 1599 1600 struct buffer_head *ext4_find_inline_entry(struct inode *dir, 1601 struct ext4_filename *fname, 1602 struct ext4_dir_entry_2 **res_dir, 1603 int *has_inline_data) 1604 { 1605 struct ext4_xattr_ibody_find is = { 1606 .s = { .not_found = -ENODATA, }, 1607 }; 1608 struct ext4_xattr_info i = { 1609 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1610 .name = EXT4_XATTR_SYSTEM_DATA, 1611 }; 1612 int ret; 1613 void *inline_start; 1614 int inline_size; 1615 1616 ret = ext4_get_inode_loc(dir, &is.iloc); 1617 if (ret) 1618 return ERR_PTR(ret); 1619 1620 
down_read(&EXT4_I(dir)->xattr_sem); 1621 1622 ret = ext4_xattr_ibody_find(dir, &i, &is); 1623 if (ret) 1624 goto out; 1625 1626 if (!ext4_has_inline_data(dir)) { 1627 *has_inline_data = 0; 1628 goto out; 1629 } 1630 1631 inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + 1632 EXT4_INLINE_DOTDOT_SIZE; 1633 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1634 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1635 dir, fname, 0, res_dir); 1636 if (ret == 1) 1637 goto out_find; 1638 if (ret < 0) 1639 goto out; 1640 1641 if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) 1642 goto out; 1643 1644 inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); 1645 inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; 1646 1647 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1648 dir, fname, 0, res_dir); 1649 if (ret == 1) 1650 goto out_find; 1651 1652 out: 1653 brelse(is.iloc.bh); 1654 if (ret < 0) 1655 is.iloc.bh = ERR_PTR(ret); 1656 else 1657 is.iloc.bh = NULL; 1658 out_find: 1659 up_read(&EXT4_I(dir)->xattr_sem); 1660 return is.iloc.bh; 1661 } 1662 1663 int ext4_delete_inline_entry(handle_t *handle, 1664 struct inode *dir, 1665 struct ext4_dir_entry_2 *de_del, 1666 struct buffer_head *bh, 1667 int *has_inline_data) 1668 { 1669 int err, inline_size, no_expand; 1670 struct ext4_iloc iloc; 1671 void *inline_start; 1672 1673 err = ext4_get_inode_loc(dir, &iloc); 1674 if (err) 1675 return err; 1676 1677 ext4_write_lock_xattr(dir, &no_expand); 1678 if (!ext4_has_inline_data(dir)) { 1679 *has_inline_data = 0; 1680 goto out; 1681 } 1682 1683 if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) < 1684 EXT4_MIN_INLINE_DATA_SIZE) { 1685 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1686 EXT4_INLINE_DOTDOT_SIZE; 1687 inline_size = EXT4_MIN_INLINE_DATA_SIZE - 1688 EXT4_INLINE_DOTDOT_SIZE; 1689 } else { 1690 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1691 inline_size = 
ext4_get_inline_size(dir) - 1692 EXT4_MIN_INLINE_DATA_SIZE; 1693 } 1694 1695 BUFFER_TRACE(bh, "get_write_access"); 1696 err = ext4_journal_get_write_access(handle, dir->i_sb, bh, 1697 EXT4_JTR_NONE); 1698 if (err) 1699 goto out; 1700 1701 err = ext4_generic_delete_entry(dir, de_del, bh, 1702 inline_start, inline_size, 0); 1703 if (err) 1704 goto out; 1705 1706 ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); 1707 out: 1708 ext4_write_unlock_xattr(dir, &no_expand); 1709 if (likely(err == 0)) 1710 err = ext4_mark_inode_dirty(handle, dir); 1711 brelse(iloc.bh); 1712 if (err != -ENOENT) 1713 ext4_std_error(dir->i_sb, err); 1714 return err; 1715 } 1716 1717 /* 1718 * Get the inline dentry at offset. 1719 */ 1720 static inline struct ext4_dir_entry_2 * 1721 ext4_get_inline_entry(struct inode *inode, 1722 struct ext4_iloc *iloc, 1723 unsigned int offset, 1724 void **inline_start, 1725 int *inline_size) 1726 { 1727 void *inline_pos; 1728 1729 BUG_ON(offset > ext4_get_inline_size(inode)); 1730 1731 if (offset < EXT4_MIN_INLINE_DATA_SIZE) { 1732 inline_pos = (void *)ext4_raw_inode(iloc)->i_block; 1733 *inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1734 } else { 1735 inline_pos = ext4_get_inline_xattr_pos(inode, iloc); 1736 offset -= EXT4_MIN_INLINE_DATA_SIZE; 1737 *inline_size = ext4_get_inline_size(inode) - 1738 EXT4_MIN_INLINE_DATA_SIZE; 1739 } 1740 1741 if (inline_start) 1742 *inline_start = inline_pos; 1743 return (struct ext4_dir_entry_2 *)(inline_pos + offset); 1744 } 1745 1746 bool empty_inline_dir(struct inode *dir, int *has_inline_data) 1747 { 1748 int err, inline_size; 1749 struct ext4_iloc iloc; 1750 size_t inline_len; 1751 void *inline_pos; 1752 unsigned int offset; 1753 struct ext4_dir_entry_2 *de; 1754 bool ret = false; 1755 1756 err = ext4_get_inode_loc(dir, &iloc); 1757 if (err) { 1758 EXT4_ERROR_INODE_ERR(dir, -err, 1759 "error %d getting inode %lu block", 1760 err, dir->i_ino); 1761 return false; 1762 } 1763 1764 down_read(&EXT4_I(dir)->xattr_sem); 
1765 if (!ext4_has_inline_data(dir)) { 1766 *has_inline_data = 0; 1767 ret = true; 1768 goto out; 1769 } 1770 1771 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1772 if (!le32_to_cpu(de->inode)) { 1773 ext4_warning(dir->i_sb, 1774 "bad inline directory (dir #%lu) - no `..'", 1775 dir->i_ino); 1776 goto out; 1777 } 1778 1779 inline_len = ext4_get_inline_size(dir); 1780 offset = EXT4_INLINE_DOTDOT_SIZE; 1781 while (offset < inline_len) { 1782 de = ext4_get_inline_entry(dir, &iloc, offset, 1783 &inline_pos, &inline_size); 1784 if (ext4_check_dir_entry(dir, NULL, de, 1785 iloc.bh, inline_pos, 1786 inline_size, offset)) { 1787 ext4_warning(dir->i_sb, 1788 "bad inline directory (dir #%lu) - " 1789 "inode %u, rec_len %u, name_len %d" 1790 "inline size %d", 1791 dir->i_ino, le32_to_cpu(de->inode), 1792 le16_to_cpu(de->rec_len), de->name_len, 1793 inline_size); 1794 goto out; 1795 } 1796 if (le32_to_cpu(de->inode)) { 1797 goto out; 1798 } 1799 offset += ext4_rec_len_from_disk(de->rec_len, inline_size); 1800 } 1801 1802 ret = true; 1803 out: 1804 up_read(&EXT4_I(dir)->xattr_sem); 1805 brelse(iloc.bh); 1806 return ret; 1807 } 1808 1809 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) 1810 { 1811 int ret, no_expand; 1812 1813 ext4_write_lock_xattr(inode, &no_expand); 1814 ret = ext4_destroy_inline_data_nolock(handle, inode); 1815 ext4_write_unlock_xattr(inode, &no_expand); 1816 1817 return ret; 1818 } 1819 1820 int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) 1821 { 1822 __u64 addr; 1823 int error = -EAGAIN; 1824 struct ext4_iloc iloc; 1825 1826 down_read(&EXT4_I(inode)->xattr_sem); 1827 if (!ext4_has_inline_data(inode)) 1828 goto out; 1829 1830 error = ext4_get_inode_loc(inode, &iloc); 1831 if (error) 1832 goto out; 1833 1834 addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; 1835 addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; 1836 addr += offsetof(struct ext4_inode, i_block); 1837 1838 
brelse(iloc.bh); 1839 1840 iomap->addr = addr; 1841 iomap->offset = 0; 1842 iomap->length = min_t(loff_t, ext4_get_inline_size(inode), 1843 i_size_read(inode)); 1844 iomap->type = IOMAP_INLINE; 1845 iomap->flags = 0; 1846 1847 out: 1848 up_read(&EXT4_I(inode)->xattr_sem); 1849 return error; 1850 } 1851 1852 int ext4_inline_data_truncate(struct inode *inode, int *has_inline) 1853 { 1854 handle_t *handle; 1855 int inline_size, value_len, needed_blocks, no_expand, err = 0; 1856 size_t i_size; 1857 void *value = NULL; 1858 struct ext4_xattr_ibody_find is = { 1859 .s = { .not_found = -ENODATA, }, 1860 }; 1861 struct ext4_xattr_info i = { 1862 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1863 .name = EXT4_XATTR_SYSTEM_DATA, 1864 }; 1865 1866 1867 needed_blocks = ext4_writepage_trans_blocks(inode); 1868 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); 1869 if (IS_ERR(handle)) 1870 return PTR_ERR(handle); 1871 1872 ext4_write_lock_xattr(inode, &no_expand); 1873 if (!ext4_has_inline_data(inode)) { 1874 ext4_write_unlock_xattr(inode, &no_expand); 1875 *has_inline = 0; 1876 ext4_journal_stop(handle); 1877 return 0; 1878 } 1879 1880 if ((err = ext4_orphan_add(handle, inode)) != 0) 1881 goto out; 1882 1883 if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) 1884 goto out; 1885 1886 down_write(&EXT4_I(inode)->i_data_sem); 1887 i_size = inode->i_size; 1888 inline_size = ext4_get_inline_size(inode); 1889 EXT4_I(inode)->i_disksize = i_size; 1890 1891 if (i_size < inline_size) { 1892 /* 1893 * if there's inline data to truncate and this file was 1894 * converted to extents after that inline data was written, 1895 * the extent status cache must be cleared to avoid leaving 1896 * behind stale delayed allocated extent entries 1897 */ 1898 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 1899 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 1900 1901 /* Clear the content in the xattr space. 
*/ 1902 if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { 1903 if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) 1904 goto out_error; 1905 1906 BUG_ON(is.s.not_found); 1907 1908 value_len = le32_to_cpu(is.s.here->e_value_size); 1909 value = kmalloc(value_len, GFP_NOFS); 1910 if (!value) { 1911 err = -ENOMEM; 1912 goto out_error; 1913 } 1914 1915 err = ext4_xattr_ibody_get(inode, i.name_index, 1916 i.name, value, value_len); 1917 if (err <= 0) 1918 goto out_error; 1919 1920 i.value = value; 1921 i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? 1922 i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; 1923 err = ext4_xattr_ibody_set(handle, inode, &i, &is); 1924 if (err) 1925 goto out_error; 1926 } 1927 1928 /* Clear the content within i_blocks. */ 1929 if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { 1930 void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; 1931 memset(p + i_size, 0, 1932 EXT4_MIN_INLINE_DATA_SIZE - i_size); 1933 } 1934 1935 EXT4_I(inode)->i_inline_size = i_size < 1936 EXT4_MIN_INLINE_DATA_SIZE ? 
1937 EXT4_MIN_INLINE_DATA_SIZE : i_size; 1938 } 1939 1940 out_error: 1941 up_write(&EXT4_I(inode)->i_data_sem); 1942 out: 1943 brelse(is.iloc.bh); 1944 ext4_write_unlock_xattr(inode, &no_expand); 1945 kfree(value); 1946 if (inode->i_nlink) 1947 ext4_orphan_del(handle, inode); 1948 1949 if (err == 0) { 1950 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1951 err = ext4_mark_inode_dirty(handle, inode); 1952 if (IS_SYNC(inode)) 1953 ext4_handle_sync(handle); 1954 } 1955 ext4_journal_stop(handle); 1956 return err; 1957 } 1958 1959 int ext4_convert_inline_data(struct inode *inode) 1960 { 1961 int error, needed_blocks, no_expand; 1962 handle_t *handle; 1963 struct ext4_iloc iloc; 1964 1965 if (!ext4_has_inline_data(inode)) { 1966 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1967 return 0; 1968 } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 1969 /* 1970 * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is 1971 * cleared. This means we are in the middle of moving of 1972 * inline data to delay allocated block. Just force writeout 1973 * here to finish conversion. 1974 */ 1975 error = filemap_flush(inode->i_mapping); 1976 if (error) 1977 return error; 1978 if (!ext4_has_inline_data(inode)) 1979 return 0; 1980 } 1981 1982 needed_blocks = ext4_writepage_trans_blocks(inode); 1983 1984 iloc.bh = NULL; 1985 error = ext4_get_inode_loc(inode, &iloc); 1986 if (error) 1987 return error; 1988 1989 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 1990 if (IS_ERR(handle)) { 1991 error = PTR_ERR(handle); 1992 goto out_free; 1993 } 1994 1995 ext4_write_lock_xattr(inode, &no_expand); 1996 if (ext4_has_inline_data(inode)) 1997 error = ext4_convert_inline_data_nolock(handle, inode, &iloc); 1998 ext4_write_unlock_xattr(inode, &no_expand); 1999 ext4_journal_stop(handle); 2000 out_free: 2001 brelse(iloc.bh); 2002 return error; 2003 } 2004