// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
#include "compress.h"
#include "disk_groups.h"
#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "io_write.h"
#include "move.h"
#include "rebalance.h"
#include "subvolume.h"
#include "super-io.h"
#include "trace.h"

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched/cputime.h>

/* bch_extent_rebalance: */

static const struct bch_extent_rebalance *bch2_bkey_ptrs_rebalance_opts(struct bkey_ptrs_c ptrs)
{
	const union bch_extent_entry *entry;

	bkey_extent_entry_for_each(ptrs, entry)
		if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
			return &entry->rebalance;

	return NULL;
}

static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
{
	return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k));
}

static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
						    struct bch_io_opts *opts,
						    struct bkey_s_c k,
						    struct bkey_ptrs_c ptrs)
{
	if (!opts->background_compression)
		return 0;

	unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned ptr_bit = 1;
	unsigned rewrite_ptrs = 0;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
		    p.ptr.unwritten)
			return 0;

		if (!p.ptr.cached && p.crc.compression_type != compression_type)
			rewrite_ptrs |= ptr_bit;
		ptr_bit <<= 1;
	}

	return rewrite_ptrs;
}

static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
						struct bch_io_opts *opts,
						struct bkey_ptrs_c ptrs)
{
	if (!opts->background_target ||
	    !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target))
		return 0;

	unsigned ptr_bit = 1;
	unsigned rewrite_ptrs = 0;

	rcu_read_lock();
	bkey_for_each_ptr(ptrs, ptr) {
		if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
			rewrite_ptrs |= ptr_bit;
		ptr_bit <<= 1;
	}
	rcu_read_unlock();

	return rewrite_ptrs;
}

static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
					      struct bch_io_opts *opts,
					      struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
		return 0;

	return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
		bch2_bkey_ptrs_need_move(c, opts, ptrs);
}

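/*
 * Return the number of sectors of @k that rebalance would still need to
 * rewrite, per the bch_extent_rebalance entry stored in the key; this is the
 * per-extent contribution to the pending rebalance work accounting.
 */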
u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

	const struct bch_extent_rebalance *opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
	if (!opts)
		return 0;

	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
		return 0;

	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	u64 sectors = 0;

	if (opts->background_compression) {
		unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);

		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
			if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
			    p.ptr.unwritten) {
				sectors = 0;
				goto incompressible;
			}

			if (!p.ptr.cached && p.crc.compression_type != compression_type)
				sectors += p.crc.compressed_size;
		}
	}
incompressible:
	if (opts->background_target) {
		rcu_read_lock();
		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
			if (!p.ptr.cached &&
			    !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
				sectors += p.crc.compressed_size;
		rcu_read_unlock();
	}

	return sectors;
}

static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts,
					     struct bkey_s_c k)
{
	if (!bkey_extent_is_direct_data(k.k))
		return 0;

	const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k);

	if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) {
		struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, opts);
		return old == NULL || memcmp(old, &new, sizeof(new));
	} else {
		return old != NULL;
	}
}

int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts,
				  struct bkey_i *_k)
{
	if (!bkey_extent_is_direct_data(&_k->k))
		return 0;

	struct bkey_s k = bkey_i_to_s(_k);
	struct bch_extent_rebalance *old =
		(struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);

	if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) {
		if (!old) {
			old = bkey_val_end(k);
			k.k->u64s += sizeof(*old) / sizeof(u64);
		}

		*old = io_opts_to_rebalance_opts(c, opts);
	} else {
		if (old)
			extent_entry_drop(k, (union bch_extent_entry *) old);
	}

	return 0;
}

int bch2_get_update_rebalance_opts(struct btree_trans *trans,
				   struct bch_io_opts *io_opts,
				   struct btree_iter *iter,
				   struct bkey_s_c k)
{
	BUG_ON(iter->flags & BTREE_ITER_is_extents);
	BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);

	const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v
		? bch2_bkey_rebalance_opts(k) : NULL;
	if (r) {
#define x(_name)							\
		if (r->_name##_from_inode) {				\
			io_opts->_name = r->_name;			\
			io_opts->_name##_from_inode = true;		\
		}
		BCH_REBALANCE_OPTS()
#undef x
	}

	if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k))
		return 0;

	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bkey_reassemble(n, k);

	/* On successful transaction commit, @k was invalidated: */

	return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
		bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
		bch2_trans_commit(trans, NULL, NULL, 0) ?:
		-BCH_ERR_transaction_restart_nested;
}

#define REBALANCE_WORK_SCAN_OFFSET	(U64_MAX - 1)

static const char * const bch2_rebalance_state_strs[] = {
#define x(t) #t,
	BCH_REBALANCE_STATES()
	NULL
#undef x
};

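/*
 * Request a scan of an inode (or of the whole filesystem, for inum 0) by
 * writing a cookie at a magic offset in the rebalance_work btree; the cookie
 * value is incremented so a scan already in flight isn't mistaken for the one
 * being requested here.
 */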
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_i_cookie *cookie;
	u64 v;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
			     SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
			     BTREE_ITER_intent);
	k = bch2_btree_iter_peek_slot(trans, &iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	v = k.k->type == KEY_TYPE_cookie
		? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
		: 0;

	cookie = bch2_trans_kmalloc(trans, sizeof(*cookie));
	ret = PTR_ERR_OR_ZERO(cookie);
	if (ret)
		goto err;

	bkey_cookie_init(&cookie->k_i);
	cookie->k.p = iter.pos;
	cookie->v.cookie = cpu_to_le64(v + 1);

	ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
{
	int ret = bch2_trans_commit_do(c, NULL, NULL,
				       BCH_TRANS_COMMIT_no_enospc,
				       bch2_set_rebalance_needs_scan_trans(trans, inum));
	bch2_rebalance_wakeup(c);
	return ret;
}

int bch2_set_fs_needs_rebalance(struct bch_fs *c)
{
	return bch2_set_rebalance_needs_scan(c, 0);
}

static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 v;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
			     SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
			     BTREE_ITER_intent);
	k = bch2_btree_iter_peek_slot(trans, &iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	v = k.k->type == KEY_TYPE_cookie
		? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
		: 0;

	if (v == cookie)
		ret = bch2_btree_delete_at(trans, &iter, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
					    struct btree_iter *work_iter)
{
	return !kthread_should_stop()
		? bch2_btree_iter_peek(trans, work_iter)
		: bkey_s_c_null;
}

static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
					   struct btree_iter *iter,
					   struct bkey_s_c k)
{
	if (k.k->type == KEY_TYPE_reflink_v || !bch2_bkey_rebalance_opts(k))
		return 0;

	struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	extent_entry_drop(bkey_i_to_s(n),
			  (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n)));
	return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

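/*
 * Look up the extent (or reflink_v, for work entries with inode 0) at
 * @work_pos and work out how it needs to be rewritten; returns bkey_s_c_null
 * if there's nothing to do and the stale rebalance entry was cleared.
 */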
static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
					     struct bpos work_pos,
					     struct btree_iter *extent_iter,
					     struct bch_io_opts *io_opts,
					     struct data_update_opts *data_opts)
{
	struct bch_fs *c = trans->c;

	bch2_trans_iter_exit(trans, extent_iter);
	bch2_trans_iter_init(trans, extent_iter,
			     work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink,
			     work_pos,
			     BTREE_ITER_all_snapshots);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, extent_iter);
	if (bkey_err(k))
		return k;

	int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k);
	if (ret)
		return bkey_s_c_err(ret);

	memset(data_opts, 0, sizeof(*data_opts));
	data_opts->rewrite_ptrs	= bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
	data_opts->target	= io_opts->background_target;
	data_opts->write_flags	|= BCH_WRITE_only_specified_devs;

	if (!data_opts->rewrite_ptrs) {
		/*
		 * device we would want to write to offline? devices in target
		 * changed?
		 *
		 * We'll now need a full scan before this extent is picked up
		 * again:
		 */
		int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k);
		if (ret)
			return bkey_s_c_err(ret);
		return bkey_s_c_null;
	}

	if (trace_rebalance_extent_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		prt_newline(&buf);

		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

		unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs);
		if (p) {
			prt_str(&buf, "compression=");
			bch2_compression_opt_to_text(&buf, io_opts->background_compression);
			prt_str(&buf, " ");
			bch2_prt_u64_base2(&buf, p);
			prt_newline(&buf);
		}

		p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs);
		if (p) {
			prt_str(&buf, "move=");
			bch2_target_to_text(&buf, c, io_opts->background_target);
			prt_str(&buf, " ");
			bch2_prt_u64_base2(&buf, p);
			prt_newline(&buf);
		}

		trace_rebalance_extent(c, buf.buf);
		printbuf_exit(&buf);
	}

	return k;
}

noinline_for_stack
static int do_rebalance_extent(struct moving_context *ctxt,
			       struct bpos work_pos,
			       struct btree_iter *extent_iter)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct bch_fs_rebalance *r = &trans->c->rebalance;
	struct data_update_opts data_opts;
	struct bch_io_opts io_opts;
	struct bkey_s_c k;
	struct bkey_buf sk;
	int ret;

	ctxt->stats = &r->work_stats;
	r->state = BCH_REBALANCE_working;

	bch2_bkey_buf_init(&sk);

	ret = bkey_err(k = next_rebalance_extent(trans, work_pos,
						 extent_iter, &io_opts, &data_opts));
	if (ret || !k.k)
		goto out;

	atomic64_add(k.k->size, &ctxt->stats->sectors_seen);

	/*
	 * The iterator gets unlocked by __bch2_read_extent - need to
	 * save a copy of @k elsewhere:
	 */
	bch2_bkey_buf_reassemble(&sk, c, k);
	k = bkey_i_to_s_c(sk.k);

	ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts);
	if (ret) {
		if (bch2_err_matches(ret, ENOMEM)) {
			/* memory allocation failure, wait for some IO to finish */
			bch2_move_ctxt_wait_for_io(ctxt);
			ret = -BCH_ERR_transaction_restart_nested;
		}

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto out;

		/* skip it and continue, XXX signal failure */
		ret = 0;
	}
out:
	bch2_bkey_buf_exit(&sk, c);
	return ret;
}

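/*
 * Handle a scan cookie from the rebalance_work btree: walk the extents of the
 * given inode (or the whole extents btree, for inum 0) so that
 * bch2_move_get_io_opts() can refresh their rebalance entries against the
 * current io options, then clear the cookie if it hasn't been bumped again.
 */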
static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct bch_fs_rebalance *r = &trans->c->rebalance;

	bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
	ctxt->stats = &r->scan_stats;

	if (!inum) {
		r->scan_start	= BBPOS_MIN;
		r->scan_end	= BBPOS_MAX;
	} else {
		r->scan_start	= BBPOS(BTREE_ID_extents, POS(inum, 0));
		r->scan_end	= BBPOS(BTREE_ID_extents, POS(inum, U64_MAX));
	}

	r->state = BCH_REBALANCE_scanning;

	struct per_snapshot_io_opts snapshot_io_opts;
	per_snapshot_io_opts_init(&snapshot_io_opts, c);

	int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
					 r->scan_start.pos, r->scan_end.pos,
					 BTREE_ITER_all_snapshots|
					 BTREE_ITER_not_extents|
					 BTREE_ITER_prefetch, k, ({
		ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);

		struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans,
					&snapshot_io_opts, iter.pos, &iter, k);
		PTR_ERR_OR_ZERO(io_opts);
	})) ?:
	commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_clear_rebalance_needs_scan(trans, inum, cookie));

	per_snapshot_io_opts_exit(&snapshot_io_opts);
	bch2_move_stats_exit(&r->scan_stats, trans->c);

	/*
	 * Ensure that the rebalance_work entries we created are seen by the
	 * next iteration of do_rebalance(), so we don't end up stuck in
	 * rebalance_wait():
	 */
	atomic64_inc(&r->scan_stats.sectors_seen);
	bch2_btree_write_buffer_flush_sync(trans);

	return ret;
}

static void rebalance_wait(struct bch_fs *c)
{
	struct bch_fs_rebalance *r = &c->rebalance;
	struct io_clock *clock = &c->io_clock[WRITE];
	u64 now = atomic64_read(&clock->now);
	u64 min_member_capacity = bch2_min_rw_member_capacity(c);

	if (min_member_capacity == U64_MAX)
		min_member_capacity = 128 * 2048;

	r->wait_iotime_end		= now + (min_member_capacity >> 6);

	if (r->state != BCH_REBALANCE_waiting) {
		r->wait_iotime_start	= now;
		r->wait_wallclock_start	= ktime_get_real_ns();
		r->state		= BCH_REBALANCE_waiting;
	}

	bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
}

static bool bch2_rebalance_enabled(struct bch_fs *c)
{
	return c->opts.rebalance_enabled &&
		!(c->opts.rebalance_on_ac_only &&
		  c->rebalance.on_battery);
}

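/*
 * Main rebalance loop: walk the rebalance_work btree, handling scan cookies
 * and extent entries as we find them, then wait on the write io clock when
 * there's no work left.
 */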
static int do_rebalance(struct moving_context *ctxt)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct btree_iter rebalance_work_iter, extent_iter = {};
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_begin(trans);

	bch2_move_stats_init(&r->work_stats, "rebalance_work");
	bch2_move_stats_init(&r->scan_stats, "rebalance_scan");

	bch2_trans_iter_init(trans, &rebalance_work_iter,
			     BTREE_ID_rebalance_work, POS_MIN,
			     BTREE_ITER_all_snapshots);

	while (!bch2_move_ratelimit(ctxt)) {
		if (!bch2_rebalance_enabled(c)) {
			bch2_moving_ctxt_flush_all(ctxt);
			kthread_wait_freezable(bch2_rebalance_enabled(c) ||
					       kthread_should_stop());
		}

		if (kthread_should_stop())
			break;

		bch2_trans_begin(trans);

		ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter));
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret || !k.k)
			break;

		ret = k.k->type == KEY_TYPE_cookie
			? do_rebalance_scan(ctxt, k.k->p.inode,
					    le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie))
			: do_rebalance_extent(ctxt, k.k->p, &extent_iter);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

		bch2_btree_iter_advance(trans, &rebalance_work_iter);
	}

	bch2_trans_iter_exit(trans, &extent_iter);
	bch2_trans_iter_exit(trans, &rebalance_work_iter);
	bch2_move_stats_exit(&r->scan_stats, c);

	if (!ret &&
	    !kthread_should_stop() &&
	    !atomic64_read(&r->work_stats.sectors_seen) &&
	    !atomic64_read(&r->scan_stats.sectors_seen)) {
		bch2_moving_ctxt_flush_all(ctxt);
		bch2_trans_unlock_long(trans);
		rebalance_wait(c);
	}

	if (!bch2_err_matches(ret, EROFS))
		bch_err_fn(c, ret);
	return ret;
}

static int bch2_rebalance_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct moving_context ctxt;

	set_freezable();

	/*
	 * Data move operations can't run until after check_snapshots has
	 * completed, and bch2_snapshot_is_ancestor() is available.
	 */
	kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots ||
			       kthread_should_stop());

	bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
			      writepoint_ptr(&c->rebalance_write_point),
			      true);

	while (!kthread_should_stop() && !do_rebalance(&ctxt))
		;

	bch2_moving_ctxt_exit(&ctxt);

	return 0;
}

void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
{
	printbuf_tabstop_push(out, 32);

	struct bch_fs_rebalance *r = &c->rebalance;

	/* print pending work */
	struct disk_accounting_pos acc;
	disk_accounting_key_init(acc, rebalance_work);
	u64 v;
	bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);

	prt_printf(out, "pending work:\t");
	prt_human_readable_u64(out, v << 9);
	prt_printf(out, "\n\n");

	prt_str(out, bch2_rebalance_state_strs[r->state]);
	prt_newline(out);
	printbuf_indent_add(out, 2);

	switch (r->state) {
	case BCH_REBALANCE_waiting: {
		u64 now = atomic64_read(&c->io_clock[WRITE].now);

		prt_printf(out, "io wait duration:\t");
		bch2_prt_human_readable_s64(out, (r->wait_iotime_end - r->wait_iotime_start) << 9);
		prt_newline(out);

		prt_printf(out, "io wait remaining:\t");
		bch2_prt_human_readable_s64(out, (r->wait_iotime_end - now) << 9);
		prt_newline(out);

		prt_printf(out, "duration waited:\t");
		bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start);
		prt_newline(out);
		break;
	}
	case BCH_REBALANCE_working:
		bch2_move_stats_to_text(out, &r->work_stats);
		break;
	case BCH_REBALANCE_scanning:
		bch2_move_stats_to_text(out, &r->scan_stats);
		break;
	}
	prt_newline(out);

	rcu_read_lock();
	struct task_struct *t = rcu_dereference(c->rebalance.thread);
	if (t)
		get_task_struct(t);
	rcu_read_unlock();

	if (t) {
		bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL);
		put_task_struct(t);
	}

	printbuf_indent_sub(out, 2);
}

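/*
 * Tear down the rebalance thread: clear the thread pointer first so that new
 * wakeups see it gone, then stop the kthread and drop our task reference.
 */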
void bch2_rebalance_stop(struct bch_fs *c)
{
	struct task_struct *p;

	c->rebalance.pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&c->rebalance.pd.rate);

	p = rcu_dereference_protected(c->rebalance.thread, 1);
	c->rebalance.thread = NULL;

	if (p) {
		/* for synchronizing with bch2_rebalance_wakeup() */
		synchronize_rcu();

		kthread_stop(p);
		put_task_struct(p);
	}
}

int bch2_rebalance_start(struct bch_fs *c)
{
	struct task_struct *p;
	int ret;

	if (c->rebalance.thread)
		return 0;

	if (c->opts.nochanges)
		return 0;

	p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
	ret = PTR_ERR_OR_ZERO(p);
	bch_err_msg(c, ret, "creating rebalance thread");
	if (ret)
		return ret;

	get_task_struct(p);
	rcu_assign_pointer(c->rebalance.thread, p);
	wake_up_process(p);
	return 0;
}

#ifdef CONFIG_POWER_SUPPLY
#include <linux/power_supply.h>

static int bch2_rebalance_power_notifier(struct notifier_block *nb,
					 unsigned long event, void *data)
{
	struct bch_fs *c = container_of(nb, struct bch_fs, rebalance.power_notifier);

	c->rebalance.on_battery = !power_supply_is_system_supplied();
	bch2_rebalance_wakeup(c);
	return NOTIFY_OK;
}
#endif

void bch2_fs_rebalance_exit(struct bch_fs *c)
{
#ifdef CONFIG_POWER_SUPPLY
	power_supply_unreg_notifier(&c->rebalance.power_notifier);
#endif
}

int bch2_fs_rebalance_init(struct bch_fs *c)
{
	struct bch_fs_rebalance *r = &c->rebalance;

	bch2_pd_controller_init(&r->pd);

#ifdef CONFIG_POWER_SUPPLY
	r->power_notifier.notifier_call = bch2_rebalance_power_notifier;
	int ret = power_supply_reg_notifier(&r->power_notifier);
	if (ret)
		return ret;

	r->on_battery = !power_supply_is_system_supplied();
#endif
	return 0;
}

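/*
 * fsck: check one position in the merged walk of the reflink/extents btrees
 * against the rebalance_work btree, verifying that an extent has a
 * rebalance_work entry iff it still has pointers that need rewriting.
 */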
static int check_rebalance_work_one(struct btree_trans *trans,
				    struct btree_iter *extent_iter,
				    struct btree_iter *rebalance_iter,
				    struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c extent_k, rebalance_k;
	struct printbuf buf = PRINTBUF;

	int ret = bkey_err(extent_k	= bch2_btree_iter_peek(trans, extent_iter)) ?:
		  bkey_err(rebalance_k	= bch2_btree_iter_peek(trans, rebalance_iter));
	if (ret)
		return ret;

	if (!extent_k.k &&
	    extent_iter->btree_id == BTREE_ID_reflink &&
	    (!rebalance_k.k ||
	     rebalance_k.k->p.inode >= BCACHEFS_ROOT_INO)) {
		bch2_trans_iter_exit(trans, extent_iter);
		bch2_trans_iter_init(trans, extent_iter,
				     BTREE_ID_extents, POS_MIN,
				     BTREE_ITER_prefetch|
				     BTREE_ITER_all_snapshots);
		return -BCH_ERR_transaction_restart_nested;
	}

	if (!extent_k.k && !rebalance_k.k)
		return 1;

	int cmp = bpos_cmp(extent_k.k	 ? extent_k.k->p    : SPOS_MAX,
			   rebalance_k.k ? rebalance_k.k->p : SPOS_MAX);

	struct bkey deleted;
	bkey_init(&deleted);

	if (cmp < 0) {
		deleted.p = extent_k.k->p;
		rebalance_k.k = &deleted;
	} else if (cmp > 0) {
		deleted.p = rebalance_k.k->p;
		extent_k.k = &deleted;
	}

	bool should_have_rebalance =
		bch2_bkey_sectors_need_rebalance(c, extent_k) != 0;
	bool have_rebalance = rebalance_k.k->type == KEY_TYPE_set;

	if (should_have_rebalance != have_rebalance) {
		ret = bch2_btree_write_buffer_maybe_flush(trans, extent_k, last_flushed);
		if (ret)
			return ret;

		bch2_bkey_val_to_text(&buf, c, extent_k);
	}

	if (fsck_err_on(!should_have_rebalance && have_rebalance,
			trans, rebalance_work_incorrectly_set,
			"rebalance work incorrectly set\n%s", buf.buf)) {
		ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
						  extent_k.k->p, false);
		if (ret)
			goto err;
	}

	if (fsck_err_on(should_have_rebalance && !have_rebalance,
			trans, rebalance_work_incorrectly_unset,
			"rebalance work incorrectly unset\n%s", buf.buf)) {
		ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
						  extent_k.k->p, true);
		if (ret)
			goto err;
	}

	if (cmp <= 0)
		bch2_btree_iter_advance(trans, extent_iter);
	if (cmp >= 0)
		bch2_btree_iter_advance(trans, rebalance_iter);
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

int bch2_check_rebalance_work(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter rebalance_iter, extent_iter;
	int ret = 0;

	bch2_trans_iter_init(trans, &extent_iter,
			     BTREE_ID_reflink, POS_MIN,
			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &rebalance_iter,
			     BTREE_ID_rebalance_work, POS_MIN,
			     BTREE_ITER_prefetch);

	struct bkey_buf last_flushed;
	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	while (!ret) {
		bch2_trans_begin(trans);

		ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
	}

	bch2_bkey_buf_exit(&last_flushed, c);
	bch2_trans_iter_exit(trans, &extent_iter);
	bch2_trans_iter_exit(trans, &rebalance_iter);
	bch2_trans_put(trans);
	return ret < 0 ? ret : 0;
}