// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
#include "compress.h"
#include "disk_groups.h"
#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "io_write.h"
#include "move.h"
#include "rebalance.h"
#include "subvolume.h"
#include "super-io.h"
#include "trace.h"

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched/cputime.h>

/* bch_extent_rebalance: */

static const struct bch_extent_rebalance *bch2_bkey_ptrs_rebalance_opts(struct bkey_ptrs_c ptrs)
{
	const union bch_extent_entry *entry;

	bkey_extent_entry_for_each(ptrs, entry)
		if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
			return &entry->rebalance;

	return NULL;
}

static const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
{
	return bch2_bkey_ptrs_rebalance_opts(bch2_bkey_ptrs_c(k));
}

static inline unsigned bch2_bkey_ptrs_need_compress(struct bch_fs *c,
						    struct bch_io_opts *opts,
						    struct bkey_s_c k,
						    struct bkey_ptrs_c ptrs)
{
	if (!opts->background_compression)
		return 0;

	unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned ptr_bit = 1;
	unsigned rewrite_ptrs = 0;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
		    p.ptr.unwritten)
			return 0;

		if (!p.ptr.cached && p.crc.compression_type != compression_type)
			rewrite_ptrs |= ptr_bit;
		ptr_bit <<= 1;
	}

	return rewrite_ptrs;
}

static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
						struct bch_io_opts *opts,
						struct bkey_ptrs_c ptrs)
{
	if (!opts->background_target ||
	    !bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target))
		return 0;

	unsigned ptr_bit = 1;
	unsigned rewrite_ptrs = 0;

	guard(rcu)();
	bkey_for_each_ptr(ptrs, ptr) {
		if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
			rewrite_ptrs |= ptr_bit;
		ptr_bit <<= 1;
	}

	return rewrite_ptrs;
}

static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
					      struct bch_io_opts *opts,
					      struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
		return 0;

	return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
		bch2_bkey_ptrs_need_move(c, opts, ptrs);
}

u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

	const struct bch_extent_rebalance *opts = bch2_bkey_ptrs_rebalance_opts(ptrs);
	if (!opts)
		return 0;

	if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
		return 0;

	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	u64 sectors = 0;

	if (opts->background_compression) {
		unsigned compression_type = bch2_compression_opt_to_type(opts->background_compression);

		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
			if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
			    p.ptr.unwritten) {
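				/*
				 * An incompressible or unwritten pointer means
				 * background compression can't do anything for
				 * this extent: discard the partial count and
				 * only account for a pending move below.
				 */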
				sectors = 0;
				goto incompressible;
			}

			if (!p.ptr.cached && p.crc.compression_type != compression_type)
				sectors += p.crc.compressed_size;
		}
	}
incompressible:
	if (opts->background_target) {
		guard(rcu)();
		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
			if (!p.ptr.cached &&
			    !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
				sectors += p.crc.compressed_size;
	}

	return sectors;
}

static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opts *opts,
					     struct bkey_s_c k)
{
	if (!bkey_extent_is_direct_data(k.k))
		return 0;

	const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k);

	if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) {
		struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, opts);
		return old == NULL || memcmp(old, &new, sizeof(new));
	} else {
		return old != NULL;
	}
}

int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts,
				  struct bkey_i *_k)
{
	if (!bkey_extent_is_direct_data(&_k->k))
		return 0;

	struct bkey_s k = bkey_i_to_s(_k);
	struct bch_extent_rebalance *old =
		(struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);

	if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k.s_c)) {
		if (!old) {
			old = bkey_val_end(k);
			k.k->u64s += sizeof(*old) / sizeof(u64);
		}

		*old = io_opts_to_rebalance_opts(c, opts);
	} else {
		if (old)
			extent_entry_drop(k, (union bch_extent_entry *) old);
	}

	return 0;
}

int bch2_get_update_rebalance_opts(struct btree_trans *trans,
				   struct bch_io_opts *io_opts,
				   struct btree_iter *iter,
				   struct bkey_s_c k)
{
	BUG_ON(iter->flags & BTREE_ITER_is_extents);
	BUG_ON(iter->flags & BTREE_ITER_filter_snapshots);

	const struct bch_extent_rebalance *r = k.k->type == KEY_TYPE_reflink_v
		? bch2_bkey_rebalance_opts(k) : NULL;
	if (r) {
#define x(_name)						\
		if (r->_name##_from_inode) {			\
			io_opts->_name = r->_name;		\
			io_opts->_name##_from_inode = true;	\
		}
		BCH_REBALANCE_OPTS()
#undef x
	}

	if (!bch2_bkey_rebalance_needs_update(trans->c, io_opts, k))
		return 0;

	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + 8);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bkey_reassemble(n, k);

	/* On successful transaction commit, @k was invalidated: */

	return bch2_bkey_set_needs_rebalance(trans->c, io_opts, n) ?:
		bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
		bch2_trans_commit(trans, NULL, NULL, 0) ?:
		-BCH_ERR_transaction_restart_nested;
}

#define REBALANCE_WORK_SCAN_OFFSET	(U64_MAX - 1)

static const char * const bch2_rebalance_state_strs[] = {
#define x(t) #t,
	BCH_REBALANCE_STATES()
	NULL
#undef x
};

int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey_i_cookie *cookie;
	u64 v;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
			     SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
			     BTREE_ITER_intent);
	k = bch2_btree_iter_peek_slot(trans, &iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

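	/*
	 * The scan trigger at (inum, REBALANCE_WORK_SCAN_OFFSET) is a cookie
	 * whose value is bumped each time a scan is requested; rebalance only
	 * deletes it if the value it read is still current (see
	 * bch2_clear_rebalance_needs_scan()), so a concurrent request isn't
	 * lost.
	 */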
	v = k.k->type == KEY_TYPE_cookie
		? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
		: 0;

	cookie = bch2_trans_kmalloc(trans, sizeof(*cookie));
	ret = PTR_ERR_OR_ZERO(cookie);
	if (ret)
		goto err;

	bkey_cookie_init(&cookie->k_i);
	cookie->k.p = iter.pos;
	cookie->v.cookie = cpu_to_le64(v + 1);

	ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
{
	int ret = bch2_trans_commit_do(c, NULL, NULL,
				       BCH_TRANS_COMMIT_no_enospc,
				       bch2_set_rebalance_needs_scan_trans(trans, inum));
	bch2_rebalance_wakeup(c);
	return ret;
}

int bch2_set_fs_needs_rebalance(struct bch_fs *c)
{
	return bch2_set_rebalance_needs_scan(c, 0);
}

static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 v;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
			     SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
			     BTREE_ITER_intent);
	k = bch2_btree_iter_peek_slot(trans, &iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	v = k.k->type == KEY_TYPE_cookie
		? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
		: 0;

	if (v == cookie)
		ret = bch2_btree_delete_at(trans, &iter, 0);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
					    struct btree_iter *work_iter)
{
	return !kthread_should_stop()
		? bch2_btree_iter_peek(trans, work_iter)
		: bkey_s_c_null;
}

static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
					   struct btree_iter *iter,
					   struct bkey_s_c k)
{
	if (k.k->type == KEY_TYPE_reflink_v || !bch2_bkey_rebalance_opts(k))
		return 0;

	struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	extent_entry_drop(bkey_i_to_s(n),
			  (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n)));
	return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
					     struct bpos work_pos,
					     struct btree_iter *extent_iter,
					     struct bch_io_opts *io_opts,
					     struct data_update_opts *data_opts)
{
	struct bch_fs *c = trans->c;

	bch2_trans_iter_exit(trans, extent_iter);
	bch2_trans_iter_init(trans, extent_iter,
			     work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink,
			     work_pos,
			     BTREE_ITER_all_snapshots);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, extent_iter);
	if (bkey_err(k))
		return k;

	int ret = bch2_move_get_io_opts_one(trans, io_opts, extent_iter, k);
	if (ret)
		return bkey_s_c_err(ret);

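	/*
	 * rewrite_ptrs is a bitmask with one bit per pointer in the key,
	 * indicating which copies need rewriting; the rewritten data is
	 * restricted to the configured background target.
	 */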
	memset(data_opts, 0, sizeof(*data_opts));
	data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
	data_opts->target = io_opts->background_target;
	data_opts->write_flags |= BCH_WRITE_only_specified_devs;

	if (!data_opts->rewrite_ptrs) {
		/*
		 * device we would want to write to offline? devices in target
		 * changed?
		 *
		 * We'll now need a full scan before this extent is picked up
		 * again:
		 */
		int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k);
		if (ret)
			return bkey_s_c_err(ret);
		return bkey_s_c_null;
	}

	if (trace_rebalance_extent_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, k);
		prt_newline(&buf);

		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);

		unsigned p = bch2_bkey_ptrs_need_compress(c, io_opts, k, ptrs);
		if (p) {
			prt_str(&buf, "compression=");
			bch2_compression_opt_to_text(&buf, io_opts->background_compression);
			prt_str(&buf, " ");
			bch2_prt_u64_base2(&buf, p);
			prt_newline(&buf);
		}

		p = bch2_bkey_ptrs_need_move(c, io_opts, ptrs);
		if (p) {
			prt_str(&buf, "move=");
			bch2_target_to_text(&buf, c, io_opts->background_target);
			prt_str(&buf, " ");
			bch2_prt_u64_base2(&buf, p);
			prt_newline(&buf);
		}

		trace_rebalance_extent(c, buf.buf);
		printbuf_exit(&buf);
	}

	return k;
}

noinline_for_stack
static int do_rebalance_extent(struct moving_context *ctxt,
			       struct bpos work_pos,
			       struct btree_iter *extent_iter)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct bch_fs_rebalance *r = &trans->c->rebalance;
	struct data_update_opts data_opts;
	struct bch_io_opts io_opts;
	struct bkey_s_c k;
	struct bkey_buf sk;
	int ret;

	ctxt->stats = &r->work_stats;
	r->state = BCH_REBALANCE_working;

	bch2_bkey_buf_init(&sk);

	ret = bkey_err(k = next_rebalance_extent(trans, work_pos,
				extent_iter, &io_opts, &data_opts));
	if (ret || !k.k)
		goto out;

	atomic64_add(k.k->size, &ctxt->stats->sectors_seen);

	/*
	 * The iterator gets unlocked by __bch2_read_extent - need to
	 * save a copy of @k elsewhere:
	 */
	bch2_bkey_buf_reassemble(&sk, c, k);
	k = bkey_i_to_s_c(sk.k);

	ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts);
	if (ret) {
		if (bch2_err_matches(ret, ENOMEM)) {
			/* memory allocation failure, wait for some IO to finish */
			bch2_move_ctxt_wait_for_io(ctxt);
			ret = bch_err_throw(c, transaction_restart_nested);
		}

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto out;

		/* skip it and continue, XXX signal failure */
		ret = 0;
	}
out:
	bch2_bkey_buf_exit(&sk, c);
	return ret;
}

static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct bch_fs_rebalance *r = &trans->c->rebalance;

	bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
	ctxt->stats = &r->scan_stats;

	if (!inum) {
		r->scan_start = BBPOS_MIN;
		r->scan_end = BBPOS_MAX;
	} else {
		r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0));
		r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX));
	}

	r->state = BCH_REBALANCE_scanning;

	struct per_snapshot_io_opts snapshot_io_opts;
	per_snapshot_io_opts_init(&snapshot_io_opts, c);

	int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
					 r->scan_start.pos, r->scan_end.pos,
					 BTREE_ITER_all_snapshots|
					 BTREE_ITER_not_extents|
					 BTREE_ITER_prefetch, k, ({
		ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);

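		/*
		 * Looking up the io options for every extent also refreshes
		 * the extent's bch_extent_rebalance entry when it's stale,
		 * which is what generates the rebalance_work entries this scan
		 * exists to produce (see the write buffer flush below).
		 */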
		struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans,
					&snapshot_io_opts, iter.pos, &iter, k);
		PTR_ERR_OR_ZERO(io_opts);
	})) ?:
	commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_clear_rebalance_needs_scan(trans, inum, cookie));

	per_snapshot_io_opts_exit(&snapshot_io_opts);
	bch2_move_stats_exit(&r->scan_stats, trans->c);

	/*
	 * Ensure that the rebalance_work entries we created are seen by the
	 * next iteration of do_rebalance(), so we don't end up stuck in
	 * rebalance_wait():
	 */
	atomic64_inc(&r->scan_stats.sectors_seen);
	bch2_btree_write_buffer_flush_sync(trans);

	return ret;
}

static void rebalance_wait(struct bch_fs *c)
{
	struct bch_fs_rebalance *r = &c->rebalance;
	struct io_clock *clock = &c->io_clock[WRITE];
	u64 now = atomic64_read(&clock->now);
	u64 min_member_capacity = bch2_min_rw_member_capacity(c);

	if (min_member_capacity == U64_MAX)
		min_member_capacity = 128 * 2048;

	r->wait_iotime_end = now + (min_member_capacity >> 6);

	if (r->state != BCH_REBALANCE_waiting) {
		r->wait_iotime_start = now;
		r->wait_wallclock_start = ktime_get_real_ns();
		r->state = BCH_REBALANCE_waiting;
	}

	bch2_kthread_io_clock_wait_once(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
}

static bool bch2_rebalance_enabled(struct bch_fs *c)
{
	return c->opts.rebalance_enabled &&
		!(c->opts.rebalance_on_ac_only &&
		  c->rebalance.on_battery);
}

static int do_rebalance(struct moving_context *ctxt)
{
	struct btree_trans *trans = ctxt->trans;
	struct bch_fs *c = trans->c;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct btree_iter rebalance_work_iter, extent_iter = {};
	struct bkey_s_c k;
	u32 kick = r->kick;
	int ret = 0;

	bch2_trans_begin(trans);

	bch2_move_stats_init(&r->work_stats, "rebalance_work");
	bch2_move_stats_init(&r->scan_stats, "rebalance_scan");

	bch2_trans_iter_init(trans, &rebalance_work_iter,
			     BTREE_ID_rebalance_work, POS_MIN,
			     BTREE_ITER_all_snapshots);

	while (!bch2_move_ratelimit(ctxt)) {
		if (!bch2_rebalance_enabled(c)) {
			bch2_moving_ctxt_flush_all(ctxt);
			kthread_wait_freezable(bch2_rebalance_enabled(c) ||
					       kthread_should_stop());
		}

		if (kthread_should_stop())
			break;

		bch2_trans_begin(trans);

		ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter));
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret || !k.k)
			break;

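		/*
		 * A cookie entry requests a scan of that inode (or of the
		 * whole filesystem if the inode number is 0); any other entry
		 * is the position of a single extent to rewrite.
		 */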
		ret = k.k->type == KEY_TYPE_cookie
			? do_rebalance_scan(ctxt, k.k->p.inode,
					    le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie))
			: do_rebalance_extent(ctxt, k.k->p, &extent_iter);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

		bch2_btree_iter_advance(trans, &rebalance_work_iter);
	}

	bch2_trans_iter_exit(trans, &extent_iter);
	bch2_trans_iter_exit(trans, &rebalance_work_iter);
	bch2_move_stats_exit(&r->scan_stats, c);

	if (!ret &&
	    !kthread_should_stop() &&
	    !atomic64_read(&r->work_stats.sectors_seen) &&
	    !atomic64_read(&r->scan_stats.sectors_seen) &&
	    kick == r->kick) {
		bch2_moving_ctxt_flush_all(ctxt);
		bch2_trans_unlock_long(trans);
		rebalance_wait(c);
	}

	if (!bch2_err_matches(ret, EROFS))
		bch_err_fn(c, ret);
	return ret;
}

static int bch2_rebalance_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct bch_fs_rebalance *r = &c->rebalance;
	struct moving_context ctxt;

	set_freezable();

	/*
	 * Data move operations can't run until after check_snapshots has
	 * completed, and bch2_snapshot_is_ancestor() is available.
	 */
	kthread_wait_freezable(c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots ||
			       kthread_should_stop());

	bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
			      writepoint_ptr(&c->rebalance_write_point),
			      true);

	while (!kthread_should_stop() && !do_rebalance(&ctxt))
		;

	bch2_moving_ctxt_exit(&ctxt);

	return 0;
}

void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
{
	printbuf_tabstop_push(out, 32);

	struct bch_fs_rebalance *r = &c->rebalance;

	/* print pending work */
	struct disk_accounting_pos acc;
	disk_accounting_key_init(acc, rebalance_work);
	u64 v;
	bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);

	prt_printf(out, "pending work:\t");
	prt_human_readable_u64(out, v << 9);
	prt_printf(out, "\n\n");

	prt_str(out, bch2_rebalance_state_strs[r->state]);
	prt_newline(out);
	printbuf_indent_add(out, 2);

	switch (r->state) {
	case BCH_REBALANCE_waiting: {
		u64 now = atomic64_read(&c->io_clock[WRITE].now);

		prt_printf(out, "io wait duration:\t");
		bch2_prt_human_readable_s64(out, (r->wait_iotime_end - r->wait_iotime_start) << 9);
		prt_newline(out);

		prt_printf(out, "io wait remaining:\t");
		bch2_prt_human_readable_s64(out, (r->wait_iotime_end - now) << 9);
		prt_newline(out);

		prt_printf(out, "duration waited:\t");
		bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start);
		prt_newline(out);
		break;
	}
	case BCH_REBALANCE_working:
		bch2_move_stats_to_text(out, &r->work_stats);
		break;
	case BCH_REBALANCE_scanning:
		bch2_move_stats_to_text(out, &r->scan_stats);
		break;
	}
	prt_newline(out);

	struct task_struct *t;
	scoped_guard(rcu) {
		t = rcu_dereference(c->rebalance.thread);
		if (t)
			get_task_struct(t);
	}

	if (t) {
		bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL);
		put_task_struct(t);
	}

	printbuf_indent_sub(out, 2);
}

void bch2_rebalance_stop(struct bch_fs *c)
{
	struct task_struct *p;

	c->rebalance.pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&c->rebalance.pd.rate);

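	/*
	 * bch2_rebalance_wakeup() dereferences rebalance.thread under RCU:
	 * clear the pointer, then synchronize_rcu() below, so any waker still
	 * holding the old value finishes before we stop the thread and drop
	 * its ref.
	 */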
	p = rcu_dereference_protected(c->rebalance.thread, 1);
	c->rebalance.thread = NULL;

	if (p) {
		/* for synchronizing with bch2_rebalance_wakeup() */
		synchronize_rcu();

		kthread_stop(p);
		put_task_struct(p);
	}
}

int bch2_rebalance_start(struct bch_fs *c)
{
	struct task_struct *p;
	int ret;

	if (c->rebalance.thread)
		return 0;

	if (c->opts.nochanges)
		return 0;

	p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
	ret = PTR_ERR_OR_ZERO(p);
	bch_err_msg(c, ret, "creating rebalance thread");
	if (ret)
		return ret;

	get_task_struct(p);
	rcu_assign_pointer(c->rebalance.thread, p);
	wake_up_process(p);
	return 0;
}

#ifdef CONFIG_POWER_SUPPLY
#include <linux/power_supply.h>

static int bch2_rebalance_power_notifier(struct notifier_block *nb,
					 unsigned long event, void *data)
{
	struct bch_fs *c = container_of(nb, struct bch_fs, rebalance.power_notifier);

	c->rebalance.on_battery = !power_supply_is_system_supplied();
	bch2_rebalance_wakeup(c);
	return NOTIFY_OK;
}
#endif

void bch2_fs_rebalance_exit(struct bch_fs *c)
{
#ifdef CONFIG_POWER_SUPPLY
	power_supply_unreg_notifier(&c->rebalance.power_notifier);
#endif
}

int bch2_fs_rebalance_init(struct bch_fs *c)
{
	struct bch_fs_rebalance *r = &c->rebalance;

	bch2_pd_controller_init(&r->pd);

#ifdef CONFIG_POWER_SUPPLY
	r->power_notifier.notifier_call = bch2_rebalance_power_notifier;
	int ret = power_supply_reg_notifier(&r->power_notifier);
	if (ret)
		return ret;

	r->on_battery = !power_supply_is_system_supplied();
#endif
	return 0;
}

static int check_rebalance_work_one(struct btree_trans *trans,
				    struct btree_iter *extent_iter,
				    struct btree_iter *rebalance_iter,
				    struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c extent_k, rebalance_k;
	struct printbuf buf = PRINTBUF;

	int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?:
		bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter));
	if (ret)
		return ret;

	if (!extent_k.k &&
	    extent_iter->btree_id == BTREE_ID_reflink &&
	    (!rebalance_k.k ||
	     rebalance_k.k->p.inode >= BCACHEFS_ROOT_INO)) {
		bch2_trans_iter_exit(trans, extent_iter);
		bch2_trans_iter_init(trans, extent_iter,
				     BTREE_ID_extents, POS_MIN,
				     BTREE_ITER_prefetch|
				     BTREE_ITER_all_snapshots);
		return bch_err_throw(c, transaction_restart_nested);
	}

	if (!extent_k.k && !rebalance_k.k)
		return 1;

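	/*
	 * Merge-join the extents and rebalance_work btrees by position: when
	 * one side has no key at a position, substitute a deleted key so the
	 * checks below see the mismatch.
	 */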
	int cmp = bpos_cmp(extent_k.k ? extent_k.k->p : SPOS_MAX,
			   rebalance_k.k ? rebalance_k.k->p : SPOS_MAX);

	struct bkey deleted;
	bkey_init(&deleted);

	if (cmp < 0) {
		deleted.p = extent_k.k->p;
		rebalance_k.k = &deleted;
	} else if (cmp > 0) {
		deleted.p = rebalance_k.k->p;
		extent_k.k = &deleted;
	}

	bool should_have_rebalance =
		bch2_bkey_sectors_need_rebalance(c, extent_k) != 0;
	bool have_rebalance = rebalance_k.k->type == KEY_TYPE_set;

	if (should_have_rebalance != have_rebalance) {
		ret = bch2_btree_write_buffer_maybe_flush(trans, extent_k, last_flushed);
		if (ret)
			return ret;

		bch2_bkey_val_to_text(&buf, c, extent_k);
	}

	if (fsck_err_on(!should_have_rebalance && have_rebalance,
			trans, rebalance_work_incorrectly_set,
			"rebalance work incorrectly set\n%s", buf.buf)) {
		ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
						  extent_k.k->p, false);
		if (ret)
			goto err;
	}

	if (fsck_err_on(should_have_rebalance && !have_rebalance,
			trans, rebalance_work_incorrectly_unset,
			"rebalance work incorrectly unset\n%s", buf.buf)) {
		ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
						  extent_k.k->p, true);
		if (ret)
			goto err;
	}

	if (cmp <= 0)
		bch2_btree_iter_advance(trans, extent_iter);
	if (cmp >= 0)
		bch2_btree_iter_advance(trans, rebalance_iter);
err:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

int bch2_check_rebalance_work(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter rebalance_iter, extent_iter;
	int ret = 0;

	bch2_trans_iter_init(trans, &extent_iter,
			     BTREE_ID_reflink, POS_MIN,
			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &rebalance_iter,
			     BTREE_ID_rebalance_work, POS_MIN,
			     BTREE_ITER_prefetch);

	struct bkey_buf last_flushed;
	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	while (!ret) {
		bch2_trans_begin(trans);

		ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
	}

	bch2_bkey_buf_exit(&last_flushed, c);
	bch2_trans_iter_exit(trans, &extent_iter);
	bch2_trans_iter_exit(trans, &rebalance_iter);
	bch2_trans_put(trans);
	return ret < 0 ? ret : 0;
}