// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Western Digital Corporation or its affiliates.
 */

#include <linux/btrfs_tree.h>
#include "ctree.h"
#include "fs.h"
#include "accessors.h"
#include "transaction.h"
#include "disk-io.h"
#include "raid-stripe-tree.h"
#include "volumes.h"
#include "print-tree.h"

static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
					       struct btrfs_path *path,
					       const struct btrfs_key *oldkey,
					       u64 newlen, u64 frontpad)
{
	struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
	struct btrfs_stripe_extent *extent, *newitem;
	struct extent_buffer *leaf;
	int slot;
	size_t item_size;
	struct btrfs_key newkey = {
		.objectid = oldkey->objectid + frontpad,
		.type = BTRFS_RAID_STRIPE_KEY,
		.offset = newlen,
	};
	int ret;

	ASSERT(newlen > 0);
	ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);

	leaf = path->nodes[0];
	slot = path->slots[0];
	item_size = btrfs_item_size(leaf, slot);

	newitem = kzalloc(item_size, GFP_NOFS);
	if (!newitem)
		return -ENOMEM;

	extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);

	for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
		struct btrfs_raid_stride *stride = &extent->strides[i];
		u64 devid;
		u64 phys;

		/*
		 * Carry the devid over as well, otherwise the re-inserted
		 * strides would all point at devid 0 and could never be
		 * resolved again.
		 */
		devid = btrfs_raid_stride_devid(leaf, stride);
		phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
		btrfs_set_stack_raid_stride_devid(&newitem->strides[i], devid);
		btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
	}

	ret = btrfs_del_item(trans, stripe_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);
	ret = btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);

out:
	kfree(newitem);
	return ret;
}

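/*
 * Delete the RAID stripe-extent items covering the range [start, start + length).
 *
 * A stripe extent is keyed as (logical start, BTRFS_RAID_STRIPE_KEY, length),
 * so the range to drop can overlap the existing items in four ways, all
 * handled in the loop below: it can cover a whole item, truncate the front or
 * the tail of an item, or punch a hole into the middle of an item.
 */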
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *stripe_root = fs_info->stripe_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u64 found_start;
	u64 found_end;
	u64 end = start + length;
	int slot;
	int ret;

	if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
		return 0;

	if (!btrfs_is_testing(fs_info)) {
		struct btrfs_chunk_map *map;
		bool use_rst;

		map = btrfs_find_chunk_map(fs_info, start, length);
		if (!map)
			return -EINVAL;
		use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
		btrfs_free_chunk_map(map);
		if (!use_rst)
			return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		key.objectid = start;
		key.type = BTRFS_RAID_STRIPE_KEY;
		key.offset = 0;

		ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
		if (ret < 0)
			break;

		if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
			path->slots[0]--;

		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		found_start = key.objectid;
		found_end = found_start + key.offset;
		ret = 0;

		/*
		 * The stripe extent starts before the range we want to delete,
		 * but the range spans more than one stripe extent:
		 *
		 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
		 *        |--- keep  ---|--- drop ---|
		 *
		 * This means we have to get the previous item, truncate its
		 * length and then restart the search.
		 */
		if (found_start > start) {
			if (slot == 0) {
				ret = btrfs_previous_item(stripe_root, path, start,
							  BTRFS_RAID_STRIPE_KEY);
				if (ret) {
					if (ret > 0)
						ret = -ENOENT;
					break;
				}
			} else {
				path->slots[0]--;
			}

			leaf = path->nodes[0];
			slot = path->slots[0];
			btrfs_item_key_to_cpu(leaf, &key, slot);
			found_start = key.objectid;
			found_end = found_start + key.offset;
			ASSERT(found_start <= start);
		}

		if (key.type != BTRFS_RAID_STRIPE_KEY)
			break;

		/* That stripe ends before we start, we're done. */
		if (found_end <= start)
			break;

		trace_btrfs_raid_extent_delete(fs_info, start, end,
					       found_start, found_end);

		/*
		 * The stripe extent starts before the range we want to delete
		 * and ends after the range we want to delete, i.e. we're
		 * punching a hole in the stripe extent:
		 *
		 * |--- RAID Stripe Extent ---|
		 * | keep |--- drop ---| keep |
		 *
		 * This means we need to a) truncate the existing item and b)
		 * create a second item for the remaining range.
		 */
		if (found_start < start && found_end > end) {
			size_t item_size;
			u64 diff_start = start - found_start;
			u64 diff_end = found_end - end;
			struct btrfs_stripe_extent *extent;
			struct btrfs_key newkey = {
				.objectid = end,
				.type = BTRFS_RAID_STRIPE_KEY,
				.offset = diff_end,
			};

			/* The "right" item. */
			ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
			if (ret)
				break;

			item_size = btrfs_item_size(leaf, path->slots[0]);
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_stripe_extent);

			for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
				struct btrfs_raid_stride *stride = &extent->strides[i];
				u64 phys;

				phys = btrfs_raid_stride_physical(leaf, stride);
				phys += diff_start + length;
				btrfs_set_raid_stride_physical(leaf, stride, phys);
			}

			/* The "left" item. */
			path->slots[0]--;
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			btrfs_partially_delete_raid_extent(trans, path, &key,
							   diff_start, 0);
			break;
		}

		/*
		 * The stripe extent starts before the range we want to delete:
		 *
		 * |--- RAID Stripe Extent ---|
		 * |--- keep ---|--- drop ---|
		 *
		 * This means we have to duplicate the tree item, truncate the
		 * length to the new size and then re-insert the item.
		 */
		if (found_start < start) {
			u64 diff_start = start - found_start;

			btrfs_partially_delete_raid_extent(trans, path, &key,
							   diff_start, 0);

			start += (key.offset - diff_start);
			length -= (key.offset - diff_start);
			if (length == 0)
				break;

			btrfs_release_path(path);
			continue;
		}

		/*
		 * The stripe extent ends after the range we want to delete:
		 *
		 * |--- RAID Stripe Extent ---|
		 * |--- drop ---|--- keep ---|
		 *
		 * This means we have to duplicate the tree item, truncate the
		 * length to the new size and then re-insert the item.
		 */
		if (found_end > end) {
			u64 diff_end = found_end - end;

			btrfs_partially_delete_raid_extent(trans, path, &key,
							   key.offset - length,
							   length);
			ASSERT(key.offset - diff_end == length);
			break;
		}

		/* Finally we can delete the whole item, no more special cases. */
		ret = btrfs_del_item(trans, stripe_root, path);
		if (ret)
			break;

		start += key.offset;
		length -= key.offset;
		if (length == 0)
			break;

		btrfs_release_path(path);
	}

	btrfs_free_path(path);
	return ret;
}

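/*
 * Overwrite an existing RAID stripe-extent item in place.
 *
 * Called from btrfs_insert_one_raid_extent() when the insert returns -EEXIST,
 * i.e. an item with the same key is already in the stripe tree and only its
 * strides need to be rewritten.
 */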
static int update_raid_extent_item(struct btrfs_trans_handle *trans,
				   struct btrfs_key *key,
				   struct btrfs_stripe_extent *stripe_extent,
				   const size_t item_size)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int ret;
	int slot;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
				0, 1);
	if (ret) {
		/* Don't leak the path if the item isn't found or the search fails. */
		btrfs_free_path(path);
		return (ret == 1 ? ret : -EINVAL);
	}

	leaf = path->nodes[0];
	slot = path->slots[0];

	write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
			    item_size);
	btrfs_free_path(path);

	return ret;
}

EXPORT_FOR_TESTS
int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_io_context *bioc)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_key stripe_key;
	struct btrfs_root *stripe_root = fs_info->stripe_root;
	const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
	struct btrfs_stripe_extent *stripe_extent;
	const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
	int ret;

	stripe_extent = kzalloc(item_size, GFP_NOFS);
	if (!stripe_extent) {
		btrfs_abort_transaction(trans, -ENOMEM);
		btrfs_end_transaction(trans);
		return -ENOMEM;
	}

	trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
					   num_stripes);
	for (int i = 0; i < num_stripes; i++) {
		u64 devid = bioc->stripes[i].dev->devid;
		u64 physical = bioc->stripes[i].physical;
		struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];

		btrfs_set_stack_raid_stride_devid(raid_stride, devid);
		btrfs_set_stack_raid_stride_physical(raid_stride, physical);
	}

	stripe_key.objectid = bioc->logical;
	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
	stripe_key.offset = bioc->size;

	ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
				item_size);
	if (ret == -EEXIST)
		ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
					      item_size);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	kfree(stripe_extent);

	return ret;
}

int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_ordered_extent *ordered_extent)
{
	struct btrfs_io_context *bioc;
	int ret;

	if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
		return 0;

	list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
		ret = btrfs_insert_one_raid_extent(trans, bioc);
		if (ret)
			return ret;
	}

	while (!list_empty(&ordered_extent->bioc_list)) {
		bioc = list_first_entry(&ordered_extent->bioc_list,
					typeof(*bioc), rst_ordered_entry);
		list_del(&bioc->rst_ordered_entry);
		btrfs_put_bioc(bioc);
	}

	return 0;
}

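/*
 * Translate a logical address into the physical address on a given device.
 *
 * Find the stripe-extent item covering @logical and pick the stride matching
 * the device of @stripe (and, for DUP profiles, the requested stripe_index).
 * If the item ends before @logical + *@length, *@length is trimmed so the
 * caller knows where it has to split the bio.
 */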
int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
				 u64 logical, u64 *length, u64 map_type,
				 u32 stripe_index, struct btrfs_io_stripe *stripe)
{
	struct btrfs_root *stripe_root = fs_info->stripe_root;
	struct btrfs_stripe_extent *stripe_extent;
	struct btrfs_key stripe_key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	const u64 end = logical + *length;
	int num_stripes;
	u64 offset;
	u64 found_logical;
	u64 found_length;
	u64 found_end;
	int slot;
	int ret;

	stripe_key.objectid = logical;
	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
	stripe_key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (stripe->rst_search_commit_root) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

	ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
	if (ret < 0)
		goto free_path;
	if (ret) {
		if (path->slots[0] != 0)
			path->slots[0]--;
	}

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		found_logical = found_key.objectid;
		found_length = found_key.offset;
		found_end = found_logical + found_length;

		if (found_logical > end) {
			ret = -ENODATA;
			goto out;
		}

		if (in_range(logical, found_logical, found_length))
			break;

		ret = btrfs_next_item(stripe_root, path);
		if (ret)
			goto out;
	}

	offset = logical - found_logical;

	/*
	 * If we have a logically contiguous, but physically non-contiguous
	 * range, we need to split the bio. Record the length after which we
	 * must split the bio.
	 */
	if (end > found_end)
		*length -= end - found_end;

	num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
	stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);

	for (int i = 0; i < num_stripes; i++) {
		struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
		u64 devid = btrfs_raid_stride_devid(leaf, stride);
		u64 physical = btrfs_raid_stride_physical(leaf, stride);

		if (devid != stripe->dev->devid)
			continue;

		if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
			continue;

		stripe->physical = physical + offset;

		trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
						   stripe->physical, devid);

		ret = 0;
		goto free_path;
	}

	/* If we're here, we haven't found the requested devid in the stripe. */
	ret = -ENODATA;
out:
	if (ret > 0)
		ret = -ENODATA;
	if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
		btrfs_debug(fs_info,
	"cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
			    logical, logical + *length, stripe->dev->devid,
			    btrfs_bg_type_to_raid_name(map_type));
	}
free_path:
	btrfs_free_path(path);

	return ret;
}