// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common NFS I/O operations for the pnfs file based
 * layout drivers.
 *
 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 * Tom Haynes <loghyr@primarydata.com>
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/addr.h>
#include <linux/module.h>

#include "nfs4session.h"
#include "internal.h"
#include "pnfs.h"
#include "netns.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS

void pnfs_generic_rw_release(void *data)
{
	struct nfs_pgio_header *hdr = data;

	nfs_put_client(hdr->ds_clp);
	hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);

/* Fake up some data that will cause nfs_commit_release to retry the writes. */
void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
{
	struct nfs_writeverf *verf = data->res.verf;

	data->task.tk_status = 0;
	memset(&verf->verifier, 0, sizeof(verf->verifier));
	verf->committed = NFS_UNSTABLE;
}
EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);

void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *wdata = data;

	/* Note this may cause RPC to be resent */
	wdata->mds_ops->rpc_call_done(task, data);
}
EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);

void pnfs_generic_commit_release(void *calldata)
{
	struct nfs_commit_data *data = calldata;

	data->completion_ops->completion(data);
	pnfs_put_lseg(data->lseg);
	nfs_put_client(data->ds_clp);
	nfs_commitdata_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);

static struct pnfs_layout_segment *
pnfs_free_bucket_lseg(struct pnfs_commit_bucket *bucket)
{
	if (list_empty(&bucket->committing) && list_empty(&bucket->written)) {
		struct pnfs_layout_segment *freeme = bucket->lseg;
		bucket->lseg = NULL;
		return freeme;
	}
	return NULL;
}

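/*
 * Note: the caller owns the reference returned by pnfs_free_bucket_lseg()
 * and is expected to drop it, as in:
 *
 *	pnfs_put_lseg(pnfs_free_bucket_lseg(bucket));
 *
 * (see pnfs_generic_clear_request_commit() and
 * pnfs_bucket_recover_commit_reqs() below).
 */
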
/* The generic layer is about to remove the req from the commit list.
 * If this will make the bucket empty, it will need to put the lseg reference.
 * Note this must be called holding nfsi->commit_mutex
 */
void
pnfs_generic_clear_request_commit(struct nfs_page *req,
				  struct nfs_commit_info *cinfo)
{
	struct pnfs_commit_bucket *bucket = NULL;

	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
		goto out;
	cinfo->ds->nwritten--;
	if (list_is_singular(&req->wb_list))
		bucket = list_first_entry(&req->wb_list,
					  struct pnfs_commit_bucket, written);
out:
	nfs_request_remove_commit_list(req, cinfo);
	if (bucket)
		pnfs_put_lseg(pnfs_free_bucket_lseg(bucket));
}
EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);

struct pnfs_commit_array *
pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags)
{
	struct pnfs_commit_array *p;
	struct pnfs_commit_bucket *b;

	p = kmalloc(struct_size(p, buckets, n), gfp_flags);
	if (!p)
		return NULL;
	p->nbuckets = n;
	INIT_LIST_HEAD(&p->cinfo_list);
	INIT_LIST_HEAD(&p->lseg_list);
	p->lseg = NULL;
	for (b = &p->buckets[0]; n != 0; b++, n--) {
		INIT_LIST_HEAD(&b->written);
		INIT_LIST_HEAD(&b->committing);
		b->lseg = NULL;
		b->direct_verf.committed = NFS_INVALID_STABLE_HOW;
	}
	return p;
}
EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array);

void
pnfs_free_commit_array(struct pnfs_commit_array *p)
{
	kfree_rcu(p, rcu);
}
EXPORT_SYMBOL_GPL(pnfs_free_commit_array);

static struct pnfs_commit_array *
pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info *fl_cinfo,
		struct pnfs_layout_segment *lseg)
{
	struct pnfs_commit_array *array;

	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
		if (array->lseg == lseg)
			return array;
	}
	return NULL;
}

struct pnfs_commit_array *
pnfs_add_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
		struct pnfs_commit_array *new,
		struct pnfs_layout_segment *lseg)
{
	struct pnfs_commit_array *array;

	array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
	if (array)
		return array;
	new->lseg = lseg;
	refcount_set(&new->refcount, 1);
	list_add_rcu(&new->cinfo_list, &fl_cinfo->commits);
	list_add(&new->lseg_list, &lseg->pls_commits);
	return new;
}
EXPORT_SYMBOL_GPL(pnfs_add_commit_array);

static struct pnfs_commit_array *
pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
		struct pnfs_layout_segment *lseg)
{
	struct pnfs_commit_array *array;

	rcu_read_lock();
	array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
	if (!array) {
		rcu_read_unlock();
		fl_cinfo->ops->setup_ds_info(fl_cinfo, lseg);
		rcu_read_lock();
		array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
	}
	rcu_read_unlock();
	return array;
}

static void
pnfs_release_commit_array_locked(struct pnfs_commit_array *array)
{
	list_del_rcu(&array->cinfo_list);
	list_del(&array->lseg_list);
	pnfs_free_commit_array(array);
}

static void
pnfs_put_commit_array_locked(struct pnfs_commit_array *array)
{
	if (refcount_dec_and_test(&array->refcount))
		pnfs_release_commit_array_locked(array);
}

static void
pnfs_put_commit_array(struct pnfs_commit_array *array, struct inode *inode)
{
	if (refcount_dec_and_lock(&array->refcount, &inode->i_lock)) {
		pnfs_release_commit_array_locked(array);
		spin_unlock(&inode->i_lock);
	}
}

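/*
 * Typical usage pattern for the commit array refcounting helpers
 * (cf. pnfs_generic_scan_commit_lists() below): take a reference under
 * the RCU read lock, drop the lock while working on the buckets, then
 * reacquire it and release the reference.
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
 *		if (!array->lseg || !pnfs_get_commit_array(array))
 *			continue;
 *		rcu_read_unlock();
 *		... work on array->buckets ...
 *		rcu_read_lock();
 *		pnfs_put_commit_array(array, cinfo->inode);
 *	}
 *	rcu_read_unlock();
 */
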
static struct pnfs_commit_array *
pnfs_get_commit_array(struct pnfs_commit_array *array)
{
	if (refcount_inc_not_zero(&array->refcount))
		return array;
	return NULL;
}

static void
pnfs_remove_and_free_commit_array(struct pnfs_commit_array *array)
{
	array->lseg = NULL;
	list_del_init(&array->lseg_list);
	pnfs_put_commit_array_locked(array);
}

void
pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo,
		struct pnfs_layout_segment *lseg)
{
	struct pnfs_commit_array *array, *tmp;

	list_for_each_entry_safe(array, tmp, &lseg->pls_commits, lseg_list)
		pnfs_remove_and_free_commit_array(array);
}
EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg);

void
pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo)
{
	struct pnfs_commit_array *array, *tmp;

	list_for_each_entry_safe(array, tmp, &fl_cinfo->commits, cinfo_list)
		pnfs_remove_and_free_commit_array(array);
}
EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy);

/*
 * Locks the nfs_page requests for commit and moves them to
 * @bucket->committing.
 */
static int
pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
				struct nfs_commit_info *cinfo,
				int max)
{
	struct list_head *src = &bucket->written;
	struct list_head *dst = &bucket->committing;
	int ret;

	lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
	ret = nfs_scan_commit_list(src, dst, cinfo, max);
	if (ret) {
		cinfo->ds->nwritten -= ret;
		cinfo->ds->ncommitting += ret;
	}
	return ret;
}

static int pnfs_bucket_scan_array(struct nfs_commit_info *cinfo,
				  struct pnfs_commit_bucket *buckets,
				  unsigned int nbuckets,
				  int max)
{
	unsigned int i;
	int rv = 0, cnt;

	for (i = 0; i < nbuckets && max != 0; i++) {
		cnt = pnfs_bucket_scan_ds_commit_list(&buckets[i], cinfo, max);
		rv += cnt;
		max -= cnt;
	}
	return rv;
}

/* Move reqs from written to committing lists, returning count
 * of number moved.
 */
int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max)
{
	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
	struct pnfs_commit_array *array;
	int rv = 0, cnt;

	rcu_read_lock();
	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
		if (!array->lseg || !pnfs_get_commit_array(array))
			continue;
		rcu_read_unlock();
		cnt = pnfs_bucket_scan_array(cinfo, array->buckets,
				array->nbuckets, max);
		rcu_read_lock();
		pnfs_put_commit_array(array, cinfo->inode);
		rv += cnt;
		max -= cnt;
		if (!max)
			break;
	}
	rcu_read_unlock();
	return rv;
}
EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);

static unsigned int
pnfs_bucket_recover_commit_reqs(struct list_head *dst,
				struct pnfs_commit_bucket *buckets,
				unsigned int nbuckets,
				struct nfs_commit_info *cinfo)
{
	struct pnfs_commit_bucket *b;
	struct pnfs_layout_segment *freeme;
	unsigned int nwritten, ret = 0;
	unsigned int i;

restart:
	for (i = 0, b = buckets; i < nbuckets; i++, b++) {
		nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
		if (!nwritten)
			continue;
		ret += nwritten;
		freeme = pnfs_free_bucket_lseg(b);
		if (freeme) {
			pnfs_put_lseg(freeme);
			goto restart;
		}
	}
	return ret;
}

/* Pull everything off the committing lists and dump into @dst. */
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
				      struct nfs_commit_info *cinfo)
{
	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
	struct pnfs_commit_array *array;
	unsigned int nwritten;

	lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
	rcu_read_lock();
	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
		if (!array->lseg || !pnfs_get_commit_array(array))
			continue;
		rcu_read_unlock();
		nwritten = pnfs_bucket_recover_commit_reqs(dst,
							   array->buckets,
							   array->nbuckets,
							   cinfo);
		rcu_read_lock();
		pnfs_put_commit_array(array, cinfo->inode);
		fl_cinfo->nwritten -= nwritten;
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);

static struct pnfs_layout_segment *
pnfs_bucket_get_committing(struct list_head *head,
			   struct pnfs_commit_bucket *bucket,
			   struct nfs_commit_info *cinfo)
{
	struct pnfs_layout_segment *lseg;
	struct list_head *pos;

	list_for_each(pos, &bucket->committing)
		cinfo->ds->ncommitting--;
	list_splice_init(&bucket->committing, head);
	lseg = pnfs_free_bucket_lseg(bucket);
	if (!lseg)
		lseg = pnfs_get_lseg(bucket->lseg);
	return lseg;
}

static struct nfs_commit_data *
pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
			     struct nfs_commit_info *cinfo)
{
	struct nfs_commit_data *data = nfs_commitdata_alloc();

	if (!data)
		return NULL;
	data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo);
	return data;
}

static void pnfs_generic_retry_commit(struct pnfs_commit_bucket *buckets,
				      unsigned int nbuckets,
				      struct nfs_commit_info *cinfo,
				      unsigned int idx)
{
	struct pnfs_commit_bucket *bucket;
	struct pnfs_layout_segment *freeme;
	LIST_HEAD(pages);

	for (bucket = buckets; idx < nbuckets; bucket++, idx++) {
		if (list_empty(&bucket->committing))
			continue;
		mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
		freeme = pnfs_bucket_get_committing(&pages, bucket, cinfo);
		mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
		nfs_retry_commit(&pages, freeme, cinfo, idx);
		pnfs_put_lseg(freeme);
	}
}

static unsigned int
pnfs_bucket_alloc_ds_commits(struct list_head *list,
			     struct pnfs_commit_bucket *buckets,
			     unsigned int nbuckets,
			     struct nfs_commit_info *cinfo)
{
	struct pnfs_commit_bucket *bucket;
	struct nfs_commit_data *data;
	unsigned int i;
	unsigned int nreq = 0;

	for (i = 0, bucket = buckets; i < nbuckets; i++, bucket++) {
		if (list_empty(&bucket->committing))
			continue;
		mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
		if (!list_empty(&bucket->committing)) {
			data = pnfs_bucket_fetch_commitdata(bucket, cinfo);
			if (!data)
				goto out_error;
			data->ds_commit_index = i;
			list_add_tail(&data->list, list);
			nreq++;
		}
		mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	}
	return nreq;
out_error:
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	/* Clean up on error */
	pnfs_generic_retry_commit(buckets, nbuckets, cinfo, i);
	return nreq;
}

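/*
 * Each nfs_commit_data allocated above is tagged with the index of the
 * bucket it came from (data->ds_commit_index); the commit data built for
 * the MDS in pnfs_generic_commit_pagelist() uses -1 instead, which is how
 * the two cases are told apart when the commits are initiated.
 */
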
static unsigned int
pnfs_alloc_ds_commits_list(struct list_head *list,
			   struct pnfs_ds_commit_info *fl_cinfo,
			   struct nfs_commit_info *cinfo)
{
	struct pnfs_commit_array *array;
	unsigned int ret = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
		if (!array->lseg || !pnfs_get_commit_array(array))
			continue;
		rcu_read_unlock();
		ret += pnfs_bucket_alloc_ds_commits(list, array->buckets,
				array->nbuckets, cinfo);
		rcu_read_lock();
		pnfs_put_commit_array(array, cinfo->inode);
	}
	rcu_read_unlock();
	return ret;
}

/* This follows nfs_commit_list pretty closely */
int
pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
			     int how, struct nfs_commit_info *cinfo,
			     int (*initiate_commit)(struct nfs_commit_data *data,
						    int how))
{
	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
	struct nfs_commit_data *data, *tmp;
	LIST_HEAD(list);
	unsigned int nreq = 0;

	if (!list_empty(mds_pages)) {
		data = nfs_commitdata_alloc();
		if (!data) {
			nfs_retry_commit(mds_pages, NULL, cinfo, -1);
			return -ENOMEM;
		}
		data->ds_commit_index = -1;
		list_splice_init(mds_pages, &data->pages);
		list_add_tail(&data->list, &list);
		nreq++;
	}

	nreq += pnfs_alloc_ds_commits_list(&list, fl_cinfo, cinfo);
	if (nreq == 0)
		goto out;

	list_for_each_entry_safe(data, tmp, &list, list) {
		list_del(&data->list);
		if (data->ds_commit_index < 0) {
			nfs_init_commit(data, NULL, NULL, cinfo);
			nfs_initiate_commit(NFS_CLIENT(inode), data,
					    NFS_PROTO(data->inode),
					    data->mds_ops, how,
					    RPC_TASK_CRED_NOREF, NULL);
		} else {
			nfs_init_commit(data, NULL, data->lseg, cinfo);
			initiate_commit(data, how);
		}
	}
out:
	return PNFS_ATTEMPTED;
}
EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);

/*
 * Data server cache
 *
 * Data servers can be mapped to different device ids, but should
 * never be shared between net namespaces.
 *
 * nfs4_pnfs_ds reference counting:
 *   - set to 1 on allocation
 *   - incremented when a device id maps a data server already in the cache.
 *   - decremented when deviceid is removed from the cache.
 */

/* Debug routines */
static void
print_ds(struct nfs4_pnfs_ds *ds)
{
	if (ds == NULL) {
		printk(KERN_WARNING "%s NULL device\n", __func__);
		return;
	}
	printk(KERN_WARNING "        ds %s\n"
		"        ref count %d\n"
		"        client %p\n"
		"        cl_exchange_flags %x\n",
		ds->ds_remotestr,
		refcount_read(&ds->ds_count), ds->ds_clp,
		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}

static bool
same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
{
	struct sockaddr_in *a, *b;
	struct sockaddr_in6 *a6, *b6;

	if (addr1->sa_family != addr2->sa_family)
		return false;

	switch (addr1->sa_family) {
	case AF_INET:
		a = (struct sockaddr_in *)addr1;
		b = (struct sockaddr_in *)addr2;

		if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
		    a->sin_port == b->sin_port)
			return true;
		break;

	case AF_INET6:
		a6 = (struct sockaddr_in6 *)addr1;
		b6 = (struct sockaddr_in6 *)addr2;

		/* LINKLOCAL addresses must have matching scope_id */
		if (ipv6_addr_src_scope(&a6->sin6_addr) ==
		    IPV6_ADDR_SCOPE_LINKLOCAL &&
		    a6->sin6_scope_id != b6->sin6_scope_id)
			return false;

		if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
		    a6->sin6_port == b6->sin6_port)
			return true;
		break;

	default:
		dprintk("%s: unhandled address family: %u\n",
			__func__, addr1->sa_family);
		return false;
	}

	return false;
}

/*
 * Checks whether the addresses in 'dsaddrs1' are a subset of those in
 * 'dsaddrs2'. If they are, declare a match.
 */
static bool
_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
			       const struct list_head *dsaddrs2)
{
	struct nfs4_pnfs_ds_addr *da1, *da2;
	struct sockaddr *sa1, *sa2;
	bool match = false;

	list_for_each_entry(da1, dsaddrs1, da_node) {
		sa1 = (struct sockaddr *)&da1->da_addr;
		match = false;
		list_for_each_entry(da2, dsaddrs2, da_node) {
			sa2 = (struct sockaddr *)&da2->da_addr;
			match = same_sockaddr(sa1, sa2);
			if (match)
				break;
		}
		if (!match)
			break;
	}
	return match;
}

/*
 * Lookup DS by addresses. nfs4_data_server_lock is held.
 */
static struct nfs4_pnfs_ds *
_data_server_lookup_locked(const struct nfs_net *nn, const struct list_head *dsaddrs)
{
	struct nfs4_pnfs_ds *ds;

	list_for_each_entry(ds, &nn->nfs4_data_server_cache, ds_node)
		if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
			return ds;
	return NULL;
}

static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
{
	struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);

	if (da)
		INIT_LIST_HEAD(&da->da_node);
	return da;
}

static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
{
	kfree(da->da_remotestr);
	kfree(da->da_netid);
	kfree(da);
}

static void destroy_ds(struct nfs4_pnfs_ds *ds)
{
	struct nfs4_pnfs_ds_addr *da;

	dprintk("--> %s\n", __func__);
	ifdebug(FACILITY)
		print_ds(ds);

	nfs_put_client(ds->ds_clp);

	while (!list_empty(&ds->ds_addrs)) {
		da = list_first_entry(&ds->ds_addrs,
				      struct nfs4_pnfs_ds_addr,
				      da_node);
		list_del_init(&da->da_node);
		nfs4_pnfs_ds_addr_free(da);
	}

	kfree(ds->ds_remotestr);
	kfree(ds);
}

void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
{
	struct nfs_net *nn = net_generic(ds->ds_net, nfs_net_id);

	if (refcount_dec_and_lock(&ds->ds_count, &nn->nfs4_data_server_lock)) {
		list_del_init(&ds->ds_node);
		spin_unlock(&nn->nfs4_data_server_lock);
		destroy_ds(ds);
	}
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);

/*
 * Create a string with a human-readable address and port to avoid
 * complicated setup around many dprintks.
 */
static char *
nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
{
	struct nfs4_pnfs_ds_addr *da;
	char *remotestr;
	size_t len;
	char *p;

	len = 3;	/* '{', '}' and eol */
	list_for_each_entry(da, dsaddrs, da_node) {
		len += strlen(da->da_remotestr) + 1;	/* string plus comma */
	}

	remotestr = kzalloc(len, gfp_flags);
	if (!remotestr)
		return NULL;

	p = remotestr;
	*(p++) = '{';
	len--;
	list_for_each_entry(da, dsaddrs, da_node) {
		size_t ll = strlen(da->da_remotestr);

		if (ll > len)
			goto out_err;

		memcpy(p, da->da_remotestr, ll);
		p += ll;
		len -= ll;

		if (len < 1)
			goto out_err;
		(*p++) = ',';
		len--;
	}
	if (len < 2)
		goto out_err;
	*(p++) = '}';
	*p = '\0';
	return remotestr;
out_err:
	kfree(remotestr);
	return NULL;
}

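/*
 * To illustrate the reference counting rules documented above: a layout
 * driver typically calls nfs4_pnfs_ds_add() while decoding a device id
 * and drops the reference with nfs4_pnfs_ds_put() when that device id is
 * released. Rough sketch only; the surrounding device id handling is
 * driver-specific and not part of this file:
 *
 *	ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
 *	if (!ds)
 *		goto out_err;
 *	...
 *	nfs4_pnfs_ds_put(ds);	(when the device id is released)
 */
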
/*
 * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
 * uncached and return cached struct nfs4_pnfs_ds.
 */
struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags)
{
	struct nfs_net *nn = net_generic(net, nfs_net_id);
	struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
	char *remotestr;

	if (list_empty(dsaddrs)) {
		dprintk("%s: no addresses defined\n", __func__);
		goto out;
	}

	ds = kzalloc(sizeof(*ds), gfp_flags);
	if (!ds)
		goto out;

	/* this is only used for debugging, so it's OK if it's NULL */
	remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);

	spin_lock(&nn->nfs4_data_server_lock);
	tmp_ds = _data_server_lookup_locked(nn, dsaddrs);
	if (tmp_ds == NULL) {
		INIT_LIST_HEAD(&ds->ds_addrs);
		list_splice_init(dsaddrs, &ds->ds_addrs);
		ds->ds_remotestr = remotestr;
		refcount_set(&ds->ds_count, 1);
		INIT_LIST_HEAD(&ds->ds_node);
		ds->ds_net = net;
		ds->ds_clp = NULL;
		list_add(&ds->ds_node, &nn->nfs4_data_server_cache);
		dprintk("%s add new data server %s\n", __func__,
			ds->ds_remotestr);
	} else {
		kfree(remotestr);
		kfree(ds);
		refcount_inc(&tmp_ds->ds_count);
		dprintk("%s data server %s found, inc'ed ds_count to %d\n",
			__func__, tmp_ds->ds_remotestr,
			refcount_read(&tmp_ds->ds_count));
		ds = tmp_ds;
	}
	spin_unlock(&nn->nfs4_data_server_lock);
out:
	return ds;
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);

static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
	might_sleep();
	return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE);
}

static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
	smp_mb__before_atomic();
	clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state);
}

static struct nfs_client *(*get_v3_ds_connect)(
			struct nfs_server *mds_srv,
			const struct sockaddr_storage *ds_addr,
			int ds_addrlen,
			int ds_proto,
			unsigned int ds_timeo,
			unsigned int ds_retrans);

static bool load_v3_ds_connect(void)
{
	if (!get_v3_ds_connect) {
		get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
		WARN_ON_ONCE(!get_v3_ds_connect);
	}

	return (get_v3_ds_connect != NULL);
}

void nfs4_pnfs_v3_ds_connect_unload(void)
{
	if (get_v3_ds_connect) {
		symbol_put(nfs3_set_ds_client);
		get_v3_ds_connect = NULL;
	}
}

static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
				    struct nfs4_pnfs_ds *ds,
				    unsigned int timeo,
				    unsigned int retrans)
{
	struct nfs_client *clp = ERR_PTR(-EIO);
	struct nfs4_pnfs_ds_addr *da;
	unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10;
	int status = 0;

	dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);

	if (!load_v3_ds_connect())
		return -EPROTONOSUPPORT;

	list_for_each_entry(da, &ds->ds_addrs, da_node) {
		dprintk("%s: DS %s: trying address %s\n",
			__func__, ds->ds_remotestr, da->da_remotestr);

		if (!IS_ERR(clp)) {
			struct xprt_create xprt_args = {
				.ident = da->da_transport,
				.net = clp->cl_net,
				.dstaddr = (struct sockaddr *)&da->da_addr,
				.addrlen = da->da_addrlen,
				.servername = clp->cl_hostname,
				.connect_timeout = connect_timeout,
				.reconnect_timeout = connect_timeout,
				.xprtsec = clp->cl_xprtsec,
			};

			if (da->da_transport != clp->cl_proto &&
			    clp->cl_proto != XPRT_TRANSPORT_TCP_TLS)
				continue;
			if (da->da_transport == XPRT_TRANSPORT_TCP &&
			    mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS)
				xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;

			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
				continue;
			/* Add this address as an alias */
			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
					rpc_clnt_test_and_add_xprt, NULL);
			continue;
		}
		if (da->da_transport == XPRT_TRANSPORT_TCP &&
		    mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS)
			da->da_transport = XPRT_TRANSPORT_TCP_TLS;
		clp = get_v3_ds_connect(mds_srv,
				&da->da_addr,
				da->da_addrlen, da->da_transport,
				timeo, retrans);
		if (IS_ERR(clp))
			continue;
		clp->cl_rpcclient->cl_softerr = 0;
		clp->cl_rpcclient->cl_softrtry = 0;
	}

	if (IS_ERR(clp)) {
		status = PTR_ERR(clp);
		goto out;
	}

	smp_wmb();
	WRITE_ONCE(ds->ds_clp, clp);
	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
	return status;
}

static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
				    struct nfs4_pnfs_ds *ds,
				    unsigned int timeo,
				    unsigned int retrans,
				    u32 minor_version)
{
	struct nfs_client *clp = ERR_PTR(-EIO);
	struct nfs4_pnfs_ds_addr *da;
	int status = 0;

	dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);

	list_for_each_entry(da, &ds->ds_addrs, da_node) {
		char servername[48];

		dprintk("%s: DS %s: trying address %s\n",
			__func__, ds->ds_remotestr, da->da_remotestr);

		if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
			struct xprt_create xprt_args = {
				.ident = da->da_transport,
				.net = clp->cl_net,
				.dstaddr = (struct sockaddr *)&da->da_addr,
				.addrlen = da->da_addrlen,
				.servername = clp->cl_hostname,
				.xprtsec = clp->cl_xprtsec,
			};
			struct nfs4_add_xprt_data xprtdata = {
				.clp = clp,
			};
			struct rpc_add_xprt_test rpcdata = {
				.add_xprt_test = clp->cl_mvops->session_trunk,
				.data = &xprtdata,
			};

			if (da->da_transport != clp->cl_proto &&
			    clp->cl_proto != XPRT_TRANSPORT_TCP_TLS)
				continue;
			if (da->da_transport == XPRT_TRANSPORT_TCP &&
			    mds_srv->nfs_client->cl_proto ==
				XPRT_TRANSPORT_TCP_TLS) {
				struct sockaddr *addr =
					(struct sockaddr *)&da->da_addr;
				struct sockaddr_in *sin =
					(struct sockaddr_in *)&da->da_addr;
				struct sockaddr_in6 *sin6 =
					(struct sockaddr_in6 *)&da->da_addr;

				/* For NFS with TLS we need to supply the
				 * servername of the trunked transport, not
				 * the servername of the main transport
				 * stored in clp->cl_hostname. Also set the
				 * protocol to indicate that TLS should be
				 * used.
				 */
				servername[0] = '\0';
				switch (addr->sa_family) {
				case AF_INET:
					snprintf(servername, sizeof(servername),
						 "%pI4", &sin->sin_addr.s_addr);
					break;
				case AF_INET6:
					snprintf(servername, sizeof(servername),
						 "%pI6", &sin6->sin6_addr);
					break;
				default:
					/* do not consider this address */
					continue;
				}
				xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;
				xprt_args.servername = servername;
			}
			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
				continue;

			/*
			 * Test this address for session trunking and
			 * add as an alias
			 */
			xprtdata.cred = nfs4_get_clid_cred(clp);
			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
					  rpc_clnt_setup_test_and_add_xprt,
					  &rpcdata);
			if (xprtdata.cred)
				put_cred(xprtdata.cred);
		} else {
			if (da->da_transport == XPRT_TRANSPORT_TCP &&
			    mds_srv->nfs_client->cl_proto ==
			    XPRT_TRANSPORT_TCP_TLS)
				da->da_transport = XPRT_TRANSPORT_TCP_TLS;
			clp = nfs4_set_ds_client(mds_srv,
						 &da->da_addr,
						 da->da_addrlen,
						 da->da_transport, timeo,
						 retrans, minor_version);
			if (IS_ERR(clp))
				continue;

			status = nfs4_init_ds_session(clp,
					mds_srv->nfs_client->cl_lease_time);
			if (status) {
				nfs_put_client(clp);
				clp = ERR_PTR(-EIO);
				continue;
			}

		}
	}

	if (IS_ERR(clp)) {
		status = PTR_ERR(clp);
		goto out;
	}

	smp_wmb();
	WRITE_ONCE(ds->ds_clp, clp);
	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
	return status;
}

/*
 * Create an rpc connection to the nfs4_pnfs_ds data server.
 * Currently only supports IPv4 and IPv6 addresses.
 * If connection fails, make devid unavailable and return a -errno.
 */
int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
			 struct nfs4_deviceid_node *devid, unsigned int timeo,
			 unsigned int retrans, u32 version, u32 minor_version)
{
	int err;

	do {
		err = nfs4_wait_ds_connect(ds);
		if (err || ds->ds_clp)
			goto out;
		if (nfs4_test_deviceid_unavailable(devid))
			return -ENODEV;
	} while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);

	if (ds->ds_clp)
		goto connect_done;

	switch (version) {
	case 3:
		err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
		break;
	case 4:
		err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
					       minor_version);
		break;
	default:
		dprintk("%s: unsupported DS version %d\n", __func__, version);
		err = -EPROTONOSUPPORT;
	}

connect_done:
	nfs4_clear_ds_conn_bit(ds);
out:
	/*
	 * At this point the ds->ds_clp should be ready, but it might have
	 * hit an error.
	 */
	if (!err) {
		if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
			WARN_ON_ONCE(ds->ds_clp ||
				!nfs4_test_deviceid_unavailable(devid));
			return -EINVAL;
		}
		err = nfs_client_init_status(ds->ds_clp);
	}

	return err;
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);

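/*
 * Concurrency notes for nfs4_pnfs_ds_connect(): only the caller that wins
 * the test_and_set_bit() on NFS4DS_CONNECTING performs the connect; any
 * other caller sleeps in nfs4_wait_ds_connect() until the bit is cleared,
 * and then either finds ds->ds_clp set or sees the device id marked
 * unavailable (-ENODEV).
 */
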
/*
 * Currently only supports ipv4, ipv6 and one multi-path address.
 */
struct nfs4_pnfs_ds_addr *
nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
{
	struct nfs4_pnfs_ds_addr *da = NULL;
	char *buf, *portstr;
	__be16 port;
	ssize_t nlen, rlen;
	int tmp[2];
	char *netid;
	size_t len;
	char *startsep = "";
	char *endsep = "";

	/* r_netid */
	nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
					    gfp_flags);
	if (unlikely(nlen < 0))
		goto out_err;

	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
	/* port is ".ABC.DEF", 8 chars max */
	rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
					    IPV6_SCOPE_ID_LEN + 8, gfp_flags);
	if (unlikely(rlen < 0))
		goto out_free_netid;

	/* replace port '.' with '-' */
	portstr = strrchr(buf, '.');
	if (!portstr) {
		dprintk("%s: Failed finding expected dot in port\n",
			__func__);
		goto out_free_buf;
	}
	*portstr = '-';

	/* find '.' between address and port */
	portstr = strrchr(buf, '.');
	if (!portstr) {
		dprintk("%s: Failed finding expected dot between address and "
			"port\n", __func__);
		goto out_free_buf;
	}
	*portstr = '\0';

	da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
	if (unlikely(!da))
		goto out_free_buf;

	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
		      sizeof(da->da_addr))) {
		dprintk("%s: error parsing address %s\n", __func__, buf);
		goto out_free_da;
	}

	portstr++;
	sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
	port = htons((tmp[0] << 8) | (tmp[1]));

	switch (da->da_addr.ss_family) {
	case AF_INET:
		((struct sockaddr_in *)&da->da_addr)->sin_port = port;
		da->da_addrlen = sizeof(struct sockaddr_in);
		break;

	case AF_INET6:
		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
		da->da_addrlen = sizeof(struct sockaddr_in6);
		startsep = "[";
		endsep = "]";
		break;

	default:
		dprintk("%s: unsupported address family: %u\n",
			__func__, da->da_addr.ss_family);
		goto out_free_da;
	}

	da->da_transport = xprt_find_transport_ident(netid);
	if (da->da_transport < 0) {
		dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
			__func__, netid);
		goto out_free_da;
	}

	da->da_netid = netid;

	/* save human readable address */
	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
	da->da_remotestr = kzalloc(len, gfp_flags);

	/* NULL is ok, only used for dprintk */
	if (da->da_remotestr)
		snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
			 buf, endsep, ntohs(port));

	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
	kfree(buf);
	return da;

out_free_da:
	kfree(da);
out_free_buf:
	dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
	kfree(buf);
out_free_netid:
	kfree(netid);
out_err:
	return NULL;
}
EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);

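/*
 * Example of the netid/addr encoding handled above (universal address
 * format, RFC 5665): r_netid "tcp" with r_addr "192.0.2.10.8.1" parses to
 * IPv4 address 192.0.2.10 and port 8 * 256 + 1 = 2049. The address shown
 * is from the documentation range and purely illustrative.
 */
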
void
pnfs_layout_mark_request_commit(struct nfs_page *req,
				struct pnfs_layout_segment *lseg,
				struct nfs_commit_info *cinfo,
				u32 ds_commit_idx)
{
	struct list_head *list;
	struct pnfs_commit_array *array;
	struct pnfs_commit_bucket *bucket;

	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
	array = pnfs_lookup_commit_array(cinfo->ds, lseg);
	if (!array || !pnfs_is_valid_lseg(lseg))
		goto out_resched;
	bucket = &array->buckets[ds_commit_idx];
	list = &bucket->written;
	/* Non-empty buckets hold a reference on the lseg. That ref
	 * is normally transferred to the COMMIT call and released
	 * there. It could also be released if the last req is pulled
	 * off due to a rewrite, in which case it will be done in
	 * pnfs_generic_clear_request_commit().
	 */
	if (!bucket->lseg)
		bucket->lseg = pnfs_get_lseg(lseg);
	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
	cinfo->ds->nwritten++;

	nfs_request_add_commit_list_locked(req, list, cinfo);
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
	return;
out_resched:
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	cinfo->completion_ops->resched_write(cinfo, req);
}
EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);

int
pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
{
	int ret;

	if (!pnfs_layoutcommit_outstanding(inode))
		return 0;
	ret = nfs_commit_inode(inode, FLUSH_SYNC);
	if (ret < 0)
		return ret;
	if (datasync)
		return 0;
	return pnfs_layoutcommit_inode(inode, true);
}
EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
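
/*
 * The exported helpers above are intended to be wired into a pNFS layout
 * driver's commit path. A rough, illustrative sketch only: the field
 * names are assumed to follow struct pnfs_commit_ops in pnfs.h, and the
 * my_layout_* callbacks are hypothetical, driver-specific placeholders:
 *
 *	static const struct pnfs_commit_ops my_layout_commit_ops = {
 *		.setup_ds_info		= my_layout_setup_ds_info,
 *		.release_ds_info	= my_layout_release_ds_info,
 *		.mark_request_commit	= pnfs_layout_mark_request_commit,
 *		.clear_request_commit	= pnfs_generic_clear_request_commit,
 *		.scan_commit_lists	= pnfs_generic_scan_commit_lists,
 *		.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
 *		.commit_pagelist	= my_layout_commit_pagelist,
 *	};
 */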