1 /*
2  *  Device operations for the pnfs nfs4 file layout driver.
3  *
4  *  Copyright (c) 2002
5  *  The Regents of the University of Michigan
6  *  All Rights Reserved
7  *
8  *  Dean Hildebrand <dhildebz@umich.edu>
9  *  Garth Goodson   <Garth.Goodson@netapp.com>
10  *
11  *  Permission is granted to use, copy, create derivative works, and
12  *  redistribute this software and such derivative works for any purpose,
13  *  so long as the name of the University of Michigan is not used in
14  *  any advertising or publicity pertaining to the use or distribution
15  *  of this software without specific, written prior authorization. If
16  *  the above copyright notice or any other identification of the
17  *  University of Michigan is included in any copy of any portion of
18  *  this software, then the disclaimer below must also be included.
19  *
20  *  This software is provided as is, without representation or warranty
21  *  of any kind either express or implied, including without limitation
22  *  the implied warranties of merchantability, fitness for a particular
23  *  purpose, or noninfringement.  The Regents of the University of
24  *  Michigan shall not be liable for any damages, including special,
25  *  indirect, incidental, or consequential damages, with respect to any
26  *  claim arising out of or in connection with the use of the software,
27  *  even if it has been or is hereafter advised of the possibility of
28  *  such damages.
29  */
30 
31 #include <linux/nfs_fs.h>
32 #include <linux/vmalloc.h>
33 
34 #include "internal.h"
35 #include "nfs4filelayout.h"
36 
37 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
38 
/*
 * Data server cache
 *
 * Data servers can be mapped to different device ids.
 * nfs4_pnfs_ds reference counting
 *   - set to 1 on allocation
 *   - incremented when a device id maps a data server already in the cache.
 *   - decremented when deviceid is removed from the cache.
 */
/* Held in this file around every lookup in, insertion into and removal from the cache list. */
DEFINE_SPINLOCK(nfs4_ds_cache_lock);
/* All cached data servers, linked through nfs4_pnfs_ds.ds_node. */
static LIST_HEAD(nfs4_data_server_cache);
50 
51 /* Debug routines */
52 void
print_ds(struct nfs4_pnfs_ds * ds)53 print_ds(struct nfs4_pnfs_ds *ds)
54 {
55 	if (ds == NULL) {
56 		printk("%s NULL device\n", __func__);
57 		return;
58 	}
59 	printk("        ds %s\n"
60 		"        ref count %d\n"
61 		"        client %p\n"
62 		"        cl_exchange_flags %x\n",
63 		ds->ds_remotestr,
64 		atomic_read(&ds->ds_count), ds->ds_clp,
65 		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66 }
67 
68 static bool
same_sockaddr(struct sockaddr * addr1,struct sockaddr * addr2)69 same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
70 {
71 	struct sockaddr_in *a, *b;
72 	struct sockaddr_in6 *a6, *b6;
73 
74 	if (addr1->sa_family != addr2->sa_family)
75 		return false;
76 
77 	switch (addr1->sa_family) {
78 	case AF_INET:
79 		a = (struct sockaddr_in *)addr1;
80 		b = (struct sockaddr_in *)addr2;
81 
82 		if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
83 		    a->sin_port == b->sin_port)
84 			return true;
85 		break;
86 
87 	case AF_INET6:
88 		a6 = (struct sockaddr_in6 *)addr1;
89 		b6 = (struct sockaddr_in6 *)addr2;
90 
91 		/* LINKLOCAL addresses must have matching scope_id */
92 		if (ipv6_addr_scope(&a6->sin6_addr) ==
93 		    IPV6_ADDR_SCOPE_LINKLOCAL &&
94 		    a6->sin6_scope_id != b6->sin6_scope_id)
95 			return false;
96 
97 		if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
98 		    a6->sin6_port == b6->sin6_port)
99 			return true;
100 		break;
101 
102 	default:
103 		dprintk("%s: unhandled address family: %u\n",
104 			__func__, addr1->sa_family);
105 		return false;
106 	}
107 
108 	return false;
109 }
110 
111 /*
112  * Lookup DS by addresses.  The first matching address returns true.
113  * nfs4_ds_cache_lock is held
114  */
115 static struct nfs4_pnfs_ds *
_data_server_lookup_locked(struct list_head * dsaddrs)116 _data_server_lookup_locked(struct list_head *dsaddrs)
117 {
118 	struct nfs4_pnfs_ds *ds;
119 	struct nfs4_pnfs_ds_addr *da1, *da2;
120 
121 	list_for_each_entry(da1, dsaddrs, da_node) {
122 		list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
123 			list_for_each_entry(da2, &ds->ds_addrs, da_node) {
124 				if (same_sockaddr(
125 					(struct sockaddr *)&da1->da_addr,
126 					(struct sockaddr *)&da2->da_addr))
127 					return ds;
128 			}
129 		}
130 	}
131 	return NULL;
132 }
133 
134 /*
135  * Compare two lists of addresses.
136  */
137 static bool
_data_server_match_all_addrs_locked(struct list_head * dsaddrs1,struct list_head * dsaddrs2)138 _data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
139 				    struct list_head *dsaddrs2)
140 {
141 	struct nfs4_pnfs_ds_addr *da1, *da2;
142 	size_t count1 = 0,
143 	       count2 = 0;
144 
145 	list_for_each_entry(da1, dsaddrs1, da_node)
146 		count1++;
147 
148 	list_for_each_entry(da2, dsaddrs2, da_node) {
149 		bool found = false;
150 		count2++;
151 		list_for_each_entry(da1, dsaddrs1, da_node) {
152 			if (same_sockaddr((struct sockaddr *)&da1->da_addr,
153 				(struct sockaddr *)&da2->da_addr)) {
154 				found = true;
155 				break;
156 			}
157 		}
158 		if (!found)
159 			return false;
160 	}
161 
162 	return (count1 == count2);
163 }
164 
165 /*
166  * Create an rpc connection to the nfs4_pnfs_ds data server
167  * Currently only supports IPv4 and IPv6 addresses
168  */
169 static int
nfs4_ds_connect(struct nfs_server * mds_srv,struct nfs4_pnfs_ds * ds)170 nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
171 {
172 	struct nfs_client *clp = ERR_PTR(-EIO);
173 	struct nfs4_pnfs_ds_addr *da;
174 	int status = 0;
175 
176 	dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
177 		mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
178 
179 	BUG_ON(list_empty(&ds->ds_addrs));
180 
181 	list_for_each_entry(da, &ds->ds_addrs, da_node) {
182 		dprintk("%s: DS %s: trying address %s\n",
183 			__func__, ds->ds_remotestr, da->da_remotestr);
184 
185 		clp = nfs4_set_ds_client(mds_srv->nfs_client,
186 				 (struct sockaddr *)&da->da_addr,
187 				 da->da_addrlen, IPPROTO_TCP);
188 		if (!IS_ERR(clp))
189 			break;
190 	}
191 
192 	if (IS_ERR(clp)) {
193 		status = PTR_ERR(clp);
194 		goto out;
195 	}
196 
197 	if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
198 		if (!is_ds_client(clp)) {
199 			status = -ENODEV;
200 			goto out_put;
201 		}
202 		ds->ds_clp = clp;
203 		dprintk("%s [existing] server=%s\n", __func__,
204 			ds->ds_remotestr);
205 		goto out;
206 	}
207 
208 	/*
209 	 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
210 	 * be equal to the MDS lease. Renewal is scheduled in create_session.
211 	 */
212 	spin_lock(&mds_srv->nfs_client->cl_lock);
213 	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
214 	spin_unlock(&mds_srv->nfs_client->cl_lock);
215 	clp->cl_last_renewal = jiffies;
216 
217 	/* New nfs_client */
218 	status = nfs4_init_ds_session(clp);
219 	if (status)
220 		goto out_put;
221 
222 	ds->ds_clp = clp;
223 	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
224 out:
225 	return status;
226 out_put:
227 	nfs_put_client(clp);
228 	goto out;
229 }
230 
231 static void
destroy_ds(struct nfs4_pnfs_ds * ds)232 destroy_ds(struct nfs4_pnfs_ds *ds)
233 {
234 	struct nfs4_pnfs_ds_addr *da;
235 
236 	dprintk("--> %s\n", __func__);
237 	ifdebug(FACILITY)
238 		print_ds(ds);
239 
240 	if (ds->ds_clp)
241 		nfs_put_client(ds->ds_clp);
242 
243 	while (!list_empty(&ds->ds_addrs)) {
244 		da = list_first_entry(&ds->ds_addrs,
245 				      struct nfs4_pnfs_ds_addr,
246 				      da_node);
247 		list_del_init(&da->da_node);
248 		kfree(da->da_remotestr);
249 		kfree(da);
250 	}
251 
252 	kfree(ds->ds_remotestr);
253 	kfree(ds);
254 }
255 
256 void
nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr * dsaddr)257 nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
258 {
259 	struct nfs4_pnfs_ds *ds;
260 	int i;
261 
262 	nfs4_print_deviceid(&dsaddr->id_node.deviceid);
263 
264 	for (i = 0; i < dsaddr->ds_num; i++) {
265 		ds = dsaddr->ds_list[i];
266 		if (ds != NULL) {
267 			if (atomic_dec_and_lock(&ds->ds_count,
268 						&nfs4_ds_cache_lock)) {
269 				list_del_init(&ds->ds_node);
270 				spin_unlock(&nfs4_ds_cache_lock);
271 				destroy_ds(ds);
272 			}
273 		}
274 	}
275 	kfree(dsaddr->stripe_indices);
276 	kfree(dsaddr);
277 }
278 
279 /*
280  * Create a string with a human readable address and port to avoid
281  * complicated setup around many dprinks.
282  */
283 static char *
nfs4_pnfs_remotestr(struct list_head * dsaddrs,gfp_t gfp_flags)284 nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
285 {
286 	struct nfs4_pnfs_ds_addr *da;
287 	char *remotestr;
288 	size_t len;
289 	char *p;
290 
291 	len = 3;        /* '{', '}' and eol */
292 	list_for_each_entry(da, dsaddrs, da_node) {
293 		len += strlen(da->da_remotestr) + 1;    /* string plus comma */
294 	}
295 
296 	remotestr = kzalloc(len, gfp_flags);
297 	if (!remotestr)
298 		return NULL;
299 
300 	p = remotestr;
301 	*(p++) = '{';
302 	len--;
303 	list_for_each_entry(da, dsaddrs, da_node) {
304 		size_t ll = strlen(da->da_remotestr);
305 
306 		if (ll > len)
307 			goto out_err;
308 
309 		memcpy(p, da->da_remotestr, ll);
310 		p += ll;
311 		len -= ll;
312 
313 		if (len < 1)
314 			goto out_err;
315 		(*p++) = ',';
316 		len--;
317 	}
318 	if (len < 2)
319 		goto out_err;
320 	*(p++) = '}';
321 	*p = '\0';
322 	return remotestr;
323 out_err:
324 	kfree(remotestr);
325 	return NULL;
326 }
327 
328 static struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(struct list_head * dsaddrs,gfp_t gfp_flags)329 nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
330 {
331 	struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
332 	char *remotestr;
333 
334 	if (list_empty(dsaddrs)) {
335 		dprintk("%s: no addresses defined\n", __func__);
336 		goto out;
337 	}
338 
339 	ds = kzalloc(sizeof(*ds), gfp_flags);
340 	if (!ds)
341 		goto out;
342 
343 	/* this is only used for debugging, so it's ok if its NULL */
344 	remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
345 
346 	spin_lock(&nfs4_ds_cache_lock);
347 	tmp_ds = _data_server_lookup_locked(dsaddrs);
348 	if (tmp_ds == NULL) {
349 		INIT_LIST_HEAD(&ds->ds_addrs);
350 		list_splice_init(dsaddrs, &ds->ds_addrs);
351 		ds->ds_remotestr = remotestr;
352 		atomic_set(&ds->ds_count, 1);
353 		INIT_LIST_HEAD(&ds->ds_node);
354 		ds->ds_clp = NULL;
355 		list_add(&ds->ds_node, &nfs4_data_server_cache);
356 		dprintk("%s add new data server %s\n", __func__,
357 			ds->ds_remotestr);
358 	} else {
359 		if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
360 							 dsaddrs)) {
361 			dprintk("%s:  multipath address mismatch: %s != %s",
362 				__func__, tmp_ds->ds_remotestr, remotestr);
363 		}
364 		kfree(remotestr);
365 		kfree(ds);
366 		atomic_inc(&tmp_ds->ds_count);
367 		dprintk("%s data server %s found, inc'ed ds_count to %d\n",
368 			__func__, tmp_ds->ds_remotestr,
369 			atomic_read(&tmp_ds->ds_count));
370 		ds = tmp_ds;
371 	}
372 	spin_unlock(&nfs4_ds_cache_lock);
373 out:
374 	return ds;
375 }
376 
377 /*
378  * Currently only supports ipv4, ipv6 and one multi-path address.
379  */
380 static struct nfs4_pnfs_ds_addr *
decode_ds_addr(struct xdr_stream * streamp,gfp_t gfp_flags)381 decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
382 {
383 	struct nfs4_pnfs_ds_addr *da = NULL;
384 	char *buf, *portstr;
385 	__be16 port;
386 	int nlen, rlen;
387 	int tmp[2];
388 	__be32 *p;
389 	char *netid, *match_netid;
390 	size_t len, match_netid_len;
391 	char *startsep = "";
392 	char *endsep = "";
393 
394 
395 	/* r_netid */
396 	p = xdr_inline_decode(streamp, 4);
397 	if (unlikely(!p))
398 		goto out_err;
399 	nlen = be32_to_cpup(p++);
400 
401 	p = xdr_inline_decode(streamp, nlen);
402 	if (unlikely(!p))
403 		goto out_err;
404 
405 	netid = kmalloc(nlen+1, gfp_flags);
406 	if (unlikely(!netid))
407 		goto out_err;
408 
409 	netid[nlen] = '\0';
410 	memcpy(netid, p, nlen);
411 
412 	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
413 	p = xdr_inline_decode(streamp, 4);
414 	if (unlikely(!p))
415 		goto out_free_netid;
416 	rlen = be32_to_cpup(p);
417 
418 	p = xdr_inline_decode(streamp, rlen);
419 	if (unlikely(!p))
420 		goto out_free_netid;
421 
422 	/* port is ".ABC.DEF", 8 chars max */
423 	if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
424 		dprintk("%s: Invalid address, length %d\n", __func__,
425 			rlen);
426 		goto out_free_netid;
427 	}
428 	buf = kmalloc(rlen + 1, gfp_flags);
429 	if (!buf) {
430 		dprintk("%s: Not enough memory\n", __func__);
431 		goto out_free_netid;
432 	}
433 	buf[rlen] = '\0';
434 	memcpy(buf, p, rlen);
435 
436 	/* replace port '.' with '-' */
437 	portstr = strrchr(buf, '.');
438 	if (!portstr) {
439 		dprintk("%s: Failed finding expected dot in port\n",
440 			__func__);
441 		goto out_free_buf;
442 	}
443 	*portstr = '-';
444 
445 	/* find '.' between address and port */
446 	portstr = strrchr(buf, '.');
447 	if (!portstr) {
448 		dprintk("%s: Failed finding expected dot between address and "
449 			"port\n", __func__);
450 		goto out_free_buf;
451 	}
452 	*portstr = '\0';
453 
454 	da = kzalloc(sizeof(*da), gfp_flags);
455 	if (unlikely(!da))
456 		goto out_free_buf;
457 
458 	INIT_LIST_HEAD(&da->da_node);
459 
460 	if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
461 		      sizeof(da->da_addr))) {
462 		dprintk("%s: error parsing address %s\n", __func__, buf);
463 		goto out_free_da;
464 	}
465 
466 	portstr++;
467 	sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
468 	port = htons((tmp[0] << 8) | (tmp[1]));
469 
470 	switch (da->da_addr.ss_family) {
471 	case AF_INET:
472 		((struct sockaddr_in *)&da->da_addr)->sin_port = port;
473 		da->da_addrlen = sizeof(struct sockaddr_in);
474 		match_netid = "tcp";
475 		match_netid_len = 3;
476 		break;
477 
478 	case AF_INET6:
479 		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
480 		da->da_addrlen = sizeof(struct sockaddr_in6);
481 		match_netid = "tcp6";
482 		match_netid_len = 4;
483 		startsep = "[";
484 		endsep = "]";
485 		break;
486 
487 	default:
488 		dprintk("%s: unsupported address family: %u\n",
489 			__func__, da->da_addr.ss_family);
490 		goto out_free_da;
491 	}
492 
493 	if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
494 		dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
495 			__func__, netid, match_netid);
496 		goto out_free_da;
497 	}
498 
499 	/* save human readable address */
500 	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
501 	da->da_remotestr = kzalloc(len, gfp_flags);
502 
503 	/* NULL is ok, only used for dprintk */
504 	if (da->da_remotestr)
505 		snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
506 			 buf, endsep, ntohs(port));
507 
508 	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
509 	kfree(buf);
510 	kfree(netid);
511 	return da;
512 
513 out_free_da:
514 	kfree(da);
515 out_free_buf:
516 	dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
517 	kfree(buf);
518 out_free_netid:
519 	kfree(netid);
520 out_err:
521 	return NULL;
522 }
523 
524 /* Decode opaque device data and return the result */
525 static struct nfs4_file_layout_dsaddr*
decode_device(struct inode * ino,struct pnfs_device * pdev,gfp_t gfp_flags)526 decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
527 {
528 	int i;
529 	u32 cnt, num;
530 	u8 *indexp;
531 	__be32 *p;
532 	u8 *stripe_indices;
533 	u8 max_stripe_index;
534 	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
535 	struct xdr_stream stream;
536 	struct xdr_buf buf;
537 	struct page *scratch;
538 	struct list_head dsaddrs;
539 	struct nfs4_pnfs_ds_addr *da;
540 
541 	/* set up xdr stream */
542 	scratch = alloc_page(gfp_flags);
543 	if (!scratch)
544 		goto out_err;
545 
546 	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
547 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
548 
549 	/* Get the stripe count (number of stripe index) */
550 	p = xdr_inline_decode(&stream, 4);
551 	if (unlikely(!p))
552 		goto out_err_free_scratch;
553 
554 	cnt = be32_to_cpup(p);
555 	dprintk("%s stripe count  %d\n", __func__, cnt);
556 	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
557 		printk(KERN_WARNING "%s: stripe count %d greater than "
558 		       "supported maximum %d\n", __func__,
559 			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
560 		goto out_err_free_scratch;
561 	}
562 
563 	/* read stripe indices */
564 	stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
565 	if (!stripe_indices)
566 		goto out_err_free_scratch;
567 
568 	p = xdr_inline_decode(&stream, cnt << 2);
569 	if (unlikely(!p))
570 		goto out_err_free_stripe_indices;
571 
572 	indexp = &stripe_indices[0];
573 	max_stripe_index = 0;
574 	for (i = 0; i < cnt; i++) {
575 		*indexp = be32_to_cpup(p++);
576 		max_stripe_index = max(max_stripe_index, *indexp);
577 		indexp++;
578 	}
579 
580 	/* Check the multipath list count */
581 	p = xdr_inline_decode(&stream, 4);
582 	if (unlikely(!p))
583 		goto out_err_free_stripe_indices;
584 
585 	num = be32_to_cpup(p);
586 	dprintk("%s ds_num %u\n", __func__, num);
587 	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
588 		printk(KERN_WARNING "%s: multipath count %d greater than "
589 			"supported maximum %d\n", __func__,
590 			num, NFS4_PNFS_MAX_MULTI_CNT);
591 		goto out_err_free_stripe_indices;
592 	}
593 
594 	/* validate stripe indices are all < num */
595 	if (max_stripe_index >= num) {
596 		printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
597 			__func__, max_stripe_index, num);
598 		goto out_err_free_stripe_indices;
599 	}
600 
601 	dsaddr = kzalloc(sizeof(*dsaddr) +
602 			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
603 			gfp_flags);
604 	if (!dsaddr)
605 		goto out_err_free_stripe_indices;
606 
607 	dsaddr->stripe_count = cnt;
608 	dsaddr->stripe_indices = stripe_indices;
609 	stripe_indices = NULL;
610 	dsaddr->ds_num = num;
611 	nfs4_init_deviceid_node(&dsaddr->id_node,
612 				NFS_SERVER(ino)->pnfs_curr_ld,
613 				NFS_SERVER(ino)->nfs_client,
614 				&pdev->dev_id);
615 
616 	INIT_LIST_HEAD(&dsaddrs);
617 
618 	for (i = 0; i < dsaddr->ds_num; i++) {
619 		int j;
620 		u32 mp_count;
621 
622 		p = xdr_inline_decode(&stream, 4);
623 		if (unlikely(!p))
624 			goto out_err_free_deviceid;
625 
626 		mp_count = be32_to_cpup(p); /* multipath count */
627 		for (j = 0; j < mp_count; j++) {
628 			da = decode_ds_addr(&stream, gfp_flags);
629 			if (da)
630 				list_add_tail(&da->da_node, &dsaddrs);
631 		}
632 		if (list_empty(&dsaddrs)) {
633 			dprintk("%s: no suitable DS addresses found\n",
634 				__func__);
635 			goto out_err_free_deviceid;
636 		}
637 
638 		dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
639 		if (!dsaddr->ds_list[i])
640 			goto out_err_drain_dsaddrs;
641 
642 		/* If DS was already in cache, free ds addrs */
643 		while (!list_empty(&dsaddrs)) {
644 			da = list_first_entry(&dsaddrs,
645 					      struct nfs4_pnfs_ds_addr,
646 					      da_node);
647 			list_del_init(&da->da_node);
648 			kfree(da->da_remotestr);
649 			kfree(da);
650 		}
651 	}
652 
653 	__free_page(scratch);
654 	return dsaddr;
655 
656 out_err_drain_dsaddrs:
657 	while (!list_empty(&dsaddrs)) {
658 		da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
659 				      da_node);
660 		list_del_init(&da->da_node);
661 		kfree(da->da_remotestr);
662 		kfree(da);
663 	}
664 out_err_free_deviceid:
665 	nfs4_fl_free_deviceid(dsaddr);
666 	/* stripe_indicies was part of dsaddr */
667 	goto out_err_free_scratch;
668 out_err_free_stripe_indices:
669 	kfree(stripe_indices);
670 out_err_free_scratch:
671 	__free_page(scratch);
672 out_err:
673 	dprintk("%s ERROR: returning NULL\n", __func__);
674 	return NULL;
675 }
676 
677 /*
678  * Decode the opaque device specified in 'dev' and add it to the cache of
679  * available devices.
680  */
681 static struct nfs4_file_layout_dsaddr *
decode_and_add_device(struct inode * inode,struct pnfs_device * dev,gfp_t gfp_flags)682 decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
683 {
684 	struct nfs4_deviceid_node *d;
685 	struct nfs4_file_layout_dsaddr *n, *new;
686 
687 	new = decode_device(inode, dev, gfp_flags);
688 	if (!new) {
689 		printk(KERN_WARNING "%s: Could not decode or add device\n",
690 			__func__);
691 		return NULL;
692 	}
693 
694 	d = nfs4_insert_deviceid_node(&new->id_node);
695 	n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
696 	if (n != new) {
697 		nfs4_fl_free_deviceid(new);
698 		return n;
699 	}
700 
701 	return new;
702 }
703 
704 /*
705  * Retrieve the information for dev_id, add it to the list
706  * of available devices, and return it.
707  */
708 struct nfs4_file_layout_dsaddr *
get_device_info(struct inode * inode,struct nfs4_deviceid * dev_id,gfp_t gfp_flags)709 get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
710 {
711 	struct pnfs_device *pdev = NULL;
712 	u32 max_resp_sz;
713 	int max_pages;
714 	struct page **pages = NULL;
715 	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
716 	int rc, i;
717 	struct nfs_server *server = NFS_SERVER(inode);
718 
719 	/*
720 	 * Use the session max response size as the basis for setting
721 	 * GETDEVICEINFO's maxcount
722 	 */
723 	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
724 	max_pages = max_resp_sz >> PAGE_SHIFT;
725 	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
726 		__func__, inode, max_resp_sz, max_pages);
727 
728 	pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
729 	if (pdev == NULL)
730 		return NULL;
731 
732 	pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
733 	if (pages == NULL) {
734 		kfree(pdev);
735 		return NULL;
736 	}
737 	for (i = 0; i < max_pages; i++) {
738 		pages[i] = alloc_page(gfp_flags);
739 		if (!pages[i])
740 			goto out_free;
741 	}
742 
743 	memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
744 	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
745 	pdev->pages = pages;
746 	pdev->pgbase = 0;
747 	pdev->pglen = PAGE_SIZE * max_pages;
748 	pdev->mincount = 0;
749 
750 	rc = nfs4_proc_getdeviceinfo(server, pdev);
751 	dprintk("%s getdevice info returns %d\n", __func__, rc);
752 	if (rc)
753 		goto out_free;
754 
755 	/*
756 	 * Found new device, need to decode it and then add it to the
757 	 * list of known devices for this mountpoint.
758 	 */
759 	dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
760 out_free:
761 	for (i = 0; i < max_pages; i++)
762 		__free_page(pages[i]);
763 	kfree(pages);
764 	kfree(pdev);
765 	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
766 	return dsaddr;
767 }
768 
769 void
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr * dsaddr)770 nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
771 {
772 	nfs4_put_deviceid_node(&dsaddr->id_node);
773 }
774 
775 /*
776  * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
777  * Then: ((res + fsi) % dsaddr->stripe_count)
778  */
779 u32
nfs4_fl_calc_j_index(struct pnfs_layout_segment * lseg,loff_t offset)780 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
781 {
782 	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
783 	u64 tmp;
784 
785 	tmp = offset - flseg->pattern_offset;
786 	do_div(tmp, flseg->stripe_unit);
787 	tmp += flseg->first_stripe_index;
788 	return do_div(tmp, flseg->dsaddr->stripe_count);
789 }
790 
791 u32
nfs4_fl_calc_ds_index(struct pnfs_layout_segment * lseg,u32 j)792 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
793 {
794 	return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
795 }
796 
797 struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment * lseg,u32 j)798 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
799 {
800 	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
801 	u32 i;
802 
803 	if (flseg->stripe_type == STRIPE_SPARSE) {
804 		if (flseg->num_fh == 1)
805 			i = 0;
806 		else if (flseg->num_fh == 0)
807 			/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
808 			return NULL;
809 		else
810 			i = nfs4_fl_calc_ds_index(lseg, j);
811 	} else
812 		i = j;
813 	return flseg->fh_array[i];
814 }
815 
816 static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr * dsaddr,int err,const char * ds_remotestr)817 filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
818 			       int err, const char *ds_remotestr)
819 {
820 	u32 *p = (u32 *)&dsaddr->id_node.deviceid;
821 
822 	printk(KERN_ERR "NFS: data server %s connection error %d."
823 		" Deviceid [%x%x%x%x] marked out of use.\n",
824 		ds_remotestr, err, p[0], p[1], p[2], p[3]);
825 
826 	spin_lock(&nfs4_ds_cache_lock);
827 	dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
828 	spin_unlock(&nfs4_ds_cache_lock);
829 }
830 
831 struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment * lseg,u32 ds_idx)832 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
833 {
834 	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
835 	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
836 
837 	if (ds == NULL) {
838 		printk(KERN_ERR "%s: No data server for offset index %d\n",
839 			__func__, ds_idx);
840 		return NULL;
841 	}
842 
843 	if (!ds->ds_clp) {
844 		struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
845 		int err;
846 
847 		if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
848 			/* Already tried to connect, don't try again */
849 			dprintk("%s Deviceid marked out of use\n", __func__);
850 			return NULL;
851 		}
852 		err = nfs4_ds_connect(s, ds);
853 		if (err) {
854 			filelayout_mark_devid_negative(dsaddr, err,
855 						       ds->ds_remotestr);
856 			return NULL;
857 		}
858 	}
859 	return ds;
860 }
861