1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_mount.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_dir2.h"
31 #include "xfs_dir2_format.h"
32 #include "xfs_dir2_priv.h"
33 #include "xfs_dinode.h"
34 #include "xfs_inode.h"
35 #include "xfs_inode_item.h"
36 #include "xfs_alloc.h"
37 #include "xfs_bmap.h"
38 #include "xfs_attr.h"
39 #include "xfs_attr_leaf.h"
40 #include "xfs_error.h"
41 #include "xfs_trace.h"
42 
43 /*
44  * xfs_da_btree.c
45  *
46  * Routines to implement directories as Btrees of hashed names.
47  */
48 
49 /*========================================================================
50  * Function prototypes for the kernel.
51  *========================================================================*/
52 
53 /*
54  * Routines used for growing the Btree.
55  */
56 STATIC int xfs_da_root_split(xfs_da_state_t *state,
57 					    xfs_da_state_blk_t *existing_root,
58 					    xfs_da_state_blk_t *new_child);
59 STATIC int xfs_da_node_split(xfs_da_state_t *state,
60 					    xfs_da_state_blk_t *existing_blk,
61 					    xfs_da_state_blk_t *split_blk,
62 					    xfs_da_state_blk_t *blk_to_add,
63 					    int treelevel,
64 					    int *result);
65 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
66 					 xfs_da_state_blk_t *node_blk_1,
67 					 xfs_da_state_blk_t *node_blk_2);
68 STATIC void xfs_da_node_add(xfs_da_state_t *state,
69 				   xfs_da_state_blk_t *old_node_blk,
70 				   xfs_da_state_blk_t *new_node_blk);
71 
72 /*
73  * Routines used for shrinking the Btree.
74  */
75 STATIC int xfs_da_root_join(xfs_da_state_t *state,
76 					   xfs_da_state_blk_t *root_blk);
77 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
78 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
79 					      xfs_da_state_blk_t *drop_blk);
80 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
81 					 xfs_da_state_blk_t *src_node_blk,
82 					 xfs_da_state_blk_t *dst_node_blk);
83 
84 /*
85  * Utility routines.
86  */
87 STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
88 STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
89 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
90 STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
91 				  xfs_da_state_blk_t *drop_blk,
92 				  xfs_da_state_blk_t *save_blk);
93 STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);
94 
95 /*========================================================================
96  * Routines used for growing the Btree.
97  *========================================================================*/
98 
99 /*
100  * Create the initial contents of an intermediate node.
101  */
102 int
xfs_da_node_create(xfs_da_args_t * args,xfs_dablk_t blkno,int level,xfs_dabuf_t ** bpp,int whichfork)103 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
104 				 xfs_dabuf_t **bpp, int whichfork)
105 {
106 	xfs_da_intnode_t *node;
107 	xfs_dabuf_t *bp;
108 	int error;
109 	xfs_trans_t *tp;
110 
111 	tp = args->trans;
112 	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
113 	if (error)
114 		return(error);
115 	ASSERT(bp != NULL);
116 	node = bp->data;
117 	node->hdr.info.forw = 0;
118 	node->hdr.info.back = 0;
119 	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
120 	node->hdr.info.pad = 0;
121 	node->hdr.count = 0;
122 	node->hdr.level = cpu_to_be16(level);
123 
124 	xfs_da_log_buf(tp, bp,
125 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
126 
127 	*bpp = bp;
128 	return(0);
129 }
130 
131 /*
132  * Split a leaf node, rebalance, then possibly split
133  * intermediate nodes, rebalance, etc.
134  */
135 int							/* error */
xfs_da_split(xfs_da_state_t * state)136 xfs_da_split(xfs_da_state_t *state)
137 {
138 	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
139 	xfs_da_intnode_t *node;
140 	xfs_dabuf_t *bp;
141 	int max, action, error, i;
142 
143 	/*
144 	 * Walk back up the tree splitting/inserting/adjusting as necessary.
145 	 * If we need to insert and there isn't room, split the node, then
146 	 * decide which fragment to insert the new block from below into.
147 	 * Note that we may split the root this way, but we need more fixup.
148 	 */
149 	max = state->path.active - 1;
150 	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
151 	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
152 	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
153 
154 	addblk = &state->path.blk[max];		/* initial dummy value */
155 	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
156 		oldblk = &state->path.blk[i];
157 		newblk = &state->altpath.blk[i];
158 
159 		/*
160 		 * If a leaf node then
161 		 *     Allocate a new leaf node, then rebalance across them.
162 		 * else if an intermediate node then
163 		 *     We split on the last layer, must we split the node?
164 		 */
165 		switch (oldblk->magic) {
166 		case XFS_ATTR_LEAF_MAGIC:
167 			error = xfs_attr_leaf_split(state, oldblk, newblk);
168 			if ((error != 0) && (error != ENOSPC)) {
169 				return(error);	/* GROT: attr is inconsistent */
170 			}
171 			if (!error) {
172 				addblk = newblk;
173 				break;
174 			}
175 			/*
176 			 * Entry wouldn't fit, split the leaf again.
177 			 */
178 			state->extravalid = 1;
179 			if (state->inleaf) {
180 				state->extraafter = 0;	/* before newblk */
181 				error = xfs_attr_leaf_split(state, oldblk,
182 							    &state->extrablk);
183 			} else {
184 				state->extraafter = 1;	/* after newblk */
185 				error = xfs_attr_leaf_split(state, newblk,
186 							    &state->extrablk);
187 			}
188 			if (error)
189 				return(error);	/* GROT: attr inconsistent */
190 			addblk = newblk;
191 			break;
192 		case XFS_DIR2_LEAFN_MAGIC:
193 			error = xfs_dir2_leafn_split(state, oldblk, newblk);
194 			if (error)
195 				return error;
196 			addblk = newblk;
197 			break;
198 		case XFS_DA_NODE_MAGIC:
199 			error = xfs_da_node_split(state, oldblk, newblk, addblk,
200 							 max - i, &action);
201 			xfs_da_buf_done(addblk->bp);
202 			addblk->bp = NULL;
203 			if (error)
204 				return(error);	/* GROT: dir is inconsistent */
205 			/*
206 			 * Record the newly split block for the next time thru?
207 			 */
208 			if (action)
209 				addblk = newblk;
210 			else
211 				addblk = NULL;
212 			break;
213 		}
214 
215 		/*
216 		 * Update the btree to show the new hashval for this child.
217 		 */
218 		xfs_da_fixhashpath(state, &state->path);
219 		/*
220 		 * If we won't need this block again, it's getting dropped
221 		 * from the active path by the loop control, so we need
222 		 * to mark it done now.
223 		 */
224 		if (i > 0 || !addblk)
225 			xfs_da_buf_done(oldblk->bp);
226 	}
227 	if (!addblk)
228 		return(0);
229 
230 	/*
231 	 * Split the root node.
232 	 */
233 	ASSERT(state->path.active == 0);
234 	oldblk = &state->path.blk[0];
235 	error = xfs_da_root_split(state, oldblk, addblk);
236 	if (error) {
237 		xfs_da_buf_done(oldblk->bp);
238 		xfs_da_buf_done(addblk->bp);
239 		addblk->bp = NULL;
240 		return(error);	/* GROT: dir is inconsistent */
241 	}
242 
243 	/*
244 	 * Update pointers to the node which used to be block 0 and
245 	 * just got bumped because of the addition of a new root node.
246 	 * There might be three blocks involved if a double split occurred,
247 	 * and the original block 0 could be at any position in the list.
248 	 */
249 
250 	node = oldblk->bp->data;
251 	if (node->hdr.info.forw) {
252 		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
253 			bp = addblk->bp;
254 		} else {
255 			ASSERT(state->extravalid);
256 			bp = state->extrablk.bp;
257 		}
258 		node = bp->data;
259 		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
260 		xfs_da_log_buf(state->args->trans, bp,
261 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
262 		    sizeof(node->hdr.info)));
263 	}
264 	node = oldblk->bp->data;
265 	if (node->hdr.info.back) {
266 		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
267 			bp = addblk->bp;
268 		} else {
269 			ASSERT(state->extravalid);
270 			bp = state->extrablk.bp;
271 		}
272 		node = bp->data;
273 		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
274 		xfs_da_log_buf(state->args->trans, bp,
275 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
276 		    sizeof(node->hdr.info)));
277 	}
278 	xfs_da_buf_done(oldblk->bp);
279 	xfs_da_buf_done(addblk->bp);
280 	addblk->bp = NULL;
281 	return(0);
282 }
283 
284 /*
285  * Split the root.  We have to create a new root and point to the two
286  * parts (the split old root) that we just created.  Copy block zero to
287  * the EOF, extending the inode in process.
288  */
289 STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t * state,xfs_da_state_blk_t * blk1,xfs_da_state_blk_t * blk2)290 xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
291 				 xfs_da_state_blk_t *blk2)
292 {
293 	xfs_da_intnode_t *node, *oldroot;
294 	xfs_da_args_t *args;
295 	xfs_dablk_t blkno;
296 	xfs_dabuf_t *bp;
297 	int error, size;
298 	xfs_inode_t *dp;
299 	xfs_trans_t *tp;
300 	xfs_mount_t *mp;
301 	xfs_dir2_leaf_t *leaf;
302 
303 	/*
304 	 * Copy the existing (incorrect) block from the root node position
305 	 * to a free space somewhere.
306 	 */
307 	args = state->args;
308 	ASSERT(args != NULL);
309 	error = xfs_da_grow_inode(args, &blkno);
310 	if (error)
311 		return(error);
312 	dp = args->dp;
313 	tp = args->trans;
314 	mp = state->mp;
315 	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
316 	if (error)
317 		return(error);
318 	ASSERT(bp != NULL);
319 	node = bp->data;
320 	oldroot = blk1->bp->data;
321 	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
322 		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
323 			     (char *)oldroot);
324 	} else {
325 		ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
326 		leaf = (xfs_dir2_leaf_t *)oldroot;
327 		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
328 			     (char *)leaf);
329 	}
330 	memcpy(node, oldroot, size);
331 	xfs_da_log_buf(tp, bp, 0, size - 1);
332 	xfs_da_buf_done(blk1->bp);
333 	blk1->bp = bp;
334 	blk1->blkno = blkno;
335 
336 	/*
337 	 * Set up the new root node.
338 	 */
339 	error = xfs_da_node_create(args,
340 		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
341 		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
342 	if (error)
343 		return(error);
344 	node = bp->data;
345 	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
346 	node->btree[0].before = cpu_to_be32(blk1->blkno);
347 	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
348 	node->btree[1].before = cpu_to_be32(blk2->blkno);
349 	node->hdr.count = cpu_to_be16(2);
350 
351 #ifdef DEBUG
352 	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
353 		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
354 		       blk1->blkno < mp->m_dirfreeblk);
355 		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
356 		       blk2->blkno < mp->m_dirfreeblk);
357 	}
358 #endif
359 
360 	/* Header is already logged by xfs_da_node_create */
361 	xfs_da_log_buf(tp, bp,
362 		XFS_DA_LOGRANGE(node, node->btree,
363 			sizeof(xfs_da_node_entry_t) * 2));
364 	xfs_da_buf_done(bp);
365 
366 	return(0);
367 }
368 
369 /*
370  * Split the node, rebalance, then add the new entry.
371  */
372 STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t * state,xfs_da_state_blk_t * oldblk,xfs_da_state_blk_t * newblk,xfs_da_state_blk_t * addblk,int treelevel,int * result)373 xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
374 				 xfs_da_state_blk_t *newblk,
375 				 xfs_da_state_blk_t *addblk,
376 				 int treelevel, int *result)
377 {
378 	xfs_da_intnode_t *node;
379 	xfs_dablk_t blkno;
380 	int newcount, error;
381 	int useextra;
382 
383 	node = oldblk->bp->data;
384 	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
385 
386 	/*
387 	 * With V2 dirs the extra block is data or freespace.
388 	 */
389 	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
390 	newcount = 1 + useextra;
391 	/*
392 	 * Do we have to split the node?
393 	 */
394 	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
395 		/*
396 		 * Allocate a new node, add to the doubly linked chain of
397 		 * nodes, then move some of our excess entries into it.
398 		 */
399 		error = xfs_da_grow_inode(state->args, &blkno);
400 		if (error)
401 			return(error);	/* GROT: dir is inconsistent */
402 
403 		error = xfs_da_node_create(state->args, blkno, treelevel,
404 					   &newblk->bp, state->args->whichfork);
405 		if (error)
406 			return(error);	/* GROT: dir is inconsistent */
407 		newblk->blkno = blkno;
408 		newblk->magic = XFS_DA_NODE_MAGIC;
409 		xfs_da_node_rebalance(state, oldblk, newblk);
410 		error = xfs_da_blk_link(state, oldblk, newblk);
411 		if (error)
412 			return(error);
413 		*result = 1;
414 	} else {
415 		*result = 0;
416 	}
417 
418 	/*
419 	 * Insert the new entry(s) into the correct block
420 	 * (updating last hashval in the process).
421 	 *
422 	 * xfs_da_node_add() inserts BEFORE the given index,
423 	 * and as a result of using node_lookup_int() we always
424 	 * point to a valid entry (not after one), but a split
425 	 * operation always results in a new block whose hashvals
426 	 * FOLLOW the current block.
427 	 *
428 	 * If we had double-split op below us, then add the extra block too.
429 	 */
430 	node = oldblk->bp->data;
431 	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
432 		oldblk->index++;
433 		xfs_da_node_add(state, oldblk, addblk);
434 		if (useextra) {
435 			if (state->extraafter)
436 				oldblk->index++;
437 			xfs_da_node_add(state, oldblk, &state->extrablk);
438 			state->extravalid = 0;
439 		}
440 	} else {
441 		newblk->index++;
442 		xfs_da_node_add(state, newblk, addblk);
443 		if (useextra) {
444 			if (state->extraafter)
445 				newblk->index++;
446 			xfs_da_node_add(state, newblk, &state->extrablk);
447 			state->extravalid = 0;
448 		}
449 	}
450 
451 	return(0);
452 }
453 
454 /*
455  * Balance the btree elements between two intermediate nodes,
456  * usually one full and one empty.
457  *
458  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
459  */
460 STATIC void
xfs_da_node_rebalance(xfs_da_state_t * state,xfs_da_state_blk_t * blk1,xfs_da_state_blk_t * blk2)461 xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
462 				     xfs_da_state_blk_t *blk2)
463 {
464 	xfs_da_intnode_t *node1, *node2, *tmpnode;
465 	xfs_da_node_entry_t *btree_s, *btree_d;
466 	int count, tmp;
467 	xfs_trans_t *tp;
468 
469 	node1 = blk1->bp->data;
470 	node2 = blk2->bp->data;
471 	/*
472 	 * Figure out how many entries need to move, and in which direction.
473 	 * Swap the nodes around if that makes it simpler.
474 	 */
475 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
476 	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
477 	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
478 	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
479 		tmpnode = node1;
480 		node1 = node2;
481 		node2 = tmpnode;
482 	}
483 	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
484 	ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
485 	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
486 	if (count == 0)
487 		return;
488 	tp = state->args->trans;
489 	/*
490 	 * Two cases: high-to-low and low-to-high.
491 	 */
492 	if (count > 0) {
493 		/*
494 		 * Move elements in node2 up to make a hole.
495 		 */
496 		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
497 			tmp *= (uint)sizeof(xfs_da_node_entry_t);
498 			btree_s = &node2->btree[0];
499 			btree_d = &node2->btree[count];
500 			memmove(btree_d, btree_s, tmp);
501 		}
502 
503 		/*
504 		 * Move the req'd B-tree elements from high in node1 to
505 		 * low in node2.
506 		 */
507 		be16_add_cpu(&node2->hdr.count, count);
508 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
509 		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
510 		btree_d = &node2->btree[0];
511 		memcpy(btree_d, btree_s, tmp);
512 		be16_add_cpu(&node1->hdr.count, -count);
513 	} else {
514 		/*
515 		 * Move the req'd B-tree elements from low in node2 to
516 		 * high in node1.
517 		 */
518 		count = -count;
519 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
520 		btree_s = &node2->btree[0];
521 		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
522 		memcpy(btree_d, btree_s, tmp);
523 		be16_add_cpu(&node1->hdr.count, count);
524 		xfs_da_log_buf(tp, blk1->bp,
525 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
526 
527 		/*
528 		 * Move elements in node2 down to fill the hole.
529 		 */
530 		tmp  = be16_to_cpu(node2->hdr.count) - count;
531 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
532 		btree_s = &node2->btree[count];
533 		btree_d = &node2->btree[0];
534 		memmove(btree_d, btree_s, tmp);
535 		be16_add_cpu(&node2->hdr.count, -count);
536 	}
537 
538 	/*
539 	 * Log header of node 1 and all current bits of node 2.
540 	 */
541 	xfs_da_log_buf(tp, blk1->bp,
542 		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
543 	xfs_da_log_buf(tp, blk2->bp,
544 		XFS_DA_LOGRANGE(node2, &node2->hdr,
545 			sizeof(node2->hdr) +
546 			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));
547 
548 	/*
549 	 * Record the last hashval from each block for upward propagation.
550 	 * (note: don't use the swapped node pointers)
551 	 */
552 	node1 = blk1->bp->data;
553 	node2 = blk2->bp->data;
554 	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
555 	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);
556 
557 	/*
558 	 * Adjust the expected index for insertion.
559 	 */
560 	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
561 		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
562 		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
563 	}
564 }
565 
566 /*
567  * Add a new entry to an intermediate node.
568  */
569 STATIC void
xfs_da_node_add(xfs_da_state_t * state,xfs_da_state_blk_t * oldblk,xfs_da_state_blk_t * newblk)570 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
571 			       xfs_da_state_blk_t *newblk)
572 {
573 	xfs_da_intnode_t *node;
574 	xfs_da_node_entry_t *btree;
575 	int tmp;
576 
577 	node = oldblk->bp->data;
578 	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
579 	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
580 	ASSERT(newblk->blkno != 0);
581 	if (state->args->whichfork == XFS_DATA_FORK)
582 		ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
583 		       newblk->blkno < state->mp->m_dirfreeblk);
584 
585 	/*
586 	 * We may need to make some room before we insert the new node.
587 	 */
588 	tmp = 0;
589 	btree = &node->btree[ oldblk->index ];
590 	if (oldblk->index < be16_to_cpu(node->hdr.count)) {
591 		tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
592 		memmove(btree + 1, btree, tmp);
593 	}
594 	btree->hashval = cpu_to_be32(newblk->hashval);
595 	btree->before = cpu_to_be32(newblk->blkno);
596 	xfs_da_log_buf(state->args->trans, oldblk->bp,
597 		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
598 	be16_add_cpu(&node->hdr.count, 1);
599 	xfs_da_log_buf(state->args->trans, oldblk->bp,
600 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
601 
602 	/*
603 	 * Copy the last hash value from the oldblk to propagate upwards.
604 	 */
605 	oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
606 }
607 
608 /*========================================================================
609  * Routines used for shrinking the Btree.
610  *========================================================================*/
611 
612 /*
613  * Deallocate an empty leaf node, remove it from its parent,
614  * possibly deallocating that block, etc...
615  */
616 int
xfs_da_join(xfs_da_state_t * state)617 xfs_da_join(xfs_da_state_t *state)
618 {
619 	xfs_da_state_blk_t *drop_blk, *save_blk;
620 	int action, error;
621 
622 	action = 0;
623 	drop_blk = &state->path.blk[ state->path.active-1 ];
624 	save_blk = &state->altpath.blk[ state->path.active-1 ];
625 	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
626 	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
627 	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
628 
629 	/*
630 	 * Walk back up the tree joining/deallocating as necessary.
631 	 * When we stop dropping blocks, break out.
632 	 */
633 	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
634 		 state->path.active--) {
635 		/*
636 		 * See if we can combine the block with a neighbor.
637 		 *   (action == 0) => no options, just leave
638 		 *   (action == 1) => coalesce, then unlink
639 		 *   (action == 2) => block empty, unlink it
640 		 */
641 		switch (drop_blk->magic) {
642 		case XFS_ATTR_LEAF_MAGIC:
643 			error = xfs_attr_leaf_toosmall(state, &action);
644 			if (error)
645 				return(error);
646 			if (action == 0)
647 				return(0);
648 			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
649 			break;
650 		case XFS_DIR2_LEAFN_MAGIC:
651 			error = xfs_dir2_leafn_toosmall(state, &action);
652 			if (error)
653 				return error;
654 			if (action == 0)
655 				return 0;
656 			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
657 			break;
658 		case XFS_DA_NODE_MAGIC:
659 			/*
660 			 * Remove the offending node, fixup hashvals,
661 			 * check for a toosmall neighbor.
662 			 */
663 			xfs_da_node_remove(state, drop_blk);
664 			xfs_da_fixhashpath(state, &state->path);
665 			error = xfs_da_node_toosmall(state, &action);
666 			if (error)
667 				return(error);
668 			if (action == 0)
669 				return 0;
670 			xfs_da_node_unbalance(state, drop_blk, save_blk);
671 			break;
672 		}
673 		xfs_da_fixhashpath(state, &state->altpath);
674 		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
675 		xfs_da_state_kill_altpath(state);
676 		if (error)
677 			return(error);
678 		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
679 							 drop_blk->bp);
680 		drop_blk->bp = NULL;
681 		if (error)
682 			return(error);
683 	}
684 	/*
685 	 * We joined all the way to the top.  If it turns out that
686 	 * we only have one entry in the root, make the child block
687 	 * the new root.
688 	 */
689 	xfs_da_node_remove(state, drop_blk);
690 	xfs_da_fixhashpath(state, &state->path);
691 	error = xfs_da_root_join(state, &state->path.blk[0]);
692 	return(error);
693 }
694 
695 #ifdef	DEBUG
696 static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo * blkinfo,__u16 level)697 xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
698 {
699 	__be16	magic = blkinfo->magic;
700 
701 	if (level == 1) {
702 		ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
703 		       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
704 	} else
705 		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
706 	ASSERT(!blkinfo->forw);
707 	ASSERT(!blkinfo->back);
708 }
709 #else	/* !DEBUG */
710 #define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
711 #endif	/* !DEBUG */
712 
713 /*
714  * We have only one entry in the root.  Copy the only remaining child of
715  * the old root to block 0 as the new root node.
716  */
717 STATIC int
xfs_da_root_join(xfs_da_state_t * state,xfs_da_state_blk_t * root_blk)718 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
719 {
720 	xfs_da_intnode_t *oldroot;
721 	xfs_da_args_t *args;
722 	xfs_dablk_t child;
723 	xfs_dabuf_t *bp;
724 	int error;
725 
726 	args = state->args;
727 	ASSERT(args != NULL);
728 	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
729 	oldroot = root_blk->bp->data;
730 	ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
731 	ASSERT(!oldroot->hdr.info.forw);
732 	ASSERT(!oldroot->hdr.info.back);
733 
734 	/*
735 	 * If the root has more than one child, then don't do anything.
736 	 */
737 	if (be16_to_cpu(oldroot->hdr.count) > 1)
738 		return(0);
739 
740 	/*
741 	 * Read in the (only) child block, then copy those bytes into
742 	 * the root block's buffer and free the original child block.
743 	 */
744 	child = be32_to_cpu(oldroot->btree[0].before);
745 	ASSERT(child != 0);
746 	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
747 					     args->whichfork);
748 	if (error)
749 		return(error);
750 	ASSERT(bp != NULL);
751 	xfs_da_blkinfo_onlychild_validate(bp->data,
752 					be16_to_cpu(oldroot->hdr.level));
753 
754 	memcpy(root_blk->bp->data, bp->data, state->blocksize);
755 	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
756 	error = xfs_da_shrink_inode(args, child, bp);
757 	return(error);
758 }
759 
760 /*
761  * Check a node block and its neighbors to see if the block should be
762  * collapsed into one or the other neighbor.  Always keep the block
763  * with the smaller block number.
764  * If the current block is over 50% full, don't try to join it, return 0.
765  * If the block is empty, fill in the state structure and return 2.
766  * If it can be collapsed, fill in the state structure and return 1.
767  * If nothing can be done, return 0.
768  */
769 STATIC int
xfs_da_node_toosmall(xfs_da_state_t * state,int * action)770 xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
771 {
772 	xfs_da_intnode_t *node;
773 	xfs_da_state_blk_t *blk;
774 	xfs_da_blkinfo_t *info;
775 	int count, forward, error, retval, i;
776 	xfs_dablk_t blkno;
777 	xfs_dabuf_t *bp;
778 
779 	/*
780 	 * Check for the degenerate case of the block being over 50% full.
781 	 * If so, it's not worth even looking to see if we might be able
782 	 * to coalesce with a sibling.
783 	 */
784 	blk = &state->path.blk[ state->path.active-1 ];
785 	info = blk->bp->data;
786 	ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
787 	node = (xfs_da_intnode_t *)info;
788 	count = be16_to_cpu(node->hdr.count);
789 	if (count > (state->node_ents >> 1)) {
790 		*action = 0;	/* blk over 50%, don't try to join */
791 		return(0);	/* blk over 50%, don't try to join */
792 	}
793 
794 	/*
795 	 * Check for the degenerate case of the block being empty.
796 	 * If the block is empty, we'll simply delete it, no need to
797 	 * coalesce it with a sibling block.  We choose (arbitrarily)
798 	 * to merge with the forward block unless it is NULL.
799 	 */
800 	if (count == 0) {
801 		/*
802 		 * Make altpath point to the block we want to keep and
803 		 * path point to the block we want to drop (this one).
804 		 */
805 		forward = (info->forw != 0);
806 		memcpy(&state->altpath, &state->path, sizeof(state->path));
807 		error = xfs_da_path_shift(state, &state->altpath, forward,
808 						 0, &retval);
809 		if (error)
810 			return(error);
811 		if (retval) {
812 			*action = 0;
813 		} else {
814 			*action = 2;
815 		}
816 		return(0);
817 	}
818 
819 	/*
820 	 * Examine each sibling block to see if we can coalesce with
821 	 * at least 25% free space to spare.  We need to figure out
822 	 * whether to merge with the forward or the backward block.
823 	 * We prefer coalescing with the lower numbered sibling so as
824 	 * to shrink a directory over time.
825 	 */
826 	/* start with smaller blk num */
827 	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
828 	for (i = 0; i < 2; forward = !forward, i++) {
829 		if (forward)
830 			blkno = be32_to_cpu(info->forw);
831 		else
832 			blkno = be32_to_cpu(info->back);
833 		if (blkno == 0)
834 			continue;
835 		error = xfs_da_read_buf(state->args->trans, state->args->dp,
836 					blkno, -1, &bp, state->args->whichfork);
837 		if (error)
838 			return(error);
839 		ASSERT(bp != NULL);
840 
841 		node = (xfs_da_intnode_t *)info;
842 		count  = state->node_ents;
843 		count -= state->node_ents >> 2;
844 		count -= be16_to_cpu(node->hdr.count);
845 		node = bp->data;
846 		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
847 		count -= be16_to_cpu(node->hdr.count);
848 		xfs_da_brelse(state->args->trans, bp);
849 		if (count >= 0)
850 			break;	/* fits with at least 25% to spare */
851 	}
852 	if (i >= 2) {
853 		*action = 0;
854 		return(0);
855 	}
856 
857 	/*
858 	 * Make altpath point to the block we want to keep (the lower
859 	 * numbered block) and path point to the block we want to drop.
860 	 */
861 	memcpy(&state->altpath, &state->path, sizeof(state->path));
862 	if (blkno < blk->blkno) {
863 		error = xfs_da_path_shift(state, &state->altpath, forward,
864 						 0, &retval);
865 		if (error) {
866 			return(error);
867 		}
868 		if (retval) {
869 			*action = 0;
870 			return(0);
871 		}
872 	} else {
873 		error = xfs_da_path_shift(state, &state->path, forward,
874 						 0, &retval);
875 		if (error) {
876 			return(error);
877 		}
878 		if (retval) {
879 			*action = 0;
880 			return(0);
881 		}
882 	}
883 	*action = 1;
884 	return(0);
885 }
886 
887 /*
888  * Walk back up the tree adjusting hash values as necessary,
889  * when we stop making changes, return.
890  */
891 void
xfs_da_fixhashpath(xfs_da_state_t * state,xfs_da_state_path_t * path)892 xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
893 {
894 	xfs_da_state_blk_t *blk;
895 	xfs_da_intnode_t *node;
896 	xfs_da_node_entry_t *btree;
897 	xfs_dahash_t lasthash=0;
898 	int level, count;
899 
900 	level = path->active-1;
901 	blk = &path->blk[ level ];
902 	switch (blk->magic) {
903 	case XFS_ATTR_LEAF_MAGIC:
904 		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
905 		if (count == 0)
906 			return;
907 		break;
908 	case XFS_DIR2_LEAFN_MAGIC:
909 		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
910 		if (count == 0)
911 			return;
912 		break;
913 	case XFS_DA_NODE_MAGIC:
914 		lasthash = xfs_da_node_lasthash(blk->bp, &count);
915 		if (count == 0)
916 			return;
917 		break;
918 	}
919 	for (blk--, level--; level >= 0; blk--, level--) {
920 		node = blk->bp->data;
921 		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
922 		btree = &node->btree[ blk->index ];
923 		if (be32_to_cpu(btree->hashval) == lasthash)
924 			break;
925 		blk->hashval = lasthash;
926 		btree->hashval = cpu_to_be32(lasthash);
927 		xfs_da_log_buf(state->args->trans, blk->bp,
928 				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
929 
930 		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
931 	}
932 }
933 
934 /*
935  * Remove an entry from an intermediate node.
936  */
937 STATIC void
xfs_da_node_remove(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk)938 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
939 {
940 	xfs_da_intnode_t *node;
941 	xfs_da_node_entry_t *btree;
942 	int tmp;
943 
944 	node = drop_blk->bp->data;
945 	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
946 	ASSERT(drop_blk->index >= 0);
947 
948 	/*
949 	 * Copy over the offending entry, or just zero it out.
950 	 */
951 	btree = &node->btree[drop_blk->index];
952 	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
953 		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
954 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
955 		memmove(btree, btree + 1, tmp);
956 		xfs_da_log_buf(state->args->trans, drop_blk->bp,
957 		    XFS_DA_LOGRANGE(node, btree, tmp));
958 		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
959 	}
960 	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
961 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
962 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
963 	be16_add_cpu(&node->hdr.count, -1);
964 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
965 	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
966 
967 	/*
968 	 * Copy the last hash value from the block to propagate upwards.
969 	 */
970 	btree--;
971 	drop_blk->hashval = be32_to_cpu(btree->hashval);
972 }
973 
974 /*
975  * Unbalance the btree elements between two intermediate nodes,
976  * move all Btree elements from one node into another.
977  */
978 STATIC void
xfs_da_node_unbalance(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk,xfs_da_state_blk_t * save_blk)979 xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
980 				     xfs_da_state_blk_t *save_blk)
981 {
982 	xfs_da_intnode_t *drop_node, *save_node;
983 	xfs_da_node_entry_t *btree;
984 	int tmp;
985 	xfs_trans_t *tp;
986 
987 	drop_node = drop_blk->bp->data;
988 	save_node = save_blk->bp->data;
989 	ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
990 	ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
991 	tp = state->args->trans;
992 
993 	/*
994 	 * If the dying block has lower hashvals, then move all the
995 	 * elements in the remaining block up to make a hole.
996 	 */
997 	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
998 	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
999 	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
1000 	{
1001 		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
1002 		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
1003 		memmove(btree, &save_node->btree[0], tmp);
1004 		btree = &save_node->btree[0];
1005 		xfs_da_log_buf(tp, save_blk->bp,
1006 			XFS_DA_LOGRANGE(save_node, btree,
1007 				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
1008 				sizeof(xfs_da_node_entry_t)));
1009 	} else {
1010 		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
1011 		xfs_da_log_buf(tp, save_blk->bp,
1012 			XFS_DA_LOGRANGE(save_node, btree,
1013 				be16_to_cpu(drop_node->hdr.count) *
1014 				sizeof(xfs_da_node_entry_t)));
1015 	}
1016 
1017 	/*
1018 	 * Move all the B-tree elements from drop_blk to save_blk.
1019 	 */
1020 	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
1021 	memcpy(btree, &drop_node->btree[0], tmp);
1022 	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
1023 
1024 	xfs_da_log_buf(tp, save_blk->bp,
1025 		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1026 			sizeof(save_node->hdr)));
1027 
1028 	/*
1029 	 * Save the last hashval in the remaining block for upward propagation.
1030 	 */
1031 	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
1032 }
1033 
1034 /*========================================================================
1035  * Routines used for finding things in the Btree.
1036  *========================================================================*/
1037 
1038 /*
1039  * Walk down the Btree looking for a particular filename, filling
1040  * in the state structure as we go.
1041  *
1042  * We will set the state structure to point to each of the elements
1043  * in each of the nodes where either the hashval is or should be.
1044  *
1045  * We support duplicate hashval's so for each entry in the current
1046  * node that could contain the desired hashval, descend.  This is a
1047  * pruned depth-first tree search.
1048  */
1049 int							/* error */
xfs_da_node_lookup_int(xfs_da_state_t * state,int * result)1050 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1051 {
1052 	xfs_da_state_blk_t *blk;
1053 	xfs_da_blkinfo_t *curr;
1054 	xfs_da_intnode_t *node;
1055 	xfs_da_node_entry_t *btree;
1056 	xfs_dablk_t blkno;
1057 	int probe, span, max, error, retval;
1058 	xfs_dahash_t hashval, btreehashval;
1059 	xfs_da_args_t *args;
1060 
1061 	args = state->args;
1062 
1063 	/*
1064 	 * Descend thru the B-tree searching each level for the right
1065 	 * node to use, until the right hashval is found.
1066 	 */
1067 	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
1068 	for (blk = &state->path.blk[0], state->path.active = 1;
1069 			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
1070 			 blk++, state->path.active++) {
1071 		/*
1072 		 * Read the next node down in the tree.
1073 		 */
1074 		blk->blkno = blkno;
1075 		error = xfs_da_read_buf(args->trans, args->dp, blkno,
1076 					-1, &blk->bp, args->whichfork);
1077 		if (error) {
1078 			blk->blkno = 0;
1079 			state->path.active--;
1080 			return(error);
1081 		}
1082 		curr = blk->bp->data;
1083 		blk->magic = be16_to_cpu(curr->magic);
1084 		ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
1085 		       blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1086 		       blk->magic == XFS_ATTR_LEAF_MAGIC);
1087 
1088 		/*
1089 		 * Search an intermediate node for a match.
1090 		 */
1091 		if (blk->magic == XFS_DA_NODE_MAGIC) {
1092 			node = blk->bp->data;
1093 			max = be16_to_cpu(node->hdr.count);
1094 			blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
1095 
1096 			/*
1097 			 * Binary search.  (note: small blocks will skip loop)
1098 			 */
1099 			probe = span = max / 2;
1100 			hashval = args->hashval;
1101 			for (btree = &node->btree[probe]; span > 4;
1102 				   btree = &node->btree[probe]) {
1103 				span /= 2;
1104 				btreehashval = be32_to_cpu(btree->hashval);
1105 				if (btreehashval < hashval)
1106 					probe += span;
1107 				else if (btreehashval > hashval)
1108 					probe -= span;
1109 				else
1110 					break;
1111 			}
1112 			ASSERT((probe >= 0) && (probe < max));
1113 			ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));
1114 
1115 			/*
1116 			 * Since we may have duplicate hashval's, find the first
1117 			 * matching hashval in the node.
1118 			 */
1119 			while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) {
1120 				btree--;
1121 				probe--;
1122 			}
1123 			while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) {
1124 				btree++;
1125 				probe++;
1126 			}
1127 
1128 			/*
1129 			 * Pick the right block to descend on.
1130 			 */
1131 			if (probe == max) {
1132 				blk->index = max-1;
1133 				blkno = be32_to_cpu(node->btree[max-1].before);
1134 			} else {
1135 				blk->index = probe;
1136 				blkno = be32_to_cpu(btree->before);
1137 			}
1138 		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1139 			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1140 			break;
1141 		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1142 			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1143 			break;
1144 		}
1145 	}
1146 
1147 	/*
1148 	 * A leaf block that ends in the hashval that we are interested in
1149 	 * (final hashval == search hashval) means that the next block may
1150 	 * contain more entries with the same hashval, shift upward to the
1151 	 * next leaf and keep searching.
1152 	 */
1153 	for (;;) {
1154 		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1155 			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1156 							&blk->index, state);
1157 		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1158 			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1159 			blk->index = args->index;
1160 			args->blkno = blk->blkno;
1161 		} else {
1162 			ASSERT(0);
1163 			return XFS_ERROR(EFSCORRUPTED);
1164 		}
1165 		if (((retval == ENOENT) || (retval == ENOATTR)) &&
1166 		    (blk->hashval == args->hashval)) {
1167 			error = xfs_da_path_shift(state, &state->path, 1, 1,
1168 							 &retval);
1169 			if (error)
1170 				return(error);
1171 			if (retval == 0) {
1172 				continue;
1173 			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1174 				/* path_shift() gives ENOENT */
1175 				retval = XFS_ERROR(ENOATTR);
1176 			}
1177 		}
1178 		break;
1179 	}
1180 	*result = retval;
1181 	return(0);
1182 }
1183 
1184 /*========================================================================
1185  * Utility routines.
1186  *========================================================================*/
1187 
1188 /*
1189  * Link a new block into a doubly linked list of blocks (of whatever type).
1190  */
1191 int							/* error */
xfs_da_blk_link(xfs_da_state_t * state,xfs_da_state_blk_t * old_blk,xfs_da_state_blk_t * new_blk)1192 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1193 			       xfs_da_state_blk_t *new_blk)
1194 {
1195 	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1196 	xfs_da_args_t *args;
1197 	int before=0, error;
1198 	xfs_dabuf_t *bp;
1199 
1200 	/*
1201 	 * Set up environment.
1202 	 */
1203 	args = state->args;
1204 	ASSERT(args != NULL);
1205 	old_info = old_blk->bp->data;
1206 	new_info = new_blk->bp->data;
1207 	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1208 	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1209 	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1210 	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
1211 	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
1212 	ASSERT(old_blk->magic == new_blk->magic);
1213 
1214 	switch (old_blk->magic) {
1215 	case XFS_ATTR_LEAF_MAGIC:
1216 		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1217 		break;
1218 	case XFS_DIR2_LEAFN_MAGIC:
1219 		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1220 		break;
1221 	case XFS_DA_NODE_MAGIC:
1222 		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1223 		break;
1224 	}
1225 
1226 	/*
1227 	 * Link blocks in appropriate order.
1228 	 */
1229 	if (before) {
1230 		/*
1231 		 * Link new block in before existing block.
1232 		 */
1233 		new_info->forw = cpu_to_be32(old_blk->blkno);
1234 		new_info->back = old_info->back;
1235 		if (old_info->back) {
1236 			error = xfs_da_read_buf(args->trans, args->dp,
1237 						be32_to_cpu(old_info->back),
1238 						-1, &bp, args->whichfork);
1239 			if (error)
1240 				return(error);
1241 			ASSERT(bp != NULL);
1242 			tmp_info = bp->data;
1243 			ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
1244 			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1245 			tmp_info->forw = cpu_to_be32(new_blk->blkno);
1246 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1247 			xfs_da_buf_done(bp);
1248 		}
1249 		old_info->back = cpu_to_be32(new_blk->blkno);
1250 	} else {
1251 		/*
1252 		 * Link new block in after existing block.
1253 		 */
1254 		new_info->forw = old_info->forw;
1255 		new_info->back = cpu_to_be32(old_blk->blkno);
1256 		if (old_info->forw) {
1257 			error = xfs_da_read_buf(args->trans, args->dp,
1258 						be32_to_cpu(old_info->forw),
1259 						-1, &bp, args->whichfork);
1260 			if (error)
1261 				return(error);
1262 			ASSERT(bp != NULL);
1263 			tmp_info = bp->data;
1264 			ASSERT(tmp_info->magic == old_info->magic);
1265 			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1266 			tmp_info->back = cpu_to_be32(new_blk->blkno);
1267 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1268 			xfs_da_buf_done(bp);
1269 		}
1270 		old_info->forw = cpu_to_be32(new_blk->blkno);
1271 	}
1272 
1273 	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1274 	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1275 	return(0);
1276 }
1277 
1278 /*
1279  * Compare two intermediate nodes for "order".
1280  */
1281 STATIC int
xfs_da_node_order(xfs_dabuf_t * node1_bp,xfs_dabuf_t * node2_bp)1282 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1283 {
1284 	xfs_da_intnode_t *node1, *node2;
1285 
1286 	node1 = node1_bp->data;
1287 	node2 = node2_bp->data;
1288 	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
1289 	       node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1290 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1291 	    ((be32_to_cpu(node2->btree[0].hashval) <
1292 	      be32_to_cpu(node1->btree[0].hashval)) ||
1293 	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
1294 	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
1295 		return(1);
1296 	}
1297 	return(0);
1298 }
1299 
1300 /*
1301  * Pick up the last hashvalue from an intermediate node.
1302  */
1303 STATIC uint
xfs_da_node_lasthash(xfs_dabuf_t * bp,int * count)1304 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1305 {
1306 	xfs_da_intnode_t *node;
1307 
1308 	node = bp->data;
1309 	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1310 	if (count)
1311 		*count = be16_to_cpu(node->hdr.count);
1312 	if (!node->hdr.count)
1313 		return(0);
1314 	return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1315 }
1316 
1317 /*
1318  * Unlink a block from a doubly linked list of blocks.
1319  */
1320 STATIC int						/* error */
xfs_da_blk_unlink(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk,xfs_da_state_blk_t * save_blk)1321 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1322 				 xfs_da_state_blk_t *save_blk)
1323 {
1324 	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1325 	xfs_da_args_t *args;
1326 	xfs_dabuf_t *bp;
1327 	int error;
1328 
1329 	/*
1330 	 * Set up environment.
1331 	 */
1332 	args = state->args;
1333 	ASSERT(args != NULL);
1334 	save_info = save_blk->bp->data;
1335 	drop_info = drop_blk->bp->data;
1336 	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1337 	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1338 	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1339 	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
1340 	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
1341 	ASSERT(save_blk->magic == drop_blk->magic);
1342 	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1343 	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
1344 	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1345 	       (be32_to_cpu(drop_info->back) == save_blk->blkno));
1346 
1347 	/*
1348 	 * Unlink the leaf block from the doubly linked chain of leaves.
1349 	 */
1350 	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1351 		save_info->back = drop_info->back;
1352 		if (drop_info->back) {
1353 			error = xfs_da_read_buf(args->trans, args->dp,
1354 						be32_to_cpu(drop_info->back),
1355 						-1, &bp, args->whichfork);
1356 			if (error)
1357 				return(error);
1358 			ASSERT(bp != NULL);
1359 			tmp_info = bp->data;
1360 			ASSERT(tmp_info->magic == save_info->magic);
1361 			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1362 			tmp_info->forw = cpu_to_be32(save_blk->blkno);
1363 			xfs_da_log_buf(args->trans, bp, 0,
1364 						    sizeof(*tmp_info) - 1);
1365 			xfs_da_buf_done(bp);
1366 		}
1367 	} else {
1368 		save_info->forw = drop_info->forw;
1369 		if (drop_info->forw) {
1370 			error = xfs_da_read_buf(args->trans, args->dp,
1371 						be32_to_cpu(drop_info->forw),
1372 						-1, &bp, args->whichfork);
1373 			if (error)
1374 				return(error);
1375 			ASSERT(bp != NULL);
1376 			tmp_info = bp->data;
1377 			ASSERT(tmp_info->magic == save_info->magic);
1378 			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1379 			tmp_info->back = cpu_to_be32(save_blk->blkno);
1380 			xfs_da_log_buf(args->trans, bp, 0,
1381 						    sizeof(*tmp_info) - 1);
1382 			xfs_da_buf_done(bp);
1383 		}
1384 	}
1385 
1386 	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1387 	return(0);
1388 }
1389 
1390 /*
1391  * Move a path "forward" or "!forward" one block at the current level.
1392  *
1393  * This routine will adjust a "path" to point to the next block
1394  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1395  * Btree, including updating pointers to the intermediate nodes between
1396  * the new bottom and the root.
1397  */
1398 int							/* error */
xfs_da_path_shift(xfs_da_state_t * state,xfs_da_state_path_t * path,int forward,int release,int * result)1399 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1400 				 int forward, int release, int *result)
1401 {
1402 	xfs_da_state_blk_t *blk;
1403 	xfs_da_blkinfo_t *info;
1404 	xfs_da_intnode_t *node;
1405 	xfs_da_args_t *args;
1406 	xfs_dablk_t blkno=0;
1407 	int level, error;
1408 
1409 	/*
1410 	 * Roll up the Btree looking for the first block where our
1411 	 * current index is not at the edge of the block.  Note that
1412 	 * we skip the bottom layer because we want the sibling block.
1413 	 */
1414 	args = state->args;
1415 	ASSERT(args != NULL);
1416 	ASSERT(path != NULL);
1417 	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1418 	level = (path->active-1) - 1;	/* skip bottom layer in path */
1419 	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1420 		ASSERT(blk->bp != NULL);
1421 		node = blk->bp->data;
1422 		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1423 		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
1424 			blk->index++;
1425 			blkno = be32_to_cpu(node->btree[blk->index].before);
1426 			break;
1427 		} else if (!forward && (blk->index > 0)) {
1428 			blk->index--;
1429 			blkno = be32_to_cpu(node->btree[blk->index].before);
1430 			break;
1431 		}
1432 	}
1433 	if (level < 0) {
1434 		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
1435 		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1436 		return(0);
1437 	}
1438 
1439 	/*
1440 	 * Roll down the edge of the subtree until we reach the
1441 	 * same depth we were at originally.
1442 	 */
1443 	for (blk++, level++; level < path->active; blk++, level++) {
1444 		/*
1445 		 * Release the old block.
1446 		 * (if it's dirty, trans won't actually let go)
1447 		 */
1448 		if (release)
1449 			xfs_da_brelse(args->trans, blk->bp);
1450 
1451 		/*
1452 		 * Read the next child block.
1453 		 */
1454 		blk->blkno = blkno;
1455 		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1456 						     &blk->bp, args->whichfork);
1457 		if (error)
1458 			return(error);
1459 		ASSERT(blk->bp != NULL);
1460 		info = blk->bp->data;
1461 		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1462 		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1463 		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1464 		blk->magic = be16_to_cpu(info->magic);
1465 		if (blk->magic == XFS_DA_NODE_MAGIC) {
1466 			node = (xfs_da_intnode_t *)info;
1467 			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1468 			if (forward)
1469 				blk->index = 0;
1470 			else
1471 				blk->index = be16_to_cpu(node->hdr.count)-1;
1472 			blkno = be32_to_cpu(node->btree[blk->index].before);
1473 		} else {
1474 			ASSERT(level == path->active-1);
1475 			blk->index = 0;
1476 			switch(blk->magic) {
1477 			case XFS_ATTR_LEAF_MAGIC:
1478 				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1479 								      NULL);
1480 				break;
1481 			case XFS_DIR2_LEAFN_MAGIC:
1482 				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1483 								       NULL);
1484 				break;
1485 			default:
1486 				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1487 				       blk->magic == XFS_DIR2_LEAFN_MAGIC);
1488 				break;
1489 			}
1490 		}
1491 	}
1492 	*result = 0;
1493 	return(0);
1494 }
1495 
1496 
1497 /*========================================================================
1498  * Utility routines.
1499  *========================================================================*/
1500 
1501 /*
1502  * Implement a simple hash on a character string.
1503  * Rotate the hash value by 7 bits, then XOR each character in.
1504  * This is implemented with some source-level loop unrolling.
1505  */
1506 xfs_dahash_t
xfs_da_hashname(const __uint8_t * name,int namelen)1507 xfs_da_hashname(const __uint8_t *name, int namelen)
1508 {
1509 	xfs_dahash_t hash;
1510 
1511 	/*
1512 	 * Do four characters at a time as long as we can.
1513 	 */
1514 	for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1515 		hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1516 		       (name[3] << 0) ^ rol32(hash, 7 * 4);
1517 
1518 	/*
1519 	 * Now do the rest of the characters.
1520 	 */
1521 	switch (namelen) {
1522 	case 3:
1523 		return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1524 		       rol32(hash, 7 * 3);
1525 	case 2:
1526 		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1527 	case 1:
1528 		return (name[0] << 0) ^ rol32(hash, 7 * 1);
1529 	default: /* case 0: */
1530 		return hash;
1531 	}
1532 }
1533 
1534 enum xfs_dacmp
xfs_da_compname(struct xfs_da_args * args,const unsigned char * name,int len)1535 xfs_da_compname(
1536 	struct xfs_da_args *args,
1537 	const unsigned char *name,
1538 	int		len)
1539 {
1540 	return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1541 					XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
1542 }
1543 
1544 static xfs_dahash_t
xfs_default_hashname(struct xfs_name * name)1545 xfs_default_hashname(
1546 	struct xfs_name	*name)
1547 {
1548 	return xfs_da_hashname(name->name, name->len);
1549 }
1550 
1551 const struct xfs_nameops xfs_default_nameops = {
1552 	.hashname	= xfs_default_hashname,
1553 	.compname	= xfs_da_compname
1554 };
1555 
1556 int
xfs_da_grow_inode_int(struct xfs_da_args * args,xfs_fileoff_t * bno,int count)1557 xfs_da_grow_inode_int(
1558 	struct xfs_da_args	*args,
1559 	xfs_fileoff_t		*bno,
1560 	int			count)
1561 {
1562 	struct xfs_trans	*tp = args->trans;
1563 	struct xfs_inode	*dp = args->dp;
1564 	int			w = args->whichfork;
1565 	xfs_drfsbno_t		nblks = dp->i_d.di_nblocks;
1566 	struct xfs_bmbt_irec	map, *mapp;
1567 	int			nmap, error, got, i, mapi;
1568 
1569 	/*
1570 	 * Find a spot in the file space to put the new block.
1571 	 */
1572 	error = xfs_bmap_first_unused(tp, dp, count, bno, w);
1573 	if (error)
1574 		return error;
1575 
1576 	/*
1577 	 * Try mapping it in one filesystem block.
1578 	 */
1579 	nmap = 1;
1580 	ASSERT(args->firstblock != NULL);
1581 	error = xfs_bmapi_write(tp, dp, *bno, count,
1582 			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
1583 			args->firstblock, args->total, &map, &nmap,
1584 			args->flist);
1585 	if (error)
1586 		return error;
1587 
1588 	ASSERT(nmap <= 1);
1589 	if (nmap == 1) {
1590 		mapp = &map;
1591 		mapi = 1;
1592 	} else if (nmap == 0 && count > 1) {
1593 		xfs_fileoff_t		b;
1594 		int			c;
1595 
1596 		/*
1597 		 * If we didn't get it and the block might work if fragmented,
1598 		 * try without the CONTIG flag.  Loop until we get it all.
1599 		 */
1600 		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1601 		for (b = *bno, mapi = 0; b < *bno + count; ) {
1602 			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1603 			c = (int)(*bno + count - b);
1604 			error = xfs_bmapi_write(tp, dp, b, c,
1605 					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1606 					args->firstblock, args->total,
1607 					&mapp[mapi], &nmap, args->flist);
1608 			if (error)
1609 				goto out_free_map;
1610 			if (nmap < 1)
1611 				break;
1612 			mapi += nmap;
1613 			b = mapp[mapi - 1].br_startoff +
1614 			    mapp[mapi - 1].br_blockcount;
1615 		}
1616 	} else {
1617 		mapi = 0;
1618 		mapp = NULL;
1619 	}
1620 
1621 	/*
1622 	 * Count the blocks we got, make sure it matches the total.
1623 	 */
1624 	for (i = 0, got = 0; i < mapi; i++)
1625 		got += mapp[i].br_blockcount;
1626 	if (got != count || mapp[0].br_startoff != *bno ||
1627 	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1628 	    *bno + count) {
1629 		error = XFS_ERROR(ENOSPC);
1630 		goto out_free_map;
1631 	}
1632 
1633 	/* account for newly allocated blocks in reserved blocks total */
1634 	args->total -= dp->i_d.di_nblocks - nblks;
1635 
1636 out_free_map:
1637 	if (mapp != &map)
1638 		kmem_free(mapp);
1639 	return error;
1640 }
1641 
1642 /*
1643  * Add a block to the btree ahead of the file.
1644  * Return the new block number to the caller.
1645  */
1646 int
xfs_da_grow_inode(struct xfs_da_args * args,xfs_dablk_t * new_blkno)1647 xfs_da_grow_inode(
1648 	struct xfs_da_args	*args,
1649 	xfs_dablk_t		*new_blkno)
1650 {
1651 	xfs_fileoff_t		bno;
1652 	int			count;
1653 	int			error;
1654 
1655 	if (args->whichfork == XFS_DATA_FORK) {
1656 		bno = args->dp->i_mount->m_dirleafblk;
1657 		count = args->dp->i_mount->m_dirblkfsbs;
1658 	} else {
1659 		bno = 0;
1660 		count = 1;
1661 	}
1662 
1663 	error = xfs_da_grow_inode_int(args, &bno, count);
1664 	if (!error)
1665 		*new_blkno = (xfs_dablk_t)bno;
1666 	return error;
1667 }
1668 
1669 /*
1670  * Ick.  We need to always be able to remove a btree block, even
1671  * if there's no space reservation because the filesystem is full.
1672  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1673  * It swaps the target block with the last block in the file.  The
1674  * last block in the file can always be removed since it can't cause
1675  * a bmap btree split to do that.
1676  */
1677 STATIC int
xfs_da_swap_lastblock(xfs_da_args_t * args,xfs_dablk_t * dead_blknop,xfs_dabuf_t ** dead_bufp)1678 xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1679 		      xfs_dabuf_t **dead_bufp)
1680 {
1681 	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
1682 	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
1683 	xfs_fileoff_t lastoff;
1684 	xfs_inode_t *ip;
1685 	xfs_trans_t *tp;
1686 	xfs_mount_t *mp;
1687 	int error, w, entno, level, dead_level;
1688 	xfs_da_blkinfo_t *dead_info, *sib_info;
1689 	xfs_da_intnode_t *par_node, *dead_node;
1690 	xfs_dir2_leaf_t *dead_leaf2;
1691 	xfs_dahash_t dead_hash;
1692 
1693 	dead_buf = *dead_bufp;
1694 	dead_blkno = *dead_blknop;
1695 	tp = args->trans;
1696 	ip = args->dp;
1697 	w = args->whichfork;
1698 	ASSERT(w == XFS_DATA_FORK);
1699 	mp = ip->i_mount;
1700 	lastoff = mp->m_dirfreeblk;
1701 	error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1702 	if (error)
1703 		return error;
1704 	if (unlikely(lastoff == 0)) {
1705 		XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
1706 				 mp);
1707 		return XFS_ERROR(EFSCORRUPTED);
1708 	}
1709 	/*
1710 	 * Read the last block in the btree space.
1711 	 */
1712 	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1713 	if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
1714 		return error;
1715 	/*
1716 	 * Copy the last block into the dead buffer and log it.
1717 	 */
1718 	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
1719 	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
1720 	dead_info = dead_buf->data;
1721 	/*
1722 	 * Get values from the moved block.
1723 	 */
1724 	if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
1725 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1726 		dead_level = 0;
1727 		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
1728 	} else {
1729 		ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1730 		dead_node = (xfs_da_intnode_t *)dead_info;
1731 		dead_level = be16_to_cpu(dead_node->hdr.level);
1732 		dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
1733 	}
1734 	sib_buf = par_buf = NULL;
1735 	/*
1736 	 * If the moved block has a left sibling, fix up the pointers.
1737 	 */
1738 	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
1739 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1740 			goto done;
1741 		sib_info = sib_buf->data;
1742 		if (unlikely(
1743 		    be32_to_cpu(sib_info->forw) != last_blkno ||
1744 		    sib_info->magic != dead_info->magic)) {
1745 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
1746 					 XFS_ERRLEVEL_LOW, mp);
1747 			error = XFS_ERROR(EFSCORRUPTED);
1748 			goto done;
1749 		}
1750 		sib_info->forw = cpu_to_be32(dead_blkno);
1751 		xfs_da_log_buf(tp, sib_buf,
1752 			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
1753 					sizeof(sib_info->forw)));
1754 		xfs_da_buf_done(sib_buf);
1755 		sib_buf = NULL;
1756 	}
1757 	/*
1758 	 * If the moved block has a right sibling, fix up the pointers.
1759 	 */
1760 	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
1761 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1762 			goto done;
1763 		sib_info = sib_buf->data;
1764 		if (unlikely(
1765 		       be32_to_cpu(sib_info->back) != last_blkno ||
1766 		       sib_info->magic != dead_info->magic)) {
1767 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
1768 					 XFS_ERRLEVEL_LOW, mp);
1769 			error = XFS_ERROR(EFSCORRUPTED);
1770 			goto done;
1771 		}
1772 		sib_info->back = cpu_to_be32(dead_blkno);
1773 		xfs_da_log_buf(tp, sib_buf,
1774 			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
1775 					sizeof(sib_info->back)));
1776 		xfs_da_buf_done(sib_buf);
1777 		sib_buf = NULL;
1778 	}
1779 	par_blkno = mp->m_dirleafblk;
1780 	level = -1;
1781 	/*
1782 	 * Walk down the tree looking for the parent of the moved block.
1783 	 */
1784 	for (;;) {
1785 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1786 			goto done;
1787 		par_node = par_buf->data;
1788 		if (unlikely(par_node->hdr.info.magic !=
1789 		    cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1790 		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1791 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1792 					 XFS_ERRLEVEL_LOW, mp);
1793 			error = XFS_ERROR(EFSCORRUPTED);
1794 			goto done;
1795 		}
1796 		level = be16_to_cpu(par_node->hdr.level);
1797 		for (entno = 0;
1798 		     entno < be16_to_cpu(par_node->hdr.count) &&
1799 		     be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
1800 		     entno++)
1801 			continue;
1802 		if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) {
1803 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1804 					 XFS_ERRLEVEL_LOW, mp);
1805 			error = XFS_ERROR(EFSCORRUPTED);
1806 			goto done;
1807 		}
1808 		par_blkno = be32_to_cpu(par_node->btree[entno].before);
1809 		if (level == dead_level + 1)
1810 			break;
1811 		xfs_da_brelse(tp, par_buf);
1812 		par_buf = NULL;
1813 	}
1814 	/*
1815 	 * We're in the right parent block.
1816 	 * Look for the right entry.
1817 	 */
1818 	for (;;) {
1819 		for (;
1820 		     entno < be16_to_cpu(par_node->hdr.count) &&
1821 		     be32_to_cpu(par_node->btree[entno].before) != last_blkno;
1822 		     entno++)
1823 			continue;
1824 		if (entno < be16_to_cpu(par_node->hdr.count))
1825 			break;
1826 		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
1827 		xfs_da_brelse(tp, par_buf);
1828 		par_buf = NULL;
1829 		if (unlikely(par_blkno == 0)) {
1830 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
1831 					 XFS_ERRLEVEL_LOW, mp);
1832 			error = XFS_ERROR(EFSCORRUPTED);
1833 			goto done;
1834 		}
1835 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1836 			goto done;
1837 		par_node = par_buf->data;
1838 		if (unlikely(
1839 		    be16_to_cpu(par_node->hdr.level) != level ||
1840 		    par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1841 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1842 					 XFS_ERRLEVEL_LOW, mp);
1843 			error = XFS_ERROR(EFSCORRUPTED);
1844 			goto done;
1845 		}
1846 		entno = 0;
1847 	}
1848 	/*
1849 	 * Update the parent entry pointing to the moved block.
1850 	 */
1851 	par_node->btree[entno].before = cpu_to_be32(dead_blkno);
1852 	xfs_da_log_buf(tp, par_buf,
1853 		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
1854 				sizeof(par_node->btree[entno].before)));
1855 	xfs_da_buf_done(par_buf);
1856 	xfs_da_buf_done(dead_buf);
1857 	*dead_blknop = last_blkno;
1858 	*dead_bufp = last_buf;
1859 	return 0;
1860 done:
1861 	if (par_buf)
1862 		xfs_da_brelse(tp, par_buf);
1863 	if (sib_buf)
1864 		xfs_da_brelse(tp, sib_buf);
1865 	xfs_da_brelse(tp, last_buf);
1866 	return error;
1867 }
1868 
1869 /*
1870  * Remove a btree block from a directory or attribute.
1871  */
1872 int
xfs_da_shrink_inode(xfs_da_args_t * args,xfs_dablk_t dead_blkno,xfs_dabuf_t * dead_buf)1873 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1874 		    xfs_dabuf_t *dead_buf)
1875 {
1876 	xfs_inode_t *dp;
1877 	int done, error, w, count;
1878 	xfs_trans_t *tp;
1879 	xfs_mount_t *mp;
1880 
1881 	dp = args->dp;
1882 	w = args->whichfork;
1883 	tp = args->trans;
1884 	mp = dp->i_mount;
1885 	if (w == XFS_DATA_FORK)
1886 		count = mp->m_dirblkfsbs;
1887 	else
1888 		count = 1;
1889 	for (;;) {
1890 		/*
1891 		 * Remove extents.  If we get ENOSPC for a dir we have to move
1892 		 * the last block to the place we want to kill.
1893 		 */
1894 		if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1895 				xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1896 				0, args->firstblock, args->flist,
1897 				&done)) == ENOSPC) {
1898 			if (w != XFS_DATA_FORK)
1899 				break;
1900 			if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
1901 					&dead_buf)))
1902 				break;
1903 		} else {
1904 			break;
1905 		}
1906 	}
1907 	xfs_da_binval(tp, dead_buf);
1908 	return error;
1909 }
1910 
1911 /*
1912  * See if the mapping(s) for this btree block are valid, i.e.
1913  * don't contain holes, are logically contiguous, and cover the whole range.
1914  */
1915 STATIC int
xfs_da_map_covers_blocks(int nmap,xfs_bmbt_irec_t * mapp,xfs_dablk_t bno,int count)1916 xfs_da_map_covers_blocks(
1917 	int		nmap,
1918 	xfs_bmbt_irec_t	*mapp,
1919 	xfs_dablk_t	bno,
1920 	int		count)
1921 {
1922 	int		i;
1923 	xfs_fileoff_t	off;
1924 
1925 	for (i = 0, off = bno; i < nmap; i++) {
1926 		if (mapp[i].br_startblock == HOLESTARTBLOCK ||
1927 		    mapp[i].br_startblock == DELAYSTARTBLOCK) {
1928 			return 0;
1929 		}
1930 		if (off != mapp[i].br_startoff) {
1931 			return 0;
1932 		}
1933 		off += mapp[i].br_blockcount;
1934 	}
1935 	return off == bno + count;
1936 }
1937 
1938 /*
1939  * Make a dabuf.
1940  * Used for get_buf, read_buf, read_bufr, and reada_buf.
1941  */
1942 STATIC int
xfs_da_do_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t * mappedbnop,xfs_dabuf_t ** bpp,int whichfork,int caller)1943 xfs_da_do_buf(
1944 	xfs_trans_t	*trans,
1945 	xfs_inode_t	*dp,
1946 	xfs_dablk_t	bno,
1947 	xfs_daddr_t	*mappedbnop,
1948 	xfs_dabuf_t	**bpp,
1949 	int		whichfork,
1950 	int		caller)
1951 {
1952 	xfs_buf_t	*bp = NULL;
1953 	xfs_buf_t	**bplist;
1954 	int		error=0;
1955 	int		i;
1956 	xfs_bmbt_irec_t	map;
1957 	xfs_bmbt_irec_t	*mapp;
1958 	xfs_daddr_t	mappedbno;
1959 	xfs_mount_t	*mp;
1960 	int		nbplist=0;
1961 	int		nfsb;
1962 	int		nmap;
1963 	xfs_dabuf_t	*rbp;
1964 
1965 	mp = dp->i_mount;
1966 	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
1967 	mappedbno = *mappedbnop;
1968 	/*
1969 	 * Caller doesn't have a mapping.  -2 means don't complain
1970 	 * if we land in a hole.
1971 	 */
1972 	if (mappedbno == -1 || mappedbno == -2) {
1973 		/*
1974 		 * Optimize the one-block case.
1975 		 */
1976 		if (nfsb == 1)
1977 			mapp = &map;
1978 		else
1979 			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
1980 
1981 		nmap = nfsb;
1982 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp,
1983 				       &nmap, xfs_bmapi_aflag(whichfork));
1984 		if (error)
1985 			goto exit0;
1986 	} else {
1987 		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
1988 		map.br_startoff = (xfs_fileoff_t)bno;
1989 		map.br_blockcount = nfsb;
1990 		mapp = &map;
1991 		nmap = 1;
1992 	}
1993 	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
1994 		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
1995 		if (unlikely(error == EFSCORRUPTED)) {
1996 			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
1997 				xfs_alert(mp, "%s: bno %lld dir: inode %lld",
1998 					__func__, (long long)bno,
1999 					(long long)dp->i_ino);
2000 				for (i = 0; i < nmap; i++) {
2001 					xfs_alert(mp,
2002 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2003 						i,
2004 						(long long)mapp[i].br_startoff,
2005 						(long long)mapp[i].br_startblock,
2006 						(long long)mapp[i].br_blockcount,
2007 						mapp[i].br_state);
2008 				}
2009 			}
2010 			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2011 					 XFS_ERRLEVEL_LOW, mp);
2012 		}
2013 		goto exit0;
2014 	}
2015 	if (caller != 3 && nmap > 1) {
2016 		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
2017 		nbplist = 0;
2018 	} else
2019 		bplist = NULL;
2020 	/*
2021 	 * Turn the mapping(s) into buffer(s).
2022 	 */
2023 	for (i = 0; i < nmap; i++) {
2024 		int	nmapped;
2025 
2026 		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
2027 		if (i == 0)
2028 			*mappedbnop = mappedbno;
2029 		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
2030 		switch (caller) {
2031 		case 0:
2032 			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
2033 				mappedbno, nmapped, 0);
2034 			error = bp ? bp->b_error : XFS_ERROR(EIO);
2035 			break;
2036 		case 1:
2037 		case 2:
2038 			bp = NULL;
2039 			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
2040 				mappedbno, nmapped, 0, &bp);
2041 			break;
2042 		case 3:
2043 			xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
2044 			error = 0;
2045 			bp = NULL;
2046 			break;
2047 		}
2048 		if (error) {
2049 			if (bp)
2050 				xfs_trans_brelse(trans, bp);
2051 			goto exit1;
2052 		}
2053 		if (!bp)
2054 			continue;
2055 		if (caller == 1) {
2056 			if (whichfork == XFS_ATTR_FORK)
2057 				xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2058 			else
2059 				xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2060 		}
2061 		if (bplist) {
2062 			bplist[nbplist++] = bp;
2063 		}
2064 	}
2065 	/*
2066 	 * Build a dabuf structure.
2067 	 */
2068 	if (bplist) {
2069 		rbp = xfs_da_buf_make(nbplist, bplist);
2070 	} else if (bp)
2071 		rbp = xfs_da_buf_make(1, &bp);
2072 	else
2073 		rbp = NULL;
2074 	/*
2075 	 * For read_buf, check the magic number.
2076 	 */
2077 	if (caller == 1) {
2078 		xfs_dir2_data_hdr_t	*hdr = rbp->data;
2079 		xfs_dir2_free_t		*free = rbp->data;
2080 		xfs_da_blkinfo_t	*info = rbp->data;
2081 		uint			magic, magic1;
2082 
2083 		magic = be16_to_cpu(info->magic);
2084 		magic1 = be32_to_cpu(hdr->magic);
2085 		if (unlikely(
2086 		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2087 				   (magic != XFS_ATTR_LEAF_MAGIC) &&
2088 				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
2089 				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
2090 				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2091 				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
2092 				   (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
2093 				mp, XFS_ERRTAG_DA_READ_BUF,
2094 				XFS_RANDOM_DA_READ_BUF))) {
2095 			trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
2096 			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2097 					     XFS_ERRLEVEL_LOW, mp, info);
2098 			error = XFS_ERROR(EFSCORRUPTED);
2099 			xfs_da_brelse(trans, rbp);
2100 			nbplist = 0;
2101 			goto exit1;
2102 		}
2103 	}
2104 	if (bplist) {
2105 		kmem_free(bplist);
2106 	}
2107 	if (mapp != &map) {
2108 		kmem_free(mapp);
2109 	}
2110 	if (bpp)
2111 		*bpp = rbp;
2112 	return 0;
2113 exit1:
2114 	if (bplist) {
2115 		for (i = 0; i < nbplist; i++)
2116 			xfs_trans_brelse(trans, bplist[i]);
2117 		kmem_free(bplist);
2118 	}
2119 exit0:
2120 	if (mapp != &map)
2121 		kmem_free(mapp);
2122 	if (bpp)
2123 		*bpp = NULL;
2124 	return error;
2125 }
2126 
2127 /*
2128  * Get a buffer for the dir/attr block.
2129  */
2130 int
xfs_da_get_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t mappedbno,xfs_dabuf_t ** bpp,int whichfork)2131 xfs_da_get_buf(
2132 	xfs_trans_t	*trans,
2133 	xfs_inode_t	*dp,
2134 	xfs_dablk_t	bno,
2135 	xfs_daddr_t		mappedbno,
2136 	xfs_dabuf_t	**bpp,
2137 	int		whichfork)
2138 {
2139 	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
2140 }
2141 
2142 /*
2143  * Get a buffer for the dir/attr block, fill in the contents.
2144  */
2145 int
xfs_da_read_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t mappedbno,xfs_dabuf_t ** bpp,int whichfork)2146 xfs_da_read_buf(
2147 	xfs_trans_t	*trans,
2148 	xfs_inode_t	*dp,
2149 	xfs_dablk_t	bno,
2150 	xfs_daddr_t		mappedbno,
2151 	xfs_dabuf_t	**bpp,
2152 	int		whichfork)
2153 {
2154 	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
2155 }
2156 
2157 /*
2158  * Readahead the dir/attr block.
2159  */
2160 xfs_daddr_t
xfs_da_reada_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,int whichfork)2161 xfs_da_reada_buf(
2162 	xfs_trans_t	*trans,
2163 	xfs_inode_t	*dp,
2164 	xfs_dablk_t	bno,
2165 	int		whichfork)
2166 {
2167 	xfs_daddr_t		rval;
2168 
2169 	rval = -1;
2170 	if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
2171 		return -1;
2172 	else
2173 		return rval;
2174 }
2175 
2176 kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */
2177 kmem_zone_t *xfs_dabuf_zone;		/* dabuf zone */
2178 
2179 /*
2180  * Allocate a dir-state structure.
2181  * We don't put them on the stack since they're large.
2182  */
2183 xfs_da_state_t *
xfs_da_state_alloc(void)2184 xfs_da_state_alloc(void)
2185 {
2186 	return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2187 }
2188 
2189 /*
2190  * Kill the altpath contents of a da-state structure.
2191  */
2192 STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t * state)2193 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2194 {
2195 	int	i;
2196 
2197 	for (i = 0; i < state->altpath.active; i++) {
2198 		if (state->altpath.blk[i].bp) {
2199 			if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2200 				xfs_da_buf_done(state->altpath.blk[i].bp);
2201 			state->altpath.blk[i].bp = NULL;
2202 		}
2203 	}
2204 	state->altpath.active = 0;
2205 }
2206 
2207 /*
2208  * Free a da-state structure.
2209  */
2210 void
xfs_da_state_free(xfs_da_state_t * state)2211 xfs_da_state_free(xfs_da_state_t *state)
2212 {
2213 	int	i;
2214 
2215 	xfs_da_state_kill_altpath(state);
2216 	for (i = 0; i < state->path.active; i++) {
2217 		if (state->path.blk[i].bp)
2218 			xfs_da_buf_done(state->path.blk[i].bp);
2219 	}
2220 	if (state->extravalid && state->extrablk.bp)
2221 		xfs_da_buf_done(state->extrablk.bp);
2222 #ifdef DEBUG
2223 	memset((char *)state, 0, sizeof(*state));
2224 #endif /* DEBUG */
2225 	kmem_zone_free(xfs_da_state_zone, state);
2226 }
2227 
2228 /*
2229  * Create a dabuf.
2230  */
2231 /* ARGSUSED */
2232 STATIC xfs_dabuf_t *
xfs_da_buf_make(int nbuf,xfs_buf_t ** bps)2233 xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
2234 {
2235 	xfs_buf_t	*bp;
2236 	xfs_dabuf_t	*dabuf;
2237 	int		i;
2238 	int		off;
2239 
2240 	if (nbuf == 1)
2241 		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2242 	else
2243 		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2244 	dabuf->dirty = 0;
2245 	if (nbuf == 1) {
2246 		dabuf->nbuf = 1;
2247 		bp = bps[0];
2248 		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
2249 		dabuf->data = bp->b_addr;
2250 		dabuf->bps[0] = bp;
2251 	} else {
2252 		dabuf->nbuf = nbuf;
2253 		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
2254 			dabuf->bps[i] = bp = bps[i];
2255 			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
2256 		}
2257 		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
2258 		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2259 			bp = bps[i];
2260 			memcpy((char *)dabuf->data + off, bp->b_addr,
2261 				XFS_BUF_COUNT(bp));
2262 		}
2263 	}
2264 	return dabuf;
2265 }
2266 
2267 /*
2268  * Un-dirty a dabuf.
2269  */
2270 STATIC void
xfs_da_buf_clean(xfs_dabuf_t * dabuf)2271 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2272 {
2273 	xfs_buf_t	*bp;
2274 	int		i;
2275 	int		off;
2276 
2277 	if (dabuf->dirty) {
2278 		ASSERT(dabuf->nbuf > 1);
2279 		dabuf->dirty = 0;
2280 		for (i = off = 0; i < dabuf->nbuf;
2281 				i++, off += XFS_BUF_COUNT(bp)) {
2282 			bp = dabuf->bps[i];
2283 			memcpy(bp->b_addr, dabuf->data + off,
2284 						XFS_BUF_COUNT(bp));
2285 		}
2286 	}
2287 }
2288 
2289 /*
2290  * Release a dabuf.
2291  */
2292 void
xfs_da_buf_done(xfs_dabuf_t * dabuf)2293 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2294 {
2295 	ASSERT(dabuf);
2296 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2297 	if (dabuf->dirty)
2298 		xfs_da_buf_clean(dabuf);
2299 	if (dabuf->nbuf > 1) {
2300 		kmem_free(dabuf->data);
2301 		kmem_free(dabuf);
2302 	} else {
2303 		kmem_zone_free(xfs_dabuf_zone, dabuf);
2304 	}
2305 }
2306 
2307 /*
2308  * Log transaction from a dabuf.
2309  */
2310 void
xfs_da_log_buf(xfs_trans_t * tp,xfs_dabuf_t * dabuf,uint first,uint last)2311 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
2312 {
2313 	xfs_buf_t	*bp;
2314 	uint		f;
2315 	int		i;
2316 	uint		l;
2317 	int		off;
2318 
2319 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2320 	if (dabuf->nbuf == 1) {
2321 		ASSERT(dabuf->data == dabuf->bps[0]->b_addr);
2322 		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
2323 		return;
2324 	}
2325 	dabuf->dirty = 1;
2326 	ASSERT(first <= last);
2327 	for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2328 		bp = dabuf->bps[i];
2329 		f = off;
2330 		l = f + XFS_BUF_COUNT(bp) - 1;
2331 		if (f < first)
2332 			f = first;
2333 		if (l > last)
2334 			l = last;
2335 		if (f <= l)
2336 			xfs_trans_log_buf(tp, bp, f - off, l - off);
2337 		/*
2338 		 * B_DONE is set by xfs_trans_log buf.
2339 		 * If we don't set it on a new buffer (get not read)
2340 		 * then if we don't put anything in the buffer it won't
2341 		 * be set, and at commit it it released into the cache,
2342 		 * and then a read will fail.
2343 		 */
2344 		else if (!(XFS_BUF_ISDONE(bp)))
2345 		  XFS_BUF_DONE(bp);
2346 	}
2347 	ASSERT(last < off);
2348 }
2349 
2350 /*
2351  * Release dabuf from a transaction.
2352  * Have to free up the dabuf before the buffers are released,
2353  * since the synchronization on the dabuf is really the lock on the buffer.
2354  */
2355 void
xfs_da_brelse(xfs_trans_t * tp,xfs_dabuf_t * dabuf)2356 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2357 {
2358 	xfs_buf_t	*bp;
2359 	xfs_buf_t	**bplist;
2360 	int		i;
2361 	int		nbuf;
2362 
2363 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2364 	if ((nbuf = dabuf->nbuf) == 1) {
2365 		bplist = &bp;
2366 		bp = dabuf->bps[0];
2367 	} else {
2368 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2369 		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2370 	}
2371 	xfs_da_buf_done(dabuf);
2372 	for (i = 0; i < nbuf; i++)
2373 		xfs_trans_brelse(tp, bplist[i]);
2374 	if (bplist != &bp)
2375 		kmem_free(bplist);
2376 }
2377 
2378 /*
2379  * Invalidate dabuf from a transaction.
2380  */
2381 void
xfs_da_binval(xfs_trans_t * tp,xfs_dabuf_t * dabuf)2382 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2383 {
2384 	xfs_buf_t	*bp;
2385 	xfs_buf_t	**bplist;
2386 	int		i;
2387 	int		nbuf;
2388 
2389 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2390 	if ((nbuf = dabuf->nbuf) == 1) {
2391 		bplist = &bp;
2392 		bp = dabuf->bps[0];
2393 	} else {
2394 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2395 		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2396 	}
2397 	xfs_da_buf_done(dabuf);
2398 	for (i = 0; i < nbuf; i++)
2399 		xfs_trans_binval(tp, bplist[i]);
2400 	if (bplist != &bp)
2401 		kmem_free(bplist);
2402 }
2403 
2404 /*
2405  * Get the first daddr from a dabuf.
2406  */
2407 xfs_daddr_t
xfs_da_blkno(xfs_dabuf_t * dabuf)2408 xfs_da_blkno(xfs_dabuf_t *dabuf)
2409 {
2410 	ASSERT(dabuf->nbuf);
2411 	ASSERT(dabuf->data);
2412 	return XFS_BUF_ADDR(dabuf->bps[0]);
2413 }
2414