xref: /linux/fs/ocfs2/namei.c (revision 944d79559d154c12becde0dab327016cf438f46c)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * namei.c
5  *
6  * Create and rename file, directory, symlinks
7  *
8  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9  *
10  *  Portions of this code from linux/fs/ext3/dir.c
11  *
12  *  Copyright (C) 1992, 1993, 1994, 1995
13  *  Remy Card (card@masi.ibp.fr)
14  *  Laboratoire MASI - Institut Blaise pascal
15  *  Universite Pierre et Marie Curie (Paris VI)
16  *
17  *   from
18  *
19  *   linux/fs/minix/dir.c
20  *
21  *   Copyright (C) 1991, 1992 Linus Torvalds
22  *
23  * This program is free software; you can redistribute it and/or
24  * modify it under the terms of the GNU General Public
25  * License as published by the Free Software Foundation; either
26  * version 2 of the License, or (at your option) any later version.
27  *
28  * This program is distributed in the hope that it will be useful,
29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
31  * General Public License for more details.
32  *
33  * You should have received a copy of the GNU General Public
34  * License along with this program; if not, write to the
35  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36  * Boston, MA 02111-1307, USA.
37  */
38 
39 #include <linux/fs.h>
40 #include <linux/types.h>
41 #include <linux/slab.h>
42 #include <linux/highmem.h>
43 
44 #define MLOG_MASK_PREFIX ML_NAMEI
45 #include <cluster/masklog.h>
46 
47 #include "ocfs2.h"
48 
49 #include "alloc.h"
50 #include "dcache.h"
51 #include "dir.h"
52 #include "dlmglue.h"
53 #include "extent_map.h"
54 #include "file.h"
55 #include "inode.h"
56 #include "journal.h"
57 #include "namei.h"
58 #include "suballoc.h"
59 #include "symlink.h"
60 #include "sysfile.h"
61 #include "uptodate.h"
62 #include "vote.h"
63 
64 #include "buffer_head_io.h"
65 
/*
 * Sizing constants for a window of NAMEI_RA_CHUNKS chunks, each holding
 * NAMEI_RA_BLOCKS blocks ("RA" presumably stands for read-ahead; the
 * users of these macros are outside this part of the file).
 *
 * NAMEI_RA_SIZE is the total number of slots; NAMEI_RA_INDEX(c,b) maps
 * chunk c / block b onto a flat slot number.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
#define NAMEI_RA_SIZE        (NAMEI_RA_BLOCKS * NAMEI_RA_CHUNKS)
#define NAMEI_RA_INDEX(c,b)  ((b) + ((c) * NAMEI_RA_BLOCKS))
70 
71 static int inline ocfs2_search_dirblock(struct buffer_head *bh,
72 					struct inode *dir,
73 					const char *name, int namelen,
74 					unsigned long offset,
75 					struct ocfs2_dir_entry **res_dir);
76 
77 static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
78 			      struct inode *dir,
79 			      struct ocfs2_dir_entry *de_del,
80 			      struct buffer_head *bh);
81 
82 static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
83 			     struct inode *dir,
84 			     const char *name, int namelen,
85 			     struct inode *inode, u64 blkno,
86 			     struct buffer_head *parent_fe_bh,
87 			     struct buffer_head *insert_bh);
88 
89 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
90 			      struct inode *dir,
91 			      struct dentry *dentry, int mode,
92 			      dev_t dev,
93 			      struct buffer_head **new_fe_bh,
94 			      struct buffer_head *parent_fe_bh,
95 			      struct ocfs2_journal_handle *handle,
96 			      struct inode **ret_inode,
97 			      struct ocfs2_alloc_context *inode_ac);
98 
99 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
100 			      struct ocfs2_journal_handle *handle,
101 			      struct inode *parent,
102 			      struct inode *inode,
103 			      struct buffer_head *fe_bh,
104 			      struct ocfs2_alloc_context *data_ac);
105 
106 static int ocfs2_double_lock(struct ocfs2_super *osb,
107 			     struct ocfs2_journal_handle *handle,
108 			     struct buffer_head **bh1,
109 			     struct inode *inode1,
110 			     struct buffer_head **bh2,
111 			     struct inode *inode2);
112 
113 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
114 				    struct ocfs2_journal_handle *handle,
115 				    struct inode *inode,
116 				    char *name,
117 				    struct buffer_head **de_bh);
118 
119 static int ocfs2_orphan_add(struct ocfs2_super *osb,
120 			    struct ocfs2_journal_handle *handle,
121 			    struct inode *inode,
122 			    struct ocfs2_dinode *fe,
123 			    char *name,
124 			    struct buffer_head *de_bh);
125 
126 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
127 				     struct ocfs2_journal_handle *handle,
128 				     struct inode *inode,
129 				     const char *symname);
130 
131 static inline int ocfs2_add_entry(struct ocfs2_journal_handle *handle,
132 				  struct dentry *dentry,
133 				  struct inode *inode, u64 blkno,
134 				  struct buffer_head *parent_fe_bh,
135 				  struct buffer_head *insert_bh)
136 {
137 	return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
138 				 dentry->d_name.name, dentry->d_name.len,
139 				 inode, blkno, parent_fe_bh, insert_bh);
140 }
141 
/* Orphan dir entries are named after an 8 byte (u64) value rendered as
 * a hex string, i.e. two characters for every byte of the value. */
#define OCFS2_ORPHAN_NAMELEN ((int)(sizeof(u64) * 2))
144 
145 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
146 				   struct nameidata *nd)
147 {
148 	int status;
149 	u64 blkno;
150 	struct buffer_head *dirent_bh = NULL;
151 	struct inode *inode = NULL;
152 	struct dentry *ret;
153 	struct ocfs2_dir_entry *dirent;
154 	struct ocfs2_inode_info *oi;
155 
156 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
157 		   dentry->d_name.len, dentry->d_name.name);
158 
159 	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
160 		ret = ERR_PTR(-ENAMETOOLONG);
161 		goto bail;
162 	}
163 
164 	mlog(0, "find name %.*s in directory %"MLFu64"\n", dentry->d_name.len,
165 	     dentry->d_name.name, OCFS2_I(dir)->ip_blkno);
166 
167 	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
168 	if (status < 0) {
169 		if (status != -ENOENT)
170 			mlog_errno(status);
171 		ret = ERR_PTR(status);
172 		goto bail;
173 	}
174 
175 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
176 					  dentry->d_name.len, &blkno,
177 					  dir, &dirent_bh, &dirent);
178 	if (status < 0)
179 		goto bail_add;
180 
181 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
182 	if (IS_ERR(inode)) {
183 		mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
184 		ret = ERR_PTR(-EACCES);
185 		goto bail_unlock;
186 	}
187 
188 	oi = OCFS2_I(inode);
189 	/* Clear any orphaned state... If we were able to look up the
190 	 * inode from a directory, it certainly can't be orphaned. We
191 	 * might have the bad state from a node which intended to
192 	 * orphan this inode but crashed before it could commit the
193 	 * unlink. */
194 	spin_lock(&oi->ip_lock);
195 	oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
196 	oi->ip_orphaned_slot = OCFS2_INVALID_SLOT;
197 	spin_unlock(&oi->ip_lock);
198 
199 bail_add:
200 
201 	dentry->d_op = &ocfs2_dentry_ops;
202 	ret = d_splice_alias(inode, dentry);
203 
204 bail_unlock:
205 	/* Don't drop the cluster lock until *after* the d_add --
206 	 * unlink on another node will message us to remove that
207 	 * dentry under this lock so otherwise we can race this with
208 	 * the vote thread and have a stale dentry. */
209 	ocfs2_meta_unlock(dir, 0);
210 
211 bail:
212 	if (dirent_bh)
213 		brelse(dirent_bh);
214 
215 	mlog_exit_ptr(ret);
216 
217 	return ret;
218 }
219 
220 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
221 			      struct ocfs2_journal_handle *handle,
222 			      struct inode *parent,
223 			      struct inode *inode,
224 			      struct buffer_head *fe_bh,
225 			      struct ocfs2_alloc_context *data_ac)
226 {
227 	int status;
228 	struct buffer_head *new_bh = NULL;
229 	struct ocfs2_dir_entry *de = NULL;
230 
231 	mlog_entry_void();
232 
233 	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
234 				     data_ac, NULL, &new_bh);
235 	if (status < 0) {
236 		mlog_errno(status);
237 		goto bail;
238 	}
239 
240 	ocfs2_set_new_buffer_uptodate(inode, new_bh);
241 
242 	status = ocfs2_journal_access(handle, inode, new_bh,
243 				      OCFS2_JOURNAL_ACCESS_CREATE);
244 	if (status < 0) {
245 		mlog_errno(status);
246 		goto bail;
247 	}
248 	memset(new_bh->b_data, 0, osb->sb->s_blocksize);
249 
250 	de = (struct ocfs2_dir_entry *) new_bh->b_data;
251 	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
252 	de->name_len = 1;
253 	de->rec_len =
254 		cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
255 	strcpy(de->name, ".");
256 	ocfs2_set_de_type(de, S_IFDIR);
257 	de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
258 	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
259 	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
260 				  OCFS2_DIR_REC_LEN(1));
261 	de->name_len = 2;
262 	strcpy(de->name, "..");
263 	ocfs2_set_de_type(de, S_IFDIR);
264 
265 	status = ocfs2_journal_dirty(handle, new_bh);
266 	if (status < 0) {
267 		mlog_errno(status);
268 		goto bail;
269 	}
270 
271 	i_size_write(inode, inode->i_sb->s_blocksize);
272 	inode->i_nlink = 2;
273 	inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize);
274 	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
275 	if (status < 0) {
276 		mlog_errno(status);
277 		goto bail;
278 	}
279 
280 	status = 0;
281 bail:
282 	if (new_bh)
283 		brelse(new_bh);
284 
285 	mlog_exit(status);
286 	return status;
287 }
288 
289 static int ocfs2_mknod(struct inode *dir,
290 		       struct dentry *dentry,
291 		       int mode,
292 		       dev_t dev)
293 {
294 	int status = 0;
295 	struct buffer_head *parent_fe_bh = NULL;
296 	struct ocfs2_journal_handle *handle = NULL;
297 	struct ocfs2_super *osb;
298 	struct ocfs2_dinode *dirfe;
299 	struct buffer_head *new_fe_bh = NULL;
300 	struct buffer_head *de_bh = NULL;
301 	struct inode *inode = NULL;
302 	struct ocfs2_alloc_context *inode_ac = NULL;
303 	struct ocfs2_alloc_context *data_ac = NULL;
304 
305 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
306 		   (unsigned long)dev, dentry->d_name.len,
307 		   dentry->d_name.name);
308 
309 	/* get our super block */
310 	osb = OCFS2_SB(dir->i_sb);
311 
312 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
313 		mlog(ML_ERROR, "inode %"MLFu64" has i_nlink of %u\n",
314 		     OCFS2_I(dir)->ip_blkno, dir->i_nlink);
315 		status = -EMLINK;
316 		goto leave;
317 	}
318 
319 	handle = ocfs2_alloc_handle(osb);
320 	if (handle == NULL) {
321 		status = -ENOMEM;
322 		mlog_errno(status);
323 		goto leave;
324 	}
325 
326 	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
327 	if (status < 0) {
328 		if (status != -ENOENT)
329 			mlog_errno(status);
330 		goto leave;
331 	}
332 
333 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
334 	if (!dirfe->i_links_count) {
335 		/* can't make a file in a deleted directory. */
336 		status = -ENOENT;
337 		goto leave;
338 	}
339 
340 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
341 					   dentry->d_name.len);
342 	if (status)
343 		goto leave;
344 
345 	/* get a spot inside the dir. */
346 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
347 					      dentry->d_name.name,
348 					      dentry->d_name.len, &de_bh);
349 	if (status < 0) {
350 		mlog_errno(status);
351 		goto leave;
352 	}
353 
354 	/* reserve an inode spot */
355 	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
356 	if (status < 0) {
357 		if (status != -ENOSPC)
358 			mlog_errno(status);
359 		goto leave;
360 	}
361 
362 	/* are we making a directory? If so, reserve a cluster for his
363 	 * 1st extent. */
364 	if (S_ISDIR(mode)) {
365 		status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
366 		if (status < 0) {
367 			if (status != -ENOSPC)
368 				mlog_errno(status);
369 			goto leave;
370 		}
371 	}
372 
373 	handle = ocfs2_start_trans(osb, handle, OCFS2_MKNOD_CREDITS);
374 	if (IS_ERR(handle)) {
375 		status = PTR_ERR(handle);
376 		handle = NULL;
377 		mlog_errno(status);
378 		goto leave;
379 	}
380 
381 	/* do the real work now. */
382 	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
383 				    &new_fe_bh, parent_fe_bh, handle,
384 				    &inode, inode_ac);
385 	if (status < 0) {
386 		mlog_errno(status);
387 		goto leave;
388 	}
389 
390 	if (S_ISDIR(mode)) {
391 		status = ocfs2_fill_new_dir(osb, handle, dir, inode,
392 					    new_fe_bh, data_ac);
393 		if (status < 0) {
394 			mlog_errno(status);
395 			goto leave;
396 		}
397 
398 		status = ocfs2_journal_access(handle, dir, parent_fe_bh,
399 					      OCFS2_JOURNAL_ACCESS_WRITE);
400 		if (status < 0) {
401 			mlog_errno(status);
402 			goto leave;
403 		}
404 		le16_add_cpu(&dirfe->i_links_count, 1);
405 		status = ocfs2_journal_dirty(handle, parent_fe_bh);
406 		if (status < 0) {
407 			mlog_errno(status);
408 			goto leave;
409 		}
410 		dir->i_nlink++;
411 	}
412 
413 	status = ocfs2_add_entry(handle, dentry, inode,
414 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
415 				 de_bh);
416 	if (status < 0) {
417 		mlog_errno(status);
418 		goto leave;
419 	}
420 
421 	insert_inode_hash(inode);
422 	dentry->d_op = &ocfs2_dentry_ops;
423 	d_instantiate(dentry, inode);
424 	status = 0;
425 leave:
426 	if (handle)
427 		ocfs2_commit_trans(handle);
428 
429 	if (status == -ENOSPC)
430 		mlog(0, "Disk is full\n");
431 
432 	if (new_fe_bh)
433 		brelse(new_fe_bh);
434 
435 	if (de_bh)
436 		brelse(de_bh);
437 
438 	if (parent_fe_bh)
439 		brelse(parent_fe_bh);
440 
441 	if ((status < 0) && inode)
442 		iput(inode);
443 
444 	if (inode_ac)
445 		ocfs2_free_alloc_context(inode_ac);
446 
447 	if (data_ac)
448 		ocfs2_free_alloc_context(data_ac);
449 
450 	mlog_exit(status);
451 
452 	return status;
453 }
454 
455 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
456 			      struct inode *dir,
457 			      struct dentry *dentry, int mode,
458 			      dev_t dev,
459 			      struct buffer_head **new_fe_bh,
460 			      struct buffer_head *parent_fe_bh,
461 			      struct ocfs2_journal_handle *handle,
462 			      struct inode **ret_inode,
463 			      struct ocfs2_alloc_context *inode_ac)
464 {
465 	int status = 0;
466 	struct ocfs2_dinode *fe = NULL;
467 	struct ocfs2_extent_list *fel;
468 	u64 fe_blkno = 0;
469 	u16 suballoc_bit;
470 	struct inode *inode = NULL;
471 
472 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
473 		   (unsigned long)dev, dentry->d_name.len,
474 		   dentry->d_name.name);
475 
476 	*new_fe_bh = NULL;
477 	*ret_inode = NULL;
478 
479 	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
480 				       &fe_blkno);
481 	if (status < 0) {
482 		mlog_errno(status);
483 		goto leave;
484 	}
485 
486 	inode = new_inode(dir->i_sb);
487 	if (IS_ERR(inode)) {
488 		status = PTR_ERR(inode);
489 		mlog(ML_ERROR, "new_inode failed!\n");
490 		goto leave;
491 	}
492 
493 	/* populate as many fields early on as possible - many of
494 	 * these are used by the support functions here and in
495 	 * callers. */
496 	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
497 	OCFS2_I(inode)->ip_blkno = fe_blkno;
498 	if (S_ISDIR(mode))
499 		inode->i_nlink = 2;
500 	else
501 		inode->i_nlink = 1;
502 	inode->i_mode = mode;
503 	spin_lock(&osb->osb_lock);
504 	inode->i_generation = osb->s_next_generation++;
505 	spin_unlock(&osb->osb_lock);
506 
507 	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
508 	if (!*new_fe_bh) {
509 		status = -EIO;
510 		mlog_errno(status);
511 		goto leave;
512 	}
513 	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
514 
515 	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
516 				      OCFS2_JOURNAL_ACCESS_CREATE);
517 	if (status < 0) {
518 		mlog_errno(status);
519 		goto leave;
520 	}
521 
522 	fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data;
523 	memset(fe, 0, osb->sb->s_blocksize);
524 
525 	fe->i_generation = cpu_to_le32(inode->i_generation);
526 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
527 	fe->i_blkno = cpu_to_le64(fe_blkno);
528 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
529 	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
530 	fe->i_uid = cpu_to_le32(current->fsuid);
531 	if (dir->i_mode & S_ISGID) {
532 		fe->i_gid = cpu_to_le32(dir->i_gid);
533 		if (S_ISDIR(mode))
534 			mode |= S_ISGID;
535 	} else
536 		fe->i_gid = cpu_to_le32(current->fsgid);
537 	fe->i_mode = cpu_to_le16(mode);
538 	if (S_ISCHR(mode) || S_ISBLK(mode))
539 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
540 
541 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
542 
543 	fe->i_last_eb_blk = 0;
544 	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
545 	le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
546 	fe->i_atime = fe->i_ctime = fe->i_mtime =
547 		cpu_to_le64(CURRENT_TIME.tv_sec);
548 	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
549 		cpu_to_le32(CURRENT_TIME.tv_nsec);
550 	fe->i_dtime = 0;
551 
552 	fel = &fe->id2.i_list;
553 	fel->l_tree_depth = 0;
554 	fel->l_next_free_rec = 0;
555 	fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
556 
557 	status = ocfs2_journal_dirty(handle, *new_fe_bh);
558 	if (status < 0) {
559 		mlog_errno(status);
560 		goto leave;
561 	}
562 
563 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
564 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
565 		     "i_blkno=%"MLFu64", i_ino=%lu\n",
566 		     (unsigned long long) (*new_fe_bh)->b_blocknr,
567 		     fe->i_blkno, inode->i_ino);
568 		BUG();
569 	}
570 
571 	ocfs2_inode_set_new(osb, inode);
572 	status = ocfs2_create_new_inode_locks(inode);
573 	if (status < 0)
574 		mlog_errno(status);
575 
576 	status = 0; /* error in ocfs2_create_new_inode_locks is not
577 		     * critical */
578 
579 	*ret_inode = inode;
580 leave:
581 	if (status < 0) {
582 		if (*new_fe_bh) {
583 			brelse(*new_fe_bh);
584 			*new_fe_bh = NULL;
585 		}
586 		if (inode)
587 			iput(inode);
588 	}
589 
590 	mlog_exit(status);
591 	return status;
592 }
593 
594 static int ocfs2_mkdir(struct inode *dir,
595 		       struct dentry *dentry,
596 		       int mode)
597 {
598 	int ret;
599 
600 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
601 		   dentry->d_name.len, dentry->d_name.name);
602 	ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
603 	mlog_exit(ret);
604 
605 	return ret;
606 }
607 
608 static int ocfs2_create(struct inode *dir,
609 			struct dentry *dentry,
610 			int mode,
611 			struct nameidata *nd)
612 {
613 	int ret;
614 
615 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
616 		   dentry->d_name.len, dentry->d_name.name);
617 	ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
618 	mlog_exit(ret);
619 
620 	return ret;
621 }
622 
623 static int ocfs2_link(struct dentry *old_dentry,
624 		      struct inode *dir,
625 		      struct dentry *dentry)
626 {
627 	struct ocfs2_journal_handle *handle = NULL;
628 	struct inode *inode = old_dentry->d_inode;
629 	int err;
630 	struct buffer_head *fe_bh = NULL;
631 	struct buffer_head *parent_fe_bh = NULL;
632 	struct buffer_head *de_bh = NULL;
633 	struct ocfs2_dinode *fe = NULL;
634 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
635 
636 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
637 		   old_dentry->d_name.len, old_dentry->d_name.name,
638 		   dentry->d_name.len, dentry->d_name.name);
639 
640 	if (S_ISDIR(inode->i_mode)) {
641 		err = -EPERM;
642 		goto bail;
643 	}
644 
645 	if (inode->i_nlink >= OCFS2_LINK_MAX) {
646 		err = -EMLINK;
647 		goto bail;
648 	}
649 
650 	handle = ocfs2_alloc_handle(osb);
651 	if (handle == NULL) {
652 		err = -ENOMEM;
653 		goto bail;
654 	}
655 
656 	err = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
657 	if (err < 0) {
658 		if (err != -ENOENT)
659 			mlog_errno(err);
660 		goto bail;
661 	}
662 
663 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
664 					dentry->d_name.len);
665 	if (err)
666 		goto bail;
667 
668 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
669 					   dentry->d_name.name,
670 					   dentry->d_name.len, &de_bh);
671 	if (err < 0) {
672 		mlog_errno(err);
673 		goto bail;
674 	}
675 
676 	err = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
677 	if (err < 0) {
678 		if (err != -ENOENT)
679 			mlog_errno(err);
680 		goto bail;
681 	}
682 
683 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
684 	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
685 		err = -EMLINK;
686 		goto bail;
687 	}
688 
689 	handle = ocfs2_start_trans(osb, handle, OCFS2_LINK_CREDITS);
690 	if (IS_ERR(handle)) {
691 		err = PTR_ERR(handle);
692 		handle = NULL;
693 		mlog_errno(err);
694 		goto bail;
695 	}
696 
697 	err = ocfs2_journal_access(handle, inode, fe_bh,
698 				   OCFS2_JOURNAL_ACCESS_WRITE);
699 	if (err < 0) {
700 		mlog_errno(err);
701 		goto bail;
702 	}
703 
704 	inode->i_nlink++;
705 	inode->i_ctime = CURRENT_TIME;
706 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
707 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
708 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
709 
710 	err = ocfs2_journal_dirty(handle, fe_bh);
711 	if (err < 0) {
712 		le16_add_cpu(&fe->i_links_count, -1);
713 		inode->i_nlink--;
714 		mlog_errno(err);
715 		goto bail;
716 	}
717 
718 	err = ocfs2_add_entry(handle, dentry, inode,
719 			      OCFS2_I(inode)->ip_blkno,
720 			      parent_fe_bh, de_bh);
721 	if (err) {
722 		le16_add_cpu(&fe->i_links_count, -1);
723 		inode->i_nlink--;
724 		mlog_errno(err);
725 		goto bail;
726 	}
727 
728 	atomic_inc(&inode->i_count);
729 	dentry->d_op = &ocfs2_dentry_ops;
730 	d_instantiate(dentry, inode);
731 bail:
732 	if (handle)
733 		ocfs2_commit_trans(handle);
734 	if (de_bh)
735 		brelse(de_bh);
736 	if (fe_bh)
737 		brelse(fe_bh);
738 	if (parent_fe_bh)
739 		brelse(parent_fe_bh);
740 
741 	mlog_exit(err);
742 
743 	return err;
744 }
745 
746 static int ocfs2_unlink(struct inode *dir,
747 			struct dentry *dentry)
748 {
749 	int status;
750 	unsigned int saved_nlink = 0;
751 	struct inode *inode = dentry->d_inode;
752 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
753 	u64 blkno;
754 	struct ocfs2_dinode *fe = NULL;
755 	struct buffer_head *fe_bh = NULL;
756 	struct buffer_head *parent_node_bh = NULL;
757 	struct ocfs2_journal_handle *handle = NULL;
758 	struct ocfs2_dir_entry *dirent = NULL;
759 	struct buffer_head *dirent_bh = NULL;
760 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
761 	struct buffer_head *orphan_entry_bh = NULL;
762 
763 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
764 		   dentry->d_name.len, dentry->d_name.name);
765 
766 	BUG_ON(dentry->d_parent->d_inode != dir);
767 
768 	mlog(0, "ino = %"MLFu64"\n", OCFS2_I(inode)->ip_blkno);
769 
770 	if (inode == osb->root_inode) {
771 		mlog(0, "Cannot delete the root directory\n");
772 		status = -EPERM;
773 		goto leave;
774 	}
775 
776 	handle = ocfs2_alloc_handle(osb);
777 	if (handle == NULL) {
778 		status = -ENOMEM;
779 		mlog_errno(status);
780 		goto leave;
781 	}
782 
783 	status = ocfs2_meta_lock(dir, handle, &parent_node_bh, 1);
784 	if (status < 0) {
785 		if (status != -ENOENT)
786 			mlog_errno(status);
787 		goto leave;
788 	}
789 
790 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
791 					  dentry->d_name.len, &blkno,
792 					  dir, &dirent_bh, &dirent);
793 	if (status < 0) {
794 		if (status != -ENOENT)
795 			mlog_errno(status);
796 		goto leave;
797 	}
798 
799 	if (OCFS2_I(inode)->ip_blkno != blkno) {
800 		status = -ENOENT;
801 
802 		mlog(0, "ip_blkno (%"MLFu64") != dirent blkno (%"MLFu64") "
803 		     "ip_flags = %x\n", OCFS2_I(inode)->ip_blkno, blkno,
804 		     OCFS2_I(inode)->ip_flags);
805 		goto leave;
806 	}
807 
808 	status = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
809 	if (status < 0) {
810 		if (status != -ENOENT)
811 			mlog_errno(status);
812 		goto leave;
813 	}
814 
815 	if (S_ISDIR(inode->i_mode)) {
816 	       	if (!ocfs2_empty_dir(inode)) {
817 			status = -ENOTEMPTY;
818 			goto leave;
819 		} else if (inode->i_nlink != 2) {
820 			status = -ENOTEMPTY;
821 			goto leave;
822 		}
823 	}
824 
825 	/* There are still a few steps left until we can consider the
826 	 * unlink to have succeeded. Save off nlink here before
827 	 * modification so we can set it back in case we hit an issue
828 	 * before commit. */
829 	saved_nlink = inode->i_nlink;
830 	if (S_ISDIR(inode->i_mode))
831 		inode->i_nlink = 0;
832 	else
833 		inode->i_nlink--;
834 
835 	status = ocfs2_request_unlink_vote(inode, dentry,
836 					   (unsigned int) inode->i_nlink);
837 	if (status < 0) {
838 		/* This vote should succeed under all normal
839 		 * circumstances. */
840 		mlog_errno(status);
841 		goto leave;
842 	}
843 
844 	if (!inode->i_nlink) {
845 		status = ocfs2_prepare_orphan_dir(osb, handle, inode,
846 						  orphan_name,
847 						  &orphan_entry_bh);
848 		if (status < 0) {
849 			mlog_errno(status);
850 			goto leave;
851 		}
852 	}
853 
854 	handle = ocfs2_start_trans(osb, handle, OCFS2_UNLINK_CREDITS);
855 	if (IS_ERR(handle)) {
856 		status = PTR_ERR(handle);
857 		handle = NULL;
858 		mlog_errno(status);
859 		goto leave;
860 	}
861 
862 	status = ocfs2_journal_access(handle, inode, fe_bh,
863 				      OCFS2_JOURNAL_ACCESS_WRITE);
864 	if (status < 0) {
865 		mlog_errno(status);
866 		goto leave;
867 	}
868 
869 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
870 
871 	if (!inode->i_nlink) {
872 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
873 					  orphan_entry_bh);
874 		if (status < 0) {
875 			mlog_errno(status);
876 			goto leave;
877 		}
878 	}
879 
880 	/* delete the name from the parent dir */
881 	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
882 	if (status < 0) {
883 		mlog_errno(status);
884 		goto leave;
885 	}
886 
887 	/* We can set nlink on the dinode now. clear the saved version
888 	 * so that it doesn't get set later. */
889 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
890 	saved_nlink = 0;
891 
892 	status = ocfs2_journal_dirty(handle, fe_bh);
893 	if (status < 0) {
894 		mlog_errno(status);
895 		goto leave;
896 	}
897 
898 	if (S_ISDIR(inode->i_mode)) {
899 		dir->i_nlink--;
900 		status = ocfs2_mark_inode_dirty(handle, dir,
901 						parent_node_bh);
902 		if (status < 0) {
903 			mlog_errno(status);
904 			dir->i_nlink++;
905 		}
906 	}
907 
908 leave:
909 	if (status < 0 && saved_nlink)
910 		inode->i_nlink = saved_nlink;
911 
912 	if (handle)
913 		ocfs2_commit_trans(handle);
914 
915 	if (fe_bh)
916 		brelse(fe_bh);
917 
918 	if (dirent_bh)
919 		brelse(dirent_bh);
920 
921 	if (parent_node_bh)
922 		brelse(parent_node_bh);
923 
924 	if (orphan_entry_bh)
925 		brelse(orphan_entry_bh);
926 
927 	mlog_exit(status);
928 
929 	return status;
930 }
931 
932 /*
933  * The only place this should be used is rename!
934  * if they have the same id, then the 1st one is the only one locked.
935  */
936 static int ocfs2_double_lock(struct ocfs2_super *osb,
937 			     struct ocfs2_journal_handle *handle,
938 			     struct buffer_head **bh1,
939 			     struct inode *inode1,
940 			     struct buffer_head **bh2,
941 			     struct inode *inode2)
942 {
943 	int status;
944 	struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
945 	struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
946 	struct buffer_head **tmpbh;
947 	struct inode *tmpinode;
948 
949 	mlog_entry("(inode1 = %"MLFu64", inode2 = %"MLFu64")\n",
950 		   oi1->ip_blkno, oi2->ip_blkno);
951 
952 	BUG_ON(!handle);
953 
954 	if (*bh1)
955 		*bh1 = NULL;
956 	if (*bh2)
957 		*bh2 = NULL;
958 
959 	/* we always want to lock the one with the lower lockid first. */
960 	if (oi1->ip_blkno != oi2->ip_blkno) {
961 		if (oi1->ip_blkno < oi2->ip_blkno) {
962 			/* switch id1 and id2 around */
963 			mlog(0, "switching them around...\n");
964 			tmpbh = bh2;
965 			bh2 = bh1;
966 			bh1 = tmpbh;
967 
968 			tmpinode = inode2;
969 			inode2 = inode1;
970 			inode1 = tmpinode;
971 		}
972 		/* lock id2 */
973 		status = ocfs2_meta_lock(inode2, handle, bh2, 1);
974 		if (status < 0) {
975 			if (status != -ENOENT)
976 				mlog_errno(status);
977 			goto bail;
978 		}
979 	}
980 	/* lock id1 */
981 	status = ocfs2_meta_lock(inode1, handle, bh1, 1);
982 	if (status < 0) {
983 		if (status != -ENOENT)
984 			mlog_errno(status);
985 		goto bail;
986 	}
987 bail:
988 	mlog_exit(status);
989 	return status;
990 }
991 
/*
 * Yields the (still little-endian) inode field of the second directory
 * entry in a directory block: buffer is treated as an ocfs2_dir_entry
 * and the entry that follows it after rec_len bytes is read.  In a
 * block laid out by ocfs2_fill_new_dir() that second entry is "..",
 * i.e. the parent directory.
 *
 * The argument is parenthesised so that any pointer expression can be
 * passed without the casts binding to only part of it.
 */
#define PARENT_INO(buffer) \
	((struct ocfs2_dir_entry *) \
	 ((char *)(buffer) + \
	  le16_to_cpu(((struct ocfs2_dir_entry *)(buffer))->rec_len)))->inode
996 
997 static int ocfs2_rename(struct inode *old_dir,
998 			struct dentry *old_dentry,
999 			struct inode *new_dir,
1000 			struct dentry *new_dentry)
1001 {
1002 	int status = 0, rename_lock = 0;
1003 	struct inode *old_inode = old_dentry->d_inode;
1004 	struct inode *new_inode = new_dentry->d_inode;
1005 	struct ocfs2_dinode *newfe = NULL;
1006 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
1007 	struct buffer_head *orphan_entry_bh = NULL;
1008 	struct buffer_head *newfe_bh = NULL;
1009 	struct buffer_head *insert_entry_bh = NULL;
1010 	struct ocfs2_super *osb = NULL;
1011 	u64 newfe_blkno;
1012 	struct ocfs2_journal_handle *handle = NULL;
1013 	struct buffer_head *old_dir_bh = NULL;
1014 	struct buffer_head *new_dir_bh = NULL;
1015 	struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry
1016 							       // and new_dentry
1017 	struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
1018 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1019 						    // this is the 1st dirent bh
1020 	nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
1021 	unsigned int links_count;
1022 
1023 	/* At some point it might be nice to break this function up a
1024 	 * bit. */
1025 
1026 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n",
1027 		   old_dir, old_dentry, new_dir, new_dentry,
1028 		   old_dentry->d_name.len, old_dentry->d_name.name,
1029 		   new_dentry->d_name.len, new_dentry->d_name.name);
1030 
1031 	osb = OCFS2_SB(old_dir->i_sb);
1032 
1033 	if (new_inode) {
1034 		if (!igrab(new_inode))
1035 			BUG();
1036 	}
1037 
1038 	if (atomic_read(&old_dentry->d_count) > 2) {
1039 		shrink_dcache_parent(old_dentry);
1040 		if (atomic_read(&old_dentry->d_count) > 2) {
1041 			status = -EBUSY;
1042 			goto bail;
1043 		}
1044 	}
1045 
1046 	/* Assume a directory heirarchy thusly:
1047 	 * a/b/c
1048 	 * a/d
1049 	 * a,b,c, and d are all directories.
1050 	 *
1051 	 * from cwd of 'a' on both nodes:
1052 	 * node1: mv b/c d
1053 	 * node2: mv d   b/c
1054 	 *
1055 	 * And that's why, just like the VFS, we need a file system
1056 	 * rename lock. */
1057 	if (old_dentry != new_dentry) {
1058 		status = ocfs2_rename_lock(osb);
1059 		if (status < 0) {
1060 			mlog_errno(status);
1061 			goto bail;
1062 		}
1063 		rename_lock = 1;
1064 	}
1065 
1066 	handle = ocfs2_alloc_handle(osb);
1067 	if (handle == NULL) {
1068 		status = -ENOMEM;
1069 		mlog_errno(status);
1070 		goto bail;
1071 	}
1072 
1073 	/* if old and new are the same, this'll just do one lock. */
1074 	status = ocfs2_double_lock(osb, handle,
1075 				  &old_dir_bh, old_dir,
1076 				  &new_dir_bh, new_dir);
1077 	if (status < 0) {
1078 		mlog_errno(status);
1079 		goto bail;
1080 	}
1081 
1082 	/* make sure both dirs have bhs
1083 	 * get an extra ref on old_dir_bh if old==new */
1084 	if (!new_dir_bh) {
1085 		if (old_dir_bh) {
1086 			new_dir_bh = old_dir_bh;
1087 			get_bh(new_dir_bh);
1088 		} else {
1089 			mlog(ML_ERROR, "no old_dir_bh!\n");
1090 			status = -EIO;
1091 			goto bail;
1092 		}
1093 	}
1094 
1095 	if (S_ISDIR(old_inode->i_mode)) {
1096 		/* Directories actually require metadata updates to
1097 		 * the directory info so we can't get away with not
1098 		 * doing node locking on it. */
1099 		status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
1100 		if (status < 0) {
1101 			if (status != -ENOENT)
1102 				mlog_errno(status);
1103 			goto bail;
1104 		}
1105 
1106 		status = ocfs2_request_rename_vote(old_inode, old_dentry);
1107 		if (status < 0) {
1108 			mlog_errno(status);
1109 			goto bail;
1110 		}
1111 
1112 		status = -EIO;
1113 		old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
1114 		if (!old_inode_de_bh)
1115 			goto bail;
1116 
1117 		status = -EIO;
1118 		if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
1119 		    OCFS2_I(old_dir)->ip_blkno)
1120 			goto bail;
1121 		status = -EMLINK;
1122 		if (!new_inode && new_dir!=old_dir &&
1123 		    new_dir->i_nlink >= OCFS2_LINK_MAX)
1124 			goto bail;
1125 	} else {
1126 		/* Ah, the simple case - we're a file so just send a
1127 		 * message. */
1128 		status = ocfs2_request_rename_vote(old_inode, old_dentry);
1129 		if (status < 0) {
1130 			mlog_errno(status);
1131 			goto bail;
1132 		}
1133 	}
1134 
1135 	status = -ENOENT;
1136 	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
1137 				     old_dentry->d_name.len,
1138 				     old_dir, &old_de);
1139 	if (!old_de_bh)
1140 		goto bail;
1141 
1142 	/*
1143 	 *  Check for inode number is _not_ due to possible IO errors.
1144 	 *  We might rmdir the source, keep it as pwd of some process
1145 	 *  and merrily kill the link to whatever was created under the
1146 	 *  same name. Goodbye sticky bit ;-<
1147 	 */
1148 	if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno)
1149 		goto bail;
1150 
1151 	/* check if the target already exists (in which case we need
1152 	 * to delete it */
1153 	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
1154 					  new_dentry->d_name.len,
1155 					  &newfe_blkno, new_dir, &new_de_bh,
1156 					  &new_de);
1157 	/* The only error we allow here is -ENOENT because the new
1158 	 * file not existing is perfectly valid. */
1159 	if ((status < 0) && (status != -ENOENT)) {
1160 		/* If we cannot find the file specified we should just */
1161 		/* return the error... */
1162 		mlog_errno(status);
1163 		goto bail;
1164 	}
1165 
1166 	if (!new_de && new_inode)
1167 		mlog(ML_ERROR, "inode %lu does not exist in it's parent "
1168 		     "directory!", new_inode->i_ino);
1169 
1170 	/* In case we need to overwrite an existing file, we blow it
1171 	 * away first */
1172 	if (new_de) {
1173 		/* VFS didn't think there existed an inode here, but
1174 		 * someone else in the cluster must have raced our
1175 		 * rename to create one. Today we error cleanly, in
1176 		 * the future we should consider calling iget to build
1177 		 * a new struct inode for this entry. */
1178 		if (!new_inode) {
1179 			status = -EACCES;
1180 
1181 			mlog(0, "We found an inode for name %.*s but VFS "
1182 			     "didn't give us one.\n", new_dentry->d_name.len,
1183 			     new_dentry->d_name.name);
1184 			goto bail;
1185 		}
1186 
1187 		if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1188 			status = -EACCES;
1189 
1190 			mlog(0, "Inode blkno (%"MLFu64") and dir (%"MLFu64") "
1191 			     "disagree. ip_flags = %x\n",
1192 			     OCFS2_I(new_inode)->ip_blkno, newfe_blkno,
1193 			     OCFS2_I(new_inode)->ip_flags);
1194 			goto bail;
1195 		}
1196 
1197 		status = ocfs2_meta_lock(new_inode, handle, &newfe_bh, 1);
1198 		if (status < 0) {
1199 			if (status != -ENOENT)
1200 				mlog_errno(status);
1201 			goto bail;
1202 		}
1203 
1204 		if (S_ISDIR(new_inode->i_mode))
1205 			links_count = 0;
1206 		else
1207 			links_count = (unsigned int) (new_inode->i_nlink - 1);
1208 
1209 		status = ocfs2_request_unlink_vote(new_inode, new_dentry,
1210 						   links_count);
1211 		if (status < 0) {
1212 			mlog_errno(status);
1213 			goto bail;
1214 		}
1215 
1216 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1217 
1218 		mlog(0, "aha rename over existing... new_de=%p "
1219 		     "new_blkno=%"MLFu64" newfebh=%p bhblocknr=%llu\n",
1220 		     new_de, newfe_blkno, newfe_bh, newfe_bh ?
1221 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1222 
1223 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1224 			status = ocfs2_prepare_orphan_dir(osb, handle,
1225 							  new_inode,
1226 							  orphan_name,
1227 							  &orphan_entry_bh);
1228 			if (status < 0) {
1229 				mlog_errno(status);
1230 				goto bail;
1231 			}
1232 		}
1233 	} else {
1234 		BUG_ON(new_dentry->d_parent->d_inode != new_dir);
1235 
1236 		status = ocfs2_check_dir_for_entry(new_dir,
1237 						   new_dentry->d_name.name,
1238 						   new_dentry->d_name.len);
1239 		if (status)
1240 			goto bail;
1241 
1242 		status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
1243 						      new_dentry->d_name.name,
1244 						      new_dentry->d_name.len,
1245 						      &insert_entry_bh);
1246 		if (status < 0) {
1247 			mlog_errno(status);
1248 			goto bail;
1249 		}
1250 	}
1251 
1252 	handle = ocfs2_start_trans(osb, handle, OCFS2_RENAME_CREDITS);
1253 	if (IS_ERR(handle)) {
1254 		status = PTR_ERR(handle);
1255 		handle = NULL;
1256 		mlog_errno(status);
1257 		goto bail;
1258 	}
1259 
1260 	if (new_de) {
1261 		if (S_ISDIR(new_inode->i_mode)) {
1262 			if (!ocfs2_empty_dir(new_inode) ||
1263 			    new_inode->i_nlink != 2) {
1264 				status = -ENOTEMPTY;
1265 				goto bail;
1266 			}
1267 		}
1268 		status = ocfs2_journal_access(handle, new_inode, newfe_bh,
1269 					      OCFS2_JOURNAL_ACCESS_WRITE);
1270 		if (status < 0) {
1271 			mlog_errno(status);
1272 			goto bail;
1273 		}
1274 
1275 		if (S_ISDIR(new_inode->i_mode) ||
1276 		    (newfe->i_links_count == cpu_to_le16(1))){
1277 			status = ocfs2_orphan_add(osb, handle, new_inode,
1278 						  newfe, orphan_name,
1279 						  orphan_entry_bh);
1280 			if (status < 0) {
1281 				mlog_errno(status);
1282 				goto bail;
1283 			}
1284 		}
1285 
1286 		/* change the dirent to point to the correct inode */
1287 		status = ocfs2_journal_access(handle, new_dir, new_de_bh,
1288 					      OCFS2_JOURNAL_ACCESS_WRITE);
1289 		if (status < 0) {
1290 			mlog_errno(status);
1291 			goto bail;
1292 		}
1293 		new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
1294 		new_de->file_type = old_de->file_type;
1295 		new_dir->i_version++;
1296 		status = ocfs2_journal_dirty(handle, new_de_bh);
1297 		if (status < 0) {
1298 			mlog_errno(status);
1299 			goto bail;
1300 		}
1301 
1302 		if (S_ISDIR(new_inode->i_mode))
1303 			newfe->i_links_count = 0;
1304 		else
1305 			le16_add_cpu(&newfe->i_links_count, -1);
1306 
1307 		status = ocfs2_journal_dirty(handle, newfe_bh);
1308 		if (status < 0) {
1309 			mlog_errno(status);
1310 			goto bail;
1311 		}
1312 	} else {
1313 		/* if the name was not found in new_dir, add it now */
1314 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
1315 					 OCFS2_I(old_inode)->ip_blkno,
1316 					 new_dir_bh, insert_entry_bh);
1317 	}
1318 
1319 	old_inode->i_ctime = CURRENT_TIME;
1320 	mark_inode_dirty(old_inode);
1321 
1322 	/* now that the name has been added to new_dir, remove the old name */
1323 	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
1324 	if (status < 0) {
1325 		mlog_errno(status);
1326 		goto bail;
1327 	}
1328 
1329 	if (new_inode) {
1330 		new_inode->i_nlink--;
1331 		new_inode->i_ctime = CURRENT_TIME;
1332 	}
1333 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1334 	if (old_inode_de_bh) {
1335 		status = ocfs2_journal_access(handle, old_inode,
1336 					     old_inode_de_bh,
1337 					     OCFS2_JOURNAL_ACCESS_WRITE);
1338 		PARENT_INO(old_inode_de_bh->b_data) =
1339 			cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
1340 		status = ocfs2_journal_dirty(handle, old_inode_de_bh);
1341 		old_dir->i_nlink--;
1342 		if (new_inode) {
1343 			new_inode->i_nlink--;
1344 		} else {
1345 			new_dir->i_nlink++;
1346 			mark_inode_dirty(new_dir);
1347 		}
1348 	}
1349 	mark_inode_dirty(old_dir);
1350 	if (new_inode)
1351 		mark_inode_dirty(new_inode);
1352 
1353 	if (old_dir != new_dir)
1354 		if (new_dir_nlink != new_dir->i_nlink) {
1355 			if (!new_dir_bh) {
1356 				mlog(ML_ERROR, "need to change nlink for new "
1357 				     "dir %"MLFu64" from %d to %d but bh is "
1358 				     "NULL\n", OCFS2_I(new_dir)->ip_blkno,
1359 				     (int)new_dir_nlink, new_dir->i_nlink);
1360 			} else {
1361 				struct ocfs2_dinode *fe;
1362 				status = ocfs2_journal_access(handle,
1363 							      new_dir,
1364 							      new_dir_bh,
1365 							      OCFS2_JOURNAL_ACCESS_WRITE);
1366 				fe = (struct ocfs2_dinode *) new_dir_bh->b_data;
1367 				fe->i_links_count = cpu_to_le16(new_dir->i_nlink);
1368 				status = ocfs2_journal_dirty(handle, new_dir_bh);
1369 			}
1370 		}
1371 
1372 	if (old_dir_nlink != old_dir->i_nlink) {
1373 		if (!old_dir_bh) {
1374 			mlog(ML_ERROR, "need to change nlink for old dir "
1375 			     "%"MLFu64" from %d to %d but bh is NULL!\n",
1376 			     OCFS2_I(old_dir)->ip_blkno,
1377 			     (int)old_dir_nlink,
1378 			     old_dir->i_nlink);
1379 		} else {
1380 			struct ocfs2_dinode *fe;
1381 			status = ocfs2_journal_access(handle, old_dir,
1382 						      old_dir_bh,
1383 						      OCFS2_JOURNAL_ACCESS_WRITE);
1384 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1385 			fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
1386 			status = ocfs2_journal_dirty(handle, old_dir_bh);
1387 		}
1388 	}
1389 
1390 	status = 0;
1391 bail:
1392 	if (rename_lock)
1393 		ocfs2_rename_unlock(osb);
1394 
1395 	if (handle)
1396 		ocfs2_commit_trans(handle);
1397 
1398 	if (new_inode)
1399 		sync_mapping_buffers(old_inode->i_mapping);
1400 
1401 	if (new_inode)
1402 		iput(new_inode);
1403 	if (newfe_bh)
1404 		brelse(newfe_bh);
1405 	if (old_dir_bh)
1406 		brelse(old_dir_bh);
1407 	if (new_dir_bh)
1408 		brelse(new_dir_bh);
1409 	if (new_de_bh)
1410 		brelse(new_de_bh);
1411 	if (old_de_bh)
1412 		brelse(old_de_bh);
1413 	if (old_inode_de_bh)
1414 		brelse(old_inode_de_bh);
1415 	if (orphan_entry_bh)
1416 		brelse(orphan_entry_bh);
1417 	if (insert_entry_bh)
1418 		brelse(insert_entry_bh);
1419 
1420 	mlog_exit(status);
1421 
1422 	return status;
1423 }
1424 
1425 /*
1426  * we expect i_size = strlen(symname). Copy symname into the file
1427  * data, including the null terminator.
1428  */
1429 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1430 				     struct ocfs2_journal_handle *handle,
1431 				     struct inode *inode,
1432 				     const char *symname)
1433 {
1434 	struct buffer_head **bhs = NULL;
1435 	const char *c;
1436 	struct super_block *sb = osb->sb;
1437 	u64 p_blkno;
1438 	int p_blocks;
1439 	int virtual, blocks, status, i, bytes_left;
1440 
1441 	bytes_left = i_size_read(inode) + 1;
1442 	/* we can't trust i_blocks because we're actually going to
1443 	 * write i_size + 1 bytes. */
1444 	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1445 
1446 	mlog_entry("i_blocks = %lu, i_size = %llu, blocks = %d\n",
1447 		       inode->i_blocks, i_size_read(inode), blocks);
1448 
1449 	/* Sanity check -- make sure we're going to fit. */
1450 	if (bytes_left >
1451 	    ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
1452 		status = -EIO;
1453 		mlog_errno(status);
1454 		goto bail;
1455 	}
1456 
1457 	bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
1458 	if (!bhs) {
1459 		status = -ENOMEM;
1460 		mlog_errno(status);
1461 		goto bail;
1462 	}
1463 
1464 	status = ocfs2_extent_map_get_blocks(inode, 0, 1, &p_blkno,
1465 					     &p_blocks);
1466 	if (status < 0) {
1467 		mlog_errno(status);
1468 		goto bail;
1469 	}
1470 
1471 	/* links can never be larger than one cluster so we know this
1472 	 * is all going to be contiguous, but do a sanity check
1473 	 * anyway. */
1474 	if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
1475 		status = -EIO;
1476 		mlog_errno(status);
1477 		goto bail;
1478 	}
1479 
1480 	virtual = 0;
1481 	while(bytes_left > 0) {
1482 		c = &symname[virtual * sb->s_blocksize];
1483 
1484 		bhs[virtual] = sb_getblk(sb, p_blkno);
1485 		if (!bhs[virtual]) {
1486 			status = -ENOMEM;
1487 			mlog_errno(status);
1488 			goto bail;
1489 		}
1490 		ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
1491 
1492 		status = ocfs2_journal_access(handle, inode, bhs[virtual],
1493 					      OCFS2_JOURNAL_ACCESS_CREATE);
1494 		if (status < 0) {
1495 			mlog_errno(status);
1496 			goto bail;
1497 		}
1498 
1499 		memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
1500 
1501 		memcpy(bhs[virtual]->b_data, c,
1502 		       (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1503 		       bytes_left);
1504 
1505 		status = ocfs2_journal_dirty(handle, bhs[virtual]);
1506 		if (status < 0) {
1507 			mlog_errno(status);
1508 			goto bail;
1509 		}
1510 
1511 		virtual++;
1512 		p_blkno++;
1513 		bytes_left -= sb->s_blocksize;
1514 	}
1515 
1516 	status = 0;
1517 bail:
1518 
1519 	if (bhs) {
1520 		for(i = 0; i < blocks; i++)
1521 			if (bhs[i])
1522 				brelse(bhs[i]);
1523 		kfree(bhs);
1524 	}
1525 
1526 	mlog_exit(status);
1527 	return status;
1528 }
1529 
1530 static int ocfs2_symlink(struct inode *dir,
1531 			 struct dentry *dentry,
1532 			 const char *symname)
1533 {
1534 	int status, l, credits;
1535 	u64 newsize;
1536 	struct ocfs2_super *osb = NULL;
1537 	struct inode *inode = NULL;
1538 	struct super_block *sb;
1539 	struct buffer_head *new_fe_bh = NULL;
1540 	struct buffer_head *de_bh = NULL;
1541 	struct buffer_head *parent_fe_bh = NULL;
1542 	struct ocfs2_dinode *fe = NULL;
1543 	struct ocfs2_dinode *dirfe;
1544 	struct ocfs2_journal_handle *handle = NULL;
1545 	struct ocfs2_alloc_context *inode_ac = NULL;
1546 	struct ocfs2_alloc_context *data_ac = NULL;
1547 
1548 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1549 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
1550 
1551 	sb = dir->i_sb;
1552 	osb = OCFS2_SB(sb);
1553 
1554 	l = strlen(symname) + 1;
1555 
1556 	credits = ocfs2_calc_symlink_credits(sb);
1557 
1558 	handle = ocfs2_alloc_handle(osb);
1559 	if (handle == NULL) {
1560 		status = -ENOMEM;
1561 		mlog_errno(status);
1562 		goto bail;
1563 	}
1564 
1565 	/* lock the parent directory */
1566 	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
1567 	if (status < 0) {
1568 		if (status != -ENOENT)
1569 			mlog_errno(status);
1570 		goto bail;
1571 	}
1572 
1573 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1574 	if (!dirfe->i_links_count) {
1575 		/* can't make a file in a deleted directory. */
1576 		status = -ENOENT;
1577 		goto bail;
1578 	}
1579 
1580 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
1581 					   dentry->d_name.len);
1582 	if (status)
1583 		goto bail;
1584 
1585 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
1586 					      dentry->d_name.name,
1587 					      dentry->d_name.len, &de_bh);
1588 	if (status < 0) {
1589 		mlog_errno(status);
1590 		goto bail;
1591 	}
1592 
1593 	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
1594 	if (status < 0) {
1595 		if (status != -ENOSPC)
1596 			mlog_errno(status);
1597 		goto bail;
1598 	}
1599 
1600 	/* don't reserve bitmap space for fast symlinks. */
1601 	if (l > ocfs2_fast_symlink_chars(sb)) {
1602 		status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
1603 		if (status < 0) {
1604 			if (status != -ENOSPC)
1605 				mlog_errno(status);
1606 			goto bail;
1607 		}
1608 	}
1609 
1610 	handle = ocfs2_start_trans(osb, handle, credits);
1611 	if (IS_ERR(handle)) {
1612 		status = PTR_ERR(handle);
1613 		handle = NULL;
1614 		mlog_errno(status);
1615 		goto bail;
1616 	}
1617 
1618 	status = ocfs2_mknod_locked(osb, dir, dentry,
1619 				    S_IFLNK | S_IRWXUGO, 0,
1620 				    &new_fe_bh, parent_fe_bh, handle,
1621 				    &inode, inode_ac);
1622 	if (status < 0) {
1623 		mlog_errno(status);
1624 		goto bail;
1625 	}
1626 
1627 	fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
1628 	inode->i_rdev = 0;
1629 	newsize = l - 1;
1630 	if (l > ocfs2_fast_symlink_chars(sb)) {
1631 		inode->i_op = &ocfs2_symlink_inode_operations;
1632 		status = ocfs2_do_extend_allocation(osb, inode, 1, new_fe_bh,
1633 						    handle, data_ac, NULL,
1634 						    NULL);
1635 		if (status < 0) {
1636 			if (status != -ENOSPC && status != -EINTR) {
1637 				mlog(ML_ERROR, "Failed to extend file to "
1638 					       "%"MLFu64"\n",
1639 				     newsize);
1640 				mlog_errno(status);
1641 				status = -ENOSPC;
1642 			}
1643 			goto bail;
1644 		}
1645 		i_size_write(inode, newsize);
1646 		inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize);
1647 	} else {
1648 		inode->i_op = &ocfs2_fast_symlink_inode_operations;
1649 		memcpy((char *) fe->id2.i_symlink, symname, l);
1650 		i_size_write(inode, newsize);
1651 		inode->i_blocks = 0;
1652 	}
1653 
1654 	status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
1655 	if (status < 0) {
1656 		mlog_errno(status);
1657 		goto bail;
1658 	}
1659 
1660 	if (!ocfs2_inode_is_fast_symlink(inode)) {
1661 		status = ocfs2_create_symlink_data(osb, handle, inode,
1662 						   symname);
1663 		if (status < 0) {
1664 			mlog_errno(status);
1665 			goto bail;
1666 		}
1667 	}
1668 
1669 	status = ocfs2_add_entry(handle, dentry, inode,
1670 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1671 				 de_bh);
1672 	if (status < 0) {
1673 		mlog_errno(status);
1674 		goto bail;
1675 	}
1676 
1677 	insert_inode_hash(inode);
1678 	dentry->d_op = &ocfs2_dentry_ops;
1679 	d_instantiate(dentry, inode);
1680 bail:
1681 	if (handle)
1682 		ocfs2_commit_trans(handle);
1683 	if (new_fe_bh)
1684 		brelse(new_fe_bh);
1685 	if (parent_fe_bh)
1686 		brelse(parent_fe_bh);
1687 	if (de_bh)
1688 		brelse(de_bh);
1689 	if (inode_ac)
1690 		ocfs2_free_alloc_context(inode_ac);
1691 	if (data_ac)
1692 		ocfs2_free_alloc_context(data_ac);
1693 	if ((status < 0) && inode)
1694 		iput(inode);
1695 
1696 	mlog_exit(status);
1697 
1698 	return status;
1699 }
1700 
1701 int ocfs2_check_dir_entry(struct inode * dir,
1702 			  struct ocfs2_dir_entry * de,
1703 			  struct buffer_head * bh,
1704 			  unsigned long offset)
1705 {
1706 	const char *error_msg = NULL;
1707 	const int rlen = le16_to_cpu(de->rec_len);
1708 
1709 	if (rlen < OCFS2_DIR_REC_LEN(1))
1710 		error_msg = "rec_len is smaller than minimal";
1711 	else if (rlen % 4 != 0)
1712 		error_msg = "rec_len % 4 != 0";
1713 	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
1714 		error_msg = "rec_len is too small for name_len";
1715 	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
1716 		error_msg = "directory entry across blocks";
1717 
1718 	if (error_msg != NULL)
1719 		mlog(ML_ERROR, "bad entry in directory #%"MLFu64": %s - "
1720 		     "offset=%lu, inode=%"MLFu64", rec_len=%d, name_len=%d\n",
1721 		     OCFS2_I(dir)->ip_blkno, error_msg, offset,
1722 		     le64_to_cpu(de->inode), rlen, de->name_len);
1723 	return error_msg == NULL ? 1 : 0;
1724 }
1725 
1726 /* we don't always have a dentry for what we want to add, so people
1727  * like orphan dir can call this instead.
1728  *
1729  * If you pass me insert_bh, I'll skip the search of the other dir
1730  * blocks and put the record in there.
1731  */
1732 static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
1733 			     struct inode *dir,
1734 			     const char *name, int namelen,
1735 			     struct inode *inode, u64 blkno,
1736 			     struct buffer_head *parent_fe_bh,
1737 			     struct buffer_head *insert_bh)
1738 {
1739 	unsigned long offset;
1740 	unsigned short rec_len;
1741 	struct ocfs2_dir_entry *de, *de1;
1742 	struct super_block *sb;
1743 	int retval, status;
1744 
1745 	mlog_entry_void();
1746 
1747 	sb = dir->i_sb;
1748 
1749 	if (!namelen)
1750 		return -EINVAL;
1751 
1752 	rec_len = OCFS2_DIR_REC_LEN(namelen);
1753 	offset = 0;
1754 	de = (struct ocfs2_dir_entry *) insert_bh->b_data;
1755 	while (1) {
1756 		BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
1757 		/* These checks should've already been passed by the
1758 		 * prepare function, but I guess we can leave them
1759 		 * here anyway. */
1760 		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
1761 			retval = -ENOENT;
1762 			goto bail;
1763 		}
1764 		if (ocfs2_match(namelen, name, de)) {
1765 			retval = -EEXIST;
1766 			goto bail;
1767 		}
1768 		if (((le64_to_cpu(de->inode) == 0) &&
1769 		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
1770 		    (le16_to_cpu(de->rec_len) >=
1771 		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
1772 			status = ocfs2_journal_access(handle, dir, insert_bh,
1773 						      OCFS2_JOURNAL_ACCESS_WRITE);
1774 			/* By now the buffer is marked for journaling */
1775 			offset += le16_to_cpu(de->rec_len);
1776 			if (le64_to_cpu(de->inode)) {
1777 				de1 = (struct ocfs2_dir_entry *)((char *) de +
1778 					OCFS2_DIR_REC_LEN(de->name_len));
1779 				de1->rec_len =
1780 					cpu_to_le16(le16_to_cpu(de->rec_len) -
1781 					OCFS2_DIR_REC_LEN(de->name_len));
1782 				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1783 				de = de1;
1784 			}
1785 			de->file_type = OCFS2_FT_UNKNOWN;
1786 			if (blkno) {
1787 				de->inode = cpu_to_le64(blkno);
1788 				ocfs2_set_de_type(de, inode->i_mode);
1789 			} else
1790 				de->inode = 0;
1791 			de->name_len = namelen;
1792 			memcpy(de->name, name, namelen);
1793 
1794 			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1795 			dir->i_version++;
1796 			status = ocfs2_journal_dirty(handle, insert_bh);
1797 			retval = 0;
1798 			goto bail;
1799 		}
1800 		offset += le16_to_cpu(de->rec_len);
1801 		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1802 	}
1803 
1804 	/* when you think about it, the assert above should prevent us
1805 	 * from ever getting here. */
1806 	retval = -ENOSPC;
1807 bail:
1808 
1809 	mlog_exit(retval);
1810 	return retval;
1811 }
1812 
1813 
1814 /*
1815  * ocfs2_delete_entry deletes a directory entry by merging it with the
1816  * previous entry
1817  */
1818 static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
1819 			      struct inode *dir,
1820 			      struct ocfs2_dir_entry *de_del,
1821 			      struct buffer_head *bh)
1822 {
1823 	struct ocfs2_dir_entry *de, *pde;
1824 	int i, status = -ENOENT;
1825 
1826 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1827 
1828 	i = 0;
1829 	pde = NULL;
1830 	de = (struct ocfs2_dir_entry *) bh->b_data;
1831 	while (i < bh->b_size) {
1832 		if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
1833 			status = -EIO;
1834 			mlog_errno(status);
1835 			goto bail;
1836 		}
1837 		if (de == de_del)  {
1838 			status = ocfs2_journal_access(handle, dir, bh,
1839 						      OCFS2_JOURNAL_ACCESS_WRITE);
1840 			if (status < 0) {
1841 				status = -EIO;
1842 				mlog_errno(status);
1843 				goto bail;
1844 			}
1845 			if (pde)
1846 				pde->rec_len =
1847 					cpu_to_le16(le16_to_cpu(pde->rec_len) +
1848 						    le16_to_cpu(de->rec_len));
1849 			else
1850 				de->inode = 0;
1851 			dir->i_version++;
1852 			status = ocfs2_journal_dirty(handle, bh);
1853 			goto bail;
1854 		}
1855 		i += le16_to_cpu(de->rec_len);
1856 		pde = de;
1857 		de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1858 	}
1859 bail:
1860 	mlog_exit(status);
1861 	return status;
1862 }
1863 
1864 /*
1865  * Returns 0 if not found, -1 on failure, and 1 on success
1866  */
1867 static int inline ocfs2_search_dirblock(struct buffer_head *bh,
1868 					struct inode *dir,
1869 					const char *name, int namelen,
1870 					unsigned long offset,
1871 					struct ocfs2_dir_entry **res_dir)
1872 {
1873 	struct ocfs2_dir_entry *de;
1874 	char *dlimit, *de_buf;
1875 	int de_len;
1876 	int ret = 0;
1877 
1878 	mlog_entry_void();
1879 
1880 	de_buf = bh->b_data;
1881 	dlimit = de_buf + dir->i_sb->s_blocksize;
1882 
1883 	while (de_buf < dlimit) {
1884 		/* this code is executed quadratically often */
1885 		/* do minimal checking `by hand' */
1886 
1887 		de = (struct ocfs2_dir_entry *) de_buf;
1888 
1889 		if (de_buf + namelen <= dlimit &&
1890 		    ocfs2_match(namelen, name, de)) {
1891 			/* found a match - just to be sure, do a full check */
1892 			if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
1893 				ret = -1;
1894 				goto bail;
1895 			}
1896 			*res_dir = de;
1897 			ret = 1;
1898 			goto bail;
1899 		}
1900 
1901 		/* prevent looping on a bad block */
1902 		de_len = le16_to_cpu(de->rec_len);
1903 		if (de_len <= 0) {
1904 			ret = -1;
1905 			goto bail;
1906 		}
1907 
1908 		de_buf += de_len;
1909 		offset += de_len;
1910 	}
1911 
1912 bail:
1913 	mlog_exit(ret);
1914 	return ret;
1915 }
1916 
1917 struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
1918 				     struct inode *dir,
1919 				     struct ocfs2_dir_entry **res_dir)
1920 {
1921 	struct super_block *sb;
1922 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
1923 	struct buffer_head *bh, *ret = NULL;
1924 	unsigned long start, block, b;
1925 	int ra_max = 0;		/* Number of bh's in the readahead
1926 				   buffer, bh_use[] */
1927 	int ra_ptr = 0;		/* Current index into readahead
1928 				   buffer */
1929 	int num = 0;
1930 	int nblocks, i, err;
1931 
1932 	mlog_entry_void();
1933 
1934 	*res_dir = NULL;
1935 	sb = dir->i_sb;
1936 
1937 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
1938 	start = OCFS2_I(dir)->ip_dir_start_lookup;
1939 	if (start >= nblocks)
1940 		start = 0;
1941 	block = start;
1942 
1943 restart:
1944 	do {
1945 		/*
1946 		 * We deal with the read-ahead logic here.
1947 		 */
1948 		if (ra_ptr >= ra_max) {
1949 			/* Refill the readahead buffer */
1950 			ra_ptr = 0;
1951 			b = block;
1952 			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
1953 				/*
1954 				 * Terminate if we reach the end of the
1955 				 * directory and must wrap, or if our
1956 				 * search has finished at this block.
1957 				 */
1958 				if (b >= nblocks || (num && block == start)) {
1959 					bh_use[ra_max] = NULL;
1960 					break;
1961 				}
1962 				num++;
1963 
1964 				/* XXX: questionable readahead stuff here */
1965 				bh = ocfs2_bread(dir, b++, &err, 1);
1966 				bh_use[ra_max] = bh;
1967 #if 0		// ???
1968 				if (bh)
1969 					ll_rw_block(READ, 1, &bh);
1970 #endif
1971 			}
1972 		}
1973 		if ((bh = bh_use[ra_ptr++]) == NULL)
1974 			goto next;
1975 		wait_on_buffer(bh);
1976 		if (!buffer_uptodate(bh)) {
1977 			/* read error, skip block & hope for the best */
1978 			brelse(bh);
1979 			goto next;
1980 		}
1981 		i = ocfs2_search_dirblock(bh, dir, name, namelen,
1982 					  block << sb->s_blocksize_bits,
1983 					  res_dir);
1984 		if (i == 1) {
1985 			OCFS2_I(dir)->ip_dir_start_lookup = block;
1986 			ret = bh;
1987 			goto cleanup_and_exit;
1988 		} else {
1989 			brelse(bh);
1990 			if (i < 0)
1991 				goto cleanup_and_exit;
1992 		}
1993 	next:
1994 		if (++block >= nblocks)
1995 			block = 0;
1996 	} while (block != start);
1997 
1998 	/*
1999 	 * If the directory has grown while we were searching, then
2000 	 * search the last part of the directory before giving up.
2001 	 */
2002 	block = nblocks;
2003 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
2004 	if (block < nblocks) {
2005 		start = 0;
2006 		goto restart;
2007 	}
2008 
2009 cleanup_and_exit:
2010 	/* Clean up the read-ahead blocks */
2011 	for (; ra_ptr < ra_max; ra_ptr++)
2012 		brelse(bh_use[ra_ptr]);
2013 
2014 	mlog_exit_ptr(ret);
2015 	return ret;
2016 }
2017 
2018 static int ocfs2_blkno_stringify(u64 blkno, char *name)
2019 {
2020 	int status, namelen;
2021 
2022 	mlog_entry_void();
2023 
2024 	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016"MLFx64,
2025 			   blkno);
2026 	if (namelen <= 0) {
2027 		if (namelen)
2028 			status = namelen;
2029 		else
2030 			status = -EINVAL;
2031 		mlog_errno(status);
2032 		goto bail;
2033 	}
2034 	if (namelen != OCFS2_ORPHAN_NAMELEN) {
2035 		status = -EINVAL;
2036 		mlog_errno(status);
2037 		goto bail;
2038 	}
2039 
2040 	mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name,
2041 	     namelen);
2042 
2043 	status = 0;
2044 bail:
2045 	mlog_exit(status);
2046 	return status;
2047 }
2048 
2049 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
2050 				    struct ocfs2_journal_handle *handle,
2051 				    struct inode *inode,
2052 				    char *name,
2053 				    struct buffer_head **de_bh)
2054 {
2055 	struct inode *orphan_dir_inode = NULL;
2056 	struct buffer_head *orphan_dir_bh = NULL;
2057 	int status = 0;
2058 
2059 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2060 	if (status < 0) {
2061 		mlog_errno(status);
2062 		goto leave;
2063 	}
2064 
2065 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2066 						       ORPHAN_DIR_SYSTEM_INODE,
2067 						       osb->slot_num);
2068 	if (!orphan_dir_inode) {
2069 		status = -ENOENT;
2070 		mlog_errno(status);
2071 		goto leave;
2072 	}
2073 
2074 	ocfs2_handle_add_inode(handle, orphan_dir_inode);
2075 	status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
2076 	if (status < 0) {
2077 		mlog_errno(status);
2078 		goto leave;
2079 	}
2080 
2081 	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
2082 					      orphan_dir_bh, name,
2083 					      OCFS2_ORPHAN_NAMELEN, de_bh);
2084 	if (status < 0) {
2085 		mlog_errno(status);
2086 		goto leave;
2087 	}
2088 
2089 leave:
2090 	if (orphan_dir_inode)
2091 		iput(orphan_dir_inode);
2092 
2093 	if (orphan_dir_bh)
2094 		brelse(orphan_dir_bh);
2095 
2096 	mlog_exit(status);
2097 	return status;
2098 }
2099 
2100 static int ocfs2_orphan_add(struct ocfs2_super *osb,
2101 			    struct ocfs2_journal_handle *handle,
2102 			    struct inode *inode,
2103 			    struct ocfs2_dinode *fe,
2104 			    char *name,
2105 			    struct buffer_head *de_bh)
2106 {
2107 	struct inode *orphan_dir_inode = NULL;
2108 	struct buffer_head *orphan_dir_bh = NULL;
2109 	int status = 0;
2110 	struct ocfs2_dinode *orphan_fe;
2111 
2112 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
2113 
2114 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2115 						       ORPHAN_DIR_SYSTEM_INODE,
2116 						       osb->slot_num);
2117 	if (!orphan_dir_inode) {
2118 		status = -ENOENT;
2119 		mlog_errno(status);
2120 		goto leave;
2121 	}
2122 
2123 	status = ocfs2_read_block(osb,
2124 				  OCFS2_I(orphan_dir_inode)->ip_blkno,
2125 				  &orphan_dir_bh, OCFS2_BH_CACHED,
2126 				  orphan_dir_inode);
2127 	if (status < 0) {
2128 		mlog_errno(status);
2129 		goto leave;
2130 	}
2131 
2132 	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
2133 				      OCFS2_JOURNAL_ACCESS_WRITE);
2134 	if (status < 0) {
2135 		mlog_errno(status);
2136 		goto leave;
2137 	}
2138 
2139 	/* we're a cluster, and nlink can change on disk from
2140 	 * underneath us... */
2141 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2142 	if (S_ISDIR(inode->i_mode))
2143 		le16_add_cpu(&orphan_fe->i_links_count, 1);
2144 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
2145 
2146 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2147 	if (status < 0) {
2148 		mlog_errno(status);
2149 		goto leave;
2150 	}
2151 
2152 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
2153 				   OCFS2_ORPHAN_NAMELEN, inode,
2154 				   OCFS2_I(inode)->ip_blkno,
2155 				   orphan_dir_bh, de_bh);
2156 	if (status < 0) {
2157 		mlog_errno(status);
2158 		goto leave;
2159 	}
2160 
2161 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
2162 
2163 	/* Record which orphan dir our inode now resides
2164 	 * in. delete_inode will use this to determine which orphan
2165 	 * dir to lock. */
2166 	spin_lock(&OCFS2_I(inode)->ip_lock);
2167 	OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
2168 	spin_unlock(&OCFS2_I(inode)->ip_lock);
2169 
2170 	mlog(0, "Inode %"MLFu64" orphaned in slot %d\n",
2171 	     OCFS2_I(inode)->ip_blkno, osb->slot_num);
2172 
2173 leave:
2174 	if (orphan_dir_inode)
2175 		iput(orphan_dir_inode);
2176 
2177 	if (orphan_dir_bh)
2178 		brelse(orphan_dir_bh);
2179 
2180 	mlog_exit(status);
2181 	return status;
2182 }
2183 
2184 /* unlike orphan_add, we expect the orphan dir to already be locked here. */
2185 int ocfs2_orphan_del(struct ocfs2_super *osb,
2186 		     struct ocfs2_journal_handle *handle,
2187 		     struct inode *orphan_dir_inode,
2188 		     struct inode *inode,
2189 		     struct buffer_head *orphan_dir_bh)
2190 {
2191 	char name[OCFS2_ORPHAN_NAMELEN + 1];
2192 	struct ocfs2_dinode *orphan_fe;
2193 	int status = 0;
2194 	struct buffer_head *target_de_bh = NULL;
2195 	struct ocfs2_dir_entry *target_de = NULL;
2196 
2197 	mlog_entry_void();
2198 
2199 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2200 	if (status < 0) {
2201 		mlog_errno(status);
2202 		goto leave;
2203 	}
2204 
2205 	mlog(0, "removing '%s' from orphan dir %"MLFu64" (namelen=%d)\n",
2206 	     name, OCFS2_I(orphan_dir_inode)->ip_blkno, OCFS2_ORPHAN_NAMELEN);
2207 
2208 	/* find it's spot in the orphan directory */
2209 	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
2210 					orphan_dir_inode, &target_de);
2211 	if (!target_de_bh) {
2212 		status = -ENOENT;
2213 		mlog_errno(status);
2214 		goto leave;
2215 	}
2216 
2217 	/* remove it from the orphan directory */
2218 	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
2219 				    target_de_bh);
2220 	if (status < 0) {
2221 		mlog_errno(status);
2222 		goto leave;
2223 	}
2224 
2225 	status = ocfs2_journal_access(handle,orphan_dir_inode,  orphan_dir_bh,
2226 				      OCFS2_JOURNAL_ACCESS_WRITE);
2227 	if (status < 0) {
2228 		mlog_errno(status);
2229 		goto leave;
2230 	}
2231 
2232 	/* do the i_nlink dance! :) */
2233 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2234 	if (S_ISDIR(inode->i_mode))
2235 		le16_add_cpu(&orphan_fe->i_links_count, -1);
2236 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
2237 
2238 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2239 	if (status < 0) {
2240 		mlog_errno(status);
2241 		goto leave;
2242 	}
2243 
2244 leave:
2245 	if (target_de_bh)
2246 		brelse(target_de_bh);
2247 
2248 	mlog_exit(status);
2249 	return status;
2250 }
2251 
2252 struct inode_operations ocfs2_dir_iops = {
2253 	.create		= ocfs2_create,
2254 	.lookup		= ocfs2_lookup,
2255 	.link		= ocfs2_link,
2256 	.unlink		= ocfs2_unlink,
2257 	.rmdir		= ocfs2_unlink,
2258 	.symlink	= ocfs2_symlink,
2259 	.mkdir		= ocfs2_mkdir,
2260 	.mknod		= ocfs2_mknod,
2261 	.rename		= ocfs2_rename,
2262 	.setattr	= ocfs2_setattr,
2263 	.getattr	= ocfs2_getattr,
2264 };
2265