1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 /* Reiserfs block (de)allocator, bitmap-based. */
5 
6 #include <linux/time.h>
7 #include <linux/reiserfs_fs.h>
8 #include <linux/errno.h>
9 #include <linux/buffer_head.h>
10 #include <linux/kernel.h>
11 #include <linux/pagemap.h>
12 #include <linux/vmalloc.h>
13 #include <linux/reiserfs_fs_sb.h>
14 #include <linux/reiserfs_fs_i.h>
15 #include <linux/quotaops.h>
16 #include <linux/seq_file.h>
17 
18 #define PREALLOCATION_SIZE 9
19 
20 /* different reiserfs block allocator options */
21 
22 #define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
23 
24 #define  _ALLOC_concentrating_formatted_nodes 0
25 #define  _ALLOC_displacing_large_files 1
26 #define  _ALLOC_displacing_new_packing_localities 2
27 #define  _ALLOC_old_hashed_relocation 3
28 #define  _ALLOC_new_hashed_relocation 4
29 #define  _ALLOC_skip_busy 5
30 #define  _ALLOC_displace_based_on_dirid 6
31 #define  _ALLOC_hashed_formatted_nodes 7
32 #define  _ALLOC_old_way 8
33 #define  _ALLOC_hundredth_slices 9
34 #define  _ALLOC_dirid_groups 10
35 #define  _ALLOC_oid_groups 11
36 #define  _ALLOC_packing_groups 12
37 
38 #define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
39 #define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
40 #define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
41 
42 #define SET_OPTION(optname) \
43    do { \
44 	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
45 	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
46     } while(0)
47 #define TEST_OPTION(optname, s) \
48     test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
49 
get_bit_address(struct super_block * s,b_blocknr_t block,unsigned int * bmap_nr,unsigned int * offset)50 static inline void get_bit_address(struct super_block *s,
51 				   b_blocknr_t block,
52 				   unsigned int *bmap_nr,
53 				   unsigned int *offset)
54 {
55 	/* It is in the bitmap block number equal to the block
56 	 * number divided by the number of bits in a block. */
57 	*bmap_nr = block >> (s->s_blocksize_bits + 3);
58 	/* Within that bitmap block it is located at bit offset *offset. */
59 	*offset = block & ((s->s_blocksize << 3) - 1);
60 }
61 
is_reusable(struct super_block * s,b_blocknr_t block,int bit_value)62 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
63 {
64 	unsigned int bmap, offset;
65 	unsigned int bmap_count = reiserfs_bmap_count(s);
66 
67 	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
68 		reiserfs_error(s, "vs-4010",
69 			       "block number is out of range %lu (%u)",
70 			       block, SB_BLOCK_COUNT(s));
71 		return 0;
72 	}
73 
74 	get_bit_address(s, block, &bmap, &offset);
75 
76 	/* Old format filesystem? Unlikely, but the bitmaps are all up front so
77 	 * we need to account for it. */
78 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
79 			      &(REISERFS_SB(s)->s_properties)))) {
80 		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
81 		if (block >= bmap1 &&
82 		    block <= bmap1 + bmap_count) {
83 			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
84 				       "can't be freed or reused",
85 				       block, bmap_count);
86 			return 0;
87 		}
88 	} else {
89 		if (offset == 0) {
90 			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
91 				       "can't be freed or reused",
92 				       block, bmap_count);
93 			return 0;
94 		}
95 	}
96 
97 	if (bmap >= bmap_count) {
98 		reiserfs_error(s, "vs-4030", "bitmap for requested block "
99 			       "is out of range: block=%lu, bitmap_nr=%u",
100 			       block, bmap);
101 		return 0;
102 	}
103 
104 	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
105 		reiserfs_error(s, "vs-4050", "this is root block (%u), "
106 			       "it must be busy", SB_ROOT_BLOCK(s));
107 		return 0;
108 	}
109 
110 	return 1;
111 }
112 
113 /* searches in journal structures for a given block number (bmap, off). If block
114    is found in reiserfs journal it suggests next free block candidate to test. */
is_block_in_journal(struct super_block * s,unsigned int bmap,int off,int * next)115 static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
116 				      int off, int *next)
117 {
118 	b_blocknr_t tmp;
119 
120 	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
121 		if (tmp) {	/* hint supplied */
122 			*next = tmp;
123 			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
124 		} else {
125 			(*next) = off + 1;	/* inc offset to avoid looping. */
126 			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
127 		}
128 		PROC_INFO_INC(s, scan_bitmap.retry);
129 		return 1;
130 	}
131 	return 0;
132 }
133 
134 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
135  * block; */
scan_bitmap_block(struct reiserfs_transaction_handle * th,unsigned int bmap_n,int * beg,int boundary,int min,int max,int unfm)136 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
137 			     unsigned int bmap_n, int *beg, int boundary,
138 			     int min, int max, int unfm)
139 {
140 	struct super_block *s = th->t_super;
141 	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
142 	struct buffer_head *bh;
143 	int end, next;
144 	int org = *beg;
145 
146 	BUG_ON(!th->t_trans_id);
147 
148 	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
149 	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
150 	PROC_INFO_INC(s, scan_bitmap.bmap);
151 /* this is unclear and lacks comments, explain how journal bitmaps
152    work here for the reader.  Convey a sense of the design here. What
153    is a window? */
154 /* - I mean `a window of zero bits' as in description of this function - Zam. */
155 
156 	if (!bi) {
157 		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
158 			       "for bitmap %d", bmap_n);
159 		return 0;
160 	}
161 
162 	bh = reiserfs_read_bitmap_block(s, bmap_n);
163 	if (bh == NULL)
164 		return 0;
165 
166 	while (1) {
167 	      cont:
168 		if (bi->free_count < min) {
169 			brelse(bh);
170 			return 0;	// No free blocks in this bitmap
171 		}
172 
173 		/* search for a first zero bit -- beginning of a window */
174 		*beg = reiserfs_find_next_zero_le_bit
175 		    ((unsigned long *)(bh->b_data), boundary, *beg);
176 
177 		if (*beg + min > boundary) {	/* search for a zero bit fails or the rest of bitmap block
178 						 * cannot contain a zero window of minimum size */
179 			brelse(bh);
180 			return 0;
181 		}
182 
183 		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
184 			continue;
185 		/* first zero bit found; we check next bits */
186 		for (end = *beg + 1;; end++) {
187 			if (end >= *beg + max || end >= boundary
188 			    || reiserfs_test_le_bit(end, bh->b_data)) {
189 				next = end;
190 				break;
191 			}
192 			/* finding the other end of zero bit window requires looking into journal structures (in
193 			 * case of searching for free blocks for unformatted nodes) */
194 			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
195 				break;
196 		}
197 
198 		/* now (*beg) points to beginning of zero bits window,
199 		 * (end) points to one bit after the window end */
200 		if (end - *beg >= min) {	/* it seems we have found window of proper size */
201 			int i;
202 			reiserfs_prepare_for_journal(s, bh, 1);
203 			/* try to set all blocks used checking are they still free */
204 			for (i = *beg; i < end; i++) {
205 				/* It seems that we should not check in journal again. */
206 				if (reiserfs_test_and_set_le_bit
207 				    (i, bh->b_data)) {
208 					/* bit was set by another process
209 					 * while we slept in prepare_for_journal() */
210 					PROC_INFO_INC(s, scan_bitmap.stolen);
211 					if (i >= *beg + min) {	/* we can continue with smaller set of allocated blocks,
212 								 * if length of this set is more or equal to `min' */
213 						end = i;
214 						break;
215 					}
216 					/* otherwise we clear all bit were set ... */
217 					while (--i >= *beg)
218 						reiserfs_clear_le_bit
219 						    (i, bh->b_data);
220 					reiserfs_restore_prepared_buffer(s, bh);
221 					*beg = org;
222 					/* ... and search again in current block from beginning */
223 					goto cont;
224 				}
225 			}
226 			bi->free_count -= (end - *beg);
227 			journal_mark_dirty(th, s, bh);
228 			brelse(bh);
229 
230 			/* free block count calculation */
231 			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
232 						     1);
233 			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
234 			journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
235 
236 			return end - (*beg);
237 		} else {
238 			*beg = next;
239 		}
240 	}
241 }
242 
bmap_hash_id(struct super_block * s,u32 id)243 static int bmap_hash_id(struct super_block *s, u32 id)
244 {
245 	char *hash_in = NULL;
246 	unsigned long hash;
247 	unsigned bm;
248 
249 	if (id <= 2) {
250 		bm = 1;
251 	} else {
252 		hash_in = (char *)(&id);
253 		hash = keyed_hash(hash_in, 4);
254 		bm = hash % reiserfs_bmap_count(s);
255 		if (!bm)
256 			bm = 1;
257 	}
258 	/* this can only be true when SB_BMAP_NR = 1 */
259 	if (bm >= reiserfs_bmap_count(s))
260 		bm = 0;
261 	return bm;
262 }
263 
264 /*
265  * hashes the id and then returns > 0 if the block group for the
266  * corresponding hash is full
267  */
block_group_used(struct super_block * s,u32 id)268 static inline int block_group_used(struct super_block *s, u32 id)
269 {
270 	int bm = bmap_hash_id(s, id);
271 	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
272 
273 	/* If we don't have cached information on this bitmap block, we're
274 	 * going to have to load it later anyway. Loading it here allows us
275 	 * to make a better decision. This favors long-term performance gain
276 	 * with a better on-disk layout vs. a short term gain of skipping the
277 	 * read and potentially having a bad placement. */
278 	if (info->free_count == UINT_MAX) {
279 		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
280 		brelse(bh);
281 	}
282 
283 	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
284 		return 0;
285 	}
286 	return 1;
287 }
288 
289 /*
290  * the packing is returned in disk byte order
291  */
reiserfs_choose_packing(struct inode * dir)292 __le32 reiserfs_choose_packing(struct inode * dir)
293 {
294 	__le32 packing;
295 	if (TEST_OPTION(packing_groups, dir->i_sb)) {
296 		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
297 		/*
298 		 * some versions of reiserfsck expect packing locality 1 to be
299 		 * special
300 		 */
301 		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
302 			packing = INODE_PKEY(dir)->k_objectid;
303 		else
304 			packing = INODE_PKEY(dir)->k_dir_id;
305 	} else
306 		packing = INODE_PKEY(dir)->k_objectid;
307 	return packing;
308 }
309 
310 /* Tries to find contiguous zero bit window (given size) in given region of
311  * bitmap and place new blocks there. Returns number of allocated blocks. */
scan_bitmap(struct reiserfs_transaction_handle * th,b_blocknr_t * start,b_blocknr_t finish,int min,int max,int unfm,sector_t file_block)312 static int scan_bitmap(struct reiserfs_transaction_handle *th,
313 		       b_blocknr_t * start, b_blocknr_t finish,
314 		       int min, int max, int unfm, sector_t file_block)
315 {
316 	int nr_allocated = 0;
317 	struct super_block *s = th->t_super;
318 	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
319 	 * - Hans, it is not a block number - Zam. */
320 
321 	unsigned int bm, off;
322 	unsigned int end_bm, end_off;
323 	unsigned int off_max = s->s_blocksize << 3;
324 
325 	BUG_ON(!th->t_trans_id);
326 
327 	PROC_INFO_INC(s, scan_bitmap.call);
328 	if (SB_FREE_BLOCKS(s) <= 0)
329 		return 0;	// No point in looking for more free blocks
330 
331 	get_bit_address(s, *start, &bm, &off);
332 	get_bit_address(s, finish, &end_bm, &end_off);
333 	if (bm > reiserfs_bmap_count(s))
334 		return 0;
335 	if (end_bm > reiserfs_bmap_count(s))
336 		end_bm = reiserfs_bmap_count(s);
337 
338 	/* When the bitmap is more than 10% free, anyone can allocate.
339 	 * When it's less than 10% free, only files that already use the
340 	 * bitmap are allowed. Once we pass 80% full, this restriction
341 	 * is lifted.
342 	 *
343 	 * We do this so that files that grow later still have space close to
344 	 * their original allocation. This improves locality, and presumably
345 	 * performance as a result.
346 	 *
347 	 * This is only an allocation policy and does not make up for getting a
348 	 * bad hint. Decent hinting must be implemented for this to work well.
349 	 */
350 	if (TEST_OPTION(skip_busy, s)
351 	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
352 		for (; bm < end_bm; bm++, off = 0) {
353 			if ((off && (!unfm || (file_block != 0)))
354 			    || SB_AP_BITMAP(s)[bm].free_count >
355 			    (s->s_blocksize << 3) / 10)
356 				nr_allocated =
357 				    scan_bitmap_block(th, bm, &off, off_max,
358 						      min, max, unfm);
359 			if (nr_allocated)
360 				goto ret;
361 		}
362 		/* we know from above that start is a reasonable number */
363 		get_bit_address(s, *start, &bm, &off);
364 	}
365 
366 	for (; bm < end_bm; bm++, off = 0) {
367 		nr_allocated =
368 		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
369 		if (nr_allocated)
370 			goto ret;
371 	}
372 
373 	nr_allocated =
374 	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
375 
376       ret:
377 	*start = bm * off_max + off;
378 	return nr_allocated;
379 
380 }
381 
_reiserfs_free_block(struct reiserfs_transaction_handle * th,struct inode * inode,b_blocknr_t block,int for_unformatted)382 static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
383 				 struct inode *inode, b_blocknr_t block,
384 				 int for_unformatted)
385 {
386 	struct super_block *s = th->t_super;
387 	struct reiserfs_super_block *rs;
388 	struct buffer_head *sbh, *bmbh;
389 	struct reiserfs_bitmap_info *apbi;
390 	unsigned int nr, offset;
391 
392 	BUG_ON(!th->t_trans_id);
393 
394 	PROC_INFO_INC(s, free_block);
395 
396 	rs = SB_DISK_SUPER_BLOCK(s);
397 	sbh = SB_BUFFER_WITH_SB(s);
398 	apbi = SB_AP_BITMAP(s);
399 
400 	get_bit_address(s, block, &nr, &offset);
401 
402 	if (nr >= reiserfs_bmap_count(s)) {
403 		reiserfs_error(s, "vs-4075", "block %lu is out of range",
404 			       block);
405 		return;
406 	}
407 
408 	bmbh = reiserfs_read_bitmap_block(s, nr);
409 	if (!bmbh)
410 		return;
411 
412 	reiserfs_prepare_for_journal(s, bmbh, 1);
413 
414 	/* clear bit for the given block in bit map */
415 	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
416 		reiserfs_error(s, "vs-4080",
417 			       "block %lu: bit already cleared", block);
418 	}
419 	apbi[nr].free_count++;
420 	journal_mark_dirty(th, s, bmbh);
421 	brelse(bmbh);
422 
423 	reiserfs_prepare_for_journal(s, sbh, 1);
424 	/* update super block */
425 	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
426 
427 	journal_mark_dirty(th, s, sbh);
428 	if (for_unformatted)
429 		dquot_free_block_nodirty(inode, 1);
430 }
431 
reiserfs_free_block(struct reiserfs_transaction_handle * th,struct inode * inode,b_blocknr_t block,int for_unformatted)432 void reiserfs_free_block(struct reiserfs_transaction_handle *th,
433 			 struct inode *inode, b_blocknr_t block,
434 			 int for_unformatted)
435 {
436 	struct super_block *s = th->t_super;
437 	BUG_ON(!th->t_trans_id);
438 
439 	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
440 	if (!is_reusable(s, block, 1))
441 		return;
442 
443 	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
444 		reiserfs_error(th->t_super, "bitmap-4072",
445 			       "Trying to free block outside file system "
446 			       "boundaries (%lu > %lu)",
447 			       block, sb_block_count(REISERFS_SB(s)->s_rs));
448 		return;
449 	}
450 	/* mark it before we clear it, just in case */
451 	journal_mark_freed(th, s, block);
452 	_reiserfs_free_block(th, inode, block, for_unformatted);
453 }
454 
455 /* preallocated blocks don't need to be run through journal_mark_freed */
reiserfs_free_prealloc_block(struct reiserfs_transaction_handle * th,struct inode * inode,b_blocknr_t block)456 static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
457 					 struct inode *inode, b_blocknr_t block)
458 {
459 	BUG_ON(!th->t_trans_id);
460 	RFALSE(!th->t_super,
461 	       "vs-4060: trying to free block on nonexistent device");
462 	if (!is_reusable(th->t_super, block, 1))
463 		return;
464 	_reiserfs_free_block(th, inode, block, 1);
465 }
466 
__discard_prealloc(struct reiserfs_transaction_handle * th,struct reiserfs_inode_info * ei)467 static void __discard_prealloc(struct reiserfs_transaction_handle *th,
468 			       struct reiserfs_inode_info *ei)
469 {
470 	unsigned long save = ei->i_prealloc_block;
471 	int dirty = 0;
472 	struct inode *inode = &ei->vfs_inode;
473 	BUG_ON(!th->t_trans_id);
474 #ifdef CONFIG_REISERFS_CHECK
475 	if (ei->i_prealloc_count < 0)
476 		reiserfs_error(th->t_super, "zam-4001",
477 			       "inode has negative prealloc blocks count.");
478 #endif
479 	while (ei->i_prealloc_count > 0) {
480 		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
481 		ei->i_prealloc_block++;
482 		ei->i_prealloc_count--;
483 		dirty = 1;
484 	}
485 	if (dirty)
486 		reiserfs_update_sd(th, inode);
487 	ei->i_prealloc_block = save;
488 	list_del_init(&(ei->i_prealloc_list));
489 }
490 
491 /* FIXME: It should be inline function */
reiserfs_discard_prealloc(struct reiserfs_transaction_handle * th,struct inode * inode)492 void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
493 			       struct inode *inode)
494 {
495 	struct reiserfs_inode_info *ei = REISERFS_I(inode);
496 	BUG_ON(!th->t_trans_id);
497 	if (ei->i_prealloc_count)
498 		__discard_prealloc(th, ei);
499 }
500 
reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle * th)501 void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
502 {
503 	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
504 
505 	BUG_ON(!th->t_trans_id);
506 
507 	while (!list_empty(plist)) {
508 		struct reiserfs_inode_info *ei;
509 		ei = list_entry(plist->next, struct reiserfs_inode_info,
510 				i_prealloc_list);
511 #ifdef CONFIG_REISERFS_CHECK
512 		if (!ei->i_prealloc_count) {
513 			reiserfs_error(th->t_super, "zam-4001",
514 				       "inode is in prealloc list but has "
515 				       "no preallocated blocks.");
516 		}
517 #endif
518 		__discard_prealloc(th, ei);
519 	}
520 }
521 
reiserfs_init_alloc_options(struct super_block * s)522 void reiserfs_init_alloc_options(struct super_block *s)
523 {
524 	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
525 	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
526 	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
527 }
528 
529 /* block allocator related options are parsed here */
reiserfs_parse_alloc_options(struct super_block * s,char * options)530 int reiserfs_parse_alloc_options(struct super_block *s, char *options)
531 {
532 	char *this_char, *value;
533 
534 	REISERFS_SB(s)->s_alloc_options.bits = 0;	/* clear default settings */
535 
536 	while ((this_char = strsep(&options, ":")) != NULL) {
537 		if ((value = strchr(this_char, '=')) != NULL)
538 			*value++ = 0;
539 
540 		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
541 			int temp;
542 			SET_OPTION(concentrating_formatted_nodes);
543 			temp = (value
544 				&& *value) ? simple_strtoul(value, &value,
545 							    0) : 10;
546 			if (temp <= 0 || temp > 100) {
547 				REISERFS_SB(s)->s_alloc_options.border = 10;
548 			} else {
549 				REISERFS_SB(s)->s_alloc_options.border =
550 				    100 / temp;
551 			}
552 			continue;
553 		}
554 		if (!strcmp(this_char, "displacing_large_files")) {
555 			SET_OPTION(displacing_large_files);
556 			REISERFS_SB(s)->s_alloc_options.large_file_size =
557 			    (value
558 			     && *value) ? simple_strtoul(value, &value, 0) : 16;
559 			continue;
560 		}
561 		if (!strcmp(this_char, "displacing_new_packing_localities")) {
562 			SET_OPTION(displacing_new_packing_localities);
563 			continue;
564 		};
565 
566 		if (!strcmp(this_char, "old_hashed_relocation")) {
567 			SET_OPTION(old_hashed_relocation);
568 			continue;
569 		}
570 
571 		if (!strcmp(this_char, "new_hashed_relocation")) {
572 			SET_OPTION(new_hashed_relocation);
573 			continue;
574 		}
575 
576 		if (!strcmp(this_char, "dirid_groups")) {
577 			SET_OPTION(dirid_groups);
578 			continue;
579 		}
580 		if (!strcmp(this_char, "oid_groups")) {
581 			SET_OPTION(oid_groups);
582 			continue;
583 		}
584 		if (!strcmp(this_char, "packing_groups")) {
585 			SET_OPTION(packing_groups);
586 			continue;
587 		}
588 		if (!strcmp(this_char, "hashed_formatted_nodes")) {
589 			SET_OPTION(hashed_formatted_nodes);
590 			continue;
591 		}
592 
593 		if (!strcmp(this_char, "skip_busy")) {
594 			SET_OPTION(skip_busy);
595 			continue;
596 		}
597 
598 		if (!strcmp(this_char, "hundredth_slices")) {
599 			SET_OPTION(hundredth_slices);
600 			continue;
601 		}
602 
603 		if (!strcmp(this_char, "old_way")) {
604 			SET_OPTION(old_way);
605 			continue;
606 		}
607 
608 		if (!strcmp(this_char, "displace_based_on_dirid")) {
609 			SET_OPTION(displace_based_on_dirid);
610 			continue;
611 		}
612 
613 		if (!strcmp(this_char, "preallocmin")) {
614 			REISERFS_SB(s)->s_alloc_options.preallocmin =
615 			    (value
616 			     && *value) ? simple_strtoul(value, &value, 0) : 4;
617 			continue;
618 		}
619 
620 		if (!strcmp(this_char, "preallocsize")) {
621 			REISERFS_SB(s)->s_alloc_options.preallocsize =
622 			    (value
623 			     && *value) ? simple_strtoul(value, &value,
624 							 0) :
625 			    PREALLOCATION_SIZE;
626 			continue;
627 		}
628 
629 		reiserfs_warning(s, "zam-4001", "unknown option - %s",
630 				 this_char);
631 		return 1;
632 	}
633 
634 	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
635 	return 0;
636 }
637 
print_sep(struct seq_file * seq,int * first)638 static void print_sep(struct seq_file *seq, int *first)
639 {
640 	if (!*first)
641 		seq_puts(seq, ":");
642 	else
643 		*first = 0;
644 }
645 
show_alloc_options(struct seq_file * seq,struct super_block * s)646 void show_alloc_options(struct seq_file *seq, struct super_block *s)
647 {
648 	int first = 1;
649 
650 	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
651 		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
652 		return;
653 
654 	seq_puts(seq, ",alloc=");
655 
656 	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
657 		print_sep(seq, &first);
658 		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
659 			seq_printf(seq, "concentrating_formatted_nodes=%d",
660 				100 / REISERFS_SB(s)->s_alloc_options.border);
661 		} else
662 			seq_puts(seq, "concentrating_formatted_nodes");
663 	}
664 	if (TEST_OPTION(displacing_large_files, s)) {
665 		print_sep(seq, &first);
666 		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
667 			seq_printf(seq, "displacing_large_files=%lu",
668 			    REISERFS_SB(s)->s_alloc_options.large_file_size);
669 		} else
670 			seq_puts(seq, "displacing_large_files");
671 	}
672 	if (TEST_OPTION(displacing_new_packing_localities, s)) {
673 		print_sep(seq, &first);
674 		seq_puts(seq, "displacing_new_packing_localities");
675 	}
676 	if (TEST_OPTION(old_hashed_relocation, s)) {
677 		print_sep(seq, &first);
678 		seq_puts(seq, "old_hashed_relocation");
679 	}
680 	if (TEST_OPTION(new_hashed_relocation, s)) {
681 		print_sep(seq, &first);
682 		seq_puts(seq, "new_hashed_relocation");
683 	}
684 	if (TEST_OPTION(dirid_groups, s)) {
685 		print_sep(seq, &first);
686 		seq_puts(seq, "dirid_groups");
687 	}
688 	if (TEST_OPTION(oid_groups, s)) {
689 		print_sep(seq, &first);
690 		seq_puts(seq, "oid_groups");
691 	}
692 	if (TEST_OPTION(packing_groups, s)) {
693 		print_sep(seq, &first);
694 		seq_puts(seq, "packing_groups");
695 	}
696 	if (TEST_OPTION(hashed_formatted_nodes, s)) {
697 		print_sep(seq, &first);
698 		seq_puts(seq, "hashed_formatted_nodes");
699 	}
700 	if (TEST_OPTION(skip_busy, s)) {
701 		print_sep(seq, &first);
702 		seq_puts(seq, "skip_busy");
703 	}
704 	if (TEST_OPTION(hundredth_slices, s)) {
705 		print_sep(seq, &first);
706 		seq_puts(seq, "hundredth_slices");
707 	}
708 	if (TEST_OPTION(old_way, s)) {
709 		print_sep(seq, &first);
710 		seq_puts(seq, "old_way");
711 	}
712 	if (TEST_OPTION(displace_based_on_dirid, s)) {
713 		print_sep(seq, &first);
714 		seq_puts(seq, "displace_based_on_dirid");
715 	}
716 	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
717 		print_sep(seq, &first);
718 		seq_printf(seq, "preallocmin=%d",
719 				REISERFS_SB(s)->s_alloc_options.preallocmin);
720 	}
721 	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
722 		print_sep(seq, &first);
723 		seq_printf(seq, "preallocsize=%d",
724 				REISERFS_SB(s)->s_alloc_options.preallocsize);
725 	}
726 }
727 
new_hashed_relocation(reiserfs_blocknr_hint_t * hint)728 static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
729 {
730 	char *hash_in;
731 	if (hint->formatted_node) {
732 		hash_in = (char *)&hint->key.k_dir_id;
733 	} else {
734 		if (!hint->inode) {
735 			//hint->search_start = hint->beg;
736 			hash_in = (char *)&hint->key.k_dir_id;
737 		} else
738 		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
739 			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
740 		else
741 			hash_in =
742 			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
743 	}
744 
745 	hint->search_start =
746 	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
747 }
748 
749 /*
750  * Relocation based on dirid, hashing them into a given bitmap block
751  * files. Formatted nodes are unaffected, a separate policy covers them
752  */
dirid_groups(reiserfs_blocknr_hint_t * hint)753 static void dirid_groups(reiserfs_blocknr_hint_t * hint)
754 {
755 	unsigned long hash;
756 	__u32 dirid = 0;
757 	int bm = 0;
758 	struct super_block *sb = hint->th->t_super;
759 	if (hint->inode)
760 		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
761 	else if (hint->formatted_node)
762 		dirid = hint->key.k_dir_id;
763 
764 	if (dirid) {
765 		bm = bmap_hash_id(sb, dirid);
766 		hash = bm * (sb->s_blocksize << 3);
767 		/* give a portion of the block group to metadata */
768 		if (hint->inode)
769 			hash += sb->s_blocksize / 2;
770 		hint->search_start = hash;
771 	}
772 }
773 
774 /*
775  * Relocation based on oid, hashing them into a given bitmap block
776  * files. Formatted nodes are unaffected, a separate policy covers them
777  */
oid_groups(reiserfs_blocknr_hint_t * hint)778 static void oid_groups(reiserfs_blocknr_hint_t * hint)
779 {
780 	if (hint->inode) {
781 		unsigned long hash;
782 		__u32 oid;
783 		__u32 dirid;
784 		int bm;
785 
786 		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
787 
788 		/* keep the root dir and it's first set of subdirs close to
789 		 * the start of the disk
790 		 */
791 		if (dirid <= 2)
792 			hash = (hint->inode->i_sb->s_blocksize << 3);
793 		else {
794 			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
795 			bm = bmap_hash_id(hint->inode->i_sb, oid);
796 			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
797 		}
798 		hint->search_start = hash;
799 	}
800 }
801 
802 /* returns 1 if it finds an indirect item and gets valid hint info
803  * from it, otherwise 0
804  */
get_left_neighbor(reiserfs_blocknr_hint_t * hint)805 static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
806 {
807 	struct treepath *path;
808 	struct buffer_head *bh;
809 	struct item_head *ih;
810 	int pos_in_item;
811 	__le32 *item;
812 	int ret = 0;
813 
814 	if (!hint->path)	/* reiserfs code can call this function w/o pointer to path
815 				 * structure supplied; then we rely on supplied search_start */
816 		return 0;
817 
818 	path = hint->path;
819 	bh = get_last_bh(path);
820 	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
821 	ih = get_ih(path);
822 	pos_in_item = path->pos_in_item;
823 	item = get_item(path);
824 
825 	hint->search_start = bh->b_blocknr;
826 
827 	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
828 		/* for indirect item: go to left and look for the first non-hole entry
829 		   in the indirect item */
830 		if (pos_in_item == I_UNFM_NUM(ih))
831 			pos_in_item--;
832 //          pos_in_item = I_UNFM_NUM (ih) - 1;
833 		while (pos_in_item >= 0) {
834 			int t = get_block_num(item, pos_in_item);
835 			if (t) {
836 				hint->search_start = t;
837 				ret = 1;
838 				break;
839 			}
840 			pos_in_item--;
841 		}
842 	}
843 
844 	/* does result value fit into specified region? */
845 	return ret;
846 }
847 
848 /* should be, if formatted node, then try to put on first part of the device
849    specified as number of percent with mount option device, else try to put
850    on last of device.  This is not to say it is good code to do so,
851    but the effect should be measured.  */
set_border_in_hint(struct super_block * s,reiserfs_blocknr_hint_t * hint)852 static inline void set_border_in_hint(struct super_block *s,
853 				      reiserfs_blocknr_hint_t * hint)
854 {
855 	b_blocknr_t border =
856 	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
857 
858 	if (hint->formatted_node)
859 		hint->end = border - 1;
860 	else
861 		hint->beg = border;
862 }
863 
displace_large_file(reiserfs_blocknr_hint_t * hint)864 static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
865 {
866 	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
867 		hint->search_start =
868 		    hint->beg +
869 		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
870 			       4) % (hint->end - hint->beg);
871 	else
872 		hint->search_start =
873 		    hint->beg +
874 		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
875 			       4) % (hint->end - hint->beg);
876 }
877 
hash_formatted_node(reiserfs_blocknr_hint_t * hint)878 static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
879 {
880 	char *hash_in;
881 
882 	if (!hint->inode)
883 		hash_in = (char *)&hint->key.k_dir_id;
884 	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
885 		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
886 	else
887 		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
888 
889 	hint->search_start =
890 	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
891 }
892 
893 static inline int
this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t * hint)894 this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
895 						   hint)
896 {
897 	return hint->block ==
898 	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
899 }
900 
901 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)902 static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
903 {
904 	struct in_core_key *key = &hint->key;
905 
906 	hint->th->displace_new_blocks = 0;
907 	hint->search_start =
908 	    hint->beg + keyed_hash((char *)(&key->k_objectid),
909 				   4) % (hint->end - hint->beg);
910 }
911 #endif
912 
old_hashed_relocation(reiserfs_blocknr_hint_t * hint)913 static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
914 {
915 	b_blocknr_t border;
916 	u32 hash_in;
917 
918 	if (hint->formatted_node || hint->inode == NULL) {
919 		return 0;
920 	}
921 
922 	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
923 	border =
924 	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
925 					 4) % (hint->end - hint->beg - 1);
926 	if (border > hint->search_start)
927 		hint->search_start = border;
928 
929 	return 1;
930 }
931 
old_way(reiserfs_blocknr_hint_t * hint)932 static inline int old_way(reiserfs_blocknr_hint_t * hint)
933 {
934 	b_blocknr_t border;
935 
936 	if (hint->formatted_node || hint->inode == NULL) {
937 		return 0;
938 	}
939 
940 	border =
941 	    hint->beg +
942 	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
943 							      hint->beg);
944 	if (border > hint->search_start)
945 		hint->search_start = border;
946 
947 	return 1;
948 }
949 
hundredth_slices(reiserfs_blocknr_hint_t * hint)950 static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
951 {
952 	struct in_core_key *key = &hint->key;
953 	b_blocknr_t slice_start;
954 
955 	slice_start =
956 	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
957 	if (slice_start > hint->search_start
958 	    || slice_start + (hint->end / 100) <= hint->search_start) {
959 		hint->search_start = slice_start;
960 	}
961 }
962 
determine_search_start(reiserfs_blocknr_hint_t * hint,int amount_needed)963 static void determine_search_start(reiserfs_blocknr_hint_t * hint,
964 				   int amount_needed)
965 {
966 	struct super_block *s = hint->th->t_super;
967 	int unfm_hint;
968 
969 	hint->beg = 0;
970 	hint->end = SB_BLOCK_COUNT(s) - 1;
971 
972 	/* This is former border algorithm. Now with tunable border offset */
973 	if (concentrating_formatted_nodes(s))
974 		set_border_in_hint(s, hint);
975 
976 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
977 	/* whenever we create a new directory, we displace it.  At first we will
978 	   hash for location, later we might look for a moderately empty place for
979 	   it */
980 	if (displacing_new_packing_localities(s)
981 	    && hint->th->displace_new_blocks) {
982 		displace_new_packing_locality(hint);
983 
984 		/* we do not continue determine_search_start,
985 		 * if new packing locality is being displaced */
986 		return;
987 	}
988 #endif
989 
990 	/* all persons should feel encouraged to add more special cases here and
991 	 * test them */
992 
993 	if (displacing_large_files(s) && !hint->formatted_node
994 	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
995 		displace_large_file(hint);
996 		return;
997 	}
998 
999 	/* if none of our special cases is relevant, use the left neighbor in the
1000 	   tree order of the new node we are allocating for */
1001 	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1002 		hash_formatted_node(hint);
1003 		return;
1004 	}
1005 
1006 	unfm_hint = get_left_neighbor(hint);
1007 
1008 	/* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
1009 	   new blocks are displaced based on directory ID. Also, if suggested search_start
1010 	   is less than last preallocated block, we start searching from it, assuming that
1011 	   HDD dataflow is faster in forward direction */
1012 	if (TEST_OPTION(old_way, s)) {
1013 		if (!hint->formatted_node) {
1014 			if (!reiserfs_hashed_relocation(s))
1015 				old_way(hint);
1016 			else if (!reiserfs_no_unhashed_relocation(s))
1017 				old_hashed_relocation(hint);
1018 
1019 			if (hint->inode
1020 			    && hint->search_start <
1021 			    REISERFS_I(hint->inode)->i_prealloc_block)
1022 				hint->search_start =
1023 				    REISERFS_I(hint->inode)->i_prealloc_block;
1024 		}
1025 		return;
1026 	}
1027 
1028 	/* This is an approach proposed by Hans */
1029 	if (TEST_OPTION(hundredth_slices, s)
1030 	    && !(displacing_large_files(s) && !hint->formatted_node)) {
1031 		hundredth_slices(hint);
1032 		return;
1033 	}
1034 
1035 	/* old_hashed_relocation only works on unformatted */
1036 	if (!unfm_hint && !hint->formatted_node &&
1037 	    TEST_OPTION(old_hashed_relocation, s)) {
1038 		old_hashed_relocation(hint);
1039 	}
1040 	/* new_hashed_relocation works with both formatted/unformatted nodes */
1041 	if ((!unfm_hint || hint->formatted_node) &&
1042 	    TEST_OPTION(new_hashed_relocation, s)) {
1043 		new_hashed_relocation(hint);
1044 	}
1045 	/* dirid grouping works only on unformatted nodes */
1046 	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1047 		dirid_groups(hint);
1048 	}
1049 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1050 	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1051 		dirid_groups(hint);
1052 	}
1053 #endif
1054 
1055 	/* oid grouping works only on unformatted nodes */
1056 	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1057 		oid_groups(hint);
1058 	}
1059 	return;
1060 }
1061 
determine_prealloc_size(reiserfs_blocknr_hint_t * hint)1062 static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1063 {
1064 	/* make minimum size a mount option and benchmark both ways */
1065 	/* we preallocate blocks only for regular files, specific size */
1066 	/* benchmark preallocating always and see what happens */
1067 
1068 	hint->prealloc_size = 0;
1069 
1070 	if (!hint->formatted_node && hint->preallocate) {
1071 		if (S_ISREG(hint->inode->i_mode)
1072 		    && hint->inode->i_size >=
1073 		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1074 		    preallocmin * hint->inode->i_sb->s_blocksize)
1075 			hint->prealloc_size =
1076 			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
1077 			    preallocsize - 1;
1078 	}
1079 	return CARRY_ON;
1080 }
1081 
1082 /* XXX I know it could be merged with upper-level function;
1083    but may be result function would be too complex. */
allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,b_blocknr_t start,b_blocknr_t finish,int min,int amount_needed,int prealloc_size)1084 static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1085 						 b_blocknr_t * new_blocknrs,
1086 						 b_blocknr_t start,
1087 						 b_blocknr_t finish, int min,
1088 						 int amount_needed,
1089 						 int prealloc_size)
1090 {
1091 	int rest = amount_needed;
1092 	int nr_allocated;
1093 
1094 	while (rest > 0 && start <= finish) {
1095 		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1096 					   rest + prealloc_size,
1097 					   !hint->formatted_node, hint->block);
1098 
1099 		if (nr_allocated == 0)	/* no new blocks allocated, return */
1100 			break;
1101 
1102 		/* fill free_blocknrs array first */
1103 		while (rest > 0 && nr_allocated > 0) {
1104 			*new_blocknrs++ = start++;
1105 			rest--;
1106 			nr_allocated--;
1107 		}
1108 
1109 		/* do we have something to fill prealloc. array also ? */
1110 		if (nr_allocated > 0) {
1111 			/* it means prealloc_size was greater that 0 and we do preallocation */
1112 			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1113 				 &SB_JOURNAL(hint->th->t_super)->
1114 				 j_prealloc_list);
1115 			REISERFS_I(hint->inode)->i_prealloc_block = start;
1116 			REISERFS_I(hint->inode)->i_prealloc_count =
1117 			    nr_allocated;
1118 			break;
1119 		}
1120 	}
1121 
1122 	return (amount_needed - rest);
1123 }
1124 
blocknrs_and_prealloc_arrays_from_search_start(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed)1125 static inline int blocknrs_and_prealloc_arrays_from_search_start
1126     (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1127      int amount_needed) {
1128 	struct super_block *s = hint->th->t_super;
1129 	b_blocknr_t start = hint->search_start;
1130 	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1131 	int passno = 0;
1132 	int nr_allocated = 0;
1133 
1134 	determine_prealloc_size(hint);
1135 	if (!hint->formatted_node) {
1136 		int quota_ret;
1137 #ifdef REISERQUOTA_DEBUG
1138 		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1139 			       "reiserquota: allocating %d blocks id=%u",
1140 			       amount_needed, hint->inode->i_uid);
1141 #endif
1142 		quota_ret =
1143 		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1144 		if (quota_ret)	/* Quota exceeded? */
1145 			return QUOTA_EXCEEDED;
1146 		if (hint->preallocate && hint->prealloc_size) {
1147 #ifdef REISERQUOTA_DEBUG
1148 			reiserfs_debug(s, REISERFS_DEBUG_CODE,
1149 				       "reiserquota: allocating (prealloc) %d blocks id=%u",
1150 				       hint->prealloc_size, hint->inode->i_uid);
1151 #endif
1152 			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1153 							 hint->prealloc_size);
1154 			if (quota_ret)
1155 				hint->preallocate = hint->prealloc_size = 0;
1156 		}
1157 		/* for unformatted nodes, force large allocations */
1158 	}
1159 
1160 	do {
1161 		switch (passno++) {
1162 		case 0:	/* Search from hint->search_start to end of disk */
1163 			start = hint->search_start;
1164 			finish = SB_BLOCK_COUNT(s) - 1;
1165 			break;
1166 		case 1:	/* Search from hint->beg to hint->search_start */
1167 			start = hint->beg;
1168 			finish = hint->search_start;
1169 			break;
1170 		case 2:	/* Last chance: Search from 0 to hint->beg */
1171 			start = 0;
1172 			finish = hint->beg;
1173 			break;
1174 		default:	/* We've tried searching everywhere, not enough space */
1175 			/* Free the blocks */
1176 			if (!hint->formatted_node) {
1177 #ifdef REISERQUOTA_DEBUG
1178 				reiserfs_debug(s, REISERFS_DEBUG_CODE,
1179 					       "reiserquota: freeing (nospace) %d blocks id=%u",
1180 					       amount_needed +
1181 					       hint->prealloc_size -
1182 					       nr_allocated,
1183 					       hint->inode->i_uid);
1184 #endif
1185 				/* Free not allocated blocks */
1186 				dquot_free_block_nodirty(hint->inode,
1187 					amount_needed + hint->prealloc_size -
1188 					nr_allocated);
1189 			}
1190 			while (nr_allocated--)
1191 				reiserfs_free_block(hint->th, hint->inode,
1192 						    new_blocknrs[nr_allocated],
1193 						    !hint->formatted_node);
1194 
1195 			return NO_DISK_SPACE;
1196 		}
1197 	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
1198 								 new_blocknrs +
1199 								 nr_allocated,
1200 								 start, finish,
1201 								 1,
1202 								 amount_needed -
1203 								 nr_allocated,
1204 								 hint->
1205 								 prealloc_size))
1206 		 < amount_needed);
1207 	if (!hint->formatted_node &&
1208 	    amount_needed + hint->prealloc_size >
1209 	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1210 		/* Some of preallocation blocks were not allocated */
1211 #ifdef REISERQUOTA_DEBUG
1212 		reiserfs_debug(s, REISERFS_DEBUG_CODE,
1213 			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1214 			       amount_needed + hint->prealloc_size -
1215 			       nr_allocated -
1216 			       REISERFS_I(hint->inode)->i_prealloc_count,
1217 			       hint->inode->i_uid);
1218 #endif
1219 		dquot_free_block_nodirty(hint->inode, amount_needed +
1220 					 hint->prealloc_size - nr_allocated -
1221 					 REISERFS_I(hint->inode)->
1222 					 i_prealloc_count);
1223 	}
1224 
1225 	return CARRY_ON;
1226 }
1227 
1228 /* grab new blocknrs from preallocated list */
1229 /* return amount still needed after using them */
use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed)1230 static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1231 					      b_blocknr_t * new_blocknrs,
1232 					      int amount_needed)
1233 {
1234 	struct inode *inode = hint->inode;
1235 
1236 	if (REISERFS_I(inode)->i_prealloc_count > 0) {
1237 		while (amount_needed) {
1238 
1239 			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1240 			REISERFS_I(inode)->i_prealloc_count--;
1241 
1242 			amount_needed--;
1243 
1244 			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1245 				list_del(&REISERFS_I(inode)->i_prealloc_list);
1246 				break;
1247 			}
1248 		}
1249 	}
1250 	/* return amount still needed after using preallocated blocks */
1251 	return amount_needed;
1252 }
1253 
reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed,int reserved_by_us)1254 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us	/* Amount of blocks we have
1255 																	   already reserved */ )
1256 {
1257 	int initial_amount_needed = amount_needed;
1258 	int ret;
1259 	struct super_block *s = hint->th->t_super;
1260 
1261 	/* Check if there is enough space, taking into account reserved space */
1262 	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1263 	    amount_needed - reserved_by_us)
1264 		return NO_DISK_SPACE;
1265 	/* should this be if !hint->inode &&  hint->preallocate? */
1266 	/* do you mean hint->formatted_node can be removed ? - Zam */
1267 	/* hint->formatted_node cannot be removed because we try to access
1268 	   inode information here, and there is often no inode assotiated with
1269 	   metadata allocations - green */
1270 
1271 	if (!hint->formatted_node && hint->preallocate) {
1272 		amount_needed = use_preallocated_list_if_available
1273 		    (hint, new_blocknrs, amount_needed);
1274 		if (amount_needed == 0)	/* all blocknrs we need we got from
1275 					   prealloc. list */
1276 			return CARRY_ON;
1277 		new_blocknrs += (initial_amount_needed - amount_needed);
1278 	}
1279 
1280 	/* find search start and save it in hint structure */
1281 	determine_search_start(hint, amount_needed);
1282 	if (hint->search_start >= SB_BLOCK_COUNT(s))
1283 		hint->search_start = SB_BLOCK_COUNT(s) - 1;
1284 
1285 	/* allocation itself; fill new_blocknrs and preallocation arrays */
1286 	ret = blocknrs_and_prealloc_arrays_from_search_start
1287 	    (hint, new_blocknrs, amount_needed);
1288 
1289 	/* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1290 	 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1291 	 * variant) */
1292 
1293 	if (ret != CARRY_ON) {
1294 		while (amount_needed++ < initial_amount_needed) {
1295 			reiserfs_free_block(hint->th, hint->inode,
1296 					    *(--new_blocknrs), 1);
1297 		}
1298 	}
1299 	return ret;
1300 }
1301 
reiserfs_cache_bitmap_metadata(struct super_block * sb,struct buffer_head * bh,struct reiserfs_bitmap_info * info)1302 void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1303                                     struct buffer_head *bh,
1304                                     struct reiserfs_bitmap_info *info)
1305 {
1306 	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1307 
1308 	/* The first bit must ALWAYS be 1 */
1309 	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1310 		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1311 			       "corrupted: first bit must be 1", bh->b_blocknr);
1312 
1313 	info->free_count = 0;
1314 
1315 	while (--cur >= (unsigned long *)bh->b_data) {
1316 		/* 0 and ~0 are special, we can optimize for them */
1317 		if (*cur == 0)
1318 			info->free_count += BITS_PER_LONG;
1319 		else if (*cur != ~0L)	/* A mix, investigate */
1320 			info->free_count += BITS_PER_LONG - hweight_long(*cur);
1321 	}
1322 }
1323 
reiserfs_read_bitmap_block(struct super_block * sb,unsigned int bitmap)1324 struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1325                                                unsigned int bitmap)
1326 {
1327 	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1328 	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1329 	struct buffer_head *bh;
1330 
1331 	/* Way old format filesystems had the bitmaps packed up front.
1332 	 * I doubt there are any of these left, but just in case... */
1333 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1334 	                      &(REISERFS_SB(sb)->s_properties))))
1335 		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1336 	else if (bitmap == 0)
1337 		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1338 
1339 	reiserfs_write_unlock(sb);
1340 	bh = sb_bread(sb, block);
1341 	reiserfs_write_lock(sb);
1342 	if (bh == NULL)
1343 		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1344 		                 "reading failed", __func__, block);
1345 	else {
1346 		if (buffer_locked(bh)) {
1347 			PROC_INFO_INC(sb, scan_bitmap.wait);
1348 			reiserfs_write_unlock(sb);
1349 			__wait_on_buffer(bh);
1350 			reiserfs_write_lock(sb);
1351 		}
1352 		BUG_ON(!buffer_uptodate(bh));
1353 		BUG_ON(atomic_read(&bh->b_count) == 0);
1354 
1355 		if (info->free_count == UINT_MAX)
1356 			reiserfs_cache_bitmap_metadata(sb, bh, info);
1357 	}
1358 
1359 	return bh;
1360 }
1361 
reiserfs_init_bitmap_cache(struct super_block * sb)1362 int reiserfs_init_bitmap_cache(struct super_block *sb)
1363 {
1364 	struct reiserfs_bitmap_info *bitmap;
1365 	unsigned int bmap_nr = reiserfs_bmap_count(sb);
1366 
1367 	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1368 	if (bitmap == NULL)
1369 		return -ENOMEM;
1370 
1371 	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1372 
1373 	SB_AP_BITMAP(sb) = bitmap;
1374 
1375 	return 0;
1376 }
1377 
reiserfs_free_bitmap_cache(struct super_block * sb)1378 void reiserfs_free_bitmap_cache(struct super_block *sb)
1379 {
1380 	if (SB_AP_BITMAP(sb)) {
1381 		vfree(SB_AP_BITMAP(sb));
1382 		SB_AP_BITMAP(sb) = NULL;
1383 	}
1384 }
1385