/*
  drbd_int.h

  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
  Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
  Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

  drbd is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.

  drbd is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with drbd; see the file COPYING.  If not, write to
  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#ifndef _DRBD_INT_H
#define _DRBD_INT_H

#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/crypto.h>
#include <linux/ratelimit.h>
#include <linux/tcp.h>
#include <linux/mutex.h>
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <net/tcp.h>
#include <linux/lru_cache.h>
#include <linux/prefetch.h>
#include <linux/drbd_genl_api.h>
#include <linux/drbd.h>
#include "drbd_state.h"
#include "drbd_protocol.h"

#ifdef __CHECKER__
# define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
# define __protected_read_by(x)  __attribute__((require_context(x,1,999,"read")))
# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
# define __must_hold(x)       __attribute__((context(x,1,1), require_context(x,1,999,"call")))
#else
# define __protected_by(x)
# define __protected_read_by(x)
# define __protected_write_by(x)
# define __must_hold(x)
#endif
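/* Usage sketch: these annotations are only evaluated by sparse ("make C=1")
 * and compile to nothing otherwise.  This file itself uses them like
 *
 *	struct drbd_backing_dev *ldev __protected_by(local);
 *	extern void drbd_uuid_set(struct drbd_device *device, int idx, u64 val)
 *		__must_hold(local);
 *
 * i.e. ldev may only be dereferenced while a local disk reference is held,
 * and drbd_uuid_set() must be called with that reference already taken.
 */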

#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0)

/* module parameters, defined in drbd_main.c */
extern unsigned int minor_count;
extern bool disable_sendpage;
extern bool allow_oos;
void tl_abort_disk_io(struct drbd_device *device);

#ifdef CONFIG_DRBD_FAULT_INJECTION
extern int enable_faults;
extern int fault_rate;
extern int fault_devs;
#endif

extern char usermode_helper[];


/* I don't remember why XCPU ...
 * This is used to wake the asender,
 * and to interrupt the sending task's send()
 * on disconnect.
 */
#define DRBD_SIG SIGXCPU

/* This is used to stop/restart our threads.
 * Cannot use SIGTERM nor SIGKILL, since these
 * are sent out by init on runlevel changes.
 * I chose SIGHUP for now.
 */
#define DRBD_SIGKILL SIGHUP

#define ID_IN_SYNC      (4711ULL)
#define ID_OUT_OF_SYNC  (4712ULL)
#define ID_SYNCER (-1ULL)

#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)

struct drbd_device;
struct drbd_connection;


/* to shorten dev_warn(DEV, "msg"); and related statements */
#define DEV (disk_to_dev(device->vdisk))

#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \
	printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS)
#define conn_alert(TCONN, FMT, ARGS...)  conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS)
#define conn_crit(TCONN, FMT, ARGS...)   conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS)
#define conn_err(TCONN, FMT, ARGS...)    conn_printk(KERN_ERR, TCONN, FMT, ## ARGS)
#define conn_warn(TCONN, FMT, ARGS...)   conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS)
#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS)
#define conn_info(TCONN, FMT, ARGS...)   conn_printk(KERN_INFO, TCONN, FMT, ## ARGS)
#define conn_dbg(TCONN, FMT, ARGS...)    conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS)
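/* Example expansion: conn_err(connection, "short read %d\n", rv) becomes
 * printk(KERN_ERR "d-con %s: short read %d\n", connection->name, rv),
 * so every message is prefixed with the resource name. */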

#define D_ASSERT(exp)	if (!(exp)) \
	 dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)

/**
 * expect  -  Make an assertion
 *
 * Unlike the assert macro, this macro returns a boolean result.
 */
#define expect(exp) ({								\
		bool _bool = (exp);						\
		if (!_bool)							\
			dev_err(DEV, "ASSERTION %s FAILED in %s\n",		\
			        #exp, __func__);				\
		_bool;								\
		})
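/* Usage sketch (hypothetical condition, but the pattern matches the real
 * call sites): because the expression's value is returned, a violation is
 * logged *and* can still be handled gracefully:
 *
 *	if (!expect(digest_size > 0))
 *		return -EINVAL;
 */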

/* Defines to control fault insertion */
enum {
	DRBD_FAULT_MD_WR = 0,	/* meta data write */
	DRBD_FAULT_MD_RD = 1,	/*           read  */
	DRBD_FAULT_RS_WR = 2,	/* resync          */
	DRBD_FAULT_RS_RD = 3,
	DRBD_FAULT_DT_WR = 4,	/* data            */
	DRBD_FAULT_DT_RD = 5,
	DRBD_FAULT_DT_RA = 6,	/* data read ahead */
	DRBD_FAULT_BM_ALLOC = 7,	/* bitmap allocation */
	DRBD_FAULT_AL_EE = 8,	/* alloc ee */
	DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */

	DRBD_FAULT_MAX,
};

extern unsigned int
_drbd_insert_fault(struct drbd_device *device, unsigned int type);

static inline int
drbd_insert_fault(struct drbd_device *device, unsigned int type) {
#ifdef CONFIG_DRBD_FAULT_INJECTION
	return fault_rate &&
		(enable_faults & (1<<type)) &&
		_drbd_insert_fault(device, type);
#else
	return 0;
#endif
}
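/* Typical use at an I/O submission point (a sketch, modeled on callers
 * elsewhere in the driver):
 *
 *	if (drbd_insert_fault(device, DRBD_FAULT_MD_WR))
 *		bio_endio(bio, -EIO);
 *	else
 *		generic_make_request(bio);
 *
 * A fault of a given type only fires if its bit is set in the enable_faults
 * module parameter, with a probability controlled by fault_rate; with
 * CONFIG_DRBD_FAULT_INJECTION disabled this compiles down to the plain
 * submit path.
 */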

/* integer division, round _UP_ to the next integer */
#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
/* usual integer division */
#define div_floor(A, B) ((A)/(B))
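/* e.g. div_ceil(1000, 512) == 2 while div_floor(1000, 512) == 1;
 * both equal A/B whenever B divides A evenly. */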

extern struct ratelimit_state drbd_ratelimit_state;
extern struct idr minors; /* RCU, updates: genl_lock() */
extern struct list_head drbd_connections; /* RCU, updates: genl_lock() */

extern const char *cmdname(enum drbd_packet cmd);

/* for sending/receiving the bitmap,
 * possibly in some encoding scheme */
struct bm_xfer_ctx {
	/* "const"
	 * stores total bits and long words
	 * of the bitmap, so we don't need to
	 * call the accessor functions over and again. */
	unsigned long bm_bits;
	unsigned long bm_words;
	/* during xfer, current position within the bitmap */
	unsigned long bit_offset;
	unsigned long word_offset;

	/* statistics; index: (h->command == P_BITMAP) */
	unsigned packets[2];
	unsigned bytes[2];
};

extern void INFO_bm_xfer_stats(struct drbd_device *device,
		const char *direction, struct bm_xfer_ctx *c);

static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
{
	/* word_offset counts "native long words" (32 or 64 bit),
	 * aligned at 64 bit.
	 * Encoded packet may end at an unaligned bit offset.
	 * In case a fallback clear text packet is transmitted in
	 * between, we adjust this offset back to the last 64bit
	 * aligned "native long word", which makes coding and decoding
	 * the plain text bitmap much more convenient.  */
#if BITS_PER_LONG == 64
	c->word_offset = c->bit_offset >> 6;
#elif BITS_PER_LONG == 32
	c->word_offset = c->bit_offset >> 5;
	c->word_offset &= ~(1UL);
#else
# error "unsupported BITS_PER_LONG"
#endif
}
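/* Worked example: on 32 bit, bit_offset 33 gives 33 >> 5 = 1, masked down to
 * word_offset 0, i.e. back to the last 64 bit aligned long word; on 64 bit,
 * 33 >> 6 = 0 lands there directly. */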

extern unsigned int drbd_header_size(struct drbd_connection *connection);

/**********************************************************************/
enum drbd_thread_state {
	NONE,
	RUNNING,
	EXITING,
	RESTARTING
};

struct drbd_thread {
	spinlock_t t_lock;
	struct task_struct *task;
	struct completion stop;
	enum drbd_thread_state t_state;
	int (*function) (struct drbd_thread *);
	struct drbd_connection *connection;
	int reset_cpu_mask;
	char name[9];
};

static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
{
	/* THINK testing t_state seems to be harmless in all cases
	 * (except thread_{start,stop}), so we can read it *without* the lock.
	 *	--lge */

	smp_rmb();
	return thi->t_state;
}

struct drbd_work {
	struct list_head list;
	int (*cb)(struct drbd_work *, int cancel);
	union {
		struct drbd_device *device;
		struct drbd_connection *connection;
	};
};

#include "drbd_interval.h"

extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);

struct drbd_request {
	struct drbd_work w;

	/* if local IO is not allowed, will be NULL.
	 * if local IO _is_ allowed, holds the locally submitted bio clone,
	 * or, after local IO completion, the ERR_PTR(error).
	 * see drbd_request_endio(). */
	struct bio *private_bio;

	struct drbd_interval i;

	/* epoch: used to check on "completion" whether this req was in
	 * the current epoch, and we therefore have to close it,
	 * causing a p_barrier packet to be sent, starting a new epoch.
	 *
	 * This corresponds to "barrier" in struct p_barrier[_ack],
	 * and to "barrier_nr" in struct drbd_epoch (and various
	 * comments/function parameters/local variable names).
	 */
	unsigned int epoch;

	struct list_head tl_requests; /* ring list in the transfer log */
	struct bio *master_bio;       /* master bio pointer */
	unsigned long start_time;

	/* once it hits 0, we may complete the master_bio */
	atomic_t completion_ref;
	/* once it hits 0, we may destroy this drbd_request object */
	struct kref kref;

	unsigned rq_state; /* see comments above _req_mod() */
};

struct drbd_epoch {
	struct drbd_connection *connection;
	struct list_head list;
	unsigned int barrier_nr;
	atomic_t epoch_size; /* increased on every request added. */
	atomic_t active;     /* increased on every req. added, and dec on every finished. */
	unsigned long flags;
};

/* Prototype declarations of functions defined in drbd_receiver.c */
int drbdd_init(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

/* drbd_epoch flag bits */
enum {
	DE_HAVE_BARRIER_NUMBER,
};

enum epoch_event {
	EV_PUT,
	EV_GOT_BARRIER_NR,
	EV_BECAME_LAST,
	EV_CLEANUP = 32, /* used as flag */
};

struct drbd_wq_barrier {
	struct drbd_work w;
	struct completion done;
};

struct digest_info {
	int digest_size;
	void *digest;
};

struct drbd_peer_request {
	struct drbd_work w;
	struct drbd_epoch *epoch; /* for writes */
	struct page *pages;
	atomic_t pending_bios;
	struct drbd_interval i;
	/* see comments on ee flag bits below */
	unsigned long flags;
	union {
		u64 block_id;
		struct digest_info *digest;
	};
};

/* ee flag bits.
 * While corresponding bios are in flight, the only modification will be
 * set_bit WAS_ERROR, which has to be atomic.
 * If no bios are in flight yet, or all have been completed,
 * non-atomic modification to ee->flags is ok.
 */
enum {
	__EE_CALL_AL_COMPLETE_IO,
	__EE_MAY_SET_IN_SYNC,

	/* In case a barrier failed,
	 * we need to resubmit without the barrier flag. */
	__EE_RESUBMITTED,

	/* we may have several bios per peer request.
	 * if any of those fail, we set this flag atomically
	 * from the endio callback */
	__EE_WAS_ERROR,

	/* This ee has a pointer to a digest instead of a block id */
	__EE_HAS_DIGEST,

	/* Conflicting local requests need to be restarted after this request */
	__EE_RESTART_REQUESTS,

	/* The peer wants a write ACK for this (wire proto C) */
	__EE_SEND_WRITE_ACK,

	/* Is set when net_conf had two_primaries set while creating this peer_req */
	__EE_IN_INTERVAL_TREE,
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
#define	EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
#define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
#define EE_SEND_WRITE_ACK	(1<<__EE_SEND_WRITE_ACK)
#define EE_IN_INTERVAL_TREE	(1<<__EE_IN_INTERVAL_TREE)

/* flag bits per device */
enum {
	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
	MD_DIRTY,		/* current uuids and flags not yet on disk */
	USE_DEGR_WFC_T,		/* degr-wfc-timeout instead of wfc-timeout. */
	CL_ST_CHG_SUCCESS,
	CL_ST_CHG_FAIL,
	CRASHED_PRIMARY,	/* This node was a crashed primary.
				 * Gets cleared when the state.conn
				 * goes into C_CONNECTED state. */
	CONSIDER_RESYNC,

	MD_NO_FUA,		/* User wants us to not use FUA/FLUSH on meta data dev */
	SUSPEND_IO,		/* suspend application io */
	BITMAP_IO,		/* suspend application io;
				   once no more io in flight, start bitmap io */
	BITMAP_IO_QUEUED,       /* Started bitmap IO */
	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
				 * the peer, if it changed there as well. */
	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
	AL_SUSPENDED,		/* Activity logging is currently suspended. */
	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
	B_RS_H_DONE,		/* Before resync handler done (already executed) */
	DISCARD_MY_DATA,	/* discard_my_data flag per volume */
	READ_BALANCE_RR,
};

struct drbd_bitmap; /* opaque for drbd_device */

/* definition of bits in bm_flags to be used in drbd_bm_lock
 * and drbd_bitmap_io and friends. */
enum bm_flag {
	/* do we need to kfree, or vfree bm_pages? */
	BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */

	/* currently locked for bulk operation */
	BM_LOCKED_MASK = 0xf,

	/* in detail, that is: */
	BM_DONT_CLEAR = 0x1,
	BM_DONT_SET   = 0x2,
	BM_DONT_TEST  = 0x4,

	/* so we can mark it locked for bulk operation,
	 * and still allow all non-bulk operations */
	BM_IS_LOCKED  = 0x8,

	/* (test bit, count bit) allowed (common case) */
	BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED,

	/* testing bits, as well as setting new bits allowed, but clearing bits
	 * would be unexpected.  Used during bitmap receive.  Setting new bits
	 * requires sending of "out-of-sync" information, though. */
	BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED,

	/* for drbd_bm_write_copy_pages, everything is allowed,
	 * only concurrent bulk operations are locked out. */
	BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED,
};
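/* Usage sketch (hedged; the real callers live in drbd_bitmap.c and friends):
 * a bulk operation declares up front which modifications it expects, and any
 * other bit manipulation while the lock is held triggers a warning:
 *
 *	drbd_bm_lock(device, "initial read", BM_LOCKED_MASK);
 *	...bulk transfer of the on-disk bitmap...
 *	drbd_bm_unlock(device);
 */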

struct drbd_work_queue {
	struct list_head q;
	spinlock_t q_lock;  /* to protect the list. */
	wait_queue_head_t q_wait;
};

struct drbd_socket {
	struct mutex mutex;
	struct socket    *socket;
	/* this way we get our
	 * send/receive buffers off the stack */
	void *sbuf;
	void *rbuf;
};

struct drbd_md {
	u64 md_offset;		/* sector offset to 'super' block */

	u64 la_size_sect;	/* last agreed size, unit sectors */
	spinlock_t uuid_lock;
	u64 uuid[UI_SIZE];
	u64 device_uuid;
	u32 flags;
	u32 md_size_sect;

	s32 al_offset;	/* signed relative sector offset to activity log */
	s32 bm_offset;	/* signed relative sector offset to bitmap */

	/* cached value of bdev->disk_conf->meta_dev_idx (see below) */
	s32 meta_dev_idx;

	/* see al_tr_number_to_on_disk_sector() */
	u32 al_stripes;
	u32 al_stripe_size_4k;
	u32 al_size_4k; /* cached product of the above */
};

struct drbd_backing_dev {
	struct block_device *backing_bdev;
	struct block_device *md_bdev;
	struct drbd_md md;
	struct disk_conf *disk_conf; /* RCU, for updates: first_peer_device(device)->connection->conf_update */
	sector_t known_size; /* last known size of that backing device */
};

struct drbd_md_io {
	unsigned int done;
	int error;
};

struct bm_io_work {
	struct drbd_work w;
	char *why;
	enum bm_flag flags;
	int (*io_fn)(struct drbd_device *device);
	void (*done)(struct drbd_device *device, int rv);
};

enum write_ordering_e {
	WO_none,
	WO_drain_io,
	WO_bdev_flush,
};

struct fifo_buffer {
	unsigned int head_index;
	unsigned int size;
	int total; /* sum of all values */
	int values[0];
};
extern struct fifo_buffer *fifo_alloc(int fifo_size);
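/* values[] is a zero-length (GNU C) array member: header and payload are one
 * allocation.  A plausible sketch of fifo_alloc(), which lives elsewhere in
 * the driver:
 *
 *	struct fifo_buffer *fb;
 *
 *	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size,
 *		     GFP_NOIO);
 *	if (fb)
 *		fb->size = fifo_size;
 *	return fb;
 */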

/* flag bits per connection */
enum {
	NET_CONGESTED,		/* The data socket is congested */
	RESOLVE_CONFLICTS,	/* Set on one node, cleared on the peer! */
	SEND_PING,		/* whether asender should send a ping asap */
	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
	GOT_PING_ACK,		/* set when we receive a ping_ack packet, ping_wait gets woken */
	CONN_WD_ST_CHG_REQ,	/* A cluster wide state change on the connection is active */
	CONN_WD_ST_CHG_OKAY,
	CONN_WD_ST_CHG_FAIL,
	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
	STATE_SENT,		/* Do not change state/UUIDs while this is set */
	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
				 * pending, from drbd worker context.
				 * If set, bdi_write_congested() returns true,
				 * so shrink_page_list() would not recurse into,
				 * and potentially deadlock on, this drbd worker.
				 */
	DISCONNECT_SENT,
};

struct drbd_connection {			/* is a resource from the config file */
	char *name;			/* Resource name */
	struct list_head connections;	/* linked on global drbd_connections */
	struct kref kref;
	struct idr volumes;		/* <connection, vnr> to device mapping */
	enum drbd_conns cstate;		/* Only C_STANDALONE to C_WF_REPORT_PARAMS */
	unsigned susp:1;		/* IO suspended by user */
	unsigned susp_nod:1;		/* IO suspended because no data */
	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
	struct mutex cstate_mutex;	/* Protects graceful disconnects */
	unsigned int connect_cnt;	/* Inc each time a connection is established */

	unsigned long flags;
	struct net_conf *net_conf;	/* content protected by rcu */
	struct mutex conf_update;	/* mutex for read-copy-update of net_conf and disk_conf */
	wait_queue_head_t ping_wait;	/* Woken upon reception of a ping, and a state change */
	struct res_opts res_opts;

	struct sockaddr_storage my_addr;
	int my_addr_len;
	struct sockaddr_storage peer_addr;
	int peer_addr_len;

	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
	struct drbd_socket meta;	/* ping/ack (metadata) packets */
	int agreed_pro_version;		/* actually used protocol version */
	unsigned long last_received;	/* in jiffies, either socket */
	unsigned int ko_count;

	spinlock_t req_lock;

	struct list_head transfer_log;	/* all requests not yet fully processed */

	struct crypto_hash *cram_hmac_tfm;
	struct crypto_hash *integrity_tfm;  /* checksums we compute, updates protected by connection->data->mutex */
	struct crypto_hash *peer_integrity_tfm;  /* checksums we verify, only accessed from receiver thread  */
	struct crypto_hash *csums_tfm;
	struct crypto_hash *verify_tfm;
	void *int_dig_in;
	void *int_dig_vv;

	/* receiver side */
	struct drbd_epoch *current_epoch;
	spinlock_t epoch_lock;
	unsigned int epochs;
	enum write_ordering_e write_ordering;
	atomic_t current_tle_nr;	/* transfer log epoch number */
	unsigned current_tle_writes;	/* writes seen within this tl epoch */

	unsigned long last_reconnect_jif;
	struct drbd_thread receiver;
	struct drbd_thread worker;
	struct drbd_thread asender;
	cpumask_var_t cpu_mask;

	/* sender side */
	struct drbd_work_queue sender_work;

	struct {
		/* whether this sender thread
		 * has processed a single write yet. */
		bool seen_any_write_yet;

		/* Which barrier number to send with the next P_BARRIER */
		int current_epoch_nr;

		/* how many write requests have been sent
		 * with req->epoch == current_epoch_nr.
		 * If none, no P_BARRIER will be sent. */
		unsigned current_epoch_writes;
	} send;
};

struct submit_worker {
	struct workqueue_struct *wq;
	struct work_struct worker;

	spinlock_t lock;
	struct list_head writes;
};

struct drbd_peer_device {
	struct list_head peer_devices;
	struct drbd_device *device;
	struct drbd_connection *connection;
};

struct drbd_device {
	struct list_head peer_devices;
	int vnr;			/* volume number within the connection */
	struct kref kref;

	/* things that are stored as / read from meta data on disk */
	unsigned long flags;

	/* configured by drbdsetup */
	struct drbd_backing_dev *ldev __protected_by(local);

	sector_t p_size;     /* partner's disk size */
	struct request_queue *rq_queue;
	struct block_device *this_bdev;
	struct gendisk	    *vdisk;

	unsigned long last_reattach_jif;
	struct drbd_work  resync_work,
			  unplug_work,
			  go_diskless,
			  md_sync_work,
			  start_resync_work;
	struct timer_list resync_timer;
	struct timer_list md_sync_timer;
	struct timer_list start_resync_timer;
	struct timer_list request_timer;
#ifdef DRBD_DEBUG_MD_SYNC
	struct {
		unsigned int line;
		const char* func;
	} last_md_mark_dirty;
#endif

	/* Used after attach while negotiating new disk state. */
	union drbd_state new_state_tmp;

	union drbd_dev_state state;
	wait_queue_head_t misc_wait;
	wait_queue_head_t state_wait;  /* upon each state change. */
	unsigned int send_cnt;
	unsigned int recv_cnt;
	unsigned int read_cnt;
	unsigned int writ_cnt;
	unsigned int al_writ_cnt;
	unsigned int bm_writ_cnt;
	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
	atomic_t unacked_cnt;	 /* Need to send replies for */
	atomic_t local_cnt;	 /* Waiting for local completion */

	/* Interval tree of pending local requests */
	struct rb_root read_requests;
	struct rb_root write_requests;

	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
	unsigned long rs_total;
	/* number of resync blocks that failed in this run */
	unsigned long rs_failed;
	/* Syncer's start time [unit jiffies] */
	unsigned long rs_start;
	/* cumulated time in PausedSyncX state [unit jiffies] */
	unsigned long rs_paused;
	/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
	unsigned long rs_same_csum;
#define DRBD_SYNC_MARKS 8
#define DRBD_SYNC_MARK_STEP (3*HZ)
	/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
	unsigned long rs_mark_left[DRBD_SYNC_MARKS];
	/* marks' time [unit jiffies] */
	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
	/* current index into rs_mark_{left,time} */
	int rs_last_mark;
	unsigned long rs_last_bcast; /* [unit jiffies] */

	/* where does the admin want us to start? (sector) */
	sector_t ov_start_sector;
	sector_t ov_stop_sector;
	/* where are we now? (sector) */
	sector_t ov_position;
	/* Start sector of out of sync range (to merge printk reporting). */
	sector_t ov_last_oos_start;
	/* size of out-of-sync range in sectors. */
	sector_t ov_last_oos_size;
	unsigned long ov_left; /* in bits */

	struct drbd_bitmap *bitmap;
	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */

	/* Used to track operations of resync... */
	struct lru_cache *resync;
	/* Number of locked elements in resync LRU */
	unsigned int resync_locked;
	/* resync extent number waiting for application requests */
	unsigned int resync_wenr;

	int open_cnt;
	u64 *p_uuid;

	struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
	struct list_head sync_ee;   /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
	struct list_head done_ee;   /* need to send P_WRITE_ACK */
	struct list_head read_ee;   /* [RS]P_DATA_REQUEST being read */
	struct list_head net_ee;    /* zero-copy network send in progress */

	int next_barrier_nr;
	struct list_head resync_reads;
	atomic_t pp_in_use;		/* allocated from page pool */
	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
	wait_queue_head_t ee_wait;
	struct page *md_io_page;	/* one page buffer for md_io */
	struct drbd_md_io md_io;
	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */
	spinlock_t al_lock;
	wait_queue_head_t al_wait;
	struct lru_cache *act_log;	/* activity log */
	unsigned int al_tr_number;
	int al_tr_cycle;
	wait_queue_head_t seq_wait;
	atomic_t packet_seq;
	unsigned int peer_seq;
	spinlock_t peer_seq_lock;
	unsigned int minor;
	unsigned long comm_bm_set; /* communicated number of set bits. */
	struct bm_io_work bm_io_work;
	u64 ed_uuid; /* UUID of the exposed data */
	struct mutex own_state_mutex;
	struct mutex *state_mutex; /* either own_state_mutex or first_peer_device(device)->connection->cstate_mutex */
	char congestion_reason;  /* Why we were congested... */
	atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
	atomic_t rs_sect_ev; /* for submitted resync data rate, both */
	int rs_last_sect_ev; /* counter to compare with */
	int rs_last_events;  /* counter of read or write "events" (unit sectors)
			      * on the lower level device when we last looked. */
	int c_sync_rate; /* current resync rate after syncer throttle magic */
	struct fifo_buffer *rs_plan_s; /* correction values of resync planner (RCU, connection->conf_update) */
	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
	unsigned int peer_max_bio_size;
	unsigned int local_max_bio_size;

	/* any requests that would block in drbd_make_request()
	 * are deferred to this single-threaded work queue */
	struct submit_worker submit;
};

static inline struct drbd_device *minor_to_device(unsigned int minor)
{
	return (struct drbd_device *)idr_find(&minors, minor);
}

static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device)
{
	return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
}

#define for_each_peer_device(peer_device, device) \
	list_for_each_entry(peer_device, &device->peer_devices, peer_devices)

#define for_each_peer_device_rcu(peer_device, device) \
	list_for_each_entry_rcu(peer_device, &device->peer_devices, peer_devices)

#define for_each_peer_device_safe(peer_device, tmp, device) \
	list_for_each_entry_safe(peer_device, tmp, &device->peer_devices, peer_devices)
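/* Usage sketch: at this stage there is exactly one peer per device, so most
 * code either takes it directly or iterates:
 *
 *	struct drbd_peer_device *peer_device = first_peer_device(device);
 *	struct drbd_connection *connection = peer_device->connection;
 *
 *	for_each_peer_device(peer_device, device)
 *		...;
 */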
790*a6b32bc3SAndreas Gruenbacher 
791b30ab791SAndreas Gruenbacher static inline unsigned int device_to_minor(struct drbd_device *device)
792b411b363SPhilipp Reisner {
793b30ab791SAndreas Gruenbacher 	return device->minor;
794b411b363SPhilipp Reisner }
795b411b363SPhilipp Reisner 
796bde89a9eSAndreas Gruenbacher static inline struct drbd_device *vnr_to_device(struct drbd_connection *connection, int vnr)
797b411b363SPhilipp Reisner {
798bde89a9eSAndreas Gruenbacher 	return (struct drbd_device *)idr_find(&connection->volumes, vnr);
799b411b363SPhilipp Reisner }
800b411b363SPhilipp Reisner 
801b411b363SPhilipp Reisner /*
802b411b363SPhilipp Reisner  * function declarations
803b411b363SPhilipp Reisner  *************************/
804b411b363SPhilipp Reisner 
805b411b363SPhilipp Reisner /* drbd_main.c */
806b411b363SPhilipp Reisner 
807e89b591cSPhilipp Reisner enum dds_flags {
808e89b591cSPhilipp Reisner 	DDSF_FORCED    = 1,
809e89b591cSPhilipp Reisner 	DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */
810e89b591cSPhilipp Reisner };
811e89b591cSPhilipp Reisner 
812b30ab791SAndreas Gruenbacher extern void drbd_init_set_defaults(struct drbd_device *device);
813b411b363SPhilipp Reisner extern int  drbd_thread_start(struct drbd_thread *thi);
814b411b363SPhilipp Reisner extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
815bde89a9eSAndreas Gruenbacher extern char *drbd_task_to_thread_name(struct drbd_connection *connection, struct task_struct *task);
816b411b363SPhilipp Reisner #ifdef CONFIG_SMP
81780822284SPhilipp Reisner extern void drbd_thread_current_set_cpu(struct drbd_thread *thi);
818bde89a9eSAndreas Gruenbacher extern void drbd_calc_cpu_mask(struct drbd_connection *connection);
819b411b363SPhilipp Reisner #else
820b411b363SPhilipp Reisner #define drbd_thread_current_set_cpu(A) ({})
821b411b363SPhilipp Reisner #define drbd_calc_cpu_mask(A) ({})
822b411b363SPhilipp Reisner #endif
823bde89a9eSAndreas Gruenbacher extern void tl_release(struct drbd_connection *, unsigned int barrier_nr,
824b411b363SPhilipp Reisner 		       unsigned int set_size);
825bde89a9eSAndreas Gruenbacher extern void tl_clear(struct drbd_connection *);
826bde89a9eSAndreas Gruenbacher extern void drbd_free_sock(struct drbd_connection *connection);
827bde89a9eSAndreas Gruenbacher extern int drbd_send(struct drbd_connection *connection, struct socket *sock,
828b411b363SPhilipp Reisner 		     void *buf, size_t size, unsigned msg_flags);
829bde89a9eSAndreas Gruenbacher extern int drbd_send_all(struct drbd_connection *, struct socket *, void *, size_t,
830fb708e40SAndreas Gruenbacher 			 unsigned);
831fb708e40SAndreas Gruenbacher 
832bde89a9eSAndreas Gruenbacher extern int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cmd);
833bde89a9eSAndreas Gruenbacher extern int drbd_send_protocol(struct drbd_connection *connection);
834b30ab791SAndreas Gruenbacher extern int drbd_send_uuids(struct drbd_device *device);
835b30ab791SAndreas Gruenbacher extern int drbd_send_uuids_skip_initial_sync(struct drbd_device *device);
836b30ab791SAndreas Gruenbacher extern void drbd_gen_and_send_sync_uuid(struct drbd_device *device);
837b30ab791SAndreas Gruenbacher extern int drbd_send_sizes(struct drbd_device *device, int trigger_reply, enum dds_flags flags);
838b30ab791SAndreas Gruenbacher extern int drbd_send_state(struct drbd_device *device, union drbd_state s);
839b30ab791SAndreas Gruenbacher extern int drbd_send_current_state(struct drbd_device *device);
840b30ab791SAndreas Gruenbacher extern int drbd_send_sync_param(struct drbd_device *device);
841bde89a9eSAndreas Gruenbacher extern void drbd_send_b_ack(struct drbd_connection *connection, u32 barrier_nr,
842b411b363SPhilipp Reisner 			    u32 set_size);
84354761697SAndreas Gruenbacher extern int drbd_send_ack(struct drbd_device *, enum drbd_packet,
844f6ffca9fSAndreas Gruenbacher 			 struct drbd_peer_request *);
845b30ab791SAndreas Gruenbacher extern void drbd_send_ack_rp(struct drbd_device *device, enum drbd_packet cmd,
846b411b363SPhilipp Reisner 			     struct p_block_req *rp);
847b30ab791SAndreas Gruenbacher extern void drbd_send_ack_dp(struct drbd_device *device, enum drbd_packet cmd,
8482b2bf214SLars Ellenberg 			     struct p_data *dp, int data_size);
849b30ab791SAndreas Gruenbacher extern int drbd_send_ack_ex(struct drbd_device *device, enum drbd_packet cmd,
850b411b363SPhilipp Reisner 			    sector_t sector, int blksize, u64 block_id);
85154761697SAndreas Gruenbacher extern int drbd_send_out_of_sync(struct drbd_device *, struct drbd_request *);
85254761697SAndreas Gruenbacher extern int drbd_send_block(struct drbd_device *, enum drbd_packet,
853f6ffca9fSAndreas Gruenbacher 			   struct drbd_peer_request *);
854b30ab791SAndreas Gruenbacher extern int drbd_send_dblock(struct drbd_device *device, struct drbd_request *req);
855b30ab791SAndreas Gruenbacher extern int drbd_send_drequest(struct drbd_device *device, int cmd,
856b411b363SPhilipp Reisner 			      sector_t sector, int size, u64 block_id);
857b30ab791SAndreas Gruenbacher extern int drbd_send_drequest_csum(struct drbd_device *device, sector_t sector,
858d8763023SAndreas Gruenbacher 				   int size, void *digest, int digest_size,
859d8763023SAndreas Gruenbacher 				   enum drbd_packet cmd);
860b30ab791SAndreas Gruenbacher extern int drbd_send_ov_request(struct drbd_device *device, sector_t sector, int size);
861b411b363SPhilipp Reisner 
862b30ab791SAndreas Gruenbacher extern int drbd_send_bitmap(struct drbd_device *device);
863b30ab791SAndreas Gruenbacher extern void drbd_send_sr_reply(struct drbd_device *device, enum drbd_state_rv retcode);
864bde89a9eSAndreas Gruenbacher extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
865b411b363SPhilipp Reisner extern void drbd_free_bc(struct drbd_backing_dev *ldev);
866b30ab791SAndreas Gruenbacher extern void drbd_device_cleanup(struct drbd_device *device);
867b30ab791SAndreas Gruenbacher void drbd_print_uuids(struct drbd_device *device, const char *text);
868b411b363SPhilipp Reisner 
869bde89a9eSAndreas Gruenbacher extern void conn_md_sync(struct drbd_connection *connection);
870b30ab791SAndreas Gruenbacher extern void drbd_md_write(struct drbd_device *device, void *buffer);
871b30ab791SAndreas Gruenbacher extern void drbd_md_sync(struct drbd_device *device);
872b30ab791SAndreas Gruenbacher extern int  drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev);
873b30ab791SAndreas Gruenbacher extern void drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local);
874b30ab791SAndreas Gruenbacher extern void _drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local);
875b30ab791SAndreas Gruenbacher extern void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local);
876b30ab791SAndreas Gruenbacher extern void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local);
877b30ab791SAndreas Gruenbacher extern void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local);
878b30ab791SAndreas Gruenbacher extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local);
879b30ab791SAndreas Gruenbacher extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local);
880b30ab791SAndreas Gruenbacher extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local);
881b411b363SPhilipp Reisner extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
882ee15b038SLars Ellenberg #ifndef DRBD_DEBUG_MD_SYNC
883b30ab791SAndreas Gruenbacher extern void drbd_md_mark_dirty(struct drbd_device *device);
884ee15b038SLars Ellenberg #else
885ee15b038SLars Ellenberg #define drbd_md_mark_dirty(m)	drbd_md_mark_dirty_(m, __LINE__ , __func__ )
886b30ab791SAndreas Gruenbacher extern void drbd_md_mark_dirty_(struct drbd_device *device,
887ee15b038SLars Ellenberg 		unsigned int line, const char *func);
888ee15b038SLars Ellenberg #endif
889b30ab791SAndreas Gruenbacher extern void drbd_queue_bitmap_io(struct drbd_device *device,
89054761697SAndreas Gruenbacher 				 int (*io_fn)(struct drbd_device *),
89154761697SAndreas Gruenbacher 				 void (*done)(struct drbd_device *, int),
89220ceb2b2SLars Ellenberg 				 char *why, enum bm_flag flags);
893b30ab791SAndreas Gruenbacher extern int drbd_bitmap_io(struct drbd_device *device,
89454761697SAndreas Gruenbacher 		int (*io_fn)(struct drbd_device *),
89520ceb2b2SLars Ellenberg 		char *why, enum bm_flag flags);
896b30ab791SAndreas Gruenbacher extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
89754761697SAndreas Gruenbacher 		int (*io_fn)(struct drbd_device *),
898edc9f5ebSLars Ellenberg 		char *why, enum bm_flag flags);
899b30ab791SAndreas Gruenbacher extern int drbd_bmio_set_n_write(struct drbd_device *device);
900b30ab791SAndreas Gruenbacher extern int drbd_bmio_clear_n_write(struct drbd_device *device);
901b30ab791SAndreas Gruenbacher extern void drbd_ldev_destroy(struct drbd_device *device);
902b411b363SPhilipp Reisner 
903b411b363SPhilipp Reisner /* Meta data layout
904ae8bf312SLars Ellenberg  *
905ae8bf312SLars Ellenberg  * We currently have two possible layouts.
906ae8bf312SLars Ellenberg  * Offsets in (512 byte) sectors.
907ae8bf312SLars Ellenberg  * external:
908ae8bf312SLars Ellenberg  *   |----------- md_size_sect ------------------|
909ae8bf312SLars Ellenberg  *   [ 4k superblock ][ activity log ][  Bitmap  ]
910ae8bf312SLars Ellenberg  *   | al_offset == 8 |
911ae8bf312SLars Ellenberg  *   | bm_offset = al_offset + X      |
912ae8bf312SLars Ellenberg  *  ==> bitmap sectors = md_size_sect - bm_offset
913ae8bf312SLars Ellenberg  *
914ae8bf312SLars Ellenberg  *  Variants:
915ae8bf312SLars Ellenberg  *     old, indexed fixed size meta data:
916ae8bf312SLars Ellenberg  *
917ae8bf312SLars Ellenberg  * internal:
918ae8bf312SLars Ellenberg  *            |----------- md_size_sect ------------------|
919ae8bf312SLars Ellenberg  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ][padding*]
920ae8bf312SLars Ellenberg  *                        | al_offset < 0 |
921ae8bf312SLars Ellenberg  *            | bm_offset = al_offset - Y |
922ae8bf312SLars Ellenberg  *  ==> bitmap sectors = Y = al_offset - bm_offset
923ae8bf312SLars Ellenberg  *
924ae8bf312SLars Ellenberg  *  [padding*] is zero to 7 unused 512 Byte sectors at the
925ae8bf312SLars Ellenberg  *  end of the device, so that the [4k superblock] will be 4k aligned.
926ae8bf312SLars Ellenberg  *
927ae8bf312SLars Ellenberg  *  The activity log consists of 4k transaction blocks,
928ae8bf312SLars Ellenberg  *  which are written in a ring-buffer, or striped ring-buffer like fashion.
929ae8bf312SLars Ellenberg  *  The activity log size used to be a fixed 32kB,
930ae8bf312SLars Ellenberg  *  but is about to become configurable.
931ae8bf312SLars Ellenberg  */
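
/* Illustrative only, not driver code: with the external layout above,
 * the size of the bitmap area follows directly from the offsets. */
static inline sector_t example_external_bitmap_sectors(sector_t md_size_sect,
						       sector_t bm_offset)
{
	/* everything after the 4k superblock and activity log is bitmap */
	return md_size_sect - bm_offset;
}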
932b411b363SPhilipp Reisner 
933ae8bf312SLars Ellenberg /* Our old fixed size meta data layout
934ae8bf312SLars Ellenberg  * allows up to about 3.8TB, so if you want more,
9357ad651b5SLars Ellenberg  * you need to use the "flexible" meta data format. */
936ae8bf312SLars Ellenberg #define MD_128MB_SECT (128LLU << 11)  /* 128 MB, unit sectors */
937ae8bf312SLars Ellenberg #define MD_4kB_SECT	 8
938ae8bf312SLars Ellenberg #define MD_32kB_SECT	64
939b411b363SPhilipp Reisner 
9407ad651b5SLars Ellenberg /* One activity log extent represents 4M of storage */
9417ad651b5SLars Ellenberg #define AL_EXTENT_SHIFT 22
942b411b363SPhilipp Reisner #define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
943b411b363SPhilipp Reisner 
9447ad651b5SLars Ellenberg /* We could make these currently hardcoded constants configurable
9457ad651b5SLars Ellenberg  * variables at create-md time (or even re-configurable at runtime?).
9467ad651b5SLars Ellenberg  * Which will require some more changes to the DRBD "super block"
9477ad651b5SLars Ellenberg  * and attach code.
9487ad651b5SLars Ellenberg  *
9497ad651b5SLars Ellenberg  * updates per transaction:
9507ad651b5SLars Ellenberg  *   This many changes to the active set can be logged with one transaction.
9517ad651b5SLars Ellenberg  *   This number is arbitrary.
9527ad651b5SLars Ellenberg  * context per transaction:
9537ad651b5SLars Ellenberg  *   This many context extent numbers are logged with each transaction.
9547ad651b5SLars Ellenberg  *   This number is resulting from the transaction block size (4k), the layout
9557ad651b5SLars Ellenberg  *   of the transaction header, and the number of updates per transaction.
9567ad651b5SLars Ellenberg  *   See drbd_actlog.c:struct al_transaction_on_disk
9577ad651b5SLars Ellenberg  * */
9587ad651b5SLars Ellenberg #define AL_UPDATES_PER_TRANSACTION	 64	// arbitrary
9597ad651b5SLars Ellenberg #define AL_CONTEXT_PER_TRANSACTION	919	// (4096 - 36 - 6*64)/4
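
/* Worked arithmetic for the context constant above (illustrative only):
 * a 4k transaction block minus the 36 byte header minus 64 update slots
 * at 6 bytes each leaves 4096 - 36 - 384 = 3676 bytes; at 4 bytes per
 * context extent number that is 919 entries. */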
9607ad651b5SLars Ellenberg 
961b411b363SPhilipp Reisner #if BITS_PER_LONG == 32
962b411b363SPhilipp Reisner #define LN2_BPL 5
963b411b363SPhilipp Reisner #define cpu_to_lel(A) cpu_to_le32(A)
964b411b363SPhilipp Reisner #define lel_to_cpu(A) le32_to_cpu(A)
965b411b363SPhilipp Reisner #elif BITS_PER_LONG == 64
966b411b363SPhilipp Reisner #define LN2_BPL 6
967b411b363SPhilipp Reisner #define cpu_to_lel(A) cpu_to_le64(A)
968b411b363SPhilipp Reisner #define lel_to_cpu(A) le64_to_cpu(A)
969b411b363SPhilipp Reisner #else
970b411b363SPhilipp Reisner #error "LN2 of BITS_PER_LONG unknown!"
971b411b363SPhilipp Reisner #endif
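
/* Illustrative only, not driver code: converting a buffer of longs to the
 * little-endian on-the-wire representation with the helpers above. */
static inline void example_lel_pack(unsigned long *buf, size_t words)
{
	size_t i;
	for (i = 0; i < words; i++)
		buf[i] = (__force unsigned long)cpu_to_lel(buf[i]);
}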
972b411b363SPhilipp Reisner 
973b411b363SPhilipp Reisner /* resync bitmap */
974b411b363SPhilipp Reisner /* 16MB sized 'bitmap extent' to track syncer usage */
975b411b363SPhilipp Reisner struct bm_extent {
976b411b363SPhilipp Reisner 	int rs_left; /* number of bits set (out of sync) in this extent. */
977b411b363SPhilipp Reisner 	int rs_failed; /* number of failed resync requests in this extent. */
978b411b363SPhilipp Reisner 	unsigned long flags;
979b411b363SPhilipp Reisner 	struct lc_element lce;
980b411b363SPhilipp Reisner };
981b411b363SPhilipp Reisner 
982b411b363SPhilipp Reisner #define BME_NO_WRITES  0  /* bm_extent.flags: no more requests on this one! */
983b411b363SPhilipp Reisner #define BME_LOCKED     1  /* bm_extent.flags: syncer active on this one. */
984e3555d85SPhilipp Reisner #define BME_PRIORITY   2  /* finish resync IO on this extent ASAP! App IO waiting! */
985b411b363SPhilipp Reisner 
986b411b363SPhilipp Reisner /* drbd_bitmap.c */
987b411b363SPhilipp Reisner /*
988b411b363SPhilipp Reisner  * We need to store one bit for a block.
989b411b363SPhilipp Reisner  * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
990b411b363SPhilipp Reisner  * Bit 0 ==> local node thinks this block is binary identical on both nodes
991b411b363SPhilipp Reisner  * Bit 1 ==> local node thinks this block needs to be synced.
992b411b363SPhilipp Reisner  */
993b411b363SPhilipp Reisner 
9948e26f9ccSPhilipp Reisner #define SLEEP_TIME (HZ/10)
9958e26f9ccSPhilipp Reisner 
99645dfffebSLars Ellenberg /* We do bitmap IO in units of 4k blocks.
99745dfffebSLars Ellenberg  * We also still have a hardcoded 4k per bit relation. */
998b411b363SPhilipp Reisner #define BM_BLOCK_SHIFT	12			 /* 4k per bit */
999b411b363SPhilipp Reisner #define BM_BLOCK_SIZE	 (1<<BM_BLOCK_SHIFT)
100045dfffebSLars Ellenberg /* mostly arbitrarily set the represented size of one bitmap extent,
100145dfffebSLars Ellenberg  * aka resync extent, to 16 MiB (which is also 512 Byte worth of bitmap
100245dfffebSLars Ellenberg  * at 4k per bit resolution) */
100345dfffebSLars Ellenberg #define BM_EXT_SHIFT	 24	/* 16 MiB per resync extent */
1004b411b363SPhilipp Reisner #define BM_EXT_SIZE	 (1<<BM_EXT_SHIFT)
1005b411b363SPhilipp Reisner 
1006b411b363SPhilipp Reisner #if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
1007b411b363SPhilipp Reisner #error "HAVE YOU FIXED drbdmeta AS WELL??"
1008b411b363SPhilipp Reisner #endif
1009b411b363SPhilipp Reisner 
1010b411b363SPhilipp Reisner /* this many _storage_ sectors are described by one bit */
1011b411b363SPhilipp Reisner #define BM_SECT_TO_BIT(x)   ((x)>>(BM_BLOCK_SHIFT-9))
1012b411b363SPhilipp Reisner #define BM_BIT_TO_SECT(x)   ((sector_t)(x)<<(BM_BLOCK_SHIFT-9))
1013b411b363SPhilipp Reisner #define BM_SECT_PER_BIT     BM_BIT_TO_SECT(1)
1014b411b363SPhilipp Reisner 
1015b411b363SPhilipp Reisner /* bit to represented kilo byte conversion */
1016b411b363SPhilipp Reisner #define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10))
1017b411b363SPhilipp Reisner 
1018b411b363SPhilipp Reisner /* in which _bitmap_ extent (resp. sector) the bit for a certain
1019b411b363SPhilipp Reisner  * _storage_ sector is located */
1020b411b363SPhilipp Reisner #define BM_SECT_TO_EXT(x)   ((x)>>(BM_EXT_SHIFT-9))
1021b411b363SPhilipp Reisner 
1022b411b363SPhilipp Reisner /* how many _storage_ sectors we have per bitmap sector */
1023b411b363SPhilipp Reisner #define BM_EXT_TO_SECT(x)   ((sector_t)(x) << (BM_EXT_SHIFT-9))
1024b411b363SPhilipp Reisner #define BM_SECT_PER_EXT     BM_EXT_TO_SECT(1)
1025b411b363SPhilipp Reisner 
1026b411b363SPhilipp Reisner /* in one sector of the bitmap, we have this many activity_log extents. */
1027b411b363SPhilipp Reisner #define AL_EXT_PER_BM_SECT  (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
1028b411b363SPhilipp Reisner 
1029b411b363SPhilipp Reisner #define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
1030b411b363SPhilipp Reisner #define BM_BLOCKS_PER_BM_EXT_MASK  ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
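
/* Illustrative only, not driver code: a few worked conversions.
 * With BM_BLOCK_SHIFT == 12 and BM_EXT_SHIFT == 24, one bit covers
 * 8 sectors, and one resync extent covers 32768 sectors (16 MiB). */
static inline void example_bm_conversions(void)
{
	sector_t s = 262144;			/* 128 MiB into the device */
	unsigned long bit = BM_SECT_TO_BIT(s);	/* 262144 >> 3 == 32768 */
	sector_t back = BM_BIT_TO_SECT(bit);	/* 32768 << 3 == 262144 */
	unsigned long ext = BM_SECT_TO_EXT(s);	/* 262144 >> 15 == 8 */
	(void)back;
	(void)ext;
}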
1031b411b363SPhilipp Reisner 
1032b411b363SPhilipp Reisner /* the extent in "PER_EXTENT" below is an activity log extent
1033b411b363SPhilipp Reisner  * we need that many (long words/bytes) to store the bitmap
1034b411b363SPhilipp Reisner  *		     of one AL_EXTENT_SIZE chunk of storage.
1035b411b363SPhilipp Reisner  * we can store the bitmap for that many AL_EXTENTS within
1036b411b363SPhilipp Reisner  * one sector of the _on_disk_ bitmap:
1037b411b363SPhilipp Reisner  * bit	 0	  bit 37   bit 38	     bit (512*8)-1
1038b411b363SPhilipp Reisner  *	     ...|........|........|.. // ..|........|
1039b411b363SPhilipp Reisner  * sect. 0	 `296	  `304			   ^(512*8*8)-1
1040b411b363SPhilipp Reisner  *
1041b411b363SPhilipp Reisner #define BM_WORDS_PER_EXT    ( (AL_EXTENT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
1042b411b363SPhilipp Reisner #define BM_BYTES_PER_EXT    ( (AL_EXTENT_SIZE/BM_BLOCK_SIZE) / 8 )  // 128
1043b411b363SPhilipp Reisner #define BM_EXT_PER_SECT	    ( 512 / BM_BYTES_PER_EXT )		 //   4
1044b411b363SPhilipp Reisner  */
1045b411b363SPhilipp Reisner 
1046b411b363SPhilipp Reisner #define DRBD_MAX_SECTORS_32 (0xffffffffLU)
1047ae8bf312SLars Ellenberg /* we have a certain meta data variant that has a fixed on-disk size of 128
1048ae8bf312SLars Ellenberg  * MiB, of which 4k are our "superblock", and 32k are the fixed size activity
1049ae8bf312SLars Ellenberg  * log, leaving this many sectors for the bitmap.
1050ae8bf312SLars Ellenberg  */
1051ae8bf312SLars Ellenberg 
1052ae8bf312SLars Ellenberg #define DRBD_MAX_SECTORS_FIXED_BM \
1053ae8bf312SLars Ellenberg 	  ((MD_128MB_SECT - MD_32kB_SECT - MD_4kB_SECT) * (1LL<<(BM_EXT_SHIFT-9)))
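
/* Worked out (illustrative): 262144 - 64 - 8 = 262072 bitmap sectors,
 * each describing 1<<(BM_EXT_SHIFT-9) = 32768 storage sectors
 * (16 MiB at 4k per bit), i.e. roughly 8.6e9 sectors in total. */
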
1054ae8bf312SLars Ellenberg #if !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32
1055b411b363SPhilipp Reisner #define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_32
1056b411b363SPhilipp Reisner #define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32
1057b411b363SPhilipp Reisner #else
1058ae8bf312SLars Ellenberg #define DRBD_MAX_SECTORS      DRBD_MAX_SECTORS_FIXED_BM
1059b411b363SPhilipp Reisner /* 16 TB in units of sectors */
1060b411b363SPhilipp Reisner #if BITS_PER_LONG == 32
1061b411b363SPhilipp Reisner /* adjust by one page worth of bitmap,
1062b411b363SPhilipp Reisner  * so we won't wrap around in drbd_bm_find_next_bit.
1063b411b363SPhilipp Reisner  * you should use a 64bit OS for that much storage, anyway. */
1064b411b363SPhilipp Reisner #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
1065b411b363SPhilipp Reisner #else
10664b0715f0SLars Ellenberg /* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */
10674b0715f0SLars Ellenberg #define DRBD_MAX_SECTORS_FLEX (1UL << 51)
10684b0715f0SLars Ellenberg /* corresponds to (1UL << 38) bits right now. */
1069b411b363SPhilipp Reisner #endif
1070b411b363SPhilipp Reisner #endif
1071b411b363SPhilipp Reisner 
107223361cf3SLars Ellenberg /* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
107323361cf3SLars Ellenberg  * so for typical PAGE_CACHE_SIZE of 4k, that is (1<<20) Byte.
107423361cf3SLars Ellenberg  * Since we may live in a mixed-platform cluster,
107523361cf3SLars Ellenberg  * we limit ourselves to a platform agnostic constant here for now.
107623361cf3SLars Ellenberg  * A followup commit may allow even bigger BIO sizes,
107723361cf3SLars Ellenberg  * once we have thought that through. */
107898683650SPhilipp Reisner #define DRBD_MAX_BIO_SIZE (1U << 20)
107923361cf3SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
108023361cf3SLars Ellenberg #error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
108123361cf3SLars Ellenberg #endif
1082db141b2fSLars Ellenberg #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12)       /* Works always = 4k */
1083b411b363SPhilipp Reisner 
108498683650SPhilipp Reisner #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
108598683650SPhilipp Reisner #define DRBD_MAX_BIO_SIZE_P95    (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
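
/* Illustrative only, not driver code: how a bio size limit would be picked
 * from the negotiated protocol version, using the boundaries named in the
 * comments above (the real policy lives in drbd_nl.c and is more involved). */
static inline unsigned int example_max_bio_size(int agreed_pro_version)
{
	if (agreed_pro_version < 95)
		return DRBD_MAX_SIZE_H80_PACKET;	/* 32 KiB */
	if (agreed_pro_version < 100)
		return DRBD_MAX_BIO_SIZE_P95;		/* 128 KiB */
	return DRBD_MAX_BIO_SIZE;			/* 1 MiB */
}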
1086b411b363SPhilipp Reisner 
1087b30ab791SAndreas Gruenbacher extern int  drbd_bm_init(struct drbd_device *device);
1088b30ab791SAndreas Gruenbacher extern int  drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
1089b30ab791SAndreas Gruenbacher extern void drbd_bm_cleanup(struct drbd_device *device);
1090b30ab791SAndreas Gruenbacher extern void drbd_bm_set_all(struct drbd_device *device);
1091b30ab791SAndreas Gruenbacher extern void drbd_bm_clear_all(struct drbd_device *device);
10924b0715f0SLars Ellenberg /* set/clear/test only a few bits at a time */
1093b411b363SPhilipp Reisner extern int  drbd_bm_set_bits(
1094b30ab791SAndreas Gruenbacher 		struct drbd_device *device, unsigned long s, unsigned long e);
1095b411b363SPhilipp Reisner extern int  drbd_bm_clear_bits(
1096b30ab791SAndreas Gruenbacher 		struct drbd_device *device, unsigned long s, unsigned long e);
10974b0715f0SLars Ellenberg extern int drbd_bm_count_bits(
1098b30ab791SAndreas Gruenbacher 	struct drbd_device *device, const unsigned long s, const unsigned long e);
10994b0715f0SLars Ellenberg /* bm_set_bits variant for use while holding drbd_bm_lock,
11004b0715f0SLars Ellenberg  * may process the whole bitmap in one go */
1101b30ab791SAndreas Gruenbacher extern void _drbd_bm_set_bits(struct drbd_device *device,
1102b411b363SPhilipp Reisner 		const unsigned long s, const unsigned long e);
1103b30ab791SAndreas Gruenbacher extern int  drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
1104b30ab791SAndreas Gruenbacher extern int  drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
1105b30ab791SAndreas Gruenbacher extern int  drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local);
1106b30ab791SAndreas Gruenbacher extern int  drbd_bm_read(struct drbd_device *device) __must_hold(local);
1107b30ab791SAndreas Gruenbacher extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
1108b30ab791SAndreas Gruenbacher extern int  drbd_bm_write(struct drbd_device *device) __must_hold(local);
1109b30ab791SAndreas Gruenbacher extern int  drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
1110b30ab791SAndreas Gruenbacher extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
1111b30ab791SAndreas Gruenbacher extern int  drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
1112b30ab791SAndreas Gruenbacher extern size_t	     drbd_bm_words(struct drbd_device *device);
1113b30ab791SAndreas Gruenbacher extern unsigned long drbd_bm_bits(struct drbd_device *device);
1114b30ab791SAndreas Gruenbacher extern sector_t      drbd_bm_capacity(struct drbd_device *device);
11154b0715f0SLars Ellenberg 
11164b0715f0SLars Ellenberg #define DRBD_END_OF_BITMAP	(~(unsigned long)0)
1117b30ab791SAndreas Gruenbacher extern unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo);
1118b411b363SPhilipp Reisner /* bm_find_next variants for use while you hold drbd_bm_lock() */
1119b30ab791SAndreas Gruenbacher extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo);
1120b30ab791SAndreas Gruenbacher extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo);
1121b30ab791SAndreas Gruenbacher extern unsigned long _drbd_bm_total_weight(struct drbd_device *device);
1122b30ab791SAndreas Gruenbacher extern unsigned long drbd_bm_total_weight(struct drbd_device *device);
1123b30ab791SAndreas Gruenbacher extern int drbd_bm_rs_done(struct drbd_device *device);
1124b411b363SPhilipp Reisner /* for receive_bitmap */
1125b30ab791SAndreas Gruenbacher extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset,
1126b411b363SPhilipp Reisner 		size_t number, unsigned long *buffer);
112719f843aaSLars Ellenberg /* for _drbd_send_bitmap */
1128b30ab791SAndreas Gruenbacher extern void drbd_bm_get_lel(struct drbd_device *device, size_t offset,
1129b411b363SPhilipp Reisner 		size_t number, unsigned long *buffer);
1130b411b363SPhilipp Reisner 
1131b30ab791SAndreas Gruenbacher extern void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags);
1132b30ab791SAndreas Gruenbacher extern void drbd_bm_unlock(struct drbd_device *device);
1133b411b363SPhilipp Reisner /* drbd_main.c */
1134b411b363SPhilipp Reisner 
1135b411b363SPhilipp Reisner extern struct kmem_cache *drbd_request_cache;
11366c852becSAndreas Gruenbacher extern struct kmem_cache *drbd_ee_cache;	/* peer requests */
1137b411b363SPhilipp Reisner extern struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
1138b411b363SPhilipp Reisner extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
1139b411b363SPhilipp Reisner extern mempool_t *drbd_request_mempool;
1140b411b363SPhilipp Reisner extern mempool_t *drbd_ee_mempool;
1141b411b363SPhilipp Reisner 
11424281808fSLars Ellenberg /* drbd's page pool, used to buffer data received from the peer,
11434281808fSLars Ellenberg  * or data requested by the peer.
11444281808fSLars Ellenberg  *
11454281808fSLars Ellenberg  * This does not have an emergency reserve.
11464281808fSLars Ellenberg  *
11474281808fSLars Ellenberg  * An allocation first takes pages from the pool;
11484281808fSLars Ellenberg  * only if the pool is depleted does it fall back to allocating from the system.
11494281808fSLars Ellenberg  *
11504281808fSLars Ellenberg  * The assumption is that pages taken from this pool will be processed,
11514281808fSLars Ellenberg  * and given back, "quickly", and then can be recycled, so we can avoid
11524281808fSLars Ellenberg  * frequent calls to alloc_page(), and still will be able to make progress even
11534281808fSLars Ellenberg  * under memory pressure.
11544281808fSLars Ellenberg  */
11554281808fSLars Ellenberg extern struct page *drbd_pp_pool;
1156b411b363SPhilipp Reisner extern spinlock_t   drbd_pp_lock;
1157b411b363SPhilipp Reisner extern int	    drbd_pp_vacant;
1158b411b363SPhilipp Reisner extern wait_queue_head_t drbd_pp_wait;
1159b411b363SPhilipp Reisner 
11604281808fSLars Ellenberg /* We also need a standard (emergency-reserve backed) page pool
11614281808fSLars Ellenberg  * for meta data IO (activity log, bitmap).
11624281808fSLars Ellenberg  * We can keep it global, as long as it is used as "N pages at a time".
11634281808fSLars Ellenberg  * 128 should be plenty; currently we could probably get away with as few as 1.
11644281808fSLars Ellenberg  */
11654281808fSLars Ellenberg #define DRBD_MIN_POOL_PAGES	128
11664281808fSLars Ellenberg extern mempool_t *drbd_md_io_page_pool;
11674281808fSLars Ellenberg 
11689476f39dSLars Ellenberg /* We also need to make sure we get a bio
11699476f39dSLars Ellenberg  * when we need it for housekeeping purposes */
11709476f39dSLars Ellenberg extern struct bio_set *drbd_md_io_bio_set;
11719476f39dSLars Ellenberg /* to allocate from that set */
11729476f39dSLars Ellenberg extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
11739476f39dSLars Ellenberg 
1174b411b363SPhilipp Reisner extern rwlock_t global_state_lock;
1175b411b363SPhilipp Reisner 
1176bde89a9eSAndreas Gruenbacher extern int conn_lowest_minor(struct drbd_connection *connection);
1177*a6b32bc3SAndreas Gruenbacher enum drbd_ret_code drbd_create_minor(struct drbd_connection *connection, unsigned int minor, int vnr);
117881fa2e67SPhilipp Reisner extern void drbd_minor_destroy(struct kref *kref);
1179b411b363SPhilipp Reisner 
1180bde89a9eSAndreas Gruenbacher extern int set_resource_options(struct drbd_connection *connection, struct res_opts *res_opts);
1181bde89a9eSAndreas Gruenbacher extern struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts);
11829dc9fbb3SPhilipp Reisner extern void conn_destroy(struct kref *kref);
1183bde89a9eSAndreas Gruenbacher struct drbd_connection *conn_get_by_name(const char *name);
1184bde89a9eSAndreas Gruenbacher extern struct drbd_connection *conn_get_by_addrs(void *my_addr, int my_addr_len,
1185089c075dSAndreas Gruenbacher 					    void *peer_addr, int peer_addr_len);
1186bde89a9eSAndreas Gruenbacher extern void conn_free_crypto(struct drbd_connection *connection);
1187b411b363SPhilipp Reisner 
1188b411b363SPhilipp Reisner extern int proc_details;
1189b411b363SPhilipp Reisner 
1190b411b363SPhilipp Reisner /* drbd_req */
1191113fef9eSLars Ellenberg extern void do_submit(struct work_struct *ws);
119254761697SAndreas Gruenbacher extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
11935a7bbad2SChristoph Hellwig extern void drbd_make_request(struct request_queue *q, struct bio *bio);
1194b30ab791SAndreas Gruenbacher extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req);
1195b411b363SPhilipp Reisner extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
1196b411b363SPhilipp Reisner extern int is_valid_ar_handle(struct drbd_request *, sector_t);
1197b411b363SPhilipp Reisner 
1198b411b363SPhilipp Reisner 
1199b411b363SPhilipp Reisner /* drbd_nl.c */
12008432b314SLars Ellenberg extern int drbd_msg_put_info(const char *info);
1201b30ab791SAndreas Gruenbacher extern void drbd_suspend_io(struct drbd_device *device);
1202b30ab791SAndreas Gruenbacher extern void drbd_resume_io(struct drbd_device *device);
1203b411b363SPhilipp Reisner extern char *ppsize(char *buf, unsigned long long size);
120454761697SAndreas Gruenbacher extern sector_t drbd_new_dev_size(struct drbd_device *, struct drbd_backing_dev *, sector_t, int);
1205e96c9633SPhilipp Reisner enum determine_dev_size {
1206d752b269SPhilipp Reisner 	DS_ERROR_SHRINK = -3,
1207d752b269SPhilipp Reisner 	DS_ERROR_SPACE_MD = -2,
1208e96c9633SPhilipp Reisner 	DS_ERROR = -1,
1209e96c9633SPhilipp Reisner 	DS_UNCHANGED = 0,
1210e96c9633SPhilipp Reisner 	DS_SHRUNK = 1,
121157737adcSPhilipp Reisner 	DS_GREW = 2,
121257737adcSPhilipp Reisner 	DS_GREW_FROM_ZERO = 3,
1213e96c9633SPhilipp Reisner };
1214d752b269SPhilipp Reisner extern enum determine_dev_size
121554761697SAndreas Gruenbacher drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
121654761697SAndreas Gruenbacher extern void resync_after_online_grow(struct drbd_device *);
1217b30ab791SAndreas Gruenbacher extern void drbd_reconsider_max_bio_size(struct drbd_device *device);
1218b30ab791SAndreas Gruenbacher extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
1219bf885f8aSAndreas Gruenbacher 					enum drbd_role new_role,
1220b411b363SPhilipp Reisner 					int force);
1221bde89a9eSAndreas Gruenbacher extern bool conn_try_outdate_peer(struct drbd_connection *connection);
1222bde89a9eSAndreas Gruenbacher extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
1223b30ab791SAndreas Gruenbacher extern int drbd_khelper(struct drbd_device *device, char *cmd);
1224b411b363SPhilipp Reisner 
1225b411b363SPhilipp Reisner /* drbd_worker.c */
1226b411b363SPhilipp Reisner extern int drbd_worker(struct drbd_thread *thi);
1227b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
1228b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device);
1229b30ab791SAndreas Gruenbacher extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side);
1230b30ab791SAndreas Gruenbacher extern void resume_next_sg(struct drbd_device *device);
1231b30ab791SAndreas Gruenbacher extern void suspend_other_sg(struct drbd_device *device);
1232b30ab791SAndreas Gruenbacher extern int drbd_resync_finished(struct drbd_device *device);
1233b411b363SPhilipp Reisner /* maybe rather drbd_main.c ? */
1234b30ab791SAndreas Gruenbacher extern void *drbd_md_get_buffer(struct drbd_device *device);
1235b30ab791SAndreas Gruenbacher extern void drbd_md_put_buffer(struct drbd_device *device);
1236b30ab791SAndreas Gruenbacher extern int drbd_md_sync_page_io(struct drbd_device *device,
1237b411b363SPhilipp Reisner 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
123854761697SAndreas Gruenbacher extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int);
1239b30ab791SAndreas Gruenbacher extern void wait_until_done_or_force_detached(struct drbd_device *device,
124044edfb0dSLars Ellenberg 		struct drbd_backing_dev *bdev, unsigned int *done);
1241b30ab791SAndreas Gruenbacher extern void drbd_rs_controller_reset(struct drbd_device *device);
1242b411b363SPhilipp Reisner 
1243b30ab791SAndreas Gruenbacher static inline void ov_out_of_sync_print(struct drbd_device *device)
1244b411b363SPhilipp Reisner {
1245b30ab791SAndreas Gruenbacher 	if (device->ov_last_oos_size) {
1246b411b363SPhilipp Reisner 		dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n",
1247b30ab791SAndreas Gruenbacher 		     (unsigned long long)device->ov_last_oos_start,
1248b30ab791SAndreas Gruenbacher 		     (unsigned long)device->ov_last_oos_size);
1249b411b363SPhilipp Reisner 	}
1250b30ab791SAndreas Gruenbacher 	device->ov_last_oos_size = 0;
1251b411b363SPhilipp Reisner }
1252b411b363SPhilipp Reisner 
1253b411b363SPhilipp Reisner 
125454761697SAndreas Gruenbacher extern void drbd_csum_bio(struct drbd_device *, struct crypto_hash *, struct bio *, void *);
125554761697SAndreas Gruenbacher extern void drbd_csum_ee(struct drbd_device *, struct crypto_hash *,
1256f6ffca9fSAndreas Gruenbacher 			 struct drbd_peer_request *, void *);
1257b411b363SPhilipp Reisner /* worker callbacks */
125899920dc5SAndreas Gruenbacher extern int w_e_end_data_req(struct drbd_work *, int);
125999920dc5SAndreas Gruenbacher extern int w_e_end_rsdata_req(struct drbd_work *, int);
126099920dc5SAndreas Gruenbacher extern int w_e_end_csum_rs_req(struct drbd_work *, int);
126199920dc5SAndreas Gruenbacher extern int w_e_end_ov_reply(struct drbd_work *, int);
126299920dc5SAndreas Gruenbacher extern int w_e_end_ov_req(struct drbd_work *, int);
126399920dc5SAndreas Gruenbacher extern int w_ov_finished(struct drbd_work *, int);
126499920dc5SAndreas Gruenbacher extern int w_resync_timer(struct drbd_work *, int);
126599920dc5SAndreas Gruenbacher extern int w_send_write_hint(struct drbd_work *, int);
126699920dc5SAndreas Gruenbacher extern int w_make_resync_request(struct drbd_work *, int);
126799920dc5SAndreas Gruenbacher extern int w_send_dblock(struct drbd_work *, int);
126899920dc5SAndreas Gruenbacher extern int w_send_read_req(struct drbd_work *, int);
126999920dc5SAndreas Gruenbacher extern int w_prev_work_done(struct drbd_work *, int);
127099920dc5SAndreas Gruenbacher extern int w_e_reissue(struct drbd_work *, int);
127199920dc5SAndreas Gruenbacher extern int w_restart_disk_io(struct drbd_work *, int);
12728f7bed77SAndreas Gruenbacher extern int w_send_out_of_sync(struct drbd_work *, int);
127399920dc5SAndreas Gruenbacher extern int w_start_resync(struct drbd_work *, int);
1274b411b363SPhilipp Reisner 
1275b411b363SPhilipp Reisner extern void resync_timer_fn(unsigned long data);
1276370a43e7SPhilipp Reisner extern void start_resync_timer_fn(unsigned long data);
1277b411b363SPhilipp Reisner 
1278b411b363SPhilipp Reisner /* drbd_receiver.c */
1279b30ab791SAndreas Gruenbacher extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
128054761697SAndreas Gruenbacher extern int drbd_submit_peer_request(struct drbd_device *,
1281fbe29decSAndreas Gruenbacher 				    struct drbd_peer_request *, const unsigned,
1282fbe29decSAndreas Gruenbacher 				    const int);
128354761697SAndreas Gruenbacher extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
128454761697SAndreas Gruenbacher extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_device *, u64,
12850db55363SAndreas Gruenbacher 						     sector_t, unsigned int,
1286f6ffca9fSAndreas Gruenbacher 						     gfp_t) __must_hold(local);
128754761697SAndreas Gruenbacher extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
1288f6ffca9fSAndreas Gruenbacher 				 int);
12893967deb1SAndreas Gruenbacher #define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0)
12903967deb1SAndreas Gruenbacher #define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1)
129154761697SAndreas Gruenbacher extern struct page *drbd_alloc_pages(struct drbd_device *, unsigned int, bool);
1292b30ab791SAndreas Gruenbacher extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
1293b30ab791SAndreas Gruenbacher extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
1294bde89a9eSAndreas Gruenbacher extern void conn_flush_workqueue(struct drbd_connection *connection);
1295b30ab791SAndreas Gruenbacher extern int drbd_connected(struct drbd_device *device);
1296b30ab791SAndreas Gruenbacher static inline void drbd_flush_workqueue(struct drbd_device *device)
1297b411b363SPhilipp Reisner {
1298*a6b32bc3SAndreas Gruenbacher 	conn_flush_workqueue(first_peer_device(device)->connection);
12990e29d163SPhilipp Reisner }
1300b411b363SPhilipp Reisner 
1301ed439848SLars Ellenberg /* Yes, there is kernel_setsockopt, but only since 2.6.18.
1302ed439848SLars Ellenberg  * So we have our own copy of it here. */
1303b411b363SPhilipp Reisner static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
1304ed439848SLars Ellenberg 				  char *optval, int optlen)
1305b411b363SPhilipp Reisner {
1306ed439848SLars Ellenberg 	mm_segment_t oldfs = get_fs();
1307ed439848SLars Ellenberg 	char __user *uoptval;
1308b411b363SPhilipp Reisner 	int err;
1309ed439848SLars Ellenberg 
1310ed439848SLars Ellenberg 	uoptval = (char __user __force *)optval;
1311ed439848SLars Ellenberg 
1312ed439848SLars Ellenberg 	set_fs(KERNEL_DS);
1313b411b363SPhilipp Reisner 	if (level == SOL_SOCKET)
1314ed439848SLars Ellenberg 		err = sock_setsockopt(sock, level, optname, uoptval, optlen);
1315b411b363SPhilipp Reisner 	else
1316ed439848SLars Ellenberg 		err = sock->ops->setsockopt(sock, level, optname, uoptval,
1317b411b363SPhilipp Reisner 					    optlen);
1318ed439848SLars Ellenberg 	set_fs(oldfs);
1319b411b363SPhilipp Reisner 	return err;
1320b411b363SPhilipp Reisner }
1321b411b363SPhilipp Reisner 
1322b411b363SPhilipp Reisner static inline void drbd_tcp_cork(struct socket *sock)
1323b411b363SPhilipp Reisner {
1324ed439848SLars Ellenberg 	int val = 1;
1325b411b363SPhilipp Reisner 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
1326ed439848SLars Ellenberg 			(char*)&val, sizeof(val));
1327b411b363SPhilipp Reisner }
1328b411b363SPhilipp Reisner 
1329b411b363SPhilipp Reisner static inline void drbd_tcp_uncork(struct socket *sock)
1330b411b363SPhilipp Reisner {
1331ed439848SLars Ellenberg 	int val = 0;
1332b411b363SPhilipp Reisner 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
1333ed439848SLars Ellenberg 			(char*)&val, sizeof(val));
1334b411b363SPhilipp Reisner }
1335b411b363SPhilipp Reisner 
1336b411b363SPhilipp Reisner static inline void drbd_tcp_nodelay(struct socket *sock)
1337b411b363SPhilipp Reisner {
1338ed439848SLars Ellenberg 	int val = 1;
1339b411b363SPhilipp Reisner 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY,
1340ed439848SLars Ellenberg 			(char*)&val, sizeof(val));
1341b411b363SPhilipp Reisner }
1342b411b363SPhilipp Reisner 
1343b411b363SPhilipp Reisner static inline void drbd_tcp_quickack(struct socket *sock)
1344b411b363SPhilipp Reisner {
1345ed439848SLars Ellenberg 	int val = 2;
1346b411b363SPhilipp Reisner 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
1347ed439848SLars Ellenberg 			(char*)&val, sizeof(val));
1348b411b363SPhilipp Reisner }
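
/* Illustrative usage, not driver code: corking batches several small
 * sends into as few TCP segments as possible. */
static inline void example_corked_burst(struct socket *sock)
{
	drbd_tcp_cork(sock);
	/* ... queue several small sends on sock here ... */
	drbd_tcp_uncork(sock);	/* push out the corked data */
}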
1349b411b363SPhilipp Reisner 
1350bde89a9eSAndreas Gruenbacher void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
1351b411b363SPhilipp Reisner 
1352b411b363SPhilipp Reisner /* drbd_proc.c */
1353b411b363SPhilipp Reisner extern struct proc_dir_entry *drbd_proc;
13547d4e9d09SEmese Revfy extern const struct file_operations drbd_proc_fops;
1355b411b363SPhilipp Reisner extern const char *drbd_conn_str(enum drbd_conns s);
1356b411b363SPhilipp Reisner extern const char *drbd_role_str(enum drbd_role s);
1357b411b363SPhilipp Reisner 
1358b411b363SPhilipp Reisner /* drbd_actlog.c */
1359b30ab791SAndreas Gruenbacher extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
1360b30ab791SAndreas Gruenbacher extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
1361b30ab791SAndreas Gruenbacher extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
1362b30ab791SAndreas Gruenbacher extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate);
1363b30ab791SAndreas Gruenbacher extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
1364b30ab791SAndreas Gruenbacher extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
1365b30ab791SAndreas Gruenbacher extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
1366b30ab791SAndreas Gruenbacher extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector);
1367b30ab791SAndreas Gruenbacher extern void drbd_rs_cancel_all(struct drbd_device *device);
1368b30ab791SAndreas Gruenbacher extern int drbd_rs_del_all(struct drbd_device *device);
1369b30ab791SAndreas Gruenbacher extern void drbd_rs_failed_io(struct drbd_device *device,
1370b411b363SPhilipp Reisner 		sector_t sector, int size);
1371b30ab791SAndreas Gruenbacher extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
1372b30ab791SAndreas Gruenbacher extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector,
1373b411b363SPhilipp Reisner 		int size, const char *file, const unsigned int line);
1374b30ab791SAndreas Gruenbacher #define drbd_set_in_sync(device, sector, size) \
1375b30ab791SAndreas Gruenbacher 	__drbd_set_in_sync(device, sector, size, __FILE__, __LINE__)
1376b30ab791SAndreas Gruenbacher extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector,
1377b411b363SPhilipp Reisner 		int size, const char *file, const unsigned int line);
1378b30ab791SAndreas Gruenbacher #define drbd_set_out_of_sync(device, sector, size) \
1379b30ab791SAndreas Gruenbacher 	__drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__)
1380b30ab791SAndreas Gruenbacher extern void drbd_al_shrink(struct drbd_device *device);
138154761697SAndreas Gruenbacher extern int drbd_initialize_al(struct drbd_device *, void *);
1382b411b363SPhilipp Reisner 
1383b411b363SPhilipp Reisner /* drbd_nl.c */
13843b98c0c2SLars Ellenberg /* state info broadcast */
13853b98c0c2SLars Ellenberg struct sib_info {
13863b98c0c2SLars Ellenberg 	enum drbd_state_info_bcast_reason sib_reason;
13873b98c0c2SLars Ellenberg 	union {
13883b98c0c2SLars Ellenberg 		struct {
13893b98c0c2SLars Ellenberg 			char *helper_name;
13903b98c0c2SLars Ellenberg 			unsigned helper_exit_code;
13913b98c0c2SLars Ellenberg 		};
13923b98c0c2SLars Ellenberg 		struct {
13933b98c0c2SLars Ellenberg 			union drbd_state os;
13943b98c0c2SLars Ellenberg 			union drbd_state ns;
13953b98c0c2SLars Ellenberg 		};
13963b98c0c2SLars Ellenberg 	};
13973b98c0c2SLars Ellenberg };
1398b30ab791SAndreas Gruenbacher void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
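
/* Illustrative only, not driver code: a state change would be broadcast
 * to userspace roughly like this (assuming SIB_STATE_CHANGE from the
 * genl API as the reason). */
static inline void example_bcast_state_change(struct drbd_device *device,
		union drbd_state os, union drbd_state ns)
{
	struct sib_info sib;

	sib.sib_reason = SIB_STATE_CHANGE;
	sib.os = os;
	sib.ns = ns;
	drbd_bcast_event(device, &sib);
}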
1399b411b363SPhilipp Reisner 
1400b411b363SPhilipp Reisner /*
1401b411b363SPhilipp Reisner  * inline helper functions
1402b411b363SPhilipp Reisner  *************************/
1403b411b363SPhilipp Reisner 
140445bb912bSLars Ellenberg /* see also page_chain_add and friends in drbd_receiver.c */
140545bb912bSLars Ellenberg static inline struct page *page_chain_next(struct page *page)
140645bb912bSLars Ellenberg {
140745bb912bSLars Ellenberg 	return (struct page *)page_private(page);
140845bb912bSLars Ellenberg }
140945bb912bSLars Ellenberg #define page_chain_for_each(page) \
141045bb912bSLars Ellenberg 	for (; page && ({ prefetch(page_chain_next(page)); 1; }); \
141145bb912bSLars Ellenberg 			page = page_chain_next(page))
141245bb912bSLars Ellenberg #define page_chain_for_each_safe(page, n) \
141345bb912bSLars Ellenberg 	for (; page && ({ n = page_chain_next(page); 1; }); page = n)
141445bb912bSLars Ellenberg 
141545bb912bSLars Ellenberg 
1416045417f7SAndreas Gruenbacher static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req)
141745bb912bSLars Ellenberg {
1418db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
141945bb912bSLars Ellenberg 	page_chain_for_each(page) {
142045bb912bSLars Ellenberg 		if (page_count(page) > 1)
142145bb912bSLars Ellenberg 			return 1;
142245bb912bSLars Ellenberg 	}
142345bb912bSLars Ellenberg 	return 0;
142445bb912bSLars Ellenberg }
142545bb912bSLars Ellenberg 
1426bf885f8aSAndreas Gruenbacher static inline enum drbd_state_rv
1427b30ab791SAndreas Gruenbacher _drbd_set_state(struct drbd_device *device, union drbd_state ns,
1428bf885f8aSAndreas Gruenbacher 		enum chg_state_flags flags, struct completion *done)
1429b411b363SPhilipp Reisner {
1430bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
1431b411b363SPhilipp Reisner 
1432b411b363SPhilipp Reisner 	read_lock(&global_state_lock);
1433b30ab791SAndreas Gruenbacher 	rv = __drbd_set_state(device, ns, flags, done);
1434b411b363SPhilipp Reisner 	read_unlock(&global_state_lock);
1435b411b363SPhilipp Reisner 
1436b411b363SPhilipp Reisner 	return rv;
1437b411b363SPhilipp Reisner }
1438b411b363SPhilipp Reisner 
1439b30ab791SAndreas Gruenbacher static inline union drbd_state drbd_read_state(struct drbd_device *device)
1440b411b363SPhilipp Reisner {
144178bae59bSPhilipp Reisner 	union drbd_state rv;
144278bae59bSPhilipp Reisner 
1443b30ab791SAndreas Gruenbacher 	rv.i = device->state.i;
1444*a6b32bc3SAndreas Gruenbacher 	rv.susp = first_peer_device(device)->connection->susp;
1445*a6b32bc3SAndreas Gruenbacher 	rv.susp_nod = first_peer_device(device)->connection->susp_nod;
1446*a6b32bc3SAndreas Gruenbacher 	rv.susp_fen = first_peer_device(device)->connection->susp_fen;
144778bae59bSPhilipp Reisner 
144878bae59bSPhilipp Reisner 	return rv;
1449b411b363SPhilipp Reisner }
1450b411b363SPhilipp Reisner 
1451383606e0SLars Ellenberg enum drbd_force_detach_flags {
1452a2a3c74fSLars Ellenberg 	DRBD_READ_ERROR,
1453a2a3c74fSLars Ellenberg 	DRBD_WRITE_ERROR,
1454383606e0SLars Ellenberg 	DRBD_META_IO_ERROR,
1455383606e0SLars Ellenberg 	DRBD_FORCE_DETACH,
1456383606e0SLars Ellenberg };
1457383606e0SLars Ellenberg 
1458b411b363SPhilipp Reisner #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
1459b30ab791SAndreas Gruenbacher static inline void __drbd_chk_io_error_(struct drbd_device *device,
1460a2a3c74fSLars Ellenberg 		enum drbd_force_detach_flags df,
1461383606e0SLars Ellenberg 		const char *where)
1462b411b363SPhilipp Reisner {
1463daeda1ccSPhilipp Reisner 	enum drbd_io_error_p ep;
1464daeda1ccSPhilipp Reisner 
1465daeda1ccSPhilipp Reisner 	rcu_read_lock();
1466b30ab791SAndreas Gruenbacher 	ep = rcu_dereference(device->ldev->disk_conf)->on_io_error;
1467daeda1ccSPhilipp Reisner 	rcu_read_unlock();
1468daeda1ccSPhilipp Reisner 	switch (ep) {
1469daeda1ccSPhilipp Reisner 	case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */
1470a2a3c74fSLars Ellenberg 		if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
14717383506cSLars Ellenberg 			if (__ratelimit(&drbd_ratelimit_state))
147282f59cc6SLars Ellenberg 				dev_err(DEV, "Local IO failed in %s.\n", where);
1473b30ab791SAndreas Gruenbacher 			if (device->state.disk > D_INCONSISTENT)
1474b30ab791SAndreas Gruenbacher 				_drbd_set_state(_NS(device, disk, D_INCONSISTENT), CS_HARD, NULL);
1475b411b363SPhilipp Reisner 			break;
1476b411b363SPhilipp Reisner 		}
1477a2a3c74fSLars Ellenberg 		/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
1478b411b363SPhilipp Reisner 	case EP_DETACH:
1479b411b363SPhilipp Reisner 	case EP_CALL_HELPER:
1480a2a3c74fSLars Ellenberg 		/* Remember whether we saw a READ or WRITE error.
1481a2a3c74fSLars Ellenberg 		 *
1482a2a3c74fSLars Ellenberg 		 * Recovery of the affected area for WRITE failure is covered
1483a2a3c74fSLars Ellenberg 		 * by the activity log.
1484a2a3c74fSLars Ellenberg 		 * READ errors may fall outside that area though. Certain READ
1485a2a3c74fSLars Ellenberg 		 * errors can be "healed" by writing good data to the affected
1486a2a3c74fSLars Ellenberg 		 * blocks, which triggers block re-allocation in lower layers.
1487a2a3c74fSLars Ellenberg 		 *
1488a2a3c74fSLars Ellenberg 		 * If we can not write the bitmap after a READ error,
1489a2a3c74fSLars Ellenberg 		 * we may need to trigger a full sync (see w_go_diskless()).
1490a2a3c74fSLars Ellenberg 		 *
1491a2a3c74fSLars Ellenberg 		 * Force-detach is not really an IO error, but rather a
1492a2a3c74fSLars Ellenberg 		 * desperate measure to try to deal with a completely
1493a2a3c74fSLars Ellenberg 		 * unresponsive lower level IO stack.
1494a2a3c74fSLars Ellenberg 		 * Still it should be treated as a WRITE error.
1495a2a3c74fSLars Ellenberg 		 *
1496a2a3c74fSLars Ellenberg 		 * Meta IO error is always WRITE error:
1497a2a3c74fSLars Ellenberg 		 * we read meta data only once during attach,
1498a2a3c74fSLars Ellenberg 		 * which will fail in case of errors.
1499a2a3c74fSLars Ellenberg 		 */
1500b30ab791SAndreas Gruenbacher 		set_bit(WAS_IO_ERROR, &device->flags);
1501a2a3c74fSLars Ellenberg 		if (df == DRBD_READ_ERROR)
1502b30ab791SAndreas Gruenbacher 			set_bit(WAS_READ_ERROR, &device->flags);
1503a2a3c74fSLars Ellenberg 		if (df == DRBD_FORCE_DETACH)
1504b30ab791SAndreas Gruenbacher 			set_bit(FORCE_DETACH, &device->flags);
1505b30ab791SAndreas Gruenbacher 		if (device->state.disk > D_FAILED) {
1506b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS(device, disk, D_FAILED), CS_HARD, NULL);
150782f59cc6SLars Ellenberg 			dev_err(DEV,
150882f59cc6SLars Ellenberg 				"Local IO failed in %s. Detaching...\n", where);
1509b411b363SPhilipp Reisner 		}
1510b411b363SPhilipp Reisner 		break;
1511b411b363SPhilipp Reisner 	}
1512b411b363SPhilipp Reisner }
1513b411b363SPhilipp Reisner 
1514b411b363SPhilipp Reisner /**
1515b411b363SPhilipp Reisner  * drbd_chk_io_error() - Handle the on_io_error setting; should be called from all io completion handlers
1516b30ab791SAndreas Gruenbacher  * @device:	 DRBD device.
1517b411b363SPhilipp Reisner  * @error:	 Error code passed to the IO completion callback
1518b411b363SPhilipp Reisner  * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
1519b411b363SPhilipp Reisner  *
1520b411b363SPhilipp Reisner  * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED)
1521b411b363SPhilipp Reisner  */
1522b411b363SPhilipp Reisner #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
1523b30ab791SAndreas Gruenbacher static inline void drbd_chk_io_error_(struct drbd_device *device,
1524383606e0SLars Ellenberg 	int error, enum drbd_force_detach_flags forcedetach, const char *where)
1525b411b363SPhilipp Reisner {
1526b411b363SPhilipp Reisner 	if (error) {
1527b411b363SPhilipp Reisner 		unsigned long flags;
1528*a6b32bc3SAndreas Gruenbacher 		spin_lock_irqsave(&first_peer_device(device)->connection->req_lock, flags);
1529b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error_(device, forcedetach, where);
1530*a6b32bc3SAndreas Gruenbacher 		spin_unlock_irqrestore(&first_peer_device(device)->connection->req_lock, flags);
1531b411b363SPhilipp Reisner 	}
1532b411b363SPhilipp Reisner }
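
/* Illustrative call site, not driver code: an IO completion handler would
 * report a failed write like this and let the on_io_error policy above
 * decide whether to detach. */
static inline void example_write_endio(struct drbd_device *device, int error)
{
	drbd_chk_io_error(device, error, DRBD_WRITE_ERROR);
}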
1533b411b363SPhilipp Reisner 
1534b411b363SPhilipp Reisner 
1535b411b363SPhilipp Reisner /**
1536b411b363SPhilipp Reisner  * drbd_md_first_sector() - Returns the first sector number of the meta data area
1537b411b363SPhilipp Reisner  * @bdev:	Meta data block device.
1538b411b363SPhilipp Reisner  *
1539b411b363SPhilipp Reisner  * BTW, for internal meta data, this happens to be the maximum capacity
1540b411b363SPhilipp Reisner  * we could agree upon with our peer node.
1541b411b363SPhilipp Reisner  */
154268e41a43SLars Ellenberg static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
1543b411b363SPhilipp Reisner {
154468e41a43SLars Ellenberg 	switch (bdev->md.meta_dev_idx) {
1545b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_INTERNAL:
1546b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_FLEX_INT:
1547b411b363SPhilipp Reisner 		return bdev->md.md_offset + bdev->md.bm_offset;
1548b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_FLEX_EXT:
1549b411b363SPhilipp Reisner 	default:
1550b411b363SPhilipp Reisner 		return bdev->md.md_offset;
1551b411b363SPhilipp Reisner 	}
1552b411b363SPhilipp Reisner }
1553b411b363SPhilipp Reisner 
1554b411b363SPhilipp Reisner /**
1555b411b363SPhilipp Reisner  * drbd_md_last_sector() - Return the last sector number of the meta data area
1556b411b363SPhilipp Reisner  * @bdev:	Meta data block device.
1557b411b363SPhilipp Reisner  */
1558b411b363SPhilipp Reisner static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
1559b411b363SPhilipp Reisner {
156068e41a43SLars Ellenberg 	switch (bdev->md.meta_dev_idx) {
1561b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_INTERNAL:
1562b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_FLEX_INT:
1563ae8bf312SLars Ellenberg 		return bdev->md.md_offset + MD_4kB_SECT -1;
1564b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_FLEX_EXT:
1565b411b363SPhilipp Reisner 	default:
1566ae8bf312SLars Ellenberg 		return bdev->md.md_offset + bdev->md.md_size_sect -1;
1567b411b363SPhilipp Reisner 	}
1568b411b363SPhilipp Reisner }
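
/* Illustrative only, not driver code: the two accessors above bound the
 * meta data area, so its total size in 512-byte sectors is simply: */
static inline sector_t example_md_area_sectors(struct drbd_backing_dev *bdev)
{
	return drbd_md_last_sector(bdev) - drbd_md_first_sector(bdev) + 1;
}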
1569b411b363SPhilipp Reisner 
1570b411b363SPhilipp Reisner /* Returns the number of 512 byte sectors of the device */
1571b411b363SPhilipp Reisner static inline sector_t drbd_get_capacity(struct block_device *bdev)
1572b411b363SPhilipp Reisner {
1573b411b363SPhilipp Reisner 	/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
157477304d2aSMike Snitzer 	return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0;
1575b411b363SPhilipp Reisner }
1576b411b363SPhilipp Reisner 
1577b411b363SPhilipp Reisner /**
1578b411b363SPhilipp Reisner  * drbd_get_max_capacity() - Returns the capacity we announce to our peer
1579b411b363SPhilipp Reisner  * @bdev:	Meta data block device.
1580b411b363SPhilipp Reisner  *
1581b411b363SPhilipp Reisner  * Returns the capacity we announce to our peer.  We clip ourselves at the
1582b411b363SPhilipp Reisner  * various MAX_SECTORS, because if we don't, the current implementation will
1583b411b363SPhilipp Reisner  * oops sooner or later.
1584b411b363SPhilipp Reisner  */
1585b411b363SPhilipp Reisner static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
1586b411b363SPhilipp Reisner {
1587b411b363SPhilipp Reisner 	sector_t s;
1588daeda1ccSPhilipp Reisner 
158968e41a43SLars Ellenberg 	switch (bdev->md.meta_dev_idx) {
1590b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_INTERNAL:
1591b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_FLEX_INT:
1592b411b363SPhilipp Reisner 		s = drbd_get_capacity(bdev->backing_bdev)
1593b411b363SPhilipp Reisner 			? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
159468e41a43SLars Ellenberg 				drbd_md_first_sector(bdev))
1595b411b363SPhilipp Reisner 			: 0;
1596b411b363SPhilipp Reisner 		break;
1597b411b363SPhilipp Reisner 	case DRBD_MD_INDEX_FLEX_EXT:
1598b411b363SPhilipp Reisner 		s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
1599b411b363SPhilipp Reisner 				drbd_get_capacity(bdev->backing_bdev));
1600b411b363SPhilipp Reisner 		/* clip at maximum size the meta device can support */
1601b411b363SPhilipp Reisner 		s = min_t(sector_t, s,
1602b411b363SPhilipp Reisner 			BM_EXT_TO_SECT(bdev->md.md_size_sect
1603b411b363SPhilipp Reisner 				     - bdev->md.bm_offset));
1604b411b363SPhilipp Reisner 		break;
1605b411b363SPhilipp Reisner 	default:
1606b411b363SPhilipp Reisner 		s = min_t(sector_t, DRBD_MAX_SECTORS,
1607b411b363SPhilipp Reisner 				drbd_get_capacity(bdev->backing_bdev));
1608b411b363SPhilipp Reisner 	}
1609b411b363SPhilipp Reisner 	return s;
1610b411b363SPhilipp Reisner }
1611b411b363SPhilipp Reisner 
1612b411b363SPhilipp Reisner /**
16133a4d4eb3SLars Ellenberg  * drbd_md_ss() - Return the sector number of our meta data super block
1614b411b363SPhilipp Reisner  * @bdev:	Meta data block device.
1615b411b363SPhilipp Reisner  */
16163a4d4eb3SLars Ellenberg static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
1617b411b363SPhilipp Reisner {
16183a4d4eb3SLars Ellenberg 	const int meta_dev_idx = bdev->md.meta_dev_idx;
1619daeda1ccSPhilipp Reisner 
16203a4d4eb3SLars Ellenberg 	if (meta_dev_idx == DRBD_MD_INDEX_FLEX_EXT)
1621b411b363SPhilipp Reisner 		return 0;
16223a4d4eb3SLars Ellenberg 
16233a4d4eb3SLars Ellenberg 	/* Since drbd08, internal meta data is always "flexible".
1624ae8bf312SLars Ellenberg 	 * position: last 4k aligned block of 4k size */
16253a4d4eb3SLars Ellenberg 	if (meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
16263a4d4eb3SLars Ellenberg 	    meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)
1627ae8bf312SLars Ellenberg 		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8;
16283a4d4eb3SLars Ellenberg 
16293a4d4eb3SLars Ellenberg 	/* external, some index; this is the old fixed size layout */
16303a4d4eb3SLars Ellenberg 	return MD_128MB_SECT * bdev->md.meta_dev_idx;
1631b411b363SPhilipp Reisner }
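
/* Worked example (illustrative): on a 1000005-sector backing device with
 * internal meta data, 1000005 & ~7ULL rounds down to the last 4k boundary
 * at sector 1000000, so the superblock occupies sectors 999992..999999. */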
1632b411b363SPhilipp Reisner 
1633b411b363SPhilipp Reisner static inline void
1634b411b363SPhilipp Reisner drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
1635b411b363SPhilipp Reisner {
1636b411b363SPhilipp Reisner 	unsigned long flags;
1637b411b363SPhilipp Reisner 	spin_lock_irqsave(&q->q_lock, flags);
1638b411b363SPhilipp Reisner 	list_add(&w->list, &q->q);
1639b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&q->q_lock, flags);
16408c0785a5SLars Ellenberg 	wake_up(&q->q_wait);
1641b411b363SPhilipp Reisner }
1642b411b363SPhilipp Reisner 
1643b411b363SPhilipp Reisner static inline void
1644b411b363SPhilipp Reisner drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
1645b411b363SPhilipp Reisner {
1646b411b363SPhilipp Reisner 	unsigned long flags;
1647b411b363SPhilipp Reisner 	spin_lock_irqsave(&q->q_lock, flags);
1648b411b363SPhilipp Reisner 	list_add_tail(&w->list, &q->q);
1649b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&q->q_lock, flags);
16508c0785a5SLars Ellenberg 	wake_up(&q->q_wait);
1651b411b363SPhilipp Reisner }
1652b411b363SPhilipp Reisner 
1653bde89a9eSAndreas Gruenbacher static inline void wake_asender(struct drbd_connection *connection)
1654b411b363SPhilipp Reisner {
1655bde89a9eSAndreas Gruenbacher 	if (test_bit(SIGNAL_ASENDER, &connection->flags))
1656bde89a9eSAndreas Gruenbacher 		force_sig(DRBD_SIG, connection->asender.task);
1657b411b363SPhilipp Reisner }
1658b411b363SPhilipp Reisner 
1659bde89a9eSAndreas Gruenbacher static inline void request_ping(struct drbd_connection *connection)
1660b411b363SPhilipp Reisner {
1661bde89a9eSAndreas Gruenbacher 	set_bit(SEND_PING, &connection->flags);
1662bde89a9eSAndreas Gruenbacher 	wake_asender(connection);
1663b411b363SPhilipp Reisner }
1664b411b363SPhilipp Reisner 
1665bde89a9eSAndreas Gruenbacher extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
166654761697SAndreas Gruenbacher extern void *drbd_prepare_command(struct drbd_device *, struct drbd_socket *);
1667bde89a9eSAndreas Gruenbacher extern int conn_send_command(struct drbd_connection *, struct drbd_socket *,
1668dba58587SAndreas Gruenbacher 			     enum drbd_packet, unsigned int, void *,
1669dba58587SAndreas Gruenbacher 			     unsigned int);
167054761697SAndreas Gruenbacher extern int drbd_send_command(struct drbd_device *, struct drbd_socket *,
1671dba58587SAndreas Gruenbacher 			     enum drbd_packet, unsigned int, void *,
1672dba58587SAndreas Gruenbacher 			     unsigned int);
1673b411b363SPhilipp Reisner 
1674bde89a9eSAndreas Gruenbacher extern int drbd_send_ping(struct drbd_connection *connection);
1675bde89a9eSAndreas Gruenbacher extern int drbd_send_ping_ack(struct drbd_connection *connection);
167654761697SAndreas Gruenbacher extern int drbd_send_state_req(struct drbd_device *, union drbd_state, union drbd_state);
1677bde89a9eSAndreas Gruenbacher extern int conn_send_state_req(struct drbd_connection *, union drbd_state, union drbd_state);
1678b411b363SPhilipp Reisner 
1679b411b363SPhilipp Reisner static inline void drbd_thread_stop(struct drbd_thread *thi)
1680b411b363SPhilipp Reisner {
168181e84650SAndreas Gruenbacher 	_drbd_thread_stop(thi, false, true);
1682b411b363SPhilipp Reisner }
1683b411b363SPhilipp Reisner 
1684b411b363SPhilipp Reisner static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
1685b411b363SPhilipp Reisner {
168681e84650SAndreas Gruenbacher 	_drbd_thread_stop(thi, false, false);
1687b411b363SPhilipp Reisner }
1688b411b363SPhilipp Reisner 
1689b411b363SPhilipp Reisner static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
1690b411b363SPhilipp Reisner {
169181e84650SAndreas Gruenbacher 	_drbd_thread_stop(thi, true, false);
1692b411b363SPhilipp Reisner }
1693b411b363SPhilipp Reisner 
1694b411b363SPhilipp Reisner /* counts how many answer packets we expect from our peer,
1695b411b363SPhilipp Reisner  * for either explicit application requests,
1696b411b363SPhilipp Reisner  * or implicit barrier packets as necessary.
1697b411b363SPhilipp Reisner  * increased:
1698b411b363SPhilipp Reisner  *  w_send_barrier
16998554df1cSAndreas Gruenbacher  *  _req_mod(req, QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ);
1700b411b363SPhilipp Reisner  *    it is much easier and equally valid to count what we queue for the
1701b411b363SPhilipp Reisner  *    worker, even before it actually was queued or sent.
1702b411b363SPhilipp Reisner  *    (drbd_make_request_common; recovery path on read io-error)
1703b411b363SPhilipp Reisner  * decreased:
1704b411b363SPhilipp Reisner  *  got_BarrierAck (or, respectively, tl_clear, tl_clear_barrier)
17058554df1cSAndreas Gruenbacher  *  _req_mod(req, DATA_RECEIVED)
1706b411b363SPhilipp Reisner  *     [from receive_DataReply]
17078554df1cSAndreas Gruenbacher  *  _req_mod(req, WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED)
1708b411b363SPhilipp Reisner  *     [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
1709b411b363SPhilipp Reisner  *     for some reason it is NOT decreased in got_NegAck,
1710b411b363SPhilipp Reisner  *     but in the resulting cleanup code from report_params.
1711b411b363SPhilipp Reisner  *     we should try to remember the reason for that...
17128554df1cSAndreas Gruenbacher  *  _req_mod(req, SEND_FAILED or SEND_CANCELED)
17138554df1cSAndreas Gruenbacher  *  _req_mod(req, CONNECTION_LOST_WHILE_PENDING)
1714b411b363SPhilipp Reisner  *     [from tl_clear_barrier]
1715b411b363SPhilipp Reisner  */
1716b30ab791SAndreas Gruenbacher static inline void inc_ap_pending(struct drbd_device *device)
1717b411b363SPhilipp Reisner {
1718b30ab791SAndreas Gruenbacher 	atomic_inc(&device->ap_pending_cnt);
1719b411b363SPhilipp Reisner }
1720b411b363SPhilipp Reisner 
172149559d87SPhilipp Reisner #define ERR_IF_CNT_IS_NEGATIVE(which, func, line)			\
1722b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->which) < 0)				\
1723b411b363SPhilipp Reisner 		dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n",	\
172449559d87SPhilipp Reisner 			func, line,					\
1725b30ab791SAndreas Gruenbacher 			atomic_read(&device->which))
1726b411b363SPhilipp Reisner 
1727b30ab791SAndreas Gruenbacher #define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
1728b30ab791SAndreas Gruenbacher static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
172949559d87SPhilipp Reisner {
1730b30ab791SAndreas Gruenbacher 	if (atomic_dec_and_test(&device->ap_pending_cnt))
1731b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
173249559d87SPhilipp Reisner 	ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line);
173349559d87SPhilipp Reisner }
1734b411b363SPhilipp Reisner 
1735b411b363SPhilipp Reisner /* counts how many resync-related answers we still expect from the peer
1736b411b363SPhilipp Reisner  *		     increase			decrease
1737b411b363SPhilipp Reisner  * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY)
173825985edcSLucas De Marchi  * C_SYNC_SOURCE sends P_RS_DATA_REPLY   (and expects P_WRITE_ACK with ID_SYNCER)
1739b411b363SPhilipp Reisner  *					   (or P_NEG_ACK with ID_SYNCER)
1740b411b363SPhilipp Reisner  */
1741b30ab791SAndreas Gruenbacher static inline void inc_rs_pending(struct drbd_device *device)
1742b411b363SPhilipp Reisner {
1743b30ab791SAndreas Gruenbacher 	atomic_inc(&device->rs_pending_cnt);
1744b411b363SPhilipp Reisner }
1745b411b363SPhilipp Reisner 
1746b30ab791SAndreas Gruenbacher #define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
1747b30ab791SAndreas Gruenbacher static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
174849559d87SPhilipp Reisner {
1749b30ab791SAndreas Gruenbacher 	atomic_dec(&device->rs_pending_cnt);
175049559d87SPhilipp Reisner 	ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line);
175149559d87SPhilipp Reisner }
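
/*
 * Editor's sketch (not part of the original header): one resync round
 * trip, condensed from the table above; each side balances its own
 * rs_pending_cnt.
 *
 *	target: send P_RS_DATA_REQUEST		-> inc_rs_pending(device)
 *	        receive P_RS_DATA_REPLY		-> dec_rs_pending(device)
 *	source: send P_RS_DATA_REPLY		-> inc_rs_pending(device)
 *	        get P_WRITE_ACK with ID_SYNCER	-> dec_rs_pending(device)
 */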
1752b411b363SPhilipp Reisner 
1753b411b363SPhilipp Reisner /* counts how many answers we still need to send to the peer.
1754b411b363SPhilipp Reisner  * increased on
1755b411b363SPhilipp Reisner  *  receive_Data	unless protocol A;
1756b411b363SPhilipp Reisner  *			we need to send a P_RECV_ACK (proto B)
1757b411b363SPhilipp Reisner  *			or P_WRITE_ACK (proto C)
1758b411b363SPhilipp Reisner  *  receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK
1759b411b363SPhilipp Reisner  *  receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA
1760b411b363SPhilipp Reisner  *  receive_Barrier_*	we need to send a P_BARRIER_ACK
1761b411b363SPhilipp Reisner  */
1762b30ab791SAndreas Gruenbacher static inline void inc_unacked(struct drbd_device *device)
1763b411b363SPhilipp Reisner {
1764b30ab791SAndreas Gruenbacher 	atomic_inc(&device->unacked_cnt);
1765b411b363SPhilipp Reisner }
1766b411b363SPhilipp Reisner 
1767b30ab791SAndreas Gruenbacher #define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
1768b30ab791SAndreas Gruenbacher static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
1769b411b363SPhilipp Reisner {
1770b30ab791SAndreas Gruenbacher 	atomic_dec(&device->unacked_cnt);
177149559d87SPhilipp Reisner 	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
1772b411b363SPhilipp Reisner }
1773b411b363SPhilipp Reisner 
1774b30ab791SAndreas Gruenbacher #define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
1775b30ab791SAndreas Gruenbacher static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
1776b411b363SPhilipp Reisner {
1777b30ab791SAndreas Gruenbacher 	atomic_sub(n, &device->unacked_cnt);
177849559d87SPhilipp Reisner 	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
1779b411b363SPhilipp Reisner }
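
/*
 * Editor's sketch (not part of the original header): unacked_cnt for a
 * protocol C write on the receiving side, condensed from the rules above.
 *
 *	receive_Data()			-> inc_unacked(device)
 *	... local write completes ...
 *	send P_WRITE_ACK		-> dec_unacked(device)
 */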
1780b411b363SPhilipp Reisner 
1781b411b363SPhilipp Reisner /**
1782b30ab791SAndreas Gruenbacher  * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev
1783b411b363SPhilipp Reisner  * @M:		DRBD device.
1784b411b363SPhilipp Reisner  *
1785b30ab791SAndreas Gruenbacher  * You have to call put_ldev() when finished working with device->ldev.
1786b411b363SPhilipp Reisner  */
1787b411b363SPhilipp Reisner #define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT))
1788b411b363SPhilipp Reisner #define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
1789b411b363SPhilipp Reisner 
1790b30ab791SAndreas Gruenbacher static inline void put_ldev(struct drbd_device *device)
1791b411b363SPhilipp Reisner {
1792b30ab791SAndreas Gruenbacher 	int i = atomic_dec_return(&device->local_cnt);
17939a0d9d03SLars Ellenberg 
17949a0d9d03SLars Ellenberg 	/* This may be called from some endio handler,
17959a0d9d03SLars Ellenberg 	 * so we must not sleep here. */
17969a0d9d03SLars Ellenberg 
1797b411b363SPhilipp Reisner 	__release(local);
17981d7734a0SLars Ellenberg 	D_ASSERT(i >= 0);
1799e9e6f3ecSLars Ellenberg 	if (i == 0) {
1800b30ab791SAndreas Gruenbacher 		if (device->state.disk == D_DISKLESS)
180182f59cc6SLars Ellenberg 			/* even internal references gone, safe to destroy */
1802b30ab791SAndreas Gruenbacher 			drbd_ldev_destroy(device);
1803b30ab791SAndreas Gruenbacher 		if (device->state.disk == D_FAILED) {
180482f59cc6SLars Ellenberg 			/* all application IO references gone. */
1805b30ab791SAndreas Gruenbacher 			if (!test_and_set_bit(GO_DISKLESS, &device->flags))
1806*a6b32bc3SAndreas Gruenbacher 				drbd_queue_work(&first_peer_device(device)->connection->sender_work, &device->go_diskless);
18079114d795SLars Ellenberg 		}
1808b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
1809b411b363SPhilipp Reisner 	}
1810e9e6f3ecSLars Ellenberg }
1811b411b363SPhilipp Reisner 
1812b411b363SPhilipp Reisner #ifndef __CHECKER__
1813b30ab791SAndreas Gruenbacher static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins)
1814b411b363SPhilipp Reisner {
1815b411b363SPhilipp Reisner 	int io_allowed;
1816b411b363SPhilipp Reisner 
181782f59cc6SLars Ellenberg 	/* never get a reference while D_DISKLESS */
1818b30ab791SAndreas Gruenbacher 	if (device->state.disk == D_DISKLESS)
181982f59cc6SLars Ellenberg 		return 0;
182082f59cc6SLars Ellenberg 
1821b30ab791SAndreas Gruenbacher 	atomic_inc(&device->local_cnt);
1822b30ab791SAndreas Gruenbacher 	io_allowed = (device->state.disk >= mins);
1823b411b363SPhilipp Reisner 	if (!io_allowed)
1824b30ab791SAndreas Gruenbacher 		put_ldev(device);
1825b411b363SPhilipp Reisner 	return io_allowed;
1826b411b363SPhilipp Reisner }
1827b411b363SPhilipp Reisner #else
1828b30ab791SAndreas Gruenbacher extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins);
1829b411b363SPhilipp Reisner #endif
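
/*
 * Editor's example (hypothetical helper, not part of the original header):
 * the canonical get_ldev()/put_ldev() bracket.  A failed state check
 * inside _get_ldev_if_state() already drops its reference, so callers
 * balance with put_ldev() only on success.
 */
static inline bool drbd_example_disk_is_up_to_date(struct drbd_device *device)
{
	bool rv = false;

	if (get_ldev_if_state(device, D_UP_TO_DATE)) {
		/* device->ldev cannot be destroyed until the put_ldev() */
		rv = true;
		put_ldev(device);
	}
	return rv;
}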
1830b411b363SPhilipp Reisner 
1831b411b363SPhilipp Reisner /* you must hold a "get_ldev" reference */
1832b30ab791SAndreas Gruenbacher static inline void drbd_get_syncer_progress(struct drbd_device *device,
1833b411b363SPhilipp Reisner 		unsigned long *bits_left, unsigned int *per_mil_done)
1834b411b363SPhilipp Reisner {
18354b0715f0SLars Ellenberg 	/* this is to break it at compile time when we change that, in case we
18364b0715f0SLars Ellenberg 	 * want to support more than (1<<32) bits on a 32bit arch. */
1837b30ab791SAndreas Gruenbacher 	typecheck(unsigned long, device->rs_total);
1838b411b363SPhilipp Reisner 
1839b411b363SPhilipp Reisner 	/* note: both rs_total and rs_left are in bits, i.e. in
1840b411b363SPhilipp Reisner 	 * units of BM_BLOCK_SIZE.
1841b411b363SPhilipp Reisner 	 * for the percentage, we don't care. */
1842b411b363SPhilipp Reisner 
1843b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
1844b30ab791SAndreas Gruenbacher 		*bits_left = device->ov_left;
1845439d5953SLars Ellenberg 	else
1846b30ab791SAndreas Gruenbacher 		*bits_left = drbd_bm_total_weight(device) - device->rs_failed;
1847b411b363SPhilipp Reisner 	/* in the else branch below: >> 10 to prevent overflow,
1848b411b363SPhilipp Reisner 	 * +1 to prevent division by zero */
1849b30ab791SAndreas Gruenbacher 	if (*bits_left > device->rs_total) {
1850b411b363SPhilipp Reisner 		/* doh. maybe a logic bug somewhere.
1851b411b363SPhilipp Reisner 		 * may also be just a race condition
1852b411b363SPhilipp Reisner 		 * between this and a disconnect during sync.
1853b411b363SPhilipp Reisner 		 * for now, just prevent in-kernel buffer overflow.
1854b411b363SPhilipp Reisner 		 */
1855b411b363SPhilipp Reisner 		smp_rmb();
1856b411b363SPhilipp Reisner 		dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
1857b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn),
1858b30ab791SAndreas Gruenbacher 				*bits_left, device->rs_total, device->rs_failed);
1859b411b363SPhilipp Reisner 		*per_mil_done = 0;
1860b411b363SPhilipp Reisner 	} else {
18614b0715f0SLars Ellenberg 		/* Make sure the division happens in long context.
18624b0715f0SLars Ellenberg 		 * We allow up to one petabyte storage right now,
18634b0715f0SLars Ellenberg 		 * at a granularity of 4k per bit that is 2**38 bits.
18644b0715f0SLars Ellenberg 		 * After shift right and multiplication by 1000,
18654b0715f0SLars Ellenberg 		 * this should still fit easily into a 32bit long,
18664b0715f0SLars Ellenberg 		 * so we don't need a 64bit division on 32bit arch.
18674b0715f0SLars Ellenberg 		 * Note: currently we don't support such large bitmaps on 32bit
18684b0715f0SLars Ellenberg 	 * arch anyway, but no harm done to be prepared for it here.
18694b0715f0SLars Ellenberg 		 */
1870b30ab791SAndreas Gruenbacher 		unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10;
18714b0715f0SLars Ellenberg 		unsigned long left = *bits_left >> shift;
1872b30ab791SAndreas Gruenbacher 		unsigned long total = 1UL + (device->rs_total >> shift);
18734b0715f0SLars Ellenberg 		unsigned long tmp = 1000UL - left * 1000UL/total;
1874b411b363SPhilipp Reisner 		*per_mil_done = tmp;
1875b411b363SPhilipp Reisner 	}
1876b411b363SPhilipp Reisner }
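
/*
 * Editor's worked example (numbers assumed for illustration): a 4 TiB
 * device at 4 KiB per bit has rs_total = 2^30 bits < UINT_MAX, so
 * shift = 10.  With *bits_left = 2^28, i.e. a quarter left to sync:
 *	left  = 2^28 >> 10       = 262144
 *	total = 1 + (2^30 >> 10) = 1048577
 *	tmp   = 1000 - 262144000 / 1048577 = 1000 - 249 = 751
 * so *per_mil_done = 751; the +1 guard against division by zero costs
 * one per-mil here, and 262144000 still fits comfortably in 32 bits.
 */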
1877b411b363SPhilipp Reisner 
1878b411b363SPhilipp Reisner 
1879b411b363SPhilipp Reisner /* this throttles on-the-fly application requests
1880b411b363SPhilipp Reisner  * according to max_buffers settings;
1881b411b363SPhilipp Reisner  * maybe re-implement using semaphores? */
1882b30ab791SAndreas Gruenbacher static inline int drbd_get_max_buffers(struct drbd_device *device)
1883b411b363SPhilipp Reisner {
188444ed167dSPhilipp Reisner 	struct net_conf *nc;
188544ed167dSPhilipp Reisner 	int mxb;
188644ed167dSPhilipp Reisner 
188744ed167dSPhilipp Reisner 	rcu_read_lock();
1888*a6b32bc3SAndreas Gruenbacher 	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
188944ed167dSPhilipp Reisner 	mxb = nc ? nc->max_buffers : 1000000;  /* arbitrary limit on open requests */
189044ed167dSPhilipp Reisner 	rcu_read_unlock();
189144ed167dSPhilipp Reisner 
1892b411b363SPhilipp Reisner 	return mxb;
1893b411b363SPhilipp Reisner }
1894b411b363SPhilipp Reisner 
1895b30ab791SAndreas Gruenbacher static inline int drbd_state_is_stable(struct drbd_device *device)
1896b411b363SPhilipp Reisner {
1897b30ab791SAndreas Gruenbacher 	union drbd_dev_state s = device->state;
1898b411b363SPhilipp Reisner 
1899b411b363SPhilipp Reisner 	/* DO NOT add a default clause, we want the compiler to warn us
1900b411b363SPhilipp Reisner 	 * for any newly introduced state we may have forgotten to add here */
1901b411b363SPhilipp Reisner 
1902b411b363SPhilipp Reisner 	switch ((enum drbd_conns)s.conn) {
1903b411b363SPhilipp Reisner 	/* new io only accepted when there is no connection, ... */
1904b411b363SPhilipp Reisner 	case C_STANDALONE:
1905b411b363SPhilipp Reisner 	case C_WF_CONNECTION:
1906b411b363SPhilipp Reisner 	/* ... or there is a well established connection. */
1907b411b363SPhilipp Reisner 	case C_CONNECTED:
1908b411b363SPhilipp Reisner 	case C_SYNC_SOURCE:
1909b411b363SPhilipp Reisner 	case C_SYNC_TARGET:
1910b411b363SPhilipp Reisner 	case C_VERIFY_S:
1911b411b363SPhilipp Reisner 	case C_VERIFY_T:
1912b411b363SPhilipp Reisner 	case C_PAUSED_SYNC_S:
1913b411b363SPhilipp Reisner 	case C_PAUSED_SYNC_T:
191467531718SPhilipp Reisner 	case C_AHEAD:
191567531718SPhilipp Reisner 	case C_BEHIND:
19163719094eSPhilipp Reisner 		/* transitional states, IO allowed */
1917b411b363SPhilipp Reisner 	case C_DISCONNECTING:
1918b411b363SPhilipp Reisner 	case C_UNCONNECTED:
1919b411b363SPhilipp Reisner 	case C_TIMEOUT:
1920b411b363SPhilipp Reisner 	case C_BROKEN_PIPE:
1921b411b363SPhilipp Reisner 	case C_NETWORK_FAILURE:
1922b411b363SPhilipp Reisner 	case C_PROTOCOL_ERROR:
1923b411b363SPhilipp Reisner 	case C_TEAR_DOWN:
1924b411b363SPhilipp Reisner 	case C_WF_REPORT_PARAMS:
1925b411b363SPhilipp Reisner 	case C_STARTING_SYNC_S:
1926b411b363SPhilipp Reisner 	case C_STARTING_SYNC_T:
19273719094eSPhilipp Reisner 		break;
19283719094eSPhilipp Reisner 
19293719094eSPhilipp Reisner 		/* Allow IO in BM exchange states with new protocols */
1930b411b363SPhilipp Reisner 	case C_WF_BITMAP_S:
1931*a6b32bc3SAndreas Gruenbacher 		if (first_peer_device(device)->connection->agreed_pro_version < 96)
19323719094eSPhilipp Reisner 			return 0;
19333719094eSPhilipp Reisner 		break;
19343719094eSPhilipp Reisner 
19353719094eSPhilipp Reisner 		/* no new io accepted in these states */
1936b411b363SPhilipp Reisner 	case C_WF_BITMAP_T:
1937b411b363SPhilipp Reisner 	case C_WF_SYNC_UUID:
1938b411b363SPhilipp Reisner 	case C_MASK:
1939b411b363SPhilipp Reisner 		/* not "stable" */
1940b411b363SPhilipp Reisner 		return 0;
1941b411b363SPhilipp Reisner 	}
1942b411b363SPhilipp Reisner 
1943b411b363SPhilipp Reisner 	switch ((enum drbd_disk_state)s.disk) {
1944b411b363SPhilipp Reisner 	case D_DISKLESS:
1945b411b363SPhilipp Reisner 	case D_INCONSISTENT:
1946b411b363SPhilipp Reisner 	case D_OUTDATED:
1947b411b363SPhilipp Reisner 	case D_CONSISTENT:
1948b411b363SPhilipp Reisner 	case D_UP_TO_DATE:
19495ca1de03SPhilipp Reisner 	case D_FAILED:
1950b411b363SPhilipp Reisner 		/* disk state is stable as well. */
1951b411b363SPhilipp Reisner 		break;
1952b411b363SPhilipp Reisner 
1953d942ae44SPhilipp Reisner 	/* no new io accepted during transitional states */
1954b411b363SPhilipp Reisner 	case D_ATTACHING:
1955b411b363SPhilipp Reisner 	case D_NEGOTIATING:
1956b411b363SPhilipp Reisner 	case D_UNKNOWN:
1957b411b363SPhilipp Reisner 	case D_MASK:
1958b411b363SPhilipp Reisner 		/* not "stable" */
1959b411b363SPhilipp Reisner 		return 0;
1960b411b363SPhilipp Reisner 	}
1961b411b363SPhilipp Reisner 
1962b411b363SPhilipp Reisner 	return 1;
1963b411b363SPhilipp Reisner }
1964b411b363SPhilipp Reisner 
1965b30ab791SAndreas Gruenbacher static inline int drbd_suspended(struct drbd_device *device)
1966fb22c402SPhilipp Reisner {
1967*a6b32bc3SAndreas Gruenbacher 	struct drbd_connection *connection = first_peer_device(device)->connection;
19688e0af25fSPhilipp Reisner 
1969bde89a9eSAndreas Gruenbacher 	return connection->susp || connection->susp_fen || connection->susp_nod;
1970fb22c402SPhilipp Reisner }
1971fb22c402SPhilipp Reisner 
1972b30ab791SAndreas Gruenbacher static inline bool may_inc_ap_bio(struct drbd_device *device)
1973b411b363SPhilipp Reisner {
1974b30ab791SAndreas Gruenbacher 	int mxb = drbd_get_max_buffers(device);
1975b411b363SPhilipp Reisner 
1976b30ab791SAndreas Gruenbacher 	if (drbd_suspended(device))
19771b881ef7SAndreas Gruenbacher 		return false;
1978b30ab791SAndreas Gruenbacher 	if (test_bit(SUSPEND_IO, &device->flags))
19791b881ef7SAndreas Gruenbacher 		return false;
1980b411b363SPhilipp Reisner 
1981b411b363SPhilipp Reisner 	/* to avoid potential deadlock or bitmap corruption,
1982b411b363SPhilipp Reisner 	 * in various places, we only allow new application io
1983b411b363SPhilipp Reisner 	 * to start during "stable" states. */
1984b411b363SPhilipp Reisner 
1985b411b363SPhilipp Reisner 	/* no new io accepted when attaching or detaching the disk */
1986b30ab791SAndreas Gruenbacher 	if (!drbd_state_is_stable(device))
19871b881ef7SAndreas Gruenbacher 		return false;
1988b411b363SPhilipp Reisner 
1989b411b363SPhilipp Reisner 	/* since some older kernels don't have atomic_add_unless,
1990b411b363SPhilipp Reisner 	 * and we are within the spinlock anyway, we have this workaround.  */
1991b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->ap_bio_cnt) > mxb)
19921b881ef7SAndreas Gruenbacher 		return false;
1993b30ab791SAndreas Gruenbacher 	if (test_bit(BITMAP_IO, &device->flags))
19941b881ef7SAndreas Gruenbacher 		return false;
19951b881ef7SAndreas Gruenbacher 	return true;
1996b411b363SPhilipp Reisner }
1997b411b363SPhilipp Reisner 
1998b30ab791SAndreas Gruenbacher static inline bool inc_ap_bio_cond(struct drbd_device *device)
19998869d683SPhilipp Reisner {
20001b881ef7SAndreas Gruenbacher 	bool rv = false;
20018869d683SPhilipp Reisner 
2002*a6b32bc3SAndreas Gruenbacher 	spin_lock_irq(&first_peer_device(device)->connection->req_lock);
2003b30ab791SAndreas Gruenbacher 	rv = may_inc_ap_bio(device);
20048869d683SPhilipp Reisner 	if (rv)
2005b30ab791SAndreas Gruenbacher 		atomic_inc(&device->ap_bio_cnt);
2006*a6b32bc3SAndreas Gruenbacher 	spin_unlock_irq(&first_peer_device(device)->connection->req_lock);
20078869d683SPhilipp Reisner 
20088869d683SPhilipp Reisner 	return rv;
20098869d683SPhilipp Reisner }
20108869d683SPhilipp Reisner 
2011b30ab791SAndreas Gruenbacher static inline void inc_ap_bio(struct drbd_device *device)
2012b411b363SPhilipp Reisner {
2013b411b363SPhilipp Reisner 	/* we wait here
2014b411b363SPhilipp Reisner 	 *    as long as the device is suspended,
2015b411b363SPhilipp Reisner 	 *    while the bitmap is still on the fly during the connection
2016d942ae44SPhilipp Reisner 	 *    handshake, and as long as we would exceed the max_buffers limit.
2017b411b363SPhilipp Reisner 	 *
2018b411b363SPhilipp Reisner 	 * to avoid races with the reconnect code,
2019b411b363SPhilipp Reisner 	 * we need to atomic_inc within the spinlock. */
2020b411b363SPhilipp Reisner 
2021b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait, inc_ap_bio_cond(device));
2022b411b363SPhilipp Reisner }
2023b411b363SPhilipp Reisner 
2024b30ab791SAndreas Gruenbacher static inline void dec_ap_bio(struct drbd_device *device)
2025b411b363SPhilipp Reisner {
2026b30ab791SAndreas Gruenbacher 	int mxb = drbd_get_max_buffers(device);
2027b30ab791SAndreas Gruenbacher 	int ap_bio = atomic_dec_return(&device->ap_bio_cnt);
2028b411b363SPhilipp Reisner 
2029b411b363SPhilipp Reisner 	D_ASSERT(ap_bio >= 0);
20307ee1fb93SLars Ellenberg 
2031b30ab791SAndreas Gruenbacher 	if (ap_bio == 0 && test_bit(BITMAP_IO, &device->flags)) {
2032b30ab791SAndreas Gruenbacher 		if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
2033*a6b32bc3SAndreas Gruenbacher 			drbd_queue_work(&first_peer_device(device)->connection->sender_work, &device->bm_io_work.w);
20347ee1fb93SLars Ellenberg 	}
20357ee1fb93SLars Ellenberg 
2036b411b363SPhilipp Reisner 	/* this currently does wake_up for every dec_ap_bio!
2037b411b363SPhilipp Reisner 	 * maybe rather introduce some type of hysteresis?
2038b411b363SPhilipp Reisner 	 * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
2039b411b363SPhilipp Reisner 	if (ap_bio < mxb)
2040b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
2041b411b363SPhilipp Reisner }
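
/*
 * Editor's sketch (not part of the original header): the ap_bio_cnt
 * bracket around one application bio; error paths elided.
 *
 *	inc_ap_bio(device);	// may sleep until may_inc_ap_bio() holds
 *	... submit the application bio ...
 *	(once the master bio completes:)
 *	dec_ap_bio(device);	// may queue bitmap IO, wakes misc_wait
 */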
2042b411b363SPhilipp Reisner 
2043b30ab791SAndreas Gruenbacher static inline bool verify_can_do_stop_sector(struct drbd_device *device)
204458ffa580SLars Ellenberg {
2045*a6b32bc3SAndreas Gruenbacher 	return first_peer_device(device)->connection->agreed_pro_version >= 97 &&
2046*a6b32bc3SAndreas Gruenbacher 		first_peer_device(device)->connection->agreed_pro_version != 100;
204758ffa580SLars Ellenberg }
204858ffa580SLars Ellenberg 
2049b30ab791SAndreas Gruenbacher static inline int drbd_set_ed_uuid(struct drbd_device *device, u64 val)
2050b411b363SPhilipp Reisner {
2051b30ab791SAndreas Gruenbacher 	int changed = device->ed_uuid != val;
2052b30ab791SAndreas Gruenbacher 	device->ed_uuid = val;
205362b0da3aSLars Ellenberg 	return changed;
2054b411b363SPhilipp Reisner }
2055b411b363SPhilipp Reisner 
2056b30ab791SAndreas Gruenbacher static inline int drbd_queue_order_type(struct drbd_device *device)
2057b411b363SPhilipp Reisner {
2058b411b363SPhilipp Reisner 	/* sorry, we currently have no working implementation
2059b411b363SPhilipp Reisner 	 * of distributed TCQ stuff */
2060b411b363SPhilipp Reisner #ifndef QUEUE_ORDERED_NONE
2061b411b363SPhilipp Reisner #define QUEUE_ORDERED_NONE 0
2062b411b363SPhilipp Reisner #endif
2063b411b363SPhilipp Reisner 	return QUEUE_ORDERED_NONE;
2064b411b363SPhilipp Reisner }
2065b411b363SPhilipp Reisner 
2066b30ab791SAndreas Gruenbacher static inline void drbd_md_flush(struct drbd_device *device)
2067b411b363SPhilipp Reisner {
2068b411b363SPhilipp Reisner 	int r;
2069b411b363SPhilipp Reisner 
2070b30ab791SAndreas Gruenbacher 	if (device->ldev == NULL) {
2071b30ab791SAndreas Gruenbacher 		dev_warn(DEV, "device->ldev == NULL in drbd_md_flush\n");
2072fd0017c1SPhilipp Reisner 		return;
2073fd0017c1SPhilipp Reisner 	}
2074fd0017c1SPhilipp Reisner 
2075b30ab791SAndreas Gruenbacher 	if (test_bit(MD_NO_FUA, &device->flags))
2076b411b363SPhilipp Reisner 		return;
2077b411b363SPhilipp Reisner 
2078b30ab791SAndreas Gruenbacher 	r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL);
2079b411b363SPhilipp Reisner 	if (r) {
2080b30ab791SAndreas Gruenbacher 		set_bit(MD_NO_FUA, &device->flags);
2081b411b363SPhilipp Reisner 		dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
2082b411b363SPhilipp Reisner 	}
2083b411b363SPhilipp Reisner }
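
/*
 * Editor's note: a meta-data writer would typically call drbd_md_flush()
 * right after submitting its super block write; if the backing device
 * rejects the flush, MD_NO_FUA is set above and later calls become
 * no-ops instead of failing the meta-data update itself.
 */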
2084b411b363SPhilipp Reisner 
2085b411b363SPhilipp Reisner #endif
2086