// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
 * Copyright (C) 2024 NeilBrown <neilb@suse.de>
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/nfslocalio.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <net/netns/generic.h>

#include "localio_trace.h"

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("NFS localio protocol bypass support");

static DEFINE_SPINLOCK(nfs_uuids_lock);

/*
 * Global list of nfs_uuid_t instances
 * that is protected by nfs_uuids_lock.
 */
static LIST_HEAD(nfs_uuids);

/*
 * Lock ordering:
 * 1: nfs_uuid->lock
 * 2: nfs_uuids_lock
 * 3: nfs_uuid->list_lock (aka nn->local_clients_lock)
 *
 * May skip locks in select cases, but never hold multiple
 * locks out of order.
 */

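/*
 * Reset an nfs_uuid_t to its initial state: no net, no auth_domain,
 * no per-net list_lock, empty list/files, and a zeroed NFSv3 LOCALIO
 * probe count.
 */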
void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
{
	RCU_INIT_POINTER(nfs_uuid->net, NULL);
	nfs_uuid->dom = NULL;
	nfs_uuid->list_lock = NULL;
	INIT_LIST_HEAD(&nfs_uuid->list);
	INIT_LIST_HEAD(&nfs_uuid->files);
	spin_lock_init(&nfs_uuid->lock);
	nfs_uuid->nfs3_localio_probe_count = 0;
}
EXPORT_SYMBOL_GPL(nfs_uuid_init);

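/*
 * Generate a new uuid and add this nfs_uuid to the global nfs_uuids
 * list so that nfs_uuid_is_local() can find it.  Returns false if the
 * nfs_uuid is already associated with a net or already listed.
 */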
bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid)
{
	spin_lock(&nfs_uuid->lock);
	if (rcu_access_pointer(nfs_uuid->net)) {
		/* This nfs_uuid is already in use */
		spin_unlock(&nfs_uuid->lock);
		return false;
	}

	spin_lock(&nfs_uuids_lock);
	if (!list_empty(&nfs_uuid->list)) {
		/* This nfs_uuid is already in use */
		spin_unlock(&nfs_uuids_lock);
		spin_unlock(&nfs_uuid->lock);
		return false;
	}
	list_add_tail(&nfs_uuid->list, &nfs_uuids);
	spin_unlock(&nfs_uuids_lock);

	uuid_gen(&nfs_uuid->uuid);
	spin_unlock(&nfs_uuid->lock);

	return true;
}
EXPORT_SYMBOL_GPL(nfs_uuid_begin);

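/*
 * Undo nfs_uuid_begin() when the handshake did not make this nfs_uuid
 * local: if no net was recorded, remove it from the global nfs_uuids
 * list again.
 */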
void nfs_uuid_end(nfs_uuid_t *nfs_uuid)
{
	if (!rcu_access_pointer(nfs_uuid->net)) {
		spin_lock(&nfs_uuid->lock);
		if (!rcu_access_pointer(nfs_uuid->net)) {
			/* Not local, remove from nfs_uuids */
			spin_lock(&nfs_uuids_lock);
			list_del_init(&nfs_uuid->list);
			spin_unlock(&nfs_uuids_lock);
		}
		spin_unlock(&nfs_uuid->lock);
	}
}
EXPORT_SYMBOL_GPL(nfs_uuid_end);

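/* Caller must hold nfs_uuids_lock */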
static nfs_uuid_t *nfs_uuid_lookup_locked(const uuid_t *uuid)
{
	nfs_uuid_t *nfs_uuid;

	list_for_each_entry(nfs_uuid, &nfs_uuids, list)
		if (uuid_equal(&nfs_uuid->uuid, uuid))
			return nfs_uuid;

	return NULL;
}

static struct module *nfsd_mod;

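/*
 * Called from the NFSD side when a client-generated uuid is found to
 * identify this host: move the nfs_uuid from the global nfs_uuids list
 * onto @list (nn->local_clients) and record the net, auth_domain and
 * module so that NFS can later make LOCALIO calls into NFSD.
 */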
void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
		       spinlock_t *list_lock, struct net *net,
		       struct auth_domain *dom, struct module *mod)
{
	nfs_uuid_t *nfs_uuid;

	spin_lock(&nfs_uuids_lock);
	nfs_uuid = nfs_uuid_lookup_locked(uuid);
	if (!nfs_uuid) {
		spin_unlock(&nfs_uuids_lock);
		return;
	}

	/*
	 * We don't hold a ref on the net, but instead put
	 * ourselves on @list (nn->local_clients) so the net
	 * pointer can be invalidated.
	 */
	spin_lock(list_lock); /* list_lock is nn->local_clients_lock */
	list_move(&nfs_uuid->list, list);
	spin_unlock(list_lock);

	spin_unlock(&nfs_uuids_lock);
	/* Once nfs_uuid is parented to @list, avoid global nfs_uuids_lock */
	spin_lock(&nfs_uuid->lock);

	__module_get(mod);
	nfsd_mod = mod;

	nfs_uuid->list_lock = list_lock;
	kref_get(&dom->ref);
	nfs_uuid->dom = dom;
	rcu_assign_pointer(nfs_uuid->net, net);
	spin_unlock(&nfs_uuid->lock);
}
EXPORT_SYMBOL_GPL(nfs_uuid_is_local);

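/*
 * Pairs with nfs_localio_disable_client().  The actual enablement is
 * done by nfs_uuid_is_local(); this helper only emits a tracepoint.
 */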
void nfs_localio_enable_client(struct nfs_client *clp)
{
	/* nfs_uuid_is_local() does the actual enablement */
	trace_nfs_localio_enable_client(clp);
}
EXPORT_SYMBOL_GPL(nfs_localio_enable_client);

/*
 * Cleanup the nfs_uuid_t embedded in an nfs_client.
 * This is the long-form counterpart of nfs_uuid_init(): it tears down
 * any LOCALIO state and returns every field to its initialized value.
 */
static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
{
	struct nfs_file_localio *nfl;

	spin_lock(&nfs_uuid->lock);
	if (unlikely(!rcu_access_pointer(nfs_uuid->net))) {
		spin_unlock(&nfs_uuid->lock);
		return false;
	}
	RCU_INIT_POINTER(nfs_uuid->net, NULL);

	if (nfs_uuid->dom) {
		auth_domain_put(nfs_uuid->dom);
		nfs_uuid->dom = NULL;
	}

	/* Walk the list of files and ensure their last references are dropped */

	while ((nfl = list_first_entry_or_null(&nfs_uuid->files,
					       struct nfs_file_localio,
					       list)) != NULL) {
		/* If nfl->nfs_uuid is already NULL, nfs_close_local_fh()
		 * is closing this file and we must wait, else we unlink
		 * and close it ourselves.
		 */
		if (rcu_access_pointer(nfl->nfs_uuid) == NULL) {
			/* nfs_close_local_fh() is doing the
			 * close and we must wait until it unlinks.
			 */
			wait_var_event_spinlock(nfl,
						list_first_entry_or_null(
							&nfs_uuid->files,
							struct nfs_file_localio,
							list) != nfl,
						&nfs_uuid->lock);
			continue;
		}

		/* Remove nfl from nfs_uuid->files list */
		list_del_init(&nfl->list);
		spin_unlock(&nfs_uuid->lock);

		nfs_to_nfsd_file_put_local(&nfl->ro_file);
		nfs_to_nfsd_file_put_local(&nfl->rw_file);
		cond_resched();

		spin_lock(&nfs_uuid->lock);
		/* Now we can allow a racing nfs_close_local_fh() to
		 * skip the locking.
		 */
		RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
		wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
	}

	/* Remove client from nn->local_clients */
	if (nfs_uuid->list_lock) {
		spin_lock(nfs_uuid->list_lock);
		BUG_ON(list_empty(&nfs_uuid->list));
		list_del_init(&nfs_uuid->list);
		spin_unlock(nfs_uuid->list_lock);
		nfs_uuid->list_lock = NULL;
	}

	module_put(nfsd_mod);
	spin_unlock(&nfs_uuid->lock);

	return true;
}

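/*
 * Disable LOCALIO for this client.  The tracepoint only fires if there
 * was LOCALIO state to tear down (i.e. the client had a net recorded).
 */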
void nfs_localio_disable_client(struct nfs_client *clp)
{
	if (nfs_uuid_put(&clp->cl_uuid))
		trace_nfs_localio_disable_client(clp);
}
EXPORT_SYMBOL_GPL(nfs_localio_disable_client);

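/*
 * Called with NFSD's per-net local_clients list and lock: splice the
 * list and disable LOCALIO for every client found on it.
 */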
void nfs_localio_invalidate_clients(struct list_head *nn_local_clients,
				    spinlock_t *nn_local_clients_lock)
{
	LIST_HEAD(local_clients);
	nfs_uuid_t *nfs_uuid, *tmp;
	struct nfs_client *clp;

	spin_lock(nn_local_clients_lock);
	list_splice_init(nn_local_clients, &local_clients);
	spin_unlock(nn_local_clients_lock);
	list_for_each_entry_safe(nfs_uuid, tmp, &local_clients, list) {
		if (WARN_ON(nfs_uuid->list_lock != nn_local_clients_lock))
			break;
		clp = container_of(nfs_uuid, struct nfs_client, cl_uuid);
		nfs_localio_disable_client(clp);
	}
}
EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients);

static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl)
{
	/* Add nfl to nfs_uuid->files if it isn't already */
	spin_lock(&nfs_uuid->lock);
	if (list_empty(&nfl->list)) {
		rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid);
		list_add_tail(&nfl->list, &nfs_uuid->files);
	}
	spin_unlock(&nfs_uuid->lock);
}

/*
 * Caller is responsible for calling nfsd_net_put and
 * nfsd_file_put (via nfs_to_nfsd_file_put_local).
 */
struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
		   struct rpc_clnt *rpc_clnt, const struct cred *cred,
		   const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl,
		   struct nfsd_file __rcu **pnf,
		   const fmode_t fmode)
{
	struct net *net;
	struct nfsd_file *localio;

	/*
	 * Not running in nfsd context, so we must safely get a reference on
	 * nfsd_serv.  But the server may already be shutting down; if so,
	 * disallow new LOCALIO.  uuid->net is NOT a counted reference, but
	 * rcu_read_lock() ensures that if uuid->net is not NULL then calling
	 * nfsd_net_try_get() is safe, and if it succeeds we will have an
	 * implied reference to the net.
	 *
	 * Otherwise NFS may not have a reference on NFSD and therefore cannot
	 * safely make 'nfs_to' calls.
	 */
	rcu_read_lock();
	net = rcu_dereference(uuid->net);
	if (!net || !nfs_to->nfsd_net_try_get(net)) {
		rcu_read_unlock();
		return ERR_PTR(-ENXIO);
	}
	rcu_read_unlock();
	/* We have an implied reference to net thanks to nfsd_net_try_get */
	localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt,
					     cred, nfs_fh, pnf, fmode);
	nfs_to_nfsd_net_put(net);
	if (!IS_ERR(localio))
		nfs_uuid_add_file(uuid, nfl);

	return localio;
}
EXPORT_SYMBOL_GPL(nfs_open_local_fh);
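/*
 * Illustrative pairing only (a sketch, not lifted from an actual
 * caller; the details of real NFS client callers may differ):
 *
 *	struct nfsd_file *nf;
 *
 *	nf = nfs_open_local_fh(&clp->cl_uuid, clnt, cred, fh, nfl,
 *			       &nfl->ro_file, FMODE_READ);
 *	if (!IS_ERR(nf)) {
 *		... perform local I/O against nf ...
 *		nfs_close_local_fh(nfl);
 *	}
 *
 * nfs_close_local_fh() below drops both the read-only and read-write
 * nfsd_file references held by @nfl and unlinks it from its nfs_uuid,
 * coordinating with any concurrent nfs_uuid_put().
 */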

void nfs_close_local_fh(struct nfs_file_localio *nfl)
{
	nfs_uuid_t *nfs_uuid;

	rcu_read_lock();
	nfs_uuid = rcu_dereference(nfl->nfs_uuid);
	if (!nfs_uuid) {
		/* regular (non-LOCALIO) NFS will hammer this */
		rcu_read_unlock();
		return;
	}

	spin_lock(&nfs_uuid->lock);
	if (!rcu_access_pointer(nfl->nfs_uuid)) {
		/* nfs_uuid_put() has finished here */
		spin_unlock(&nfs_uuid->lock);
		rcu_read_unlock();
		return;
	}
	if (list_empty(&nfs_uuid->files)) {
		/* nfs_uuid_put() has started closing files, wait for it
		 * to finish
		 */
		spin_unlock(&nfs_uuid->lock);
		rcu_read_unlock();
		wait_var_event(&nfl->nfs_uuid,
			       rcu_access_pointer(nfl->nfs_uuid) == NULL);
		return;
	}
	/* tell nfs_uuid_put() to wait for us */
	RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
	spin_unlock(&nfs_uuid->lock);
	rcu_read_unlock();

	nfs_to_nfsd_file_put_local(&nfl->ro_file);
	nfs_to_nfsd_file_put_local(&nfl->rw_file);

	/* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put()
	 * that we are done.  The moment we drop the spinlock the
	 * nfs_uuid could be freed.
	 */
	spin_lock(&nfs_uuid->lock);
	list_del_init(&nfl->list);
	wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
	spin_unlock(&nfs_uuid->lock);
}
EXPORT_SYMBOL_GPL(nfs_close_local_fh);

/*
 * The NFS LOCALIO code needs to call into NFSD using various symbols,
 * but cannot be statically linked, because that will make the NFS
 * module always depend on the NFSD module.
 *
 * 'nfs_to' provides NFS access to the NFSD functions needed for LOCALIO.
 * Its lifetime is tightly coupled to the NFSD module and it will always
 * be available to NFS LOCALIO because any successful client<->server
 * LOCALIO handshake results in a reference on the NFSD module (above),
 * so NFS implicitly holds a reference to the NFSD module and the
 * functions in the 'nfs_to' nfsd_localio_operations cannot disappear.
 *
 * If the last NFS client using LOCALIO disconnects (and its reference
 * on NFSD is dropped) then NFSD could be unloaded, leaving the 'nfs_to'
 * function pointers invalid. But if NFSD isn't loaded then NFS
 * will not be able to handshake with NFSD and will have no cause to
 * try to call 'nfs_to' function pointers. If/when NFSD is reloaded it
 * will reinitialize the 'nfs_to' function pointers and make LOCALIO
 * possible.
 */
const struct nfsd_localio_operations *nfs_to;
EXPORT_SYMBOL_GPL(nfs_to);