xref: /linux/include/net/af_vsock.h (revision 32e940f2bd3b16551f23ea44be47f6f5d1746d64)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * VMware vSockets Driver
4  *
5  * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
6  */
7 
8 #ifndef __AF_VSOCK_H__
9 #define __AF_VSOCK_H__
10 
11 #include <linux/kernel.h>
12 #include <linux/workqueue.h>
13 #include <net/netns/vsock.h>
14 #include <net/sock.h>
15 #include <uapi/linux/vm_sockets.h>
16 
17 #include "vsock_addr.h"
18 
19 #define LAST_RESERVED_PORT 1023
20 
21 #define VSOCK_HASH_SIZE         251
22 extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
23 extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
24 extern spinlock_t vsock_table_lock;
25 
26 #define vsock_sk(__sk)    ((struct vsock_sock *)__sk)
27 #define sk_vsock(__vsk)   (&(__vsk)->sk)
28 
/* vsock socket: embeds struct sock as its first member so the two pointer
 * types are interchangeable (see the vsock_sk()/sk_vsock() casts above).
 */
struct vsock_sock {
	/* sk must be the first member. */
	struct sock sk;
	const struct vsock_transport *transport;
	struct sockaddr_vm local_addr;
	struct sockaddr_vm remote_addr;
	/* Links for the global tables of bound and connected sockets. */
	struct list_head bound_table;
	struct list_head connected_table;
	/* Accessed without the socket lock held. This means it can never be
	 * modified outside of socket create or destruct.
	 */
	bool trusted;
	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
					 * cached peer?
					 */
	u32 cached_peer;  /* Context ID of last dgram destination check. */
	const struct cred *owner;
	/* Rest are SOCK_STREAM only. */
	long connect_timeout;
	/* Listening socket that this came from. */
	struct sock *listener;
	/* Used for pending list and accept queue during connection handshake.
	 * The listening socket is the head for both lists.  Sockets created
	 * for connection requests are placed in the pending list until they
	 * are connected, at which point they are put in the accept queue list
	 * so they can be accepted in accept().  If accept() cannot accept the
	 * connection, it is marked as rejected so the cleanup function knows
	 * to clean up the socket.
	 */
	struct list_head pending_links;
	struct list_head accept_queue;
	bool rejected;
	struct delayed_work connect_work;
	struct delayed_work pending_work;
	struct delayed_work close_work;
	bool close_work_scheduled;	/* close_work already queued?
					 * NOTE(review): presumably guards
					 * double-scheduling -- confirm in
					 * af_vsock.c
					 */
	u32 peer_shutdown;	/* shutdown state reported by the peer --
				 * TODO confirm exact mask semantics in core
				 */
	bool sent_request;
	bool ignore_connecting_rst;

	/* Protected by lock_sock(sk) */
	u64 buffer_size;
	u64 buffer_min_size;
	u64 buffer_max_size;

	/* Private to transport. */
	void *trans;
};
78 
79 s64 vsock_connectible_has_data(struct vsock_sock *vsk);
80 s64 vsock_stream_has_data(struct vsock_sock *vsk);
81 s64 vsock_stream_has_space(struct vsock_sock *vsk);
82 struct sock *vsock_create_connected(struct sock *parent);
83 void vsock_data_ready(struct sock *sk);
84 
85 /**** TRANSPORT ****/
86 
/* Opaque cookie threaded through the notify_recv_* transport callbacks;
 * the core only allocates it, the transport owns its contents.
 */
struct vsock_transport_recv_notify_data {
	u64 data1; /* Transport-defined. */
	u64 data2; /* Transport-defined. */
	bool notify_on_block;
};
92 
/* Opaque cookie threaded through the notify_send_* transport callbacks;
 * the core only allocates it, the transport owns its contents.
 */
struct vsock_transport_send_notify_data {
	u64 data1; /* Transport-defined. */
	u64 data2; /* Transport-defined. */
};
97 
98 /* Transport features flags */
99 /* Transport provides host->guest communication */
100 #define VSOCK_TRANSPORT_F_H2G		0x00000001
101 /* Transport provides guest->host communication */
102 #define VSOCK_TRANSPORT_F_G2H		0x00000002
103 /* Transport provides DGRAM communication */
104 #define VSOCK_TRANSPORT_F_DGRAM		0x00000004
105 /* Transport provides local (loopback) communication */
106 #define VSOCK_TRANSPORT_F_LOCAL		0x00000008
107 
/* Per-transport operations table, registered with vsock_core_register()
 * together with the VSOCK_TRANSPORT_F_* feature flags above.  Some
 * callbacks are optional and NULL-checked by the core (e.g.
 * msgzerocopy_allow -- see vsock_msgzerocopy_allow() below; has_remote_cid
 * documents its own "if not implemented" fallback).
 */
struct vsock_transport {
	struct module *module;

	/* Initialize/tear-down socket. */
	int (*init)(struct vsock_sock *, struct vsock_sock *);
	void (*destruct)(struct vsock_sock *);
	void (*release)(struct vsock_sock *);

	/* Cancel all pending packets sent on vsock. */
	int (*cancel_pkt)(struct vsock_sock *vsk);

	/* Connections. */
	int (*connect)(struct vsock_sock *);

	/* DGRAM. */
	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
	int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
			     size_t len, int flags);
	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
			     struct msghdr *, size_t len);
	bool (*dgram_allow)(struct vsock_sock *vsk, u32 cid, u32 port);

	/* STREAM. */
	/* TODO: stream_bind() */
	ssize_t (*stream_dequeue)(struct vsock_sock *, struct msghdr *,
				  size_t len, int flags);
	ssize_t (*stream_enqueue)(struct vsock_sock *, struct msghdr *,
				  size_t len);
	s64 (*stream_has_data)(struct vsock_sock *);
	s64 (*stream_has_space)(struct vsock_sock *);
	u64 (*stream_rcvhiwat)(struct vsock_sock *);
	bool (*stream_is_active)(struct vsock_sock *);
	bool (*stream_allow)(struct vsock_sock *vsk, u32 cid, u32 port);

	/* SEQ_PACKET. */
	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
				     int flags);
	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
				 size_t len);
	bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid);
	u32 (*seqpacket_has_data)(struct vsock_sock *vsk);

	/* Notification. */
	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
	int (*notify_recv_init)(struct vsock_sock *, size_t,
		struct vsock_transport_recv_notify_data *);
	int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
		struct vsock_transport_recv_notify_data *);
	int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
		struct vsock_transport_recv_notify_data *);
	int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
		ssize_t, bool, struct vsock_transport_recv_notify_data *);
	int (*notify_send_init)(struct vsock_sock *,
		struct vsock_transport_send_notify_data *);
	int (*notify_send_pre_block)(struct vsock_sock *,
		struct vsock_transport_send_notify_data *);
	int (*notify_send_pre_enqueue)(struct vsock_sock *,
		struct vsock_transport_send_notify_data *);
	int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
		struct vsock_transport_send_notify_data *);
	/* sk_lock held by the caller */
	void (*notify_buffer_size)(struct vsock_sock *, u64 *);
	int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);

	/* SIOCOUTQ ioctl */
	ssize_t (*unsent_bytes)(struct vsock_sock *vsk);

	/* Shutdown. */
	int (*shutdown)(struct vsock_sock *, int);

	/* Addressing. */
	u32 (*get_local_cid)(void);

	/* Check if this transport serves a specific remote CID.
	 * For H2G transports: return true if the CID belongs to a registered
	 * guest. If not implemented, all CIDs > VMADDR_CID_HOST go to H2G.
	 * For G2H transports: return true if the transport can reach arbitrary
	 * CIDs via the hypervisor (i.e. supports the fallback overlay). VMCI
	 * does not implement this as it only serves CIDs 0 and 2.
	 */
	bool (*has_remote_cid)(struct vsock_sock *vsk, u32 remote_cid);

	/* Read a single skb */
	int (*read_skb)(struct vsock_sock *, skb_read_actor_t);

	/* Zero-copy. */
	bool (*msgzerocopy_allow)(void);
};
197 
198 /**** CORE ****/
199 
200 int vsock_core_register(const struct vsock_transport *t, int features);
201 void vsock_core_unregister(const struct vsock_transport *t);
202 
203 /* The transport may downcast this to access transport-specific functions */
204 const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk);
205 
206 /**** UTILS ****/
207 
208 /* vsock_table_lock must be held */
__vsock_in_bound_table(struct vsock_sock * vsk)209 static inline bool __vsock_in_bound_table(struct vsock_sock *vsk)
210 {
211 	return !list_empty(&vsk->bound_table);
212 }
213 
214 /* vsock_table_lock must be held */
__vsock_in_connected_table(struct vsock_sock * vsk)215 static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
216 {
217 	return !list_empty(&vsk->connected_table);
218 }
219 
220 void vsock_add_pending(struct sock *listener, struct sock *pending);
221 void vsock_remove_pending(struct sock *listener, struct sock *pending);
222 void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
223 void vsock_insert_connected(struct vsock_sock *vsk);
224 void vsock_remove_bound(struct vsock_sock *vsk);
225 void vsock_remove_connected(struct vsock_sock *vsk);
226 struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
227 struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
228 					 struct sockaddr_vm *dst);
229 struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr,
230 					 struct net *net);
231 struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src,
232 					     struct sockaddr_vm *dst,
233 					     struct net *net);
234 void vsock_remove_sock(struct vsock_sock *vsk);
235 void vsock_for_each_connected_socket(struct vsock_transport *transport,
236 				     void (*fn)(struct sock *sk));
237 int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
238 bool vsock_find_cid(unsigned int cid);
239 void vsock_linger(struct sock *sk);
240 
241 /**** TAP ****/
242 
/* A tap registered via vsock_add_tap(); vsock_deliver_tap() mirrors
 * traffic to it (NOTE(review): delivery semantics live in af_vsock.c).
 */
struct vsock_tap {
	struct net_device *dev;		/* device packets are delivered to */
	struct module *module;		/* owner, for module refcounting */
	struct list_head list;		/* linkage in the global tap list */
};
248 
249 int vsock_add_tap(struct vsock_tap *vt);
250 int vsock_remove_tap(struct vsock_tap *vt);
251 void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
252 int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
253 				int flags);
254 int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
255 			      int flags);
256 int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
257 			  size_t len, int flags);
258 int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
259 			size_t len, int flags);
260 
261 extern struct proto vsock_proto;
262 #ifdef CONFIG_BPF_SYSCALL
263 int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
264 void __init vsock_bpf_build_proto(void);
265 #else
/* No-op stub when CONFIG_BPF_SYSCALL is disabled. */
static inline void __init vsock_bpf_build_proto(void)
{}
268 #endif
269 
vsock_msgzerocopy_allow(const struct vsock_transport * t)270 static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
271 {
272 	return t->msgzerocopy_allow && t->msgzerocopy_allow();
273 }
274 
vsock_net_mode(struct net * net)275 static inline enum vsock_net_mode vsock_net_mode(struct net *net)
276 {
277 	if (!net)
278 		return VSOCK_NET_MODE_GLOBAL;
279 
280 	return READ_ONCE(net->vsock.mode);
281 }
282 
vsock_net_mode_global(struct vsock_sock * vsk)283 static inline bool vsock_net_mode_global(struct vsock_sock *vsk)
284 {
285 	return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL;
286 }
287 
/* Lock in the vsock mode that child namespaces of @net will use.
 *
 * child_ns_mode_locked stores 0 while unlocked; once locked it stores
 * mode + 1, so that no valid mode value collides with "unlocked".
 *
 * The first caller wins the cmpxchg, publishes @mode and returns true.
 * Once locked, a later call returns true only when it requests the mode
 * that is already locked in; a conflicting mode returns false.
 */
static inline bool vsock_net_set_child_mode(struct net *net,
					    enum vsock_net_mode mode)
{
	int new_locked = mode + 1;
	int old_locked = 0; /* unlocked */

	/* On failure, try_cmpxchg() updates old_locked to the value
	 * currently stored in child_ns_mode_locked.
	 */
	if (try_cmpxchg(&net->vsock.child_ns_mode_locked,
			&old_locked, new_locked)) {
		WRITE_ONCE(net->vsock.child_ns_mode, mode);
		return true;
	}

	return old_locked == new_locked;
}
302 
vsock_net_child_mode(struct net * net)303 static inline enum vsock_net_mode vsock_net_child_mode(struct net *net)
304 {
305 	return READ_ONCE(net->vsock.child_ns_mode);
306 }
307 
308 /* Return true if two namespaces pass the mode rules. Otherwise, return false.
309  *
310  * A NULL namespace is treated as VSOCK_NET_MODE_GLOBAL.
311  *
312  * Read more about modes in the comment header of net/vmw_vsock/af_vsock.c.
313  */
vsock_net_check_mode(struct net * ns0,struct net * ns1)314 static inline bool vsock_net_check_mode(struct net *ns0, struct net *ns1)
315 {
316 	enum vsock_net_mode mode0, mode1;
317 
318 	/* Any vsocks within the same network namespace are always reachable,
319 	 * regardless of the mode.
320 	 */
321 	if (net_eq(ns0, ns1))
322 		return true;
323 
324 	mode0 = vsock_net_mode(ns0);
325 	mode1 = vsock_net_mode(ns1);
326 
327 	/* Different namespaces are only reachable if they are both
328 	 * global mode.
329 	 */
330 	return mode0 == VSOCK_NET_MODE_GLOBAL && mode0 == mode1;
331 }
332 #endif /* __AF_VSOCK_H__ */
333