xref: /linux/include/linux/net.h (revision 91a4855d6c03e770e42f17c798a36a3c46e63de2)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * NET		An implementation of the SOCKET network access protocol.
4  *		This is the master header file for the Linux NET layer,
5  *		or, in plain English: the networking handling part of the
6  *		kernel.
7  *
8  * Version:	@(#)net.h	1.0.3	05/25/93
9  *
10  * Authors:	Orest Zborowski, <obz@Kodak.COM>
11  *		Ross Biro
12  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
13  */
14 #ifndef _LINUX_NET_H
15 #define _LINUX_NET_H
16 
17 #include <linux/stringify.h>
18 #include <linux/random.h>
19 #include <linux/wait.h>
20 #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
21 #include <linux/rcupdate.h>
22 #include <linux/once.h>
23 #include <linux/fs.h>
24 #include <linux/mm.h>
25 #include <linux/sockptr.h>
26 #include <linux/uio.h>
27 
28 #include <uapi/linux/net.h>
29 
30 /**
31  * struct sockopt - socket option value container
32  * @iter_in: iov_iter for reading optval with the content from the caller.
33  *	     Use copy_from_iter() given this iov direction is ITER_SOURCE
34  * @iter_out: iov_iter for protocols to update optval data to userspace
35  *	      Use _copy_to_iter() given iov direction is ITER_DEST
36  * @optlen: serves as both input (buffer size) and output (returned data size).
37  *
38  * Type-safe wrapper for socket option data that works with both
39  * user and kernel buffers.
40  *
41  * The optlen field allows callbacks to return a specific length value
42  * independent of the bytes written via copy_to_iter().
43  */
typedef struct sockopt {
	/* Source iterator: option value supplied by the caller. */
	struct iov_iter iter_in;
	/* Destination iterator: option value handed back to the caller. */
	struct iov_iter iter_out;
	/* In: size of the buffer. Out: size of the data returned. */
	int optlen;
} sockopt_t;
49 
50 struct poll_table_struct;
51 struct pipe_inode_info;
52 struct inode;
53 struct file;
54 struct net;
55 
56 /* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located
57  * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
58  * Eventually all flags will be in sk->sk_wq->flags.
59  */
/*
 * No explicit values are given, so the enumerators are numbered 0..4 in
 * declaration order; reordering or inserting entries renumbers the rest.
 * NOTE(review): presumably used as bit numbers within the unsigned long
 * "flags" words of struct socket / struct socket_wq -- confirm at the users.
 */
enum socket_flags {
	SOCKWQ_ASYNC_NOSPACE,	/* 0 */
	SOCKWQ_ASYNC_WAITDATA,	/* 1 */
	SOCK_NOSPACE,		/* 2 */
	SOCK_SUPPORT_ZC,	/* 3 */
	SOCK_CUSTOM_SOCKOPT,	/* 4 */
};
67 
#ifndef ARCH_HAS_SOCKET_TYPES
/**
 * enum sock_type - Socket types
 * @SOCK_STREAM: stream (connection) socket
 * @SOCK_DGRAM: datagram (conn.less) socket
 * @SOCK_RAW: raw socket
 * @SOCK_RDM: reliably-delivered message
 * @SOCK_SEQPACKET: sequential packet socket
 * @SOCK_DCCP: Datagram Congestion Control Protocol socket
 * @SOCK_PACKET: linux specific way of getting packets at the dev level.
 *		  For writing rarp and other similar things on the user level.
 *
 * This definition is only used when the architecture has not defined
 * ARCH_HAS_SOCKET_TYPES.
 *
 * When adding some new socket type please
 * grep ARCH_HAS_SOCKET_TYPES include/asm-* /socket.h, at least MIPS
 * overrides this enum for binary compat reasons.
 */
enum sock_type {
	SOCK_STREAM	= 1,
	SOCK_DGRAM	= 2,
	SOCK_RAW	= 3,
	SOCK_RDM	= 4,
	SOCK_SEQPACKET	= 5,
	SOCK_DCCP	= 6,
	SOCK_PACKET	= 10,
};
#endif /* ARCH_HAS_SOCKET_TYPES */
94 
/* One past the highest socket type value (SOCK_PACKET). */
#define SOCK_MAX (SOCK_PACKET + 1)
/* Mask which covers at least up to SOCK_MAX-1.  The
 * remaining bits are used as flags. */
#define SOCK_TYPE_MASK 0xf
99 
/*
 * Flags for socket, socketpair, accept4.
 *
 * Each flag is an alias for an O_* file flag rather than a value of its own.
 * SOCK_NONBLOCK is only defined here if nothing earlier has defined it.
 * NOTE(review): the reason SOCK_COREDUMP reuses O_NOCTTY is not visible in
 * this header -- confirm before relying on it.
 */
#define SOCK_CLOEXEC	O_CLOEXEC
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK	O_NONBLOCK
#endif
#define SOCK_COREDUMP	O_NOCTTY
106 
107 /**
108  * enum sock_shutdown_cmd - Shutdown types
109  * @SHUT_RD: shutdown receptions
110  * @SHUT_WR: shutdown transmissions
111  * @SHUT_RDWR: shutdown receptions/transmissions
112  */
enum sock_shutdown_cmd {
	SHUT_RD,	/* 0: shutdown receptions */
	SHUT_WR,	/* 1: shutdown transmissions */
	SHUT_RDWR,	/* 2: shutdown receptions/transmissions */
};
118 
/*
 * Wait-queue state of a socket: the wait queue head, the fasync list and
 * the SOCKWQ_* flags word.  The structure is cacheline aligned on SMP.
 */
struct socket_wq {
	/* Note: wait MUST be first field of socket_wq */
	wait_queue_head_t	wait;
	struct fasync_struct	*fasync_list;
	unsigned long		flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
	/* NOTE(review): presumably used for RCU-deferred freeing -- confirm. */
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;
126 
127 /**
128  *  struct socket - general BSD socket
129  *  @state: socket state (%SS_CONNECTED, etc)
130  *  @type: socket type (%SOCK_STREAM, etc)
131  *  @flags: socket flags (%SOCK_NOSPACE, etc)
132  *  @ops: protocol specific socket operations
133  *  @file: File back pointer for gc
134  *  @sk: internal networking protocol agnostic socket representation
135  *  @wq: wait queue for several uses
136  */
struct socket {
	socket_state		state;

	/* Socket type (%SOCK_STREAM, etc), stored in a short. */
	short			type;

	/* Socket flags (%SOCK_NOSPACE, etc). */
	unsigned long		flags;

	struct file		*file;
	struct sock		*sk;
	const struct proto_ops	*ops; /* Might change with IPV6_ADDRFORM or MPTCP. */

	/* Embedded by value, not a pointer. */
	struct socket_wq	wq;
};
150 
151 /*
152  * "descriptor" for what we're up to with a read.
153  * This allows us to use the same read code yet
154  * have multiple different users of the data that
155  * we read from a file.
156  *
157  * The simplest case just copies the data to user
158  * mode.
159  */
typedef struct {
	/* NOTE(review): presumably bytes handled so far -- confirm at the users. */
	size_t written;
	/* NOTE(review): presumably bytes still wanted -- confirm at the users. */
	size_t count;
	/* Consumer argument: either a user-space buffer or an opaque pointer. */
	union {
		char __user *buf;
		void *data;
	} arg;
	/* NOTE(review): presumably an errno-style status -- confirm at the users. */
	int error;
} read_descriptor_t;
169 
170 struct vm_area_struct;
171 struct page;
172 struct msghdr;
173 struct module;
174 struct sk_buff;
175 struct proto_accept_arg;
/*
 * Callback type passed as recv_actor to proto_ops::read_sock().  It receives
 * the read descriptor, an skb and two unnamed integers.
 * NOTE(review): the integers are presumably an offset into the skb and a
 * length -- confirm against an implementation.
 */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
			       unsigned int, size_t);
/* Callback type passed as recv_actor to proto_ops::read_skb(). */
typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *);
179 
180 
/*
 * Table of protocol specific socket operations.  struct socket::ops points
 * at one of these.  Most callbacks take the struct socket; the ones in the
 * final group take the struct sock directly.
 */
struct proto_ops {
	int		family;
	struct module	*owner;
	int		(*release)   (struct socket *sock);
	int		(*bind)	     (struct socket *sock,
				      struct sockaddr_unsized *myaddr,
				      int sockaddr_len);
	int		(*connect)   (struct socket *sock,
				      struct sockaddr_unsized *vaddr,
				      int sockaddr_len, int flags);
	int		(*socketpair)(struct socket *sock1,
				      struct socket *sock2);
	int		(*accept)    (struct socket *sock,
				      struct socket *newsock,
				      struct proto_accept_arg *arg);
	int		(*getname)   (struct socket *sock,
				      struct sockaddr *addr,
				      int peer);
	__poll_t	(*poll)	     (struct file *file, struct socket *sock,
				      struct poll_table_struct *wait);
	int		(*ioctl)     (struct socket *sock, unsigned int cmd,
				      unsigned long arg);
	/* This member only exists when CONFIG_COMPAT is enabled. */
#ifdef CONFIG_COMPAT
	int	 	(*compat_ioctl) (struct socket *sock, unsigned int cmd,
				      unsigned long arg);
#endif
	int		(*gettstamp) (struct socket *sock, void __user *userstamp,
				      bool timeval, bool time32);
	int		(*listen)    (struct socket *sock, int len);
	int		(*shutdown)  (struct socket *sock, int flags);
	int		(*setsockopt)(struct socket *sock, int level,
				      int optname, sockptr_t optval,
				      unsigned int optlen);
	/* Two getsockopt flavours: raw user pointers, or a sockopt_t container. */
	int		(*getsockopt)(struct socket *sock, int level,
				      int optname, char __user *optval, int __user *optlen);
	int		(*getsockopt_iter)(struct socket *sock, int level,
					   int optname, sockopt_t *opt);
	void		(*show_fdinfo)(struct seq_file *m, struct socket *sock);
	int		(*sendmsg)   (struct socket *sock, struct msghdr *m,
				      size_t total_len);
	/* Notes for implementing recvmsg:
	 * ===============================
	 * msg->msg_namelen should get updated by the recvmsg handlers
	 * iff msg_name != NULL. It is by default 0 to prevent
	 * returning uninitialized memory to user space.  The recvfrom
	 * handlers can assume that msg.msg_name is either NULL or has
	 * a minimum size of sizeof(struct sockaddr_storage).
	 */
	int		(*recvmsg)   (struct socket *sock, struct msghdr *m,
				      size_t total_len, int flags);
	int		(*mmap)	     (struct file *file, struct socket *sock,
				      struct vm_area_struct * vma);
	ssize_t 	(*splice_read)(struct socket *sock,  loff_t *ppos,
				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
	void		(*splice_eof)(struct socket *sock);
	int		(*set_peek_off)(struct sock *sk, int val);
	int		(*peek_len)(struct socket *sock);

	/* The following functions are called internally by kernel with
	 * sock lock already held.
	 */
	int		(*read_sock)(struct sock *sk, read_descriptor_t *desc,
				     sk_read_actor_t recv_actor);
	/* This is different from read_sock(), it reads an entire skb at a time. */
	int		(*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
	int		(*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
					  size_t size);
	int		(*set_rcvlowat)(struct sock *sk, int val);
	void		(*set_rcvbuf)(struct sock *sk, int val);
};
251 
/*
 * Declare a variable @dst of (pointer) type @type, initialised with @src cast
 * to @type.  Before the cast, sizeof(*dst) is passed to
 * __sockaddr_check_size(), which is defined outside this header.
 * NOTE(review): presumably a build-time check that the pointed-to type fits
 * in a sockaddr storage area -- confirm at its definition.
 * Relies on the GNU statement-expression extension.
 */
#define DECLARE_SOCKADDR(type, dst, src)	\
	type dst = ({ __sockaddr_check_size(sizeof(*dst)); (type) src; })
254 
/*
 * Descriptor for one protocol family, as handed to sock_register().
 * It carries the family number, the socket creation callback and the
 * owning module.
 */
struct net_proto_family {
	int		family;
	/* NOTE(review): @kern presumably marks a kernel-internal socket -- confirm. */
	int		(*create)(struct net *net, struct socket *sock,
				  int protocol, int kern);
	struct module	*owner;
};
261 
262 struct iovec;
263 struct kvec;
264 
/*
 * Wake-up kinds, numbered 0..3 in declaration order.
 * NOTE(review): presumably the values accepted as the "how" argument of
 * sock_wake_async() -- confirm at its definition.
 */
enum {
	SOCK_WAKE_IO,		/* 0 */
	SOCK_WAKE_WAITD,	/* 1 */
	SOCK_WAKE_SPACE,	/* 2 */
	SOCK_WAKE_URG,		/* 3 */
};
271 
/*
 * Core socket-layer entry points.  Only the declarations live here; the
 * definitions are outside this header.
 */
int sock_wake_async(struct socket_wq *sk_wq, int how, int band);

/* Protocol family registration, keyed by family number. */
int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
bool sock_is_registered(int family);

/* Socket creation; each variant returns the new socket through @res. */
int __sock_create(struct net *net, int family, int type, int proto,
		  struct socket **res, int kern);
int sock_create(int family, int type, int proto, struct socket **res);
int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
int sock_create_lite(int family, int type, int proto, struct socket **res);
struct socket *sock_alloc(void);
void sock_release(struct socket *sock);

int sock_sendmsg(struct socket *sock, struct msghdr *msg);
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags);

/* Conversions between sockets, struct file and file descriptors. */
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
struct socket *sockfd_lookup(int fd, int *err);
struct socket *sock_from_file(struct file *file);
/*
 * Call fput() on the struct file attached to @sock.
 * The argument is parenthesised so that any pointer expression, such as
 * "socks + 1", expands correctly; @sock is evaluated exactly once.
 */
#define		     sockfd_put(sock) fput((sock)->file)
/* Rate-limit gate; callers in this header proceed only on a non-zero return. */
int net_ratelimit(void);

/*
 * Call @function with the remaining arguments, but only when
 * net_ratelimit() returns non-zero.  Wrapped in do { } while (0) so it
 * behaves as a single statement.
 */
#define net_ratelimited_function(function, ...)			\
do {								\
	if (net_ratelimit())					\
		function(__VA_ARGS__);				\
} while (0)
296 
/*
 * Rate-limited wrappers around the pr_<level>() printing helpers, one per
 * level from emerg to info.  Each forwards its format string and arguments
 * through net_ratelimited_function().
 */
#define net_emerg_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_emerg, fmt, ##__VA_ARGS__)
#define net_alert_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_alert, fmt, ##__VA_ARGS__)
#define net_crit_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_crit, fmt, ##__VA_ARGS__)
#define net_err_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_err, fmt, ##__VA_ARGS__)
#define net_notice_ratelimited(fmt, ...)			\
	net_ratelimited_function(pr_notice, fmt, ##__VA_ARGS__)
#define net_warn_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
#define net_info_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
/*
 * net_dbg_ratelimited() has three build-time variants:
 *  - with dynamic debug available: the dynamic-debug branch is tested first,
 *    and net_ratelimit() is consulted only when that branch is taken;
 *  - otherwise, with DEBUG defined: a rate-limited pr_debug();
 *  - otherwise: the arguments are passed to no_printk().
 */
#if defined(CONFIG_DYNAMIC_DEBUG) || \
	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define net_dbg_ratelimited(fmt, ...)					\
do {									\
	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&				\
	    net_ratelimit())						\
		__dynamic_pr_debug(&descriptor, pr_fmt(fmt),		\
		                   ##__VA_ARGS__);			\
} while (0)
#elif defined(DEBUG)
#define net_dbg_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
#else
#define net_dbg_ratelimited(fmt, ...)				\
	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
328 
/*
 * Networking-prefixed aliases for get_random_once() and
 * get_random_sleepable_once(); the arguments are forwarded unchanged.
 */
#define net_get_random_once(buf, nbytes)			\
	get_random_once((buf), (nbytes))
#define net_get_random_sleepable_once(buf, nbytes)		\
	get_random_sleepable_once((buf), (nbytes))
333 
/*
 * Decide whether @page may be handed to the page-sending path.
 *
 * Some pages must be rejected: e.g. XFS meta- & log-data living in slab
 * pages, bcache meta data pages, or other high order pages allocated by
 * __get_free_pages() without __GFP_COMP.  Such pages have PageSlab() set
 * and/or a page_count of 0.  Sending them would do get_page(); put_page();
 * which either triggers a VM_BUG directly, or makes __page_cache_release
 * free a page that someone still references, leading to some obscure
 * delayed Oops somewhere else.
 *
 * Returns true only for a non-slab page whose page_count is at least 1.
 */
static inline bool sendpage_ok(struct page *page)
{
	/* Slab pages are never acceptable; page_count is not consulted. */
	if (PageSlab(page))
		return false;

	return page_count(page) >= 1;
}
348 
349 /*
350  * Check sendpage_ok on contiguous pages.
351  */
352 static inline bool sendpages_ok(struct page *page, size_t len, size_t offset)
353 {
354 	struct page *p = page + (offset >> PAGE_SHIFT);
355 	size_t count = 0;
356 
357 	while (count < len) {
358 		if (!sendpage_ok(p))
359 			return false;
360 
361 		p++;
362 		count += PAGE_SIZE;
363 	}
364 
365 	return true;
366 }
367 
/*
 * kernel_*() socket helpers.  Only the declarations live here; the
 * definitions are outside this header.  The send/receive pair describes
 * its data with an array of @num struct kvec entries.
 */
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
		   size_t num, size_t len);
int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
		   size_t num, size_t len, int flags);

int kernel_bind(struct socket *sock, struct sockaddr_unsized *addr, int addrlen);
int kernel_listen(struct socket *sock, int backlog);
int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
int kernel_connect(struct socket *sock, struct sockaddr_unsized *addr, int addrlen,
		   int flags);
int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
/* @how is one of the enum sock_shutdown_cmd values (SHUT_RD, SHUT_WR, SHUT_RDWR). */
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);

/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
u32 kernel_sock_ip_overhead(struct sock *sk);
384 
/*
 * Module alias helpers.  Each builds one string literal by concatenation
 * and passes it to MODULE_ALIAS():
 *
 *   MODULE_ALIAS_NETPROTO(proto)                  "net-pf-<proto>"
 *   MODULE_ALIAS_NET_PF_PROTO(pf, proto)          "net-pf-<pf>-proto-<proto>"
 *   MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, t)  "net-pf-<pf>-proto-<proto>-type-<t>"
 *   MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, n)  "net-pf-<pf>-proto-<proto>" n
 *
 * Numeric arguments go through __stringify().  The @name argument of the
 * last macro is not stringified: it must already be a string literal and
 * is appended as-is.
 */
#define MODULE_ALIAS_NETPROTO(proto) \
	MODULE_ALIAS("net-pf-" __stringify(proto))

#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \
	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto))

#define MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type) \
	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
		     "-type-" __stringify(type))

#define MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, name) \
	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
		     name)
398 #endif	/* _LINUX_NET_H */
399