/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the AF_INET socket handler.
 *
 * Version:	@(#)sock.h	1.0.4	05/13/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche <flla@stud.uni-sb.de>
 *
 * Fixes:
 *		Alan Cox	:	Volatiles in skbuff pointers. See
 *					skbuff comments. May be overdone,
 *					better to prove they can be removed
 *					than the reverse.
 *		Alan Cox	:	Added a zapped field for tcp to note
 *					a socket is reset and must stay shut up
 *		Alan Cox	:	New fields for options
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Eliminate low level recv/recvfrom
 *		David S. Miller	:	New socket lookup architecture.
 *		Steve Whitehouse:	Default routines for sock_ops
 *		Arnaldo C. Melo :	removed net_pinfo, tp_pinfo and made
 *					protinfo be just a void pointer, as the
 *					protocol specific parts were moved to
 *					respective headers and ipv4/v6, etc now
 *					use private slabcaches for their socks
 *		Pedro Hortas	:	New flags field for socket options
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _SOCK_H
#define _SOCK_H

#include <linux/hardirq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/list_nulls.h>
#include <linux/timer.h>
#include <linux/cache.h>
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>	/* struct sk_buff */
#include <linux/mm.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memcontrol.h>
#include <linux/res_counter.h>
#include <linux/static_key.h>
#include <linux/aio.h>
#include <linux/sched.h>

#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>

#include <linux/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>

struct cgroup;
struct cgroup_subsys;
#ifdef CONFIG_NET
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg);
#else
static inline
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	return 0;
}
static inline
void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
}
#endif
/*
 * This structure really needs to be cleaned up.
 * Most of it is for TCP, and not used by any of
 * the other protocols.
 */

/* Define this to get the SOCK_DBG debugging facility. */
#define SOCK_DEBUGGING
#ifdef SOCK_DEBUGGING
#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
					printk(KERN_DEBUG msg); } while (0)
#else
/* Validate arguments and do nothing */
static inline __printf(2, 3)
void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)
{
}
#endif
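
/*
 * Illustrative use of SOCK_DEBUG() (not from this file): it only emits
 * output when the socket has SOCK_DBG set via the SO_DEBUG socket option,
 * e.g. from a protocol handler:
 *
 *	SOCK_DEBUG(sk, "%s: state %d\n", __func__, sk->sk_state);
 *
 * In the !SOCK_DEBUGGING case the empty inline above still type-checks
 * the format string thanks to the __printf(2, 3) annotation.
 */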

/* This is the per-socket lock.  The spinlock provides a synchronization
 * between user contexts and software interrupt processing, whereas the
 * mini-semaphore synchronizes multiple users amongst themselves.
 */
typedef struct {
	spinlock_t		slock;
	int			owned;
	wait_queue_head_t	wq;
	/*
	 * We express the mutex-alike socket_lock semantics
	 * to the lock validator by explicitly managing
	 * the slock as a lock variant (in addition to
	 * the slock itself):
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} socket_lock_t;
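
/*
 * A simplified, illustrative sketch of how the two halves of
 * socket_lock_t are used:
 *
 *	Process context, may sleep:
 *		lock_sock(sk);		// takes the "mini-semaphore", sets owned
 *		...
 *		release_sock(sk);	// also flushes the backlog queue
 *
 *	BH/softirq context, never sleeps:
 *		bh_lock_sock(sk);	// spins on sk_lock.slock
 *		if (!sock_owned_by_user(sk))
 *			process the skb now;
 *		else
 *			queue the skb on sk->sk_backlog for the owner;
 *		bh_unlock_sock(sk);
 */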

struct sock;
struct proto;
struct net;

typedef __u32 __bitwise __portpair;
typedef __u64 __bitwise __addrpair;

/**
 *	struct sock_common - minimal network layer representation of sockets
 *	@skc_daddr: Foreign IPv4 addr
 *	@skc_rcv_saddr: Bound local IPv4 addr
 *	@skc_hash: hash value used with various protocol lookup tables
 *	@skc_u16hashes: two u16 hash values used by UDP lookup tables
 *	@skc_dport: placeholder for inet_dport/tw_dport
 *	@skc_num: placeholder for inet_num/tw_num
 *	@skc_family: network address family
 *	@skc_state: Connection state
 *	@skc_reuse: %SO_REUSEADDR setting
 *	@skc_reuseport: %SO_REUSEPORT setting
 *	@skc_bound_dev_if: bound device index if != 0
 *	@skc_bind_node: bind hash linkage for various protocol lookup tables
 *	@skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
 *	@skc_prot: protocol handlers inside a network family
 *	@skc_net: reference to the network namespace of this socket
 *	@skc_node: main hash linkage for various protocol lookup tables
 *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
 *	@skc_tx_queue_mapping: tx queue number for this connection
 *	@skc_refcnt: reference count
 *
 *	This is the minimal network layer representation of sockets, the header
 *	for struct sock and struct inet_timewait_sock.
 */
struct sock_common {
	/* skc_daddr and skc_rcv_saddr must be grouped on an 8-byte aligned
	 * address on 64bit arches: cf. INET_MATCH() and INET_TW_MATCH()
	 */
	union {
		__addrpair	skc_addrpair;
		struct {
			__be32	skc_daddr;
			__be32	skc_rcv_saddr;
		};
	};
	union  {
		unsigned int	skc_hash;
		__u16		skc_u16hashes[2];
	};
	/* skc_dport && skc_num must be grouped as well */
	union {
		__portpair	skc_portpair;
		struct {
			__be16	skc_dport;
			__u16	skc_num;
		};
	};

	unsigned short		skc_family;
	volatile unsigned char	skc_state;
	unsigned char		skc_reuse:4;
	unsigned char		skc_reuseport:4;
	int			skc_bound_dev_if;
	union {
		struct hlist_node	skc_bind_node;
		struct hlist_nulls_node skc_portaddr_node;
	};
	struct proto		*skc_prot;
#ifdef CONFIG_NET_NS
	struct net	 	*skc_net;
#endif
	/*
	 * fields between dontcopy_begin/dontcopy_end
	 * are not copied in sock_copy()
	 */
	/* private: */
	int			skc_dontcopy_begin[0];
	/* public: */
	union {
		struct hlist_node	skc_node;
		struct hlist_nulls_node skc_nulls_node;
	};
	int			skc_tx_queue_mapping;
	atomic_t		skc_refcnt;
	/* private: */
	int                     skc_dontcopy_end[0];
	/* public: */
};

struct cg_proto;
/**
  *	struct sock - network layer representation of sockets
  *	@__sk_common: shared layout with inet_timewait_sock
  *	@sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
  *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
  *	@sk_lock:	synchronizer
  *	@sk_rcvbuf: size of receive buffer in bytes
  *	@sk_wq: sock wait queue and async head
  *	@sk_rx_dst: receive input route used by early tcp demux
  *	@sk_dst_cache: destination cache
  *	@sk_dst_lock: destination cache lock
  *	@sk_policy: flow policy
  *	@sk_receive_queue: incoming packets
  *	@sk_wmem_alloc: transmit queue bytes committed
  *	@sk_write_queue: Packet sending queue
  *	@sk_async_wait_queue: DMA copied packets
  *	@sk_omem_alloc: "o" is "option" or "other"
  *	@sk_wmem_queued: persistent queue size
  *	@sk_forward_alloc: space allocated forward
  *	@sk_napi_id: id of the last napi context to receive data for sk
  *	@sk_ll_usec: usecs to busypoll when there is no data
  *	@sk_allocation: allocation mode
  *	@sk_sndbuf: size of send buffer in bytes
  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  *	@sk_no_check: %SO_NO_CHECK setting, whether or not to checksum packets
  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
  *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
  *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
  *	@sk_gso_max_size: Maximum GSO segment size to build
  *	@sk_gso_max_segs: Maximum number of GSO segments
  *	@sk_lingertime: %SO_LINGER l_linger setting
  *	@sk_backlog: always used with the per-socket spinlock held
  *	@sk_callback_lock: used with the callbacks in the end of this struct
  *	@sk_error_queue: rarely used
  *	@sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
  *			  IPV6_ADDRFORM for instance)
  *	@sk_err: last error
  *	@sk_err_soft: errors that don't cause failure but are the cause of a
  *		      persistent failure not just 'timed out'
  *	@sk_drops: raw/udp drops counter
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_priority: %SO_PRIORITY setting
  *	@sk_cgrp_prioidx: socket group's priority map index
  *	@sk_type: socket type (%SOCK_STREAM, etc)
  *	@sk_protocol: which protocol this socket belongs to in this network family
  *	@sk_peer_pid: &struct pid for this socket's peer
  *	@sk_peer_cred: %SO_PEERCRED setting
  *	@sk_rcvlowat: %SO_RCVLOWAT setting
  *	@sk_rcvtimeo: %SO_RCVTIMEO setting
  *	@sk_sndtimeo: %SO_SNDTIMEO setting
  *	@sk_rxhash: flow hash received from netif layer
  *	@sk_filter: socket filtering instructions
  *	@sk_protinfo: private area, net family specific, when not using slab
  *	@sk_timer: sock cleanup timer
  *	@sk_stamp: time stamp of last packet received
  *	@sk_socket: Identd and reporting IO signals
  *	@sk_user_data: RPC layer private data
  *	@sk_frag: cached page frag
  *	@sk_peek_off: current peek_offset value
  *	@sk_send_head: front of stuff to transmit
  *	@sk_security: used by security modules
  *	@sk_mark: generic packet mark
  *	@sk_classid: this socket's cgroup classid
  *	@sk_cgrp: this socket's cgroup-specific proto data
  *	@sk_write_pending: a write to stream socket waits to start
  *	@sk_state_change: callback to indicate change in the state of the sock
  *	@sk_data_ready: callback to indicate there is data to be processed
  *	@sk_write_space: callback to indicate there is buffer sending space available
  *	@sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
  *	@sk_backlog_rcv: callback to process the backlog
  *	@sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
 */
struct sock {
	/*
	 * Now struct inet_timewait_sock also uses sock_common, so please just
	 * don't add anything before this first member (__sk_common) --acme
	 */
	struct sock_common	__sk_common;
#define sk_node			__sk_common.skc_node
#define sk_nulls_node		__sk_common.skc_nulls_node
#define sk_refcnt		__sk_common.skc_refcnt
#define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping

#define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
#define sk_dontcopy_end		__sk_common.skc_dontcopy_end
#define sk_hash			__sk_common.skc_hash
#define sk_family		__sk_common.skc_family
#define sk_state		__sk_common.skc_state
#define sk_reuse		__sk_common.skc_reuse
#define sk_reuseport		__sk_common.skc_reuseport
#define sk_bound_dev_if		__sk_common.skc_bound_dev_if
#define sk_bind_node		__sk_common.skc_bind_node
#define sk_prot			__sk_common.skc_prot
#define sk_net			__sk_common.skc_net
	socket_lock_t		sk_lock;
	struct sk_buff_head	sk_receive_queue;
	/*
	 * The backlog queue is special, it is always used with
	 * the per-socket spinlock held and requires low latency
	 * access. Therefore we special case its implementation.
	 * Note : rmem_alloc is in this structure to fill a hole
	 * on 64bit arches, not because it's logically part of
	 * backlog.
	 */
	struct {
		atomic_t	rmem_alloc;
		int		len;
		struct sk_buff	*head;
		struct sk_buff	*tail;
	} sk_backlog;
#define sk_rmem_alloc sk_backlog.rmem_alloc
	int			sk_forward_alloc;
#ifdef CONFIG_RPS
	__u32			sk_rxhash;
#endif
#ifdef CONFIG_NET_LL_RX_POLL
	unsigned int		sk_napi_id;
	unsigned int		sk_ll_usec;
#endif
	atomic_t		sk_drops;
	int			sk_rcvbuf;

	struct sk_filter __rcu	*sk_filter;
	struct socket_wq __rcu	*sk_wq;

#ifdef CONFIG_NET_DMA
	struct sk_buff_head	sk_async_wait_queue;
#endif

#ifdef CONFIG_XFRM
	struct xfrm_policy	*sk_policy[2];
#endif
	unsigned long 		sk_flags;
	struct dst_entry	*sk_rx_dst;
	struct dst_entry __rcu	*sk_dst_cache;
	spinlock_t		sk_dst_lock;
	atomic_t		sk_wmem_alloc;
	atomic_t		sk_omem_alloc;
	int			sk_sndbuf;
	struct sk_buff_head	sk_write_queue;
	kmemcheck_bitfield_begin(flags);
	unsigned int		sk_shutdown  : 2,
				sk_no_check  : 2,
				sk_userlocks : 4,
				sk_protocol  : 8,
				sk_type      : 16;
	kmemcheck_bitfield_end(flags);
	int			sk_wmem_queued;
	gfp_t			sk_allocation;
	netdev_features_t	sk_route_caps;
	netdev_features_t	sk_route_nocaps;
	int			sk_gso_type;
	unsigned int		sk_gso_max_size;
	u16			sk_gso_max_segs;
	int			sk_rcvlowat;
	unsigned long	        sk_lingertime;
	struct sk_buff_head	sk_error_queue;
	struct proto		*sk_prot_creator;
	rwlock_t		sk_callback_lock;
	int			sk_err,
				sk_err_soft;
	unsigned short		sk_ack_backlog;
	unsigned short		sk_max_ack_backlog;
	__u32			sk_priority;
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
	__u32			sk_cgrp_prioidx;
#endif
	struct pid		*sk_peer_pid;
	const struct cred	*sk_peer_cred;
	long			sk_rcvtimeo;
	long			sk_sndtimeo;
	void			*sk_protinfo;
	struct timer_list	sk_timer;
	ktime_t			sk_stamp;
	struct socket		*sk_socket;
	void			*sk_user_data;
	struct page_frag	sk_frag;
	struct sk_buff		*sk_send_head;
	__s32			sk_peek_off;
	int			sk_write_pending;
#ifdef CONFIG_SECURITY
	void			*sk_security;
#endif
	__u32			sk_mark;
	u32			sk_classid;
	struct cg_proto		*sk_cgrp;
	void			(*sk_state_change)(struct sock *sk);
	void			(*sk_data_ready)(struct sock *sk, int bytes);
	void			(*sk_write_space)(struct sock *sk);
	void			(*sk_error_report)(struct sock *sk);
	int			(*sk_backlog_rcv)(struct sock *sk,
						  struct sk_buff *skb);
	void                    (*sk_destruct)(struct sock *sk);
};

/*
 * SK_CAN_REUSE and SK_NO_REUSE on a socket mean whether or not the
 * socket's port may be reused by someone else. SK_FORCE_REUSE on a
 * socket means that the socket will reuse everybody else's port
 * without looking at the other's sk_reuse value.
 */

#define SK_NO_REUSE	0
#define SK_CAN_REUSE	1
#define SK_FORCE_REUSE	2

static inline int sk_peek_offset(struct sock *sk, int flags)
{
	if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0))
		return sk->sk_peek_off;
	else
		return 0;
}

static inline void sk_peek_offset_bwd(struct sock *sk, int val)
{
	if (sk->sk_peek_off >= 0) {
		if (sk->sk_peek_off >= val)
			sk->sk_peek_off -= val;
		else
			sk->sk_peek_off = 0;
	}
}

static inline void sk_peek_offset_fwd(struct sock *sk, int val)
{
	if (sk->sk_peek_off >= 0)
		sk->sk_peek_off += val;
}
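
/*
 * Illustrative sketch of how a datagram protocol supporting SO_PEEK_OFF
 * uses the peek-offset helpers above (cf. the af_unix datagram receive
 * path):
 *
 *	skip = sk_peek_offset(sk, flags);	// 0 unless MSG_PEEK with offset
 *	... dequeue/peek the skb, skipping 'skip' bytes ...
 *	if (flags & MSG_PEEK)
 *		sk_peek_offset_fwd(sk, len);	// next peek starts further in
 *	else
 *		sk_peek_offset_bwd(sk, len);	// data consumed, pull offset back
 */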

/*
 * Hashed lists helper routines
 */
static inline struct sock *sk_entry(const struct hlist_node *node)
{
	return hlist_entry(node, struct sock, sk_node);
}

static inline struct sock *__sk_head(const struct hlist_head *head)
{
	return hlist_entry(head->first, struct sock, sk_node);
}

static inline struct sock *sk_head(const struct hlist_head *head)
{
	return hlist_empty(head) ? NULL : __sk_head(head);
}

static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head)
{
	return hlist_nulls_entry(head->first, struct sock, sk_nulls_node);
}

static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head);
}

static inline struct sock *sk_next(const struct sock *sk)
{
	return sk->sk_node.next ?
		hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
}

static inline struct sock *sk_nulls_next(const struct sock *sk)
{
	return (!is_a_nulls(sk->sk_nulls_node.next)) ?
		hlist_nulls_entry(sk->sk_nulls_node.next,
				  struct sock, sk_nulls_node) :
		NULL;
}

static inline bool sk_unhashed(const struct sock *sk)
{
	return hlist_unhashed(&sk->sk_node);
}

static inline bool sk_hashed(const struct sock *sk)
{
	return !sk_unhashed(sk);
}

static inline void sk_node_init(struct hlist_node *node)
{
	node->pprev = NULL;
}

static inline void sk_nulls_node_init(struct hlist_nulls_node *node)
{
	node->pprev = NULL;
}

static inline void __sk_del_node(struct sock *sk)
{
	__hlist_del(&sk->sk_node);
}

/* NB: equivalent to hlist_del_init_rcu */
static inline bool __sk_del_node_init(struct sock *sk)
{
	if (sk_hashed(sk)) {
		__sk_del_node(sk);
		sk_node_init(&sk->sk_node);
		return true;
	}
	return false;
}

/* Grab socket reference count. This operation is valid only
   when sk is ALREADY grabbed, e.g. it is found in a hash table
   or a list and the lookup is made under a lock preventing hash table
   modifications.
 */

static inline void sock_hold(struct sock *sk)
{
	atomic_inc(&sk->sk_refcnt);
}

/* Ungrab socket in a context where the socket refcnt
   cannot hit zero, e.g. inside any socketcall.
 */
static inline void __sock_put(struct sock *sk)
{
	atomic_dec(&sk->sk_refcnt);
}
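
/*
 * Typical lookup pattern (illustrative; the lookup helper here is
 * hypothetical): a socket found on a hash chain is pinned with
 * sock_hold() before the chain lock is dropped, and released with
 * sock_put() once the caller is done:
 *
 *	sk = my_proto_lookup(...);	// found under the hash lock
 *	if (sk)
 *		sock_hold(sk);
 *	... use sk outside the lock ...
 *	sock_put(sk);			// may free the socket
 */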

static inline bool sk_del_node_init(struct sock *sk)
{
	bool rc = __sk_del_node_init(sk);

	if (rc) {
		/* paranoid for a while -acme */
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
	return rc;
}
#define sk_del_node_init_rcu(sk)	sk_del_node_init(sk)

static inline bool __sk_nulls_del_node_init_rcu(struct sock *sk)
{
	if (sk_hashed(sk)) {
		hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
		return true;
	}
	return false;
}

static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
{
	bool rc = __sk_nulls_del_node_init_rcu(sk);

	if (rc) {
		/* paranoid for a while -acme */
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
	return rc;
}

static inline void __sk_add_node(struct sock *sk, struct hlist_head *list)
{
	hlist_add_head(&sk->sk_node, list);
}

static inline void sk_add_node(struct sock *sk, struct hlist_head *list)
{
	sock_hold(sk);
	__sk_add_node(sk, list);
}

static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
{
	sock_hold(sk);
	hlist_add_head_rcu(&sk->sk_node, list);
}

static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}

static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
	sock_hold(sk);
	__sk_nulls_add_node_rcu(sk, list);
}

static inline void __sk_del_bind_node(struct sock *sk)
{
	__hlist_del(&sk->sk_bind_node);
}

static inline void sk_add_bind_node(struct sock *sk,
					struct hlist_head *list)
{
	hlist_add_head(&sk->sk_bind_node, list);
}

#define sk_for_each(__sk, list) \
	hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
	hlist_for_each_entry_rcu(__sk, list, sk_node)
#define sk_nulls_for_each(__sk, node, list) \
	hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
#define sk_nulls_for_each_rcu(__sk, node, list) \
	hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
#define sk_for_each_from(__sk) \
	hlist_for_each_entry_from(__sk, sk_node)
#define sk_nulls_for_each_from(__sk, node) \
	if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
		hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
#define sk_for_each_safe(__sk, tmp, list) \
	hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
	hlist_for_each_entry(__sk, list, sk_bind_node)

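/*
 * Illustrative walk of a protocol hash chain with the iterators above;
 * 'head' is assumed to be an hlist_head protected by the relevant
 * hash-bucket lock, and 'hash'/'dif' are hypothetical lookup keys:
 *
 *	struct sock *sk;
 *
 *	sk_for_each(sk, head) {
 *		if (sk->sk_hash == hash && sk->sk_bound_dev_if == dif)
 *			goto found;
 *	}
 */
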
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
	/* Careful: only use this in a context where these parameters
	 * cannot change and are all valid, such as recvmsg from
	 * userspace.
	 */
	return sk->sk_socket->file->f_cred->user_ns;
}

/* Sock flags */
enum sock_flags {
	SOCK_DEAD,
	SOCK_DONE,
	SOCK_URGINLINE,
	SOCK_KEEPOPEN,
	SOCK_LINGER,
	SOCK_DESTROY,
	SOCK_BROADCAST,
	SOCK_TIMESTAMP,
	SOCK_ZAPPED,
	SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
	SOCK_DBG, /* %SO_DEBUG setting */
	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
	SOCK_MEMALLOC, /* VM depends on this socket for swapping */
	SOCK_TIMESTAMPING_TX_HARDWARE,  /* %SOF_TIMESTAMPING_TX_HARDWARE */
	SOCK_TIMESTAMPING_TX_SOFTWARE,  /* %SOF_TIMESTAMPING_TX_SOFTWARE */
	SOCK_TIMESTAMPING_RX_HARDWARE,  /* %SOF_TIMESTAMPING_RX_HARDWARE */
	SOCK_TIMESTAMPING_RX_SOFTWARE,  /* %SOF_TIMESTAMPING_RX_SOFTWARE */
	SOCK_TIMESTAMPING_SOFTWARE,     /* %SOF_TIMESTAMPING_SOFTWARE */
	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
	SOCK_FASYNC, /* fasync() active */
	SOCK_RXQ_OVFL,
	SOCK_ZEROCOPY, /* buffers from userspace */
	SOCK_WIFI_STATUS, /* push wifi status to userspace */
	SOCK_NOFCS, /* Tell NIC not to do the Ethernet FCS.
		     * Will use last 4 bytes of packet sent from
		     * user-space instead.
		     */
	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
};

static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
{
	nsk->sk_flags = osk->sk_flags;
}

static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
{
	__set_bit(flag, &sk->sk_flags);
}

static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag)
{
	__clear_bit(flag, &sk->sk_flags);
}

static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
{
	return test_bit(flag, &sk->sk_flags);
}
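
/*
 * The flag helpers above use non-atomic bitops (__set_bit/__clear_bit);
 * callers are expected to hold the socket lock or to be on a
 * single-owner setup path. Illustrative use:
 *
 *	sock_set_flag(sk, SOCK_LINGER);
 *	if (sock_flag(sk, SOCK_DEAD))
 *		return;		// socket already being torn down
 */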

#ifdef CONFIG_NET
extern struct static_key memalloc_socks;
static inline int sk_memalloc_socks(void)
{
	return static_key_false(&memalloc_socks);
}
#else

static inline int sk_memalloc_socks(void)
{
	return 0;
}

#endif

static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask)
{
	return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC);
}
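
/*
 * Illustrative: an atomic-context allocation on behalf of a SOCK_MEMALLOC
 * socket (whose sk_allocation carries __GFP_MEMALLOC) keeps its access to
 * memory reserves by passing the result of sk_gfp_atomic():
 *
 *	skb = alloc_skb(size, sk_gfp_atomic(sk, GFP_ATOMIC));
 */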

static inline void sk_acceptq_removed(struct sock *sk)
{
	sk->sk_ack_backlog--;
}

static inline void sk_acceptq_added(struct sock *sk)
{
	sk->sk_ack_backlog++;
}

static inline bool sk_acceptq_is_full(const struct sock *sk)
{
	return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
}

/*
 * Compute minimal free write space needed to queue new packets.
 */
static inline int sk_stream_min_wspace(const struct sock *sk)
{
	return sk->sk_wmem_queued >> 1;
}

static inline int sk_stream_wspace(const struct sock *sk)
{
	return sk->sk_sndbuf - sk->sk_wmem_queued;
}

extern void sk_stream_write_space(struct sock *sk);

static inline bool sk_stream_memory_free(const struct sock *sk)
{
	return sk->sk_wmem_queued < sk->sk_sndbuf;
}

/* OOB backlog add */
static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	/* take a reference on skb's dst: we are about to leave the rcu lock */
	skb_dst_force(skb);

	if (!sk->sk_backlog.tail)
		sk->sk_backlog.head = skb;
	else
		sk->sk_backlog.tail->next = skb;

	sk->sk_backlog.tail = skb;
	skb->next = NULL;
}

/*
 * Take into account the size of the receive queue and backlog queue.
 * Do not take this skb's truesize into account, so that even a
 * single big packet can get in.
 */
static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb,
				     unsigned int limit)
{
	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);

	return qsize > limit;
}

/* The per-socket spinlock must be held here. */
static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb,
					      unsigned int limit)
{
	if (sk_rcvqueues_full(sk, skb, limit))
		return -ENOBUFS;

	__sk_add_backlog(sk, skb);
	sk->sk_backlog.len += skb->truesize;
	return 0;
}
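
/*
 * A sketch of the canonical receive-path pattern around the backlog
 * (cf. tcp_v4_rcv(), which uses sk_rcvbuf + sk_sndbuf as the limit);
 * simplified and illustrative only:
 *
 *	bh_lock_sock(sk);
 *	if (!sock_owned_by_user(sk))
 *		ret = sk_backlog_rcv(sk, skb);		// process now
 *	else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf)) {
 *		bh_unlock_sock(sk);
 *		goto discard;				// backlog full, drop
 *	}
 *	bh_unlock_sock(sk);
 */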

extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);

static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
		return __sk_backlog_rcv(sk, skb);

	return sk->sk_backlog_rcv(sk, skb);
}

static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *sock_flow_table;

	rcu_read_lock();
	sock_flow_table = rcu_dereference(rps_sock_flow_table);
	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
	rcu_read_unlock();
#endif
}

static inline void sock_rps_reset_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *sock_flow_table;

	rcu_read_lock();
	sock_flow_table = rcu_dereference(rps_sock_flow_table);
	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
	rcu_read_unlock();
#endif
}

static inline void sock_rps_save_rxhash(struct sock *sk,
					const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
	if (unlikely(sk->sk_rxhash != skb->rxhash)) {
		sock_rps_reset_flow(sk);
		sk->sk_rxhash = skb->rxhash;
	}
#endif
}

static inline void sock_rps_reset_rxhash(struct sock *sk)
{
#ifdef CONFIG_RPS
	sock_rps_reset_flow(sk);
	sk->sk_rxhash = 0;
#endif
}

#define sk_wait_event(__sk, __timeo, __condition)			\
	({	int __rc;						\
		release_sock(__sk);					\
		__rc = __condition;					\
		if (!__rc) {						\
			*(__timeo) = schedule_timeout(*(__timeo));	\
		}							\
		lock_sock(__sk);					\
		__rc = __condition;					\
		__rc;							\
	})
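
/*
 * sk_wait_event() drops the socket lock around schedule_timeout() so that
 * other contexts can make the condition true. Illustrative caller, in the
 * style of sk_wait_data():
 *
 *	DEFINE_WAIT(wait);
 *
 *	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 *	rc = sk_wait_event(sk, timeo,
 *			   !skb_queue_empty(&sk->sk_receive_queue));
 *	finish_wait(sk_sleep(sk), &wait);
 */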

extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
extern int sk_stream_error(struct sock *sk, int flags, int err);
extern void sk_stream_kill_queues(struct sock *sk);
extern void sk_set_memalloc(struct sock *sk);
extern void sk_clear_memalloc(struct sock *sk);

extern int sk_wait_data(struct sock *sk, long *timeo);

struct request_sock_ops;
struct timewait_sock_ops;
struct inet_hashinfo;
struct raw_hashinfo;
struct module;

/*
 * caches using SLAB_DESTROY_BY_RCU should leave the .next pointer of nulls
 * nodes unmodified. Special care is taken when initializing the object to zero.
 */
static inline void sk_prot_clear_nulls(struct sock *sk, int size)
{
	if (offsetof(struct sock, sk_node.next) != 0)
		memset(sk, 0, offsetof(struct sock, sk_node.next));
	memset(&sk->sk_node.pprev, 0,
	       size - offsetof(struct sock, sk_node.pprev));
}

/* Networking protocol blocks we attach to sockets.
 * socket layer -> transport layer interface
 * transport -> network interface is defined by struct inet_proto
 */
struct proto {
	void			(*close)(struct sock *sk,
					long timeout);
	int			(*connect)(struct sock *sk,
					struct sockaddr *uaddr,
					int addr_len);
	int			(*disconnect)(struct sock *sk, int flags);

	struct sock *		(*accept)(struct sock *sk, int flags, int *err);

	int			(*ioctl)(struct sock *sk, int cmd,
					 unsigned long arg);
	int			(*init)(struct sock *sk);
	void			(*destroy)(struct sock *sk);
	void			(*shutdown)(struct sock *sk, int how);
	int			(*setsockopt)(struct sock *sk, int level,
					int optname, char __user *optval,
					unsigned int optlen);
	int			(*getsockopt)(struct sock *sk, int level,
					int optname, char __user *optval,
					int __user *option);
#ifdef CONFIG_COMPAT
	int			(*compat_setsockopt)(struct sock *sk,
					int level,
					int optname, char __user *optval,
					unsigned int optlen);
	int			(*compat_getsockopt)(struct sock *sk,
					int level,
					int optname, char __user *optval,
					int __user *option);
	int			(*compat_ioctl)(struct sock *sk,
					unsigned int cmd, unsigned long arg);
#endif
	int			(*sendmsg)(struct kiocb *iocb, struct sock *sk,
					   struct msghdr *msg, size_t len);
	int			(*recvmsg)(struct kiocb *iocb, struct sock *sk,
					   struct msghdr *msg,
					   size_t len, int noblock, int flags,
					   int *addr_len);
	int			(*sendpage)(struct sock *sk, struct page *page,
					int offset, size_t size, int flags);
	int			(*bind)(struct sock *sk,
					struct sockaddr *uaddr, int addr_len);

	int			(*backlog_rcv) (struct sock *sk,
						struct sk_buff *skb);

	void		(*release_cb)(struct sock *sk);
	void		(*mtu_reduced)(struct sock *sk);

	/* Keeping track of sk's, looking them up, and port selection methods. */
	void			(*hash)(struct sock *sk);
	void			(*unhash)(struct sock *sk);
	void			(*rehash)(struct sock *sk);
	int			(*get_port)(struct sock *sk, unsigned short snum);
	void			(*clear_sk)(struct sock *sk, int size);

	/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
	unsigned int		inuse_idx;
#endif

	/* Memory pressure */
	void			(*enter_memory_pressure)(struct sock *sk);
	atomic_long_t		*memory_allocated;	/* Current allocated memory. */
	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
	/*
	 * Pressure flag: try to collapse.
	 * Technical note: it is used by multiple contexts non atomically.
	 * All of __sk_mem_schedule() is of this nature: accounting
	 * is strict, actions are advisory and have some latency.
	 */
	int			*memory_pressure;
	long			*sysctl_mem;
	int			*sysctl_wmem;
	int			*sysctl_rmem;
	int			max_header;
	bool			no_autobind;

	struct kmem_cache	*slab;
	unsigned int		obj_size;
	int			slab_flags;

	struct percpu_counter	*orphan_count;

	struct request_sock_ops	*rsk_prot;
	struct timewait_sock_ops *twsk_prot;

	union {
		struct inet_hashinfo	*hashinfo;
		struct udp_table	*udp_table;
		struct raw_hashinfo	*raw_hash;
	} h;

	struct module		*owner;

	char			name[32];

	struct list_head	node;
#ifdef SOCK_REFCNT_DEBUG
	atomic_t		socks;
#endif
#ifdef CONFIG_MEMCG_KMEM
	/*
	 * cgroup specific init/deinit functions. Called once for all
	 * protocols that implement it, from the cgroups populate function.
	 * This function has to set up any files the protocol wants to
	 * appear in the kmem cgroup filesystem.
	 */
	int			(*init_cgroup)(struct mem_cgroup *memcg,
					       struct cgroup_subsys *ss);
	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
#endif
};

/*
 * Bits in struct cg_proto.flags
 */
enum cg_proto_flags {
	/* Currently active and new sockets should be assigned to cgroups */
	MEMCG_SOCK_ACTIVE,
	/* It was ever activated; we must disarm static keys on destruction */
	MEMCG_SOCK_ACTIVATED,
};

struct cg_proto {
	void			(*enter_memory_pressure)(struct sock *sk);
	struct res_counter	*memory_allocated;	/* Current allocated memory. */
	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
	int			*memory_pressure;
	long			*sysctl_mem;
	unsigned long		flags;
	/*
	 * The memcg field is used to find which memcg we belong to directly.
	 * Each memcg struct can hold more than one cg_proto, so container_of
	 * won't really cut it.
	 *
	 * The elegant solution would be having an inverse function to
	 * proto_cgroup in struct proto, but that means polluting the structure
	 * for everybody, instead of just for memcg users.
	 */
	struct mem_cgroup	*memcg;
};
10351da177e4SLinus Torvalds 
10361da177e4SLinus Torvalds extern int proto_register(struct proto *prot, int alloc_slab);
10371da177e4SLinus Torvalds extern void proto_unregister(struct proto *prot);
10381da177e4SLinus Torvalds 
10393f134619SGlauber Costa static inline bool memcg_proto_active(struct cg_proto *cg_proto)
10403f134619SGlauber Costa {
10413f134619SGlauber Costa 	return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
10423f134619SGlauber Costa }
10433f134619SGlauber Costa 
10443f134619SGlauber Costa static inline bool memcg_proto_activated(struct cg_proto *cg_proto)
10453f134619SGlauber Costa {
10463f134619SGlauber Costa 	return test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags);
10473f134619SGlauber Costa }
10483f134619SGlauber Costa 
1049e6848976SArnaldo Carvalho de Melo #ifdef SOCK_REFCNT_DEBUG
1050e6848976SArnaldo Carvalho de Melo static inline void sk_refcnt_debug_inc(struct sock *sk)
1051e6848976SArnaldo Carvalho de Melo {
1052e6848976SArnaldo Carvalho de Melo 	atomic_inc(&sk->sk_prot->socks);
1053e6848976SArnaldo Carvalho de Melo }
1054e6848976SArnaldo Carvalho de Melo 
1055e6848976SArnaldo Carvalho de Melo static inline void sk_refcnt_debug_dec(struct sock *sk)
1056e6848976SArnaldo Carvalho de Melo {
1057e6848976SArnaldo Carvalho de Melo 	atomic_dec(&sk->sk_prot->socks);
1058e6848976SArnaldo Carvalho de Melo 	printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
1059e6848976SArnaldo Carvalho de Melo 	       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
1060e6848976SArnaldo Carvalho de Melo }
1061e6848976SArnaldo Carvalho de Melo 
1062dec34fb0SYing Xue static inline void sk_refcnt_debug_release(const struct sock *sk)
1063e6848976SArnaldo Carvalho de Melo {
1064e6848976SArnaldo Carvalho de Melo 	if (atomic_read(&sk->sk_refcnt) != 1)
1065e6848976SArnaldo Carvalho de Melo 		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
1066e6848976SArnaldo Carvalho de Melo 		       sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
1067e6848976SArnaldo Carvalho de Melo }
1068e6848976SArnaldo Carvalho de Melo #else /* SOCK_REFCNT_DEBUG */
1069e6848976SArnaldo Carvalho de Melo #define sk_refcnt_debug_inc(sk) do { } while (0)
1070e6848976SArnaldo Carvalho de Melo #define sk_refcnt_debug_dec(sk) do { } while (0)
1071e6848976SArnaldo Carvalho de Melo #define sk_refcnt_debug_release(sk) do { } while (0)
1072e6848976SArnaldo Carvalho de Melo #endif /* SOCK_REFCNT_DEBUG */
1073e6848976SArnaldo Carvalho de Melo 
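/* Usage sketch (not lifted from any one protocol): a protocol built with
 * SOCK_REFCNT_DEBUG pairs the helpers above with its socket life cycle,
 * roughly as below; the myproto_* names are made up for illustration.
 *
 *	static int myproto_init_sock(struct sock *sk)
 *	{
 *		...
 *		sk_refcnt_debug_inc(sk);	// account the new socket
 *		return 0;
 *	}
 *
 *	static void myproto_destroy_sock(struct sock *sk)
 *	{
 *		sk_refcnt_debug_dec(sk);	// socket is going away
 *		...
 *	}
 *
 * sk_refcnt_debug_release(sk) is then called on the final release path,
 * so a delayed destruction (sk_refcnt != 1) gets reported.
 */
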
1074c255a458SAndrew Morton #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)
1075c5905afbSIngo Molnar extern struct static_key memcg_socket_limit_enabled;
1076e1aab161SGlauber Costa static inline struct cg_proto *parent_cg_proto(struct proto *proto,
1077e1aab161SGlauber Costa 					       struct cg_proto *cg_proto)
1078e1aab161SGlauber Costa {
1079e1aab161SGlauber Costa 	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
1080e1aab161SGlauber Costa }
1081c5905afbSIngo Molnar #define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
1082e1aab161SGlauber Costa #else
1083e1aab161SGlauber Costa #define mem_cgroup_sockets_enabled 0
1084e1aab161SGlauber Costa static inline struct cg_proto *parent_cg_proto(struct proto *proto,
1085e1aab161SGlauber Costa 					       struct cg_proto *cg_proto)
1086e1aab161SGlauber Costa {
1087e1aab161SGlauber Costa 	return NULL;
1088e1aab161SGlauber Costa }
1089e1aab161SGlauber Costa #endif
1090e1aab161SGlauber Costa 
1091e1aab161SGlauber Costa 
1092180d8cd9SGlauber Costa static inline bool sk_has_memory_pressure(const struct sock *sk)
1093180d8cd9SGlauber Costa {
1094180d8cd9SGlauber Costa 	return sk->sk_prot->memory_pressure != NULL;
1095180d8cd9SGlauber Costa }
1096180d8cd9SGlauber Costa 
1097180d8cd9SGlauber Costa static inline bool sk_under_memory_pressure(const struct sock *sk)
1098180d8cd9SGlauber Costa {
1099180d8cd9SGlauber Costa 	if (!sk->sk_prot->memory_pressure)
1100180d8cd9SGlauber Costa 		return false;
1101e1aab161SGlauber Costa 
1102e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1103e1aab161SGlauber Costa 		return !!*sk->sk_cgrp->memory_pressure;
1104e1aab161SGlauber Costa 
1105180d8cd9SGlauber Costa 	return !!*sk->sk_prot->memory_pressure;
1106180d8cd9SGlauber Costa }
1107180d8cd9SGlauber Costa 
1108180d8cd9SGlauber Costa static inline void sk_leave_memory_pressure(struct sock *sk)
1109180d8cd9SGlauber Costa {
1110180d8cd9SGlauber Costa 	int *memory_pressure = sk->sk_prot->memory_pressure;
1111180d8cd9SGlauber Costa 
1112e1aab161SGlauber Costa 	if (!memory_pressure)
1113e1aab161SGlauber Costa 		return;
1114e1aab161SGlauber Costa 
1115e1aab161SGlauber Costa 	if (*memory_pressure)
1116180d8cd9SGlauber Costa 		*memory_pressure = 0;
1117e1aab161SGlauber Costa 
1118e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1119e1aab161SGlauber Costa 		struct cg_proto *cg_proto = sk->sk_cgrp;
1120e1aab161SGlauber Costa 		struct proto *prot = sk->sk_prot;
1121e1aab161SGlauber Costa 
1122e1aab161SGlauber Costa 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
1123e1aab161SGlauber Costa 			if (*cg_proto->memory_pressure)
1124e1aab161SGlauber Costa 				*cg_proto->memory_pressure = 0;
1125e1aab161SGlauber Costa 	}
1126e1aab161SGlauber Costa 
1127180d8cd9SGlauber Costa }
1128180d8cd9SGlauber Costa 
1129180d8cd9SGlauber Costa static inline void sk_enter_memory_pressure(struct sock *sk)
1130180d8cd9SGlauber Costa {
1131e1aab161SGlauber Costa 	if (!sk->sk_prot->enter_memory_pressure)
1132e1aab161SGlauber Costa 		return;
1133e1aab161SGlauber Costa 
1134e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1135e1aab161SGlauber Costa 		struct cg_proto *cg_proto = sk->sk_cgrp;
1136e1aab161SGlauber Costa 		struct proto *prot = sk->sk_prot;
1137e1aab161SGlauber Costa 
1138e1aab161SGlauber Costa 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
1139e1aab161SGlauber Costa 			cg_proto->enter_memory_pressure(sk);
1140e1aab161SGlauber Costa 	}
1141e1aab161SGlauber Costa 
1142180d8cd9SGlauber Costa 	sk->sk_prot->enter_memory_pressure(sk);
1143180d8cd9SGlauber Costa }
1144180d8cd9SGlauber Costa 
1145180d8cd9SGlauber Costa static inline long sk_prot_mem_limits(const struct sock *sk, int index)
1146180d8cd9SGlauber Costa {
1147180d8cd9SGlauber Costa 	long *prot = sk->sk_prot->sysctl_mem;
1148e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1149e1aab161SGlauber Costa 		prot = sk->sk_cgrp->sysctl_mem;
1150180d8cd9SGlauber Costa 	return prot[index];
1151180d8cd9SGlauber Costa }
1152180d8cd9SGlauber Costa 
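/* sysctl_mem points at a three element array of page counts (e.g.
 * sysctl_tcp_mem): index 0 is the no-pressure floor, index 1 the pressure
 * threshold and index 2 the hard limit. A sketched check, after the logic
 * in __sk_mem_schedule() ("suppress_allocation" is an illustrative label):
 *
 *	if (sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
 *		goto suppress_allocation;	// over the hard limit
 */
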
1153e1aab161SGlauber Costa static inline void memcg_memory_allocated_add(struct cg_proto *prot,
1154e1aab161SGlauber Costa 					      unsigned long amt,
1155e1aab161SGlauber Costa 					      int *parent_status)
1156e1aab161SGlauber Costa {
1157e1aab161SGlauber Costa 	struct res_counter *fail;
1158e1aab161SGlauber Costa 	int ret;
1159e1aab161SGlauber Costa 
11600e90b31fSGlauber Costa 	ret = res_counter_charge_nofail(prot->memory_allocated,
1161e1aab161SGlauber Costa 					amt << PAGE_SHIFT, &fail);
1162e1aab161SGlauber Costa 	if (ret < 0)
1163e1aab161SGlauber Costa 		*parent_status = OVER_LIMIT;
1164e1aab161SGlauber Costa }
1165e1aab161SGlauber Costa 
1166e1aab161SGlauber Costa static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
1167e1aab161SGlauber Costa 					      unsigned long amt)
1168e1aab161SGlauber Costa {
1169e1aab161SGlauber Costa 	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
1170e1aab161SGlauber Costa }
1171e1aab161SGlauber Costa 
1172e1aab161SGlauber Costa static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
1173e1aab161SGlauber Costa {
1174e1aab161SGlauber Costa 	u64 ret;
1175e1aab161SGlauber Costa 	ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE);
1176e1aab161SGlauber Costa 	return ret >> PAGE_SHIFT;
1177e1aab161SGlauber Costa }
1178e1aab161SGlauber Costa 
1179180d8cd9SGlauber Costa static inline long
1180180d8cd9SGlauber Costa sk_memory_allocated(const struct sock *sk)
1181180d8cd9SGlauber Costa {
1182180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1183e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1184e1aab161SGlauber Costa 		return memcg_memory_allocated_read(sk->sk_cgrp);
1185e1aab161SGlauber Costa 
1186180d8cd9SGlauber Costa 	return atomic_long_read(prot->memory_allocated);
1187180d8cd9SGlauber Costa }
1188180d8cd9SGlauber Costa 
1189180d8cd9SGlauber Costa static inline long
1190e1aab161SGlauber Costa sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
1191180d8cd9SGlauber Costa {
1192180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1193e1aab161SGlauber Costa 
1194e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1195e1aab161SGlauber Costa 		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
1196e1aab161SGlauber Costa 		/* update the root cgroup regardless */
1197e1aab161SGlauber Costa 		atomic_long_add_return(amt, prot->memory_allocated);
1198e1aab161SGlauber Costa 		return memcg_memory_allocated_read(sk->sk_cgrp);
1199e1aab161SGlauber Costa 	}
1200e1aab161SGlauber Costa 
1201180d8cd9SGlauber Costa 	return atomic_long_add_return(amt, prot->memory_allocated);
1202180d8cd9SGlauber Costa }
1203180d8cd9SGlauber Costa 
1204180d8cd9SGlauber Costa static inline void
12050e90b31fSGlauber Costa sk_memory_allocated_sub(struct sock *sk, int amt)
1206180d8cd9SGlauber Costa {
1207180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1208e1aab161SGlauber Costa 
12090e90b31fSGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1210e1aab161SGlauber Costa 		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
1211e1aab161SGlauber Costa 
1212180d8cd9SGlauber Costa 	atomic_long_sub(amt, prot->memory_allocated);
1213180d8cd9SGlauber Costa }
1214180d8cd9SGlauber Costa 
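/* Sketch of how the add/sub pair above is used, mirroring
 * __sk_mem_schedule()/__sk_mem_reclaim(); amounts are in pages and
 * UNDER_LIMIT/OVER_LIMIT come from the res_counter interface:
 *
 *	int parent_status = UNDER_LIMIT;
 *	long allocated = sk_memory_allocated_add(sk, amt, &parent_status);
 *
 *	if (parent_status == OVER_LIMIT ||
 *	    allocated > sk_prot_mem_limits(sk, 2)) {
 *		sk_memory_allocated_sub(sk, amt);	// undo the charge
 *		// ... deny or suppress the allocation ...
 *	}
 */
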
1215180d8cd9SGlauber Costa static inline void sk_sockets_allocated_dec(struct sock *sk)
1216180d8cd9SGlauber Costa {
1217180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1218e1aab161SGlauber Costa 
1219e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1220e1aab161SGlauber Costa 		struct cg_proto *cg_proto = sk->sk_cgrp;
1221e1aab161SGlauber Costa 
1222e1aab161SGlauber Costa 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
1223e1aab161SGlauber Costa 			percpu_counter_dec(cg_proto->sockets_allocated);
1224e1aab161SGlauber Costa 	}
1225e1aab161SGlauber Costa 
1226180d8cd9SGlauber Costa 	percpu_counter_dec(prot->sockets_allocated);
1227180d8cd9SGlauber Costa }
1228180d8cd9SGlauber Costa 
1229180d8cd9SGlauber Costa static inline void sk_sockets_allocated_inc(struct sock *sk)
1230180d8cd9SGlauber Costa {
1231180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1232e1aab161SGlauber Costa 
1233e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1234e1aab161SGlauber Costa 		struct cg_proto *cg_proto = sk->sk_cgrp;
1235e1aab161SGlauber Costa 
1236e1aab161SGlauber Costa 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
1237e1aab161SGlauber Costa 			percpu_counter_inc(cg_proto->sockets_allocated);
1238e1aab161SGlauber Costa 	}
1239e1aab161SGlauber Costa 
1240180d8cd9SGlauber Costa 	percpu_counter_inc(prot->sockets_allocated);
1241180d8cd9SGlauber Costa }
1242180d8cd9SGlauber Costa 
1243180d8cd9SGlauber Costa static inline int
1244180d8cd9SGlauber Costa sk_sockets_allocated_read_positive(struct sock *sk)
1245180d8cd9SGlauber Costa {
1246180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1247180d8cd9SGlauber Costa 
1248e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1249518fbf9cSEric Dumazet 		return percpu_counter_read_positive(sk->sk_cgrp->sockets_allocated);
1250e1aab161SGlauber Costa 
1251518fbf9cSEric Dumazet 	return percpu_counter_read_positive(prot->sockets_allocated);
1252180d8cd9SGlauber Costa }
1253180d8cd9SGlauber Costa 
1254180d8cd9SGlauber Costa static inline int
1255180d8cd9SGlauber Costa proto_sockets_allocated_sum_positive(struct proto *prot)
1256180d8cd9SGlauber Costa {
1257180d8cd9SGlauber Costa 	return percpu_counter_sum_positive(prot->sockets_allocated);
1258180d8cd9SGlauber Costa }
1259180d8cd9SGlauber Costa 
1260180d8cd9SGlauber Costa static inline long
1261180d8cd9SGlauber Costa proto_memory_allocated(struct proto *prot)
1262180d8cd9SGlauber Costa {
1263180d8cd9SGlauber Costa 	return atomic_long_read(prot->memory_allocated);
1264180d8cd9SGlauber Costa }
1265180d8cd9SGlauber Costa 
1266180d8cd9SGlauber Costa static inline bool
1267180d8cd9SGlauber Costa proto_memory_pressure(struct proto *prot)
1268180d8cd9SGlauber Costa {
1269180d8cd9SGlauber Costa 	if (!prot->memory_pressure)
1270180d8cd9SGlauber Costa 		return false;
1271180d8cd9SGlauber Costa 	return !!*prot->memory_pressure;
1272180d8cd9SGlauber Costa }
1273180d8cd9SGlauber Costa 
127465f76517SEric Dumazet 
127565f76517SEric Dumazet #ifdef CONFIG_PROC_FS
12761da177e4SLinus Torvalds /* Called with local bh disabled */
1277c29a0bc4SPavel Emelyanov extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
1278c29a0bc4SPavel Emelyanov extern int sock_prot_inuse_get(struct net *net, struct proto *proto);
127965f76517SEric Dumazet #else
1280dc6b9b78SEric Dumazet static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
1281c29a0bc4SPavel Emelyanov 		int inc)
128265f76517SEric Dumazet {
128365f76517SEric Dumazet }
128465f76517SEric Dumazet #endif
128565f76517SEric Dumazet 
12861da177e4SLinus Torvalds 
1287614c6cb4SArnaldo Carvalho de Melo /* With per-bucket locks this operation is not atomic, so
1288614c6cb4SArnaldo Carvalho de Melo  * this version is no worse.
1289614c6cb4SArnaldo Carvalho de Melo  */
1290614c6cb4SArnaldo Carvalho de Melo static inline void __sk_prot_rehash(struct sock *sk)
1291614c6cb4SArnaldo Carvalho de Melo {
1292614c6cb4SArnaldo Carvalho de Melo 	sk->sk_prot->unhash(sk);
1293614c6cb4SArnaldo Carvalho de Melo 	sk->sk_prot->hash(sk);
1294614c6cb4SArnaldo Carvalho de Melo }
1295614c6cb4SArnaldo Carvalho de Melo 
1296fcbdf09dSOctavian Purdila void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
1297fcbdf09dSOctavian Purdila 
12981da177e4SLinus Torvalds /* About 10 seconds */
12991da177e4SLinus Torvalds #define SOCK_DESTROY_TIME (10*HZ)
13001da177e4SLinus Torvalds 
13011da177e4SLinus Torvalds /* Sockets 0-1023 can't be bound to unless you are superuser */
13021da177e4SLinus Torvalds #define PROT_SOCK	1024
13031da177e4SLinus Torvalds 
13041da177e4SLinus Torvalds #define SHUTDOWN_MASK	3
13051da177e4SLinus Torvalds #define RCV_SHUTDOWN	1
13061da177e4SLinus Torvalds #define SEND_SHUTDOWN	2
13071da177e4SLinus Torvalds 
13081da177e4SLinus Torvalds #define SOCK_SNDBUF_LOCK	1
13091da177e4SLinus Torvalds #define SOCK_RCVBUF_LOCK	2
13101da177e4SLinus Torvalds #define SOCK_BINDADDR_LOCK	4
13111da177e4SLinus Torvalds #define SOCK_BINDPORT_LOCK	8
13121da177e4SLinus Torvalds 
13131da177e4SLinus Torvalds /* sock_iocb: used to kick off async processing of socket ios */
13141da177e4SLinus Torvalds struct sock_iocb {
13151da177e4SLinus Torvalds 	struct list_head	list;
13161da177e4SLinus Torvalds 
13171da177e4SLinus Torvalds 	int			flags;
13181da177e4SLinus Torvalds 	int			size;
13191da177e4SLinus Torvalds 	struct socket		*sock;
13201da177e4SLinus Torvalds 	struct sock		*sk;
13211da177e4SLinus Torvalds 	struct scm_cookie	*scm;
13221da177e4SLinus Torvalds 	struct msghdr		*msg, async_msg;
13231da177e4SLinus Torvalds 	struct kiocb		*kiocb;
13241da177e4SLinus Torvalds };
13251da177e4SLinus Torvalds 
13261da177e4SLinus Torvalds static inline struct sock_iocb *kiocb_to_siocb(struct kiocb *iocb)
13271da177e4SLinus Torvalds {
13281da177e4SLinus Torvalds 	return (struct sock_iocb *)iocb->private;
13291da177e4SLinus Torvalds }
13301da177e4SLinus Torvalds 
13311da177e4SLinus Torvalds static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si)
13321da177e4SLinus Torvalds {
13331da177e4SLinus Torvalds 	return si->kiocb;
13341da177e4SLinus Torvalds }
13351da177e4SLinus Torvalds 
13361da177e4SLinus Torvalds struct socket_alloc {
13371da177e4SLinus Torvalds 	struct socket socket;
13381da177e4SLinus Torvalds 	struct inode vfs_inode;
13391da177e4SLinus Torvalds };
13401da177e4SLinus Torvalds 
13411da177e4SLinus Torvalds static inline struct socket *SOCKET_I(struct inode *inode)
13421da177e4SLinus Torvalds {
13431da177e4SLinus Torvalds 	return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
13441da177e4SLinus Torvalds }
13451da177e4SLinus Torvalds 
13461da177e4SLinus Torvalds static inline struct inode *SOCK_INODE(struct socket *socket)
13471da177e4SLinus Torvalds {
13481da177e4SLinus Torvalds 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
13491da177e4SLinus Torvalds }
13501da177e4SLinus Torvalds 
13513ab224beSHideo Aoki /*
13523ab224beSHideo Aoki  * Functions for memory accounting
13533ab224beSHideo Aoki  */
13543ab224beSHideo Aoki extern int __sk_mem_schedule(struct sock *sk, int size, int kind);
13553ab224beSHideo Aoki extern void __sk_mem_reclaim(struct sock *sk);
13561da177e4SLinus Torvalds 
13573ab224beSHideo Aoki #define SK_MEM_QUANTUM ((int)PAGE_SIZE)
13583ab224beSHideo Aoki #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
13593ab224beSHideo Aoki #define SK_MEM_SEND	0
13603ab224beSHideo Aoki #define SK_MEM_RECV	1
13611da177e4SLinus Torvalds 
13623ab224beSHideo Aoki static inline int sk_mem_pages(int amt)
13631da177e4SLinus Torvalds {
13643ab224beSHideo Aoki 	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
13651da177e4SLinus Torvalds }
13661da177e4SLinus Torvalds 
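/* Worked example, assuming 4 KiB pages: SK_MEM_QUANTUM is 4096 and
 * SK_MEM_QUANTUM_SHIFT is 12, so the round-up (amt + 4095) >> 12 gives
 * sk_mem_pages(1) == 1, sk_mem_pages(4096) == 1 and sk_mem_pages(4097) == 2.
 */
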
1367dc6b9b78SEric Dumazet static inline bool sk_has_account(struct sock *sk)
13681da177e4SLinus Torvalds {
13693ab224beSHideo Aoki 	/* return true if protocol supports memory accounting */
13703ab224beSHideo Aoki 	return !!sk->sk_prot->memory_allocated;
13711da177e4SLinus Torvalds }
13721da177e4SLinus Torvalds 
1373dc6b9b78SEric Dumazet static inline bool sk_wmem_schedule(struct sock *sk, int size)
13741da177e4SLinus Torvalds {
13753ab224beSHideo Aoki 	if (!sk_has_account(sk))
1376dc6b9b78SEric Dumazet 		return true;
1377d80d99d6SHerbert Xu 	return size <= sk->sk_forward_alloc ||
13783ab224beSHideo Aoki 		__sk_mem_schedule(sk, size, SK_MEM_SEND);
13793ab224beSHideo Aoki }
13803ab224beSHideo Aoki 
1381c76562b6SMel Gorman static inline bool
138235c448a8SChuck Lever sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
13833ab224beSHideo Aoki {
13843ab224beSHideo Aoki 	if (!sk_has_account(sk))
1385dc6b9b78SEric Dumazet 		return true;
13863ab224beSHideo Aoki 	return size <= sk->sk_forward_alloc ||
1387c76562b6SMel Gorman 		__sk_mem_schedule(sk, size, SK_MEM_RECV) ||
1388c76562b6SMel Gorman 		skb_pfmemalloc(skb);
13893ab224beSHideo Aoki }
13903ab224beSHideo Aoki 
13913ab224beSHideo Aoki static inline void sk_mem_reclaim(struct sock *sk)
13923ab224beSHideo Aoki {
13933ab224beSHideo Aoki 	if (!sk_has_account(sk))
13943ab224beSHideo Aoki 		return;
13953ab224beSHideo Aoki 	if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
13963ab224beSHideo Aoki 		__sk_mem_reclaim(sk);
13973ab224beSHideo Aoki }
13983ab224beSHideo Aoki 
13999993e7d3SDavid S. Miller static inline void sk_mem_reclaim_partial(struct sock *sk)
14009993e7d3SDavid S. Miller {
14019993e7d3SDavid S. Miller 	if (!sk_has_account(sk))
14029993e7d3SDavid S. Miller 		return;
14039993e7d3SDavid S. Miller 	if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
14049993e7d3SDavid S. Miller 		__sk_mem_reclaim(sk);
14059993e7d3SDavid S. Miller }
14069993e7d3SDavid S. Miller 
14073ab224beSHideo Aoki static inline void sk_mem_charge(struct sock *sk, int size)
14083ab224beSHideo Aoki {
14093ab224beSHideo Aoki 	if (!sk_has_account(sk))
14103ab224beSHideo Aoki 		return;
14113ab224beSHideo Aoki 	sk->sk_forward_alloc -= size;
14123ab224beSHideo Aoki }
14133ab224beSHideo Aoki 
14143ab224beSHideo Aoki static inline void sk_mem_uncharge(struct sock *sk, int size)
14153ab224beSHideo Aoki {
14163ab224beSHideo Aoki 	if (!sk_has_account(sk))
14173ab224beSHideo Aoki 		return;
14183ab224beSHideo Aoki 	sk->sk_forward_alloc += size;
14193ab224beSHideo Aoki }
14203ab224beSHideo Aoki 
14213ab224beSHideo Aoki static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
14223ab224beSHideo Aoki {
14233ab224beSHideo Aoki 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
14243ab224beSHideo Aoki 	sk->sk_wmem_queued -= skb->truesize;
14253ab224beSHideo Aoki 	sk_mem_uncharge(sk, skb->truesize);
14263ab224beSHideo Aoki 	__kfree_skb(skb);
1427d80d99d6SHerbert Xu }
1428d80d99d6SHerbert Xu 
14291da177e4SLinus Torvalds /* Used by processes to "lock" a socket state, so that
14301da177e4SLinus Torvalds  * interrupts and bottom half handlers won't change it
14311da177e4SLinus Torvalds  * from under us. It essentially blocks any incoming
14321da177e4SLinus Torvalds  * packets, so that we won't get any new data or any
14331da177e4SLinus Torvalds  * packets that change the state of the socket.
14341da177e4SLinus Torvalds  *
14351da177e4SLinus Torvalds  * While locked, BH processing will add new packets to
14361da177e4SLinus Torvalds  * the backlog queue.  This queue is processed by the
14371da177e4SLinus Torvalds  * owner of the socket lock right before it is released.
14381da177e4SLinus Torvalds  *
14391da177e4SLinus Torvalds  * Since ~2.3.5 it is also exclusive sleep lock serializing
14401da177e4SLinus Torvalds  * accesses from user process context.
14411da177e4SLinus Torvalds  */
1442d2e9117cSJohn Heffner #define sock_owned_by_user(sk)	((sk)->sk_lock.owned)
14431da177e4SLinus Torvalds 
1444ed07536eSPeter Zijlstra /*
1445ed07536eSPeter Zijlstra  * Macro so as to not evaluate some arguments when
1446ed07536eSPeter Zijlstra  * lockdep is not enabled.
1447ed07536eSPeter Zijlstra  *
1448ed07536eSPeter Zijlstra  * Mark both the sk_lock and the sk_lock.slock as a
1449ed07536eSPeter Zijlstra  * per-address-family lock class.
1450ed07536eSPeter Zijlstra  */
1451ed07536eSPeter Zijlstra #define sock_lock_init_class_and_name(sk, sname, skey, name, key)	\
1452ed07536eSPeter Zijlstra do {									\
1453d2e9117cSJohn Heffner 	sk->sk_lock.owned = 0;						\
1454ed07536eSPeter Zijlstra 	init_waitqueue_head(&sk->sk_lock.wq);				\
1455ed07536eSPeter Zijlstra 	spin_lock_init(&(sk)->sk_lock.slock);				\
1456ed07536eSPeter Zijlstra 	debug_check_no_locks_freed((void *)&(sk)->sk_lock,		\
1457ed07536eSPeter Zijlstra 			sizeof((sk)->sk_lock));				\
1458ed07536eSPeter Zijlstra 	lockdep_set_class_and_name(&(sk)->sk_lock.slock,		\
1459ed07536eSPeter Zijlstra 				(skey), (sname));				\
1460ed07536eSPeter Zijlstra 	lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0);	\
1461ed07536eSPeter Zijlstra } while (0)
1462ed07536eSPeter Zijlstra 
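/* Sketch of a caller (the real ones live in net/core/sock.c and key the
 * lock classes per address family; the names below are made up):
 *
 *	static struct lock_class_key myproto_slock_key, myproto_sk_lock_key;
 *
 *	sock_lock_init_class_and_name(sk,
 *			"slock-AF_MYPROTO", &myproto_slock_key,
 *			"sk_lock-AF_MYPROTO", &myproto_sk_lock_key);
 */
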
146341380930SHarvey Harrison extern void lock_sock_nested(struct sock *sk, int subclass);
1464fcc70d5fSPeter Zijlstra 
1465fcc70d5fSPeter Zijlstra static inline void lock_sock(struct sock *sk)
1466fcc70d5fSPeter Zijlstra {
1467fcc70d5fSPeter Zijlstra 	lock_sock_nested(sk, 0);
1468fcc70d5fSPeter Zijlstra }
1469fcc70d5fSPeter Zijlstra 
147041380930SHarvey Harrison extern void release_sock(struct sock *sk);
14711da177e4SLinus Torvalds 
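/* The canonical process context pattern; release_sock() also processes
 * any backlog packets that were queued while the lock was held:
 *
 *	lock_sock(sk);
 *	... modify socket state, may sleep ...
 *	release_sock(sk);
 */
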
14721da177e4SLinus Torvalds /* BH context may only use the following locking interface. */
14731da177e4SLinus Torvalds #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
1474c6366184SIngo Molnar #define bh_lock_sock_nested(__sk) \
1475c6366184SIngo Molnar 				spin_lock_nested(&((__sk)->sk_lock.slock), \
1476c6366184SIngo Molnar 				SINGLE_DEPTH_NESTING)
14771da177e4SLinus Torvalds #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
14781da177e4SLinus Torvalds 
14798a74ad60SEric Dumazet extern bool lock_sock_fast(struct sock *sk);
14808a74ad60SEric Dumazet /**
14818a74ad60SEric Dumazet  * unlock_sock_fast - complement of lock_sock_fast
14828a74ad60SEric Dumazet  * @sk: socket
14838a74ad60SEric Dumazet  * @slow: slow mode
14848a74ad60SEric Dumazet  *
14858a74ad60SEric Dumazet  * fast unlock socket for user context.
14868a74ad60SEric Dumazet  * Fast unlock of a socket, for user context.
14878a74ad60SEric Dumazet  * If slow mode is on, we call the regular release_sock().
14888a74ad60SEric Dumazet static inline void unlock_sock_fast(struct sock *sk, bool slow)
14894b0b72f7SEric Dumazet {
14908a74ad60SEric Dumazet 	if (slow)
14918a74ad60SEric Dumazet 		release_sock(sk);
14928a74ad60SEric Dumazet 	else
14934b0b72f7SEric Dumazet 		spin_unlock_bh(&sk->sk_lock.slock);
14944b0b72f7SEric Dumazet }
14954b0b72f7SEric Dumazet 
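/* lock_sock_fast() returns true when it had to fall back to the slow
 * (owned) lock; the caller must pass that result to unlock_sock_fast():
 *
 *	bool slow = lock_sock_fast(sk);
 *	... short, non-sleeping critical section ...
 *	unlock_sock_fast(sk, slow);
 */
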
14968a74ad60SEric Dumazet 
14971b8d7ae4SEric W. Biederman extern struct sock		*sk_alloc(struct net *net, int family,
1498dd0fc66fSAl Viro 					  gfp_t priority,
14996257ff21SPavel Emelyanov 					  struct proto *prot);
15001da177e4SLinus Torvalds extern void			sk_free(struct sock *sk);
1501edf02087SDenis V. Lunev extern void			sk_release_kernel(struct sock *sk);
1502e56c57d0SEric Dumazet extern struct sock		*sk_clone_lock(const struct sock *sk,
1503dd0fc66fSAl Viro 					       const gfp_t priority);
15041da177e4SLinus Torvalds 
15051da177e4SLinus Torvalds extern struct sk_buff		*sock_wmalloc(struct sock *sk,
15061da177e4SLinus Torvalds 					      unsigned long size, int force,
1507dd0fc66fSAl Viro 					      gfp_t priority);
15081da177e4SLinus Torvalds extern struct sk_buff		*sock_rmalloc(struct sock *sk,
15091da177e4SLinus Torvalds 					      unsigned long size, int force,
1510dd0fc66fSAl Viro 					      gfp_t priority);
15111da177e4SLinus Torvalds extern void			sock_wfree(struct sk_buff *skb);
15121da177e4SLinus Torvalds extern void			sock_rfree(struct sk_buff *skb);
151341063e9dSDavid S. Miller extern void			sock_edemux(struct sk_buff *skb);
15141da177e4SLinus Torvalds 
15151da177e4SLinus Torvalds extern int			sock_setsockopt(struct socket *sock, int level,
15161da177e4SLinus Torvalds 						int op, char __user *optval,
1517b7058842SDavid S. Miller 						unsigned int optlen);
15181da177e4SLinus Torvalds 
15191da177e4SLinus Torvalds extern int			sock_getsockopt(struct socket *sock, int level,
15201da177e4SLinus Torvalds 						int op, char __user *optval,
15211da177e4SLinus Torvalds 						int __user *optlen);
15221da177e4SLinus Torvalds extern struct sk_buff		*sock_alloc_send_skb(struct sock *sk,
15231da177e4SLinus Torvalds 						     unsigned long size,
15241da177e4SLinus Torvalds 						     int noblock,
15251da177e4SLinus Torvalds 						     int *errcode);
15264cc7f68dSHerbert Xu extern struct sk_buff		*sock_alloc_send_pskb(struct sock *sk,
15274cc7f68dSHerbert Xu 						      unsigned long header_len,
15284cc7f68dSHerbert Xu 						      unsigned long data_len,
15294cc7f68dSHerbert Xu 						      int noblock,
15304cc7f68dSHerbert Xu 						      int *errcode);
153186a76cafSVictor Fusco extern void *sock_kmalloc(struct sock *sk, int size,
1532dd0fc66fSAl Viro 			  gfp_t priority);
15331da177e4SLinus Torvalds extern void sock_kfree_s(struct sock *sk, void *mem, int size);
15341da177e4SLinus Torvalds extern void sk_send_sigurg(struct sock *sk);
15351da177e4SLinus Torvalds 
15361da177e4SLinus Torvalds /*
15371da177e4SLinus Torvalds  * Functions to fill in entries in struct proto_ops when a protocol
15381da177e4SLinus Torvalds  * does not implement a particular function.
15391da177e4SLinus Torvalds  */
15401da177e4SLinus Torvalds extern int                      sock_no_bind(struct socket *,
15411da177e4SLinus Torvalds 					     struct sockaddr *, int);
15421da177e4SLinus Torvalds extern int                      sock_no_connect(struct socket *,
15431da177e4SLinus Torvalds 						struct sockaddr *, int, int);
15441da177e4SLinus Torvalds extern int                      sock_no_socketpair(struct socket *,
15451da177e4SLinus Torvalds 						   struct socket *);
15461da177e4SLinus Torvalds extern int                      sock_no_accept(struct socket *,
15471da177e4SLinus Torvalds 					       struct socket *, int);
15481da177e4SLinus Torvalds extern int                      sock_no_getname(struct socket *,
15491da177e4SLinus Torvalds 						struct sockaddr *, int *, int);
15501da177e4SLinus Torvalds extern unsigned int             sock_no_poll(struct file *, struct socket *,
15511da177e4SLinus Torvalds 					     struct poll_table_struct *);
15521da177e4SLinus Torvalds extern int                      sock_no_ioctl(struct socket *, unsigned int,
15531da177e4SLinus Torvalds 					      unsigned long);
15541da177e4SLinus Torvalds extern int			sock_no_listen(struct socket *, int);
15551da177e4SLinus Torvalds extern int                      sock_no_shutdown(struct socket *, int);
15561da177e4SLinus Torvalds extern int			sock_no_getsockopt(struct socket *, int , int,
15571da177e4SLinus Torvalds 						   char __user *, int __user *);
15581da177e4SLinus Torvalds extern int			sock_no_setsockopt(struct socket *, int, int,
1559b7058842SDavid S. Miller 						   char __user *, unsigned int);
15601da177e4SLinus Torvalds extern int                      sock_no_sendmsg(struct kiocb *, struct socket *,
15611da177e4SLinus Torvalds 						struct msghdr *, size_t);
15621da177e4SLinus Torvalds extern int                      sock_no_recvmsg(struct kiocb *, struct socket *,
15631da177e4SLinus Torvalds 						struct msghdr *, size_t, int);
15641da177e4SLinus Torvalds extern int			sock_no_mmap(struct file *file,
15651da177e4SLinus Torvalds 					     struct socket *sock,
15661da177e4SLinus Torvalds 					     struct vm_area_struct *vma);
15671da177e4SLinus Torvalds extern ssize_t			sock_no_sendpage(struct socket *sock,
15681da177e4SLinus Torvalds 						struct page *page,
15691da177e4SLinus Torvalds 						int offset, size_t size,
15701da177e4SLinus Torvalds 						int flags);
15711da177e4SLinus Torvalds 
15721da177e4SLinus Torvalds /*
15731da177e4SLinus Torvalds  * Functions to fill in entries in struct proto_ops when a protocol
15741da177e4SLinus Torvalds  * uses the inet style.
15751da177e4SLinus Torvalds  */
15761da177e4SLinus Torvalds extern int sock_common_getsockopt(struct socket *sock, int level, int optname,
15771da177e4SLinus Torvalds 				  char __user *optval, int __user *optlen);
15781da177e4SLinus Torvalds extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
15791da177e4SLinus Torvalds 			       struct msghdr *msg, size_t size, int flags);
15801da177e4SLinus Torvalds extern int sock_common_setsockopt(struct socket *sock, int level, int optname,
1581b7058842SDavid S. Miller 				  char __user *optval, unsigned int optlen);
15823fdadf7dSDmitry Mishin extern int compat_sock_common_getsockopt(struct socket *sock, int level,
15833fdadf7dSDmitry Mishin 		int optname, char __user *optval, int __user *optlen);
15843fdadf7dSDmitry Mishin extern int compat_sock_common_setsockopt(struct socket *sock, int level,
1585b7058842SDavid S. Miller 		int optname, char __user *optval, unsigned int optlen);
15861da177e4SLinus Torvalds 
15871da177e4SLinus Torvalds extern void sk_common_release(struct sock *sk);
15881da177e4SLinus Torvalds 
15891da177e4SLinus Torvalds /*
15901da177e4SLinus Torvalds  *	Default socket callbacks and setup code
15911da177e4SLinus Torvalds  */
15921da177e4SLinus Torvalds 
15931da177e4SLinus Torvalds /* Initialise core socket variables */
15941da177e4SLinus Torvalds extern void sock_init_data(struct socket *sock, struct sock *sk);
15951da177e4SLinus Torvalds 
159646bcf14fSEric Dumazet extern void sk_filter_release_rcu(struct rcu_head *rcu);
159746bcf14fSEric Dumazet 
15981da177e4SLinus Torvalds /**
15991a5778aaSBen Hutchings  *	sk_filter_release - release a socket filter
1600dc9b3346SPaul Bonser  *	@fp: filter to remove
1601dc9b3346SPaul Bonser  *
1602dc9b3346SPaul Bonser  *	Remove a filter from a socket and release its resources.
1603dc9b3346SPaul Bonser  */
1604dc9b3346SPaul Bonser 
1605309dd5fcSPavel Emelyanov static inline void sk_filter_release(struct sk_filter *fp)
1606309dd5fcSPavel Emelyanov {
1607309dd5fcSPavel Emelyanov 	if (atomic_dec_and_test(&fp->refcnt))
160880f8f102SEric Dumazet 		call_rcu(&fp->rcu, sk_filter_release_rcu);
1609309dd5fcSPavel Emelyanov }
1610309dd5fcSPavel Emelyanov 
1611309dd5fcSPavel Emelyanov static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
16121da177e4SLinus Torvalds {
16131da177e4SLinus Torvalds 	unsigned int size = sk_filter_len(fp);
16141da177e4SLinus Torvalds 
16151da177e4SLinus Torvalds 	atomic_sub(size, &sk->sk_omem_alloc);
1616309dd5fcSPavel Emelyanov 	sk_filter_release(fp);
16171da177e4SLinus Torvalds }
16181da177e4SLinus Torvalds 
16191da177e4SLinus Torvalds static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
16201da177e4SLinus Torvalds {
16211da177e4SLinus Torvalds 	atomic_inc(&fp->refcnt);
16221da177e4SLinus Torvalds 	atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc);
16231da177e4SLinus Torvalds }
16241da177e4SLinus Torvalds 
16251da177e4SLinus Torvalds /*
16261da177e4SLinus Torvalds  * Socket reference counting postulates.
16271da177e4SLinus Torvalds  *
16281da177e4SLinus Torvalds  * * Each user of socket SHOULD hold a reference count.
16291da177e4SLinus Torvalds  * * Each access point to a socket (a hash table bucket, a reference from a
16301da177e4SLinus Torvalds  *   list, a running timer, an skb in flight) MUST hold a reference count.
16311da177e4SLinus Torvalds  * * When reference count hits 0, it means it will never increase back.
16321da177e4SLinus Torvalds  * * When reference count hits 0, it means that no references from
16331da177e4SLinus Torvalds  *   outside exist to this socket and the current process on the current CPU
16341da177e4SLinus Torvalds  *   is the last user and may/should destroy this socket.
16351da177e4SLinus Torvalds  * * sk_free is called from any context: process, BH, IRQ. When
16361da177e4SLinus Torvalds  *   it is called, socket has no references from outside -> sk_free
16371da177e4SLinus Torvalds  *   may release descendant resources allocated by the socket, but
16381da177e4SLinus Torvalds  *   by the time it is called, the socket is NOT referenced by any
16391da177e4SLinus Torvalds  *   hash tables, lists etc.
16401da177e4SLinus Torvalds  * * Packets, delivered from outside (from network or from another process)
16411da177e4SLinus Torvalds  *   and enqueued on receive/error queues SHOULD NOT grab a reference count
16421da177e4SLinus Torvalds  *   while they sit in a queue. Otherwise, packets will leak into a hole when
16431da177e4SLinus Torvalds  *   the socket is looked up by one CPU and unhashing is done by another CPU.
16441da177e4SLinus Torvalds  *   This is true for udp/raw, netlink (leak to receive and error queues) and
16451da177e4SLinus Torvalds  *   tcp (leak to backlog). Packet sockets do all the processing inside
16461da177e4SLinus Torvalds  *   BR_NETPROTO_LOCK, so they do not have this race condition. UNIX sockets
16471da177e4SLinus Torvalds  *   use a separate SMP lock, so they are prone to it as well.
16481da177e4SLinus Torvalds  */
16491da177e4SLinus Torvalds 
16501da177e4SLinus Torvalds /* Ungrab socket and destroy it, if it was the last reference. */
16511da177e4SLinus Torvalds static inline void sock_put(struct sock *sk)
16521da177e4SLinus Torvalds {
16531da177e4SLinus Torvalds 	if (atomic_dec_and_test(&sk->sk_refcnt))
16541da177e4SLinus Torvalds 		sk_free(sk);
16551da177e4SLinus Torvalds }
16561da177e4SLinus Torvalds 
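/* Sketch of the postulates above in practice: an asynchronous user such
 * as a timer takes a reference before it is armed and drops it when done
 * (sock_hold() is defined earlier in this file; "delay" is illustrative):
 *
 *	sock_hold(sk);
 *	mod_timer(&sk->sk_timer, jiffies + delay);
 *	...
 *	// in the timer handler, once finished with sk:
 *	sock_put(sk);
 */
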
165758a5a7b9SArnaldo Carvalho de Melo extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
165858a5a7b9SArnaldo Carvalho de Melo 			  const int nested);
165925995ff5SArnaldo Carvalho de Melo 
1660e022f0b4SKrishna Kumar static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
1661e022f0b4SKrishna Kumar {
1662e022f0b4SKrishna Kumar 	sk->sk_tx_queue_mapping = tx_queue;
1663e022f0b4SKrishna Kumar }
1664e022f0b4SKrishna Kumar 
1665e022f0b4SKrishna Kumar static inline void sk_tx_queue_clear(struct sock *sk)
1666e022f0b4SKrishna Kumar {
1667e022f0b4SKrishna Kumar 	sk->sk_tx_queue_mapping = -1;
1668e022f0b4SKrishna Kumar }
1669e022f0b4SKrishna Kumar 
1670e022f0b4SKrishna Kumar static inline int sk_tx_queue_get(const struct sock *sk)
1671e022f0b4SKrishna Kumar {
1672b0f77d0eSTom Herbert 	return sk ? sk->sk_tx_queue_mapping : -1;
1673e022f0b4SKrishna Kumar }
1674e022f0b4SKrishna Kumar 
1675972692e0SDavid S. Miller static inline void sk_set_socket(struct sock *sk, struct socket *sock)
1676972692e0SDavid S. Miller {
1677e022f0b4SKrishna Kumar 	sk_tx_queue_clear(sk);
1678972692e0SDavid S. Miller 	sk->sk_socket = sock;
1679972692e0SDavid S. Miller }
1680972692e0SDavid S. Miller 
1681aa395145SEric Dumazet static inline wait_queue_head_t *sk_sleep(struct sock *sk)
1682aa395145SEric Dumazet {
1683eaefd110SEric Dumazet 	BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0);
1684eaefd110SEric Dumazet 	return &rcu_dereference_raw(sk->sk_wq)->wait;
1685aa395145SEric Dumazet }
16861da177e4SLinus Torvalds /* Detach socket from process context.
16871da177e4SLinus Torvalds  * Announce socket dead, detach it from wait queue and inode.
16881da177e4SLinus Torvalds  * Note that the parent inode holds a reference count on this struct sock;
16891da177e4SLinus Torvalds  * we do not release it in this function, because the protocol
16901da177e4SLinus Torvalds  * probably wants some additional cleanups or even to continue
16911da177e4SLinus Torvalds  * working with this socket (TCP).
16921da177e4SLinus Torvalds  */
16931da177e4SLinus Torvalds static inline void sock_orphan(struct sock *sk)
16941da177e4SLinus Torvalds {
16951da177e4SLinus Torvalds 	write_lock_bh(&sk->sk_callback_lock);
16961da177e4SLinus Torvalds 	sock_set_flag(sk, SOCK_DEAD);
1697972692e0SDavid S. Miller 	sk_set_socket(sk, NULL);
169843815482SEric Dumazet 	sk->sk_wq  = NULL;
16991da177e4SLinus Torvalds 	write_unlock_bh(&sk->sk_callback_lock);
17001da177e4SLinus Torvalds }
17011da177e4SLinus Torvalds 
17021da177e4SLinus Torvalds static inline void sock_graft(struct sock *sk, struct socket *parent)
17031da177e4SLinus Torvalds {
17041da177e4SLinus Torvalds 	write_lock_bh(&sk->sk_callback_lock);
1705eaefd110SEric Dumazet 	sk->sk_wq = parent->wq;
17061da177e4SLinus Torvalds 	parent->sk = sk;
1707972692e0SDavid S. Miller 	sk_set_socket(sk, parent);
17084237c75cSVenkat Yekkirala 	security_sock_graft(sk, parent);
17091da177e4SLinus Torvalds 	write_unlock_bh(&sk->sk_callback_lock);
17101da177e4SLinus Torvalds }
17111da177e4SLinus Torvalds 
1712976d0201SEric W. Biederman extern kuid_t sock_i_uid(struct sock *sk);
17131da177e4SLinus Torvalds extern unsigned long sock_i_ino(struct sock *sk);
17141da177e4SLinus Torvalds 
17151da177e4SLinus Torvalds static inline struct dst_entry *
17161da177e4SLinus Torvalds __sk_dst_get(struct sock *sk)
17171da177e4SLinus Torvalds {
1718d8bf4ca9SMichal Hocko 	return rcu_dereference_check(sk->sk_dst_cache, sock_owned_by_user(sk) ||
1719f68c224fSEric Dumazet 						       lockdep_is_held(&sk->sk_lock.slock));
17201da177e4SLinus Torvalds }
17211da177e4SLinus Torvalds 
17221da177e4SLinus Torvalds static inline struct dst_entry *
17231da177e4SLinus Torvalds sk_dst_get(struct sock *sk)
17241da177e4SLinus Torvalds {
17251da177e4SLinus Torvalds 	struct dst_entry *dst;
17261da177e4SLinus Torvalds 
1727b6c6712aSEric Dumazet 	rcu_read_lock();
1728b6c6712aSEric Dumazet 	dst = rcu_dereference(sk->sk_dst_cache);
17291da177e4SLinus Torvalds 	if (dst)
17301da177e4SLinus Torvalds 		dst_hold(dst);
1731b6c6712aSEric Dumazet 	rcu_read_unlock();
17321da177e4SLinus Torvalds 	return dst;
17331da177e4SLinus Torvalds }
17341da177e4SLinus Torvalds 
1735b6c6712aSEric Dumazet extern void sk_reset_txq(struct sock *sk);
1736b6c6712aSEric Dumazet 
1737b6c6712aSEric Dumazet static inline void dst_negative_advice(struct sock *sk)
1738b6c6712aSEric Dumazet {
1739b6c6712aSEric Dumazet 	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
1740b6c6712aSEric Dumazet 
1741b6c6712aSEric Dumazet 	if (dst && dst->ops->negative_advice) {
1742b6c6712aSEric Dumazet 		ndst = dst->ops->negative_advice(dst);
1743b6c6712aSEric Dumazet 
1744b6c6712aSEric Dumazet 		if (ndst != dst) {
1745b6c6712aSEric Dumazet 			rcu_assign_pointer(sk->sk_dst_cache, ndst);
1746b6c6712aSEric Dumazet 			sk_reset_txq(sk);
1747b6c6712aSEric Dumazet 		}
1748b6c6712aSEric Dumazet 	}
1749b6c6712aSEric Dumazet }
1750b6c6712aSEric Dumazet 
17511da177e4SLinus Torvalds static inline void
17521da177e4SLinus Torvalds __sk_dst_set(struct sock *sk, struct dst_entry *dst)
17531da177e4SLinus Torvalds {
17541da177e4SLinus Torvalds 	struct dst_entry *old_dst;
17551da177e4SLinus Torvalds 
1756e022f0b4SKrishna Kumar 	sk_tx_queue_clear(sk);
17570b53ff2eSEric Dumazet 	/*
17580b53ff2eSEric Dumazet 	 * This can only be called while sk is owned by the caller,
17590b53ff2eSEric Dumazet 	 * with no state that can be checked in a rcu_dereference_check() condition
17600b53ff2eSEric Dumazet 	 */
17610b53ff2eSEric Dumazet 	old_dst = rcu_dereference_raw(sk->sk_dst_cache);
1762b6c6712aSEric Dumazet 	rcu_assign_pointer(sk->sk_dst_cache, dst);
17631da177e4SLinus Torvalds 	dst_release(old_dst);
17641da177e4SLinus Torvalds }
17651da177e4SLinus Torvalds 
17661da177e4SLinus Torvalds static inline void
17671da177e4SLinus Torvalds sk_dst_set(struct sock *sk, struct dst_entry *dst)
17681da177e4SLinus Torvalds {
1769b6c6712aSEric Dumazet 	spin_lock(&sk->sk_dst_lock);
17701da177e4SLinus Torvalds 	__sk_dst_set(sk, dst);
1771b6c6712aSEric Dumazet 	spin_unlock(&sk->sk_dst_lock);
17721da177e4SLinus Torvalds }
17731da177e4SLinus Torvalds 
17741da177e4SLinus Torvalds static inline void
17751da177e4SLinus Torvalds __sk_dst_reset(struct sock *sk)
17761da177e4SLinus Torvalds {
1777b6c6712aSEric Dumazet 	__sk_dst_set(sk, NULL);
17781da177e4SLinus Torvalds }
17791da177e4SLinus Torvalds 
17801da177e4SLinus Torvalds static inline void
17811da177e4SLinus Torvalds sk_dst_reset(struct sock *sk)
17821da177e4SLinus Torvalds {
1783b6c6712aSEric Dumazet 	spin_lock(&sk->sk_dst_lock);
17841da177e4SLinus Torvalds 	__sk_dst_reset(sk);
1785b6c6712aSEric Dumazet 	spin_unlock(&sk->sk_dst_lock);
17861da177e4SLinus Torvalds }
17871da177e4SLinus Torvalds 
1788f0088a50SDenis Vlasenko extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
17891da177e4SLinus Torvalds 
1790f0088a50SDenis Vlasenko extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
17911da177e4SLinus Torvalds 
1792dc6b9b78SEric Dumazet static inline bool sk_can_gso(const struct sock *sk)
1793bcd76111SHerbert Xu {
1794bcd76111SHerbert Xu 	return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
1795bcd76111SHerbert Xu }
1796bcd76111SHerbert Xu 
17979958089aSAndi Kleen extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
17986cbb0df7SArnaldo Carvalho de Melo 
1799c8f44affSMichał Mirosław static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
1800a465419bSEric Dumazet {
1801a465419bSEric Dumazet 	sk->sk_route_nocaps |= flags;
1802a465419bSEric Dumazet 	sk->sk_route_caps &= ~flags;
1803a465419bSEric Dumazet }
1804a465419bSEric Dumazet 
1805c6e1a0d1STom Herbert static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
1806c6e1a0d1STom Herbert 					   char __user *from, char *to,
1807912d398dSWei Yongjun 					   int copy, int offset)
1808c6e1a0d1STom Herbert {
1809c6e1a0d1STom Herbert 	if (skb->ip_summed == CHECKSUM_NONE) {
1810c6e1a0d1STom Herbert 		int err = 0;
1811c6e1a0d1STom Herbert 		__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
1812c6e1a0d1STom Herbert 		if (err)
1813c6e1a0d1STom Herbert 			return err;
1814912d398dSWei Yongjun 		skb->csum = csum_block_add(skb->csum, csum, offset);
1815c6e1a0d1STom Herbert 	} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
1816c6e1a0d1STom Herbert 		if (!access_ok(VERIFY_READ, from, copy) ||
1817c6e1a0d1STom Herbert 		    __copy_from_user_nocache(to, from, copy))
1818c6e1a0d1STom Herbert 			return -EFAULT;
1819c6e1a0d1STom Herbert 	} else if (copy_from_user(to, from, copy))
1820c6e1a0d1STom Herbert 		return -EFAULT;
1821c6e1a0d1STom Herbert 
1822c6e1a0d1STom Herbert 	return 0;
1823c6e1a0d1STom Herbert }
1824c6e1a0d1STom Herbert 
1825c6e1a0d1STom Herbert static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
1826c6e1a0d1STom Herbert 				       char __user *from, int copy)
1827c6e1a0d1STom Herbert {
1828912d398dSWei Yongjun 	int err, offset = skb->len;
1829c6e1a0d1STom Herbert 
1830912d398dSWei Yongjun 	err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy),
1831912d398dSWei Yongjun 				       copy, offset);
1832c6e1a0d1STom Herbert 	if (err)
1833912d398dSWei Yongjun 		__skb_trim(skb, offset);
1834c6e1a0d1STom Herbert 
1835c6e1a0d1STom Herbert 	return err;
1836c6e1a0d1STom Herbert }
1837c6e1a0d1STom Herbert 
1838c6e1a0d1STom Herbert static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
1839c6e1a0d1STom Herbert 					   struct sk_buff *skb,
1840c6e1a0d1STom Herbert 					   struct page *page,
1841c6e1a0d1STom Herbert 					   int off, int copy)
1842c6e1a0d1STom Herbert {
1843c6e1a0d1STom Herbert 	int err;
1844c6e1a0d1STom Herbert 
1845912d398dSWei Yongjun 	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off,
1846912d398dSWei Yongjun 				       copy, skb->len);
1847c6e1a0d1STom Herbert 	if (err)
1848c6e1a0d1STom Herbert 		return err;
1849c6e1a0d1STom Herbert 
1850c6e1a0d1STom Herbert 	skb->len	     += copy;
1851c6e1a0d1STom Herbert 	skb->data_len	     += copy;
1852c6e1a0d1STom Herbert 	skb->truesize	     += copy;
1853c6e1a0d1STom Herbert 	sk->sk_wmem_queued   += copy;
1854c6e1a0d1STom Herbert 	sk_mem_charge(sk, copy);
1855c6e1a0d1STom Herbert 	return 0;
1856c6e1a0d1STom Herbert }
1857c6e1a0d1STom Herbert 
18581da177e4SLinus Torvalds static inline int skb_copy_to_page(struct sock *sk, char __user *from,
18591da177e4SLinus Torvalds 				   struct sk_buff *skb, struct page *page,
18601da177e4SLinus Torvalds 				   int off, int copy)
18611da177e4SLinus Torvalds {
18621da177e4SLinus Torvalds 	if (skb->ip_summed == CHECKSUM_NONE) {
18631da177e4SLinus Torvalds 		int err = 0;
18645084205fSAl Viro 		__wsum csum = csum_and_copy_from_user(from,
18651da177e4SLinus Torvalds 						     page_address(page) + off,
18661da177e4SLinus Torvalds 							    copy, 0, &err);
18671da177e4SLinus Torvalds 		if (err)
18681da177e4SLinus Torvalds 			return err;
18691da177e4SLinus Torvalds 		skb->csum = csum_block_add(skb->csum, csum, skb->len);
18701da177e4SLinus Torvalds 	} else if (copy_from_user(page_address(page) + off, from, copy))
18711da177e4SLinus Torvalds 		return -EFAULT;
18721da177e4SLinus Torvalds 
18731da177e4SLinus Torvalds 	skb->len	     += copy;
18741da177e4SLinus Torvalds 	skb->data_len	     += copy;
18751da177e4SLinus Torvalds 	skb->truesize	     += copy;
18761da177e4SLinus Torvalds 	sk->sk_wmem_queued   += copy;
18773ab224beSHideo Aoki 	sk_mem_charge(sk, copy);
18781da177e4SLinus Torvalds 	return 0;
18791da177e4SLinus Torvalds }
18801da177e4SLinus Torvalds 
1881c564039fSEric Dumazet /**
1882c564039fSEric Dumazet  * sk_wmem_alloc_get - returns write allocations
1883c564039fSEric Dumazet  * @sk: socket
1884c564039fSEric Dumazet  *
1885c564039fSEric Dumazet  * Returns sk_wmem_alloc minus initial offset of one
1886c564039fSEric Dumazet  */
1887c564039fSEric Dumazet static inline int sk_wmem_alloc_get(const struct sock *sk)
1888c564039fSEric Dumazet {
1889c564039fSEric Dumazet 	return atomic_read(&sk->sk_wmem_alloc) - 1;
1890c564039fSEric Dumazet }
1891c564039fSEric Dumazet 
1892c564039fSEric Dumazet /**
1893c564039fSEric Dumazet  * sk_rmem_alloc_get - returns read allocations
1894c564039fSEric Dumazet  * @sk: socket
1895c564039fSEric Dumazet  *
1896c564039fSEric Dumazet  * Returns sk_rmem_alloc
1897c564039fSEric Dumazet  */
1898c564039fSEric Dumazet static inline int sk_rmem_alloc_get(const struct sock *sk)
1899c564039fSEric Dumazet {
1900c564039fSEric Dumazet 	return atomic_read(&sk->sk_rmem_alloc);
1901c564039fSEric Dumazet }
1902c564039fSEric Dumazet 
1903c564039fSEric Dumazet /**
1904c564039fSEric Dumazet  * sk_has_allocations - check if allocations are outstanding
1905c564039fSEric Dumazet  * @sk: socket
1906c564039fSEric Dumazet  *
1907c564039fSEric Dumazet  * Returns true if socket has write or read allocations
1908c564039fSEric Dumazet  */
1909dc6b9b78SEric Dumazet static inline bool sk_has_allocations(const struct sock *sk)
1910c564039fSEric Dumazet {
1911c564039fSEric Dumazet 	return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
1912c564039fSEric Dumazet }
1913c564039fSEric Dumazet 
1914a57de0b4SJiri Olsa /**
191543815482SEric Dumazet  * wq_has_sleeper - check if there are any waiting processes
1916acfbe96aSRandy Dunlap  * @wq: struct socket_wq
1917a57de0b4SJiri Olsa  *
191843815482SEric Dumazet  * Returns true if socket_wq has waiting processes
1919a57de0b4SJiri Olsa  *
192043815482SEric Dumazet  * The purpose of wq_has_sleeper and sock_poll_wait is to wrap the memory
1921a57de0b4SJiri Olsa  * barrier calls. They were added due to a race found within the tcp code.
1922a57de0b4SJiri Olsa  *
1923a57de0b4SJiri Olsa  * Consider following tcp code paths:
1924a57de0b4SJiri Olsa  *
1925a57de0b4SJiri Olsa  * CPU1                  CPU2
1926a57de0b4SJiri Olsa  *
1927a57de0b4SJiri Olsa  * sys_select            receive packet
1928a57de0b4SJiri Olsa  *   ...                 ...
1929a57de0b4SJiri Olsa  *   __add_wait_queue    update tp->rcv_nxt
1930a57de0b4SJiri Olsa  *   ...                 ...
1931a57de0b4SJiri Olsa  *   tp->rcv_nxt check   sock_def_readable
1932a57de0b4SJiri Olsa  *   ...                 {
193343815482SEric Dumazet  *   schedule               rcu_read_lock();
193443815482SEric Dumazet  *                          wq = rcu_dereference(sk->sk_wq);
193543815482SEric Dumazet  *                          if (wq && waitqueue_active(&wq->wait))
193643815482SEric Dumazet  *                              wake_up_interruptible(&wq->wait)
1937a57de0b4SJiri Olsa  *                          ...
1938a57de0b4SJiri Olsa  *                       }
1939a57de0b4SJiri Olsa  *
1940a57de0b4SJiri Olsa  * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay
1941a57de0b4SJiri Olsa  * in its cache, and so does the tp->rcv_nxt update on the CPU2 side.  CPU1
1942a57de0b4SJiri Olsa  * could then end up calling schedule and sleeping forever if there is no
1943a57de0b4SJiri Olsa  * more data on the socket.
1944ad462769SJiri Olsa  *
1945a57de0b4SJiri Olsa  */
194643815482SEric Dumazet static inline bool wq_has_sleeper(struct socket_wq *wq)
1947a57de0b4SJiri Olsa {
1948dc6b9b78SEric Dumazet 	/* We need to be sure we are in sync with the
1949a57de0b4SJiri Olsa 	 * add_wait_queue modifications to the wait queue.
1950a57de0b4SJiri Olsa 	 *
1951a57de0b4SJiri Olsa 	 * This memory barrier is paired in the sock_poll_wait.
1952a57de0b4SJiri Olsa 	 */
195343815482SEric Dumazet 	smp_mb();
195443815482SEric Dumazet 	return wq && waitqueue_active(&wq->wait);
1955a57de0b4SJiri Olsa }
1956a57de0b4SJiri Olsa 
1957a57de0b4SJiri Olsa /**
1958a57de0b4SJiri Olsa  * sock_poll_wait - place memory barrier behind the poll_wait call.
1959a57de0b4SJiri Olsa  * @filp:           file
1960a57de0b4SJiri Olsa  * @wait_address:   socket wait queue
1961a57de0b4SJiri Olsa  * @p:              poll_table
1962a57de0b4SJiri Olsa  *
196343815482SEric Dumazet  * See the comments in the wq_has_sleeper function.
1964a57de0b4SJiri Olsa  */
1965a57de0b4SJiri Olsa static inline void sock_poll_wait(struct file *filp,
1966a57de0b4SJiri Olsa 		wait_queue_head_t *wait_address, poll_table *p)
1967a57de0b4SJiri Olsa {
1968626cf236SHans Verkuil 	if (!poll_does_not_wait(p) && wait_address) {
1969a57de0b4SJiri Olsa 		poll_wait(filp, wait_address, p);
1970dc6b9b78SEric Dumazet 		/* We need to be sure we are in sync with the
1971a57de0b4SJiri Olsa 		 * socket flags modification.
1972a57de0b4SJiri Olsa 		 *
197343815482SEric Dumazet 		 * This memory barrier is paired in the wq_has_sleeper.
1974a57de0b4SJiri Olsa 		 */
1975a57de0b4SJiri Olsa 		smp_mb();
1976a57de0b4SJiri Olsa 	}
1977a57de0b4SJiri Olsa }
1978a57de0b4SJiri Olsa 
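/* Typical shape of a protocol poll handler using the helper above,
 * condensed from the datagram/tcp poll implementations (myproto_poll is
 * a made-up name):
 *
 *	unsigned int myproto_poll(struct file *file, struct socket *sock,
 *				  poll_table *wait)
 *	{
 *		struct sock *sk = sock->sk;
 *		unsigned int mask = 0;
 *
 *		sock_poll_wait(file, sk_sleep(sk), wait);
 *		if (!skb_queue_empty(&sk->sk_receive_queue))
 *			mask |= POLLIN | POLLRDNORM;
 *		if (sock_writeable(sk))
 *			mask |= POLLOUT | POLLWRNORM;
 *		return mask;
 *	}
 */
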
19791da177e4SLinus Torvalds /*
19801da177e4SLinus Torvalds  *	Queue a received datagram if it will fit. Stream and sequenced
19811da177e4SLinus Torvalds  *	protocols can't normally use this as they need to fit buffers in
19821da177e4SLinus Torvalds  *	and play with them.
19831da177e4SLinus Torvalds  *
19841da177e4SLinus Torvalds  *	Inlined as it's very short and called for pretty much every
19851da177e4SLinus Torvalds  *	packet ever received.
19861da177e4SLinus Torvalds  */
19871da177e4SLinus Torvalds 
19881da177e4SLinus Torvalds static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
19891da177e4SLinus Torvalds {
1990d55d87fdSHerbert Xu 	skb_orphan(skb);
19911da177e4SLinus Torvalds 	skb->sk = sk;
19921da177e4SLinus Torvalds 	skb->destructor = sock_wfree;
19932b85a34eSEric Dumazet 	/*
19942b85a34eSEric Dumazet 	 * We used to take a refcount on sk, but following operation
19952b85a34eSEric Dumazet 	 * We used to take a refcount on sk, but the following operation
19962b85a34eSEric Dumazet 	 * is enough to guarantee sk_free() won't free this sock until
19972b85a34eSEric Dumazet 	 * all in-flight packets are completed.
19981da177e4SLinus Torvalds 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
19991da177e4SLinus Torvalds }
20001da177e4SLinus Torvalds 
20011da177e4SLinus Torvalds static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
20021da177e4SLinus Torvalds {
2003d55d87fdSHerbert Xu 	skb_orphan(skb);
20041da177e4SLinus Torvalds 	skb->sk = sk;
20051da177e4SLinus Torvalds 	skb->destructor = sock_rfree;
20061da177e4SLinus Torvalds 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
20073ab224beSHideo Aoki 	sk_mem_charge(sk, skb->truesize);
20081da177e4SLinus Torvalds }
20091da177e4SLinus Torvalds 
20101da177e4SLinus Torvalds extern void sk_reset_timer(struct sock *sk, struct timer_list *timer,
20111da177e4SLinus Torvalds 			   unsigned long expires);
20121da177e4SLinus Torvalds 
20131da177e4SLinus Torvalds extern void sk_stop_timer(struct sock *sk, struct timer_list *timer);
20141da177e4SLinus Torvalds 
2015f0088a50SDenis Vlasenko extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
20161da177e4SLinus Torvalds 
2017b1faf566SEric Dumazet extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
20181da177e4SLinus Torvalds 
20191da177e4SLinus Torvalds /*
20201da177e4SLinus Torvalds  *	Recover an error report and clear atomically
20211da177e4SLinus Torvalds  */
20221da177e4SLinus Torvalds 
20231da177e4SLinus Torvalds static inline int sock_error(struct sock *sk)
20241da177e4SLinus Torvalds {
2025c1cbe4b7SBenjamin LaHaise 	int err;
2026c1cbe4b7SBenjamin LaHaise 	if (likely(!sk->sk_err))
2027c1cbe4b7SBenjamin LaHaise 		return 0;
2028c1cbe4b7SBenjamin LaHaise 	err = xchg(&sk->sk_err, 0);
20291da177e4SLinus Torvalds 	return -err;
20301da177e4SLinus Torvalds }
20311da177e4SLinus Torvalds 
20321da177e4SLinus Torvalds static inline unsigned long sock_wspace(struct sock *sk)
20331da177e4SLinus Torvalds {
20341da177e4SLinus Torvalds 	int amt = 0;
20351da177e4SLinus Torvalds 
20361da177e4SLinus Torvalds 	if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
20371da177e4SLinus Torvalds 		amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
20381da177e4SLinus Torvalds 		if (amt < 0)
20391da177e4SLinus Torvalds 			amt = 0;
20401da177e4SLinus Torvalds 	}
20411da177e4SLinus Torvalds 	return amt;
20421da177e4SLinus Torvalds }
20431da177e4SLinus Torvalds 
20441da177e4SLinus Torvalds static inline void sk_wake_async(struct sock *sk, int how, int band)
20451da177e4SLinus Torvalds {
2046bcdce719SEric Dumazet 	if (sock_flag(sk, SOCK_FASYNC))
20471da177e4SLinus Torvalds 		sock_wake_async(sk->sk_socket, how, band);
20481da177e4SLinus Torvalds }
20491da177e4SLinus Torvalds 
2050*eea86af6SDaniel Borkmann /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
2051*eea86af6SDaniel Borkmann  * need sizeof(sk_buff) + MTU + padding, unless the net driver performs copybreak.
2052*eea86af6SDaniel Borkmann  * Note: for send buffers, TCP works better if we can build two skbs at
2053*eea86af6SDaniel Borkmann  * minimum.
20547a91b434SEric Dumazet  */
2055*eea86af6SDaniel Borkmann #define TCP_SKB_MIN_TRUESIZE	(2048 + sizeof(struct sk_buff))
2056*eea86af6SDaniel Borkmann 
2057*eea86af6SDaniel Borkmann #define SOCK_MIN_SNDBUF		(TCP_SKB_MIN_TRUESIZE * 2)
2058*eea86af6SDaniel Borkmann #define SOCK_MIN_RCVBUF		 TCP_SKB_MIN_TRUESIZE
20591da177e4SLinus Torvalds 
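/* Example figures, assuming sizeof(struct sk_buff) == 232 (a common
 * x86_64 value; it varies with the config): TCP_SKB_MIN_TRUESIZE is
 * 2048 + 232 = 2280, so SOCK_MIN_SNDBUF is 4560 and SOCK_MIN_RCVBUF 2280.
 */
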
20601da177e4SLinus Torvalds static inline void sk_stream_moderate_sndbuf(struct sock *sk)
20611da177e4SLinus Torvalds {
20621da177e4SLinus Torvalds 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
20638df09ea3SEric Dumazet 		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
2064*eea86af6SDaniel Borkmann 		sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
20651da177e4SLinus Torvalds 	}
20661da177e4SLinus Torvalds }
20671da177e4SLinus Torvalds 
2068df97c708SPavel Emelyanov struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
20691da177e4SLinus Torvalds 
20705640f768SEric Dumazet /**
20715640f768SEric Dumazet  * sk_page_frag - return an appropriate page_frag
20725640f768SEric Dumazet  * @sk: socket
20735640f768SEric Dumazet  *
20745640f768SEric Dumazet  * If the socket allocation mode allows the current thread to sleep, it is
20755640f768SEric Dumazet  * safe to use the per-task page_frag instead of the per-socket one.
20765640f768SEric Dumazet  */
20775640f768SEric Dumazet static inline struct page_frag *sk_page_frag(struct sock *sk)
20781da177e4SLinus Torvalds {
20795640f768SEric Dumazet 	if (sk->sk_allocation & __GFP_WAIT)
20805640f768SEric Dumazet 		return &current->task_frag;
20811da177e4SLinus Torvalds 
20825640f768SEric Dumazet 	return &sk->sk_frag;
20831da177e4SLinus Torvalds }
20845640f768SEric Dumazet 
20855640f768SEric Dumazet extern bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
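
/* Illustrative sketch of the intended pattern (assumed from the two
 * helpers above): pick the appropriate frag, top it up, then copy at the
 * current offset and advance it.
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 *	copy = min_t(int, copy, pfrag->size - pfrag->offset);
 *	(copy data to page_address(pfrag->page) + pfrag->offset)
 *	pfrag->offset += copy;
 */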
20861da177e4SLinus Torvalds 
20871da177e4SLinus Torvalds /*
20881da177e4SLinus Torvalds  *	Default write policy as shown to user space via poll/select/SIGIO
20891da177e4SLinus Torvalds  */
2090dc6b9b78SEric Dumazet static inline bool sock_writeable(const struct sock *sk)
20911da177e4SLinus Torvalds {
20928df09ea3SEric Dumazet 	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
20931da177e4SLinus Torvalds }
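
/* For example, with sk->sk_sndbuf == 16384 the socket reports writable
 * while less than 8192 bytes of truesize sit in the write queue.
 */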
20941da177e4SLinus Torvalds 
2095dd0fc66fSAl Viro static inline gfp_t gfp_any(void)
20961da177e4SLinus Torvalds {
209799709372SAndrew Morton 	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
20981da177e4SLinus Torvalds }
20991da177e4SLinus Torvalds 
2100dc6b9b78SEric Dumazet static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
21011da177e4SLinus Torvalds {
21021da177e4SLinus Torvalds 	return noblock ? 0 : sk->sk_rcvtimeo;
21031da177e4SLinus Torvalds }
21041da177e4SLinus Torvalds 
2105dc6b9b78SEric Dumazet static inline long sock_sndtimeo(const struct sock *sk, bool noblock)
21061da177e4SLinus Torvalds {
21071da177e4SLinus Torvalds 	return noblock ? 0 : sk->sk_sndtimeo;
21081da177e4SLinus Torvalds }
21091da177e4SLinus Torvalds 
21101da177e4SLinus Torvalds static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
21111da177e4SLinus Torvalds {
21121da177e4SLinus Torvalds 	return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
21131da177e4SLinus Torvalds }
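
/* For example, with SO_RCVLOWAT set to 4096: sock_rcvlowat(sk, 0, 1000)
 * returns 1000 and sock_rcvlowat(sk, 0, 10000) returns 4096, while a
 * MSG_WAITALL caller gets len back; a zero result is promoted to 1 so a
 * blocking read always waits for at least one byte.
 */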
21141da177e4SLinus Torvalds 
21151da177e4SLinus Torvalds /* Alas, socket operations with a finite timeout are not restartable, since
21161da177e4SLinus Torvalds  * the remaining timeout would be lost across a restart.  Compare this to poll().
21171da177e4SLinus Torvalds  */
21181da177e4SLinus Torvalds static inline int sock_intr_errno(long timeo)
21191da177e4SLinus Torvalds {
21201da177e4SLinus Torvalds 	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
21211da177e4SLinus Torvalds }
21221da177e4SLinus Torvalds 
212392f37fd2SEric Dumazet extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
212492f37fd2SEric Dumazet 	struct sk_buff *skb);
21256e3e939fSJohannes Berg extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
21266e3e939fSJohannes Berg 	struct sk_buff *skb);
212792f37fd2SEric Dumazet 
2128dc6b9b78SEric Dumazet static inline void
21291da177e4SLinus Torvalds sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
21301da177e4SLinus Torvalds {
2131b7aa0bf7SEric Dumazet 	ktime_t kt = skb->tstamp;
213220d49473SPatrick Ohly 	struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
2133a61bbcf2SPatrick McHardy 
213420d49473SPatrick Ohly 	/*
213520d49473SPatrick Ohly 	 * generate control messages if
213620d49473SPatrick Ohly 	 * - receive time stamping in software requested (SOCK_RCVTSTAMP
213720d49473SPatrick Ohly 	 *   or SOCK_TIMESTAMPING_RX_SOFTWARE)
213820d49473SPatrick Ohly 	 * - software time stamp available and wanted
213920d49473SPatrick Ohly 	 *   (SOCK_TIMESTAMPING_SOFTWARE)
214020d49473SPatrick Ohly 	 * - hardware time stamps available and wanted
214120d49473SPatrick Ohly 	 *   (SOCK_TIMESTAMPING_SYS_HARDWARE or
214220d49473SPatrick Ohly 	 *   SOCK_TIMESTAMPING_RAW_HARDWARE)
214320d49473SPatrick Ohly 	 */
214420d49473SPatrick Ohly 	if (sock_flag(sk, SOCK_RCVTSTAMP) ||
214520d49473SPatrick Ohly 	    sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) ||
214620d49473SPatrick Ohly 	    (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) ||
214720d49473SPatrick Ohly 	    (hwtstamps->hwtstamp.tv64 &&
214820d49473SPatrick Ohly 	     sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) ||
214920d49473SPatrick Ohly 	    (hwtstamps->syststamp.tv64 &&
215020d49473SPatrick Ohly 	     sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)))
215192f37fd2SEric Dumazet 		__sock_recv_timestamp(msg, sk, skb);
215292f37fd2SEric Dumazet 	else
2153b7aa0bf7SEric Dumazet 		sk->sk_stamp = kt;
21546e3e939fSJohannes Berg 
21556e3e939fSJohannes Berg 	if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
21566e3e939fSJohannes Berg 		__sock_recv_wifi_status(msg, sk, skb);
21571da177e4SLinus Torvalds }
21581da177e4SLinus Torvalds 
2159767dd033SEric Dumazet extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
2160767dd033SEric Dumazet 				     struct sk_buff *skb);
2161767dd033SEric Dumazet 
2162767dd033SEric Dumazet static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
2163767dd033SEric Dumazet 					  struct sk_buff *skb)
2164767dd033SEric Dumazet {
2165767dd033SEric Dumazet #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
2166767dd033SEric Dumazet 			   (1UL << SOCK_RCVTSTAMP)			| \
2167767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)	| \
2168767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_SOFTWARE)		| \
2169767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)	| \
2170767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
2171767dd033SEric Dumazet 
2172767dd033SEric Dumazet 	if (sk->sk_flags & FLAGS_TS_OR_DROPS)
2173767dd033SEric Dumazet 		__sock_recv_ts_and_drops(msg, sk, skb);
2174767dd033SEric Dumazet 	else
2175767dd033SEric Dumazet 		sk->sk_stamp = skb->tstamp;
2176767dd033SEric Dumazet }
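
/* Illustrative sketch (an assumed caller pattern): a protocol's recvmsg()
 * calls one of the two helpers above once per delivered skb, e.g.
 *
 *	sock_recv_ts_and_drops(msg, sk, skb);
 *
 * so the requested timestamp and drop-count control messages are appended
 * to @msg.
 */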
21773b885787SNeil Horman 
21781da177e4SLinus Torvalds /**
217920d49473SPatrick Ohly  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
218020d49473SPatrick Ohly  * @sk:		socket sending this packet
21812244d07bSOliver Hartkopp  * @tx_flags:	filled with instructions for time stamping
218220d49473SPatrick Ohly  *
2183bf84a010SDaniel Borkmann  * Currently only depends on SOCK_TIMESTAMPING* flags.
218420d49473SPatrick Ohly  */
2185bf84a010SDaniel Borkmann extern void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags);
218620d49473SPatrick Ohly 
218720d49473SPatrick Ohly /**
21881da177e4SLinus Torvalds  * sk_eat_skb - Release a skb if it is no longer needed
21894dc3b16bSPavel Pisa  * @sk: socket to eat this skb from
21904dc3b16bSPavel Pisa  * @skb: socket buffer to eat
2191f4b8ea78SRandy Dunlap  * @copied_early: flag indicating whether DMA operations copied this data early
21921da177e4SLinus Torvalds  *
21931da177e4SLinus Torvalds  * This routine must be called with interrupts disabled or with the socket
21941da177e4SLinus Torvalds  * locked so that the sk_buff queue operation is safe.
21951da177e4SLinus Torvalds  */
2196624d1164SChris Leech #ifdef CONFIG_NET_DMA
2197dc6b9b78SEric Dumazet static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early)
2198624d1164SChris Leech {
2199624d1164SChris Leech 	__skb_unlink(skb, &sk->sk_receive_queue);
2200624d1164SChris Leech 	if (!copied_early)
2201624d1164SChris Leech 		__kfree_skb(skb);
2202624d1164SChris Leech 	else
2203624d1164SChris Leech 		__skb_queue_tail(&sk->sk_async_wait_queue, skb);
2204624d1164SChris Leech }
2205624d1164SChris Leech #else
2206dc6b9b78SEric Dumazet static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early)
22071da177e4SLinus Torvalds {
22081da177e4SLinus Torvalds 	__skb_unlink(skb, &sk->sk_receive_queue);
22091da177e4SLinus Torvalds 	__kfree_skb(skb);
22101da177e4SLinus Torvalds }
2211624d1164SChris Leech #endif
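
/* Illustrative usage (a sketch): a receive loop unlinks each fully
 * consumed skb, e.g.
 *
 *	sk_eat_skb(sk, skb, false);
 *
 * passing true for @copied_early only when a DMA engine already copied the
 * data, so the skb is parked on sk_async_wait_queue instead of being freed.
 */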
22121da177e4SLinus Torvalds 
22133b1e0a65SYOSHIFUJI Hideaki static inline
22143b1e0a65SYOSHIFUJI Hideaki struct net *sock_net(const struct sock *sk)
22153b1e0a65SYOSHIFUJI Hideaki {
2216c2d9ba9bSEric Dumazet 	return read_pnet(&sk->sk_net);
22173b1e0a65SYOSHIFUJI Hideaki }
22183b1e0a65SYOSHIFUJI Hideaki 
22193b1e0a65SYOSHIFUJI Hideaki static inline
2220f5aa23fdSDenis V. Lunev void sock_net_set(struct sock *sk, struct net *net)
22213b1e0a65SYOSHIFUJI Hideaki {
2222c2d9ba9bSEric Dumazet 	write_pnet(&sk->sk_net, net);
22233b1e0a65SYOSHIFUJI Hideaki }
22243b1e0a65SYOSHIFUJI Hideaki 
2225edf02087SDenis V. Lunev /*
2226edf02087SDenis V. Lunev  * Kernel sockets, e.g. rtnl or icmp_socket, are part of a namespace.
222725985edcSLucas De Marchi  * They should not hold a reference to a namespace in order to allow
2228edf02087SDenis V. Lunev  * the namespace to be stopped.
2229edf02087SDenis V. Lunev  * Sockets after sk_change_net() should be released using sk_release_kernel().
2230edf02087SDenis V. Lunev  */
2231edf02087SDenis V. Lunev static inline void sk_change_net(struct sock *sk, struct net *net)
2232edf02087SDenis V. Lunev {
22333b1e0a65SYOSHIFUJI Hideaki 	put_net(sock_net(sk));
223465a18ec5SDenis V. Lunev 	sock_net_set(sk, hold_net(net));
2235edf02087SDenis V. Lunev }
2236edf02087SDenis V. Lunev 
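/**
 * skb_steal_sock - take over the socket attached to an skb
 * @skb: buffer to steal the socket from
 *
 * If a socket is attached to @skb, clear skb->sk and skb->destructor so
 * the skb no longer accounts against that socket and return it to the
 * caller; return NULL if no socket was attached.
 */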
223723542618SKOVACS Krisztian static inline struct sock *skb_steal_sock(struct sk_buff *skb)
223823542618SKOVACS Krisztian {
2239efc27f8cSVijay Subramanian 	if (skb->sk) {
224023542618SKOVACS Krisztian 		struct sock *sk = skb->sk;
224123542618SKOVACS Krisztian 
224223542618SKOVACS Krisztian 		skb->destructor = NULL;
224323542618SKOVACS Krisztian 		skb->sk = NULL;
224423542618SKOVACS Krisztian 		return sk;
224523542618SKOVACS Krisztian 	}
224623542618SKOVACS Krisztian 	return NULL;
224723542618SKOVACS Krisztian }
224823542618SKOVACS Krisztian 
224920d49473SPatrick Ohly extern void sock_enable_timestamp(struct sock *sk, int flag);
22501da177e4SLinus Torvalds extern int sock_get_timestamp(struct sock *, struct timeval __user *);
2251ae40eb1eSEric Dumazet extern int sock_get_timestampns(struct sock *, struct timespec __user *);
22521da177e4SLinus Torvalds 
22531da177e4SLinus Torvalds /*
22541da177e4SLinus Torvalds  *	Enable debug/info messages
22551da177e4SLinus Torvalds  */
2256a2a316fdSStephen Hemminger extern int net_msg_warn;
2257a2a316fdSStephen Hemminger #define NETDEBUG(fmt, args...) \
2258a2a316fdSStephen Hemminger 	do { if (net_msg_warn) printk(fmt,##args); } while (0)
22591da177e4SLinus Torvalds 
2260a2a316fdSStephen Hemminger #define LIMIT_NETDEBUG(fmt, args...) \
2261a2a316fdSStephen Hemminger 	do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)
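
/* Illustrative usage (a sketch):
 *
 *	LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet\n");
 *
 * prints only when net_msg_warn is set and net_ratelimit() permits it.
 */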
22621da177e4SLinus Torvalds 
22631da177e4SLinus Torvalds extern __u32 sysctl_wmem_max;
22641da177e4SLinus Torvalds extern __u32 sysctl_rmem_max;
22651da177e4SLinus Torvalds 
22666baf1f41SDavid S. Miller extern int sysctl_optmem_max;
22676baf1f41SDavid S. Miller 
226820380731SArnaldo Carvalho de Melo extern __u32 sysctl_wmem_default;
226920380731SArnaldo Carvalho de Melo extern __u32 sysctl_rmem_default;
227020380731SArnaldo Carvalho de Melo 
22711da177e4SLinus Torvalds #endif	/* _SOCK_H */
2272