/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the AF_INET socket handler.
 *
 * Version:	@(#)sock.h	1.0.4	05/13/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche <flla@stud.uni-sb.de>
 *
 * Fixes:
 *		Alan Cox	:	Volatiles in skbuff pointers. See
 *					skbuff comments. May be overdone,
 *					better to prove they can be removed
 *					than the reverse.
 *		Alan Cox	:	Added a zapped field for tcp to note
 *					a socket is reset and must stay shut up
 *		Alan Cox	:	New fields for options
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Eliminate low level recv/recvfrom
 *		David S. Miller	:	New socket lookup architecture.
 *		Steve Whitehouse:	Default routines for sock_ops
 *		Arnaldo C. Melo :	removed net_pinfo, tp_pinfo and made
 *					protinfo be just a void pointer, as the
 *					protocol specific parts were moved to
 *					respective headers and ipv4/v6, etc now
 *					use private slabcaches for their socks
 *		Pedro Hortas	:	New flags field for socket options
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _SOCK_H
#define _SOCK_H

#include <linux/hardirq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/list_nulls.h>
#include <linux/timer.h>
#include <linux/cache.h>
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>	/* struct sk_buff */
#include <linux/mm.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memcontrol.h>
#include <linux/res_counter.h>
#include <linux/static_key.h>
#include <linux/aio.h>
#include <linux/sched.h>

#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>

#include <linux/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>

struct cgroup;
struct cgroup_subsys;
#ifdef CONFIG_NET
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg);
#else
static inline
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	return 0;
}
static inline
void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
}
#endif
/*
 * This structure really needs to be cleaned up.
 * Most of it is for TCP, and not used by any of
 * the other protocols.
 */

/* Define this to get the SOCK_DBG debugging facility. */
#define SOCK_DEBUGGING
#ifdef SOCK_DEBUGGING
#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
					printk(KERN_DEBUG msg); } while (0)
#else
/* Validate arguments and do nothing */
static inline __printf(2, 3)
void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)
{
}
#endif
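
/*
 * Usage sketch (illustrative, not part of the original header): SOCK_DEBUG()
 * only emits output when the socket has SOCK_DBG set via the %SO_DEBUG
 * option, e.g.
 *
 *	SOCK_DEBUG(sk, "%s: ack %u\n", __func__, ack_seq);
 *
 * where "ack_seq" is a hypothetical local variable. When SOCK_DEBUGGING is
 * not defined, the empty inline above still lets the compiler type-check
 * the format arguments via __printf(2, 3).
 */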

/* This is the per-socket lock.  The spinlock provides a synchronization
 * between user contexts and software interrupt processing, whereas the
 * mini-semaphore synchronizes multiple users amongst themselves.
 */
typedef struct {
	spinlock_t		slock;
	int			owned;
	wait_queue_head_t	wq;
	/*
	 * We express the mutex-alike socket_lock semantics
	 * to the lock validator by explicitly managing
	 * the slock as a lock variant (in addition to
	 * the slock itself):
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} socket_lock_t;
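
/*
 * Locking sketch (illustrative): process context takes the mini-semaphore
 * (the owned flag) and may sleep, while softirq context takes only slock.
 * lock_sock() and bh_lock_sock() are defined later in this header:
 *
 *	lock_sock(sk);			// process context, may sleep
 *	...modify socket state...
 *	release_sock(sk);		// also replays the backlog queue
 *
 *	bh_lock_sock(sk);		// softirq context, spinlock only
 *	if (!sock_owned_by_user(sk))
 *		...process directly...
 *	bh_unlock_sock(sk);
 */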

struct sock;
struct proto;
struct net;

/**
 *	struct sock_common - minimal network layer representation of sockets
 *	@skc_daddr: Foreign IPv4 addr
 *	@skc_rcv_saddr: Bound local IPv4 addr
 *	@skc_hash: hash value used with various protocol lookup tables
 *	@skc_u16hashes: two u16 hash values used by UDP lookup tables
 *	@skc_family: network address family
 *	@skc_state: Connection state
 *	@skc_reuse: %SO_REUSEADDR setting
 *	@skc_bound_dev_if: bound device index if != 0
 *	@skc_bind_node: bind hash linkage for various protocol lookup tables
 *	@skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
 *	@skc_prot: protocol handlers inside a network family
 *	@skc_net: reference to the network namespace of this socket
 *	@skc_node: main hash linkage for various protocol lookup tables
 *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
 *	@skc_tx_queue_mapping: tx queue number for this connection
 *	@skc_refcnt: reference count
 *
 *	This is the minimal network layer representation of sockets, the header
 *	for struct sock and struct inet_timewait_sock.
 */
struct sock_common {
	/* skc_daddr and skc_rcv_saddr must be grouped :
	 * cf INET_MATCH() and INET_TW_MATCH()
	 */
	__be32			skc_daddr;
	__be32			skc_rcv_saddr;

	union {
		unsigned int	skc_hash;
		__u16		skc_u16hashes[2];
	};
	unsigned short		skc_family;
	volatile unsigned char	skc_state;
	unsigned char		skc_reuse;
	int			skc_bound_dev_if;
	union {
		struct hlist_node	skc_bind_node;
		struct hlist_nulls_node skc_portaddr_node;
	};
	struct proto		*skc_prot;
#ifdef CONFIG_NET_NS
	struct net		*skc_net;
#endif
	/*
	 * fields between dontcopy_begin/dontcopy_end
	 * are not copied in sock_copy()
	 */
	/* private: */
	int			skc_dontcopy_begin[0];
	/* public: */
	union {
		struct hlist_node	skc_node;
		struct hlist_nulls_node skc_nulls_node;
	};
	int			skc_tx_queue_mapping;
	atomic_t		skc_refcnt;
	/* private: */
	int			skc_dontcopy_end[0];
	/* public: */
};

struct cg_proto;
/**
  *	struct sock - network layer representation of sockets
  *	@__sk_common: shared layout with inet_timewait_sock
  *	@sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
  *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
  *	@sk_lock:	synchronizer
  *	@sk_rcvbuf: size of receive buffer in bytes
  *	@sk_wq: sock wait queue and async head
  *	@sk_dst_cache: destination cache
  *	@sk_dst_lock: destination cache lock
  *	@sk_policy: flow policy
  *	@sk_receive_queue: incoming packets
  *	@sk_wmem_alloc: transmit queue bytes committed
  *	@sk_write_queue: Packet sending queue
  *	@sk_async_wait_queue: DMA copied packets
  *	@sk_omem_alloc: "o" is "option" or "other"
  *	@sk_wmem_queued: persistent queue size
  *	@sk_forward_alloc: space allocated forward
  *	@sk_allocation: allocation mode
  *	@sk_sndbuf: size of send buffer in bytes
  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  *	@sk_no_check: %SO_NO_CHECK setting, whether or not to checksum packets
  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
  *	@sk_route_nocaps: forbidden route capabilities (e.g. %NETIF_F_GSO_MASK)
  *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
  *	@sk_gso_max_size: Maximum GSO segment size to build
  *	@sk_lingertime: %SO_LINGER l_linger setting
  *	@sk_backlog: always used with the per-socket spinlock held
  *	@sk_callback_lock: used with the callbacks in the end of this struct
  *	@sk_error_queue: rarely used
  *	@sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
  *			  IPV6_ADDRFORM for instance)
  *	@sk_err: last error
  *	@sk_err_soft: errors that don't cause failure but are the cause of a
  *		      persistent failure not just 'timed out'
  *	@sk_drops: raw/udp drops counter
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_priority: %SO_PRIORITY setting
  *	@sk_cgrp_prioidx: socket group's priority map index
  *	@sk_type: socket type (%SOCK_STREAM, etc)
  *	@sk_protocol: which protocol this socket belongs in this network family
  *	@sk_peer_pid: &struct pid for this socket's peer
  *	@sk_peer_cred: %SO_PEERCRED setting
  *	@sk_rcvlowat: %SO_RCVLOWAT setting
  *	@sk_rcvtimeo: %SO_RCVTIMEO setting
  *	@sk_sndtimeo: %SO_SNDTIMEO setting
  *	@sk_rxhash: flow hash received from netif layer
  *	@sk_filter: socket filtering instructions
  *	@sk_protinfo: private area, net family specific, when not using slab
  *	@sk_timer: sock cleanup timer
  *	@sk_stamp: time stamp of last packet received
  *	@sk_socket: Identd and reporting IO signals
  *	@sk_user_data: RPC layer private data
  *	@sk_sndmsg_page: cached page for sendmsg
  *	@sk_sndmsg_off: cached offset for sendmsg
  *	@sk_peek_off: current peek_offset value
  *	@sk_send_head: front of stuff to transmit
  *	@sk_security: used by security modules
  *	@sk_mark: generic packet mark
  *	@sk_classid: this socket's cgroup classid
  *	@sk_cgrp: this socket's cgroup-specific proto data
  *	@sk_write_pending: a write to stream socket waits to start
  *	@sk_state_change: callback to indicate change in the state of the sock
  *	@sk_data_ready: callback to indicate there is data to be processed
  *	@sk_write_space: callback to indicate there is buffer sending space available
  *	@sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
  *	@sk_backlog_rcv: callback to process the backlog
  *	@sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
 */
struct sock {
	/*
	 * Now struct inet_timewait_sock also uses sock_common, so please just
	 * don't add anything before this first member (__sk_common) --acme
	 */
	struct sock_common	__sk_common;
#define sk_node			__sk_common.skc_node
#define sk_nulls_node		__sk_common.skc_nulls_node
#define sk_refcnt		__sk_common.skc_refcnt
#define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping

#define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
#define sk_dontcopy_end		__sk_common.skc_dontcopy_end
#define sk_hash			__sk_common.skc_hash
#define sk_family		__sk_common.skc_family
#define sk_state		__sk_common.skc_state
#define sk_reuse		__sk_common.skc_reuse
#define sk_bound_dev_if		__sk_common.skc_bound_dev_if
#define sk_bind_node		__sk_common.skc_bind_node
#define sk_prot			__sk_common.skc_prot
#define sk_net			__sk_common.skc_net
	socket_lock_t		sk_lock;
	struct sk_buff_head	sk_receive_queue;
	/*
	 * The backlog queue is special, it is always used with
	 * the per-socket spinlock held and requires low latency
	 * access. Therefore we special case its implementation.
	 * Note : rmem_alloc is in this structure to fill a hole
	 * on 64bit arches, not because it's logically part of
	 * backlog.
	 */
	struct {
		atomic_t	rmem_alloc;
		int		len;
		struct sk_buff	*head;
		struct sk_buff	*tail;
	} sk_backlog;
#define sk_rmem_alloc sk_backlog.rmem_alloc
	int			sk_forward_alloc;
#ifdef CONFIG_RPS
	__u32			sk_rxhash;
#endif
	atomic_t		sk_drops;
	int			sk_rcvbuf;

	struct sk_filter __rcu	*sk_filter;
	struct socket_wq __rcu	*sk_wq;

#ifdef CONFIG_NET_DMA
	struct sk_buff_head	sk_async_wait_queue;
#endif

#ifdef CONFIG_XFRM
	struct xfrm_policy	*sk_policy[2];
#endif
	unsigned long		sk_flags;
	struct dst_entry	*sk_dst_cache;
	spinlock_t		sk_dst_lock;
	struct dst_entry	*sk_rx_dst;
	atomic_t		sk_wmem_alloc;
	atomic_t		sk_omem_alloc;
	int			sk_sndbuf;
	struct sk_buff_head	sk_write_queue;
	kmemcheck_bitfield_begin(flags);
	unsigned int		sk_shutdown  : 2,
				sk_no_check  : 2,
				sk_userlocks : 4,
				sk_protocol  : 8,
				sk_type      : 16;
	kmemcheck_bitfield_end(flags);
	int			sk_wmem_queued;
	gfp_t			sk_allocation;
	netdev_features_t	sk_route_caps;
	netdev_features_t	sk_route_nocaps;
	int			sk_gso_type;
	unsigned int		sk_gso_max_size;
	int			sk_rcvlowat;
	unsigned long		sk_lingertime;
	struct sk_buff_head	sk_error_queue;
	struct proto		*sk_prot_creator;
	rwlock_t		sk_callback_lock;
	int			sk_err,
				sk_err_soft;
	unsigned short		sk_ack_backlog;
	unsigned short		sk_max_ack_backlog;
	__u32			sk_priority;
#ifdef CONFIG_CGROUPS
	__u32			sk_cgrp_prioidx;
#endif
	struct pid		*sk_peer_pid;
	const struct cred	*sk_peer_cred;
	long			sk_rcvtimeo;
	long			sk_sndtimeo;
	void			*sk_protinfo;
	struct timer_list	sk_timer;
	ktime_t			sk_stamp;
	struct socket		*sk_socket;
	void			*sk_user_data;
	struct page		*sk_sndmsg_page;
	struct sk_buff		*sk_send_head;
	__u32			sk_sndmsg_off;
	__s32			sk_peek_off;
	int			sk_write_pending;
#ifdef CONFIG_SECURITY
	void			*sk_security;
#endif
	__u32			sk_mark;
	u32			sk_classid;
	struct cg_proto		*sk_cgrp;
	void			(*sk_state_change)(struct sock *sk);
	void			(*sk_data_ready)(struct sock *sk, int bytes);
	void			(*sk_write_space)(struct sock *sk);
	void			(*sk_error_report)(struct sock *sk);
	int			(*sk_backlog_rcv)(struct sock *sk,
						  struct sk_buff *skb);
	void			(*sk_destruct)(struct sock *sk);
};

/*
 * SK_CAN_REUSE and SK_NO_REUSE on a socket mean whether or not the
 * socket's port may be reused by someone else. SK_FORCE_REUSE on a socket
 * means that the socket will reuse everybody else's port without looking
 * at the other's sk_reuse value.
 */

#define SK_NO_REUSE	0
#define SK_CAN_REUSE	1
#define SK_FORCE_REUSE	2

static inline int sk_peek_offset(struct sock *sk, int flags)
{
	if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0))
		return sk->sk_peek_off;
	else
		return 0;
}

static inline void sk_peek_offset_bwd(struct sock *sk, int val)
{
	if (sk->sk_peek_off >= 0) {
		if (sk->sk_peek_off >= val)
			sk->sk_peek_off -= val;
		else
			sk->sk_peek_off = 0;
	}
}

static inline void sk_peek_offset_fwd(struct sock *sk, int val)
{
	if (sk->sk_peek_off >= 0)
		sk->sk_peek_off += val;
}

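/*
 * Sketch of how a datagram protocol's recvmsg path might drive these
 * helpers (illustrative only; "offset" and "copied" are hypothetical
 * locals): sk_peek_off < 0 disables the feature, otherwise MSG_PEEK
 * reads resume where the previous peek stopped.
 *
 *	offset = sk_peek_offset(sk, flags);	// where to start copying
 *	...copy 'copied' bytes from the queue starting at 'offset'...
 *	if (flags & MSG_PEEK)
 *		sk_peek_offset_fwd(sk, copied);	// next peek continues here
 *	else
 *		sk_peek_offset_bwd(sk, copied);	// data consumed, pull back
 */
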
/*
 * Hashed lists helper routines
 */
static inline struct sock *sk_entry(const struct hlist_node *node)
{
	return hlist_entry(node, struct sock, sk_node);
}

static inline struct sock *__sk_head(const struct hlist_head *head)
{
	return hlist_entry(head->first, struct sock, sk_node);
}

static inline struct sock *sk_head(const struct hlist_head *head)
{
	return hlist_empty(head) ? NULL : __sk_head(head);
}

static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head)
{
	return hlist_nulls_entry(head->first, struct sock, sk_nulls_node);
}

static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head);
}

static inline struct sock *sk_next(const struct sock *sk)
{
	return sk->sk_node.next ?
		hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
}

static inline struct sock *sk_nulls_next(const struct sock *sk)
{
	return (!is_a_nulls(sk->sk_nulls_node.next)) ?
		hlist_nulls_entry(sk->sk_nulls_node.next,
				  struct sock, sk_nulls_node) :
		NULL;
}

static inline bool sk_unhashed(const struct sock *sk)
{
	return hlist_unhashed(&sk->sk_node);
}

static inline bool sk_hashed(const struct sock *sk)
{
	return !sk_unhashed(sk);
}

static inline void sk_node_init(struct hlist_node *node)
{
	node->pprev = NULL;
}

static inline void sk_nulls_node_init(struct hlist_nulls_node *node)
{
	node->pprev = NULL;
}

static inline void __sk_del_node(struct sock *sk)
{
	__hlist_del(&sk->sk_node);
}

/* NB: equivalent to hlist_del_init_rcu */
static inline bool __sk_del_node_init(struct sock *sk)
{
	if (sk_hashed(sk)) {
		__sk_del_node(sk);
		sk_node_init(&sk->sk_node);
		return true;
	}
	return false;
}

/* Grab socket reference count. This operation is valid only
   when sk is ALREADY grabbed, e.g. it is found in a hash table
   or a list and the lookup is made under a lock preventing hash table
   modifications.
 */

static inline void sock_hold(struct sock *sk)
{
	atomic_inc(&sk->sk_refcnt);
}

/* Ungrab socket in the context, which assumes that socket refcnt
   cannot hit zero, e.g. it is true in context of any socketcall.
 */
static inline void __sock_put(struct sock *sk)
{
	atomic_dec(&sk->sk_refcnt);
}

static inline bool sk_del_node_init(struct sock *sk)
{
	bool rc = __sk_del_node_init(sk);

	if (rc) {
		/* paranoid for a while -acme */
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
	return rc;
}
#define sk_del_node_init_rcu(sk)	sk_del_node_init(sk)

static inline bool __sk_nulls_del_node_init_rcu(struct sock *sk)
{
	if (sk_hashed(sk)) {
		hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
		return true;
	}
	return false;
}

static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
{
	bool rc = __sk_nulls_del_node_init_rcu(sk);

	if (rc) {
		/* paranoid for a while -acme */
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
	return rc;
}

static inline void __sk_add_node(struct sock *sk, struct hlist_head *list)
{
	hlist_add_head(&sk->sk_node, list);
}

static inline void sk_add_node(struct sock *sk, struct hlist_head *list)
{
	sock_hold(sk);
	__sk_add_node(sk, list);
}

static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
{
	sock_hold(sk);
	hlist_add_head_rcu(&sk->sk_node, list);
}

static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}

static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
	sock_hold(sk);
	__sk_nulls_add_node_rcu(sk, list);
}

static inline void __sk_del_bind_node(struct sock *sk)
{
	__hlist_del(&sk->sk_bind_node);
}

static inline void sk_add_bind_node(struct sock *sk,
					struct hlist_head *list)
{
	hlist_add_head(&sk->sk_bind_node, list);
}

#define sk_for_each(__sk, node, list) \
	hlist_for_each_entry(__sk, node, list, sk_node)
#define sk_for_each_rcu(__sk, node, list) \
	hlist_for_each_entry_rcu(__sk, node, list, sk_node)
#define sk_nulls_for_each(__sk, node, list) \
	hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
#define sk_nulls_for_each_rcu(__sk, node, list) \
	hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
#define sk_for_each_from(__sk, node) \
	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
		hlist_for_each_entry_from(__sk, node, sk_node)
#define sk_nulls_for_each_from(__sk, node) \
	if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
		hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
#define sk_for_each_safe(__sk, node, tmp, list) \
	hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node)
#define sk_for_each_bound(__sk, node, list) \
	hlist_for_each_entry(__sk, node, list, sk_bind_node)

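/*
 * Lookup sketch (illustrative; "hslot" and "slot" are hypothetical): a
 * lockless RCU walk over a nulls list must verify the nulls value at the
 * end of the chain, because a socket can be moved to another chain while
 * the walk is in progress:
 *
 *	begin:
 *	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
 *		if (...sk matches the lookup key...)
 *			goto found;
 *	}
 *	if (get_nulls_value(node) != slot)	// ended on a foreign chain
 *		goto begin;			// restart the walk
 */
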
/* Sock flags */
enum sock_flags {
	SOCK_DEAD,
	SOCK_DONE,
	SOCK_URGINLINE,
	SOCK_KEEPOPEN,
	SOCK_LINGER,
	SOCK_DESTROY,
	SOCK_BROADCAST,
	SOCK_TIMESTAMP,
	SOCK_ZAPPED,
	SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
	SOCK_DBG, /* %SO_DEBUG setting */
	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
	SOCK_TIMESTAMPING_TX_HARDWARE,  /* %SOF_TIMESTAMPING_TX_HARDWARE */
	SOCK_TIMESTAMPING_TX_SOFTWARE,  /* %SOF_TIMESTAMPING_TX_SOFTWARE */
	SOCK_TIMESTAMPING_RX_HARDWARE,  /* %SOF_TIMESTAMPING_RX_HARDWARE */
	SOCK_TIMESTAMPING_RX_SOFTWARE,  /* %SOF_TIMESTAMPING_RX_SOFTWARE */
	SOCK_TIMESTAMPING_SOFTWARE,     /* %SOF_TIMESTAMPING_SOFTWARE */
	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
	SOCK_FASYNC, /* fasync() active */
	SOCK_RXQ_OVFL,
	SOCK_ZEROCOPY, /* buffers from userspace */
	SOCK_WIFI_STATUS, /* push wifi status to userspace */
	SOCK_NOFCS, /* Tell NIC not to do the Ethernet FCS.
		     * Will use last 4 bytes of packet sent from
		     * user-space instead.
		     */
};

static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
{
	nsk->sk_flags = osk->sk_flags;
}

static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
{
	__set_bit(flag, &sk->sk_flags);
}

static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag)
{
	__clear_bit(flag, &sk->sk_flags);
}

static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
{
	return test_bit(flag, &sk->sk_flags);
}
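
/*
 * Note: __set_bit()/__clear_bit() are the non-atomic bit operations;
 * callers are expected to serialize flag updates, typically under the
 * socket lock. A typical (illustrative) test:
 *
 *	if (!sock_flag(sk, SOCK_DEAD))
 *		sk->sk_state_change(sk);
 */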

static inline void sk_acceptq_removed(struct sock *sk)
{
	sk->sk_ack_backlog--;
}

static inline void sk_acceptq_added(struct sock *sk)
{
	sk->sk_ack_backlog++;
}

static inline bool sk_acceptq_is_full(const struct sock *sk)
{
	return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
}

/*
 * Compute minimal free write space needed to queue new packets.
 */
static inline int sk_stream_min_wspace(const struct sock *sk)
{
	return sk->sk_wmem_queued >> 1;
}

static inline int sk_stream_wspace(const struct sock *sk)
{
	return sk->sk_sndbuf - sk->sk_wmem_queued;
}

extern void sk_stream_write_space(struct sock *sk);

static inline bool sk_stream_memory_free(const struct sock *sk)
{
	return sk->sk_wmem_queued < sk->sk_sndbuf;
}

/* OOB backlog add */
static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	/* don't leave skb->dst un-refcounted: we are going to leave the rcu lock */
	skb_dst_force(skb);

	if (!sk->sk_backlog.tail)
		sk->sk_backlog.head = skb;
	else
		sk->sk_backlog.tail->next = skb;

	sk->sk_backlog.tail = skb;
	skb->next = NULL;
}

/*
 * Take into account the size of the receive queue and backlog queue.
 * Do not take this skb's truesize into account,
 * so that even a single big packet can come in.
 */
static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb,
				     unsigned int limit)
{
	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);

	return qsize > limit;
}

/* The per-socket spinlock must be held here. */
static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb,
					      unsigned int limit)
{
	if (sk_rcvqueues_full(sk, skb, limit))
		return -ENOBUFS;

	__sk_add_backlog(sk, skb);
	sk->sk_backlog.len += skb->truesize;
	return 0;
}

static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	return sk->sk_backlog_rcv(sk, skb);
}

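/*
 * Receive-path sketch (illustrative, modeled on what TCP's softirq input
 * path does): if a user thread owns the socket, the packet goes to the
 * backlog and is replayed via sk_backlog_rcv() when release_sock() runs:
 *
 *	bh_lock_sock(sk);
 *	if (!sock_owned_by_user(sk))
 *		ret = sk_backlog_rcv(sk, skb);
 *	else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
 *		...drop: receive + backlog queues are over the limit...
 *	bh_unlock_sock(sk);
 */
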
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *sock_flow_table;

	rcu_read_lock();
	sock_flow_table = rcu_dereference(rps_sock_flow_table);
	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
	rcu_read_unlock();
#endif
}

static inline void sock_rps_reset_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *sock_flow_table;

	rcu_read_lock();
	sock_flow_table = rcu_dereference(rps_sock_flow_table);
	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
	rcu_read_unlock();
#endif
}

static inline void sock_rps_save_rxhash(struct sock *sk,
					const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
	if (unlikely(sk->sk_rxhash != skb->rxhash)) {
		sock_rps_reset_flow(sk);
		sk->sk_rxhash = skb->rxhash;
	}
#endif
}

static inline void sock_rps_reset_rxhash(struct sock *sk)
{
#ifdef CONFIG_RPS
	sock_rps_reset_flow(sk);
	sk->sk_rxhash = 0;
#endif
}

#define sk_wait_event(__sk, __timeo, __condition)			\
	({	int __rc;						\
		release_sock(__sk);					\
		__rc = __condition;					\
		if (!__rc) {						\
			*(__timeo) = schedule_timeout(*(__timeo));	\
		}							\
		lock_sock(__sk);					\
		__rc = __condition;					\
		__rc;							\
	})

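/*
 * Usage sketch (illustrative, simplified from the pattern in
 * sk_wait_data()): the macro drops the socket lock around
 * schedule_timeout() and evaluates the condition again with the lock
 * re-held, yielding its final truth value:
 *
 *	DEFINE_WAIT(wait);
 *
 *	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 *	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
 *	finish_wait(sk_sleep(sk), &wait);
 */
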
extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
extern int sk_stream_error(struct sock *sk, int flags, int err);
extern void sk_stream_kill_queues(struct sock *sk);

extern int sk_wait_data(struct sock *sk, long *timeo);

struct request_sock_ops;
struct timewait_sock_ops;
struct inet_hashinfo;
struct raw_hashinfo;
struct module;

/* Networking protocol blocks we attach to sockets.
 * socket layer -> transport layer interface
 * transport -> network interface is defined by struct inet_proto
 */
struct proto {
	void			(*close)(struct sock *sk,
					long timeout);
	int			(*connect)(struct sock *sk,
					struct sockaddr *uaddr,
					int addr_len);
	int			(*disconnect)(struct sock *sk, int flags);

	struct sock *		(*accept)(struct sock *sk, int flags, int *err);

	int			(*ioctl)(struct sock *sk, int cmd,
					 unsigned long arg);
	int			(*init)(struct sock *sk);
	void			(*destroy)(struct sock *sk);
	void			(*shutdown)(struct sock *sk, int how);
	int			(*setsockopt)(struct sock *sk, int level,
					int optname, char __user *optval,
					unsigned int optlen);
	int			(*getsockopt)(struct sock *sk, int level,
					int optname, char __user *optval,
					int __user *option);
#ifdef CONFIG_COMPAT
	int			(*compat_setsockopt)(struct sock *sk,
					int level,
					int optname, char __user *optval,
					unsigned int optlen);
	int			(*compat_getsockopt)(struct sock *sk,
					int level,
					int optname, char __user *optval,
					int __user *option);
	int			(*compat_ioctl)(struct sock *sk,
					unsigned int cmd, unsigned long arg);
#endif
	int			(*sendmsg)(struct kiocb *iocb, struct sock *sk,
					   struct msghdr *msg, size_t len);
	int			(*recvmsg)(struct kiocb *iocb, struct sock *sk,
					   struct msghdr *msg,
					   size_t len, int noblock, int flags,
					   int *addr_len);
	int			(*sendpage)(struct sock *sk, struct page *page,
					int offset, size_t size, int flags);
	int			(*bind)(struct sock *sk,
					struct sockaddr *uaddr, int addr_len);

	int			(*backlog_rcv) (struct sock *sk,
						struct sk_buff *skb);

	/* Keeping track of sk's, looking them up, and port selection methods. */
	void			(*hash)(struct sock *sk);
	void			(*unhash)(struct sock *sk);
	void			(*rehash)(struct sock *sk);
	int			(*get_port)(struct sock *sk, unsigned short snum);
	void			(*clear_sk)(struct sock *sk, int size);

	/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
	unsigned int		inuse_idx;
#endif

	/* Memory pressure */
	void			(*enter_memory_pressure)(struct sock *sk);
	atomic_long_t		*memory_allocated;	/* Current allocated memory. */
	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
	/*
	 * Pressure flag: try to collapse.
	 * Technical note: it is used by multiple contexts non atomically.
	 * All of __sk_mem_schedule() is of this nature: accounting
	 * is strict, actions are advisory and have some latency.
	 */
	int			*memory_pressure;
	long			*sysctl_mem;
	int			*sysctl_wmem;
	int			*sysctl_rmem;
	int			max_header;
	bool			no_autobind;

	struct kmem_cache	*slab;
	unsigned int		obj_size;
	int			slab_flags;

	struct percpu_counter	*orphan_count;

	struct request_sock_ops	*rsk_prot;
	struct timewait_sock_ops *twsk_prot;

	union {
		struct inet_hashinfo	*hashinfo;
		struct udp_table	*udp_table;
		struct raw_hashinfo	*raw_hash;
	} h;

	struct module		*owner;

	char			name[32];

	struct list_head	node;
#ifdef SOCK_REFCNT_DEBUG
	atomic_t		socks;
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
	/*
	 * cgroup specific init/deinit functions. Called once for all
	 * protocols that implement it, from cgroups populate function.
	 * This function has to set up any files the protocol wants to
	 * appear in the kmem cgroup filesystem.
	 */
	int			(*init_cgroup)(struct mem_cgroup *memcg,
					       struct cgroup_subsys *ss);
	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
#endif
};

/*
 * Bits in struct cg_proto.flags
 */
enum cg_proto_flags {
	/* Currently active and new sockets should be assigned to cgroups */
	MEMCG_SOCK_ACTIVE,
	/* It was ever activated; we must disarm static keys on destruction */
	MEMCG_SOCK_ACTIVATED,
};

struct cg_proto {
	void			(*enter_memory_pressure)(struct sock *sk);
	struct res_counter	*memory_allocated;	/* Current allocated memory. */
	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
	int			*memory_pressure;
	long			*sysctl_mem;
	unsigned long		flags;
	/*
	 * The memcg field is used to find which memcg we belong to directly.
	 * Each memcg struct can hold more than one cg_proto, so container_of
	 * won't really cut it.
	 *
	 * The elegant solution would be having an inverse function to
	 * proto_cgroup in struct proto, but that means polluting the structure
	 * for everybody, instead of just for memcg users.
	 */
	struct mem_cgroup	*memcg;
};

extern int proto_register(struct proto *prot, int alloc_slab);
extern void proto_unregister(struct proto *prot);

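/*
 * Registration sketch (illustrative; the "foo" names are hypothetical): a
 * protocol fills in a struct proto and registers it, optionally letting
 * the core create a private slab cache sized by obj_size:
 *
 *	static struct proto foo_proto = {
 *		.name	  = "FOO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct foo_sock),
 *	};
 *
 *	err = proto_register(&foo_proto, 1);	// 1: allocate the slab
 *	...
 *	proto_unregister(&foo_proto);
 */
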
static inline bool memcg_proto_active(struct cg_proto *cg_proto)
{
	return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
}

static inline bool memcg_proto_activated(struct cg_proto *cg_proto)
{
	return test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags);
}

#ifdef SOCK_REFCNT_DEBUG
static inline void sk_refcnt_debug_inc(struct sock *sk)
{
	atomic_inc(&sk->sk_prot->socks);
}

static inline void sk_refcnt_debug_dec(struct sock *sk)
{
	atomic_dec(&sk->sk_prot->socks);
	printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
	       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
}

static inline void sk_refcnt_debug_release(const struct sock *sk)
{
	if (atomic_read(&sk->sk_refcnt) != 1)
		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
		       sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
}
#else /* SOCK_REFCNT_DEBUG */
#define sk_refcnt_debug_inc(sk) do { } while (0)
#define sk_refcnt_debug_dec(sk) do { } while (0)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */

#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET)
extern struct static_key memcg_socket_limit_enabled;
static inline struct cg_proto *parent_cg_proto(struct proto *proto,
					       struct cg_proto *cg_proto)
{
	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
}
#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
#else
#define mem_cgroup_sockets_enabled 0
static inline struct cg_proto *parent_cg_proto(struct proto *proto,
					       struct cg_proto *cg_proto)
{
	return NULL;
}
#endif


static inline bool sk_has_memory_pressure(const struct sock *sk)
{
	return sk->sk_prot->memory_pressure != NULL;
}

static inline bool sk_under_memory_pressure(const struct sock *sk)
{
	if (!sk->sk_prot->memory_pressure)
		return false;

	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		return !!*sk->sk_cgrp->memory_pressure;

	return !!*sk->sk_prot->memory_pressure;
}

static inline void sk_leave_memory_pressure(struct sock *sk)
{
	int *memory_pressure = sk->sk_prot->memory_pressure;

	if (!memory_pressure)
		return;

	if (*memory_pressure)
		*memory_pressure = 0;

	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
		struct cg_proto *cg_proto = sk->sk_cgrp;
		struct proto *prot = sk->sk_prot;

		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
			if (*cg_proto->memory_pressure)
				*cg_proto->memory_pressure = 0;
	}
}

1048180d8cd9SGlauber Costa static inline void sk_enter_memory_pressure(struct sock *sk)
1049180d8cd9SGlauber Costa {
1050e1aab161SGlauber Costa 	if (!sk->sk_prot->enter_memory_pressure)
1051e1aab161SGlauber Costa 		return;
1052e1aab161SGlauber Costa 
1053e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1054e1aab161SGlauber Costa 		struct cg_proto *cg_proto = sk->sk_cgrp;
1055e1aab161SGlauber Costa 		struct proto *prot = sk->sk_prot;
1056e1aab161SGlauber Costa 
1057e1aab161SGlauber Costa 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
1058e1aab161SGlauber Costa 			cg_proto->enter_memory_pressure(sk);
1059e1aab161SGlauber Costa 	}
1060e1aab161SGlauber Costa 
1061180d8cd9SGlauber Costa 	sk->sk_prot->enter_memory_pressure(sk);
1062180d8cd9SGlauber Costa }
1063180d8cd9SGlauber Costa 
1064180d8cd9SGlauber Costa static inline long sk_prot_mem_limits(const struct sock *sk, int index)
1065180d8cd9SGlauber Costa {
1066180d8cd9SGlauber Costa 	long *prot = sk->sk_prot->sysctl_mem;
1067e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1068e1aab161SGlauber Costa 		prot = sk->sk_cgrp->sysctl_mem;
1069180d8cd9SGlauber Costa 	return prot[index];
1070180d8cd9SGlauber Costa }
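
/*
 * Illustrative note: the three sysctl_mem entries are the low, pressure
 * and high watermarks, in pages (e.g. sysctl_tcp_mem for TCP). A charging
 * path such as __sk_mem_schedule() conceptually does:
 *
 *	if (allocated <= sk_prot_mem_limits(sk, 0))
 *		...		(below low watermark: leave memory pressure)
 *	else if (allocated > sk_prot_mem_limits(sk, 1))
 *		...		(above pressure watermark: enter memory pressure)
 *	if (allocated > sk_prot_mem_limits(sk, 2))
 *		...		(over hard limit: suppress the allocation)
 */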
1071180d8cd9SGlauber Costa 
1072e1aab161SGlauber Costa static inline void memcg_memory_allocated_add(struct cg_proto *prot,
1073e1aab161SGlauber Costa 					      unsigned long amt,
1074e1aab161SGlauber Costa 					      int *parent_status)
1075e1aab161SGlauber Costa {
1076e1aab161SGlauber Costa 	struct res_counter *fail;
1077e1aab161SGlauber Costa 	int ret;
1078e1aab161SGlauber Costa 
10790e90b31fSGlauber Costa 	ret = res_counter_charge_nofail(prot->memory_allocated,
1080e1aab161SGlauber Costa 					amt << PAGE_SHIFT, &fail);
1081e1aab161SGlauber Costa 	if (ret < 0)
1082e1aab161SGlauber Costa 		*parent_status = OVER_LIMIT;
1083e1aab161SGlauber Costa }
1084e1aab161SGlauber Costa 
1085e1aab161SGlauber Costa static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
1086e1aab161SGlauber Costa 					      unsigned long amt)
1087e1aab161SGlauber Costa {
1088e1aab161SGlauber Costa 	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
1089e1aab161SGlauber Costa }
1090e1aab161SGlauber Costa 
1091e1aab161SGlauber Costa static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
1092e1aab161SGlauber Costa {
1093e1aab161SGlauber Costa 	u64 ret;
1094e1aab161SGlauber Costa 	ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE);
1095e1aab161SGlauber Costa 	return ret >> PAGE_SHIFT;
1096e1aab161SGlauber Costa }
1097e1aab161SGlauber Costa 
1098180d8cd9SGlauber Costa static inline long
1099180d8cd9SGlauber Costa sk_memory_allocated(const struct sock *sk)
1100180d8cd9SGlauber Costa {
1101180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1102e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1103e1aab161SGlauber Costa 		return memcg_memory_allocated_read(sk->sk_cgrp);
1104e1aab161SGlauber Costa 
1105180d8cd9SGlauber Costa 	return atomic_long_read(prot->memory_allocated);
1106180d8cd9SGlauber Costa }
1107180d8cd9SGlauber Costa 
1108180d8cd9SGlauber Costa static inline long
1109e1aab161SGlauber Costa sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
1110180d8cd9SGlauber Costa {
1111180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1112e1aab161SGlauber Costa 
1113e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1114e1aab161SGlauber Costa 		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
1115e1aab161SGlauber Costa 		/* update the root cgroup regardless */
1116e1aab161SGlauber Costa 		atomic_long_add_return(amt, prot->memory_allocated);
1117e1aab161SGlauber Costa 		return memcg_memory_allocated_read(sk->sk_cgrp);
1118e1aab161SGlauber Costa 	}
1119e1aab161SGlauber Costa 
1120180d8cd9SGlauber Costa 	return atomic_long_add_return(amt, prot->memory_allocated);
1121180d8cd9SGlauber Costa }
1122180d8cd9SGlauber Costa 
1123180d8cd9SGlauber Costa static inline void
11240e90b31fSGlauber Costa sk_memory_allocated_sub(struct sock *sk, int amt)
1125180d8cd9SGlauber Costa {
1126180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1127e1aab161SGlauber Costa 
11280e90b31fSGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1129e1aab161SGlauber Costa 		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
1130e1aab161SGlauber Costa 
1131180d8cd9SGlauber Costa 	atomic_long_sub(amt, prot->memory_allocated);
1132180d8cd9SGlauber Costa }
1133180d8cd9SGlauber Costa 
1134180d8cd9SGlauber Costa static inline void sk_sockets_allocated_dec(struct sock *sk)
1135180d8cd9SGlauber Costa {
1136180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1137e1aab161SGlauber Costa 
1138e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1139e1aab161SGlauber Costa 		struct cg_proto *cg_proto = sk->sk_cgrp;
1140e1aab161SGlauber Costa 
1141e1aab161SGlauber Costa 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
1142e1aab161SGlauber Costa 			percpu_counter_dec(cg_proto->sockets_allocated);
1143e1aab161SGlauber Costa 	}
1144e1aab161SGlauber Costa 
1145180d8cd9SGlauber Costa 	percpu_counter_dec(prot->sockets_allocated);
1146180d8cd9SGlauber Costa }
1147180d8cd9SGlauber Costa 
1148180d8cd9SGlauber Costa static inline void sk_sockets_allocated_inc(struct sock *sk)
1149180d8cd9SGlauber Costa {
1150180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1151e1aab161SGlauber Costa 
1152e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
1153e1aab161SGlauber Costa 		struct cg_proto *cg_proto = sk->sk_cgrp;
1154e1aab161SGlauber Costa 
1155e1aab161SGlauber Costa 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
1156e1aab161SGlauber Costa 			percpu_counter_inc(cg_proto->sockets_allocated);
1157e1aab161SGlauber Costa 	}
1158e1aab161SGlauber Costa 
1159180d8cd9SGlauber Costa 	percpu_counter_inc(prot->sockets_allocated);
1160180d8cd9SGlauber Costa }
1161180d8cd9SGlauber Costa 
1162180d8cd9SGlauber Costa static inline int
1163180d8cd9SGlauber Costa sk_sockets_allocated_read_positive(struct sock *sk)
1164180d8cd9SGlauber Costa {
1165180d8cd9SGlauber Costa 	struct proto *prot = sk->sk_prot;
1166180d8cd9SGlauber Costa 
1167e1aab161SGlauber Costa 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1168518fbf9cSEric Dumazet 		return percpu_counter_read_positive(sk->sk_cgrp->sockets_allocated);
1169e1aab161SGlauber Costa 
1170518fbf9cSEric Dumazet 	return percpu_counter_read_positive(prot->sockets_allocated);
1171180d8cd9SGlauber Costa }
1172180d8cd9SGlauber Costa 
1173180d8cd9SGlauber Costa static inline int
1174180d8cd9SGlauber Costa proto_sockets_allocated_sum_positive(struct proto *prot)
1175180d8cd9SGlauber Costa {
1176180d8cd9SGlauber Costa 	return percpu_counter_sum_positive(prot->sockets_allocated);
1177180d8cd9SGlauber Costa }
1178180d8cd9SGlauber Costa 
1179180d8cd9SGlauber Costa static inline long
1180180d8cd9SGlauber Costa proto_memory_allocated(struct proto *prot)
1181180d8cd9SGlauber Costa {
1182180d8cd9SGlauber Costa 	return atomic_long_read(prot->memory_allocated);
1183180d8cd9SGlauber Costa }
1184180d8cd9SGlauber Costa 
1185180d8cd9SGlauber Costa static inline bool
1186180d8cd9SGlauber Costa proto_memory_pressure(struct proto *prot)
1187180d8cd9SGlauber Costa {
1188180d8cd9SGlauber Costa 	if (!prot->memory_pressure)
1189180d8cd9SGlauber Costa 		return false;
1190180d8cd9SGlauber Costa 	return !!*prot->memory_pressure;
1191180d8cd9SGlauber Costa }
1192180d8cd9SGlauber Costa 
119365f76517SEric Dumazet 
119465f76517SEric Dumazet #ifdef CONFIG_PROC_FS
11951da177e4SLinus Torvalds /* Called with local bh disabled */
1196c29a0bc4SPavel Emelyanov extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
1197c29a0bc4SPavel Emelyanov extern int sock_prot_inuse_get(struct net *net, struct proto *proto);
119865f76517SEric Dumazet #else
1199dc6b9b78SEric Dumazet static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
1200c29a0bc4SPavel Emelyanov 		int inc)
120165f76517SEric Dumazet {
120265f76517SEric Dumazet }
120365f76517SEric Dumazet #endif
120465f76517SEric Dumazet 
12051da177e4SLinus Torvalds 
1206614c6cb4SArnaldo Carvalho de Melo /* With per-bucket locks this operation is not atomic, so the
1207614c6cb4SArnaldo Carvalho de Melo  * unlocked version below is no worse.
1208614c6cb4SArnaldo Carvalho de Melo  */
1209614c6cb4SArnaldo Carvalho de Melo static inline void __sk_prot_rehash(struct sock *sk)
1210614c6cb4SArnaldo Carvalho de Melo {
1211614c6cb4SArnaldo Carvalho de Melo 	sk->sk_prot->unhash(sk);
1212614c6cb4SArnaldo Carvalho de Melo 	sk->sk_prot->hash(sk);
1213614c6cb4SArnaldo Carvalho de Melo }
1214614c6cb4SArnaldo Carvalho de Melo 
1215fcbdf09dSOctavian Purdila void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
1216fcbdf09dSOctavian Purdila 
12171da177e4SLinus Torvalds /* About 10 seconds */
12181da177e4SLinus Torvalds #define SOCK_DESTROY_TIME (10*HZ)
12191da177e4SLinus Torvalds 
12201da177e4SLinus Torvalds /* Ports 0-1023 can't be bound to unless you are superuser */
12211da177e4SLinus Torvalds #define PROT_SOCK	1024
12221da177e4SLinus Torvalds 
12231da177e4SLinus Torvalds #define SHUTDOWN_MASK	3
12241da177e4SLinus Torvalds #define RCV_SHUTDOWN	1
12251da177e4SLinus Torvalds #define SEND_SHUTDOWN	2
12261da177e4SLinus Torvalds 
12271da177e4SLinus Torvalds #define SOCK_SNDBUF_LOCK	1
12281da177e4SLinus Torvalds #define SOCK_RCVBUF_LOCK	2
12291da177e4SLinus Torvalds #define SOCK_BINDADDR_LOCK	4
12301da177e4SLinus Torvalds #define SOCK_BINDPORT_LOCK	8
12311da177e4SLinus Torvalds 
12321da177e4SLinus Torvalds /* sock_iocb: used to kick off async processing of socket I/Os */
12331da177e4SLinus Torvalds struct sock_iocb {
12341da177e4SLinus Torvalds 	struct list_head	list;
12351da177e4SLinus Torvalds 
12361da177e4SLinus Torvalds 	int			flags;
12371da177e4SLinus Torvalds 	int			size;
12381da177e4SLinus Torvalds 	struct socket		*sock;
12391da177e4SLinus Torvalds 	struct sock		*sk;
12401da177e4SLinus Torvalds 	struct scm_cookie	*scm;
12411da177e4SLinus Torvalds 	struct msghdr		*msg, async_msg;
12421da177e4SLinus Torvalds 	struct kiocb		*kiocb;
12431da177e4SLinus Torvalds };
12441da177e4SLinus Torvalds 
12451da177e4SLinus Torvalds static inline struct sock_iocb *kiocb_to_siocb(struct kiocb *iocb)
12461da177e4SLinus Torvalds {
12471da177e4SLinus Torvalds 	return (struct sock_iocb *)iocb->private;
12481da177e4SLinus Torvalds }
12491da177e4SLinus Torvalds 
12501da177e4SLinus Torvalds static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si)
12511da177e4SLinus Torvalds {
12521da177e4SLinus Torvalds 	return si->kiocb;
12531da177e4SLinus Torvalds }
12541da177e4SLinus Torvalds 
12551da177e4SLinus Torvalds struct socket_alloc {
12561da177e4SLinus Torvalds 	struct socket socket;
12571da177e4SLinus Torvalds 	struct inode vfs_inode;
12581da177e4SLinus Torvalds };
12591da177e4SLinus Torvalds 
12601da177e4SLinus Torvalds static inline struct socket *SOCKET_I(struct inode *inode)
12611da177e4SLinus Torvalds {
12621da177e4SLinus Torvalds 	return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
12631da177e4SLinus Torvalds }
12641da177e4SLinus Torvalds 
12651da177e4SLinus Torvalds static inline struct inode *SOCK_INODE(struct socket *socket)
12661da177e4SLinus Torvalds {
12671da177e4SLinus Torvalds 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
12681da177e4SLinus Torvalds }
12691da177e4SLinus Torvalds 
12703ab224beSHideo Aoki /*
12713ab224beSHideo Aoki  * Functions for memory accounting
12723ab224beSHideo Aoki  */
12733ab224beSHideo Aoki extern int __sk_mem_schedule(struct sock *sk, int size, int kind);
12743ab224beSHideo Aoki extern void __sk_mem_reclaim(struct sock *sk);
12751da177e4SLinus Torvalds 
12763ab224beSHideo Aoki #define SK_MEM_QUANTUM ((int)PAGE_SIZE)
12773ab224beSHideo Aoki #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
12783ab224beSHideo Aoki #define SK_MEM_SEND	0
12793ab224beSHideo Aoki #define SK_MEM_RECV	1
12801da177e4SLinus Torvalds 
12813ab224beSHideo Aoki static inline int sk_mem_pages(int amt)
12821da177e4SLinus Torvalds {
12833ab224beSHideo Aoki 	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
12841da177e4SLinus Torvalds }
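
/*
 * Example: with the usual 4 KiB SK_MEM_QUANTUM, sk_mem_pages(6000)
 * returns 2; byte amounts are always rounded up to whole quanta.
 */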
12851da177e4SLinus Torvalds 
1286dc6b9b78SEric Dumazet static inline bool sk_has_account(struct sock *sk)
12871da177e4SLinus Torvalds {
12883ab224beSHideo Aoki 	/* return true if protocol supports memory accounting */
12893ab224beSHideo Aoki 	return !!sk->sk_prot->memory_allocated;
12901da177e4SLinus Torvalds }
12911da177e4SLinus Torvalds 
1292dc6b9b78SEric Dumazet static inline bool sk_wmem_schedule(struct sock *sk, int size)
12931da177e4SLinus Torvalds {
12943ab224beSHideo Aoki 	if (!sk_has_account(sk))
1295dc6b9b78SEric Dumazet 		return true;
1296d80d99d6SHerbert Xu 	return size <= sk->sk_forward_alloc ||
12973ab224beSHideo Aoki 		__sk_mem_schedule(sk, size, SK_MEM_SEND);
12983ab224beSHideo Aoki }
12993ab224beSHideo Aoki 
1300dc6b9b78SEric Dumazet static inline bool sk_rmem_schedule(struct sock *sk, int size)
13013ab224beSHideo Aoki {
13023ab224beSHideo Aoki 	if (!sk_has_account(sk))
1303dc6b9b78SEric Dumazet 		return true;
13043ab224beSHideo Aoki 	return size <= sk->sk_forward_alloc ||
13053ab224beSHideo Aoki 		__sk_mem_schedule(sk, size, SK_MEM_RECV);
13063ab224beSHideo Aoki }
13073ab224beSHideo Aoki 
13083ab224beSHideo Aoki static inline void sk_mem_reclaim(struct sock *sk)
13093ab224beSHideo Aoki {
13103ab224beSHideo Aoki 	if (!sk_has_account(sk))
13113ab224beSHideo Aoki 		return;
13123ab224beSHideo Aoki 	if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
13133ab224beSHideo Aoki 		__sk_mem_reclaim(sk);
13143ab224beSHideo Aoki }
13153ab224beSHideo Aoki 
13169993e7d3SDavid S. Miller static inline void sk_mem_reclaim_partial(struct sock *sk)
13179993e7d3SDavid S. Miller {
13189993e7d3SDavid S. Miller 	if (!sk_has_account(sk))
13199993e7d3SDavid S. Miller 		return;
13209993e7d3SDavid S. Miller 	if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
13219993e7d3SDavid S. Miller 		__sk_mem_reclaim(sk);
13229993e7d3SDavid S. Miller }
13239993e7d3SDavid S. Miller 
13243ab224beSHideo Aoki static inline void sk_mem_charge(struct sock *sk, int size)
13253ab224beSHideo Aoki {
13263ab224beSHideo Aoki 	if (!sk_has_account(sk))
13273ab224beSHideo Aoki 		return;
13283ab224beSHideo Aoki 	sk->sk_forward_alloc -= size;
13293ab224beSHideo Aoki }
13303ab224beSHideo Aoki 
13313ab224beSHideo Aoki static inline void sk_mem_uncharge(struct sock *sk, int size)
13323ab224beSHideo Aoki {
13333ab224beSHideo Aoki 	if (!sk_has_account(sk))
13343ab224beSHideo Aoki 		return;
13353ab224beSHideo Aoki 	sk->sk_forward_alloc += size;
13363ab224beSHideo Aoki }
13373ab224beSHideo Aoki 
13383ab224beSHideo Aoki static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
13393ab224beSHideo Aoki {
13403ab224beSHideo Aoki 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
13413ab224beSHideo Aoki 	sk->sk_wmem_queued -= skb->truesize;
13423ab224beSHideo Aoki 	sk_mem_uncharge(sk, skb->truesize);
13433ab224beSHideo Aoki 	__kfree_skb(skb);
1344d80d99d6SHerbert Xu }
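
/*
 * Illustrative sketch (hypothetical stream send path, error handling
 * trimmed): the helpers above are used in schedule/charge pairs, with
 * reclaim returning unused quanta to the protocol-wide counters:
 *
 *	if (!sk_wmem_schedule(sk, skb->truesize))
 *		return -ENOBUFS;		(would exceed memory limits)
 *	sk_mem_charge(sk, skb->truesize);
 *	sk->sk_wmem_queued += skb->truesize;
 *	...
 *	sk_wmem_free_skb(sk, skb);		(skb leaves the write queue)
 *	sk_mem_reclaim_partial(sk);
 */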
1345d80d99d6SHerbert Xu 
13461da177e4SLinus Torvalds /* Used by processes to "lock" a socket state, so that
13471da177e4SLinus Torvalds  * interrupts and bottom half handlers won't change it
13481da177e4SLinus Torvalds  * from under us. It essentially blocks any incoming
13491da177e4SLinus Torvalds  * packets, so that we won't get any new data or any
13501da177e4SLinus Torvalds  * packets that change the state of the socket.
13511da177e4SLinus Torvalds  *
13521da177e4SLinus Torvalds  * While locked, BH processing will add new packets to
13531da177e4SLinus Torvalds  * the backlog queue.  This queue is processed by the
13541da177e4SLinus Torvalds  * owner of the socket lock right before it is released.
13551da177e4SLinus Torvalds  *
13561da177e4SLinus Torvalds  * Since ~2.3.5 it has also been an exclusive sleep lock
13571da177e4SLinus Torvalds  * serializing accesses from user process context.
13581da177e4SLinus Torvalds  */
1359d2e9117cSJohn Heffner #define sock_owned_by_user(sk)	((sk)->sk_lock.owned)
13601da177e4SLinus Torvalds 
1361ed07536eSPeter Zijlstra /*
1362ed07536eSPeter Zijlstra  * Macro so as to not evaluate some arguments when
1363ed07536eSPeter Zijlstra  * lockdep is not enabled.
1364ed07536eSPeter Zijlstra  *
1365ed07536eSPeter Zijlstra  * Mark both the sk_lock and the sk_lock.slock as a
1366ed07536eSPeter Zijlstra  * per-address-family lock class.
1367ed07536eSPeter Zijlstra  */
1368ed07536eSPeter Zijlstra #define sock_lock_init_class_and_name(sk, sname, skey, name, key)	\
1369ed07536eSPeter Zijlstra do {									\
1370d2e9117cSJohn Heffner 	sk->sk_lock.owned = 0;						\
1371ed07536eSPeter Zijlstra 	init_waitqueue_head(&sk->sk_lock.wq);				\
1372ed07536eSPeter Zijlstra 	spin_lock_init(&(sk)->sk_lock.slock);				\
1373ed07536eSPeter Zijlstra 	debug_check_no_locks_freed((void *)&(sk)->sk_lock,		\
1374ed07536eSPeter Zijlstra 			sizeof((sk)->sk_lock));				\
1375ed07536eSPeter Zijlstra 	lockdep_set_class_and_name(&(sk)->sk_lock.slock,		\
1376ed07536eSPeter Zijlstra 				(skey), (sname));				\
1377ed07536eSPeter Zijlstra 	lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0);	\
1378ed07536eSPeter Zijlstra } while (0)
1379ed07536eSPeter Zijlstra 
138041380930SHarvey Harrison extern void lock_sock_nested(struct sock *sk, int subclass);
1381fcc70d5fSPeter Zijlstra 
1382fcc70d5fSPeter Zijlstra static inline void lock_sock(struct sock *sk)
1383fcc70d5fSPeter Zijlstra {
1384fcc70d5fSPeter Zijlstra 	lock_sock_nested(sk, 0);
1385fcc70d5fSPeter Zijlstra }
1386fcc70d5fSPeter Zijlstra 
138741380930SHarvey Harrison extern void release_sock(struct sock *sk);
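
/*
 * Illustrative sketch: the canonical process-context pattern is simply
 *
 *	lock_sock(sk);
 *	...				(BH input is diverted to the backlog)
 *	release_sock(sk);		(also processes the backlog queue)
 *
 * while BH context must use the bh_lock_sock()/bh_unlock_sock() spinlock
 * interface defined below.
 */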
13881da177e4SLinus Torvalds 
13891da177e4SLinus Torvalds /* BH context may only use the following locking interface. */
13901da177e4SLinus Torvalds #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
1391c6366184SIngo Molnar #define bh_lock_sock_nested(__sk) \
1392c6366184SIngo Molnar 				spin_lock_nested(&((__sk)->sk_lock.slock), \
1393c6366184SIngo Molnar 				SINGLE_DEPTH_NESTING)
13941da177e4SLinus Torvalds #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
13951da177e4SLinus Torvalds 
13968a74ad60SEric Dumazet extern bool lock_sock_fast(struct sock *sk);
13978a74ad60SEric Dumazet /**
13988a74ad60SEric Dumazet  * unlock_sock_fast - complement of lock_sock_fast
13998a74ad60SEric Dumazet  * @sk: socket
14008a74ad60SEric Dumazet  * @slow: slow mode
14018a74ad60SEric Dumazet  *
14028a74ad60SEric Dumazet  * Fast unlock of a socket, for user context.
14038a74ad60SEric Dumazet  * If slow mode is on, we call the regular release_sock().
14048a74ad60SEric Dumazet  */
14058a74ad60SEric Dumazet static inline void unlock_sock_fast(struct sock *sk, bool slow)
14064b0b72f7SEric Dumazet {
14078a74ad60SEric Dumazet 	if (slow)
14088a74ad60SEric Dumazet 		release_sock(sk);
14098a74ad60SEric Dumazet 	else
14104b0b72f7SEric Dumazet 		spin_unlock_bh(&sk->sk_lock.slock);
14114b0b72f7SEric Dumazet }
14124b0b72f7SEric Dumazet 
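/*
 * Illustrative sketch: lock_sock_fast() reports whether the slow path
 * (the full socket lock) had to be taken, and that value must be passed
 * back to unlock_sock_fast():
 *
 *	bool slow = lock_sock_fast(sk);
 *	...				(short, non-sleeping critical section)
 *	unlock_sock_fast(sk, slow);
 */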
14138a74ad60SEric Dumazet 
14141b8d7ae4SEric W. Biederman extern struct sock		*sk_alloc(struct net *net, int family,
1415dd0fc66fSAl Viro 					  gfp_t priority,
14166257ff21SPavel Emelyanov 					  struct proto *prot);
14171da177e4SLinus Torvalds extern void			sk_free(struct sock *sk);
1418edf02087SDenis V. Lunev extern void			sk_release_kernel(struct sock *sk);
1419e56c57d0SEric Dumazet extern struct sock		*sk_clone_lock(const struct sock *sk,
1420dd0fc66fSAl Viro 					       const gfp_t priority);
14211da177e4SLinus Torvalds 
14221da177e4SLinus Torvalds extern struct sk_buff		*sock_wmalloc(struct sock *sk,
14231da177e4SLinus Torvalds 					      unsigned long size, int force,
1424dd0fc66fSAl Viro 					      gfp_t priority);
14251da177e4SLinus Torvalds extern struct sk_buff		*sock_rmalloc(struct sock *sk,
14261da177e4SLinus Torvalds 					      unsigned long size, int force,
1427dd0fc66fSAl Viro 					      gfp_t priority);
14281da177e4SLinus Torvalds extern void			sock_wfree(struct sk_buff *skb);
14291da177e4SLinus Torvalds extern void			sock_rfree(struct sk_buff *skb);
143041063e9dSDavid S. Miller extern void			sock_edemux(struct sk_buff *skb);
14311da177e4SLinus Torvalds 
14321da177e4SLinus Torvalds extern int			sock_setsockopt(struct socket *sock, int level,
14331da177e4SLinus Torvalds 						int op, char __user *optval,
1434b7058842SDavid S. Miller 						unsigned int optlen);
14351da177e4SLinus Torvalds 
14361da177e4SLinus Torvalds extern int			sock_getsockopt(struct socket *sock, int level,
14371da177e4SLinus Torvalds 						int op, char __user *optval,
14381da177e4SLinus Torvalds 						int __user *optlen);
14391da177e4SLinus Torvalds extern struct sk_buff		*sock_alloc_send_skb(struct sock *sk,
14401da177e4SLinus Torvalds 						     unsigned long size,
14411da177e4SLinus Torvalds 						     int noblock,
14421da177e4SLinus Torvalds 						     int *errcode);
14434cc7f68dSHerbert Xu extern struct sk_buff		*sock_alloc_send_pskb(struct sock *sk,
14444cc7f68dSHerbert Xu 						      unsigned long header_len,
14454cc7f68dSHerbert Xu 						      unsigned long data_len,
14464cc7f68dSHerbert Xu 						      int noblock,
14474cc7f68dSHerbert Xu 						      int *errcode);
144886a76cafSVictor Fusco extern void *sock_kmalloc(struct sock *sk, int size,
1449dd0fc66fSAl Viro 			  gfp_t priority);
14501da177e4SLinus Torvalds extern void sock_kfree_s(struct sock *sk, void *mem, int size);
14511da177e4SLinus Torvalds extern void sk_send_sigurg(struct sock *sk);
14521da177e4SLinus Torvalds 
1453f8451725SHerbert Xu #ifdef CONFIG_CGROUPS
1454f8451725SHerbert Xu extern void sock_update_classid(struct sock *sk);
1455f8451725SHerbert Xu #else
1456f8451725SHerbert Xu static inline void sock_update_classid(struct sock *sk)
1457f8451725SHerbert Xu {
1458f8451725SHerbert Xu }
1459f8451725SHerbert Xu #endif
1460f8451725SHerbert Xu 
14611da177e4SLinus Torvalds /*
14621da177e4SLinus Torvalds  * Functions to fill in entries in struct proto_ops when a protocol
14631da177e4SLinus Torvalds  * does not implement a particular function.
14641da177e4SLinus Torvalds  */
14651da177e4SLinus Torvalds extern int                      sock_no_bind(struct socket *,
14661da177e4SLinus Torvalds 					     struct sockaddr *, int);
14671da177e4SLinus Torvalds extern int                      sock_no_connect(struct socket *,
14681da177e4SLinus Torvalds 						struct sockaddr *, int, int);
14691da177e4SLinus Torvalds extern int                      sock_no_socketpair(struct socket *,
14701da177e4SLinus Torvalds 						   struct socket *);
14711da177e4SLinus Torvalds extern int                      sock_no_accept(struct socket *,
14721da177e4SLinus Torvalds 					       struct socket *, int);
14731da177e4SLinus Torvalds extern int                      sock_no_getname(struct socket *,
14741da177e4SLinus Torvalds 						struct sockaddr *, int *, int);
14751da177e4SLinus Torvalds extern unsigned int             sock_no_poll(struct file *, struct socket *,
14761da177e4SLinus Torvalds 					     struct poll_table_struct *);
14771da177e4SLinus Torvalds extern int                      sock_no_ioctl(struct socket *, unsigned int,
14781da177e4SLinus Torvalds 					      unsigned long);
14791da177e4SLinus Torvalds extern int			sock_no_listen(struct socket *, int);
14801da177e4SLinus Torvalds extern int                      sock_no_shutdown(struct socket *, int);
14811da177e4SLinus Torvalds extern int			sock_no_getsockopt(struct socket *, int , int,
14821da177e4SLinus Torvalds 						   char __user *, int __user *);
14831da177e4SLinus Torvalds extern int			sock_no_setsockopt(struct socket *, int, int,
1484b7058842SDavid S. Miller 						   char __user *, unsigned int);
14851da177e4SLinus Torvalds extern int                      sock_no_sendmsg(struct kiocb *, struct socket *,
14861da177e4SLinus Torvalds 						struct msghdr *, size_t);
14871da177e4SLinus Torvalds extern int                      sock_no_recvmsg(struct kiocb *, struct socket *,
14881da177e4SLinus Torvalds 						struct msghdr *, size_t, int);
14891da177e4SLinus Torvalds extern int			sock_no_mmap(struct file *file,
14901da177e4SLinus Torvalds 					     struct socket *sock,
14911da177e4SLinus Torvalds 					     struct vm_area_struct *vma);
14921da177e4SLinus Torvalds extern ssize_t			sock_no_sendpage(struct socket *sock,
14931da177e4SLinus Torvalds 						struct page *page,
14941da177e4SLinus Torvalds 						int offset, size_t size,
14951da177e4SLinus Torvalds 						int flags);
14961da177e4SLinus Torvalds 
14971da177e4SLinus Torvalds /*
14981da177e4SLinus Torvalds  * Functions to fill in entries in struct proto_ops when a protocol
14991da177e4SLinus Torvalds  * uses the inet style.
15001da177e4SLinus Torvalds  */
15011da177e4SLinus Torvalds extern int sock_common_getsockopt(struct socket *sock, int level, int optname,
15021da177e4SLinus Torvalds 				  char __user *optval, int __user *optlen);
15031da177e4SLinus Torvalds extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
15041da177e4SLinus Torvalds 			       struct msghdr *msg, size_t size, int flags);
15051da177e4SLinus Torvalds extern int sock_common_setsockopt(struct socket *sock, int level, int optname,
1506b7058842SDavid S. Miller 				  char __user *optval, unsigned int optlen);
15073fdadf7dSDmitry Mishin extern int compat_sock_common_getsockopt(struct socket *sock, int level,
15083fdadf7dSDmitry Mishin 		int optname, char __user *optval, int __user *optlen);
15093fdadf7dSDmitry Mishin extern int compat_sock_common_setsockopt(struct socket *sock, int level,
1510b7058842SDavid S. Miller 		int optname, char __user *optval, unsigned int optlen);
15111da177e4SLinus Torvalds 
15121da177e4SLinus Torvalds extern void sk_common_release(struct sock *sk);
15131da177e4SLinus Torvalds 
15141da177e4SLinus Torvalds /*
15151da177e4SLinus Torvalds  *	Default socket callbacks and setup code
15161da177e4SLinus Torvalds  */
15171da177e4SLinus Torvalds 
15181da177e4SLinus Torvalds /* Initialise core socket variables */
15191da177e4SLinus Torvalds extern void sock_init_data(struct socket *sock, struct sock *sk);
15201da177e4SLinus Torvalds 
152146bcf14fSEric Dumazet extern void sk_filter_release_rcu(struct rcu_head *rcu);
152246bcf14fSEric Dumazet 
15231da177e4SLinus Torvalds /**
15241a5778aaSBen Hutchings  *	sk_filter_release - release a socket filter
1525dc9b3346SPaul Bonser  *	@fp: filter to release
1526dc9b3346SPaul Bonser  *
1527dc9b3346SPaul Bonser  *	Drop a reference to a socket filter and, once the last reference
1527dc9b3346SPaul Bonser  *	is gone, free its resources after an RCU grace period.
1528dc9b3346SPaul Bonser  */
1529dc9b3346SPaul Bonser 
1530309dd5fcSPavel Emelyanov static inline void sk_filter_release(struct sk_filter *fp)
1531309dd5fcSPavel Emelyanov {
1532309dd5fcSPavel Emelyanov 	if (atomic_dec_and_test(&fp->refcnt))
153380f8f102SEric Dumazet 		call_rcu(&fp->rcu, sk_filter_release_rcu);
1534309dd5fcSPavel Emelyanov }
1535309dd5fcSPavel Emelyanov 
1536309dd5fcSPavel Emelyanov static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
15371da177e4SLinus Torvalds {
15381da177e4SLinus Torvalds 	unsigned int size = sk_filter_len(fp);
15391da177e4SLinus Torvalds 
15401da177e4SLinus Torvalds 	atomic_sub(size, &sk->sk_omem_alloc);
1541309dd5fcSPavel Emelyanov 	sk_filter_release(fp);
15421da177e4SLinus Torvalds }
15431da177e4SLinus Torvalds 
15441da177e4SLinus Torvalds static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
15451da177e4SLinus Torvalds {
15461da177e4SLinus Torvalds 	atomic_inc(&fp->refcnt);
15471da177e4SLinus Torvalds 	atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc);
15481da177e4SLinus Torvalds }
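
/*
 * Illustrative sketch: when a socket is cloned, the child takes its own
 * reference on the parent's filter and is charged for its memory, as in
 * sk_clone_lock() (simplified):
 *
 *	filter = rcu_dereference_protected(newsk->sk_filter, 1);
 *	if (filter != NULL)
 *		sk_filter_charge(newsk, filter);
 */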
15491da177e4SLinus Torvalds 
15501da177e4SLinus Torvalds /*
15511da177e4SLinus Torvalds  * Socket reference counting postulates.
15521da177e4SLinus Torvalds  *
15531da177e4SLinus Torvalds  * * Each user of a socket SHOULD hold a reference count.
15541da177e4SLinus Torvalds  * * Each access point to a socket (a hash table bucket, a reference from
15551da177e4SLinus Torvalds  *   a list, a running timer, an skb in flight) MUST hold a reference count.
15561da177e4SLinus Torvalds  * * When the reference count hits 0, it will never increase again.
15571da177e4SLinus Torvalds  * * When the reference count hits 0, no references from outside exist to
15581da177e4SLinus Torvalds  *   this socket, and the current process on the current CPU is the last
15591da177e4SLinus Torvalds  *   user and may/should destroy this socket.
15601da177e4SLinus Torvalds  * * sk_free is called from any context: process, BH, IRQ. When it is
15611da177e4SLinus Torvalds  *   called, the socket has no references from outside -> sk_free may
15621da177e4SLinus Torvalds  *   release descendant resources allocated by the socket, but by the
15631da177e4SLinus Torvalds  *   time it is called, the socket is NOT referenced by any hash tables,
15641da177e4SLinus Torvalds  *   lists, etc.
15651da177e4SLinus Torvalds  * * Packets delivered from outside (from the network or from another
15661da177e4SLinus Torvalds  *   process) and enqueued on receive/error queues SHOULD NOT grab a
15671da177e4SLinus Torvalds  *   reference count while they sit in a queue. Otherwise, packets leak
15681da177e4SLinus Torvalds  *   when a socket is looked up by one CPU and unhashing is done by
15691da177e4SLinus Torvalds  *   another CPU. This is true for udp/raw and netlink (which leak to the
15701da177e4SLinus Torvalds  *   receive and error queues) and tcp (which leaks to the backlog).
15711da177e4SLinus Torvalds  *   Packet sockets do all their processing inside BR_NETPROTO_LOCK, so
15721da177e4SLinus Torvalds  *   they do not have this race condition. UNIX sockets use a separate
15731da177e4SLinus Torvalds  *   SMP lock, so they are prone to it as well.
15731da177e4SLinus Torvalds  */
15741da177e4SLinus Torvalds 
15751da177e4SLinus Torvalds /* Ungrab socket and destroy it, if it was the last reference. */
15761da177e4SLinus Torvalds static inline void sock_put(struct sock *sk)
15771da177e4SLinus Torvalds {
15781da177e4SLinus Torvalds 	if (atomic_dec_and_test(&sk->sk_refcnt))
15791da177e4SLinus Torvalds 		sk_free(sk);
15801da177e4SLinus Torvalds }
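
/*
 * Illustrative sketch: a pending timer is an "access point" in the sense
 * of the postulates above, so arming one takes a reference that the
 * expiry/stop path drops; sk_reset_timer()/sk_stop_timer() (declared
 * further down) implement this on top of sock_hold()/sock_put():
 *
 *	if (!mod_timer(&sk->sk_timer, expires))
 *		sock_hold(sk);	(timer was idle: it now holds a reference)
 */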
15811da177e4SLinus Torvalds 
158258a5a7b9SArnaldo Carvalho de Melo extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
158358a5a7b9SArnaldo Carvalho de Melo 			  const int nested);
158425995ff5SArnaldo Carvalho de Melo 
1585e022f0b4SKrishna Kumar static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
1586e022f0b4SKrishna Kumar {
1587e022f0b4SKrishna Kumar 	sk->sk_tx_queue_mapping = tx_queue;
1588e022f0b4SKrishna Kumar }
1589e022f0b4SKrishna Kumar 
1590e022f0b4SKrishna Kumar static inline void sk_tx_queue_clear(struct sock *sk)
1591e022f0b4SKrishna Kumar {
1592e022f0b4SKrishna Kumar 	sk->sk_tx_queue_mapping = -1;
1593e022f0b4SKrishna Kumar }
1594e022f0b4SKrishna Kumar 
1595e022f0b4SKrishna Kumar static inline int sk_tx_queue_get(const struct sock *sk)
1596e022f0b4SKrishna Kumar {
1597b0f77d0eSTom Herbert 	return sk ? sk->sk_tx_queue_mapping : -1;
1598e022f0b4SKrishna Kumar }
1599e022f0b4SKrishna Kumar 
1600972692e0SDavid S. Miller static inline void sk_set_socket(struct sock *sk, struct socket *sock)
1601972692e0SDavid S. Miller {
1602e022f0b4SKrishna Kumar 	sk_tx_queue_clear(sk);
1603972692e0SDavid S. Miller 	sk->sk_socket = sock;
1604972692e0SDavid S. Miller }
1605972692e0SDavid S. Miller 
1606aa395145SEric Dumazet static inline wait_queue_head_t *sk_sleep(struct sock *sk)
1607aa395145SEric Dumazet {
1608eaefd110SEric Dumazet 	BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0);
1609eaefd110SEric Dumazet 	return &rcu_dereference_raw(sk->sk_wq)->wait;
1610aa395145SEric Dumazet }
16111da177e4SLinus Torvalds /* Detach socket from process context.
16121da177e4SLinus Torvalds  * Announce socket dead, detach it from wait queue and inode.
16131da177e4SLinus Torvalds  * Note that the parent inode holds a reference count on this struct sock;
16141da177e4SLinus Torvalds  * we do not release it in this function, because the protocol
16151da177e4SLinus Torvalds  * probably wants some additional cleanups or even to continue
16161da177e4SLinus Torvalds  * working with this socket (TCP).
16171da177e4SLinus Torvalds  */
16181da177e4SLinus Torvalds static inline void sock_orphan(struct sock *sk)
16191da177e4SLinus Torvalds {
16201da177e4SLinus Torvalds 	write_lock_bh(&sk->sk_callback_lock);
16211da177e4SLinus Torvalds 	sock_set_flag(sk, SOCK_DEAD);
1622972692e0SDavid S. Miller 	sk_set_socket(sk, NULL);
162343815482SEric Dumazet 	sk->sk_wq  = NULL;
16241da177e4SLinus Torvalds 	write_unlock_bh(&sk->sk_callback_lock);
16251da177e4SLinus Torvalds }
16261da177e4SLinus Torvalds 
16271da177e4SLinus Torvalds static inline void sock_graft(struct sock *sk, struct socket *parent)
16281da177e4SLinus Torvalds {
16291da177e4SLinus Torvalds 	write_lock_bh(&sk->sk_callback_lock);
1630eaefd110SEric Dumazet 	sk->sk_wq = parent->wq;
16311da177e4SLinus Torvalds 	parent->sk = sk;
1632972692e0SDavid S. Miller 	sk_set_socket(sk, parent);
16334237c75cSVenkat Yekkirala 	security_sock_graft(sk, parent);
16341da177e4SLinus Torvalds 	write_unlock_bh(&sk->sk_callback_lock);
16351da177e4SLinus Torvalds }
16361da177e4SLinus Torvalds 
16371da177e4SLinus Torvalds extern int sock_i_uid(struct sock *sk);
16381da177e4SLinus Torvalds extern unsigned long sock_i_ino(struct sock *sk);
16391da177e4SLinus Torvalds 
16401da177e4SLinus Torvalds static inline struct dst_entry *
16411da177e4SLinus Torvalds __sk_dst_get(struct sock *sk)
16421da177e4SLinus Torvalds {
1643d8bf4ca9SMichal Hocko 	return rcu_dereference_check(sk->sk_dst_cache, sock_owned_by_user(sk) ||
1644f68c224fSEric Dumazet 						       lockdep_is_held(&sk->sk_lock.slock));
16451da177e4SLinus Torvalds }
16461da177e4SLinus Torvalds 
16471da177e4SLinus Torvalds static inline struct dst_entry *
16481da177e4SLinus Torvalds sk_dst_get(struct sock *sk)
16491da177e4SLinus Torvalds {
16501da177e4SLinus Torvalds 	struct dst_entry *dst;
16511da177e4SLinus Torvalds 
1652b6c6712aSEric Dumazet 	rcu_read_lock();
1653b6c6712aSEric Dumazet 	dst = rcu_dereference(sk->sk_dst_cache);
16541da177e4SLinus Torvalds 	if (dst)
16551da177e4SLinus Torvalds 		dst_hold(dst);
1656b6c6712aSEric Dumazet 	rcu_read_unlock();
16571da177e4SLinus Torvalds 	return dst;
16581da177e4SLinus Torvalds }
16591da177e4SLinus Torvalds 
1660b6c6712aSEric Dumazet extern void sk_reset_txq(struct sock *sk);
1661b6c6712aSEric Dumazet 
1662b6c6712aSEric Dumazet static inline void dst_negative_advice(struct sock *sk)
1663b6c6712aSEric Dumazet {
1664b6c6712aSEric Dumazet 	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
1665b6c6712aSEric Dumazet 
1666b6c6712aSEric Dumazet 	if (dst && dst->ops->negative_advice) {
1667b6c6712aSEric Dumazet 		ndst = dst->ops->negative_advice(dst);
1668b6c6712aSEric Dumazet 
1669b6c6712aSEric Dumazet 		if (ndst != dst) {
1670b6c6712aSEric Dumazet 			rcu_assign_pointer(sk->sk_dst_cache, ndst);
1671b6c6712aSEric Dumazet 			sk_reset_txq(sk);
1672b6c6712aSEric Dumazet 		}
1673b6c6712aSEric Dumazet 	}
1674b6c6712aSEric Dumazet }
1675b6c6712aSEric Dumazet 
16761da177e4SLinus Torvalds static inline void
16771da177e4SLinus Torvalds __sk_dst_set(struct sock *sk, struct dst_entry *dst)
16781da177e4SLinus Torvalds {
16791da177e4SLinus Torvalds 	struct dst_entry *old_dst;
16801da177e4SLinus Torvalds 
1681e022f0b4SKrishna Kumar 	sk_tx_queue_clear(sk);
16820b53ff2eSEric Dumazet 	/*
16830b53ff2eSEric Dumazet 	 * This can be called while sk is owned by the caller only,
16840b53ff2eSEric Dumazet 	 * with no state that can be checked in a rcu_dereference_check() cond
16850b53ff2eSEric Dumazet 	 */
16860b53ff2eSEric Dumazet 	old_dst = rcu_dereference_raw(sk->sk_dst_cache);
1687b6c6712aSEric Dumazet 	rcu_assign_pointer(sk->sk_dst_cache, dst);
16881da177e4SLinus Torvalds 	dst_release(old_dst);
16891da177e4SLinus Torvalds }
16901da177e4SLinus Torvalds 
16911da177e4SLinus Torvalds static inline void
16921da177e4SLinus Torvalds sk_dst_set(struct sock *sk, struct dst_entry *dst)
16931da177e4SLinus Torvalds {
1694b6c6712aSEric Dumazet 	spin_lock(&sk->sk_dst_lock);
16951da177e4SLinus Torvalds 	__sk_dst_set(sk, dst);
1696b6c6712aSEric Dumazet 	spin_unlock(&sk->sk_dst_lock);
16971da177e4SLinus Torvalds }
16981da177e4SLinus Torvalds 
16991da177e4SLinus Torvalds static inline void
17001da177e4SLinus Torvalds __sk_dst_reset(struct sock *sk)
17011da177e4SLinus Torvalds {
1702b6c6712aSEric Dumazet 	__sk_dst_set(sk, NULL);
17031da177e4SLinus Torvalds }
17041da177e4SLinus Torvalds 
17051da177e4SLinus Torvalds static inline void
17061da177e4SLinus Torvalds sk_dst_reset(struct sock *sk)
17071da177e4SLinus Torvalds {
1708b6c6712aSEric Dumazet 	spin_lock(&sk->sk_dst_lock);
17091da177e4SLinus Torvalds 	__sk_dst_reset(sk);
1710b6c6712aSEric Dumazet 	spin_unlock(&sk->sk_dst_lock);
17111da177e4SLinus Torvalds }
17121da177e4SLinus Torvalds 
1713f0088a50SDenis Vlasenko extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
17141da177e4SLinus Torvalds 
1715f0088a50SDenis Vlasenko extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
17161da177e4SLinus Torvalds 
1717dc6b9b78SEric Dumazet static inline bool sk_can_gso(const struct sock *sk)
1718bcd76111SHerbert Xu {
1719bcd76111SHerbert Xu 	return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
1720bcd76111SHerbert Xu }
1721bcd76111SHerbert Xu 
17229958089aSAndi Kleen extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
17236cbb0df7SArnaldo Carvalho de Melo 
1724c8f44affSMichał Mirosław static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
1725a465419bSEric Dumazet {
1726a465419bSEric Dumazet 	sk->sk_route_nocaps |= flags;
1727a465419bSEric Dumazet 	sk->sk_route_caps &= ~flags;
1728a465419bSEric Dumazet }
1729a465419bSEric Dumazet 
1730c6e1a0d1STom Herbert static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
1731c6e1a0d1STom Herbert 					   char __user *from, char *to,
1732912d398dSWei Yongjun 					   int copy, int offset)
1733c6e1a0d1STom Herbert {
1734c6e1a0d1STom Herbert 	if (skb->ip_summed == CHECKSUM_NONE) {
1735c6e1a0d1STom Herbert 		int err = 0;
1736c6e1a0d1STom Herbert 		__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
1737c6e1a0d1STom Herbert 		if (err)
1738c6e1a0d1STom Herbert 			return err;
1739912d398dSWei Yongjun 		skb->csum = csum_block_add(skb->csum, csum, offset);
1740c6e1a0d1STom Herbert 	} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
1741c6e1a0d1STom Herbert 		if (!access_ok(VERIFY_READ, from, copy) ||
1742c6e1a0d1STom Herbert 		    __copy_from_user_nocache(to, from, copy))
1743c6e1a0d1STom Herbert 			return -EFAULT;
1744c6e1a0d1STom Herbert 	} else if (copy_from_user(to, from, copy))
1745c6e1a0d1STom Herbert 		return -EFAULT;
1746c6e1a0d1STom Herbert 
1747c6e1a0d1STom Herbert 	return 0;
1748c6e1a0d1STom Herbert }
1749c6e1a0d1STom Herbert 
1750c6e1a0d1STom Herbert static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
1751c6e1a0d1STom Herbert 				       char __user *from, int copy)
1752c6e1a0d1STom Herbert {
1753912d398dSWei Yongjun 	int err, offset = skb->len;
1754c6e1a0d1STom Herbert 
1755912d398dSWei Yongjun 	err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy),
1756912d398dSWei Yongjun 				       copy, offset);
1757c6e1a0d1STom Herbert 	if (err)
1758912d398dSWei Yongjun 		__skb_trim(skb, offset);
1759c6e1a0d1STom Herbert 
1760c6e1a0d1STom Herbert 	return err;
1761c6e1a0d1STom Herbert }
1762c6e1a0d1STom Herbert 
1763c6e1a0d1STom Herbert static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
1764c6e1a0d1STom Herbert 					   struct sk_buff *skb,
1765c6e1a0d1STom Herbert 					   struct page *page,
1766c6e1a0d1STom Herbert 					   int off, int copy)
1767c6e1a0d1STom Herbert {
1768c6e1a0d1STom Herbert 	int err;
1769c6e1a0d1STom Herbert 
1770912d398dSWei Yongjun 	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off,
1771912d398dSWei Yongjun 				       copy, skb->len);
1772c6e1a0d1STom Herbert 	if (err)
1773c6e1a0d1STom Herbert 		return err;
1774c6e1a0d1STom Herbert 
1775c6e1a0d1STom Herbert 	skb->len	     += copy;
1776c6e1a0d1STom Herbert 	skb->data_len	     += copy;
1777c6e1a0d1STom Herbert 	skb->truesize	     += copy;
1778c6e1a0d1STom Herbert 	sk->sk_wmem_queued   += copy;
1779c6e1a0d1STom Herbert 	sk_mem_charge(sk, copy);
1780c6e1a0d1STom Herbert 	return 0;
1781c6e1a0d1STom Herbert }
1782c6e1a0d1STom Herbert 
17831da177e4SLinus Torvalds static inline int skb_copy_to_page(struct sock *sk, char __user *from,
17841da177e4SLinus Torvalds 				   struct sk_buff *skb, struct page *page,
17851da177e4SLinus Torvalds 				   int off, int copy)
17861da177e4SLinus Torvalds {
17871da177e4SLinus Torvalds 	if (skb->ip_summed == CHECKSUM_NONE) {
17881da177e4SLinus Torvalds 		int err = 0;
17895084205fSAl Viro 		__wsum csum = csum_and_copy_from_user(from,
17901da177e4SLinus Torvalds 						     page_address(page) + off,
17911da177e4SLinus Torvalds 							    copy, 0, &err);
17921da177e4SLinus Torvalds 		if (err)
17931da177e4SLinus Torvalds 			return err;
17941da177e4SLinus Torvalds 		skb->csum = csum_block_add(skb->csum, csum, skb->len);
17951da177e4SLinus Torvalds 	} else if (copy_from_user(page_address(page) + off, from, copy))
17961da177e4SLinus Torvalds 		return -EFAULT;
17971da177e4SLinus Torvalds 
17981da177e4SLinus Torvalds 	skb->len	     += copy;
17991da177e4SLinus Torvalds 	skb->data_len	     += copy;
18001da177e4SLinus Torvalds 	skb->truesize	     += copy;
18011da177e4SLinus Torvalds 	sk->sk_wmem_queued   += copy;
18023ab224beSHideo Aoki 	sk_mem_charge(sk, copy);
18031da177e4SLinus Torvalds 	return 0;
18041da177e4SLinus Torvalds }
18051da177e4SLinus Torvalds 
1806c564039fSEric Dumazet /**
1807c564039fSEric Dumazet  * sk_wmem_alloc_get - returns write allocations
1808c564039fSEric Dumazet  * @sk: socket
1809c564039fSEric Dumazet  *
1810c564039fSEric Dumazet  * Returns sk_wmem_alloc minus initial offset of one
1811c564039fSEric Dumazet  */
1812c564039fSEric Dumazet static inline int sk_wmem_alloc_get(const struct sock *sk)
1813c564039fSEric Dumazet {
1814c564039fSEric Dumazet 	return atomic_read(&sk->sk_wmem_alloc) - 1;
1815c564039fSEric Dumazet }
1816c564039fSEric Dumazet 
1817c564039fSEric Dumazet /**
1818c564039fSEric Dumazet  * sk_rmem_alloc_get - returns read allocations
1819c564039fSEric Dumazet  * @sk: socket
1820c564039fSEric Dumazet  *
1821c564039fSEric Dumazet  * Returns sk_rmem_alloc
1822c564039fSEric Dumazet  */
1823c564039fSEric Dumazet static inline int sk_rmem_alloc_get(const struct sock *sk)
1824c564039fSEric Dumazet {
1825c564039fSEric Dumazet 	return atomic_read(&sk->sk_rmem_alloc);
1826c564039fSEric Dumazet }
1827c564039fSEric Dumazet 
1828c564039fSEric Dumazet /**
1829c564039fSEric Dumazet  * sk_has_allocations - check if allocations are outstanding
1830c564039fSEric Dumazet  * @sk: socket
1831c564039fSEric Dumazet  *
1832c564039fSEric Dumazet  * Returns true if socket has write or read allocations
1833c564039fSEric Dumazet  */
1834dc6b9b78SEric Dumazet static inline bool sk_has_allocations(const struct sock *sk)
1835c564039fSEric Dumazet {
1836c564039fSEric Dumazet 	return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
1837c564039fSEric Dumazet }
1838c564039fSEric Dumazet 
1839a57de0b4SJiri Olsa /**
184043815482SEric Dumazet  * wq_has_sleeper - check if there are any waiting processes
1841acfbe96aSRandy Dunlap  * @wq: struct socket_wq
1842a57de0b4SJiri Olsa  *
184343815482SEric Dumazet  * Returns true if socket_wq has waiting processes
1844a57de0b4SJiri Olsa  *
184543815482SEric Dumazet  * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
1846a57de0b4SJiri Olsa  * barrier call. They were added due to the race found within the tcp code.
1847a57de0b4SJiri Olsa  *
1848a57de0b4SJiri Olsa  * Consider following tcp code paths:
1849a57de0b4SJiri Olsa  *
1850a57de0b4SJiri Olsa  * CPU1                  CPU2
1851a57de0b4SJiri Olsa  *
1852a57de0b4SJiri Olsa  * sys_select            receive packet
1853a57de0b4SJiri Olsa  *   ...                 ...
1854a57de0b4SJiri Olsa  *   __add_wait_queue    update tp->rcv_nxt
1855a57de0b4SJiri Olsa  *   ...                 ...
1856a57de0b4SJiri Olsa  *   tp->rcv_nxt check   sock_def_readable
1857a57de0b4SJiri Olsa  *   ...                 {
185843815482SEric Dumazet  *   schedule               rcu_read_lock();
185943815482SEric Dumazet  *                          wq = rcu_dereference(sk->sk_wq);
186043815482SEric Dumazet  *                          if (wq && waitqueue_active(&wq->wait))
186143815482SEric Dumazet  *                              wake_up_interruptible(&wq->wait)
1862a57de0b4SJiri Olsa  *                          ...
1863a57de0b4SJiri Olsa  *                       }
1864a57de0b4SJiri Olsa  *
1865a57de0b4SJiri Olsa  * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay
1866a57de0b4SJiri Olsa  * in its cache, and so does the tp->rcv_nxt update on CPU2 side.  CPU1
1867a57de0b4SJiri Olsa  * could then end up calling schedule and sleeping forever if no more
1868a57de0b4SJiri Olsa  * data arrives on the socket.
1869ad462769SJiri Olsa  *
1870a57de0b4SJiri Olsa  */
187143815482SEric Dumazet static inline bool wq_has_sleeper(struct socket_wq *wq)
1872a57de0b4SJiri Olsa {
1873dc6b9b78SEric Dumazet 	/* We need to be sure we are in sync with the
1874a57de0b4SJiri Olsa 	 * add_wait_queue modifications to the wait queue.
1875a57de0b4SJiri Olsa 	 *
1876a57de0b4SJiri Olsa 	 * This memory barrier is paired with the one in sock_poll_wait().
1877a57de0b4SJiri Olsa 	 */
187843815482SEric Dumazet 	smp_mb();
187943815482SEric Dumazet 	return wq && waitqueue_active(&wq->wait);
1880a57de0b4SJiri Olsa }
1881a57de0b4SJiri Olsa 
1882a57de0b4SJiri Olsa /**
1883a57de0b4SJiri Olsa  * sock_poll_wait - place memory barrier behind the poll_wait call.
1884a57de0b4SJiri Olsa  * @filp:           file
1885a57de0b4SJiri Olsa  * @wait_address:   socket wait queue
1886a57de0b4SJiri Olsa  * @p:              poll_table
1887a57de0b4SJiri Olsa  *
188843815482SEric Dumazet  * See the comments in the wq_has_sleeper function.
1889a57de0b4SJiri Olsa  */
1890a57de0b4SJiri Olsa static inline void sock_poll_wait(struct file *filp,
1891a57de0b4SJiri Olsa 		wait_queue_head_t *wait_address, poll_table *p)
1892a57de0b4SJiri Olsa {
1893626cf236SHans Verkuil 	if (!poll_does_not_wait(p) && wait_address) {
1894a57de0b4SJiri Olsa 		poll_wait(filp, wait_address, p);
1895dc6b9b78SEric Dumazet 		/* We need to be sure we are in sync with the
1896a57de0b4SJiri Olsa 		 * socket flags modification.
1897a57de0b4SJiri Olsa 		 *
189843815482SEric Dumazet 		 * This memory barrier is paired with the one in wq_has_sleeper().
1899a57de0b4SJiri Olsa 		 */
1900a57de0b4SJiri Olsa 		smp_mb();
1901a57de0b4SJiri Olsa 	}
1902a57de0b4SJiri Olsa }
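
/*
 * Illustrative sketch (hypothetical protocol): a poll method pairs
 * sock_poll_wait() with the wake-up side shown in the wq_has_sleeper()
 * comment above:
 *
 *	static unsigned int hypothetical_poll(struct file *file,
 *					      struct socket *sock,
 *					      poll_table *wait)
 *	{
 *		struct sock *sk = sock->sk;
 *		unsigned int mask = 0;
 *
 *		sock_poll_wait(file, sk_sleep(sk), wait);
 *		if (!skb_queue_empty(&sk->sk_receive_queue))
 *			mask |= POLLIN | POLLRDNORM;
 *		if (sock_writeable(sk))
 *			mask |= POLLOUT | POLLWRNORM;
 *		return mask;
 *	}
 */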
1903a57de0b4SJiri Olsa 
19041da177e4SLinus Torvalds /*
19051da177e4SLinus Torvalds  *	Queue a received datagram if it will fit. Stream and sequenced
19061da177e4SLinus Torvalds  *	protocols can't normally use this as they need to fit buffers in
19071da177e4SLinus Torvalds  *	and play with them.
19081da177e4SLinus Torvalds  *
19091da177e4SLinus Torvalds  *	Inlined as it's very short and called for pretty much every
19101da177e4SLinus Torvalds  *	packet ever received.
19111da177e4SLinus Torvalds  */
19121da177e4SLinus Torvalds 
19131da177e4SLinus Torvalds static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
19141da177e4SLinus Torvalds {
1915d55d87fdSHerbert Xu 	skb_orphan(skb);
19161da177e4SLinus Torvalds 	skb->sk = sk;
19171da177e4SLinus Torvalds 	skb->destructor = sock_wfree;
19182b85a34eSEric Dumazet 	/*
19192b85a34eSEric Dumazet 	 * We used to take a refcount on sk, but the following operation
19202b85a34eSEric Dumazet 	 * is enough to guarantee sk_free() won't free this sock until
19212b85a34eSEric Dumazet 	 * all in-flight packets are completed.
19222b85a34eSEric Dumazet 	 */
19231da177e4SLinus Torvalds 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
19241da177e4SLinus Torvalds }
19251da177e4SLinus Torvalds 
19261da177e4SLinus Torvalds static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
19271da177e4SLinus Torvalds {
1928d55d87fdSHerbert Xu 	skb_orphan(skb);
19291da177e4SLinus Torvalds 	skb->sk = sk;
19301da177e4SLinus Torvalds 	skb->destructor = sock_rfree;
19311da177e4SLinus Torvalds 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
19323ab224beSHideo Aoki 	sk_mem_charge(sk, skb->truesize);
19331da177e4SLinus Torvalds }
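
/*
 * Illustrative sketch: the core of sock_queue_rcv_skb() (declared below)
 * combines receive-buffer checking with the ownership helper above
 * (simplified; filtering and locking omitted):
 *
 *	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >
 *	    (unsigned int)sk->sk_rcvbuf)
 *		return -ENOMEM;
 *	skb_set_owner_r(skb, sk);
 *	__skb_queue_tail(&sk->sk_receive_queue, skb);
 *	sk->sk_data_ready(sk, skb->len);
 */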
19341da177e4SLinus Torvalds 
19351da177e4SLinus Torvalds extern void sk_reset_timer(struct sock *sk, struct timer_list *timer,
19361da177e4SLinus Torvalds 			   unsigned long expires);
19371da177e4SLinus Torvalds 
19381da177e4SLinus Torvalds extern void sk_stop_timer(struct sock *sk, struct timer_list *timer);
19391da177e4SLinus Torvalds 
1940f0088a50SDenis Vlasenko extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
19411da177e4SLinus Torvalds 
1942b1faf566SEric Dumazet extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
19431da177e4SLinus Torvalds 
19441da177e4SLinus Torvalds /*
19451da177e4SLinus Torvalds  *	Recover an error report and clear atomically
19461da177e4SLinus Torvalds  */
19471da177e4SLinus Torvalds 
19481da177e4SLinus Torvalds static inline int sock_error(struct sock *sk)
19491da177e4SLinus Torvalds {
1950c1cbe4b7SBenjamin LaHaise 	int err;
1951c1cbe4b7SBenjamin LaHaise 	if (likely(!sk->sk_err))
1952c1cbe4b7SBenjamin LaHaise 		return 0;
1953c1cbe4b7SBenjamin LaHaise 	err = xchg(&sk->sk_err, 0);
19541da177e4SLinus Torvalds 	return -err;
19551da177e4SLinus Torvalds }
19561da177e4SLinus Torvalds 
19571da177e4SLinus Torvalds static inline unsigned long sock_wspace(struct sock *sk)
19581da177e4SLinus Torvalds {
19591da177e4SLinus Torvalds 	int amt = 0;
19601da177e4SLinus Torvalds 
19611da177e4SLinus Torvalds 	if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
19621da177e4SLinus Torvalds 		amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
19631da177e4SLinus Torvalds 		if (amt < 0)
19641da177e4SLinus Torvalds 			amt = 0;
19651da177e4SLinus Torvalds 	}
19661da177e4SLinus Torvalds 	return amt;
19671da177e4SLinus Torvalds }
19681da177e4SLinus Torvalds 
19691da177e4SLinus Torvalds static inline void sk_wake_async(struct sock *sk, int how, int band)
19701da177e4SLinus Torvalds {
1971bcdce719SEric Dumazet 	if (sock_flag(sk, SOCK_FASYNC))
19721da177e4SLinus Torvalds 		sock_wake_async(sk->sk_socket, how, band);
19731da177e4SLinus Torvalds }
19741da177e4SLinus Torvalds 
19751da177e4SLinus Torvalds #define SOCK_MIN_SNDBUF 2048
19767a91b434SEric Dumazet /*
19777a91b434SEric Dumazet  * Since sk_rmem_alloc sums skb->truesize, even a small frame might need
19787a91b434SEric Dumazet  * sizeof(sk_buff) + MTU + padding, unless the net driver performs copybreak.
19797a91b434SEric Dumazet  */
19807a91b434SEric Dumazet #define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff))
19811da177e4SLinus Torvalds 
19821da177e4SLinus Torvalds static inline void sk_stream_moderate_sndbuf(struct sock *sk)
19831da177e4SLinus Torvalds {
19841da177e4SLinus Torvalds 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
19858df09ea3SEric Dumazet 		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
19861da177e4SLinus Torvalds 		sk->sk_sndbuf = max(sk->sk_sndbuf, SOCK_MIN_SNDBUF);
19871da177e4SLinus Torvalds 	}
19881da177e4SLinus Torvalds }
19891da177e4SLinus Torvalds 
1990df97c708SPavel Emelyanov struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
19911da177e4SLinus Torvalds 
19921da177e4SLinus Torvalds static inline struct page *sk_stream_alloc_page(struct sock *sk)
19931da177e4SLinus Torvalds {
19941da177e4SLinus Torvalds 	struct page *page = NULL;
19951da177e4SLinus Torvalds 
19961da177e4SLinus Torvalds 	page = alloc_pages(sk->sk_allocation, 0);
1997ef015786SHerbert Xu 	if (!page) {
1998180d8cd9SGlauber Costa 		sk_enter_memory_pressure(sk);
19991da177e4SLinus Torvalds 		sk_stream_moderate_sndbuf(sk);
20001da177e4SLinus Torvalds 	}
20011da177e4SLinus Torvalds 	return page;
20021da177e4SLinus Torvalds }
20031da177e4SLinus Torvalds 
20041da177e4SLinus Torvalds /*
20051da177e4SLinus Torvalds  *	Default write policy as shown to user space via poll/select/SIGIO
20061da177e4SLinus Torvalds  */
2007dc6b9b78SEric Dumazet static inline bool sock_writeable(const struct sock *sk)
20081da177e4SLinus Torvalds {
20098df09ea3SEric Dumazet 	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
20101da177e4SLinus Torvalds }
20111da177e4SLinus Torvalds 
2012dd0fc66fSAl Viro static inline gfp_t gfp_any(void)
20131da177e4SLinus Torvalds {
201499709372SAndrew Morton 	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
20151da177e4SLinus Torvalds }
20161da177e4SLinus Torvalds 
2017dc6b9b78SEric Dumazet static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
20181da177e4SLinus Torvalds {
20191da177e4SLinus Torvalds 	return noblock ? 0 : sk->sk_rcvtimeo;
20201da177e4SLinus Torvalds }
20211da177e4SLinus Torvalds 
2022dc6b9b78SEric Dumazet static inline long sock_sndtimeo(const struct sock *sk, bool noblock)
20231da177e4SLinus Torvalds {
20241da177e4SLinus Torvalds 	return noblock ? 0 : sk->sk_sndtimeo;
20251da177e4SLinus Torvalds }
20261da177e4SLinus Torvalds 
20271da177e4SLinus Torvalds static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
20281da177e4SLinus Torvalds {
20291da177e4SLinus Torvalds 	return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
20301da177e4SLinus Torvalds }
20311da177e4SLinus Torvalds 
20321da177e4SLinus Torvalds /* Alas, with a timeout, socket operations are not restartable.
20331da177e4SLinus Torvalds  * Compare this to poll().
20341da177e4SLinus Torvalds  */
20351da177e4SLinus Torvalds static inline int sock_intr_errno(long timeo)
20361da177e4SLinus Torvalds {
20371da177e4SLinus Torvalds 	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
20381da177e4SLinus Torvalds }
20391da177e4SLinus Torvalds 
204092f37fd2SEric Dumazet extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
204192f37fd2SEric Dumazet 	struct sk_buff *skb);
20426e3e939fSJohannes Berg extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
20436e3e939fSJohannes Berg 	struct sk_buff *skb);
204492f37fd2SEric Dumazet 
2045dc6b9b78SEric Dumazet static inline void
20461da177e4SLinus Torvalds sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
20471da177e4SLinus Torvalds {
2048b7aa0bf7SEric Dumazet 	ktime_t kt = skb->tstamp;
204920d49473SPatrick Ohly 	struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
2050a61bbcf2SPatrick McHardy 
205120d49473SPatrick Ohly 	/*
205220d49473SPatrick Ohly 	 * generate control messages if
205320d49473SPatrick Ohly 	 * - receive time stamping in software requested (SOCK_RCVTSTAMP
205420d49473SPatrick Ohly 	 *   or SOCK_TIMESTAMPING_RX_SOFTWARE)
205520d49473SPatrick Ohly 	 * - software time stamp available and wanted
205620d49473SPatrick Ohly 	 *   (SOCK_TIMESTAMPING_SOFTWARE)
205720d49473SPatrick Ohly 	 * - hardware time stamps available and wanted
205820d49473SPatrick Ohly 	 *   (SOCK_TIMESTAMPING_SYS_HARDWARE or
205920d49473SPatrick Ohly 	 *   SOCK_TIMESTAMPING_RAW_HARDWARE)
206020d49473SPatrick Ohly 	 */
206120d49473SPatrick Ohly 	if (sock_flag(sk, SOCK_RCVTSTAMP) ||
206220d49473SPatrick Ohly 	    sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) ||
206320d49473SPatrick Ohly 	    (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) ||
206420d49473SPatrick Ohly 	    (hwtstamps->hwtstamp.tv64 &&
206520d49473SPatrick Ohly 	     sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) ||
206620d49473SPatrick Ohly 	    (hwtstamps->syststamp.tv64 &&
206720d49473SPatrick Ohly 	     sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)))
206892f37fd2SEric Dumazet 		__sock_recv_timestamp(msg, sk, skb);
206992f37fd2SEric Dumazet 	else
2070b7aa0bf7SEric Dumazet 		sk->sk_stamp = kt;
20716e3e939fSJohannes Berg 
20726e3e939fSJohannes Berg 	if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
20736e3e939fSJohannes Berg 		__sock_recv_wifi_status(msg, sk, skb);
20741da177e4SLinus Torvalds }
20751da177e4SLinus Torvalds 
2076767dd033SEric Dumazet extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
2077767dd033SEric Dumazet 				     struct sk_buff *skb);
2078767dd033SEric Dumazet 
2079767dd033SEric Dumazet static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
2080767dd033SEric Dumazet 					  struct sk_buff *skb)
2081767dd033SEric Dumazet {
2082767dd033SEric Dumazet #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
2083767dd033SEric Dumazet 			   (1UL << SOCK_RCVTSTAMP)			| \
2084767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)	| \
2085767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_SOFTWARE)		| \
2086767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)	| \
2087767dd033SEric Dumazet 			   (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
2088767dd033SEric Dumazet 
2089767dd033SEric Dumazet 	if (sk->sk_flags & FLAGS_TS_OR_DROPS)
2090767dd033SEric Dumazet 		__sock_recv_ts_and_drops(msg, sk, skb);
2091767dd033SEric Dumazet 	else
2092767dd033SEric Dumazet 		sk->sk_stamp = skb->tstamp;
2093767dd033SEric Dumazet }
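/*
 * Illustrative sketch (invented function name): datagram protocols call
 * sock_recv_ts_and_drops() from their recvmsg path; the sk_flags mask
 * test above keeps the common no-timestamp, no-drop-count case to a
 * single branch before falling back to storing skb->tstamp.
 */
static inline void example_recvmsg_finish(struct msghdr *msg,
					  struct sock *sk,
					  struct sk_buff *skb)
{
	/* appends timestamp / drop-count cmsgs only when requested */
	sock_recv_ts_and_drops(msg, sk, skb);
}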
20943b885787SNeil Horman 
20951da177e4SLinus Torvalds /**
209620d49473SPatrick Ohly  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
209720d49473SPatrick Ohly  * @sk:		socket sending this packet
20982244d07bSOliver Hartkopp  * @tx_flags:	filled with instructions for time stamping
209920d49473SPatrick Ohly  *
210020d49473SPatrick Ohly  * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if
210120d49473SPatrick Ohly  * parameters are invalid.
210220d49473SPatrick Ohly  */
21032244d07bSOliver Hartkopp extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags);
210420d49473SPatrick Ohly 
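/*
 * Sketch of typical use on the transmit side (an assumption, not
 * mandated by this header): a sendmsg path queries the socket's
 * timestamping flags and propagates them into the skb's shared info
 * for the driver to act on. example_setup_tx_timestamp() is invented.
 */
static inline int example_setup_tx_timestamp(struct sock *sk,
					     struct sk_buff *skb)
{
	__u8 tx_flags = 0;
	int err = sock_tx_timestamp(sk, &tx_flags);

	if (err)
		return err;
	skb_shinfo(skb)->tx_flags = tx_flags;
	return 0;
}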
210520d49473SPatrick Ohly /**
21061da177e4SLinus Torvalds  * sk_eat_skb - Release a skb if it is no longer needed
21074dc3b16bSPavel Pisa  * @sk: socket to eat this skb from
21084dc3b16bSPavel Pisa  * @skb: socket buffer to eat
2109f4b8ea78SRandy Dunlap  * @copied_early: flag indicating whether DMA operations copied this data early
21101da177e4SLinus Torvalds  *
21111da177e4SLinus Torvalds  * This routine must be called with interrupts disabled or with the socket
21121da177e4SLinus Torvalds  * locked so that the sk_buff queue operation is safe.
21131da177e4SLinus Torvalds  */
2114624d1164SChris Leech #ifdef CONFIG_NET_DMA
2115dc6b9b78SEric Dumazet static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early)
2116624d1164SChris Leech {
2117624d1164SChris Leech 	__skb_unlink(skb, &sk->sk_receive_queue);
2118624d1164SChris Leech 	if (!copied_early)
2119624d1164SChris Leech 		__kfree_skb(skb);
2120624d1164SChris Leech 	else
2121624d1164SChris Leech 		__skb_queue_tail(&sk->sk_async_wait_queue, skb);
2122624d1164SChris Leech }
2123624d1164SChris Leech #else
2124dc6b9b78SEric Dumazet static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early)
21251da177e4SLinus Torvalds {
21261da177e4SLinus Torvalds 	__skb_unlink(skb, &sk->sk_receive_queue);
21271da177e4SLinus Torvalds 	__kfree_skb(skb);
21281da177e4SLinus Torvalds }
2129624d1164SChris Leech #endif
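/*
 * Hypothetical sketch: draining fully copied skbs from the receive
 * queue under the socket lock, as the comment above requires.
 * example_drain_receive_queue() is an invented name.
 */
static inline void example_drain_receive_queue(struct sock *sk)
{
	struct sk_buff *skb;

	/* caller holds the socket lock */
	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
		sk_eat_skb(sk, skb, false);
}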
21301da177e4SLinus Torvalds 
21313b1e0a65SYOSHIFUJI Hideaki static inline
21323b1e0a65SYOSHIFUJI Hideaki struct net *sock_net(const struct sock *sk)
21333b1e0a65SYOSHIFUJI Hideaki {
2134c2d9ba9bSEric Dumazet 	return read_pnet(&sk->sk_net);
21353b1e0a65SYOSHIFUJI Hideaki }
21363b1e0a65SYOSHIFUJI Hideaki 
21373b1e0a65SYOSHIFUJI Hideaki static inline
2138f5aa23fdSDenis V. Lunev void sock_net_set(struct sock *sk, struct net *net)
21393b1e0a65SYOSHIFUJI Hideaki {
2140c2d9ba9bSEric Dumazet 	write_pnet(&sk->sk_net, net);
21413b1e0a65SYOSHIFUJI Hideaki }
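/*
 * Illustrative sketch: a common pattern is checking that a device and
 * a socket live in the same network namespace. example_same_netns() is
 * an invented name; net_eq() and dev_net() are the real primitives.
 */
static inline bool example_same_netns(const struct sock *sk,
				      const struct net_device *dev)
{
	return net_eq(sock_net(sk), dev_net(dev));
}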
21423b1e0a65SYOSHIFUJI Hideaki 
2143edf02087SDenis V. Lunev /*
2144edf02087SDenis V. Lunev  * Kernel sockets, e.g. rtnl or icmp_socket, are a part of a namespace.
214525985edcSLucas De Marchi  * They should not hold a reference to that namespace, so that the
2146edf02087SDenis V. Lunev  * namespace can be stopped.
2147edf02087SDenis V. Lunev  * Sockets obtained via sk_change_net() should be released using sk_release_kernel().
2148edf02087SDenis V. Lunev  */
2149edf02087SDenis V. Lunev static inline void sk_change_net(struct sock *sk, struct net *net)
2150edf02087SDenis V. Lunev {
21513b1e0a65SYOSHIFUJI Hideaki 	put_net(sock_net(sk));
215265a18ec5SDenis V. Lunev 	sock_net_set(sk, hold_net(net));
2153edf02087SDenis V. Lunev }
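/*
 * Sketch of the intended lifecycle (inferred from the comment above):
 * a kernel socket is created in init_net, moved into its target
 * namespace, and eventually torn down with sk_release_kernel().
 * example_move_and_release() is an invented name.
 */
static inline void example_move_and_release(struct sock *sk, struct net *net)
{
	sk_change_net(sk, net);
	/* ... use the socket ... */
	sk_release_kernel(sk);
}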
2154edf02087SDenis V. Lunev 
215523542618SKOVACS Krisztian static inline struct sock *skb_steal_sock(struct sk_buff *skb)
215623542618SKOVACS Krisztian {
2157*efc27f8cSVijay Subramanian 	if (skb->sk) {
215823542618SKOVACS Krisztian 		struct sock *sk = skb->sk;
215923542618SKOVACS Krisztian 
216023542618SKOVACS Krisztian 		skb->destructor = NULL;
216123542618SKOVACS Krisztian 		skb->sk = NULL;
216223542618SKOVACS Krisztian 		return sk;
216323542618SKOVACS Krisztian 	}
216423542618SKOVACS Krisztian 	return NULL;
216523542618SKOVACS Krisztian }
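/*
 * Hypothetical demux sketch: reuse a socket reference already attached
 * to the skb before paying for a full hash lookup. Ownership of the
 * reference transfers to the caller along with the cleared destructor.
 */
static inline struct sock *example_demux(struct sk_buff *skb)
{
	struct sock *sk = skb_steal_sock(skb);

	if (sk)
		return sk;
	/* a protocol-specific socket lookup would go here */
	return NULL;
}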
216623542618SKOVACS Krisztian 
216720d49473SPatrick Ohly extern void sock_enable_timestamp(struct sock *sk, int flag);
21681da177e4SLinus Torvalds extern int sock_get_timestamp(struct sock *, struct timeval __user *);
2169ae40eb1eSEric Dumazet extern int sock_get_timestampns(struct sock *, struct timespec __user *);
21701da177e4SLinus Torvalds 
21711da177e4SLinus Torvalds /*
21721da177e4SLinus Torvalds  *	Enable debug/info messages
21731da177e4SLinus Torvalds  */
2174a2a316fdSStephen Hemminger extern int net_msg_warn;
2175a2a316fdSStephen Hemminger #define NETDEBUG(fmt, args...) \
2176a2a316fdSStephen Hemminger 	do { if (net_msg_warn) printk(fmt, ##args); } while (0)
21771da177e4SLinus Torvalds 
2178a2a316fdSStephen Hemminger #define LIMIT_NETDEBUG(fmt, args...) \
2179a2a316fdSStephen Hemminger 	do { if (net_msg_warn && net_ratelimit()) printk(fmt, ##args); } while (0)
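/*
 * Illustrative use (message text invented): with net_msg_warn clear the
 * message is suppressed entirely; with it set, net_ratelimit() throttles
 * bursts of output.
 */
static inline void example_warn_bad_packet(void)
{
	LIMIT_NETDEBUG(KERN_DEBUG "example: dropping malformed packet\n");
}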
21801da177e4SLinus Torvalds 
21811da177e4SLinus Torvalds extern __u32 sysctl_wmem_max;
21821da177e4SLinus Torvalds extern __u32 sysctl_rmem_max;
21831da177e4SLinus Torvalds 
218420380731SArnaldo Carvalho de Melo extern void sk_init(void);
218520380731SArnaldo Carvalho de Melo 
21866baf1f41SDavid S. Miller extern int sysctl_optmem_max;
21876baf1f41SDavid S. Miller 
218820380731SArnaldo Carvalho de Melo extern __u32 sysctl_wmem_default;
218920380731SArnaldo Carvalho de Melo extern __u32 sysctl_rmem_default;
219020380731SArnaldo Carvalho de Melo 
21911da177e4SLinus Torvalds #endif	/* _SOCK_H */
2192