1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Definitions for the AF_INET socket handler.
7 *
8 * Version: @(#)sock.h 1.0.4 05/13/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Corey Minyard <wf-rch!minyard@relay.EU.net>
13 * Florian La Roche <flla@stud.uni-sb.de>
14 *
15 * Fixes:
16 * Alan Cox : Volatiles in skbuff pointers. See
17 * skbuff comments. May be overdone,
18 * better to prove they can be removed
19 * than the reverse.
20 * Alan Cox : Added a zapped field for tcp to note
21 * a socket is reset and must stay shut up
22 * Alan Cox : New fields for options
23 * Pauline Middelink : identd support
24 * Alan Cox : Eliminate low level recv/recvfrom
25 * David S. Miller : New socket lookup architecture.
26 * Steve Whitehouse: Default routines for sock_ops
27 *
28 * This program is free software; you can redistribute it and/or
29 * modify it under the terms of the GNU General Public License
30 * as published by the Free Software Foundation; either version
31 * 2 of the License, or (at your option) any later version.
32 */
33 #ifndef _SOCK_H
34 #define _SOCK_H
35
36 #include <linux/config.h>
37 #include <linux/timer.h>
38 #include <linux/cache.h>
39 #include <linux/in.h> /* struct sockaddr_in */
40
41 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
42 #include <linux/in6.h> /* struct sockaddr_in6 */
43 #include <linux/ipv6.h> /* dest_cache, inet6_options */
44 #include <linux/icmpv6.h>
45 #include <net/if_inet6.h> /* struct ipv6_mc_socklist */
46 #endif
47
48 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
49 #include <linux/icmp.h>
50 #endif
51 #include <linux/tcp.h> /* struct tcphdr */
52
53 #include <linux/netdevice.h>
54 #include <linux/skbuff.h> /* struct sk_buff */
55 #include <net/protocol.h> /* struct inet_protocol */
56 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
57 #include <net/x25.h>
58 #endif
59 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
60 #include <net/ax25.h>
61 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
62 #include <net/netrom.h>
63 #endif
64 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
65 #include <net/rose.h>
66 #endif
67 #endif
68
69 #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
70 #include <linux/if_pppox.h>
71 #include <linux/ppp_channel.h> /* struct ppp_channel */
72 #endif
73
74 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
75 #if defined(CONFIG_SPX) || defined(CONFIG_SPX_MODULE)
76 #include <net/spx.h>
77 #else
78 #include <net/ipx.h>
79 #endif /* CONFIG_SPX */
80 #endif /* CONFIG_IPX */
81
82 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
83 #include <linux/atalk.h>
84 #endif
85
86 #if defined(CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
87 #include <net/dn.h>
88 #endif
89
90 #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
91 #include <net/irda/irda.h>
92 #endif
93
94 #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
95 struct atm_vcc;
96 #endif
97
98 #ifdef CONFIG_FILTER
99 #include <linux/filter.h>
100 #endif
101
102 #include <asm/atomic.h>
103 #include <net/dst.h>
104
105
106 /* The AF_UNIX specific socket options */
107 struct unix_opt {
108 struct unix_address *addr;
109 struct dentry * dentry;
110 struct vfsmount * mnt;
111 struct semaphore readsem;
112 struct sock * other;
113 struct sock ** list;
114 struct sock * gc_tree;
115 atomic_t inflight;
116 rwlock_t lock;
117 wait_queue_head_t peer_wait;
118 };
119
120
/* Once the IPX ncpd patches are in these are going into protinfo. */
#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
/* Private per-socket state for IPX sockets (protinfo member af_ipx). */
struct ipx_opt {
	ipx_address	dest_addr;
	ipx_interface	*intrfc;
	unsigned short	port;
#ifdef CONFIG_IPX_INTERN
	unsigned char	node[IPX_NODE_LEN];
#endif
	unsigned short	type;

	/*
	 * mars_nwe uses special ncp connection-handling sockets; for
	 * those the connection number has to live in the socket itself.
	 */
	unsigned short	ipx_ncp_conn;
};
#endif
138
139 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
140 struct ipv6_pinfo {
141 struct in6_addr saddr;
142 struct in6_addr rcv_saddr;
143 struct in6_addr daddr;
144 struct in6_addr *daddr_cache;
145
146 __u32 flow_label;
147 __u32 frag_size;
148 int hop_limit;
149 int mcast_hops;
150 int mcast_oif;
151
152 /* pktoption flags */
153 union {
154 struct {
155 __u8 srcrt:2,
156 rxinfo:1,
157 rxhlim:1,
158 hopopts:1,
159 dstopts:1,
160 authhdr:1,
161 rxflow:1;
162 } bits;
163 __u8 all;
164 } rxopt;
165
166 /* sockopt flags */
167 __u8 mc_loop:1,
168 recverr:1,
169 sndflow:1,
170 pmtudisc:2;
171
172 struct ipv6_mc_socklist *ipv6_mc_list;
173 struct ipv6_fl_socklist *ipv6_fl_list;
174 __u32 dst_cookie;
175
176 struct ipv6_txoptions *opt;
177 struct sk_buff *pktoptions;
178 };
179
180 struct raw6_opt {
181 __u32 checksum; /* perform checksum */
182 __u32 offset; /* checksum offset */
183
184 struct icmp6_filter filter;
185 };
186
187 #endif /* IPV6 */
188
189 #if defined(CONFIG_INET) || defined(CONFIG_INET_MODULE)
190 struct raw_opt {
191 struct icmp_filter filter;
192 };
193 #endif
194
195 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
196 struct inet_opt
197 {
198 int ttl; /* TTL setting */
199 int tos; /* TOS */
200 unsigned cmsg_flags;
201 struct ip_options *opt;
202 unsigned char hdrincl; /* Include headers ? */
203 __u8 mc_ttl; /* Multicasting TTL */
204 __u8 mc_loop; /* Loopback */
205 unsigned recverr : 1,
206 freebind : 1;
207 __u8 pmtudisc;
208 int mc_index; /* Multicast device index */
209 __u32 mc_addr;
210 struct ip_mc_socklist *mc_list; /* Group array */
211 };
212 #endif
213
214 #if defined(CONFIG_PPPOE) || defined (CONFIG_PPPOE_MODULE)
215 struct pppoe_opt
216 {
217 struct net_device *dev; /* device associated with socket*/
218 struct pppoe_addr pa; /* what this socket is bound to*/
219 struct sockaddr_pppox relay; /* what socket data will be
220 relayed to (PPPoE relaying) */
221 };
222
223 struct pppox_opt
224 {
225 struct ppp_channel chan;
226 struct sock *sk;
227 struct pppox_opt *next; /* for hash table */
228 union {
229 struct pppoe_opt pppoe;
230 } proto;
231 };
232 #define pppoe_dev proto.pppoe.dev
233 #define pppoe_pa proto.pppoe.pa
234 #define pppoe_relay proto.pppoe.relay
235 #endif
236
237 /* This defines a selective acknowledgement block. */
238 struct tcp_sack_block {
239 __u32 start_seq;
240 __u32 end_seq;
241 };
242
243 struct tcp_opt {
244 int tcp_header_len; /* Bytes of tcp header to send */
245
246 /*
247 * Header prediction flags
248 * 0x5?10 << 16 + snd_wnd in net byte order
249 */
250 __u32 pred_flags;
251
252 /*
253 * RFC793 variables by their proper names. This means you can
254 * read the code and the spec side by side (and laugh ...)
255 * See RFC793 and RFC1122. The RFC writes these in capitals.
256 */
257 __u32 rcv_nxt; /* What we want to receive next */
258 __u32 snd_nxt; /* Next sequence we send */
259
260 __u32 snd_una; /* First byte we want an ack for */
261 __u32 snd_sml; /* Last byte of the most recently transmitted small packet */
262 __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
263 __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
264
265 /* Delayed ACK control data */
266 struct {
267 __u8 pending; /* ACK is pending */
268 __u8 quick; /* Scheduled number of quick acks */
269 __u8 pingpong; /* The session is interactive */
270 __u8 blocked; /* Delayed ACK was blocked by socket lock*/
271 __u32 ato; /* Predicted tick of soft clock */
272 unsigned long timeout; /* Currently scheduled timeout */
273 __u32 lrcvtime; /* timestamp of last received data packet*/
274 __u16 last_seg_size; /* Size of last incoming segment */
275 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
276 } ack;
277
278 /* Data for direct copy to user */
279 struct {
280 struct sk_buff_head prequeue;
281 int memory;
282 struct task_struct *task;
283 struct iovec *iov;
284 int len;
285 } ucopy;
286
287 __u32 snd_wl1; /* Sequence for window update */
288 __u32 snd_wnd; /* The window we expect to receive */
289 __u32 max_window; /* Maximal window ever seen from peer */
290 __u32 pmtu_cookie; /* Last pmtu seen by socket */
291 __u16 mss_cache; /* Cached effective mss, not including SACKS */
292 __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
293 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
294 __u8 ca_state; /* State of fast-retransmit machine */
295 __u8 retransmits; /* Number of unrecovered RTO timeouts. */
296
297 __u8 reordering; /* Packet reordering metric. */
298 __u8 queue_shrunk; /* Write queue has been shrunk recently.*/
299 __u8 defer_accept; /* User waits for some data after accept() */
300
301 /* RTT measurement */
302 __u8 backoff; /* backoff */
303 __u32 srtt; /* smothed round trip time << 3 */
304 __u32 mdev; /* medium deviation */
305 __u32 mdev_max; /* maximal mdev for the last rtt period */
306 __u32 rttvar; /* smoothed mdev_max */
307 __u32 rtt_seq; /* sequence number to update rttvar */
308 __u32 rto; /* retransmit timeout */
309
310 __u32 packets_out; /* Packets which are "in flight" */
311 __u32 left_out; /* Packets which leaved network */
312 __u32 retrans_out; /* Retransmitted packets out */
313
314
315 /*
316 * Slow start and congestion control (see also Nagle, and Karn & Partridge)
317 */
318 __u32 snd_ssthresh; /* Slow start size threshold */
319 __u32 snd_cwnd; /* Sending congestion window */
320 __u16 snd_cwnd_cnt; /* Linear increase counter */
321 __u16 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
322 __u32 snd_cwnd_used;
323 __u32 snd_cwnd_stamp;
324
325 /* Two commonly used timers in both sender and receiver paths. */
326 unsigned long timeout;
327 struct timer_list retransmit_timer; /* Resend (no ack) */
328 struct timer_list delack_timer; /* Ack delay */
329
330 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */
331
332 struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */
333 struct sk_buff *send_head; /* Front of stuff to transmit */
334
335 __u32 rcv_wnd; /* Current receiver window */
336 __u32 rcv_wup; /* rcv_nxt on last window update sent */
337 __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
338 __u32 pushed_seq; /* Last pushed seq, required to talk to windows */
339 __u32 copied_seq; /* Head of yet unread data */
340 /*
341 * Options received (usually on last packet, some only on SYN packets).
342 */
343 char tstamp_ok, /* TIMESTAMP seen on SYN packet */
344 wscale_ok, /* Wscale seen on SYN packet */
345 sack_ok; /* SACK seen on SYN packet */
346 char saw_tstamp; /* Saw TIMESTAMP on last packet */
347 __u8 snd_wscale; /* Window scaling received from sender */
348 __u8 rcv_wscale; /* Window scaling to send to receiver */
349 __u8 nonagle; /* Disable Nagle algorithm? */
350 __u8 keepalive_probes; /* num of allowed keep alive probes */
351
352 /* PAWS/RTTM data */
353 __u32 rcv_tsval; /* Time stamp value */
354 __u32 rcv_tsecr; /* Time stamp echo reply */
355 __u32 ts_recent; /* Time stamp to echo next */
356 long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
357
358 /* SACKs data */
359 __u16 user_mss; /* mss requested by user in ioctl */
360 __u8 dsack; /* D-SACK is scheduled */
361 __u8 eff_sacks; /* Size of SACK array to send with next packet */
362 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
363 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
364
365 __u32 window_clamp; /* Maximal window to advertise */
366 __u32 rcv_ssthresh; /* Current window clamp */
367 __u8 probes_out; /* unanswered 0 window probes */
368 __u8 num_sacks; /* Number of SACK blocks */
369 __u16 advmss; /* Advertised MSS */
370
371 __u8 syn_retries; /* num of allowed syn retries */
372 __u8 ecn_flags; /* ECN status bits. */
373 __u16 prior_ssthresh; /* ssthresh saved at recovery start */
374 __u32 lost_out; /* Lost packets */
375 __u32 sacked_out; /* SACK'd packets */
376 __u32 fackets_out; /* FACK'd packets */
377 __u32 high_seq; /* snd_nxt at onset of congestion */
378
379 __u32 retrans_stamp; /* Timestamp of the last retransmit,
380 * also used in SYN-SENT to remember stamp of
381 * the first SYN. */
382 __u32 undo_marker; /* tracking retrans started here. */
383 int undo_retrans; /* number of undoable retransmissions. */
384 __u32 syn_seq; /* Seq of received SYN. */
385 __u32 fin_seq; /* Seq of received FIN. */
386 __u32 urg_seq; /* Seq of received urgent pointer */
387 __u16 urg_data; /* Saved octet of OOB data and control flags */
388 __u8 pending; /* Scheduled timer event */
389 __u8 urg_mode; /* In urgent mode */
390 __u32 snd_up; /* Urgent pointer */
391
392 /* The syn_wait_lock is necessary only to avoid tcp_get_info having
393 * to grab the main lock sock while browsing the listening hash
394 * (otherwise it's deadlock prone).
395 * This lock is acquired in read mode only from tcp_get_info() and
396 * it's acquired in write mode _only_ from code that is actively
397 * changing the syn_wait_queue. All readers that are holding
398 * the master sock lock don't need to grab this lock in read mode
399 * too as the syn_wait_queue writes are always protected from
400 * the main sock lock.
401 */
402 rwlock_t syn_wait_lock;
403 struct tcp_listen_opt *listen_opt;
404
405 /* FIFO of established children */
406 struct open_request *accept_queue;
407 struct open_request *accept_queue_tail;
408
409 int write_pending; /* A write to socket waits to start. */
410
411 unsigned int keepalive_time; /* time before keep alive takes place */
412 unsigned int keepalive_intvl; /* time interval between keep alive probes */
413 int linger2;
414 };
415
416
417 /*
418 * This structure really needs to be cleaned up.
419 * Most of it is for TCP, and not used by any of
420 * the other protocols.
421 */
422
423 /*
 * The idea is to start moving to a newer struct gradually
425 *
426 * IMHO the newer struct should have the following format:
427 *
428 * struct sock {
429 * sockmem [mem, proto, callbacks]
430 *
431 * union or struct {
432 * ax25;
433 * } ll_pinfo;
434 *
435 * union {
436 * ipv4;
437 * ipv6;
438 * ipx;
439 * netrom;
440 * rose;
441 * x25;
442 * } net_pinfo;
443 *
444 * union {
445 * tcp;
446 * udp;
447 * spx;
448 * netrom;
449 * } tp_pinfo;
450 *
451 * }
452 *
 * The idea failed because the IPv6 transition assumes dual IP/IPv6 sockets.
 * So net_pinfo really holds only the IPv6 state, and protinfo unifies all the
 * other private areas.
456 */
457
/*
 * sk->debug debugging facility.
 *
 * With SOCK_DEBUGGING defined, SOCK_DEBUG(sk, fmt, args...) prints the
 * message through printk() at KERN_DEBUG level, but only when sk is
 * non-NULL and sk->debug is set.  Without it the macro expands to an
 * empty statement and its arguments are never evaluated.
 */
#define SOCK_DEBUGGING
#ifdef SOCK_DEBUGGING
#define SOCK_DEBUG(sk, msg...)				\
	do {						\
		if ((sk) && ((sk)->debug))		\
			printk(KERN_DEBUG msg);		\
	} while (0)
#else
#define SOCK_DEBUG(sk, msg...)	do { } while (0)
#endif
465
466 /* This is the per-socket lock. The spinlock provides a synchronization
467 * between user contexts and software interrupt processing, whereas the
468 * mini-semaphore synchronizes multiple users amongst themselves.
469 */
470 typedef struct {
471 spinlock_t slock;
472 unsigned int users;
473 wait_queue_head_t wq;
474 } socket_lock_t;
475
/*
 * sock_lock_init - initialise the socket_lock_t embedded in a socket.
 * @__sk: pointer to the socket whose "lock" member is set up
 *
 * Initialises the spinlock, marks the lock as having no owner
 * (users = 0) and initialises the wait queue head.
 *
 * The expansion intentionally carries no trailing semicolon: the caller
 * supplies it, so the macro acts as exactly one statement and can be
 * used as the body of an unbraced if/else.
 */
#define sock_lock_init(__sk) \
do {	spin_lock_init(&((__sk)->lock.slock)); \
	(__sk)->lock.users = 0; \
	init_waitqueue_head(&((__sk)->lock.wq)); \
} while (0)
481
482 struct sock {
483 /* Socket demultiplex comparisons on incoming packets. */
484 __u32 daddr; /* Foreign IPv4 addr */
485 __u32 rcv_saddr; /* Bound local IPv4 addr */
486 __u16 dport; /* Destination port */
487 unsigned short num; /* Local port */
488 int bound_dev_if; /* Bound device index if != 0 */
489
490 /* Main hash linkage for various protocol lookup tables. */
491 struct sock *next;
492 struct sock **pprev;
493 struct sock *bind_next;
494 struct sock **bind_pprev;
495
496 volatile unsigned char state, /* Connection state */
497 zapped; /* In ax25 & ipx means not linked */
498 __u16 sport; /* Source port */
499
500 unsigned short family; /* Address family */
501 unsigned char reuse; /* SO_REUSEADDR setting */
502 unsigned char shutdown;
503 atomic_t refcnt; /* Reference count */
504
505 socket_lock_t lock; /* Synchronizer... */
506 int rcvbuf; /* Size of receive buffer in bytes */
507
508 wait_queue_head_t *sleep; /* Sock wait queue */
509 struct dst_entry *dst_cache; /* Destination cache */
510 rwlock_t dst_lock;
511 atomic_t rmem_alloc; /* Receive queue bytes committed */
512 struct sk_buff_head receive_queue; /* Incoming packets */
513 atomic_t wmem_alloc; /* Transmit queue bytes committed */
514 struct sk_buff_head write_queue; /* Packet sending queue */
515 atomic_t omem_alloc; /* "o" is "option" or "other" */
516 int wmem_queued; /* Persistent queue size */
517 int forward_alloc; /* Space allocated forward. */
518 __u32 saddr; /* Sending source */
519 unsigned int allocation; /* Allocation mode */
520 int sndbuf; /* Size of send buffer in bytes */
521 struct sock *prev;
522
523 /* Not all are volatile, but some are, so we might as well say they all are.
524 * XXX Make this a flag word -DaveM
525 */
526 volatile char dead,
527 done,
528 urginline,
529 keepopen,
530 linger,
531 destroy,
532 no_check,
533 broadcast,
534 bsdism;
535 unsigned char debug;
536 unsigned char rcvtstamp;
537 unsigned char userlocks;
538 int proc;
539 unsigned long lingertime;
540
541 int hashent;
542 struct sock *pair;
543
544 /* The backlog queue is special, it is always used with
545 * the per-socket spinlock held and requires low latency
546 * access. Therefore we special case it's implementation.
547 */
548 struct {
549 struct sk_buff *head;
550 struct sk_buff *tail;
551 } backlog;
552
553 rwlock_t callback_lock;
554
555 /* Error queue, rarely used. */
556 struct sk_buff_head error_queue;
557
558 struct proto *prot;
559
560 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
561 union {
562 struct ipv6_pinfo af_inet6;
563 } net_pinfo;
564 #endif
565
566 union {
567 struct tcp_opt af_tcp;
568 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
569 struct raw_opt tp_raw4;
570 #endif
571 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
572 struct raw6_opt tp_raw;
573 #endif /* CONFIG_IPV6 */
574 #if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
575 struct spx_opt af_spx;
576 #endif /* CONFIG_SPX */
577
578 } tp_pinfo;
579
580 int err, err_soft; /* Soft holds errors that don't
581 cause failure but are the cause
582 of a persistent failure not just
583 'timed out' */
584 unsigned short ack_backlog;
585 unsigned short max_ack_backlog;
586 __u32 priority;
587 unsigned short type;
588 unsigned char localroute; /* Route locally only */
589 unsigned char protocol;
590 struct ucred peercred;
591 int rcvlowat;
592 long rcvtimeo;
593 long sndtimeo;
594
595 #ifdef CONFIG_FILTER
596 /* Socket Filtering Instructions */
597 struct sk_filter *filter;
598 #endif /* CONFIG_FILTER */
599
600 /* This is where all the private (optional) areas that don't
601 * overlap will eventually live.
602 */
603 union {
604 void *destruct_hook;
605 struct unix_opt af_unix;
606 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
607 struct inet_opt af_inet;
608 #endif
609 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
610 struct atalk_sock af_at;
611 #endif
612 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
613 struct ipx_opt af_ipx;
614 #endif
615 #if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
616 struct dn_scp dn;
617 #endif
618 #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
619 struct packet_opt *af_packet;
620 #endif
621 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
622 x25_cb *x25;
623 #endif
624 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
625 ax25_cb *ax25;
626 #endif
627 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
628 nr_cb *nr;
629 #endif
630 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
631 rose_cb *rose;
632 #endif
633 #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
634 struct pppox_opt *pppox;
635 #endif
636 #ifdef CONFIG_NETLINK
637 struct netlink_opt *af_netlink;
638 #endif
639 #if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
640 struct econet_opt *af_econet;
641 #endif
642 #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
643 struct atm_vcc *af_atm;
644 #endif
645 #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
646 struct irda_sock *irda;
647 #endif
648 } protinfo;
649
650
651 /* This part is used for the timeout functions. */
652 struct timer_list timer; /* This is the sock cleanup timer. */
653 struct timeval stamp;
654
655 /* Identd and reporting IO signals */
656 struct socket *socket;
657
658 /* RPC layer private data */
659 void *user_data;
660
661 /* Callbacks */
662 void (*state_change)(struct sock *sk);
663 void (*data_ready)(struct sock *sk,int bytes);
664 void (*write_space)(struct sock *sk);
665 void (*error_report)(struct sock *sk);
666
667 int (*backlog_rcv) (struct sock *sk,
668 struct sk_buff *skb);
669 void (*destruct)(struct sock *sk);
670 };
671
/*
 * sk_add_backlog - append @__skb to the tail of @__sk's backlog list.
 *
 * The per-socket spinlock must be held here.
 *
 * An empty backlog (tail == NULL) gets @__skb as both head and tail;
 * otherwise @__skb is linked after the current tail and becomes the new
 * tail.  In either case @__skb->next ends up NULL.  Both arguments are
 * evaluated more than once, so they must be free of side effects.
 */
#define sk_add_backlog(__sk, __skb)				\
do {								\
	if ((__sk)->backlog.tail != NULL)			\
		((__sk)->backlog.tail)->next = (__skb);		\
	else							\
		(__sk)->backlog.head = (__skb);			\
	(__sk)->backlog.tail = (__skb);				\
	(__skb)->next = NULL;					\
} while (0)
683
684 /* IP protocol blocks we attach to sockets.
685 * socket layer -> transport layer interface
686 * transport -> network interface is defined by struct inet_proto
687 */
688 struct proto {
689 void (*close)(struct sock *sk,
690 long timeout);
691 int (*connect)(struct sock *sk,
692 struct sockaddr *uaddr,
693 int addr_len);
694 int (*disconnect)(struct sock *sk, int flags);
695
696 struct sock * (*accept) (struct sock *sk, int flags, int *err);
697
698 int (*ioctl)(struct sock *sk, int cmd,
699 unsigned long arg);
700 int (*init)(struct sock *sk);
701 int (*destroy)(struct sock *sk);
702 void (*shutdown)(struct sock *sk, int how);
703 int (*setsockopt)(struct sock *sk, int level,
704 int optname, char *optval, int optlen);
705 int (*getsockopt)(struct sock *sk, int level,
706 int optname, char *optval,
707 int *option);
708 int (*sendmsg)(struct sock *sk, struct msghdr *msg,
709 int len);
710 int (*recvmsg)(struct sock *sk, struct msghdr *msg,
711 int len, int noblock, int flags,
712 int *addr_len);
713 int (*bind)(struct sock *sk,
714 struct sockaddr *uaddr, int addr_len);
715
716 int (*backlog_rcv) (struct sock *sk,
717 struct sk_buff *skb);
718
719 /* Keeping track of sk's, looking them up, and port selection methods. */
720 void (*hash)(struct sock *sk);
721 void (*unhash)(struct sock *sk);
722 int (*get_port)(struct sock *sk, unsigned short snum);
723
724 char name[32];
725
726 struct {
727 int inuse;
728 u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
729 } stats[NR_CPUS];
730 };
731
732 /* Called with local bh disabled */
733 static void __inline__ sock_prot_inc_use(struct proto *prot)
734 {
735 prot->stats[smp_processor_id()].inuse++;
736 }
737
738 static void __inline__ sock_prot_dec_use(struct proto *prot)
739 {
740 prot->stats[smp_processor_id()].inuse--;
741 }
742
/* Socket destroy delay: about 10 seconds (10 * HZ). */
#define SOCK_DESTROY_TIME	(10*HZ)

/* Sockets 0-1023 can't be bound to unless you are superuser. */
#define PROT_SOCK		1024

/* Shutdown flag bits; SHUTDOWN_MASK covers both directions. */
#define SHUTDOWN_MASK		3
#define RCV_SHUTDOWN		1
#define SEND_SHUTDOWN		2

/*
 * Socket lock flag bits.
 * NOTE(review): presumably stored in sk->userlocks - confirm with users.
 */
#define SOCK_SNDBUF_LOCK	1
#define SOCK_RCVBUF_LOCK	2
#define SOCK_BINDADDR_LOCK	4
#define SOCK_BINDPORT_LOCK	8
757
758
/*
 * lock_sock - let a process "lock" the socket state.
 *
 * Used by processes so that interrupts and bottom half handlers won't
 * change the socket from under us.  It essentially blocks any incoming
 * packets, so that we won't get any new data or any packets that change
 * the state of the socket.
 *
 * While locked, BH processing will add new packets to the backlog
 * queue.  This queue is processed by the owner of the socket lock
 * right before it is released.
 *
 * Since ~2.3.5 it is also an exclusive sleep lock serializing accesses
 * from user process context.
 *
 * Under the spinlock: when the lock already has an owner
 * (lock.users != 0) __lock_sock() is called; afterwards lock.users
 * is set to 1.
 */
extern void __lock_sock(struct sock *sk);
extern void __release_sock(struct sock *sk);

#define lock_sock(__sk)						\
do {								\
	spin_lock_bh(&((__sk)->lock.slock));			\
	if ((__sk)->lock.users != 0)				\
		__lock_sock(__sk);				\
	(__sk)->lock.users = 1;					\
	spin_unlock_bh(&((__sk)->lock.slock));			\
} while (0)
781
/*
 * release_sock - give up the socket lock taken with lock_sock().
 *
 * Under the spinlock: a non-empty backlog (backlog.tail != NULL) is
 * handed to __release_sock() first, then lock.users is cleared and
 * lock.wq is woken if it has active waiters.
 */
#define release_sock(__sk)					\
do {								\
	spin_lock_bh(&((__sk)->lock.slock));			\
	if ((__sk)->backlog.tail != NULL)			\
		__release_sock(__sk);				\
	(__sk)->lock.users = 0;					\
	if (waitqueue_active(&((__sk)->lock.wq)))		\
		wake_up(&((__sk)->lock.wq));			\
	spin_unlock_bh(&((__sk)->lock.slock));			\
} while (0)
790
/*
 * BH context may only use the following locking interface:
 * plain spin_lock()/spin_unlock() on the per-socket spinlock.
 */
#define bh_lock_sock(__sk)	spin_lock(&((__sk)->lock.slock))
#define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->lock.slock))
794
/*
 * This might not be the most appropriate place for these two,
 * but since they are used by a lot of the net-related code,
 * at least they get declared in an include that is common to all of it.
 */
800
/*
 * Return the smaller of two unsigned values.
 *
 * Note that the result is converted to int on return, so a value that
 * does not fit in an int does not come back unchanged.
 */
static __inline__ int min(unsigned int a, unsigned int b)
{
	return (a > b) ? b : a;
}
807
/*
 * Return the larger of two unsigned values.
 *
 * Note that the result is converted to int on return, so a value that
 * does not fit in an int does not come back unchanged.
 */
static __inline__ int max(unsigned int a, unsigned int b)
{
	return (a < b) ? b : a;
}
814
/* Allocation and release of struct sock itself. */
extern struct sock *sk_alloc(int family, int priority, int zero_it);
extern void sk_free(struct sock *sk);

/* skb allocation on behalf of a socket, and the matching release hooks. */
extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size,
				    int force, int priority);
extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size,
				    int force, int priority);
extern void sock_wfree(struct sk_buff *skb);
extern void sock_rfree(struct sk_buff *skb);

/* Socket level option handling. */
extern int sock_setsockopt(struct socket *sock, int level, int op,
			   char *optval, int optlen);
extern int sock_getsockopt(struct socket *sock, int level, int op,
			   char *optval, int *optlen);

extern struct sk_buff *sock_alloc_send_skb(struct sock *sk,
					   unsigned long size,
					   unsigned long fallback,
					   int noblock,
					   int *errcode);

/* Memory allocation on behalf of a socket. */
extern void *sock_kmalloc(struct sock *sk, int size, int priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);

extern int copy_and_csum_toiovec(struct iovec *iov, struct sk_buff *skb,
				 int hlen);
843
/*
 * Default entries for struct proto_ops: a protocol that does not
 * implement a particular operation can plug in the matching
 * sock_no_*() function instead.
 */
extern int sock_no_release(struct socket *);
extern int sock_no_bind(struct socket *, struct sockaddr *, int);
extern int sock_no_connect(struct socket *, struct sockaddr *, int, int);
extern int sock_no_socketpair(struct socket *, struct socket *);
extern int sock_no_accept(struct socket *, struct socket *, int);
extern int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
extern unsigned int sock_no_poll(struct file *, struct socket *,
				 struct poll_table_struct *);
extern int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
extern int sock_no_listen(struct socket *, int);
extern int sock_no_shutdown(struct socket *, int);
extern int sock_no_getsockopt(struct socket *, int, int, char *, int *);
extern int sock_no_setsockopt(struct socket *, int, int, char *, int);
extern int sock_no_fcntl(struct socket *, unsigned int, unsigned long);
extern int sock_no_sendmsg(struct socket *, struct msghdr *, int,
			   struct scm_cookie *);
extern int sock_no_recvmsg(struct socket *, struct msghdr *, int, int,
			   struct scm_cookie *);
extern int sock_no_mmap(struct file *file, struct socket *sock,
			struct vm_area_struct *vma);
880
/*
 * Default socket callbacks and setup code.
 */
extern void sock_def_destruct(struct sock *);

/* Initialise core socket variables. */
extern void sock_init_data(struct socket *sock, struct sock *sk);

/* Helpers operating on a protocol's list of sockets. */
extern void sklist_remove_socket(struct sock **list, struct sock *sk);
extern void sklist_insert_socket(struct sock **list, struct sock *sk);
extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
893
894 #ifdef CONFIG_FILTER
895
896 /**
897 * sk_filter - run a packet through a socket filter
898 * @skb: buffer to filter
899 * @filter: filter to apply
900 *
901 * Run the filter code and then cut skb->data to correct size returned by
902 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
903 * than pkt_len we keep whole skb->data. This is the socket level
904 * wrapper to sk_run_filter. It returns 0 if the packet should
905 * be accepted or 1 if the packet should be tossed.
906 */
907
908 static inline int sk_filter(struct sk_buff *skb, struct sk_filter *filter)
909 {
910 int pkt_len;
911
912 pkt_len = sk_run_filter(skb, filter->insns, filter->len);
913 if(!pkt_len)
914 return 1; /* Toss Packet */
915 else
916 skb_trim(skb, pkt_len);
917
918 return 0;
919 }
920
921 /**
922 * sk_filter_release: Release a socket filter
923 * @sk: socket
924 * @fp: filter to remove
925 *
926 * Remove a filter from a socket and release its resources.
927 */
928
929 static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
930 {
931 unsigned int size = sk_filter_len(fp);
932
933 atomic_sub(size, &sk->omem_alloc);
934
935 if (atomic_dec_and_test(&fp->refcnt))
936 kfree(fp);
937 }
938
939 static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
940 {
941 atomic_inc(&fp->refcnt);
942 atomic_add(sk_filter_len(fp), &sk->omem_alloc);
943 }
944
945 #endif /* CONFIG_FILTER */
946
947 /*
948 * Socket reference counting postulates.
949 *
950 * * Each user of socket SHOULD hold a reference count.
 * * Each access point to socket (a hash table bucket, reference from a list,
 *   running timer, skb in flight) MUST hold a reference count.
953 * * When reference count hits 0, it means it will never increase back.
954 * * When reference count hits 0, it means that no references from
955 * outside exist to this socket and current process on current CPU
956 * is last user and may/should destroy this socket.
957 * * sk_free is called from any context: process, BH, IRQ. When
958 * it is called, socket has no references from outside -> sk_free
959 * may release descendant resources allocated by the socket, but
960 * to the time when it is called, socket is NOT referenced by any
961 * hash tables, lists etc.
962 * * Packets, delivered from outside (from network or from another process)
963 * and enqueued on receive/error queues SHOULD NOT grab reference count,
 *   when they sit in queue. Otherwise, packets will leak into a hole when the
 *   socket is looked up by one CPU and unhashing is done by another CPU.
966 * It is true for udp/raw, netlink (leak to receive and error queues), tcp
967 * (leak to backlog). Packet socket does all the processing inside
968 * BR_NETPROTO_LOCK, so that it has not this race condition. UNIX sockets
969 * use separate SMP lock, so that they are prone too.
970 */
971
972 /* Grab socket reference count. This operation is valid only
973 when sk is ALREADY grabbed f.e. it is found in hash table
974 or a list and the lookup is made under lock preventing hash table
975 modifications.
976 */
977
978 static inline void sock_hold(struct sock *sk)
979 {
980 atomic_inc(&sk->refcnt);
981 }
982
983 /* Ungrab socket in the context, which assumes that socket refcnt
984 cannot hit zero, f.e. it is true in context of any socketcall.
985 */
986 static inline void __sock_put(struct sock *sk)
987 {
988 atomic_dec(&sk->refcnt);
989 }
990
991 /* Ungrab socket and destroy it, if it was the last reference. */
992 static inline void sock_put(struct sock *sk)
993 {
994 if (atomic_dec_and_test(&sk->refcnt))
995 sk_free(sk);
996 }
997
998 /* Detach socket from process context.
999 * Announce socket dead, detach it from wait queue and inode.
1000 * Note that parent inode held reference count on this struct sock,
1001 * we do not release it in this function, because protocol
1002 * probably wants some additional cleanups or even continuing
1003 * to work with this socket (TCP).
1004 */
1005 static inline void sock_orphan(struct sock *sk)
1006 {
1007 write_lock_bh(&sk->callback_lock);
1008 sk->dead = 1;
1009 sk->socket = NULL;
1010 sk->sleep = NULL;
1011 write_unlock_bh(&sk->callback_lock);
1012 }
1013
1014 static inline void sock_graft(struct sock *sk, struct socket *parent)
1015 {
1016 write_lock_bh(&sk->callback_lock);
1017 sk->sleep = &parent->wait;
1018 parent->sk = sk;
1019 sk->socket = parent;
1020 write_unlock_bh(&sk->callback_lock);
1021 }
1022
1023 static inline int sock_i_uid(struct sock *sk)
1024 {
1025 int uid;
1026
1027 read_lock(&sk->callback_lock);
1028 uid = sk->socket ? sk->socket->inode->i_uid : 0;
1029 read_unlock(&sk->callback_lock);
1030 return uid;
1031 }
1032
1033 static inline unsigned long sock_i_ino(struct sock *sk)
1034 {
1035 unsigned long ino;
1036
1037 read_lock(&sk->callback_lock);
1038 ino = sk->socket ? sk->socket->inode->i_ino : 0;
1039 read_unlock(&sk->callback_lock);
1040 return ino;
1041 }
1042
1043 static inline struct dst_entry *
1044 __sk_dst_get(struct sock *sk)
1045 {
1046 return sk->dst_cache;
1047 }
1048
1049 static inline struct dst_entry *
1050 sk_dst_get(struct sock *sk)
1051 {
1052 struct dst_entry *dst;
1053
1054 read_lock(&sk->dst_lock);
1055 dst = sk->dst_cache;
1056 if (dst)
1057 dst_hold(dst);
1058 read_unlock(&sk->dst_lock);
1059 return dst;
1060 }
1061
1062 static inline void
1063 __sk_dst_set(struct sock *sk, struct dst_entry *dst)
1064 {
1065 struct dst_entry *old_dst;
1066
1067 old_dst = sk->dst_cache;
1068 sk->dst_cache = dst;
1069 dst_release(old_dst);
1070 }
1071
1072 static inline void
1073 sk_dst_set(struct sock *sk, struct dst_entry *dst)
1074 {
1075 write_lock(&sk->dst_lock);
1076 __sk_dst_set(sk, dst);
1077 write_unlock(&sk->dst_lock);
1078 }
1079
1080 static inline void
1081 __sk_dst_reset(struct sock *sk)
1082 {
1083 struct dst_entry *old_dst;
1084
1085 old_dst = sk->dst_cache;
1086 sk->dst_cache = NULL;
1087 dst_release(old_dst);
1088 }
1089
1090 static inline void
1091 sk_dst_reset(struct sock *sk)
1092 {
1093 write_lock(&sk->dst_lock);
1094 __sk_dst_reset(sk);
1095 write_unlock(&sk->dst_lock);
1096 }
1097
1098 static inline struct dst_entry *
1099 __sk_dst_check(struct sock *sk, u32 cookie)
1100 {
1101 struct dst_entry *dst = sk->dst_cache;
1102
1103 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1104 sk->dst_cache = NULL;
1105 return NULL;
1106 }
1107
1108 return dst;
1109 }
1110
1111 static inline struct dst_entry *
1112 sk_dst_check(struct sock *sk, u32 cookie)
1113 {
1114 struct dst_entry *dst = sk_dst_get(sk);
1115
1116 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1117 sk_dst_reset(sk);
1118 return NULL;
1119 }
1120
1121 return dst;
1122 }
1123
1124
/*
 *	sock_queue_rcv_skb() below queues a received datagram if it will
 *	fit. Stream and sequenced protocols can't normally use it as they
 *	need to fit buffers in and play with them.
 *
 *	It is inlined as it's very short and called for pretty much every
 *	packet ever received.
 */
1133
1134 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1135 {
1136 sock_hold(sk);
1137 skb->sk = sk;
1138 skb->destructor = sock_wfree;
1139 atomic_add(skb->truesize, &sk->wmem_alloc);
1140 }
1141
1142 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
1143 {
1144 skb->sk = sk;
1145 skb->destructor = sock_rfree;
1146 atomic_add(skb->truesize, &sk->rmem_alloc);
1147 }
1148
1149 static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1150 {
1151 /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
1152 number of warnings when compiling with -W --ANK
1153 */
1154 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
1155 return -ENOMEM;
1156
1157 #ifdef CONFIG_FILTER
1158 if (sk->filter) {
1159 int err = 0;
1160 struct sk_filter *filter;
1161
1162 /* It would be deadlock, if sock_queue_rcv_skb is used
1163 with socket lock! We assume that users of this
1164 function are lock free.
1165 */
1166 bh_lock_sock(sk);
1167 if ((filter = sk->filter) != NULL && sk_filter(skb, filter))
1168 err = -EPERM;
1169 bh_unlock_sock(sk);
1170 if (err)
1171 return err; /* Toss packet */
1172 }
1173 #endif /* CONFIG_FILTER */
1174
1175 skb->dev = NULL;
1176 skb_set_owner_r(skb, sk);
1177 skb_queue_tail(&sk->receive_queue, skb);
1178 if (!sk->dead)
1179 sk->data_ready(sk,skb->len);
1180 return 0;
1181 }
1182
1183 static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
1184 {
1185 /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
1186 number of warnings when compiling with -W --ANK
1187 */
1188 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
1189 return -ENOMEM;
1190 skb_set_owner_r(skb, sk);
1191 skb_queue_tail(&sk->error_queue,skb);
1192 if (!sk->dead)
1193 sk->data_ready(sk,skb->len);
1194 return 0;
1195 }
1196
1197 /*
1198 * Recover an error report and clear atomically
1199 */
1200
1201 static inline int sock_error(struct sock *sk)
1202 {
1203 int err=xchg(&sk->err,0);
1204 return -err;
1205 }
1206
1207 static inline unsigned long sock_wspace(struct sock *sk)
1208 {
1209 int amt = 0;
1210
1211 if (!(sk->shutdown & SEND_SHUTDOWN)) {
1212 amt = sk->sndbuf - atomic_read(&sk->wmem_alloc);
1213 if (amt < 0)
1214 amt = 0;
1215 }
1216 return amt;
1217 }
1218
1219 static inline void sk_wake_async(struct sock *sk, int how, int band)
1220 {
1221 if (sk->socket && sk->socket->fasync_list)
1222 sock_wake_async(sk->socket, how, band);
1223 }
1224
/* Lower bounds for the socket send and receive buffer sizes. */
#define SOCK_MIN_SNDBUF		2048
#define SOCK_MIN_RCVBUF		256

/*
 * Write-space threshold used by sock_writeable().
 * Must be less than or equal to SOCK_MIN_SNDBUF.
 */
#define SOCK_MIN_WRITE_SPACE	SOCK_MIN_SNDBUF
1229
1230 /*
1231 * Default write policy as shown to user space via poll/select/SIGIO
1232 * Kernel internally doesn't use the MIN_WRITE_SPACE threshold.
1233 */
1234 static inline int sock_writeable(struct sock *sk)
1235 {
1236 return sock_wspace(sk) >= SOCK_MIN_WRITE_SPACE;
1237 }
1238
1239 static inline int gfp_any(void)
1240 {
1241 return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
1242 }
1243
1244 static inline long sock_rcvtimeo(struct sock *sk, int noblock)
1245 {
1246 return noblock ? 0 : sk->rcvtimeo;
1247 }
1248
1249 static inline long sock_sndtimeo(struct sock *sk, int noblock)
1250 {
1251 return noblock ? 0 : sk->sndtimeo;
1252 }
1253
1254 static inline int sock_rcvlowat(struct sock *sk, int waitall, int len)
1255 {
1256 return (waitall ? len : min(sk->rcvlowat, len)) ? : 1;
1257 }
1258
1259 /* Alas, with timeout socket operations are not restartable.
1260 * Compare this to poll().
1261 */
1262 static inline int sock_intr_errno(long timeo)
1263 {
1264 return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
1265 }
1266
1267 static __inline__ void
1268 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1269 {
1270 if (sk->rcvtstamp)
1271 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(skb->stamp), &skb->stamp);
1272 else
1273 sk->stamp = skb->stamp;
1274 }
1275
/*
 *	Debug/info message switch.
 *
 *	NETDEBUG(x) currently expands to the statement x, i.e. messages are
 *	enabled. Change the condition below to 0 to compile them out.
 */

#if 1
#define NETDEBUG(x)	do { x; } while (0)
#else
#define NETDEBUG(x)	do { } while (0)
#endif
1285
/*
 *	Macros for sleeping on a socket. Use them like this:
 *
 *		SOCK_SLEEP_PRE(sk)
 *		if (condition)
 *			schedule();
 *		SOCK_SLEEP_POST(sk)
 *
 *	SOCK_SLEEP_PRE opens a brace scope that SOCK_SLEEP_POST closes, and
 *	the locals `tsk` and `wait` declared by the former are used by the
 *	latter, so the two must always appear as a pair in the same block.
 *
 *	PRE marks the current task TASK_INTERRUPTIBLE, adds it to the
 *	socket's wait queue and releases the socket lock; POST marks the
 *	task TASK_RUNNING, removes it from the wait queue and takes the
 *	socket lock again.
 */

#define SOCK_SLEEP_PRE(sk)						\
	{								\
		struct task_struct *tsk = current;			\
		DECLARE_WAITQUEUE(wait, tsk);				\
		tsk->state = TASK_INTERRUPTIBLE;			\
		add_wait_queue((sk)->sleep, &wait);			\
		release_sock(sk);

#define SOCK_SLEEP_POST(sk)						\
		tsk->state = TASK_RUNNING;				\
		remove_wait_queue((sk)->sleep, &wait);			\
		lock_sock(sk);						\
	}
1306
1307 extern __u32 sysctl_wmem_max;
1308 extern __u32 sysctl_rmem_max;
1309
1310 #endif /* _SOCK_H */
1311
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */