1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * The User Datagram Protocol (UDP).
7 *
8 * Version: $Id: udp.c,v 1.91 2000/11/28 13:38:38 davem Exp $
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Alan Cox, <Alan.Cox@linux.org>
14 *
15 * Fixes:
16 * Alan Cox : verify_area() calls
17 * Alan Cox : stopped close while in use off icmp
18 * messages. Not a fix but a botch that
19 * for udp at least is 'valid'.
20 * Alan Cox : Fixed icmp handling properly
21 * Alan Cox : Correct error for oversized datagrams
22 * Alan Cox : Tidied select() semantics.
23 * Alan Cox : udp_err() fixed properly, also now
24 * select and read wake correctly on errors
25 * Alan Cox : udp_send verify_area moved to avoid mem leak
26 * Alan Cox : UDP can count its memory
27 * Alan Cox : send to an unknown connection causes
28 * an ECONNREFUSED off the icmp, but
29 * does NOT close.
30 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
31 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
32 * bug no longer crashes it.
33 * Fred Van Kempen : Net2e support for sk->broadcast.
34 * Alan Cox : Uses skb_free_datagram
35 * Alan Cox : Added get/set sockopt support.
36 * Alan Cox : Broadcasting without option set returns EACCES.
37 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
38 * Alan Cox : Use ip_tos and ip_ttl
39 * Alan Cox : SNMP Mibs
40 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
41 * Matt Dillon : UDP length checks.
42 * Alan Cox : Smarter af_inet used properly.
43 * Alan Cox : Use new kernel side addressing.
44 * Alan Cox : Incorrect return on truncated datagram receive.
45 * Arnt Gulbrandsen : New udp_send and stuff
46 * Alan Cox : Cache last socket
47 * Alan Cox : Route cache
48 * Jon Peatfield : Minor efficiency fix to sendto().
49 * Mike Shaver : RFC1122 checks.
50 * Alan Cox : Nonblocking error fix.
51 * Willy Konynenberg : Transparent proxying support.
52 * Mike McLagan : Routing by source
53 * David S. Miller : New socket lookup architecture.
54 * Last socket cache retained as it
55 * does have a high hit rate.
56 * Olaf Kirch : Don't linearise iovec on sendmsg.
57 * Andi Kleen : Some cleanups, cache destination entry
58 * for connect.
59 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
60 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
61 * return ENOTCONN for unconnected sockets (POSIX)
62 * Janos Farkas : don't deliver multi/broadcasts to a different
63 * bound-to-device socket
64 *
65 *
66 * This program is free software; you can redistribute it and/or
67 * modify it under the terms of the GNU General Public License
68 * as published by the Free Software Foundation; either version
69 * 2 of the License, or (at your option) any later version.
70 */
71
72 /* RFC1122 Status:
73 4.1.3.1 (Ports):
74 SHOULD send ICMP_PORT_UNREACHABLE in response to datagrams to
75 an un-listened port. (OK)
76 4.1.3.2 (IP Options)
77 MUST pass IP options from IP -> application (OK)
78 MUST allow application to specify IP options (OK)
79 4.1.3.3 (ICMP Messages)
80 MUST pass ICMP error messages to application (OK -- except when SO_BSDCOMPAT is set)
81 4.1.3.4 (UDP Checksums)
82 MUST provide facility for checksumming (OK)
83 MAY allow application to control checksumming (OK)
84 MUST default to checksumming on (OK)
85 MUST discard silently datagrams with bad csums (OK, except during debugging)
86 4.1.3.5 (UDP Multihoming)
87 MUST allow application to specify source address (OK)
88 SHOULD be able to communicate the chosen src addr up to application
89 when application doesn't choose (DOES - use recvmsg cmsgs)
90 4.1.3.6 (Invalid Addresses)
91 MUST discard invalid source addresses (OK -- done in the new routing code)
92 MUST only send datagrams with one of our addresses (OK)
93 */
94
95 #include <asm/system.h>
96 #include <asm/uaccess.h>
97 #include <linux/types.h>
98 #include <linux/fcntl.h>
99 #include <linux/socket.h>
100 #include <linux/sockios.h>
101 #include <linux/in.h>
102 #include <linux/errno.h>
103 #include <linux/timer.h>
104 #include <linux/mm.h>
105 #include <linux/config.h>
106 #include <linux/inet.h>
107 #include <linux/netdevice.h>
108 #include <net/snmp.h>
109 #include <net/ip.h>
110 #include <net/protocol.h>
111 #include <linux/skbuff.h>
112 #include <net/sock.h>
113 #include <net/udp.h>
114 #include <net/icmp.h>
115 #include <net/route.h>
116 #include <net/inet_common.h>
117 #include <net/checksum.h>
118
/*
 *	Snmp MIB for the UDP layer
 */

struct udp_mib		udp_statistics[NR_CPUS*2];

/* UDP socket hash table; chains are indexed by (port & (UDP_HTABLE_SIZE - 1)). */
struct sock *udp_hash[UDP_HTABLE_SIZE];
/* Reader/writer lock taken around every walk or modification of udp_hash. */
rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;

/* Shared by v4/v6 udp. */
/* Remembers the port most recently picked by automatic port selection. */
int udp_port_rover;
130
131 static int udp_v4_get_port(struct sock *sk, unsigned short snum)
132 {
133 write_lock_bh(&udp_hash_lock);
134 if (snum == 0) {
135 int best_size_so_far, best, result, i;
136
137 if (udp_port_rover > sysctl_local_port_range[1] ||
138 udp_port_rover < sysctl_local_port_range[0])
139 udp_port_rover = sysctl_local_port_range[0];
140 best_size_so_far = 32767;
141 best = result = udp_port_rover;
142 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
143 struct sock *sk;
144 int size;
145
146 sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
147 if (!sk) {
148 if (result > sysctl_local_port_range[1])
149 result = sysctl_local_port_range[0] +
150 ((result - sysctl_local_port_range[0]) &
151 (UDP_HTABLE_SIZE - 1));
152 goto gotit;
153 }
154 size = 0;
155 do {
156 if (++size >= best_size_so_far)
157 goto next;
158 } while ((sk = sk->next) != NULL);
159 best_size_so_far = size;
160 best = result;
161 next:;
162 }
163 result = best;
164 for(;; result += UDP_HTABLE_SIZE) {
165 if (result > sysctl_local_port_range[1])
166 result = sysctl_local_port_range[0]
167 + ((result - sysctl_local_port_range[0]) &
168 (UDP_HTABLE_SIZE - 1));
169 if (!udp_lport_inuse(result))
170 break;
171 }
172 gotit:
173 udp_port_rover = snum = result;
174 } else {
175 struct sock *sk2;
176
177 for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
178 sk2 != NULL;
179 sk2 = sk2->next) {
180 if (sk2->num == snum &&
181 sk2 != sk &&
182 sk2->bound_dev_if == sk->bound_dev_if &&
183 (!sk2->rcv_saddr ||
184 !sk->rcv_saddr ||
185 sk2->rcv_saddr == sk->rcv_saddr) &&
186 (!sk2->reuse || !sk->reuse))
187 goto fail;
188 }
189 }
190 sk->num = snum;
191 if (sk->pprev == NULL) {
192 struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
193 if ((sk->next = *skp) != NULL)
194 (*skp)->pprev = &sk->next;
195 *skp = sk;
196 sk->pprev = skp;
197 sock_prot_inc_use(sk->prot);
198 sock_hold(sk);
199 }
200 write_unlock_bh(&udp_hash_lock);
201 return 0;
202
203 fail:
204 write_unlock_bh(&udp_hash_lock);
205 return 1;
206 }
207
/*
 * "hash" operation installed in udp_prot.  It only triggers BUG();
 * presumably it is never meant to be called because udp_v4_get_port()
 * already links the socket into udp_hash -- TODO confirm against callers.
 */
static void udp_v4_hash(struct sock *sk)
{
	BUG();
}
212
213 static void udp_v4_unhash(struct sock *sk)
214 {
215 write_lock_bh(&udp_hash_lock);
216 if (sk->pprev) {
217 if (sk->next)
218 sk->next->pprev = sk->pprev;
219 *sk->pprev = sk->next;
220 sk->pprev = NULL;
221 sk->num = 0;
222 sock_prot_dec_use(sk->prot);
223 __sock_put(sk);
224 }
225 write_unlock_bh(&udp_hash_lock);
226 }
227
228 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
229 * harder than this. -DaveM
230 */
231 struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
232 {
233 struct sock *sk, *result = NULL;
234 unsigned short hnum = ntohs(dport);
235 int badness = -1;
236
237 for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
238 if(sk->num == hnum) {
239 int score = 0;
240 if(sk->rcv_saddr) {
241 if(sk->rcv_saddr != daddr)
242 continue;
243 score++;
244 }
245 if(sk->daddr) {
246 if(sk->daddr != saddr)
247 continue;
248 score++;
249 }
250 if(sk->dport) {
251 if(sk->dport != sport)
252 continue;
253 score++;
254 }
255 if(sk->bound_dev_if) {
256 if(sk->bound_dev_if != dif)
257 continue;
258 score++;
259 }
260 if(score == 4) {
261 result = sk;
262 break;
263 } else if(score > badness) {
264 result = sk;
265 badness = score;
266 }
267 }
268 }
269 return result;
270 }
271
272 __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
273 {
274 struct sock *sk;
275
276 read_lock(&udp_hash_lock);
277 sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
278 if (sk)
279 sock_hold(sk);
280 read_unlock(&udp_hash_lock);
281 return sk;
282 }
283
284 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
285 u16 loc_port, u32 loc_addr,
286 u16 rmt_port, u32 rmt_addr,
287 int dif)
288 {
289 struct sock *s = sk;
290 unsigned short hnum = ntohs(loc_port);
291 for(; s; s = s->next) {
292 if ((s->num != hnum) ||
293 (s->daddr && s->daddr!=rmt_addr) ||
294 (s->dport != rmt_port && s->dport != 0) ||
295 (s->rcv_saddr && s->rcv_saddr != loc_addr) ||
296 (s->bound_dev_if && s->bound_dev_if != dif))
297 continue;
298 break;
299 }
300 return s;
301 }
302
303 /*
304 * This routine is called by the ICMP module when it gets some
305 * sort of error condition. If err < 0 then the socket should
306 * be closed and the error returned to the user. If err > 0
307 * it's just the icmp type << 8 | icmp code.
308 * Header points to the ip header of the error packet. We move
309 * on past this. Then (as it used to claim before adjustment)
310 * header points to the first 8 bytes of the udp header. We need
311 * to find the appropriate port.
312 */
313
314 void udp_err(struct sk_buff *skb, unsigned char *dp, int len)
315 {
316 struct iphdr *iph = (struct iphdr*)dp;
317 struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2));
318 int type = skb->h.icmph->type;
319 int code = skb->h.icmph->code;
320 struct sock *sk;
321 int harderr;
322 u32 info;
323 int err;
324
325 if (len < (iph->ihl<<2)+sizeof(struct udphdr)) {
326 ICMP_INC_STATS_BH(IcmpInErrors);
327 return;
328 }
329
330 sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
331 if (sk == NULL) {
332 ICMP_INC_STATS_BH(IcmpInErrors);
333 return; /* No socket for error */
334 }
335
336 err = 0;
337 info = 0;
338 harderr = 0;
339
340 switch (type) {
341 default:
342 case ICMP_TIME_EXCEEDED:
343 err = EHOSTUNREACH;
344 break;
345 case ICMP_SOURCE_QUENCH:
346 goto out;
347 case ICMP_PARAMETERPROB:
348 err = EPROTO;
349 info = ntohl(skb->h.icmph->un.gateway)>>24;
350 harderr = 1;
351 break;
352 case ICMP_DEST_UNREACH:
353 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
354 if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
355 err = EMSGSIZE;
356 info = ntohs(skb->h.icmph->un.frag.mtu);
357 harderr = 1;
358 break;
359 }
360 goto out;
361 }
362 err = EHOSTUNREACH;
363 if (code <= NR_ICMP_UNREACH) {
364 harderr = icmp_err_convert[code].fatal;
365 err = icmp_err_convert[code].errno;
366 }
367 break;
368 }
369
370 /*
371 * RFC1122: OK. Passes ICMP errors back to application, as per
372 * 4.1.3.3.
373 */
374 if (!sk->protinfo.af_inet.recverr) {
375 if (!harderr || sk->state != TCP_ESTABLISHED)
376 goto out;
377 } else {
378 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
379 }
380 sk->err = err;
381 sk->error_report(sk);
382 out:
383 sock_put(sk);
384 }
385
386
387 static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
388 {
389 return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
390 }
391
/*
 * State handed to the udp_getfrag*() callbacks while a datagram is being
 * built: the UDP header to emit plus what is needed to copy and checksum
 * the payload.
 */
struct udpfakehdr
{
	struct udphdr uh;	/* UDP header copied to the start of the first fragment */
	u32 saddr;		/* source address fed to csum_tcpudp_magic() */
	u32 daddr;		/* destination address fed to csum_tcpudp_magic() */
	struct iovec *iov;	/* payload source (msg->msg_iov in udp_sendmsg) */
	u32 wcheck;		/* running partial checksum kept by udp_getfrag() */
};
400
401 /*
402 * Copy and checksum a UDP packet from user space into a buffer. We still have
403 * to do the planning to get ip_build_xmit to spot direct transfer to network
404 * card and provide an additional callback mode for direct user->board I/O
405 * transfers. That one will be fun.
406 */
407
408 static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
409 {
410 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
411 if (offset==0) {
412 if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
413 fraglen-sizeof(struct udphdr), &ufh->wcheck))
414 return -EFAULT;
415 ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
416 ufh->wcheck);
417 ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
418 ntohs(ufh->uh.len),
419 IPPROTO_UDP, ufh->wcheck);
420 if (ufh->uh.check == 0)
421 ufh->uh.check = -1;
422 memcpy(to, ufh, sizeof(struct udphdr));
423 return 0;
424 }
425 if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
426 fraglen, &ufh->wcheck))
427 return -EFAULT;
428 return 0;
429 }
430
431 /*
432 * Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing
433 * that we use two routines for this for speed. Probably we ought to have a
434 * CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding.
435 * Timing needed to verify if this is a valid decision.
436 */
437
438 static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
439 {
440 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
441
442 if (offset==0) {
443 memcpy(to, ufh, sizeof(struct udphdr));
444 return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
445 fraglen-sizeof(struct udphdr));
446 }
447 return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
448 fraglen);
449 }
450
451 int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
452 {
453 int ulen = len + sizeof(struct udphdr);
454 struct ipcm_cookie ipc;
455 struct udpfakehdr ufh;
456 struct rtable *rt = NULL;
457 int free = 0;
458 int connected = 0;
459 u32 daddr;
460 u8 tos;
461 int err;
462
463 /* This check is ONLY to check for arithmetic overflow
464 on integer(!) len. Not more! Real check will be made
465 in ip_build_xmit --ANK
466
467 BTW socket.c -> af_*.c -> ... make multiple
468 invalid conversions size_t -> int. We MUST repair it f.e.
469 by replacing all of them with size_t and revise all
470 the places sort of len += sizeof(struct iphdr)
471 If len was ULONG_MAX-10 it would be cathastrophe --ANK
472 */
473
474 if (len < 0 || len > 0xFFFF)
475 return -EMSGSIZE;
476
477 /*
478 * Check the flags.
479 */
480
481 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
482 return -EOPNOTSUPP;
483
484 /*
485 * Get and verify the address.
486 */
487
488 if (msg->msg_name) {
489 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
490 if (msg->msg_namelen < sizeof(*usin))
491 return -EINVAL;
492 if (usin->sin_family != AF_INET) {
493 if (usin->sin_family != AF_UNSPEC)
494 return -EINVAL;
495 }
496
497 ufh.daddr = usin->sin_addr.s_addr;
498 ufh.uh.dest = usin->sin_port;
499 if (ufh.uh.dest == 0)
500 return -EINVAL;
501 } else {
502 if (sk->state != TCP_ESTABLISHED)
503 return -ENOTCONN;
504 ufh.daddr = sk->daddr;
505 ufh.uh.dest = sk->dport;
506 /* Open fast path for connected socket.
507 Route will not be used, if at least one option is set.
508 */
509 connected = 1;
510 }
511 ipc.addr = sk->saddr;
512 ufh.uh.source = sk->sport;
513
514 ipc.opt = NULL;
515 ipc.oif = sk->bound_dev_if;
516 if (msg->msg_controllen) {
517 err = ip_cmsg_send(msg, &ipc);
518 if (err)
519 return err;
520 if (ipc.opt)
521 free = 1;
522 connected = 0;
523 }
524 if (!ipc.opt)
525 ipc.opt = sk->protinfo.af_inet.opt;
526
527 ufh.saddr = ipc.addr;
528 ipc.addr = daddr = ufh.daddr;
529
530 if (ipc.opt && ipc.opt->srr) {
531 if (!daddr)
532 return -EINVAL;
533 daddr = ipc.opt->faddr;
534 connected = 0;
535 }
536 tos = RT_TOS(sk->protinfo.af_inet.tos);
537 if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
538 (ipc.opt && ipc.opt->is_strictroute)) {
539 tos |= RTO_ONLINK;
540 connected = 0;
541 }
542
543 if (MULTICAST(daddr)) {
544 if (!ipc.oif)
545 ipc.oif = sk->protinfo.af_inet.mc_index;
546 if (!ufh.saddr)
547 ufh.saddr = sk->protinfo.af_inet.mc_addr;
548 connected = 0;
549 }
550
551 if (connected)
552 rt = (struct rtable*)sk_dst_check(sk, 0);
553
554 if (rt == NULL) {
555 err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
556 if (err)
557 goto out;
558
559 err = -EACCES;
560 if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
561 goto out;
562 if (connected)
563 sk_dst_set(sk, dst_clone(&rt->u.dst));
564 }
565
566 if (msg->msg_flags&MSG_CONFIRM)
567 goto do_confirm;
568 back_from_confirm:
569
570 ufh.saddr = rt->rt_src;
571 if (!ipc.addr)
572 ufh.daddr = ipc.addr = rt->rt_dst;
573 ufh.uh.len = htons(ulen);
574 ufh.uh.check = 0;
575 ufh.iov = msg->msg_iov;
576 ufh.wcheck = 0;
577
578 /* RFC1122: OK. Provides the checksumming facility (MUST) as per */
579 /* 4.1.3.4. It's configurable by the application via setsockopt() */
580 /* (MAY) and it defaults to on (MUST). */
581
582 err = ip_build_xmit(sk,
583 (sk->no_check == UDP_CSUM_NOXMIT ?
584 udp_getfrag_nosum :
585 udp_getfrag),
586 &ufh, ulen, &ipc, rt, msg->msg_flags);
587
588 out:
589 ip_rt_put(rt);
590 if (free)
591 kfree(ipc.opt);
592 if (!err) {
593 UDP_INC_STATS_USER(UdpOutDatagrams);
594 return len;
595 }
596 return err;
597
598 do_confirm:
599 dst_confirm(&rt->u.dst);
600 if (!(msg->msg_flags&MSG_PROBE) || len)
601 goto back_from_confirm;
602 err = 0;
603 goto out;
604 }
605
606 /*
607 * IOCTL requests applicable to the UDP protocol
608 */
609
610 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
611 {
612 switch(cmd)
613 {
614 case SIOCOUTQ:
615 {
616 int amount = atomic_read(&sk->wmem_alloc);
617 return put_user(amount, (int *)arg);
618 }
619
620 case SIOCINQ:
621 {
622 struct sk_buff *skb;
623 unsigned long amount;
624
625 amount = 0;
626 spin_lock_irq(&sk->receive_queue.lock);
627 skb = skb_peek(&sk->receive_queue);
628 if (skb != NULL) {
629 /*
630 * We will only return the amount
631 * of this packet since that is all
632 * that will be read.
633 */
634 amount = skb->len - sizeof(struct udphdr);
635 }
636 spin_unlock_irq(&sk->receive_queue.lock);
637 return put_user(amount, (int *)arg);
638 }
639
640 default:
641 return -ENOIOCTLCMD;
642 }
643 return(0);
644 }
645
646 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
647 {
648 return (unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum));
649 }
650
651 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
652 {
653 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
654 __udp_checksum_complete(skb);
655 }
656
657 /*
658 * This should be easy, if there is something there we
659 * return it, otherwise we block.
660 */
661
662 int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
663 int noblock, int flags, int *addr_len)
664 {
665 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
666 struct sk_buff *skb;
667 int copied, err;
668
669 /*
670 * Check any passed addresses
671 */
672 if (addr_len)
673 *addr_len=sizeof(*sin);
674
675 if (flags & MSG_ERRQUEUE)
676 return ip_recv_error(sk, msg, len);
677
678 /*
679 * From here the generic datagram does a lot of the work. Come
680 * the finished NET3, it will do _ALL_ the work!
681 */
682
683 skb = skb_recv_datagram(sk, flags, noblock, &err);
684 if (!skb)
685 goto out;
686
687 copied = skb->len - sizeof(struct udphdr);
688 if (copied > len) {
689 copied = len;
690 msg->msg_flags |= MSG_TRUNC;
691 }
692
693 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
694 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
695 copied);
696 } else if (msg->msg_flags&MSG_TRUNC) {
697 if (__udp_checksum_complete(skb))
698 goto csum_copy_err;
699 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
700 copied);
701 } else {
702 err = copy_and_csum_toiovec(msg->msg_iov, skb, sizeof(struct udphdr));
703
704 if (err)
705 goto csum_copy_err;
706 }
707
708 if (err)
709 goto out_free;
710
711 sock_recv_timestamp(msg, sk, skb);
712
713 /* Copy the address. */
714 if (sin)
715 {
716 sin->sin_family = AF_INET;
717 sin->sin_port = skb->h.uh->source;
718 sin->sin_addr.s_addr = skb->nh.iph->saddr;
719 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
720 }
721 if (sk->protinfo.af_inet.cmsg_flags)
722 ip_cmsg_recv(msg, skb);
723 err = copied;
724
725 out_free:
726 skb_free_datagram(sk, skb);
727 out:
728 return err;
729
730 csum_copy_err:
731 UDP_INC_STATS_BH(UdpInErrors);
732
733 /* Clear queue. */
734 if (flags&MSG_PEEK) {
735 int clear = 0;
736 spin_lock_irq(&sk->receive_queue.lock);
737 if (skb == skb_peek(&sk->receive_queue)) {
738 __skb_unlink(skb, &sk->receive_queue);
739 clear = 1;
740 }
741 spin_unlock_irq(&sk->receive_queue.lock);
742 if (clear)
743 kfree_skb(skb);
744 }
745
746 skb_free_datagram(sk, skb);
747
748 return -EAGAIN;
749 }
750
751 int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
752 {
753 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
754 struct rtable *rt;
755 int err;
756
757
758 if (addr_len < sizeof(*usin))
759 return -EINVAL;
760
761 if (usin->sin_family != AF_INET)
762 return -EAFNOSUPPORT;
763
764 sk_dst_reset(sk);
765
766 err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
767 sk->protinfo.af_inet.tos|sk->localroute, sk->bound_dev_if);
768 if (err)
769 return err;
770 if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
771 ip_rt_put(rt);
772 return -EACCES;
773 }
774 if(!sk->saddr)
775 sk->saddr = rt->rt_src; /* Update source address */
776 if(!sk->rcv_saddr)
777 sk->rcv_saddr = rt->rt_src;
778 sk->daddr = rt->rt_dst;
779 sk->dport = usin->sin_port;
780 sk->state = TCP_ESTABLISHED;
781
782 sk_dst_set(sk, &rt->u.dst);
783 return(0);
784 }
785
786 int udp_disconnect(struct sock *sk, int flags)
787 {
788 /*
789 * 1003.1g - break association.
790 */
791
792 sk->state = TCP_CLOSE;
793 sk->daddr = 0;
794 sk->dport = 0;
795 sk->bound_dev_if = 0;
796 if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) {
797 sk->rcv_saddr = 0;
798 sk->saddr = 0;
799 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
800 memset(&sk->net_pinfo.af_inet6.saddr, 0, 16);
801 memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16);
802 #endif
803 }
804 if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) {
805 sk->prot->unhash(sk);
806 sk->sport = 0;
807 }
808 sk_dst_reset(sk);
809 return 0;
810 }
811
/*
 * "close" operation installed in udp_prot: hands the socket straight to
 * inet_sock_release().  The timeout argument is not used.
 */
static void udp_close(struct sock *sk, long timeout)
{
	inet_sock_release(sk);
}
816
817 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
818 {
819 /*
820 * Charge it to the socket, dropping if the queue is full.
821 */
822
823 #if defined(CONFIG_FILTER)
824 if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
825 if (__udp_checksum_complete(skb)) {
826 UDP_INC_STATS_BH(UdpInErrors);
827 IP_INC_STATS_BH(IpInDiscards);
828 ip_statistics[smp_processor_id()*2].IpInDelivers--;
829 kfree_skb(skb);
830 return -1;
831 }
832 skb->ip_summed = CHECKSUM_UNNECESSARY;
833 }
834 #endif
835
836 if (sock_queue_rcv_skb(sk,skb)<0) {
837 UDP_INC_STATS_BH(UdpInErrors);
838 IP_INC_STATS_BH(IpInDiscards);
839 ip_statistics[smp_processor_id()*2].IpInDelivers--;
840 kfree_skb(skb);
841 return -1;
842 }
843 UDP_INC_STATS_BH(UdpInDatagrams);
844 return 0;
845 }
846
847 /*
848 * Multicasts and broadcasts go to each listener.
849 *
850 * Note: called only from the BH handler context,
851 * so we don't need to lock the hashes.
852 */
853 static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
854 u32 saddr, u32 daddr)
855 {
856 struct sock *sk;
857 int dif;
858
859 read_lock(&udp_hash_lock);
860 sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
861 dif = skb->dev->ifindex;
862 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
863 if (sk) {
864 struct sock *sknext = NULL;
865
866 do {
867 struct sk_buff *skb1 = skb;
868
869 sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
870 uh->source, saddr, dif);
871 if(sknext)
872 skb1 = skb_clone(skb, GFP_ATOMIC);
873
874 if(skb1)
875 udp_queue_rcv_skb(sk, skb1);
876 sk = sknext;
877 } while(sknext);
878 } else
879 kfree_skb(skb);
880 read_unlock(&udp_hash_lock);
881 return 0;
882 }
883
884 /* Initialize UDP checksum. If exited with zero value (success),
885 * CHECKSUM_UNNECESSARY means, that no more checks are required.
886 * Otherwise, csum completion requires chacksumming packet body,
887 * including udp header and folding it to skb->csum.
888 */
889 static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
890 unsigned short ulen, u32 saddr, u32 daddr)
891 {
892 if (uh->check == 0) {
893 skb->ip_summed = CHECKSUM_UNNECESSARY;
894 } else if (skb->ip_summed == CHECKSUM_HW) {
895 if (udp_check(uh, ulen, saddr, daddr, skb->csum))
896 return -1;
897 skb->ip_summed = CHECKSUM_UNNECESSARY;
898 } else if (skb->ip_summed != CHECKSUM_UNNECESSARY)
899 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
900 /* Probably, we should checksum udp header (it should be in cache
901 * in any case) and data in tiny packets (< rx copybreak).
902 */
903 return 0;
904 }
905
906 /*
907 * All we need to do is get the socket, and then do a checksum.
908 */
909
910 int udp_rcv(struct sk_buff *skb, unsigned short len)
911 {
912 struct sock *sk;
913 struct udphdr *uh;
914 unsigned short ulen;
915 struct rtable *rt = (struct rtable*)skb->dst;
916 u32 saddr = skb->nh.iph->saddr;
917 u32 daddr = skb->nh.iph->daddr;
918
919 /*
920 * Get the header.
921 */
922
923 uh = skb->h.uh;
924 __skb_pull(skb, skb->h.raw - skb->data);
925
926 IP_INC_STATS_BH(IpInDelivers);
927
928 /*
929 * Validate the packet and the UDP length.
930 */
931
932 ulen = ntohs(uh->len);
933
934 if (ulen > len || ulen < sizeof(*uh)) {
935 NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
936 UDP_INC_STATS_BH(UdpInErrors);
937 kfree_skb(skb);
938 return(0);
939 }
940 skb_trim(skb, ulen);
941
942 if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
943 goto csum_error;
944
945 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
946 return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
947
948 sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
949
950 if (sk != NULL) {
951 udp_queue_rcv_skb(sk, skb);
952 sock_put(sk);
953 return 0;
954 }
955
956 /* No socket. Drop packet silently, if checksum is wrong */
957 if (udp_checksum_complete(skb))
958 goto csum_error;
959
960 UDP_INC_STATS_BH(UdpNoPorts);
961 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
962
963 /*
964 * Hmm. We got an UDP packet to a port to which we
965 * don't wanna listen. Ignore it.
966 */
967 kfree_skb(skb);
968 return(0);
969
970 csum_error:
971 /*
972 * RFC1122: OK. Discards the bad packet silently (as far as
973 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
974 */
975 NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
976 NIPQUAD(saddr),
977 ntohs(uh->source),
978 NIPQUAD(daddr),
979 ntohs(uh->dest),
980 ulen));
981 UDP_INC_STATS_BH(UdpInErrors);
982 kfree_skb(skb);
983 return(0);
984 }
985
986 static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
987 {
988 unsigned int dest, src;
989 __u16 destp, srcp;
990
991 dest = sp->daddr;
992 src = sp->rcv_saddr;
993 destp = ntohs(sp->dport);
994 srcp = ntohs(sp->sport);
995 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
996 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
997 i, src, srcp, dest, destp, sp->state,
998 atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
999 0, 0L, 0,
1000 sock_i_uid(sp), 0,
1001 sock_i_ino(sp),
1002 atomic_read(&sp->refcnt), sp);
1003 }
1004
1005 int udp_get_info(char *buffer, char **start, off_t offset, int length)
1006 {
1007 int len = 0, num = 0, i;
1008 off_t pos = 0;
1009 off_t begin;
1010 char tmpbuf[129];
1011
1012 if (offset < 128)
1013 len += sprintf(buffer, "%-127s\n",
1014 " sl local_address rem_address st tx_queue "
1015 "rx_queue tr tm->when retrnsmt uid timeout inode");
1016 pos = 128;
1017 read_lock(&udp_hash_lock);
1018 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
1019 struct sock *sk;
1020
1021 for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
1022 if (sk->family != PF_INET)
1023 continue;
1024 pos += 128;
1025 if (pos <= offset)
1026 continue;
1027 get_udp_sock(sk, tmpbuf, i);
1028 len += sprintf(buffer+len, "%-127s\n", tmpbuf);
1029 if(len >= length)
1030 goto out;
1031 }
1032 }
1033 out:
1034 read_unlock(&udp_hash_lock);
1035 begin = len - (pos - offset);
1036 *start = buffer + begin;
1037 len -= begin;
1038 if(len > length)
1039 len = length;
1040 if (len < 0)
1041 len = 0;
1042 return len;
1043 }
1044
1045 struct proto udp_prot = {
1046 name: "UDP",
1047 close: udp_close,
1048 connect: udp_connect,
1049 disconnect: udp_disconnect,
1050 ioctl: udp_ioctl,
1051 setsockopt: ip_setsockopt,
1052 getsockopt: ip_getsockopt,
1053 sendmsg: udp_sendmsg,
1054 recvmsg: udp_recvmsg,
1055 backlog_rcv: udp_queue_rcv_skb,
1056 hash: udp_v4_hash,
1057 unhash: udp_v4_unhash,
1058 get_port: udp_v4_get_port,
1059 };
1060
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */