~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/ipv4/ip_output.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              The Internet Protocol (IP) output module.
  7  *
  8  * Version:     $Id: ip_output.c,v 1.87 2000/10/25 20:07:22 davem Exp $
  9  *
 10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *              Donald Becker, <becker@super.org>
 13  *              Alan Cox, <Alan.Cox@linux.org>
 14  *              Richard Underwood
 15  *              Stefan Becker, <stefanb@yello.ping.de>
 16  *              Jorge Cwik, <jorge@laser.satlink.net>
 17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 18  *
 19  *      See ip_input.c for original log
 20  *
 21  *      Fixes:
 22  *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
 23  *              Mike Kilburn    :       htons() missing in ip_build_xmit.
 24  *              Bradford Johnson:       Fix faulty handling of some frames when 
 25  *                                      no route is found.
 26  *              Alexander Demenshin:    Missing sk/skb free in ip_queue_xmit
 27  *                                      (in case if packet not accepted by
 28  *                                      output firewall rules)
 29  *              Mike McLagan    :       Routing by source
 30  *              Alexey Kuznetsov:       use new route cache
 31  *              Andi Kleen:             Fix broken PMTU recovery and remove
 32  *                                      some redundant tests.
 33  *      Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
 34  *              Andi Kleen      :       Replace ip_reply with ip_send_reply.
 35  *              Andi Kleen      :       Split fast and slow ip_build_xmit path 
 36  *                                      for decreased register pressure on x86 
 37  *                                      and more readability. 
 38  *              Marc Boucher    :       When call_out_firewall returns FW_QUEUE,
 39  *                                      silently drop skb instead of failing with -EPERM.
 40  */
 41 
 42 #include <asm/uaccess.h>
 43 #include <asm/system.h>
 44 #include <linux/types.h>
 45 #include <linux/kernel.h>
 46 #include <linux/sched.h>
 47 #include <linux/mm.h>
 48 #include <linux/string.h>
 49 #include <linux/errno.h>
 50 #include <linux/config.h>
 51 
 52 #include <linux/socket.h>
 53 #include <linux/sockios.h>
 54 #include <linux/in.h>
 55 #include <linux/inet.h>
 56 #include <linux/netdevice.h>
 57 #include <linux/etherdevice.h>
 58 #include <linux/proc_fs.h>
 59 #include <linux/stat.h>
 60 #include <linux/init.h>
 61 
 62 #include <net/snmp.h>
 63 #include <net/ip.h>
 64 #include <net/protocol.h>
 65 #include <net/route.h>
 66 #include <net/tcp.h>
 67 #include <net/udp.h>
 68 #include <linux/skbuff.h>
 69 #include <net/sock.h>
 70 #include <net/arp.h>
 71 #include <net/icmp.h>
 72 #include <net/raw.h>
 73 #include <net/checksum.h>
 74 #include <net/inetpeer.h>
 75 #include <linux/igmp.h>
 76 #include <linux/netfilter_ipv4.h>
 77 #include <linux/mroute.h>
 78 #include <linux/netlink.h>
 79 
 80 /*
 81  *      Shall we try to damage output packets if routing dev changes?
 82  */
 83 
 84 int sysctl_ip_dynaddr = 0;	/* answer to the question above; 0 by default.  Not read anywhere in this part of the file. */
 85 int sysctl_ip_default_ttl = IPDEFTTL;	/* starts out as IPDEFTTL; presumably the system-wide default TTL exposed through sysctl -- TODO confirm against its users */
 86 
 87 /* Generate a checksum for an outgoing IP datagram. */
 88 __inline__ void ip_send_check(struct iphdr *iph)
 89 {
 90         iph->check = 0;
 91         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 92 }
 93 
 94 /* dev_loopback_xmit for use with netfilter. */
 95 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 96 {
 97         newskb->mac.raw = newskb->data;
 98         skb_pull(newskb, newskb->nh.raw - newskb->data);
 99         newskb->pkt_type = PACKET_LOOPBACK;
100         newskb->ip_summed = CHECKSUM_UNNECESSARY;
101         BUG_TRAP(newskb->dst);
102 
103 #ifdef CONFIG_NETFILTER_DEBUG
104         nf_debug_ip_loopback_xmit(newskb);
105 #endif
106         netif_rx(newskb);
107         return 0;
108 }
109 
110 /* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook
111    changes route */
112 static inline int
113 output_maybe_reroute(struct sk_buff *skb)
114 {
115         return skb->dst->output(skb);
116 }
117 
118 /* 
119  *              Add an ip header to a skbuff and send it out.
120  */
121 int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
122                           u32 saddr, u32 daddr, struct ip_options *opt)
123 {
124         struct rtable *rt = (struct rtable *)skb->dst;
125         struct iphdr *iph;
126 
127         /* Build the IP header. */
128         if (opt)
129                 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
130         else
131                 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
132 
133         iph->version  = 4;
134         iph->ihl      = 5;
135         iph->tos      = sk->protinfo.af_inet.tos;
136         iph->frag_off = 0;
137         if (ip_dont_fragment(sk, &rt->u.dst))
138                 iph->frag_off |= htons(IP_DF);
139         iph->ttl      = sk->protinfo.af_inet.ttl;
140         iph->daddr    = rt->rt_dst;
141         iph->saddr    = rt->rt_src;
142         iph->protocol = sk->protocol;
143         iph->tot_len  = htons(skb->len);
144         ip_select_ident(iph, &rt->u.dst);
145         skb->nh.iph   = iph;
146 
147         if (opt && opt->optlen) {
148                 iph->ihl += opt->optlen>>2;
149                 ip_options_build(skb, opt, daddr, rt, 0);
150         }
151         ip_send_check(iph);
152 
153         /* Send it out. */
154         return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
155                        output_maybe_reroute);
156 }
157 
158 static inline int ip_finish_output2(struct sk_buff *skb)
159 {
160         struct dst_entry *dst = skb->dst;
161         struct hh_cache *hh = dst->hh;
162 
163 #ifdef CONFIG_NETFILTER_DEBUG
164         nf_debug_ip_finish_output2(skb);
165 #endif /*CONFIG_NETFILTER_DEBUG*/
166 
167         if (hh) {
168                 read_lock_bh(&hh->hh_lock);
169                 memcpy(skb->data - 16, hh->hh_data, 16);
170                 read_unlock_bh(&hh->hh_lock);
171                 skb_push(skb, hh->hh_len);
172                 return hh->hh_output(skb);
173         } else if (dst->neighbour)
174                 return dst->neighbour->output(skb);
175 
176         printk(KERN_DEBUG "khm\n");
177         kfree_skb(skb);
178         return -EINVAL;
179 }
180 
181 __inline__ int ip_finish_output(struct sk_buff *skb)
182 {
183         struct net_device *dev = skb->dst->dev;
184 
185         skb->dev = dev;
186         skb->protocol = __constant_htons(ETH_P_IP);
187 
188         return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
189                        ip_finish_output2);
190 }
191 
192 int ip_mc_output(struct sk_buff *skb)
193 {
194         struct sock *sk = skb->sk;
195         struct rtable *rt = (struct rtable*)skb->dst;
196         struct net_device *dev = rt->u.dst.dev;
197 
198         /*
199          *      If the indicated interface is up and running, send the packet.
200          */
201         IP_INC_STATS(IpOutRequests);
202 #ifdef CONFIG_IP_ROUTE_NAT
203         if (rt->rt_flags & RTCF_NAT)
204                 ip_do_nat(skb);
205 #endif
206 
207         skb->dev = dev;
208         skb->protocol = __constant_htons(ETH_P_IP);
209 
210         /*
211          *      Multicasts are looped back for other local users
212          */
213 
214         if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->protinfo.af_inet.mc_loop)) {
215 #ifdef CONFIG_IP_MROUTE
216                 /* Small optimization: do not loopback not local frames,
217                    which returned after forwarding; they will be  dropped
218                    by ip_mr_input in any case.
219                    Note, that local frames are looped back to be delivered
220                    to local recipients.
221 
222                    This check is duplicated in ip_mr_input at the moment.
223                  */
224                 if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
225 #endif
226                 {
227                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
228                         if (newskb)
229                                 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
230                                         newskb->dev, 
231                                         ip_dev_loopback_xmit);
232                 }
233 
234                 /* Multicasts with ttl 0 must not go beyond the host */
235 
236                 if (skb->nh.iph->ttl == 0) {
237                         kfree_skb(skb);
238                         return 0;
239                 }
240         }
241 
242         if (rt->rt_flags&RTCF_BROADCAST) {
243                 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
244                 if (newskb)
245                         NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
246                                 newskb->dev, ip_dev_loopback_xmit);
247         }
248 
249         return ip_finish_output(skb);
250 }
251 
252 int ip_output(struct sk_buff *skb)
253 {
254 #ifdef CONFIG_IP_ROUTE_NAT
255         struct rtable *rt = (struct rtable*)skb->dst;
256 #endif
257 
258         IP_INC_STATS(IpOutRequests);
259 
260 #ifdef CONFIG_IP_ROUTE_NAT
261         if (rt->rt_flags&RTCF_NAT)
262                 ip_do_nat(skb);
263 #endif
264 
265         return ip_finish_output(skb);
266 }
267 
268 /* Queues a packet to be sent, and starts the transmitter if necessary.  
269  * This routine also needs to put in the total length and compute the 
270  * checksum.  We use to do this in two stages, ip_build_header() then
271  * this, but that scheme created a mess when routes disappeared etc.
272  * So we do it all here, and the TCP send engine has been changed to
273  * match. (No more unroutable FIN disasters, etc. wheee...)  This will
274  * most likely make other reliable transport layers above IP easier
275  * to implement under Linux.
276  */
277 static inline int ip_queue_xmit2(struct sk_buff *skb)
278 {
279         struct sock *sk = skb->sk;
280         struct rtable *rt = (struct rtable *)skb->dst;
281         struct net_device *dev;
282         struct iphdr *iph = skb->nh.iph;
283 
284         dev = rt->u.dst.dev;
285 
286         /* This can happen when the transport layer has segments queued
287          * with a cached route, and by the time we get here things are
288          * re-routed to a device with a different MTU than the original
289          * device.  Sick, but we must cover it.
290          */
291         if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
292                 struct sk_buff *skb2;
293 
294                 skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
295                 kfree_skb(skb);
296                 if (skb2 == NULL)
297                         return -ENOMEM;
298                 if (sk)
299                         skb_set_owner_w(skb2, sk);
300                 skb = skb2;
301                 iph = skb->nh.iph;
302         }
303 
304         if (skb->len > rt->u.dst.pmtu)
305                 goto fragment;
306 
307         if (ip_dont_fragment(sk, &rt->u.dst))
308                 iph->frag_off |= __constant_htons(IP_DF);
309 
310         ip_select_ident(iph, &rt->u.dst);
311 
312         /* Add an IP checksum. */
313         ip_send_check(iph);
314 
315         skb->priority = sk->priority;
316         return skb->dst->output(skb);
317 
318 fragment:
319         if (ip_dont_fragment(sk, &rt->u.dst)) {
320                 /* Reject packet ONLY if TCP might fragment
321                  * it itself, if were careful enough.
322                  */
323                 iph->frag_off |= __constant_htons(IP_DF);
324                 NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big to self\n"));
325 
326                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
327                           htonl(rt->u.dst.pmtu));
328                 kfree_skb(skb);
329                 return -EMSGSIZE;
330         }
331         ip_select_ident(iph, &rt->u.dst);
332         return ip_fragment(skb, skb->dst->output);
333 }
334 
335 int ip_queue_xmit(struct sk_buff *skb)
336 {
337         struct sock *sk = skb->sk;
338         struct ip_options *opt = sk->protinfo.af_inet.opt;
339         struct rtable *rt;
340         struct iphdr *iph;
341 
342         /* Make sure we can route this packet. */
343         rt = (struct rtable *)__sk_dst_check(sk, 0);
344         if (rt == NULL) {
345                 u32 daddr;
346 
347                 /* Use correct destination address if we have options. */
348                 daddr = sk->daddr;
349                 if(opt && opt->srr)
350                         daddr = opt->faddr;
351 
352                 /* If this fails, retransmit mechanism of transport layer will
353                  * keep trying until route appears or the connection times itself
354                  * out.
355                  */
356                 if (ip_route_output(&rt, daddr, sk->saddr,
357                                     RT_TOS(sk->protinfo.af_inet.tos) | RTO_CONN | sk->localroute,
358                                     sk->bound_dev_if))
359                         goto no_route;
360                 __sk_dst_set(sk, &rt->u.dst);
361         }
362         skb->dst = dst_clone(&rt->u.dst);
363 
364         if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
365                 goto no_route;
366 
367         /* OK, we know where to send it, allocate and build IP header. */
368         iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
369         *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (sk->protinfo.af_inet.tos & 0xff));
370         iph->tot_len = htons(skb->len);
371         iph->frag_off = 0;
372         iph->ttl      = sk->protinfo.af_inet.ttl;
373         iph->protocol = sk->protocol;
374         iph->saddr    = rt->rt_src;
375         iph->daddr    = rt->rt_dst;
376         skb->nh.iph   = iph;
377         /* Transport layer set skb->h.foo itself. */
378 
379         if(opt && opt->optlen) {
380                 iph->ihl += opt->optlen >> 2;
381                 ip_options_build(skb, opt, sk->daddr, rt, 0);
382         }
383 
384         return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
385                        ip_queue_xmit2);
386 
387 no_route:
388         IP_INC_STATS(IpOutNoRoutes);
389         kfree_skb(skb);
390         return -EHOSTUNREACH;
391 }
392 
393 /*
394  *      Build and send a packet, with as little as one copy
395  *
396  *      Doesn't care much about ip options... option length can be
397  *      different for fragment at 0 and other fragments.
398  *
399  *      Note that the fragment at the highest offset is sent first,
400  *      so the getfrag routine can fill in the TCP/UDP checksum header
401  *      field in the last fragment it sends... actually it also helps
402  *      the reassemblers, they can put most packets in at the head of
403  *      the fragment queue, and they know the total size in advance. This
404  *      last feature will measurably improve the Linux fragment handler one
405  *      day.
406  *
407  *      The callback has five args, an arbitrary pointer (copy of frag),
408  *      the source IP address (may depend on the routing table), the 
409  *      destination address (char *), the offset to copy from, and the
410  *      length to be copied.
411  */
412 
413 static int ip_build_xmit_slow(struct sock *sk,
414                   int getfrag (const void *,
415                                char *,
416                                unsigned int,    
417                                unsigned int),
418                   const void *frag,
419                   unsigned length,
420                   struct ipcm_cookie *ipc,
421                   struct rtable *rt,
422                   int flags)
423 {
424         unsigned int fraglen, maxfraglen, fragheaderlen;
425         int err;
426         int offset, mf;
427         int mtu;
428         u16 id = 0;
429 
430         int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
431         int nfrags=0;
432         struct ip_options *opt = ipc->opt;
433         int df = 0;
434 
435         mtu = rt->u.dst.pmtu;
436         if (ip_dont_fragment(sk, &rt->u.dst))
437                 df = htons(IP_DF);
438 
439         length -= sizeof(struct iphdr);
440 
441         if (opt) {
442                 fragheaderlen = sizeof(struct iphdr) + opt->optlen;
443                 maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
444         } else {
445                 fragheaderlen = sizeof(struct iphdr);
446 
447                 /*
448                  *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
449                  *      out the size of the frames to send.
450                  */
451 
452                 maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
453         }
454 
455         if (length + fragheaderlen > 0xFFFF) {
456                 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
457                 return -EMSGSIZE;
458         }
459 
460         /*
461          *      Start at the end of the frame by handling the remainder.
462          */
463 
464         offset = length - (length % (maxfraglen - fragheaderlen));
465 
466         /*
467          *      Amount of memory to allocate for final fragment.
468          */
469 
470         fraglen = length - offset + fragheaderlen;
471 
472         if (length-offset==0) {
473                 fraglen = maxfraglen;
474                 offset -= maxfraglen-fragheaderlen;
475         }
476 
477         /*
478          *      The last fragment will not have MF (more fragments) set.
479          */
480 
481         mf = 0;
482 
483         /*
484          *      Don't fragment packets for path mtu discovery.
485          */
486 
487         if (offset > 0 && sk->protinfo.af_inet.pmtudisc==IP_PMTUDISC_DO) { 
488                 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
489                 return -EMSGSIZE;
490         }
491         if (flags&MSG_PROBE)
492                 goto out;
493 
494         /*
495          *      Begin outputting the bytes.
496          */
497 
498         do {
499                 char *data;
500                 struct sk_buff * skb;
501 
502                 /*
503                  *      Get the memory we require with some space left for alignment.
504                  */
505 
506                 skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, 0, flags&MSG_DONTWAIT, &err);
507                 if (skb == NULL)
508                         goto error;
509 
510                 /*
511                  *      Fill in the control structures
512                  */
513 
514                 skb->priority = sk->priority;
515                 skb->dst = dst_clone(&rt->u.dst);
516                 skb_reserve(skb, hh_len);
517 
518                 /*
519                  *      Find where to start putting bytes.
520                  */
521 
522                 data = skb_put(skb, fraglen);
523                 skb->nh.iph = (struct iphdr *)data;
524 
525                 /*
526                  *      Only write IP header onto non-raw packets 
527                  */
528 
529                 {
530                         struct iphdr *iph = (struct iphdr *)data;
531 
532                         iph->version = 4;
533                         iph->ihl = 5;
534                         if (opt) {
535                                 iph->ihl += opt->optlen>>2;
536                                 ip_options_build(skb, opt,
537                                                  ipc->addr, rt, offset);
538                         }
539                         iph->tos = sk->protinfo.af_inet.tos;
540                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
541                         iph->frag_off = htons(offset>>3)|mf|df;
542                         iph->id = id;
543                         if (!mf) {
544                                 if (offset || !df) {
545                                         /* Select an unpredictable ident only
546                                          * for packets without DF or having
547                                          * been fragmented.
548                                          */
549                                         __ip_select_ident(iph, &rt->u.dst);
550                                         id = iph->id;
551                                 }
552 
553                                 /*
554                                  *      Any further fragments will have MF set.
555                                  */
556                                 mf = htons(IP_MF);
557                         }
558                         if (rt->rt_type == RTN_MULTICAST)
559                                 iph->ttl = sk->protinfo.af_inet.mc_ttl;
560                         else
561                                 iph->ttl = sk->protinfo.af_inet.ttl;
562                         iph->protocol = sk->protocol;
563                         iph->check = 0;
564                         iph->saddr = rt->rt_src;
565                         iph->daddr = rt->rt_dst;
566                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
567                         data += iph->ihl*4;
568                 }
569 
570                 /*
571                  *      User data callback
572                  */
573 
574                 if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
575                         err = -EFAULT;
576                         kfree_skb(skb);
577                         goto error;
578                 }
579 
580                 offset -= (maxfraglen-fragheaderlen);
581                 fraglen = maxfraglen;
582 
583                 nfrags++;
584 
585                 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
586                               skb->dst->dev, output_maybe_reroute);
587                 if (err) {
588                         if (err > 0)
589                                 err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
590                         if (err)
591                                 goto error;
592                 }
593         } while (offset >= 0);
594 
595         if (nfrags>1)
596                 ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
597 out:
598         return 0;
599 
600 error:
601         IP_INC_STATS(IpOutDiscards);
602         if (nfrags>1)
603                 ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
604         return err; 
605 }
606 
607 /*
608  *      Fast path for unfragmented packets.
609  */
610 int ip_build_xmit(struct sock *sk, 
611                   int getfrag (const void *,
612                                char *,
613                                unsigned int,    
614                                unsigned int),
615                   const void *frag,
616                   unsigned length,
617                   struct ipcm_cookie *ipc,
618                   struct rtable *rt,
619                   int flags)
620 {
621         int err;
622         struct sk_buff *skb;
623         int df;
624         struct iphdr *iph;
625 
626         /*
627          *      Try the simple case first. This leaves fragmented frames, and by
628          *      choice RAW frames within 20 bytes of maximum size(rare) to the long path
629          */
630 
631         if (!sk->protinfo.af_inet.hdrincl) {
632                 length += sizeof(struct iphdr);
633 
634                 /*
635                  *      Check for slow path.
636                  */
637                 if (length > rt->u.dst.pmtu || ipc->opt != NULL)  
638                         return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags); 
639         } else {
640                 if (length > rt->u.dst.dev->mtu) {
641                         ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu);
642                         return -EMSGSIZE;
643                 }
644         }
645         if (flags&MSG_PROBE)
646                 goto out;
647 
648         /*
649          *      Do path mtu discovery if needed.
650          */
651         df = 0;
652         if (ip_dont_fragment(sk, &rt->u.dst))
653                 df = htons(IP_DF);
654 
655         /* 
656          *      Fast path for unfragmented frames without options. 
657          */ 
658         {
659         int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
660 
661         skb = sock_alloc_send_skb(sk, length+hh_len+15,
662                                   0, flags&MSG_DONTWAIT, &err);
663         if(skb==NULL)
664                 goto error; 
665         skb_reserve(skb, hh_len);
666         }
667 
668         skb->priority = sk->priority;
669         skb->dst = dst_clone(&rt->u.dst);
670 
671         skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
672 
673         if(!sk->protinfo.af_inet.hdrincl) {
674                 iph->version=4;
675                 iph->ihl=5;
676                 iph->tos=sk->protinfo.af_inet.tos;
677                 iph->tot_len = htons(length);
678                 iph->frag_off = df;
679                 iph->ttl=sk->protinfo.af_inet.mc_ttl;
680                 ip_select_ident(iph, &rt->u.dst);
681                 if (rt->rt_type != RTN_MULTICAST)
682                         iph->ttl=sk->protinfo.af_inet.ttl;
683                 iph->protocol=sk->protocol;
684                 iph->saddr=rt->rt_src;
685                 iph->daddr=rt->rt_dst;
686                 iph->check=0;
687                 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
688                 err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
689         }
690         else
691                 err = getfrag(frag, (void *)iph, 0, length);
692 
693         if (err)
694                 goto error_fault;
695 
696         err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
697                       output_maybe_reroute);
698         if (err > 0)
699                 err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
700         if (err)
701                 goto error;
702 out:
703         return 0;
704 
705 error_fault:
706         err = -EFAULT;
707         kfree_skb(skb);
708 error:
709         IP_INC_STATS(IpOutDiscards);
710         return err; 
711 }
712 
713 /*
714  *      This IP datagram is too large to be sent in one piece.  Break it up into
715  *      smaller pieces (each of size equal to IP header plus
716  *      a block of the data of the original IP data part) that will yet fit in a
717  *      single device frame, and queue such a frame for sending.
718  *
719  *      Yes this is inefficient, feel free to submit a quicker one.
720  */
721 
722 int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
723 {
724         struct iphdr *iph;
725         unsigned char *raw;
726         unsigned char *ptr;
727         struct net_device *dev;
728         struct sk_buff *skb2;
729         unsigned int mtu, hlen, left, len; 
730         int offset;
731         int not_last_frag;
732         struct rtable *rt = (struct rtable*)skb->dst;
733         int err = 0;
734 
735         dev = rt->u.dst.dev;
736 
737         /*
738          *      Point into the IP datagram header.
739          */
740 
741         raw = skb->nh.raw;
742         iph = (struct iphdr*)raw;
743 
744         /*
745          *      Setup starting values.
746          */
747 
748         hlen = iph->ihl * 4;
749         left = ntohs(iph->tot_len) - hlen;      /* Space per frame */
750         mtu = rt->u.dst.pmtu - hlen;    /* Size of data space */
751         ptr = raw + hlen;                       /* Where to start from */
752 
753         /*
754          *      Fragment the datagram.
755          */
756 
757         offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
758         not_last_frag = iph->frag_off & htons(IP_MF);
759 
760         /*
761          *      Keep copying data until we run out.
762          */
763 
764         while(left > 0) {
765                 len = left;
766                 /* IF: it doesn't fit, use 'mtu' - the data space left */
767                 if (len > mtu)
768                         len = mtu;
769                 /* IF: we are not sending upto and including the packet end
770                    then align the next start on an eight byte boundary */
771                 if (len < left) {
772                         len &= ~7;
773                 }
774                 /*
775                  *      Allocate buffer.
776                  */
777 
778                 if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
779                         NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
780                         err = -ENOMEM;
781                         goto fail;
782                 }
783 
784                 /*
785                  *      Set up data on packet
786                  */
787 
788                 skb2->pkt_type = skb->pkt_type;
789                 skb2->priority = skb->priority;
790                 skb_reserve(skb2, (dev->hard_header_len+15)&~15);
791                 skb_put(skb2, len + hlen);
792                 skb2->nh.raw = skb2->data;
793                 skb2->h.raw = skb2->data + hlen;
794 
795                 /*
796                  *      Charge the memory for the fragment to any owner
797                  *      it might possess
798                  */
799 
800                 if (skb->sk)
801                         skb_set_owner_w(skb2, skb->sk);
802                 skb2->dst = dst_clone(skb->dst);
803                 skb2->dev = skb->dev;
804 
805                 /*
806                  *      Copy the packet header into the new buffer.
807                  */
808 
809                 memcpy(skb2->nh.raw, raw, hlen);
810 
811                 /*
812                  *      Copy a block of the IP datagram.
813                  */
814                 memcpy(skb2->h.raw, ptr, len);
815                 left -= len;
816 
817                 /*
818                  *      Fill in the new header fields.
819                  */
820                 iph = skb2->nh.iph;
821                 iph->frag_off = htons((offset >> 3));
822 
823                 /* ANK: dirty, but effective trick. Upgrade options only if
824                  * the segment to be fragmented was THE FIRST (otherwise,
825                  * options are already fixed) and make it ONCE
826                  * on the initial skb, so that all the following fragments
827                  * will inherit fixed options.
828                  */
829                 if (offset == 0)
830                         ip_options_fragment(skb);
831 
832                 /*
833                  *      Added AC : If we are fragmenting a fragment that's not the
834                  *                 last fragment then keep MF on each bit
835                  */
836                 if (left > 0 || not_last_frag)
837                         iph->frag_off |= htons(IP_MF);
838                 ptr += len;
839                 offset += len;
840 
841 #ifdef CONFIG_NETFILTER
842                 /* Connection association is same as pre-frag packet */
843                 skb2->nfct = skb->nfct;
844                 nf_conntrack_get(skb2->nfct);
845 #ifdef CONFIG_NETFILTER_DEBUG
846                 skb2->nf_debug = skb->nf_debug;
847 #endif
848 #endif
849 
850                 /*
851                  *      Put this fragment into the sending queue.
852                  */
853 
854                 IP_INC_STATS(IpFragCreates);
855 
856                 iph->tot_len = htons(len + hlen);
857 
858                 ip_send_check(iph);
859 
860                 err = output(skb2);
861                 if (err)
862                         goto fail;
863         }
864         kfree_skb(skb);
865         IP_INC_STATS(IpFragOKs);
866         return err;
867 
868 fail:
869         kfree_skb(skb); 
870         IP_INC_STATS(IpFragFails);
871         return err;
872 }
873 
874 /*
875  *      Fetch data from kernel space and fill in checksum if needed.
876  */
877 static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset, 
878                               unsigned int fraglen)
879 {
880         struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
881         u16 *pktp = (u16 *)to;
882         struct iovec *iov; 
883         int len; 
884         int hdrflag = 1; 
885 
886         iov = &dp->iov[0]; 
887         if (offset >= iov->iov_len) { 
888                 offset -= iov->iov_len;
889                 iov++; 
890                 hdrflag = 0; 
891         }
892         len = iov->iov_len - offset;
893         if (fraglen > len) { /* overlapping. */ 
894                 dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
895                                              dp->csum);
896                 offset = 0;
897                 fraglen -= len; 
898                 to += len; 
899                 iov++;
900         }
901 
902         dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen, 
903                                              dp->csum); 
904 
905         if (hdrflag && dp->csumoffset)
906                 *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
907         return 0;              
908 }
909 
910 /* 
911  *      Generic function to send a packet as reply to another packet.
912  *      Used to send TCP resets so far. ICMP should use this function too.
913  *
914  *      Should run single threaded per socket because it uses the sock 
915  *      structure to pass arguments.
916  */
917 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
918                    unsigned int len)
919 {
920         struct {
921                 struct ip_options       opt;
922                 char                    data[40];
923         } replyopts;
924         struct ipcm_cookie ipc;
925         u32 daddr;
926         struct rtable *rt = (struct rtable*)skb->dst;
927 
928         if (ip_options_echo(&replyopts.opt, skb))
929                 return;
930 
931         daddr = ipc.addr = rt->rt_src;
932         ipc.opt = NULL;
933 
934         if (replyopts.opt.optlen) {
935                 ipc.opt = &replyopts.opt;
936 
937                 if (ipc.opt->srr)
938                         daddr = replyopts.opt.faddr;
939         }
940 
941         if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
942                 return;
943 
944         /* And let IP do all the hard work.
945 
946            This chunk is not reenterable, hence spinlock.
947            Note that it uses the fact, that this function is called
948            with locally disabled BH and that sk cannot be already spinlocked.
949          */
950         bh_lock_sock(sk);
951         sk->protinfo.af_inet.tos = skb->nh.iph->tos;
952         sk->priority = skb->priority;
953         sk->protocol = skb->nh.iph->protocol;
954         ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
955         bh_unlock_sock(sk);
956 
957         ip_rt_put(rt);
958 }
959 
960 /*
961  *      IP protocol layer initialiser
962  */
963 
964 static struct packet_type ip_packet_type =
965 {
966         __constant_htons(ETH_P_IP),
967         NULL,   /* All devices */
968         ip_rcv,
969         (void*)1,
970         NULL,
971 };
972 
973 /*
974  *      IP registers the packet type and then calls the subprotocol initialisers
975  */
976 
977 void __init ip_init(void)
978 {
979         dev_add_pack(&ip_packet_type);
980 
981         ip_rt_init();
982         inet_initpeers();
983 
984 #ifdef CONFIG_IP_MULTICAST
985         proc_net_create("igmp", 0, ip_mc_procinfo);
986 #endif
987 }
988 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.