~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/ipv4/ipmr.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *      IP multicast routing support for mrouted 3.6/3.8
  3  *
  4  *              (c) 1995 Alan Cox, <alan@redhat.com>
  5  *        Linux Consultancy and Custom Driver Development
  6  *
  7  *      This program is free software; you can redistribute it and/or
  8  *      modify it under the terms of the GNU General Public License
  9  *      as published by the Free Software Foundation; either version
 10  *      2 of the License, or (at your option) any later version.
 11  *
 12  *      Version: $Id: ipmr.c,v 1.55 2000/11/28 13:13:27 davem Exp $
 13  *
 14  *      Fixes:
 15  *      Michael Chastain        :       Incorrect size of copying.
 16  *      Alan Cox                :       Added the cache manager code
 17  *      Alan Cox                :       Fixed the clone/copy bug and device race.
 18  *      Mike McLagan            :       Routing by source
 19  *      Malcolm Beattie         :       Buffer handling fixes.
 20  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 21  *      SVR Anand               :       Fixed several multicast bugs and problems.
 22  *      Alexey Kuznetsov        :       Status, optimisations and more.
 23  *      Brad Parker             :       Better behaviour on mrouted upcall
 24  *                                      overflow.
 25  *      Carlos Picoto           :       PIMv1 Support
 26  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 27  *                                      Relax this requirement to work with older peers.
 28  *
 29  */
 30 
 31 #include <linux/config.h>
 32 #include <asm/system.h>
 33 #include <asm/uaccess.h>
 34 #include <linux/types.h>
 35 #include <linux/sched.h>
 36 #include <linux/errno.h>
 37 #include <linux/timer.h>
 38 #include <linux/mm.h>
 39 #include <linux/kernel.h>
 40 #include <linux/fcntl.h>
 41 #include <linux/stat.h>
 42 #include <linux/socket.h>
 43 #include <linux/in.h>
 44 #include <linux/inet.h>
 45 #include <linux/netdevice.h>
 46 #include <linux/inetdevice.h>
 47 #include <linux/igmp.h>
 48 #include <linux/proc_fs.h>
 49 #include <linux/mroute.h>
 50 #include <linux/init.h>
 51 #include <net/ip.h>
 52 #include <net/protocol.h>
 53 #include <linux/skbuff.h>
 54 #include <net/sock.h>
 55 #include <net/icmp.h>
 56 #include <net/udp.h>
 57 #include <net/raw.h>
 58 #include <linux/notifier.h>
 59 #include <linux/if_arp.h>
 60 #include <linux/netfilter_ipv4.h>
 61 #include <net/ipip.h>
 62 #include <net/checksum.h>
 63 
 64 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
 65 #define CONFIG_IP_PIMSM 1
 66 #endif
 67 
    /* Socket of the multicast routing daemon.  Set by the MRT_INIT socket
       option, cleared in mrtsock_destruct(); ipmr_cache_report() queues
       its upcall messages on it. */
 68 static struct sock *mroute_socket;
 69 
 70 
 71 /* Big lock, protecting vif table, mrt cache and mroute socket state.
 72    Note that the changes are semaphored via rtnl_lock.
 73  */
 74 
 75 static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
 76 
 77 /*
 78  *      Multicast router control variables
 79  */
 80 
 81 static struct vif_device vif_table[MAXVIFS];            /* Devices              */
    /* One more than the highest vif index in use; maintained by vif_add()
       and vif_delete(). */
 82 static int maxvif;
 83 
    /* A vif slot is in use exactly when a device is attached to it. */
 84 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
 85 
 86 int mroute_do_assert = 0;                               /* Set in PIM assert    */
 87 int mroute_do_pim = 0;                                  /* Toggled by MRT_PIM   */
 88 
 89 static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Forwarding cache     */
 90 
 91 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
 92 atomic_t cache_resolve_queue_len;                       /* Size of unresolved   */
 93 
 94 /* Special spinlock for queue of unresolved entries */
 95 static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
 96 
 97 /* We return to original Alan's scheme. Hash table of resolved
 98    entries is changed only in process context and protected
 99    with weak lock mrt_lock. Queue of unresolved entries is protected
100    with strong spinlock mfc_unres_lock.
101 
102    In this case data path is free of exclusive locks at all.
103  */
104 
    /* Slab cache from which all mfc_cache entries (resolved and unresolved)
       are allocated and to which they are freed. */
105 kmem_cache_t *mrt_cachep;
106 
107 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
108 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
109 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
110 
111 extern struct inet_protocol pim_protocol;
112 
    /* Timer that ages the unresolved queue; armed in ipmr_cache_unresolved()
       and re-armed in ipmr_expire_process().  Presumably bound to
       ipmr_expire_process() at init time -- the setup is not visible here. */
113 static struct timer_list ipmr_expire_timer;
114 
115 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
116 
/*
 *      Create the IPIP tunnel device backing a DVMRP tunnel vif.
 *
 *      Asks the "tunl0" device (SIOCADDTUNNEL) to create a tunnel named
 *      "dvmrp<vifi>" between v->vifc_lcl_addr and v->vifc_rmt_addr, then
 *      marks the new device multicast capable, makes sure it has an
 *      in_device with rp_filter disabled, and brings it up.
 *
 *      Returns the new device, or NULL when tunl0 does not exist, the
 *      ioctl fails, the new device cannot be looked up, or its setup
 *      fails (in which last case the device is unregistered again).
 */
117 static
118 struct net_device *ipmr_new_tunnel(struct vifctl *v)
119 {
120         struct net_device  *dev;
121 
122         dev = __dev_get_by_name("tunl0");
123 
124         if (dev) {
125                 int err;
126                 struct ifreq ifr;
127                 mm_segment_t    oldfs;
128                 struct ip_tunnel_parm p;
129                 struct in_device  *in_dev;
130 
                    /* Describe the tunnel: plain IPv4 header, IPIP payload */
131                 memset(&p, 0, sizeof(p));
132                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134                 p.iph.version = 4;
135                 p.iph.ihl = 5;
136                 p.iph.protocol = IPPROTO_IPIP;
137                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138                 ifr.ifr_ifru.ifru_data = (void*)&p;
139 
                    /* p lives in kernel memory; the address limit is lifted
                       around the call, presumably so that the ioctl handler's
                       user-space copy of the parameters succeeds. */
140                 oldfs = get_fs(); set_fs(KERNEL_DS);
141                 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
142                 set_fs(oldfs);
143 
                    /* From here on dev refers to the new tunnel, not to tunl0 */
144                 dev = NULL;
145 
146                 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
147                         dev->flags |= IFF_MULTICAST;
148 
149                         in_dev = __in_dev_get(dev);
150                         if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
151                                 goto failure;
152                         in_dev->cnf.rp_filter = 0;
153 
154                         if (dev_open(dev))
155                                 goto failure;
156                 }
157         }
158         return dev;
159 
160 failure:
            /* Setup of the freshly created tunnel failed: remove it again */
161         unregister_netdevice(dev);
162         return NULL;
163 }
164 
165 #ifdef CONFIG_IP_PIMSM
166 
167 static int reg_vif_num = -1;    /* vif index of the PIM register vif, -1 if none */
168 
/*
 *      Transmit hook of the "pimreg" device.
 *
 *      Accounts the packet in the device statistics, passes the whole
 *      packet up to the routing daemon as an IGMPMSG_WHOLEPKT report and
 *      frees the skb.  The result of the report is ignored; the function
 *      always returns 0.
 */
169 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
170 {
            /* ipmr_cache_report() must be called under mrt_lock */
171         read_lock(&mrt_lock);
172         ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
173         ((struct net_device_stats*)dev->priv)->tx_packets++;
174         ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
175         read_unlock(&mrt_lock);
176         kfree_skb(skb);
177         return 0;
178 }
179 
/* Statistics hook of the "pimreg" device: the counters are stored in
   dev->priv (set up by ipmr_reg_vif()). */
180 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
181 {
182         return (struct net_device_stats*)dev->priv;
183 }
184 
/*
 *      Create, register and open the "pimreg" pseudo device used as the
 *      PIM register vif.
 *
 *      The argument v is not used.  Returns the device, or NULL when the
 *      allocation, the registration, the in_device setup or dev_open()
 *      fails.
 */
185 static
186 struct net_device *ipmr_reg_vif(struct vifctl *v)
187 {
188         struct net_device  *dev;
189         struct in_device *in_dev;
190         int size;
191 
            /* One allocation holds the device followed by its statistics */
192         size = sizeof(*dev) + sizeof(struct net_device_stats);
193         dev = kmalloc(size, GFP_KERNEL);
194         if (!dev)
195                 return NULL;
196 
197         memset(dev, 0, size);
198 
            /* The statistics block starts right behind the device structure */
199         dev->priv = dev + 1;
200 
201         strcpy(dev->name, "pimreg");
202 
203         dev->type               = ARPHRD_PIMREG;
            /* NOTE(review): the extra 8 bytes are presumably room for the PIM
               register header -- confirm. */
204         dev->mtu                = 1500 - sizeof(struct iphdr) - 8;
205         dev->flags              = IFF_NOARP;
206         dev->hard_start_xmit    = reg_vif_xmit;
207         dev->get_stats          = reg_vif_get_stats;
            /* NOTE(review): presumably marks the device as dynamically
               allocated so that the core frees it on unregister -- the
               failure path below does not kfree() it itself. */
208         dev->features           |= NETIF_F_DYNALLOC;
209 
210         if (register_netdevice(dev)) {
211                 kfree(dev);
212                 return NULL;
213         }
214         dev->iflink = 0;
215 
216         if ((in_dev = inetdev_init(dev)) == NULL)
217                 goto failure;
218 
219         in_dev->cnf.rp_filter = 0;
220 
221         if (dev_open(dev))
222                 goto failure;
223 
224         return dev;
225 
226 failure:
227         unregister_netdevice(dev);
228         return NULL;
229 }
230 #endif
231 
232 /*
233  *      Delete a VIF entry
     *
     *      Detaches the device from slot vifi under the mrt_lock write lock,
     *      forgets the register vif number if this was it and shrinks maxvif
     *      when the topmost vif goes away.  After dropping the lock it undoes
     *      what vif_add() did to the device: allmulti count, mc_forwarding
     *      count, and the device reference.  Tunnel and register devices are
     *      unregistered as well.
     *
     *      Returns 0, or -EADDRNOTAVAIL when vifi is out of range or the slot
     *      is empty.
234  */
235  
236 static int vif_delete(int vifi)
237 {
238         struct vif_device *v;
239         struct net_device *dev;
240         struct in_device *in_dev;
241 
242         if (vifi < 0 || vifi >= maxvif)
243                 return -EADDRNOTAVAIL;
244 
245         v = &vif_table[vifi];
246 
247         write_lock_bh(&mrt_lock);
248         dev = v->dev;
249         v->dev = NULL;
250 
251         if (!dev) {
252                 write_unlock_bh(&mrt_lock);
253                 return -EADDRNOTAVAIL;
254         }
255 
256 #ifdef CONFIG_IP_PIMSM
257         if (vifi == reg_vif_num)
258                 reg_vif_num = -1;
259 #endif
260 
            /* The topmost vif was removed: lower maxvif to just above the
               highest slot that is still in use (0 if none is left). */
261         if (vifi+1 == maxvif) {
262                 int tmp;
263                 for (tmp=vifi-1; tmp>=0; tmp--) {
264                         if (VIF_EXISTS(tmp))
265                                 break;
266                 }
267                 maxvif = tmp+1;
268         }
269 
270         write_unlock_bh(&mrt_lock);
271 
272         dev_set_allmulti(dev, -1);
273 
274         if ((in_dev = __in_dev_get(dev)) != NULL) {
275                 in_dev->cnf.mc_forwarding--;
276                 ip_rt_multicast_event(in_dev);
277         }
278 
            /* Tunnel and register devices were created for this vif only */
279         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
280                 unregister_netdevice(dev);
281 
            /* Release the reference taken by dev_hold() in vif_add() */
282         dev_put(dev);
283         return 0;
284 }
285 
286 /* Destroy an unresolved cache entry, killing queued skbs
287    and reporting error to netlink readers.

       Decrements cache_resolve_queue_len and frees the entry c.  Both
       callers visible in this file unlink c from mfc_unres_queue before
       calling.
288  */
289 
290 static void ipmr_destroy_unres(struct mfc_cache *c)
291 {
292         struct sk_buff *skb;
293 
294         atomic_dec(&cache_resolve_queue_len);
295 
296         while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
297 #ifdef CONFIG_RTNETLINK
                    /* An IP version of 0 is treated as the mark of a queued
                       netlink request (the queuing site is not visible here):
                       turn it into an NLMSG_ERROR reply carrying -ETIMEDOUT
                       and send it back to the requester. */
298                 if (skb->nh.iph->version == 0) {
299                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
300                         nlh->nlmsg_type = NLMSG_ERROR;
301                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
302                         skb_trim(skb, nlh->nlmsg_len);
303                         ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
304                         netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
305                 } else
306 #endif
307                         kfree_skb(skb);
308         }
309 
310         kmem_cache_free(mrt_cachep, c);
311 }
312 
313 
314 /* Single timer process for all the unresolved queue.

       Destroys every unresolved entry whose expiry time has passed and
       re-arms ipmr_expire_timer for the earliest remaining expiry, at most
       10*HZ ahead.  The argument dummy is not used.
     */
315 
316 void ipmr_expire_process(unsigned long dummy)
317 {
318         unsigned long now;
319         unsigned long expires;
320         struct mfc_cache *c, **cp;
321 
            /* Do not spin on the queue lock here: if it is taken, retry
               in a tenth of a second instead. */
322         if (!spin_trylock(&mfc_unres_lock)) {
323                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
324                 return;
325         }
326 
327         if (atomic_read(&cache_resolve_queue_len) == 0)
328                 goto out;
329 
330         now = jiffies;
331         expires = 10*HZ;
332         cp = &mfc_unres_queue;
333 
334         while ((c=*cp) != NULL) {
335                 long interval = c->mfc_un.unres.expires - now;
336 
                    /* Not expired yet: keep it and remember the nearest expiry */
337                 if (interval > 0) {
338                         if (interval < expires)
339                                 expires = interval;
340                         cp = &c->next;
341                         continue;
342                 }
343 
                    /* Expired: unlink first, then destroy; cp stays in place */
344                 *cp = c->next;
345 
346                 ipmr_destroy_unres(c);
347         }
348 
349         if (atomic_read(&cache_resolve_queue_len))
350                 mod_timer(&ipmr_expire_timer, jiffies + expires);
351 
352 out:
353         spin_unlock(&mfc_unres_lock);
354 }
355 
356 /* Fill oifs list. It is called under write locked mrt_lock.

       Resets every per-vif TTL threshold of the entry to 255, then copies
       ttls[vifi] for each existing vif whose value lies in 1..254, and
       records the covered index range as minvif (lowest such vif) and
       maxvif (highest such vif plus one).  When no vif qualifies the range
       stays minvif == MAXVIFS, maxvif == 0.
     */
357 
358 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
359 {
360         int vifi;
361 
362         cache->mfc_un.res.minvif = MAXVIFS;
363         cache->mfc_un.res.maxvif = 0;
364         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
365 
366         for (vifi=0; vifi<maxvif; vifi++) {
367                 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
368                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
369                         if (cache->mfc_un.res.minvif > vifi)
370                                 cache->mfc_un.res.minvif = vifi;
371                         if (cache->mfc_un.res.maxvif <= vifi)
372                                 cache->mfc_un.res.maxvif = vifi + 1;
373                 }
374         }
375 }
376 
/*
 *      Add a virtual interface in slot vifc->vifc_vifi.
 *
 *      Depending on vifc->vifc_flags the vif is backed by the PIM register
 *      device (VIFF_REGISTER, only with CONFIG_IP_PIMSM), a new DVMRP
 *      tunnel (VIFF_TUNNEL), or the existing device that owns the local
 *      address (flags == 0).  mrtsock is non-zero when the request comes
 *      from the mroute socket itself; otherwise the vif is marked
 *      VIFF_STATIC.
 *
 *      The index is used unchecked; the caller visible in this file
 *      rejects vifc_vifi >= MAXVIFS first.
 *
 *      Returns 0 on success, -EADDRINUSE if the slot (or the register vif)
 *      is already in use, -ENOBUFS if the device could not be created,
 *      -EADDRNOTAVAIL if no device or in_device is found, -EINVAL for any
 *      other flags value.
 */
377 static int vif_add(struct vifctl *vifc, int mrtsock)
378 {
379         int vifi = vifc->vifc_vifi;
380         struct vif_device *v = &vif_table[vifi];
381         struct net_device *dev;
382         struct in_device *in_dev;
383 
384         /* Is vif busy ? */
385         if (VIF_EXISTS(vifi))
386                 return -EADDRINUSE;
387 
388         switch (vifc->vifc_flags) {
389 #ifdef CONFIG_IP_PIMSM
390         case VIFF_REGISTER:
391                 /*
392                  * Special Purpose VIF in PIM
393                  * All the packets will be sent to the daemon
394                  */
395                 if (reg_vif_num >= 0)
396                         return -EADDRINUSE;
397                 dev = ipmr_reg_vif(vifc);
398                 if (!dev)
399                         return -ENOBUFS;
400                 break;
401 #endif
402         case VIFF_TUNNEL:       
403                 dev = ipmr_new_tunnel(vifc);
404                 if (!dev)
405                         return -ENOBUFS;
406                 break;
407         case 0:
408                 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
409                 if (!dev)
410                         return -EADDRNOTAVAIL;
                    /* Presumably drops the reference returned by ip_dev_find();
                       a reference of our own is taken with dev_hold() below. */
411                 __dev_put(dev);
412                 break;
413         default:
414                 return -EINVAL;
415         }
416 
            /* NOTE(review): on this error path a tunnel or register device
               created above is not unregistered again -- confirm whether
               that is intended. */
417         if ((in_dev = __in_dev_get(dev)) == NULL)
418                 return -EADDRNOTAVAIL;
419         in_dev->cnf.mc_forwarding++;
420         dev_set_allmulti(dev, +1);
421         ip_rt_multicast_event(in_dev);
422 
423         /*
424          *      Fill in the VIF structures
425          */
426         v->rate_limit=vifc->vifc_rate_limit;
427         v->local=vifc->vifc_lcl_addr.s_addr;
428         v->remote=vifc->vifc_rmt_addr.s_addr;
429         v->flags=vifc->vifc_flags;
430         if (!mrtsock)
431                 v->flags |= VIFF_STATIC;
432         v->threshold=vifc->vifc_threshold;
433         v->bytes_in = 0;
434         v->bytes_out = 0;
435         v->pkt_in = 0;
436         v->pkt_out = 0;
437         v->link = dev->ifindex;
438         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
439                 v->link = dev->iflink;
440 
441         /* And finish update writing critical data */
442         write_lock_bh(&mrt_lock);
443         dev_hold(dev);
            /* Setting v->dev is what makes the slot visible (VIF_EXISTS) */
444         v->dev=dev;
445 #ifdef CONFIG_IP_PIMSM
446         if (v->flags&VIFF_REGISTER)
447                 reg_vif_num = vifi;
448 #endif
449         if (vifi+1 > maxvif)
450                 maxvif = vifi+1;
451         write_unlock_bh(&mrt_lock);
452         return 0;
453 }
454 
/* Look up the resolved forwarding entry for (origin, mcastgrp) in its hash
   line of mfc_cache_array.  Returns the entry, or NULL when there is none.
   The function takes no lock itself. */
455 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
456 {
457         int line=MFC_HASH(mcastgrp,origin);
458         struct mfc_cache *c;
459 
460         for (c=mfc_cache_array[line]; c; c = c->next) {
461                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
462                         break;
463         }
            /* c is NULL here when the loop ran off the end of the chain */
464         return c;
465 }
466 
467 /*
468  *      Allocate a multicast cache entry
     *
     *      The entry comes from mrt_cachep with GFP_KERNEL, is zeroed, and
     *      has its minvif preset to MAXVIFS.  Returns NULL when the
     *      allocation fails.
469  */
470 static struct mfc_cache *ipmr_cache_alloc(void)
471 {
472         struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
473         if(c==NULL)
474                 return NULL;
475         memset(c, 0, sizeof(*c));
476         c->mfc_un.res.minvif = MAXVIFS;
477         return c;
478 }
479 
/* Allocate an entry for the unresolved queue.  Uses GFP_ATOMIC (the caller
   in this file holds mfc_unres_lock), zeroes the entry, initialises its
   queue of pending skbs and lets it expire 10 seconds from now.  Returns
   NULL when the allocation fails. */
480 static struct mfc_cache *ipmr_cache_alloc_unres(void)
481 {
482         struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
483         if(c==NULL)
484                 return NULL;
485         memset(c, 0, sizeof(*c));
486         skb_queue_head_init(&c->mfc_un.unres.unresolved);
487         c->mfc_un.unres.expires = jiffies + 10*HZ;
488         return c;
489 }
490 
491 /*
492  *      A cache entry has gone into a resolved state from queued
     *
     *      uc is the unresolved entry (already unlinked from the queue by the
     *      caller in this file), c the resolved entry that replaces it.
     *      Every skb pending on uc is either answered (netlink requests) or
     *      forwarded through c.  uc itself is not freed here.
493  */
494  
495 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
496 {
497         struct sk_buff *skb;
498 
499         /*
500          *      Play the pending entries through our router
501          */
502 
503         while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
504 #ifdef CONFIG_RTNETLINK
                    /* An IP version of 0 is treated as the mark of a queued
                       netlink request: reply with the route description, or
                       with an NLMSG_ERROR of -EMSGSIZE if it cannot be filled
                       in. */
505                 if (skb->nh.iph->version == 0) {
506                         int err;
507                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
508 
509                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
510                                 nlh->nlmsg_len = skb->tail - (u8*)nlh;
511                         } else {
512                                 nlh->nlmsg_type = NLMSG_ERROR;
513                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
514                                 skb_trim(skb, nlh->nlmsg_len);
515                                 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
516                         }
                            /* err is stored but not examined afterwards */
517                         err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
518                 } else
519 #endif
520                         ip_mr_forward(skb, c, 0);
521         }
522 }
523 
524 /*
525  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
526  *      expects the following bizarre scheme.
527  *
528  *      Called under mrt_lock.
     *
     *      pkt is the packet that triggered the report and is left to the
     *      caller; vifi is the vif to report; assert is the message type
     *      (IGMPMSG_NOCACHE and IGMPMSG_WHOLEPKT are the values used in this
     *      file).  For IGMPMSG_WHOLEPKT the message is a copy of the whole
     *      packet with an extra leading header, and reg_vif_num rather than
     *      vifi is reported.  Otherwise it is a copy of the IP header
     *      followed by an IGMP header.
     *
     *      Returns -ENOBUFS if no skb could be allocated, -EINVAL if there is
     *      no mroute socket, otherwise the result of sock_queue_rcv_skb().
529  */
530  
531 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
532 {
533         struct sk_buff *skb;
534         int ihl = pkt->nh.iph->ihl<<2;
535         struct igmphdr *igmp;
536         struct igmpmsg *msg;
537         int ret;
538 
539 #ifdef CONFIG_IP_PIMSM
540         if (assert == IGMPMSG_WHOLEPKT)
541                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
542         else
543 #endif
544                 skb = alloc_skb(128, GFP_ATOMIC);
545 
546         if(!skb)
547                 return -ENOBUFS;
548 
549 #ifdef CONFIG_IP_PIMSM
550         if (assert == IGMPMSG_WHOLEPKT) {
551                 /* Ugly, but we have no choice with this interface.
552                    Duplicate old header, fix ihl, length etc.
553                    And all this only to mangle msg->im_msgtype and
554                    to set msg->im_mbz to "mbz" :-)
555                  */
556                 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
557                 skb->nh.raw = skb->h.raw = (u8*)msg;
558                 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
559                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
560                 msg->im_mbz = 0;
561                 msg->im_vif = reg_vif_num;
562                 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
563                 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
564         } else 
565 #endif
566         {       
567                 
568         /*
569          *      Copy the IP header
         *
         *      NOTE(review): ihl is read through pkt->nh.iph but the bytes
         *      are copied from pkt->data -- this assumes pkt->data points at
         *      the IP header here; confirm against the callers.
570          */
571 
572         skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
573         memcpy(skb->data,pkt->data,ihl);
574         skb->nh.iph->protocol = 0;                      /* Flag to the kernel this is a route add */
            /* The igmpmsg fields are written straight over the copied header */
575         msg = (struct igmpmsg*)skb->nh.iph;
576         msg->im_vif = vifi;
577         skb->dst = dst_clone(pkt->dst);
578 
579         /*
580          *      Add our header
581          */
582 
583         igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
584         igmp->type      =
585         msg->im_msgtype = assert;
586         igmp->code      =       0;
587         skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
588         skb->h.raw = skb->nh.raw;
589         }
590 
591         if (mroute_socket == NULL) {
592                 kfree_skb(skb);
593                 return -EINVAL;
594         }
595 
596         /*
597          *      Deliver to mrouted
598          */
599         if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
600                 if (net_ratelimit())
601                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
602                 kfree_skb(skb);
603         }
604 
605         return ret;
606 }
607 
608 /*
609  *      Queue a packet for resolution. It gets locked cache entry!
     *
     *      Looks for an unresolved entry matching the packet's source and
     *      group; if there is none, creates one, reports it to the daemon
     *      (IGMPMSG_NOCACHE) and arms the expiry timer.  The packet is then
     *      appended to the entry's pending queue.
     *
     *      The skb is consumed on every path (queued or freed).  Returns 0
     *      if the packet was queued; -ENOBUFS if there are already 10
     *      unresolved entries, no entry could be allocated, or the entry
     *      already holds more than 3 packets; or the negative error from
     *      ipmr_cache_report().
610  */
611  
612 static int
613 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
614 {
615         int err;
616         struct mfc_cache *c;
617 
618         spin_lock_bh(&mfc_unres_lock);
619         for (c=mfc_unres_queue; c; c=c->next) {
620                 if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
621                     c->mfc_origin == skb->nh.iph->saddr)
622                         break;
623         }
624 
625         if (c == NULL) {
626                 /*
627                  *      Create a new entry if allowable
628                  */
629 
630                 if (atomic_read(&cache_resolve_queue_len)>=10 ||
631                     (c=ipmr_cache_alloc_unres())==NULL) {
632                         spin_unlock_bh(&mfc_unres_lock);
633 
634                         kfree_skb(skb);
635                         return -ENOBUFS;
636                 }
637 
638                 /*
639                  *      Fill in the new cache entry
640                  */
641                 c->mfc_parent=-1;
642                 c->mfc_origin=skb->nh.iph->saddr;
643                 c->mfc_mcastgrp=skb->nh.iph->daddr;
644 
645                 /*
646                  *      Reflect first query at mrouted.
647                  */
648                 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
649                         /* If the report failed throw the cache entry 
650                            out - Brad Parker
651                          */
652                         spin_unlock_bh(&mfc_unres_lock);
653 
                            /* c was never linked into the queue or counted,
                               so a plain free is enough. */
654                         kmem_cache_free(mrt_cachep, c);
655                         kfree_skb(skb);
656                         return err;
657                 }
658 
                    /* Only now count the entry and link it at the queue head */
659                 atomic_inc(&cache_resolve_queue_len);
660                 c->next = mfc_unres_queue;
661                 mfc_unres_queue = c;
662 
663                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
664         }
665 
666         /*
667          *      See if we can append the packet
668          */
669         if (c->mfc_un.unres.unresolved.qlen>3) {
670                 kfree_skb(skb);
671                 err = -ENOBUFS;
672         } else {
673                 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
674                 err = 0;
675         }
676 
677         spin_unlock_bh(&mfc_unres_lock);
678         return err;
679 }
680 
681 /*
682  *      MFC cache manipulation by user space mroute daemon
683  */
684 
/*
 *      Remove the resolved forwarding entry for (mfcc_origin, mfcc_mcastgrp).
 *
 *      The hash chain is walked without mrt_lock; only the unlink is done
 *      under the write lock.  The caller visible in this file holds rtnl.
 *
 *      Returns 0 if an entry was removed and freed, -ENOENT otherwise.
 */
685 int ipmr_mfc_delete(struct mfcctl *mfc)
686 {
687         int line;
688         struct mfc_cache *c, **cp;
689 
690         line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
691 
692         for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
693                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
694                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
695                         write_lock_bh(&mrt_lock);
696                         *cp = c->next;
697                         write_unlock_bh(&mrt_lock);
698 
699                         kmem_cache_free(mrt_cachep, c);
700                         return 0;
701                 }
702         }
703         return -ENOENT;
704 }
705 
/*
 *      Add or update the resolved forwarding entry for
 *      (mfcc_origin, mfcc_mcastgrp).
 *
 *      An existing entry gets its parent vif and TTL thresholds replaced.
 *      Otherwise a new entry is created and linked into the hash table;
 *      if an unresolved entry for the same pair was waiting, it is removed
 *      from the unresolved queue, its pending packets are played through
 *      the new entry and it is freed.  mrtsock is non-zero when the request
 *      comes from the mroute socket itself; otherwise the entry is marked
 *      MFC_STATIC.
 *
 *      Returns 0 on success, -EINVAL if a new entry is requested for a
 *      non-multicast group, -ENOMEM if the allocation fails.
 */
706 int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
707 {
708         int line;
709         struct mfc_cache *uc, *c, **cp;
710 
711         line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
712 
713         for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
714                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
715                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
716                         break;
717         }
718 
            /* Entry already exists: update it in place under the write lock */
719         if (c != NULL) {
720                 write_lock_bh(&mrt_lock);
721                 c->mfc_parent = mfc->mfcc_parent;
722                 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
723                 if (!mrtsock)
724                         c->mfc_flags |= MFC_STATIC;
725                 write_unlock_bh(&mrt_lock);
726                 return 0;
727         }
728 
729         if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
730                 return -EINVAL;
731 
732         c=ipmr_cache_alloc();
733         if (c==NULL)
734                 return -ENOMEM;
735 
            /* c is not yet reachable from the hash table, so it is filled in
               here before mrt_lock is taken. */
736         c->mfc_origin=mfc->mfcc_origin.s_addr;
737         c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
738         c->mfc_parent=mfc->mfcc_parent;
739         ipmr_update_threshoulds(c, mfc->mfcc_ttls);
740         if (!mrtsock)
741                 c->mfc_flags |= MFC_STATIC;
742 
743         write_lock_bh(&mrt_lock);
744         c->next = mfc_cache_array[line];
745         mfc_cache_array[line] = c;
746         write_unlock_bh(&mrt_lock);
747 
748         /*
749          *      Check to see if we resolved a queued list. If so we
750          *      need to send on the frames and tidy up.
751          */
752         spin_lock_bh(&mfc_unres_lock);
753         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
754              cp = &uc->next) {
755                 if (uc->mfc_origin == c->mfc_origin &&
756                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
757                         *cp = uc->next;
                            /* Last unresolved entry gone: nothing left to expire */
758                         if (atomic_dec_and_test(&cache_resolve_queue_len))
759                                 del_timer(&ipmr_expire_timer);
760                         break;
761                 }
762         }
763         spin_unlock_bh(&mfc_unres_lock);
764 
            /* uc is NULL when the loop found no matching unresolved entry */
765         if (uc) {
766                 ipmr_cache_resolve(uc, c);
767                 kmem_cache_free(mrt_cachep, uc);
768         }
769         return 0;
770 }
771 
772 /*
773  *      Close the multicast socket, and clear the vif tables etc
     *
     *      Deletes every vif and every resolved cache entry that is not
     *      marked static, and destroys all unresolved entries.  The argument
     *      sk is not used.
774  */
775  
776 static void mroute_clean_tables(struct sock *sk)
777 {
778         int i;
779                 
780         /*
781          *      Shut down all active vif entries
782          */
            /* maxvif may shrink while we go, as vif_delete() adjusts it */
783         for(i=0; i<maxvif; i++) {
784                 if (!(vif_table[i].flags&VIFF_STATIC))
785                         vif_delete(i);
786         }
787 
788         /*
789          *      Wipe the cache
790          */
791         for (i=0;i<MFC_LINES;i++) {
792                 struct mfc_cache *c, **cp;
793 
794                 cp = &mfc_cache_array[i];
795                 while ((c = *cp) != NULL) {
                            /* Static entries survive: just step over them */
796                         if (c->mfc_flags&MFC_STATIC) {
797                                 cp = &c->next;
798                                 continue;
799                         }
800                         write_lock_bh(&mrt_lock);
801                         *cp = c->next;
802                         write_unlock_bh(&mrt_lock);
803 
804                         kmem_cache_free(mrt_cachep, c);
805                 }
806         }
807 
808         if (atomic_read(&cache_resolve_queue_len) != 0) {
809                 struct mfc_cache *c;
810 
811                 spin_lock_bh(&mfc_unres_lock);
812                 while (mfc_unres_queue != NULL) {
813                         c = mfc_unres_queue;
814                         mfc_unres_queue = c->next;
                            /* The entry is unlinked; the lock is dropped while
                               it is destroyed and retaken for the next one. */
815                         spin_unlock_bh(&mfc_unres_lock);
816 
817                         ipmr_destroy_unres(c);
818 
819                         spin_lock_bh(&mfc_unres_lock);
820                 }
821                 spin_unlock_bh(&mfc_unres_lock);
822         }
823 }
824 
/* Destructor passed to ip_ra_control() by MRT_INIT.  If sk is the current
   mroute socket, multicast routing is shut down: the global mc_forwarding
   count is dropped, mroute_socket is cleared under mrt_lock and the tables
   are cleaned.  Everything runs under rtnl_lock. */
825 static void mrtsock_destruct(struct sock *sk)
826 {
827         rtnl_lock();
828         if (sk == mroute_socket) {
829                 ipv4_devconf.mc_forwarding--;
830 
831                 write_lock_bh(&mrt_lock);
832                 mroute_socket=NULL;
833                 write_unlock_bh(&mrt_lock);
834 
835                 mroute_clean_tables(sk);
836         }
837         rtnl_unlock();
838 }
839 
840 /*
841  *      Socket options and virtual interface manipulation. The whole
842  *      virtual interface system is a complete heap, but unfortunately
843  *      that's how BSD mrouted happens to think. Maybe one day with a proper
844  *      MOSPF/PIM router set up we can clean this up.
     *
     *      Handles the MRT_* options: MRT_INIT, MRT_DONE, MRT_ADD_VIF,
     *      MRT_DEL_VIF, MRT_ADD_MFC, MRT_DEL_MFC, MRT_ASSERT and (with
     *      CONFIG_IP_PIMSM) MRT_PIM.  Every option except MRT_INIT requires
     *      that sk is the mroute socket or that the caller has CAP_NET_ADMIN,
     *      else -EACCES is returned.  Unknown options yield -ENOPROTOOPT.
     *      Table changes are made under rtnl_lock.
845  */
846  
847 int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
848 {
849         int ret;
850         struct vifctl vif;
851         struct mfcctl mfc;
852         
853         if(optname!=MRT_INIT)
854         {
855                 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
856                         return -EACCES;
857         }
858 
859         switch(optname)
860         {
                    /* Become the mroute socket: only a raw IGMP socket may,
                       and only one at a time (-EADDRINUSE otherwise). */
861                 case MRT_INIT:
862                         if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
863                                 return -EOPNOTSUPP;
864                         if(optlen!=sizeof(int))
865                                 return -ENOPROTOOPT;
866 
867                         rtnl_lock();
868                         if (mroute_socket) {
869                                 rtnl_unlock();
870                                 return -EADDRINUSE;
871                         }
872 
873                         ret = ip_ra_control(sk, 1, mrtsock_destruct);
874                         if (ret == 0) {
875                                 write_lock_bh(&mrt_lock);
876                                 mroute_socket=sk;
877                                 write_unlock_bh(&mrt_lock);
878 
879                                 ipv4_devconf.mc_forwarding++;
880                         }
881                         rtnl_unlock();
882                         return ret;
                    /* Only the mroute socket itself may shut routing down;
                       CAP_NET_ADMIN alone is not enough here. */
883                 case MRT_DONE:
884                         if (sk!=mroute_socket)
885                                 return -EACCES;
886                         return ip_ra_control(sk, 0, NULL);
887                 case MRT_ADD_VIF:
888                 case MRT_DEL_VIF:
889                         if(optlen!=sizeof(vif))
890                                 return -EINVAL;
891                         if (copy_from_user(&vif,optval,sizeof(vif)))
892                                 return -EFAULT; 
                            /* vif_add() indexes vif_table[] with this unchecked */
893                         if(vif.vifc_vifi >= MAXVIFS)
894                                 return -ENFILE;
895                         rtnl_lock();
896                         if (optname==MRT_ADD_VIF) {
897                                 ret = vif_add(&vif, sk==mroute_socket);
898                         } else {
899                                 ret = vif_delete(vif.vifc_vifi);
900                         }
901                         rtnl_unlock();
902                         return ret;
903 
904                 /*
905                  *      Manipulate the forwarding caches. These live
906                  *      in a sort of kernel/user symbiosis.
907                  */
908                 case MRT_ADD_MFC:
909                 case MRT_DEL_MFC:
910                         if(optlen!=sizeof(mfc))
911                                 return -EINVAL;
912                         if (copy_from_user(&mfc,optval, sizeof(mfc)))
913                                 return -EFAULT;
914                         rtnl_lock();
915                         if (optname==MRT_DEL_MFC)
916                                 ret = ipmr_mfc_delete(&mfc);
917                         else
918                                 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
919                         rtnl_unlock();
920                         return ret;
921                 /*
922                  *      Control PIM assert.
923                  */
924                 case MRT_ASSERT:
925                 {
926                         int v;
                            /* NOTE(review): optlen is not checked before an int
                               is read from optval -- confirm this is intended. */
927                         if(get_user(v,(int *)optval))
928                                 return -EFAULT;
929                         mroute_do_assert=(v)?1:0;
930                         return 0;
931                 }
932 #ifdef CONFIG_IP_PIMSM
                    /* Switch PIM mode on or off; assert processing follows it,
                       and with PIMv2 the PIM protocol handler is added or
                       removed accordingly. */
933                 case MRT_PIM:
934                 {
935                         int v;
                            /* NOTE(review): as for MRT_ASSERT, optlen is not
                               checked here. */
936                         if(get_user(v,(int *)optval))
937                                 return -EFAULT;
938                         v = (v)?1:0;
939                         rtnl_lock();
940                         if (v != mroute_do_pim) {
941                                 mroute_do_pim = v;
942                                 mroute_do_assert = v;
943 #ifdef CONFIG_IP_PIMSM_V2
944                                 if (mroute_do_pim)
945                                         inet_add_protocol(&pim_protocol);
946                                 else
947                                         inet_del_protocol(&pim_protocol);
948 #endif
949                         }
950                         rtnl_unlock();
951                         return 0;
952                 }
953 #endif
954                 /*
955                  *      Spurious command, or MRT_VERSION which you cannot
956                  *      set.
957                  */
958                 default:
959                         return -ENOPROTOOPT;
960         }
961 }
962 
963 /*
964  *      Getsock opt support for the multicast routing system.
965  */
966  
967 int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
968 {
969         int olr;
970         int val;
971 
972         if(optname!=MRT_VERSION && 
973 #ifdef CONFIG_IP_PIMSM
974            optname!=MRT_PIM &&
975 #endif
976            optname!=MRT_ASSERT)
977                 return -ENOPROTOOPT;
978 
979         if(get_user(olr, optlen))
980                 return -EFAULT;
981 
982         olr=min(olr,sizeof(int));
983         if(put_user(olr,optlen))
984                 return -EFAULT;
985         if(optname==MRT_VERSION)
986                 val=0x0305;
987 #ifdef CONFIG_IP_PIMSM
988         else if(optname==MRT_PIM)
989                 val=mroute_do_pim;
990 #endif
991         else
992                 val=mroute_do_assert;
993         if(copy_to_user(optval,&val,olr))
994                 return -EFAULT;
995         return 0;
996 }
997 
998 /*
999  *      The IP multicast ioctl support routines.
1000  */
1001  
1002 int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
1003 {
1004         struct sioc_sg_req sr;
1005         struct sioc_vif_req vr;
1006         struct vif_device *vif;
1007         struct mfc_cache *c;
1008         
1009         switch(cmd)
1010         {
1011                 case SIOCGETVIFCNT:
1012                         if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
1013                                 return -EFAULT; 
1014                         if(vr.vifi>=maxvif)
1015                                 return -EINVAL;
1016                         read_lock(&mrt_lock);
1017                         vif=&vif_table[vr.vifi];
1018                         if(VIF_EXISTS(vr.vifi)) {
1019                                 vr.icount=vif->pkt_in;
1020                                 vr.ocount=vif->pkt_out;
1021                                 vr.ibytes=vif->bytes_in;
1022                                 vr.obytes=vif->bytes_out;
1023                                 read_unlock(&mrt_lock);
1024 
1025                                 if (copy_to_user((void *)arg,&vr,sizeof(vr)))
1026                                         return -EFAULT;
1027                                 return 0;
1028                         }
1029                         read_unlock(&mrt_lock);
1030                         return -EADDRNOTAVAIL;
1031                 case SIOCGETSGCNT:
1032                         if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
1033                                 return -EFAULT;
1034 
1035                         read_lock(&mrt_lock);
1036                         c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1037                         if (c) {
1038                                 sr.pktcnt = c->mfc_un.res.pkt;
1039                                 sr.bytecnt = c->mfc_un.res.bytes;
1040                                 sr.wrong_if = c->mfc_un.res.wrong_if;
1041                                 read_unlock(&mrt_lock);
1042 
1043                                 if (copy_to_user((void *)arg,&sr,sizeof(sr)))
1044                                         return -EFAULT;
1045                                 return 0;
1046                         }
1047                         read_unlock(&mrt_lock);
1048                         return -EADDRNOTAVAIL;
1049                 default:
1050                         return -ENOIOCTLCMD;
1051         }
1052 }
1053 
1054 
1055 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1056 {
1057         struct vif_device *v;
1058         int ct;
1059         if (event != NETDEV_UNREGISTER)
1060                 return NOTIFY_DONE;
1061         v=&vif_table[0];
1062         for(ct=0;ct<maxvif;ct++,v++) {
1063                 if (v->dev==ptr)
1064                         vif_delete(ct);
1065         }
1066         return NOTIFY_DONE;
1067 }
1068 
1069 
1070 static struct notifier_block ip_mr_notifier={
1071         ipmr_device_event,
1072         NULL,
1073         0
1074 };
1075 
1076 /*
1077  *      Encapsulate a packet by attaching a valid IPIP header to it.
1078  *      This avoids tunnel drivers and other mess and gives us the speed so
1079  *      important for multicast video.
1080  */
1081  
1082 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1083 {
1084         struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1085 
1086         iph->version    =       4;
1087         iph->tos        =       skb->nh.iph->tos;
1088         iph->ttl        =       skb->nh.iph->ttl;
1089         iph->frag_off   =       0;
1090         iph->daddr      =       daddr;
1091         iph->saddr      =       saddr;
1092         iph->protocol   =       IPPROTO_IPIP;
1093         iph->ihl        =       5;
1094         iph->tot_len    =       htons(skb->len);
1095         ip_select_ident(iph, skb->dst);
1096         ip_send_check(iph);
1097 
1098         skb->h.ipiph = skb->nh.iph;
1099         skb->nh.iph = iph;
1100 #ifdef CONFIG_NETFILTER
1101         nf_conntrack_put(skb->nfct);
1102         skb->nfct = NULL;
1103 #endif
1104 }
1105 
1106 static inline int ipmr_forward_finish(struct sk_buff *skb)
1107 {
1108         struct dst_entry *dst = skb->dst;
1109 
1110         if (skb->len <= dst->pmtu)
1111                 return dst->output(skb);
1112         else
1113                 return ip_fragment(skb, dst->output);
1114 }
1115 
1116 /*
1117  *      Processing handlers for ipmr_forward
1118  */
1119 
1120 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
1121                            int vifi, int last)
1122 {
1123         struct iphdr *iph = skb->nh.iph;
1124         struct vif_device *vif = &vif_table[vifi];
1125         struct net_device *dev;
1126         struct rtable *rt;
1127         int    encap = 0;
1128         struct sk_buff *skb2;
1129 
1130         if (vif->dev == NULL)
1131                 return;
1132 
1133 #ifdef CONFIG_IP_PIMSM
1134         if (vif->flags & VIFF_REGISTER) {
1135                 vif->pkt_out++;
1136                 vif->bytes_out+=skb->len;
1137                 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1138                 ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1139                 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1140                 return;
1141         }
1142 #endif
1143 
1144         if (vif->flags&VIFF_TUNNEL) {
1145                 if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
1146                         return;
1147                 encap = sizeof(struct iphdr);
1148         } else {
1149                 if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
1150                         return;
1151         }
1152 
1153         dev = rt->u.dst.dev;
1154 
1155         if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
1156                 /* Do not fragment multicasts. Alas, IPv4 does not
1157                    allow to send ICMP, so that packets will disappear
1158                    to blackhole.
1159                  */
1160 
1161                 IP_INC_STATS_BH(IpFragFails);
1162                 ip_rt_put(rt);
1163                 return;
1164         }
1165 
1166         encap += dev->hard_header_len;
1167 
1168         if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
1169                 skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
1170         else if (atomic_read(&skb->users) != 1)
1171                 skb2 = skb_clone(skb, GFP_ATOMIC);
1172         else {
1173                 atomic_inc(&skb->users);
1174                 skb2 = skb;
1175         }
1176 
1177         if (skb2 == NULL) {
1178                 ip_rt_put(rt);
1179                 return;
1180         }
1181 
1182         vif->pkt_out++;
1183         vif->bytes_out+=skb->len;
1184 
1185         dst_release(skb2->dst);
1186         skb2->dst = &rt->u.dst;
1187         iph = skb2->nh.iph;
1188         ip_decrease_ttl(iph);
1189 
1190         /* FIXME: forward and output firewalls used to be called here.
1191          * What do we do with netfilter? -- RR */
1192         if (vif->flags & VIFF_TUNNEL) {
1193                 ip_encap(skb2, vif->local, vif->remote);
1194                 /* FIXME: extra output firewall step used to be here. --RR */
1195                 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1196                 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
1197         }
1198 
1199         IPCB(skb2)->flags |= IPSKB_FORWARDED;
1200 
1201         /*
1202          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1203          * not only before forwarding, but after forwarding on all output
1204          * interfaces. It is clear, if mrouter runs a multicasting
1205          * program, it should receive packets not depending to what interface
1206          * program is joined.
1207          * If we will not make it, the program will have to join on all
1208          * interfaces. On the other hand, multihoming host (or router, but
1209          * not mrouter) cannot join to more than one interface - it will
1210          * result in receiving multiple packets.
1211          */
1212         NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev, 
1213                 ipmr_forward_finish);
1214 }
1215 
1216 int ipmr_find_vif(struct net_device *dev)
1217 {
1218         int ct;
1219         for (ct=maxvif-1; ct>=0; ct--) {
1220                 if (vif_table[ct].dev == dev)
1221                         break;
1222         }
1223         return ct;
1224 }
1225 
1226 /* "local" means that we should preserve one skb (for local delivery) */
1227 
1228 int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1229 {
1230         int psend = -1;
1231         int vif, ct;
1232 
1233         vif = cache->mfc_parent;
1234         cache->mfc_un.res.pkt++;
1235         cache->mfc_un.res.bytes += skb->len;
1236 
1237         /*
1238          * Wrong interface: drop packet and (maybe) send PIM assert.
1239          */
1240         if (vif_table[vif].dev != skb->dev) {
1241                 int true_vifi;
1242 
1243                 if (((struct rtable*)skb->dst)->key.iif == 0) {
1244                         /* It is our own packet, looped back.
1245                            Very complicated situation...
1246 
1247                            The best workaround until routing daemons will be
1248                            fixed is not to redistribute packet, if it was
1249                            send through wrong interface. It means, that
1250                            multicast applications WILL NOT work for
1251                            (S,G), which have default multicast route pointing
1252                            to wrong oif. In any case, it is not a good
1253                            idea to use multicasting applications on router.
1254                          */
1255                         goto dont_forward;
1256                 }
1257 
1258                 cache->mfc_un.res.wrong_if++;
1259                 true_vifi = ipmr_find_vif(skb->dev);
1260 
1261                 if (true_vifi >= 0 && mroute_do_assert &&
1262                     /* pimsm uses asserts, when switching from RPT to SPT,
1263                        so that we cannot check that packet arrived on an oif.
1264                        It is bad, but otherwise we would need to move pretty
1265                        large chunk of pimd to kernel. Ough... --ANK
1266                      */
1267                     (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1268                     jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
1269                         cache->mfc_un.res.last_assert = jiffies;
1270                         ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1271                 }
1272                 goto dont_forward;
1273         }
1274 
1275         vif_table[vif].pkt_in++;
1276         vif_table[vif].bytes_in+=skb->len;
1277 
1278         /*
1279          *      Forward the frame
1280          */
1281         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1282                 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1283                         if (psend != -1)
1284                                 ipmr_queue_xmit(skb, cache, psend, 0);
1285                         psend=ct;
1286                 }
1287         }
1288         if (psend != -1)
1289                 ipmr_queue_xmit(skb, cache, psend, !local);
1290 
1291 dont_forward:
1292         if (!local)
1293                 kfree_skb(skb);
1294         return 0;
1295 }
1296 
1297 
1298 /*
1299  *      Multicast packets for forwarding arrive here
1300  */
1301 
1302 int ip_mr_input(struct sk_buff *skb)
1303 {
1304         struct mfc_cache *cache;
1305         int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1306 
1307         /* Packet is looped back after forward, it should not be
1308            forwarded second time, but still can be delivered locally.
1309          */
1310         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1311                 goto dont_forward;
1312 
1313         if (!local) {
1314                     if (IPCB(skb)->opt.router_alert) {
1315                             if (ip_call_ra_chain(skb))
1316                                     return 0;
1317                     } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1318                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1319                                Cisco IOS <= 11.2(8)) do not put router alert
1320                                option to IGMP packets destined to routable
1321                                groups. It is very bad, because it means
1322                                that we can forward NO IGMP messages.
1323                              */
1324                             read_lock(&mrt_lock);
1325                             if (mroute_socket) {
1326                                     raw_rcv(mroute_socket, skb);
1327                                     read_unlock(&mrt_lock);
1328                                     return 0;
1329                             }
1330                             read_unlock(&mrt_lock);
1331                     }
1332         }
1333 
1334         read_lock(&mrt_lock);
1335         cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1336 
1337         /*
1338          *      No usable cache entry
1339          */
1340         if (cache==NULL) {
1341                 int vif;
1342 
1343                 if (local) {
1344                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1345                         ip_local_deliver(skb);
1346                         if (skb2 == NULL) {
1347                                 read_unlock(&mrt_lock);
1348                                 return -ENOBUFS;
1349                         }
1350                         skb = skb2;
1351                 }
1352 
1353                 vif = ipmr_find_vif(skb->dev);
1354                 if (vif >= 0) {
1355                         int err = ipmr_cache_unresolved(vif, skb);
1356                         read_unlock(&mrt_lock);
1357 
1358                         return err;
1359                 }
1360                 read_unlock(&mrt_lock);
1361                 kfree_skb(skb);
1362                 return -ENODEV;
1363         }
1364 
1365         ip_mr_forward(skb, cache, local);
1366 
1367         read_unlock(&mrt_lock);
1368 
1369         if (local)
1370                 return ip_local_deliver(skb);
1371 
1372         return 0;
1373 
1374 dont_forward:
1375         if (local)
1376                 return ip_local_deliver(skb);
1377         kfree_skb(skb);
1378         return 0;
1379 }
1380 
1381 #ifdef CONFIG_IP_PIMSM_V1
1382 /*
1383  * Handle IGMP messages of PIMv1
1384  */
1385 
1386 int pim_rcv_v1(struct sk_buff * skb, unsigned short len)
1387 {
1388         struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
1389         struct iphdr   *encap;
1390         struct net_device  *reg_dev = NULL;
1391 
1392         if (!mroute_do_pim ||
1393             len < sizeof(*pim) + sizeof(*encap) ||
1394             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
1395                 kfree_skb(skb);
1396                 return -EINVAL;
1397         }
1398 
1399         encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1400         /*
1401            Check that:
1402            a. packet is really destinted to a multicast group
1403            b. packet is not a NULL-REGISTER
1404            c. packet is not truncated
1405          */
1406         if (!MULTICAST(encap->daddr) ||
1407             ntohs(encap->tot_len) == 0 ||
1408             ntohs(encap->tot_len) + sizeof(*pim) > len) {
1409                 kfree_skb(skb);
1410                 return -EINVAL;
1411         }
1412 
1413         read_lock(&mrt_lock);
1414         if (reg_vif_num >= 0)
1415                 reg_dev = vif_table[reg_vif_num].dev;
1416         if (reg_dev)
1417                 dev_hold(reg_dev);
1418         read_unlock(&mrt_lock);
1419 
1420         if (reg_dev == NULL) {
1421                 kfree_skb(skb);
1422                 return -EINVAL;
1423         }
1424 
1425         skb->mac.raw = skb->nh.raw;
1426         skb_pull(skb, (u8*)encap - skb->data);
1427         skb->nh.iph = (struct iphdr *)skb->data;
1428         skb->dev = reg_dev;
1429         memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1430         skb->protocol = __constant_htons(ETH_P_IP);
1431         skb->ip_summed = 0;
1432         skb->pkt_type = PACKET_HOST;
1433         dst_release(skb->dst);
1434         skb->dst = NULL;
1435         ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1436         ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1437 #ifdef CONFIG_NETFILTER
1438         nf_conntrack_put(skb->nfct);
1439         skb->nfct = NULL;
1440 #endif
1441         netif_rx(skb);
1442         dev_put(reg_dev);
1443         return 0;
1444 }
1445 #endif
1446 
1447 #ifdef CONFIG_IP_PIMSM_V2
1448 int pim_rcv(struct sk_buff * skb, unsigned short len)
1449 {
1450         struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
1451         struct iphdr   *encap;
1452         struct net_device  *reg_dev = NULL;
1453 
1454         if (len < sizeof(*pim) + sizeof(*encap) ||
1455             pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1456             (pim->flags&PIM_NULL_REGISTER) ||
1457             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1458              ip_compute_csum((void *)pim, len))) {
1459                 kfree_skb(skb);
1460                 return -EINVAL;
1461         }
1462 
1463         /* check if the inner packet is destined to mcast group */
1464         encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1465         if (!MULTICAST(encap->daddr) ||
1466             ntohs(encap->tot_len) == 0 ||
1467             ntohs(encap->tot_len) + sizeof(*pim) > len) {
1468                 kfree_skb(skb);
1469                 return -EINVAL;
1470         }
1471 
1472         read_lock(&mrt_lock);
1473         if (reg_vif_num >= 0)
1474                 reg_dev = vif_table[reg_vif_num].dev;
1475         if (reg_dev)
1476                 dev_hold(reg_dev);
1477         read_unlock(&mrt_lock);
1478 
1479         if (reg_dev == NULL) {
1480                 kfree_skb(skb);
1481                 return -EINVAL;
1482         }
1483 
1484         skb->mac.raw = skb->nh.raw;
1485         skb_pull(skb, (u8*)encap - skb->data);
1486         skb->nh.iph = (struct iphdr *)skb->data;
1487         skb->dev = reg_dev;
1488         memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1489         skb->protocol = __constant_htons(ETH_P_IP);
1490         skb->ip_summed = 0;
1491         skb->pkt_type = PACKET_HOST;
1492         dst_release(skb->dst);
1493         ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1494         ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1495         skb->dst = NULL;
1496 #ifdef CONFIG_NETFILTER
1497         nf_conntrack_put(skb->nfct);
1498         skb->nfct = NULL;
1499 #endif
1500         netif_rx(skb);
1501         dev_put(reg_dev);
1502         return 0;
1503 }
1504 #endif
1505 
1506 #ifdef CONFIG_RTNETLINK
1507 
1508 static int
1509 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1510 {
1511         int ct;
1512         struct rtnexthop *nhp;
1513         struct net_device *dev = vif_table[c->mfc_parent].dev;
1514         u8 *b = skb->tail;
1515         struct rtattr *mp_head;
1516 
1517         if (dev)
1518                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1519 
1520         mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1521 
1522         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1523                 if (c->mfc_un.res.ttls[ct] < 255) {
1524                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1525                                 goto rtattr_failure;
1526                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1527                         nhp->rtnh_flags = 0;
1528                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1529                         nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1530                         nhp->rtnh_len = sizeof(*nhp);
1531                 }
1532         }
1533         mp_head->rta_type = RTA_MULTIPATH;
1534         mp_head->rta_len = skb->tail - (u8*)mp_head;
1535         rtm->rtm_type = RTN_MULTICAST;
1536         return 1;
1537 
1538 rtattr_failure:
1539         skb_trim(skb, b - skb->data);
1540         return -EMSGSIZE;
1541 }
1542 
1543 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1544 {
1545         int err;
1546         struct mfc_cache *cache;
1547         struct rtable *rt = (struct rtable*)skb->dst;
1548 
1549         read_lock(&mrt_lock);
1550         cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1551 
1552         if (cache==NULL) {
1553                 struct net_device *dev;
1554                 int vif;
1555 
1556                 if (nowait) {
1557                         read_unlock(&mrt_lock);
1558                         return -EAGAIN;
1559                 }
1560 
1561                 dev = skb->dev;
1562                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1563                         read_unlock(&mrt_lock);
1564                         return -ENODEV;
1565                 }
1566                 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1567                 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1568                 skb->nh.iph->saddr = rt->rt_src;
1569                 skb->nh.iph->daddr = rt->rt_dst;
1570                 skb->nh.iph->version = 0;
1571                 err = ipmr_cache_unresolved(vif, skb);
1572                 read_unlock(&mrt_lock);
1573                 return err;
1574         }
1575 
1576         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1577                 cache->mfc_flags |= MFC_NOTIFY;
1578         err = ipmr_fill_mroute(skb, cache, rtm);
1579         read_unlock(&mrt_lock);
1580         return err;
1581 }
1582 #endif
1583 
1584 #ifdef CONFIG_PROC_FS   
1585 /*
1586  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1587  */
1588  
1589 static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
1590 {
1591         struct vif_device *vif;
1592         int len=0;
1593         off_t pos=0;
1594         off_t begin=0;
1595         int size;
1596         int ct;
1597 
1598         len += sprintf(buffer,
1599                  "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1600         pos=len;
1601   
1602         read_lock(&mrt_lock);
1603         for (ct=0;ct<maxvif;ct++) 
1604         {
1605                 char *name = "none";
1606                 vif=&vif_table[ct];
1607                 if(!VIF_EXISTS(ct))
1608                         continue;
1609                 if (vif->dev)
1610                         name = vif->dev->name;
1611                 size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1612                         ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
1613                         vif->flags, vif->local, vif->remote);
1614                 len+=size;
1615                 pos+=size;
1616                 if(pos<offset)
1617                 {
1618                         len=0;
1619                         begin=pos;
1620                 }
1621                 if(pos>offset+length)
1622                         break;
1623         }
1624         read_unlock(&mrt_lock);
1625         
1626         *start=buffer+(offset-begin);
1627         len-=(offset-begin);
1628         if(len>length)
1629                 len=length;
1630         if (len<0)
1631                 len = 0;
1632         return len;
1633 }
1634 
1635 static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
1636 {
1637         struct mfc_cache *mfc;
1638         int len=0;
1639         off_t pos=0;
1640         off_t begin=0;
1641         int size;
1642         int ct;
1643 
1644         len += sprintf(buffer,
1645                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1646         pos=len;
1647 
1648         read_lock(&mrt_lock);
1649         for (ct=0;ct<MFC_LINES;ct++) 
1650         {
1651                 for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
1652                 {
1653                         int n;
1654 
1655                         /*
1656                          *      Interface forwarding map
1657                          */
1658                         size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
1659                                 (unsigned long)mfc->mfc_mcastgrp,
1660                                 (unsigned long)mfc->mfc_origin,
1661                                 mfc->mfc_parent,
1662                                 mfc->mfc_un.res.pkt,
1663                                 mfc->mfc_un.res.bytes,
1664                                 mfc->mfc_un.res.wrong_if);
1665                         for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
1666                         {
1667                                 if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
1668                                         size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
1669                         }
1670                         size += sprintf(buffer+len+size, "\n");
1671                         len+=size;
1672                         pos+=size;
1673                         if(pos<offset)
1674                         {
1675                                 len=0;
1676                                 begin=pos;
1677                         }
1678                         if(pos>offset+length)
1679                                 goto done;
1680                 }
1681         }
1682 
1683         spin_lock_bh(&mfc_unres_lock);
1684         for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
1685                 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
1686                                (unsigned long)mfc->mfc_mcastgrp,
1687                                (unsigned long)mfc->mfc_origin,
1688                                -1,
1689                                 (long)mfc->mfc_un.unres.unresolved.qlen,
1690                                 0L, 0L);
1691                 len+=size;
1692                 pos+=size;
1693                 if(pos<offset)
1694                 {
1695                         len=0;
1696                         begin=pos;
1697                 }
1698                 if(pos>offset+length)
1699                         break;
1700         }
1701         spin_unlock_bh(&mfc_unres_lock);
1702 
1703 done:
1704         read_unlock(&mrt_lock);
1705         *start=buffer+(offset-begin);
1706         len-=(offset-begin);
1707         if(len>length)
1708                 len=length;
1709         if (len < 0) {
1710                 len = 0;
1711         }
1712         return len;
1713 }
1714 
1715 #endif  
1716 
1717 #ifdef CONFIG_IP_PIMSM_V2
1718 struct inet_protocol pim_protocol = 
1719 {
1720         pim_rcv,                /* PIM handler          */
1721         NULL,                   /* PIM error control    */
1722         NULL,                   /* next                 */
1723         IPPROTO_PIM,            /* protocol ID          */
1724         0,                      /* copy                 */
1725         NULL,                   /* data                 */
1726         "PIM"                   /* name                 */
1727 };
1728 #endif
1729 
1730 
1731 /*
1732  *      Setup for IP multicast routing
1733  */
1734  
1735 void __init ip_mr_init(void)
1736 {
1737         printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
1738         mrt_cachep = kmem_cache_create("ip_mrt_cache",
1739                                        sizeof(struct mfc_cache),
1740                                        0, SLAB_HWCACHE_ALIGN,
1741                                        NULL, NULL);
1742         init_timer(&ipmr_expire_timer);
1743         ipmr_expire_timer.function=ipmr_expire_process;
1744         register_netdevice_notifier(&ip_mr_notifier);
1745 #ifdef CONFIG_PROC_FS   
1746         proc_net_create("ip_mr_vif",0,ipmr_vif_info);
1747         proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
1748 #endif  
1749 }
1750 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.