1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@redhat.com>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Version: $Id: ipmr.c,v 1.55 2000/11/28 13:13:27 davem Exp $
13 *
14 * Fixes:
15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall
24 * overflow.
25 * Carlos Picoto : PIMv1 Support
26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
27 * Relax this requirement to work with older peers.
28 *
29 */
30
31 #include <linux/config.h>
32 #include <asm/system.h>
33 #include <asm/uaccess.h>
34 #include <linux/types.h>
35 #include <linux/sched.h>
36 #include <linux/errno.h>
37 #include <linux/timer.h>
38 #include <linux/mm.h>
39 #include <linux/kernel.h>
40 #include <linux/fcntl.h>
41 #include <linux/stat.h>
42 #include <linux/socket.h>
43 #include <linux/in.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/inetdevice.h>
47 #include <linux/igmp.h>
48 #include <linux/proc_fs.h>
49 #include <linux/mroute.h>
50 #include <linux/init.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/sock.h>
55 #include <net/icmp.h>
56 #include <net/udp.h>
57 #include <net/raw.h>
58 #include <linux/notifier.h>
59 #include <linux/if_arp.h>
60 #include <linux/netfilter_ipv4.h>
61 #include <net/ipip.h>
62 #include <net/checksum.h>
63
64 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
65 #define CONFIG_IP_PIMSM 1
66 #endif
67
68 static struct sock *mroute_socket;
69
70
71 /* Big lock, protecting vif table, mrt cache and mroute socket state.
72 Note that the changes are semaphored via rtnl_lock.
73 */
74
75 static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
76
77 /*
78 * Multicast router control variables
79 */
80
81 static struct vif_device vif_table[MAXVIFS]; /* Devices */
82 static int maxvif;
83
84 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
85
86 int mroute_do_assert = 0; /* Set in PIM assert */
87 int mroute_do_pim = 0;
88
89 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
90
91 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
92 atomic_t cache_resolve_queue_len; /* Size of unresolved */
93
94 /* Special spinlock for queue of unresolved entries */
95 static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
96
97 /* We return to original Alan's scheme. Hash table of resolved
98 entries is changed only in process context and protected
99 with weak lock mrt_lock. Queue of unresolved entries is protected
100 with strong spinlock mfc_unres_lock.
101
102 In this case data path is free of exclusive locks at all.
103 */
104
105 kmem_cache_t *mrt_cachep;
106
107 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
108 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
109 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
110
111 extern struct inet_protocol pim_protocol;
112
113 static struct timer_list ipmr_expire_timer;
114
115 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
116
117 static
118 struct net_device *ipmr_new_tunnel(struct vifctl *v)
119 {
120 struct net_device *dev;
121
122 dev = __dev_get_by_name("tunl0");
123
124 if (dev) {
125 int err;
126 struct ifreq ifr;
127 mm_segment_t oldfs;
128 struct ip_tunnel_parm p;
129 struct in_device *in_dev;
130
131 memset(&p, 0, sizeof(p));
132 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134 p.iph.version = 4;
135 p.iph.ihl = 5;
136 p.iph.protocol = IPPROTO_IPIP;
137 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138 ifr.ifr_ifru.ifru_data = (void*)&p;
139
140 oldfs = get_fs(); set_fs(KERNEL_DS);
141 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
142 set_fs(oldfs);
143
144 dev = NULL;
145
146 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
147 dev->flags |= IFF_MULTICAST;
148
149 in_dev = __in_dev_get(dev);
150 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
151 goto failure;
152 in_dev->cnf.rp_filter = 0;
153
154 if (dev_open(dev))
155 goto failure;
156 }
157 }
158 return dev;
159
160 failure:
161 unregister_netdevice(dev);
162 return NULL;
163 }
164
165 #ifdef CONFIG_IP_PIMSM
166
167 static int reg_vif_num = -1;
168
169 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
170 {
171 read_lock(&mrt_lock);
172 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
173 ((struct net_device_stats*)dev->priv)->tx_packets++;
174 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
175 read_unlock(&mrt_lock);
176 kfree_skb(skb);
177 return 0;
178 }
179
180 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
181 {
182 return (struct net_device_stats*)dev->priv;
183 }
184
185 static
186 struct net_device *ipmr_reg_vif(struct vifctl *v)
187 {
188 struct net_device *dev;
189 struct in_device *in_dev;
190 int size;
191
192 size = sizeof(*dev) + sizeof(struct net_device_stats);
193 dev = kmalloc(size, GFP_KERNEL);
194 if (!dev)
195 return NULL;
196
197 memset(dev, 0, size);
198
199 dev->priv = dev + 1;
200
201 strcpy(dev->name, "pimreg");
202
203 dev->type = ARPHRD_PIMREG;
204 dev->mtu = 1500 - sizeof(struct iphdr) - 8;
205 dev->flags = IFF_NOARP;
206 dev->hard_start_xmit = reg_vif_xmit;
207 dev->get_stats = reg_vif_get_stats;
208 dev->features |= NETIF_F_DYNALLOC;
209
210 if (register_netdevice(dev)) {
211 kfree(dev);
212 return NULL;
213 }
214 dev->iflink = 0;
215
216 if ((in_dev = inetdev_init(dev)) == NULL)
217 goto failure;
218
219 in_dev->cnf.rp_filter = 0;
220
221 if (dev_open(dev))
222 goto failure;
223
224 return dev;
225
226 failure:
227 unregister_netdevice(dev);
228 return NULL;
229 }
230 #endif
231
232 /*
233 * Delete a VIF entry
234 */
235
236 static int vif_delete(int vifi)
237 {
238 struct vif_device *v;
239 struct net_device *dev;
240 struct in_device *in_dev;
241
242 if (vifi < 0 || vifi >= maxvif)
243 return -EADDRNOTAVAIL;
244
245 v = &vif_table[vifi];
246
247 write_lock_bh(&mrt_lock);
248 dev = v->dev;
249 v->dev = NULL;
250
251 if (!dev) {
252 write_unlock_bh(&mrt_lock);
253 return -EADDRNOTAVAIL;
254 }
255
256 #ifdef CONFIG_IP_PIMSM
257 if (vifi == reg_vif_num)
258 reg_vif_num = -1;
259 #endif
260
261 if (vifi+1 == maxvif) {
262 int tmp;
263 for (tmp=vifi-1; tmp>=0; tmp--) {
264 if (VIF_EXISTS(tmp))
265 break;
266 }
267 maxvif = tmp+1;
268 }
269
270 write_unlock_bh(&mrt_lock);
271
272 dev_set_allmulti(dev, -1);
273
274 if ((in_dev = __in_dev_get(dev)) != NULL) {
275 in_dev->cnf.mc_forwarding--;
276 ip_rt_multicast_event(in_dev);
277 }
278
279 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
280 unregister_netdevice(dev);
281
282 dev_put(dev);
283 return 0;
284 }
285
286 /* Destroy an unresolved cache entry, killing queued skbs
287 and reporting error to netlink readers.
288 */
289
290 static void ipmr_destroy_unres(struct mfc_cache *c)
291 {
292 struct sk_buff *skb;
293
294 atomic_dec(&cache_resolve_queue_len);
295
296 while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
297 #ifdef CONFIG_RTNETLINK
298 if (skb->nh.iph->version == 0) {
299 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
300 nlh->nlmsg_type = NLMSG_ERROR;
301 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
302 skb_trim(skb, nlh->nlmsg_len);
303 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
304 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
305 } else
306 #endif
307 kfree_skb(skb);
308 }
309
310 kmem_cache_free(mrt_cachep, c);
311 }
312
313
314 /* Single timer process for all the unresolved queue. */
315
316 void ipmr_expire_process(unsigned long dummy)
317 {
318 unsigned long now;
319 unsigned long expires;
320 struct mfc_cache *c, **cp;
321
322 if (!spin_trylock(&mfc_unres_lock)) {
323 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
324 return;
325 }
326
327 if (atomic_read(&cache_resolve_queue_len) == 0)
328 goto out;
329
330 now = jiffies;
331 expires = 10*HZ;
332 cp = &mfc_unres_queue;
333
334 while ((c=*cp) != NULL) {
335 long interval = c->mfc_un.unres.expires - now;
336
337 if (interval > 0) {
338 if (interval < expires)
339 expires = interval;
340 cp = &c->next;
341 continue;
342 }
343
344 *cp = c->next;
345
346 ipmr_destroy_unres(c);
347 }
348
349 if (atomic_read(&cache_resolve_queue_len))
350 mod_timer(&ipmr_expire_timer, jiffies + expires);
351
352 out:
353 spin_unlock(&mfc_unres_lock);
354 }
355
356 /* Fill oifs list. It is called under write locked mrt_lock. */
357
358 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
359 {
360 int vifi;
361
362 cache->mfc_un.res.minvif = MAXVIFS;
363 cache->mfc_un.res.maxvif = 0;
364 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
365
366 for (vifi=0; vifi<maxvif; vifi++) {
367 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
368 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
369 if (cache->mfc_un.res.minvif > vifi)
370 cache->mfc_un.res.minvif = vifi;
371 if (cache->mfc_un.res.maxvif <= vifi)
372 cache->mfc_un.res.maxvif = vifi + 1;
373 }
374 }
375 }
376
377 static int vif_add(struct vifctl *vifc, int mrtsock)
378 {
379 int vifi = vifc->vifc_vifi;
380 struct vif_device *v = &vif_table[vifi];
381 struct net_device *dev;
382 struct in_device *in_dev;
383
384 /* Is vif busy ? */
385 if (VIF_EXISTS(vifi))
386 return -EADDRINUSE;
387
388 switch (vifc->vifc_flags) {
389 #ifdef CONFIG_IP_PIMSM
390 case VIFF_REGISTER:
391 /*
392 * Special Purpose VIF in PIM
393 * All the packets will be sent to the daemon
394 */
395 if (reg_vif_num >= 0)
396 return -EADDRINUSE;
397 dev = ipmr_reg_vif(vifc);
398 if (!dev)
399 return -ENOBUFS;
400 break;
401 #endif
402 case VIFF_TUNNEL:
403 dev = ipmr_new_tunnel(vifc);
404 if (!dev)
405 return -ENOBUFS;
406 break;
407 case 0:
408 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
409 if (!dev)
410 return -EADDRNOTAVAIL;
411 __dev_put(dev);
412 break;
413 default:
414 return -EINVAL;
415 }
416
417 if ((in_dev = __in_dev_get(dev)) == NULL)
418 return -EADDRNOTAVAIL;
419 in_dev->cnf.mc_forwarding++;
420 dev_set_allmulti(dev, +1);
421 ip_rt_multicast_event(in_dev);
422
423 /*
424 * Fill in the VIF structures
425 */
426 v->rate_limit=vifc->vifc_rate_limit;
427 v->local=vifc->vifc_lcl_addr.s_addr;
428 v->remote=vifc->vifc_rmt_addr.s_addr;
429 v->flags=vifc->vifc_flags;
430 if (!mrtsock)
431 v->flags |= VIFF_STATIC;
432 v->threshold=vifc->vifc_threshold;
433 v->bytes_in = 0;
434 v->bytes_out = 0;
435 v->pkt_in = 0;
436 v->pkt_out = 0;
437 v->link = dev->ifindex;
438 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
439 v->link = dev->iflink;
440
441 /* And finish update writing critical data */
442 write_lock_bh(&mrt_lock);
443 dev_hold(dev);
444 v->dev=dev;
445 #ifdef CONFIG_IP_PIMSM
446 if (v->flags&VIFF_REGISTER)
447 reg_vif_num = vifi;
448 #endif
449 if (vifi+1 > maxvif)
450 maxvif = vifi+1;
451 write_unlock_bh(&mrt_lock);
452 return 0;
453 }
454
455 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
456 {
457 int line=MFC_HASH(mcastgrp,origin);
458 struct mfc_cache *c;
459
460 for (c=mfc_cache_array[line]; c; c = c->next) {
461 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
462 break;
463 }
464 return c;
465 }
466
467 /*
468 * Allocate a multicast cache entry
469 */
470 static struct mfc_cache *ipmr_cache_alloc(void)
471 {
472 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
473 if(c==NULL)
474 return NULL;
475 memset(c, 0, sizeof(*c));
476 c->mfc_un.res.minvif = MAXVIFS;
477 return c;
478 }
479
480 static struct mfc_cache *ipmr_cache_alloc_unres(void)
481 {
482 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
483 if(c==NULL)
484 return NULL;
485 memset(c, 0, sizeof(*c));
486 skb_queue_head_init(&c->mfc_un.unres.unresolved);
487 c->mfc_un.unres.expires = jiffies + 10*HZ;
488 return c;
489 }
490
491 /*
492 * A cache entry has gone into a resolved state from queued
493 */
494
495 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
496 {
497 struct sk_buff *skb;
498
499 /*
500 * Play the pending entries through our router
501 */
502
503 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
504 #ifdef CONFIG_RTNETLINK
505 if (skb->nh.iph->version == 0) {
506 int err;
507 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
508
509 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
510 nlh->nlmsg_len = skb->tail - (u8*)nlh;
511 } else {
512 nlh->nlmsg_type = NLMSG_ERROR;
513 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
514 skb_trim(skb, nlh->nlmsg_len);
515 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
516 }
517 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
518 } else
519 #endif
520 ip_mr_forward(skb, c, 0);
521 }
522 }
523
524 /*
525 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
526 * expects the following bizarre scheme.
527 *
528 * Called under mrt_lock.
529 */
530
531 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
532 {
533 struct sk_buff *skb;
534 int ihl = pkt->nh.iph->ihl<<2;
535 struct igmphdr *igmp;
536 struct igmpmsg *msg;
537 int ret;
538
539 #ifdef CONFIG_IP_PIMSM
540 if (assert == IGMPMSG_WHOLEPKT)
541 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
542 else
543 #endif
544 skb = alloc_skb(128, GFP_ATOMIC);
545
546 if(!skb)
547 return -ENOBUFS;
548
549 #ifdef CONFIG_IP_PIMSM
550 if (assert == IGMPMSG_WHOLEPKT) {
551 /* Ugly, but we have no choice with this interface.
552 Duplicate old header, fix ihl, length etc.
553 And all this only to mangle msg->im_msgtype and
554 to set msg->im_mbz to "mbz" :-)
555 */
556 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
557 skb->nh.raw = skb->h.raw = (u8*)msg;
558 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
559 msg->im_msgtype = IGMPMSG_WHOLEPKT;
560 msg->im_mbz = 0;
561 msg->im_vif = reg_vif_num;
562 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
563 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
564 } else
565 #endif
566 {
567
568 /*
569 * Copy the IP header
570 */
571
572 skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
573 memcpy(skb->data,pkt->data,ihl);
574 skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
575 msg = (struct igmpmsg*)skb->nh.iph;
576 msg->im_vif = vifi;
577 skb->dst = dst_clone(pkt->dst);
578
579 /*
580 * Add our header
581 */
582
583 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
584 igmp->type =
585 msg->im_msgtype = assert;
586 igmp->code = 0;
587 skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
588 skb->h.raw = skb->nh.raw;
589 }
590
591 if (mroute_socket == NULL) {
592 kfree_skb(skb);
593 return -EINVAL;
594 }
595
596 /*
597 * Deliver to mrouted
598 */
599 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
600 if (net_ratelimit())
601 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
602 kfree_skb(skb);
603 }
604
605 return ret;
606 }
607
608 /*
609 * Queue a packet for resolution. It gets locked cache entry!
610 */
611
612 static int
613 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
614 {
615 int err;
616 struct mfc_cache *c;
617
618 spin_lock_bh(&mfc_unres_lock);
619 for (c=mfc_unres_queue; c; c=c->next) {
620 if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
621 c->mfc_origin == skb->nh.iph->saddr)
622 break;
623 }
624
625 if (c == NULL) {
626 /*
627 * Create a new entry if allowable
628 */
629
630 if (atomic_read(&cache_resolve_queue_len)>=10 ||
631 (c=ipmr_cache_alloc_unres())==NULL) {
632 spin_unlock_bh(&mfc_unres_lock);
633
634 kfree_skb(skb);
635 return -ENOBUFS;
636 }
637
638 /*
639 * Fill in the new cache entry
640 */
641 c->mfc_parent=-1;
642 c->mfc_origin=skb->nh.iph->saddr;
643 c->mfc_mcastgrp=skb->nh.iph->daddr;
644
645 /*
646 * Reflect first query at mrouted.
647 */
648 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
649 /* If the report failed throw the cache entry
650 out - Brad Parker
651 */
652 spin_unlock_bh(&mfc_unres_lock);
653
654 kmem_cache_free(mrt_cachep, c);
655 kfree_skb(skb);
656 return err;
657 }
658
659 atomic_inc(&cache_resolve_queue_len);
660 c->next = mfc_unres_queue;
661 mfc_unres_queue = c;
662
663 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
664 }
665
666 /*
667 * See if we can append the packet
668 */
669 if (c->mfc_un.unres.unresolved.qlen>3) {
670 kfree_skb(skb);
671 err = -ENOBUFS;
672 } else {
673 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
674 err = 0;
675 }
676
677 spin_unlock_bh(&mfc_unres_lock);
678 return err;
679 }
680
681 /*
682 * MFC cache manipulation by user space mroute daemon
683 */
684
685 int ipmr_mfc_delete(struct mfcctl *mfc)
686 {
687 int line;
688 struct mfc_cache *c, **cp;
689
690 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
691
692 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
693 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
694 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
695 write_lock_bh(&mrt_lock);
696 *cp = c->next;
697 write_unlock_bh(&mrt_lock);
698
699 kmem_cache_free(mrt_cachep, c);
700 return 0;
701 }
702 }
703 return -ENOENT;
704 }
705
706 int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
707 {
708 int line;
709 struct mfc_cache *uc, *c, **cp;
710
711 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
712
713 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
714 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
715 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
716 break;
717 }
718
719 if (c != NULL) {
720 write_lock_bh(&mrt_lock);
721 c->mfc_parent = mfc->mfcc_parent;
722 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
723 if (!mrtsock)
724 c->mfc_flags |= MFC_STATIC;
725 write_unlock_bh(&mrt_lock);
726 return 0;
727 }
728
729 if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
730 return -EINVAL;
731
732 c=ipmr_cache_alloc();
733 if (c==NULL)
734 return -ENOMEM;
735
736 c->mfc_origin=mfc->mfcc_origin.s_addr;
737 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
738 c->mfc_parent=mfc->mfcc_parent;
739 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
740 if (!mrtsock)
741 c->mfc_flags |= MFC_STATIC;
742
743 write_lock_bh(&mrt_lock);
744 c->next = mfc_cache_array[line];
745 mfc_cache_array[line] = c;
746 write_unlock_bh(&mrt_lock);
747
748 /*
749 * Check to see if we resolved a queued list. If so we
750 * need to send on the frames and tidy up.
751 */
752 spin_lock_bh(&mfc_unres_lock);
753 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
754 cp = &uc->next) {
755 if (uc->mfc_origin == c->mfc_origin &&
756 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
757 *cp = uc->next;
758 if (atomic_dec_and_test(&cache_resolve_queue_len))
759 del_timer(&ipmr_expire_timer);
760 break;
761 }
762 }
763 spin_unlock_bh(&mfc_unres_lock);
764
765 if (uc) {
766 ipmr_cache_resolve(uc, c);
767 kmem_cache_free(mrt_cachep, uc);
768 }
769 return 0;
770 }
771
772 /*
773 * Close the multicast socket, and clear the vif tables etc
774 */
775
776 static void mroute_clean_tables(struct sock *sk)
777 {
778 int i;
779
780 /*
781 * Shut down all active vif entries
782 */
783 for(i=0; i<maxvif; i++) {
784 if (!(vif_table[i].flags&VIFF_STATIC))
785 vif_delete(i);
786 }
787
788 /*
789 * Wipe the cache
790 */
791 for (i=0;i<MFC_LINES;i++) {
792 struct mfc_cache *c, **cp;
793
794 cp = &mfc_cache_array[i];
795 while ((c = *cp) != NULL) {
796 if (c->mfc_flags&MFC_STATIC) {
797 cp = &c->next;
798 continue;
799 }
800 write_lock_bh(&mrt_lock);
801 *cp = c->next;
802 write_unlock_bh(&mrt_lock);
803
804 kmem_cache_free(mrt_cachep, c);
805 }
806 }
807
808 if (atomic_read(&cache_resolve_queue_len) != 0) {
809 struct mfc_cache *c;
810
811 spin_lock_bh(&mfc_unres_lock);
812 while (mfc_unres_queue != NULL) {
813 c = mfc_unres_queue;
814 mfc_unres_queue = c->next;
815 spin_unlock_bh(&mfc_unres_lock);
816
817 ipmr_destroy_unres(c);
818
819 spin_lock_bh(&mfc_unres_lock);
820 }
821 spin_unlock_bh(&mfc_unres_lock);
822 }
823 }
824
825 static void mrtsock_destruct(struct sock *sk)
826 {
827 rtnl_lock();
828 if (sk == mroute_socket) {
829 ipv4_devconf.mc_forwarding--;
830
831 write_lock_bh(&mrt_lock);
832 mroute_socket=NULL;
833 write_unlock_bh(&mrt_lock);
834
835 mroute_clean_tables(sk);
836 }
837 rtnl_unlock();
838 }
839
840 /*
841 * Socket options and virtual interface manipulation. The whole
842 * virtual interface system is a complete heap, but unfortunately
843 * that's how BSD mrouted happens to think. Maybe one day with a proper
844 * MOSPF/PIM router set up we can clean this up.
845 */
846
847 int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
848 {
849 int ret;
850 struct vifctl vif;
851 struct mfcctl mfc;
852
853 if(optname!=MRT_INIT)
854 {
855 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
856 return -EACCES;
857 }
858
859 switch(optname)
860 {
861 case MRT_INIT:
862 if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
863 return -EOPNOTSUPP;
864 if(optlen!=sizeof(int))
865 return -ENOPROTOOPT;
866
867 rtnl_lock();
868 if (mroute_socket) {
869 rtnl_unlock();
870 return -EADDRINUSE;
871 }
872
873 ret = ip_ra_control(sk, 1, mrtsock_destruct);
874 if (ret == 0) {
875 write_lock_bh(&mrt_lock);
876 mroute_socket=sk;
877 write_unlock_bh(&mrt_lock);
878
879 ipv4_devconf.mc_forwarding++;
880 }
881 rtnl_unlock();
882 return ret;
883 case MRT_DONE:
884 if (sk!=mroute_socket)
885 return -EACCES;
886 return ip_ra_control(sk, 0, NULL);
887 case MRT_ADD_VIF:
888 case MRT_DEL_VIF:
889 if(optlen!=sizeof(vif))
890 return -EINVAL;
891 if (copy_from_user(&vif,optval,sizeof(vif)))
892 return -EFAULT;
893 if(vif.vifc_vifi >= MAXVIFS)
894 return -ENFILE;
895 rtnl_lock();
896 if (optname==MRT_ADD_VIF) {
897 ret = vif_add(&vif, sk==mroute_socket);
898 } else {
899 ret = vif_delete(vif.vifc_vifi);
900 }
901 rtnl_unlock();
902 return ret;
903
904 /*
905 * Manipulate the forwarding caches. These live
906 * in a sort of kernel/user symbiosis.
907 */
908 case MRT_ADD_MFC:
909 case MRT_DEL_MFC:
910 if(optlen!=sizeof(mfc))
911 return -EINVAL;
912 if (copy_from_user(&mfc,optval, sizeof(mfc)))
913 return -EFAULT;
914 rtnl_lock();
915 if (optname==MRT_DEL_MFC)
916 ret = ipmr_mfc_delete(&mfc);
917 else
918 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
919 rtnl_unlock();
920 return ret;
921 /*
922 * Control PIM assert.
923 */
924 case MRT_ASSERT:
925 {
926 int v;
927 if(get_user(v,(int *)optval))
928 return -EFAULT;
929 mroute_do_assert=(v)?1:0;
930 return 0;
931 }
932 #ifdef CONFIG_IP_PIMSM
933 case MRT_PIM:
934 {
935 int v;
936 if(get_user(v,(int *)optval))
937 return -EFAULT;
938 v = (v)?1:0;
939 rtnl_lock();
940 if (v != mroute_do_pim) {
941 mroute_do_pim = v;
942 mroute_do_assert = v;
943 #ifdef CONFIG_IP_PIMSM_V2
944 if (mroute_do_pim)
945 inet_add_protocol(&pim_protocol);
946 else
947 inet_del_protocol(&pim_protocol);
948 #endif
949 }
950 rtnl_unlock();
951 return 0;
952 }
953 #endif
954 /*
955 * Spurious command, or MRT_VERSION which you cannot
956 * set.
957 */
958 default:
959 return -ENOPROTOOPT;
960 }
961 }
962
963 /*
964 * Getsock opt support for the multicast routing system.
965 */
966
967 int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
968 {
969 int olr;
970 int val;
971
972 if(optname!=MRT_VERSION &&
973 #ifdef CONFIG_IP_PIMSM
974 optname!=MRT_PIM &&
975 #endif
976 optname!=MRT_ASSERT)
977 return -ENOPROTOOPT;
978
979 if(get_user(olr, optlen))
980 return -EFAULT;
981
982 olr=min(olr,sizeof(int));
983 if(put_user(olr,optlen))
984 return -EFAULT;
985 if(optname==MRT_VERSION)
986 val=0x0305;
987 #ifdef CONFIG_IP_PIMSM
988 else if(optname==MRT_PIM)
989 val=mroute_do_pim;
990 #endif
991 else
992 val=mroute_do_assert;
993 if(copy_to_user(optval,&val,olr))
994 return -EFAULT;
995 return 0;
996 }
997
998 /*
999 * The IP multicast ioctl support routines.
1000 */
1001
1002 int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
1003 {
1004 struct sioc_sg_req sr;
1005 struct sioc_vif_req vr;
1006 struct vif_device *vif;
1007 struct mfc_cache *c;
1008
1009 switch(cmd)
1010 {
1011 case SIOCGETVIFCNT:
1012 if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
1013 return -EFAULT;
1014 if(vr.vifi>=maxvif)
1015 return -EINVAL;
1016 read_lock(&mrt_lock);
1017 vif=&vif_table[vr.vifi];
1018 if(VIF_EXISTS(vr.vifi)) {
1019 vr.icount=vif->pkt_in;
1020 vr.ocount=vif->pkt_out;
1021 vr.ibytes=vif->bytes_in;
1022 vr.obytes=vif->bytes_out;
1023 read_unlock(&mrt_lock);
1024
1025 if (copy_to_user((void *)arg,&vr,sizeof(vr)))
1026 return -EFAULT;
1027 return 0;
1028 }
1029 read_unlock(&mrt_lock);
1030 return -EADDRNOTAVAIL;
1031 case SIOCGETSGCNT:
1032 if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
1033 return -EFAULT;
1034
1035 read_lock(&mrt_lock);
1036 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1037 if (c) {
1038 sr.pktcnt = c->mfc_un.res.pkt;
1039 sr.bytecnt = c->mfc_un.res.bytes;
1040 sr.wrong_if = c->mfc_un.res.wrong_if;
1041 read_unlock(&mrt_lock);
1042
1043 if (copy_to_user((void *)arg,&sr,sizeof(sr)))
1044 return -EFAULT;
1045 return 0;
1046 }
1047 read_unlock(&mrt_lock);
1048 return -EADDRNOTAVAIL;
1049 default:
1050 return -ENOIOCTLCMD;
1051 }
1052 }
1053
1054
1055 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1056 {
1057 struct vif_device *v;
1058 int ct;
1059 if (event != NETDEV_UNREGISTER)
1060 return NOTIFY_DONE;
1061 v=&vif_table[0];
1062 for(ct=0;ct<maxvif;ct++,v++) {
1063 if (v->dev==ptr)
1064 vif_delete(ct);
1065 }
1066 return NOTIFY_DONE;
1067 }
1068
1069
1070 static struct notifier_block ip_mr_notifier={
1071 ipmr_device_event,
1072 NULL,
1073 0
1074 };
1075
1076 /*
1077 * Encapsulate a packet by attaching a valid IPIP header to it.
1078 * This avoids tunnel drivers and other mess and gives us the speed so
1079 * important for multicast video.
1080 */
1081
1082 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1083 {
1084 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1085
1086 iph->version = 4;
1087 iph->tos = skb->nh.iph->tos;
1088 iph->ttl = skb->nh.iph->ttl;
1089 iph->frag_off = 0;
1090 iph->daddr = daddr;
1091 iph->saddr = saddr;
1092 iph->protocol = IPPROTO_IPIP;
1093 iph->ihl = 5;
1094 iph->tot_len = htons(skb->len);
1095 ip_select_ident(iph, skb->dst);
1096 ip_send_check(iph);
1097
1098 skb->h.ipiph = skb->nh.iph;
1099 skb->nh.iph = iph;
1100 #ifdef CONFIG_NETFILTER
1101 nf_conntrack_put(skb->nfct);
1102 skb->nfct = NULL;
1103 #endif
1104 }
1105
1106 static inline int ipmr_forward_finish(struct sk_buff *skb)
1107 {
1108 struct dst_entry *dst = skb->dst;
1109
1110 if (skb->len <= dst->pmtu)
1111 return dst->output(skb);
1112 else
1113 return ip_fragment(skb, dst->output);
1114 }
1115
1116 /*
1117 * Processing handlers for ipmr_forward
1118 */
1119
1120 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
1121 int vifi, int last)
1122 {
1123 struct iphdr *iph = skb->nh.iph;
1124 struct vif_device *vif = &vif_table[vifi];
1125 struct net_device *dev;
1126 struct rtable *rt;
1127 int encap = 0;
1128 struct sk_buff *skb2;
1129
1130 if (vif->dev == NULL)
1131 return;
1132
1133 #ifdef CONFIG_IP_PIMSM
1134 if (vif->flags & VIFF_REGISTER) {
1135 vif->pkt_out++;
1136 vif->bytes_out+=skb->len;
1137 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1138 ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1139 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1140 return;
1141 }
1142 #endif
1143
1144 if (vif->flags&VIFF_TUNNEL) {
1145 if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
1146 return;
1147 encap = sizeof(struct iphdr);
1148 } else {
1149 if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
1150 return;
1151 }
1152
1153 dev = rt->u.dst.dev;
1154
1155 if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
1156 /* Do not fragment multicasts. Alas, IPv4 does not
1157 allow to send ICMP, so that packets will disappear
1158 to blackhole.
1159 */
1160
1161 IP_INC_STATS_BH(IpFragFails);
1162 ip_rt_put(rt);
1163 return;
1164 }
1165
1166 encap += dev->hard_header_len;
1167
1168 if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
1169 skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
1170 else if (atomic_read(&skb->users) != 1)
1171 skb2 = skb_clone(skb, GFP_ATOMIC);
1172 else {
1173 atomic_inc(&skb->users);
1174 skb2 = skb;
1175 }
1176
1177 if (skb2 == NULL) {
1178 ip_rt_put(rt);
1179 return;
1180 }
1181
1182 vif->pkt_out++;
1183 vif->bytes_out+=skb->len;
1184
1185 dst_release(skb2->dst);
1186 skb2->dst = &rt->u.dst;
1187 iph = skb2->nh.iph;
1188 ip_decrease_ttl(iph);
1189
1190 /* FIXME: forward and output firewalls used to be called here.
1191 * What do we do with netfilter? -- RR */
1192 if (vif->flags & VIFF_TUNNEL) {
1193 ip_encap(skb2, vif->local, vif->remote);
1194 /* FIXME: extra output firewall step used to be here. --RR */
1195 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1196 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
1197 }
1198
1199 IPCB(skb2)->flags |= IPSKB_FORWARDED;
1200
1201 /*
1202 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1203 * not only before forwarding, but after forwarding on all output
1204 * interfaces. It is clear, if mrouter runs a multicasting
1205 * program, it should receive packets not depending to what interface
1206 * program is joined.
1207 * If we will not make it, the program will have to join on all
1208 * interfaces. On the other hand, multihoming host (or router, but
1209 * not mrouter) cannot join to more than one interface - it will
1210 * result in receiving multiple packets.
1211 */
1212 NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
1213 ipmr_forward_finish);
1214 }
1215
1216 int ipmr_find_vif(struct net_device *dev)
1217 {
1218 int ct;
1219 for (ct=maxvif-1; ct>=0; ct--) {
1220 if (vif_table[ct].dev == dev)
1221 break;
1222 }
1223 return ct;
1224 }
1225
1226 /* "local" means that we should preserve one skb (for local delivery) */
1227
1228 int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1229 {
1230 int psend = -1;
1231 int vif, ct;
1232
1233 vif = cache->mfc_parent;
1234 cache->mfc_un.res.pkt++;
1235 cache->mfc_un.res.bytes += skb->len;
1236
1237 /*
1238 * Wrong interface: drop packet and (maybe) send PIM assert.
1239 */
1240 if (vif_table[vif].dev != skb->dev) {
1241 int true_vifi;
1242
1243 if (((struct rtable*)skb->dst)->key.iif == 0) {
1244 /* It is our own packet, looped back.
1245 Very complicated situation...
1246
1247 The best workaround until routing daemons will be
1248 fixed is not to redistribute packet, if it was
1249 send through wrong interface. It means, that
1250 multicast applications WILL NOT work for
1251 (S,G), which have default multicast route pointing
1252 to wrong oif. In any case, it is not a good
1253 idea to use multicasting applications on router.
1254 */
1255 goto dont_forward;
1256 }
1257
1258 cache->mfc_un.res.wrong_if++;
1259 true_vifi = ipmr_find_vif(skb->dev);
1260
1261 if (true_vifi >= 0 && mroute_do_assert &&
1262 /* pimsm uses asserts, when switching from RPT to SPT,
1263 so that we cannot check that packet arrived on an oif.
1264 It is bad, but otherwise we would need to move pretty
1265 large chunk of pimd to kernel. Ough... --ANK
1266 */
1267 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1268 jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
1269 cache->mfc_un.res.last_assert = jiffies;
1270 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1271 }
1272 goto dont_forward;
1273 }
1274
1275 vif_table[vif].pkt_in++;
1276 vif_table[vif].bytes_in+=skb->len;
1277
1278 /*
1279 * Forward the frame
1280 */
1281 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1282 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1283 if (psend != -1)
1284 ipmr_queue_xmit(skb, cache, psend, 0);
1285 psend=ct;
1286 }
1287 }
1288 if (psend != -1)
1289 ipmr_queue_xmit(skb, cache, psend, !local);
1290
1291 dont_forward:
1292 if (!local)
1293 kfree_skb(skb);
1294 return 0;
1295 }
1296
1297
1298 /*
1299 * Multicast packets for forwarding arrive here
1300 */
1301
1302 int ip_mr_input(struct sk_buff *skb)
1303 {
1304 struct mfc_cache *cache;
1305 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1306
1307 /* Packet is looped back after forward, it should not be
1308 forwarded second time, but still can be delivered locally.
1309 */
1310 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1311 goto dont_forward;
1312
1313 if (!local) {
1314 if (IPCB(skb)->opt.router_alert) {
1315 if (ip_call_ra_chain(skb))
1316 return 0;
1317 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1318 /* IGMPv1 (and broken IGMPv2 implementations sort of
1319 Cisco IOS <= 11.2(8)) do not put router alert
1320 option to IGMP packets destined to routable
1321 groups. It is very bad, because it means
1322 that we can forward NO IGMP messages.
1323 */
1324 read_lock(&mrt_lock);
1325 if (mroute_socket) {
1326 raw_rcv(mroute_socket, skb);
1327 read_unlock(&mrt_lock);
1328 return 0;
1329 }
1330 read_unlock(&mrt_lock);
1331 }
1332 }
1333
1334 read_lock(&mrt_lock);
1335 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1336
1337 /*
1338 * No usable cache entry
1339 */
1340 if (cache==NULL) {
1341 int vif;
1342
1343 if (local) {
1344 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1345 ip_local_deliver(skb);
1346 if (skb2 == NULL) {
1347 read_unlock(&mrt_lock);
1348 return -ENOBUFS;
1349 }
1350 skb = skb2;
1351 }
1352
1353 vif = ipmr_find_vif(skb->dev);
1354 if (vif >= 0) {
1355 int err = ipmr_cache_unresolved(vif, skb);
1356 read_unlock(&mrt_lock);
1357
1358 return err;
1359 }
1360 read_unlock(&mrt_lock);
1361 kfree_skb(skb);
1362 return -ENODEV;
1363 }
1364
1365 ip_mr_forward(skb, cache, local);
1366
1367 read_unlock(&mrt_lock);
1368
1369 if (local)
1370 return ip_local_deliver(skb);
1371
1372 return 0;
1373
1374 dont_forward:
1375 if (local)
1376 return ip_local_deliver(skb);
1377 kfree_skb(skb);
1378 return 0;
1379 }
1380
1381 #ifdef CONFIG_IP_PIMSM_V1
1382 /*
1383 * Handle IGMP messages of PIMv1
1384 */
1385
1386 int pim_rcv_v1(struct sk_buff * skb, unsigned short len)
1387 {
1388 struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
1389 struct iphdr *encap;
1390 struct net_device *reg_dev = NULL;
1391
1392 if (!mroute_do_pim ||
1393 len < sizeof(*pim) + sizeof(*encap) ||
1394 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
1395 kfree_skb(skb);
1396 return -EINVAL;
1397 }
1398
1399 encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1400 /*
1401 Check that:
1402 a. packet is really destinted to a multicast group
1403 b. packet is not a NULL-REGISTER
1404 c. packet is not truncated
1405 */
1406 if (!MULTICAST(encap->daddr) ||
1407 ntohs(encap->tot_len) == 0 ||
1408 ntohs(encap->tot_len) + sizeof(*pim) > len) {
1409 kfree_skb(skb);
1410 return -EINVAL;
1411 }
1412
1413 read_lock(&mrt_lock);
1414 if (reg_vif_num >= 0)
1415 reg_dev = vif_table[reg_vif_num].dev;
1416 if (reg_dev)
1417 dev_hold(reg_dev);
1418 read_unlock(&mrt_lock);
1419
1420 if (reg_dev == NULL) {
1421 kfree_skb(skb);
1422 return -EINVAL;
1423 }
1424
1425 skb->mac.raw = skb->nh.raw;
1426 skb_pull(skb, (u8*)encap - skb->data);
1427 skb->nh.iph = (struct iphdr *)skb->data;
1428 skb->dev = reg_dev;
1429 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1430 skb->protocol = __constant_htons(ETH_P_IP);
1431 skb->ip_summed = 0;
1432 skb->pkt_type = PACKET_HOST;
1433 dst_release(skb->dst);
1434 skb->dst = NULL;
1435 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1436 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1437 #ifdef CONFIG_NETFILTER
1438 nf_conntrack_put(skb->nfct);
1439 skb->nfct = NULL;
1440 #endif
1441 netif_rx(skb);
1442 dev_put(reg_dev);
1443 return 0;
1444 }
1445 #endif
1446
1447 #ifdef CONFIG_IP_PIMSM_V2
1448 int pim_rcv(struct sk_buff * skb, unsigned short len)
1449 {
1450 struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
1451 struct iphdr *encap;
1452 struct net_device *reg_dev = NULL;
1453
1454 if (len < sizeof(*pim) + sizeof(*encap) ||
1455 pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1456 (pim->flags&PIM_NULL_REGISTER) ||
1457 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1458 ip_compute_csum((void *)pim, len))) {
1459 kfree_skb(skb);
1460 return -EINVAL;
1461 }
1462
1463 /* check if the inner packet is destined to mcast group */
1464 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1465 if (!MULTICAST(encap->daddr) ||
1466 ntohs(encap->tot_len) == 0 ||
1467 ntohs(encap->tot_len) + sizeof(*pim) > len) {
1468 kfree_skb(skb);
1469 return -EINVAL;
1470 }
1471
1472 read_lock(&mrt_lock);
1473 if (reg_vif_num >= 0)
1474 reg_dev = vif_table[reg_vif_num].dev;
1475 if (reg_dev)
1476 dev_hold(reg_dev);
1477 read_unlock(&mrt_lock);
1478
1479 if (reg_dev == NULL) {
1480 kfree_skb(skb);
1481 return -EINVAL;
1482 }
1483
1484 skb->mac.raw = skb->nh.raw;
1485 skb_pull(skb, (u8*)encap - skb->data);
1486 skb->nh.iph = (struct iphdr *)skb->data;
1487 skb->dev = reg_dev;
1488 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1489 skb->protocol = __constant_htons(ETH_P_IP);
1490 skb->ip_summed = 0;
1491 skb->pkt_type = PACKET_HOST;
1492 dst_release(skb->dst);
1493 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1494 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1495 skb->dst = NULL;
1496 #ifdef CONFIG_NETFILTER
1497 nf_conntrack_put(skb->nfct);
1498 skb->nfct = NULL;
1499 #endif
1500 netif_rx(skb);
1501 dev_put(reg_dev);
1502 return 0;
1503 }
1504 #endif
1505
1506 #ifdef CONFIG_RTNETLINK
1507
1508 static int
1509 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1510 {
1511 int ct;
1512 struct rtnexthop *nhp;
1513 struct net_device *dev = vif_table[c->mfc_parent].dev;
1514 u8 *b = skb->tail;
1515 struct rtattr *mp_head;
1516
1517 if (dev)
1518 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1519
1520 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1521
1522 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1523 if (c->mfc_un.res.ttls[ct] < 255) {
1524 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1525 goto rtattr_failure;
1526 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1527 nhp->rtnh_flags = 0;
1528 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1529 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1530 nhp->rtnh_len = sizeof(*nhp);
1531 }
1532 }
1533 mp_head->rta_type = RTA_MULTIPATH;
1534 mp_head->rta_len = skb->tail - (u8*)mp_head;
1535 rtm->rtm_type = RTN_MULTICAST;
1536 return 1;
1537
1538 rtattr_failure:
1539 skb_trim(skb, b - skb->data);
1540 return -EMSGSIZE;
1541 }
1542
1543 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1544 {
1545 int err;
1546 struct mfc_cache *cache;
1547 struct rtable *rt = (struct rtable*)skb->dst;
1548
1549 read_lock(&mrt_lock);
1550 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1551
1552 if (cache==NULL) {
1553 struct net_device *dev;
1554 int vif;
1555
1556 if (nowait) {
1557 read_unlock(&mrt_lock);
1558 return -EAGAIN;
1559 }
1560
1561 dev = skb->dev;
1562 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1563 read_unlock(&mrt_lock);
1564 return -ENODEV;
1565 }
1566 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1567 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1568 skb->nh.iph->saddr = rt->rt_src;
1569 skb->nh.iph->daddr = rt->rt_dst;
1570 skb->nh.iph->version = 0;
1571 err = ipmr_cache_unresolved(vif, skb);
1572 read_unlock(&mrt_lock);
1573 return err;
1574 }
1575
1576 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1577 cache->mfc_flags |= MFC_NOTIFY;
1578 err = ipmr_fill_mroute(skb, cache, rtm);
1579 read_unlock(&mrt_lock);
1580 return err;
1581 }
1582 #endif
1583
1584 #ifdef CONFIG_PROC_FS
1585 /*
1586 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1587 */
1588
1589 static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
1590 {
1591 struct vif_device *vif;
1592 int len=0;
1593 off_t pos=0;
1594 off_t begin=0;
1595 int size;
1596 int ct;
1597
1598 len += sprintf(buffer,
1599 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1600 pos=len;
1601
1602 read_lock(&mrt_lock);
1603 for (ct=0;ct<maxvif;ct++)
1604 {
1605 char *name = "none";
1606 vif=&vif_table[ct];
1607 if(!VIF_EXISTS(ct))
1608 continue;
1609 if (vif->dev)
1610 name = vif->dev->name;
1611 size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1612 ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
1613 vif->flags, vif->local, vif->remote);
1614 len+=size;
1615 pos+=size;
1616 if(pos<offset)
1617 {
1618 len=0;
1619 begin=pos;
1620 }
1621 if(pos>offset+length)
1622 break;
1623 }
1624 read_unlock(&mrt_lock);
1625
1626 *start=buffer+(offset-begin);
1627 len-=(offset-begin);
1628 if(len>length)
1629 len=length;
1630 if (len<0)
1631 len = 0;
1632 return len;
1633 }
1634
1635 static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
1636 {
1637 struct mfc_cache *mfc;
1638 int len=0;
1639 off_t pos=0;
1640 off_t begin=0;
1641 int size;
1642 int ct;
1643
1644 len += sprintf(buffer,
1645 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1646 pos=len;
1647
1648 read_lock(&mrt_lock);
1649 for (ct=0;ct<MFC_LINES;ct++)
1650 {
1651 for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
1652 {
1653 int n;
1654
1655 /*
1656 * Interface forwarding map
1657 */
1658 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
1659 (unsigned long)mfc->mfc_mcastgrp,
1660 (unsigned long)mfc->mfc_origin,
1661 mfc->mfc_parent,
1662 mfc->mfc_un.res.pkt,
1663 mfc->mfc_un.res.bytes,
1664 mfc->mfc_un.res.wrong_if);
1665 for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
1666 {
1667 if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
1668 size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
1669 }
1670 size += sprintf(buffer+len+size, "\n");
1671 len+=size;
1672 pos+=size;
1673 if(pos<offset)
1674 {
1675 len=0;
1676 begin=pos;
1677 }
1678 if(pos>offset+length)
1679 goto done;
1680 }
1681 }
1682
1683 spin_lock_bh(&mfc_unres_lock);
1684 for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
1685 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
1686 (unsigned long)mfc->mfc_mcastgrp,
1687 (unsigned long)mfc->mfc_origin,
1688 -1,
1689 (long)mfc->mfc_un.unres.unresolved.qlen,
1690 0L, 0L);
1691 len+=size;
1692 pos+=size;
1693 if(pos<offset)
1694 {
1695 len=0;
1696 begin=pos;
1697 }
1698 if(pos>offset+length)
1699 break;
1700 }
1701 spin_unlock_bh(&mfc_unres_lock);
1702
1703 done:
1704 read_unlock(&mrt_lock);
1705 *start=buffer+(offset-begin);
1706 len-=(offset-begin);
1707 if(len>length)
1708 len=length;
1709 if (len < 0) {
1710 len = 0;
1711 }
1712 return len;
1713 }
1714
1715 #endif
1716
1717 #ifdef CONFIG_IP_PIMSM_V2
1718 struct inet_protocol pim_protocol =
1719 {
1720 pim_rcv, /* PIM handler */
1721 NULL, /* PIM error control */
1722 NULL, /* next */
1723 IPPROTO_PIM, /* protocol ID */
1724 0, /* copy */
1725 NULL, /* data */
1726 "PIM" /* name */
1727 };
1728 #endif
1729
1730
1731 /*
1732 * Setup for IP multicast routing
1733 */
1734
1735 void __init ip_mr_init(void)
1736 {
1737 printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
1738 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1739 sizeof(struct mfc_cache),
1740 0, SLAB_HWCACHE_ALIGN,
1741 NULL, NULL);
1742 init_timer(&ipmr_expire_timer);
1743 ipmr_expire_timer.function=ipmr_expire_process;
1744 register_netdevice_notifier(&ip_mr_notifier);
1745 #ifdef CONFIG_PROC_FS
1746 proc_net_create("ip_mr_vif",0,ipmr_vif_info);
1747 proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
1748 #endif
1749 }
1750
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */