1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.49 2000/11/03 01:11:58 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 #include <linux/config.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/net.h>
22 #include <linux/route.h>
23 #include <linux/netdevice.h>
24 #include <linux/in6.h>
25 #include <linux/init.h>
26 #include <linux/netlink.h>
27 #include <linux/if_arp.h>
28
29 #ifdef CONFIG_PROC_FS
30 #include <linux/proc_fs.h>
31 #endif
32
33 #include <net/snmp.h>
34 #include <net/ipv6.h>
35 #include <net/ip6_fib.h>
36 #include <net/ip6_route.h>
37 #include <net/ndisc.h>
38 #include <net/addrconf.h>
39 #include <net/tcp.h>
40 #include <linux/rtnetlink.h>
41
42 #include <asm/uaccess.h>
43
44 #ifdef CONFIG_SYSCTL
45 #include <linux/sysctl.h>
46 #endif
47
/* Policy routing is forced off for this file regardless of the build config. */
#undef CONFIG_RT6_POLICY

/*
 * Debug verbosity.  RDBG() and RT6_TRACE() produce output only when this
 * is 3 or higher; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG takes a fully parenthesised printk argument list: RDBG(("fmt", a)). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
60
61
62 int ip6_rt_max_size = 4096;
63 int ip6_rt_gc_min_interval = 5*HZ;
64 int ip6_rt_gc_timeout = 60*HZ;
65 int ip6_rt_gc_interval = 30*HZ;
66 int ip6_rt_gc_elasticity = 9;
67 int ip6_rt_mtu_expires = 10*60*HZ;
68 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
69
70 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
71 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
72 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
73 struct sk_buff *skb);
74 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
75 static int ip6_dst_gc(void);
76
77 static int ip6_pkt_discard(struct sk_buff *skb);
78 static void ip6_link_failure(struct sk_buff *skb);
79
80 struct dst_ops ip6_dst_ops = {
81 AF_INET6,
82 __constant_htons(ETH_P_IPV6),
83 1024,
84
85 ip6_dst_gc,
86 ip6_dst_check,
87 ip6_dst_reroute,
88 NULL,
89 ip6_negative_advice,
90 ip6_link_failure,
91 sizeof(struct rt6_info),
92 };
93
94 struct rt6_info ip6_null_entry = {
95 {{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
96 -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97 -ENETUNREACH, NULL, NULL,
98 ip6_pkt_discard, ip6_pkt_discard,
99 #ifdef CONFIG_NET_CLS_ROUTE
100 0,
101 #endif
102 &ip6_dst_ops}},
103 NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
104 255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
105 };
106
107 struct fib6_node ip6_routing_table = {
108 NULL, NULL, NULL, NULL,
109 &ip6_null_entry,
110 0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
111 };
112
113 #ifdef CONFIG_RT6_POLICY
114 int ip6_rt_policy = 0;
115
116 struct pol_chain *rt6_pol_list = NULL;
117
118
119 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
120 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
121
122 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
123 struct in6_addr *daddr,
124 struct in6_addr *saddr,
125 struct fl_acc_args *args);
126
127 #else
128 #define ip6_rt_policy (0)
129 #endif
130
131 /* Protects all the ip6 fib */
132
133 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
134
135
136 /*
137 * Route lookup. Any rt6_lock is implied.
138 */
139
140 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
141 int oif,
142 int strict)
143 {
144 struct rt6_info *local = NULL;
145 struct rt6_info *sprt;
146
147 if (oif) {
148 for (sprt = rt; sprt; sprt = sprt->u.next) {
149 struct net_device *dev = sprt->rt6i_dev;
150 if (dev->ifindex == oif)
151 return sprt;
152 if (dev->flags&IFF_LOOPBACK)
153 local = sprt;
154 }
155
156 if (local)
157 return local;
158
159 if (strict)
160 return &ip6_null_entry;
161 }
162 return rt;
163 }
164
165 /*
166 * pointer to the last default router chosen. BH is disabled locally.
167 */
168 static struct rt6_info *rt6_dflt_pointer = NULL;
169 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
170
171 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
172 {
173 struct rt6_info *match = NULL;
174 struct rt6_info *sprt;
175 int mpri = 0;
176
177 for (sprt = rt; sprt; sprt = sprt->u.next) {
178 struct neighbour *neigh;
179
180 if ((neigh = sprt->rt6i_nexthop) != NULL) {
181 int m = -1;
182
183 switch (neigh->nud_state) {
184 case NUD_REACHABLE:
185 if (sprt != rt6_dflt_pointer) {
186 rt = sprt;
187 goto out;
188 }
189 m = 2;
190 break;
191
192 case NUD_DELAY:
193 m = 1;
194 break;
195
196 case NUD_STALE:
197 m = 1;
198 break;
199 };
200
201 if (oif && sprt->rt6i_dev->ifindex == oif) {
202 m += 2;
203 }
204
205 if (m >= mpri) {
206 mpri = m;
207 match = sprt;
208 }
209 }
210 }
211
212 if (match) {
213 rt = match;
214 } else {
215 /*
216 * No default routers are known to be reachable.
217 * SHOULD round robin
218 */
219 spin_lock(&rt6_dflt_lock);
220 if (rt6_dflt_pointer) {
221 struct rt6_info *next;
222
223 if ((next = rt6_dflt_pointer->u.next) != NULL &&
224 next->u.dst.obsolete <= 0 &&
225 next->u.dst.error == 0)
226 rt = next;
227 }
228 spin_unlock(&rt6_dflt_lock);
229 }
230
231 out:
232 spin_lock(&rt6_dflt_lock);
233 rt6_dflt_pointer = rt;
234 spin_unlock(&rt6_dflt_lock);
235 return rt;
236 }
237
238 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
239 int oif, int strict)
240 {
241 struct fib6_node *fn;
242 struct rt6_info *rt;
243
244 read_lock_bh(&rt6_lock);
245 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
246 rt = rt6_device_match(fn->leaf, oif, strict);
247 dst_hold(&rt->u.dst);
248 rt->u.dst.__use++;
249 read_unlock_bh(&rt6_lock);
250
251 rt->u.dst.lastuse = jiffies;
252 if (rt->u.dst.error == 0)
253 return rt;
254 dst_release(&rt->u.dst);
255 return NULL;
256 }
257
/* rt6_ins is called with rt6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
263
264 static int rt6_ins(struct rt6_info *rt)
265 {
266 int err;
267
268 write_lock_bh(&rt6_lock);
269 err = fib6_add(&ip6_routing_table, rt);
270 write_unlock_bh(&rt6_lock);
271
272 return err;
273 }
274
/* No rt6_lock! If COW failed, the function returns a dead route entry
   with dst->error set to the errno value.
 */
278
279 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
280 struct in6_addr *saddr)
281 {
282 int err;
283 struct rt6_info *rt;
284
285 /*
286 * Clone the route.
287 */
288
289 rt = ip6_rt_copy(ort);
290
291 if (rt) {
292 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
293
294 if (!(rt->rt6i_flags&RTF_GATEWAY))
295 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
296
297 rt->rt6i_dst.plen = 128;
298 rt->rt6i_flags |= RTF_CACHE;
299 rt->u.dst.flags |= DST_HOST;
300
301 #ifdef CONFIG_IPV6_SUBTREES
302 if (rt->rt6i_src.plen && saddr) {
303 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
304 rt->rt6i_src.plen = 128;
305 }
306 #endif
307
308 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
309
310 dst_clone(&rt->u.dst);
311
312 err = rt6_ins(rt);
313 if (err == 0)
314 return rt;
315
316 rt->u.dst.error = err;
317
318 return rt;
319 }
320 dst_clone(&ip6_null_entry.u.dst);
321 return &ip6_null_entry;
322 }
323
#ifdef CONFIG_RT6_POLICY
/*
 * Policy lookup for a received packet: wraps @skb in a FL_ARG_FORWARD
 * argument block and runs rt6_flow_lookup() on the addresses taken from
 * the packet's IPv6 header.
 */
static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
						      struct sk_buff *skb)
{
	struct fl_acc_args arg;

	arg.type = FL_ARG_FORWARD;
	arg.fl_u.skb = skb;

	return rt6_flow_lookup(rt, &skb->nh.ipv6h->daddr,
			       &skb->nh.ipv6h->saddr, &arg);
}

/*
 * Policy lookup for a locally originated flow: wraps @sk and @fl in a
 * FL_ARG_ORIGIN argument block and runs rt6_flow_lookup() on the flow's
 * destination and source addresses.
 */
static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
						       struct sock *sk,
						       struct flowi *fl)
{
	struct fl_acc_args arg;

	arg.type = FL_ARG_ORIGIN;
	arg.fl_u.fl_o.sk = sk;
	arg.fl_u.fl_o.flow = fl;

	return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
			       &arg);
}

#endif
355
/*
 * BACKTRACK() - climb the FIB tree after a strict lookup hit the null entry.
 *
 * Expects the locals `rt`, `fn` and `strict` and the labels `out` and
 * `restart` in the enclosing function.  When `rt` is &ip6_null_entry and
 * `strict` is set, walks up through fn->parent: on reaching a node flagged
 * RTN_ROOT it takes a reference on `rt` and jumps to `out`; on reaching a
 * node flagged RTN_RTINFO it jumps to `restart` with `fn` pointing at that
 * node.  If the parent chain ends first, execution simply falls through.
 *
 * Wrapped in do { } while (0) so that `BACKTRACK();` is exactly one
 * statement and cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_clone(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
367
368
369 void ip6_route_input(struct sk_buff *skb)
370 {
371 struct fib6_node *fn;
372 struct rt6_info *rt;
373 int strict;
374 int attempts = 3;
375
376 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
377
378 relookup:
379 read_lock_bh(&rt6_lock);
380
381 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
382 &skb->nh.ipv6h->saddr);
383
384 restart:
385 rt = fn->leaf;
386
387 if ((rt->rt6i_flags & RTF_CACHE)) {
388 if (ip6_rt_policy == 0) {
389 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
390 BACKTRACK();
391 dst_clone(&rt->u.dst);
392 goto out;
393 }
394
395 #ifdef CONFIG_RT6_POLICY
396 if ((rt->rt6i_flags & RTF_FLOW)) {
397 struct rt6_info *sprt;
398
399 for (sprt = rt; sprt; sprt = sprt->u.next) {
400 if (rt6_flow_match_in(sprt, skb)) {
401 rt = sprt;
402 dst_clone(&rt->u.dst);
403 goto out;
404 }
405 }
406 }
407 #endif
408 }
409
410 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
411 BACKTRACK();
412
413 if (ip6_rt_policy == 0) {
414 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
415 read_unlock_bh(&rt6_lock);
416
417 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
418 &skb->nh.ipv6h->saddr);
419
420 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
421 goto out2;
422 /* Race condition! In the gap, when rt6_lock was
423 released someone could insert this route. Relookup.
424 */
425 goto relookup;
426 }
427 dst_clone(&rt->u.dst);
428 } else {
429 #ifdef CONFIG_RT6_POLICY
430 rt = rt6_flow_lookup_in(rt, skb);
431 #else
432 /* NEVER REACHED */
433 #endif
434 }
435
436 out:
437 read_unlock_bh(&rt6_lock);
438 out2:
439 rt->u.dst.lastuse = jiffies;
440 rt->u.dst.__use++;
441 skb->dst = (struct dst_entry *) rt;
442 }
443
444 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
445 {
446 struct fib6_node *fn;
447 struct rt6_info *rt;
448 int strict;
449 int attempts = 3;
450
451 strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
452
453 relookup:
454 read_lock_bh(&rt6_lock);
455
456 fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
457 fl->nl_u.ip6_u.saddr);
458
459 restart:
460 rt = fn->leaf;
461
462 if ((rt->rt6i_flags & RTF_CACHE)) {
463 if (ip6_rt_policy == 0) {
464 rt = rt6_device_match(rt, fl->oif, strict);
465 BACKTRACK();
466 dst_clone(&rt->u.dst);
467 goto out;
468 }
469
470 #ifdef CONFIG_RT6_POLICY
471 if ((rt->rt6i_flags & RTF_FLOW)) {
472 struct rt6_info *sprt;
473
474 for (sprt = rt; sprt; sprt = sprt->u.next) {
475 if (rt6_flow_match_out(sprt, sk)) {
476 rt = sprt;
477 dst_clone(&rt->u.dst);
478 goto out;
479 }
480 }
481 }
482 #endif
483 }
484 if (rt->rt6i_flags & RTF_DEFAULT) {
485 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
486 rt = rt6_best_dflt(rt, fl->oif);
487 } else {
488 rt = rt6_device_match(rt, fl->oif, strict);
489 BACKTRACK();
490 }
491
492 if (ip6_rt_policy == 0) {
493 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
494 read_unlock_bh(&rt6_lock);
495
496 rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
497 fl->nl_u.ip6_u.saddr);
498
499 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500 goto out2;
501
502 /* Race condition! In the gap, when rt6_lock was
503 released someone could insert this route. Relookup.
504 */
505 goto relookup;
506 }
507 dst_clone(&rt->u.dst);
508 } else {
509 #ifdef CONFIG_RT6_POLICY
510 rt = rt6_flow_lookup_out(rt, sk, fl);
511 #else
512 /* NEVER REACHED */
513 #endif
514 }
515
516 out:
517 read_unlock_bh(&rt6_lock);
518 out2:
519 rt->u.dst.lastuse = jiffies;
520 rt->u.dst.__use++;
521 return &rt->u.dst;
522 }
523
524
525 /*
526 * Destination cache support functions
527 */
528
529 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
530 {
531 struct rt6_info *rt;
532
533 rt = (struct rt6_info *) dst;
534
535 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
536 return dst;
537
538 dst_release(dst);
539 return NULL;
540 }
541
542 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
543 {
544 /*
545 * FIXME
546 */
547 RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
548 __builtin_return_address(0)));
549 return NULL;
550 }
551
552 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
553 {
554 struct rt6_info *rt = (struct rt6_info *) dst;
555
556 if (rt) {
557 if (rt->rt6i_flags & RTF_CACHE)
558 ip6_del_rt(rt);
559 else
560 dst_release(dst);
561 }
562 return NULL;
563 }
564
565 static void ip6_link_failure(struct sk_buff *skb)
566 {
567 struct rt6_info *rt;
568
569 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
570
571 rt = (struct rt6_info *) skb->dst;
572 if (rt) {
573 if (rt->rt6i_flags&RTF_CACHE) {
574 dst_set_expires(&rt->u.dst, 0);
575 rt->rt6i_flags |= RTF_EXPIRES;
576 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
577 rt->rt6i_node->fn_sernum = -1;
578 }
579 }
580
581 static int ip6_dst_gc()
582 {
583 static unsigned expire = 30*HZ;
584 static unsigned long last_gc;
585 unsigned long now = jiffies;
586
587 if ((long)(now - last_gc) < ip6_rt_gc_min_interval &&
588 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
589 goto out;
590
591 expire++;
592 fib6_run_gc(expire);
593 last_gc = now;
594 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
595 expire = ip6_rt_gc_timeout>>1;
596
597 out:
598 expire -= expire>>ip6_rt_gc_elasticity;
599 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
600 }
601
/* Clean the host part of a prefix. Not necessary in a radix tree,
   but results in cleaner routing tables.

   Remove it only when everything works!
 */
607
/*
 * Zero every bit of @pfx beyond the first @plen bits.
 *
 * Whole bytes after the prefix are cleared with memset(); when the prefix
 * ends inside a byte, the low (8 - plen % 8) bits of that byte are masked
 * off as well.  A @plen of 128 leaves the address untouched.
 */
static void ipv6_wash_prefix(struct in6_addr *pfx, int plen)
{
	int partial_bits = plen & 0x7;
	int first_host_byte = (plen + 7) >> 3;

	if (first_host_byte < 16)
		memset(&pfx->s6_addr[first_host_byte], 0, 16 - first_host_byte);

	if (partial_bits)
		pfx->s6_addr[plen >> 3] &= (0xFF << (8 - partial_bits));
}
618
619 static int ipv6_get_mtu(struct net_device *dev)
620 {
621 int mtu = IPV6_MIN_MTU;
622 struct inet6_dev *idev;
623
624 idev = in6_dev_get(dev);
625 if (idev) {
626 mtu = idev->cnf.mtu6;
627 in6_dev_put(idev);
628 }
629 return mtu;
630 }
631
632 static int ipv6_get_hoplimit(struct net_device *dev)
633 {
634 int hoplimit = ipv6_devconf.hop_limit;
635 struct inet6_dev *idev;
636
637 idev = in6_dev_get(dev);
638 if (idev) {
639 hoplimit = idev->cnf.hop_limit;
640 in6_dev_put(idev);
641 }
642 return hoplimit;
643 }
644
645 /*
646 *
647 */
648
649 int ip6_route_add(struct in6_rtmsg *rtmsg)
650 {
651 int err;
652 struct rt6_info *rt;
653 struct net_device *dev = NULL;
654 int addr_type;
655
656 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
657 return -EINVAL;
658 #ifndef CONFIG_IPV6_SUBTREES
659 if (rtmsg->rtmsg_src_len)
660 return -EINVAL;
661 #endif
662 if (rtmsg->rtmsg_metric == 0)
663 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
664
665 rt = dst_alloc(&ip6_dst_ops);
666
667 if (rt == NULL)
668 return -ENOMEM;
669
670 rt->u.dst.obsolete = -1;
671 rt->rt6i_expires = rtmsg->rtmsg_info;
672
673 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
674
675 if (addr_type & IPV6_ADDR_MULTICAST)
676 rt->u.dst.input = ip6_mc_input;
677 else
678 rt->u.dst.input = ip6_forward;
679
680 rt->u.dst.output = ip6_output;
681
682 if (rtmsg->rtmsg_ifindex) {
683 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
684 err = -ENODEV;
685 if (dev == NULL)
686 goto out;
687 }
688
689 ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst);
690 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
691 if (rt->rt6i_dst.plen == 128)
692 rt->u.dst.flags = DST_HOST;
693 ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen);
694
695 #ifdef CONFIG_IPV6_SUBTREES
696 ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src);
697 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
698 ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen);
699 #endif
700
701 rt->rt6i_metric = rtmsg->rtmsg_metric;
702
703 /* We cannot add true routes via loopback here,
704 they would result in kernel looping; promote them to reject routes
705 */
706 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
707 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
708 if (dev)
709 dev_put(dev);
710 dev = &loopback_dev;
711 dev_hold(dev);
712 rt->u.dst.output = ip6_pkt_discard;
713 rt->u.dst.input = ip6_pkt_discard;
714 rt->u.dst.error = -ENETUNREACH;
715 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
716 goto install_route;
717 }
718
719 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
720 struct in6_addr *gw_addr;
721 int gwa_type;
722
723 gw_addr = &rtmsg->rtmsg_gateway;
724 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
725 gwa_type = ipv6_addr_type(gw_addr);
726
727 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
728 struct rt6_info *grt;
729
730 /* IPv6 strictly inhibits using not link-local
731 addresses as nexthop address.
732 Otherwise, router will not able to send redirects.
733 It is very good, but in some (rare!) curcumstances
734 (SIT, PtP, NBMA NOARP links) it is handy to allow
735 some exceptions. --ANK
736 */
737 err = -EINVAL;
738 if (!(gwa_type&IPV6_ADDR_UNICAST))
739 goto out;
740
741 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
742
743 err = -EHOSTUNREACH;
744 if (grt == NULL)
745 goto out;
746 if (dev) {
747 if (dev != grt->rt6i_dev) {
748 dst_release(&grt->u.dst);
749 goto out;
750 }
751 } else {
752 dev = grt->rt6i_dev;
753 dev_hold(dev);
754 }
755 if (!(grt->rt6i_flags&RTF_GATEWAY))
756 err = 0;
757 dst_release(&grt->u.dst);
758
759 if (err)
760 goto out;
761 }
762 err = -EINVAL;
763 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
764 goto out;
765 }
766
767 err = -ENODEV;
768 if (dev == NULL)
769 goto out;
770
771 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
772 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
773 if (IS_ERR(rt->rt6i_nexthop)) {
774 err = PTR_ERR(rt->rt6i_nexthop);
775 rt->rt6i_nexthop = NULL;
776 goto out;
777 }
778 }
779
780 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
781 rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
782 else
783 rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
784 rt->rt6i_flags = rtmsg->rtmsg_flags;
785
786 install_route:
787 rt->u.dst.pmtu = ipv6_get_mtu(dev);
788 rt->u.dst.advmss = max(rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
789 /* Maximal non-jumbo IPv6 payload is 65535 and corresponding
790 MSS is 65535 - tcp_header_size. 65535 is also valid and
791 means: "any MSS, rely only on pmtu discovery"
792 */
793 if (rt->u.dst.advmss > 65535-20)
794 rt->u.dst.advmss = 65535;
795 rt->u.dst.dev = dev;
796 return rt6_ins(rt);
797
798 out:
799 if (dev)
800 dev_put(dev);
801 dst_free((struct dst_entry *) rt);
802 return err;
803 }
804
805 int ip6_del_rt(struct rt6_info *rt)
806 {
807 int err;
808
809 write_lock_bh(&rt6_lock);
810
811 spin_lock_bh(&rt6_dflt_lock);
812 rt6_dflt_pointer = NULL;
813 spin_unlock_bh(&rt6_dflt_lock);
814
815 dst_release(&rt->u.dst);
816
817 err = fib6_del(rt);
818 write_unlock_bh(&rt6_lock);
819
820 return err;
821 }
822
823 int ip6_route_del(struct in6_rtmsg *rtmsg)
824 {
825 struct fib6_node *fn;
826 struct rt6_info *rt;
827 int err = -ESRCH;
828
829 read_lock_bh(&rt6_lock);
830
831 fn = fib6_locate(&ip6_routing_table,
832 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
833 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
834
835 if (fn) {
836 for (rt = fn->leaf; rt; rt = rt->u.next) {
837 if (rtmsg->rtmsg_ifindex &&
838 (rt->rt6i_dev == NULL ||
839 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
840 continue;
841 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
842 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
843 continue;
844 if (rtmsg->rtmsg_metric &&
845 rtmsg->rtmsg_metric != rt->rt6i_metric)
846 continue;
847 dst_clone(&rt->u.dst);
848 read_unlock_bh(&rt6_lock);
849
850 return ip6_del_rt(rt);
851 }
852 }
853 read_unlock_bh(&rt6_lock);
854
855 return err;
856 }
857
858 /*
859 * Handle redirects
860 */
861 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
862 struct neighbour *neigh, int on_link)
863 {
864 struct rt6_info *rt, *nrt;
865
866 /* Locate old route to this destination. */
867 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
868
869 if (rt == NULL)
870 return;
871
872 if (neigh->dev != rt->rt6i_dev)
873 goto out;
874
875 /* Redirect received -> path was valid.
876 Look, redirects are sent only in response to data packets,
877 so that this nexthop apparently is reachable. --ANK
878 */
879 dst_confirm(&rt->u.dst);
880
881 /* Duplicate redirect: silently ignore. */
882 if (neigh == rt->u.dst.neighbour)
883 goto out;
884
885 /* Current route is on-link; redirect is always invalid.
886
887 Seems, previous statement is not true. It could
888 be node, which looks for us as on-link (f.e. proxy ndisc)
889 But then router serving it might decide, that we should
890 know truth 8)8) --ANK (980726).
891 */
892 if (!(rt->rt6i_flags&RTF_GATEWAY))
893 goto out;
894
895 #if !defined(CONFIG_IPV6_EUI64) || defined(CONFIG_IPV6_NO_PB)
896 /*
897 * During transition gateways have more than
898 * one link local address. Certainly, it is violation
899 * of basic principles, but it is temporary.
900 */
901 /*
902 * RFC 1970 specifies that redirects should only be
903 * accepted if they come from the nexthop to the target.
904 * Due to the way default routers are chosen, this notion
905 * is a bit fuzzy and one might need to check all default
906 * routers.
907 */
908
909 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
910 if (rt->rt6i_flags & RTF_DEFAULT) {
911 struct rt6_info *rt1;
912
913 read_lock(&rt6_lock);
914 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
915 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
916 dst_clone(&rt1->u.dst);
917 dst_release(&rt->u.dst);
918 read_unlock(&rt6_lock);
919 rt = rt1;
920 goto source_ok;
921 }
922 }
923 read_unlock(&rt6_lock);
924 }
925 if (net_ratelimit())
926 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
927 "for redirect target\n");
928 goto out;
929 }
930
931 source_ok:
932 #endif
933
934 /*
935 * We have finally decided to accept it.
936 */
937
938 nrt = ip6_rt_copy(rt);
939 if (nrt == NULL)
940 goto out;
941
942 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
943 if (on_link)
944 nrt->rt6i_flags &= ~RTF_GATEWAY;
945
946 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
947 nrt->rt6i_dst.plen = 128;
948 nrt->u.dst.flags |= DST_HOST;
949
950 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
951 nrt->rt6i_nexthop = neigh_clone(neigh);
952 /* Reset pmtu, it may be better */
953 nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
954 nrt->u.dst.advmss = max(nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
955 if (rt->u.dst.advmss > 65535-20)
956 rt->u.dst.advmss = 65535;
957 nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
958
959 if (rt6_ins(nrt))
960 goto out;
961
962 if (rt->rt6i_flags&RTF_CACHE) {
963 ip6_del_rt(rt);
964 return;
965 }
966
967 out:
968 dst_release(&rt->u.dst);
969 return;
970 }
971
972 /*
973 * Handle ICMP "packet too big" messages
974 * i.e. Path MTU discovery
975 */
976
977 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
978 struct net_device *dev, u32 pmtu)
979 {
980 struct rt6_info *rt, *nrt;
981
982 if (pmtu < IPV6_MIN_MTU) {
983 if (net_ratelimit())
984 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
985 pmtu);
986 return;
987 }
988
989 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
990
991 if (rt == NULL)
992 return;
993
994 if (pmtu >= rt->u.dst.pmtu)
995 goto out;
996
997 /* New mtu received -> path was valid.
998 They are sent only in response to data packets,
999 so that this nexthop apparently is reachable. --ANK
1000 */
1001 dst_confirm(&rt->u.dst);
1002
1003 /* Host route. If it is static, it would be better
1004 not to override it, but add new one, so that
1005 when cache entry will expire old pmtu
1006 would return automatically.
1007 */
1008 if (rt->rt6i_flags & RTF_CACHE) {
1009 rt->u.dst.pmtu = pmtu;
1010 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1011 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1012 goto out;
1013 }
1014
1015 /* Network route.
1016 Two cases are possible:
1017 1. It is connected route. Action: COW
1018 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1019 */
1020 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1021 nrt = rt6_cow(rt, daddr, saddr);
1022 if (!nrt->u.dst.error) {
1023 nrt->u.dst.pmtu = pmtu;
1024 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1025 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1026 dst_release(&nrt->u.dst);
1027 }
1028 } else {
1029 nrt = ip6_rt_copy(rt);
1030 if (nrt == NULL)
1031 goto out;
1032 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1033 nrt->rt6i_dst.plen = 128;
1034 nrt->u.dst.flags |= DST_HOST;
1035 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1036 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1037 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1038 nrt->u.dst.pmtu = pmtu;
1039 rt6_ins(nrt);
1040 }
1041
1042 out:
1043 dst_release(&rt->u.dst);
1044 }
1045
1046 /*
1047 * Misc support functions
1048 */
1049
1050 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1051 {
1052 struct rt6_info *rt;
1053
1054 rt = dst_alloc(&ip6_dst_ops);
1055
1056 if (rt) {
1057 rt->u.dst.input = ort->u.dst.input;
1058 rt->u.dst.output = ort->u.dst.output;
1059
1060 memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
1061 rt->u.dst.dev = ort->u.dst.dev;
1062 if (rt->u.dst.dev)
1063 dev_hold(rt->u.dst.dev);
1064 rt->u.dst.lastuse = jiffies;
1065 rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1066 rt->rt6i_expires = 0;
1067
1068 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1069 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1070 rt->rt6i_metric = 0;
1071
1072 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1073 #ifdef CONFIG_IPV6_SUBTREES
1074 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1075 #endif
1076 }
1077 return rt;
1078 }
1079
1080 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1081 {
1082 struct rt6_info *rt;
1083 struct fib6_node *fn;
1084
1085 fn = &ip6_routing_table;
1086
1087 write_lock_bh(&rt6_lock);
1088 for (rt = fn->leaf; rt; rt=rt->u.next) {
1089 if (dev == rt->rt6i_dev &&
1090 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1091 break;
1092 }
1093 if (rt)
1094 dst_clone(&rt->u.dst);
1095 write_unlock_bh(&rt6_lock);
1096 return rt;
1097 }
1098
1099 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1100 struct net_device *dev)
1101 {
1102 struct in6_rtmsg rtmsg;
1103
1104 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1105 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1106 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1107 rtmsg.rtmsg_metric = 1024;
1108 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1109
1110 rtmsg.rtmsg_ifindex = dev->ifindex;
1111
1112 ip6_route_add(&rtmsg);
1113 return rt6_get_dflt_router(gwaddr, dev);
1114 }
1115
1116 void rt6_purge_dflt_routers(int last_resort)
1117 {
1118 struct rt6_info *rt;
1119 u32 flags;
1120
1121 if (last_resort)
1122 flags = RTF_ALLONLINK;
1123 else
1124 flags = RTF_DEFAULT | RTF_ADDRCONF;
1125
1126 restart:
1127 read_lock_bh(&rt6_lock);
1128 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1129 if (rt->rt6i_flags & flags) {
1130 dst_hold(&rt->u.dst);
1131
1132 spin_lock_bh(&rt6_dflt_lock);
1133 rt6_dflt_pointer = NULL;
1134 spin_unlock_bh(&rt6_dflt_lock);
1135
1136 read_unlock_bh(&rt6_lock);
1137
1138 ip6_del_rt(rt);
1139
1140 goto restart;
1141 }
1142 }
1143 read_unlock_bh(&rt6_lock);
1144 }
1145
1146 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1147 {
1148 struct in6_rtmsg rtmsg;
1149 int err;
1150
1151 switch(cmd) {
1152 case SIOCADDRT: /* Add a route */
1153 case SIOCDELRT: /* Delete a route */
1154 if (!capable(CAP_NET_ADMIN))
1155 return -EPERM;
1156 err = copy_from_user(&rtmsg, arg,
1157 sizeof(struct in6_rtmsg));
1158 if (err)
1159 return -EFAULT;
1160
1161 rtnl_lock();
1162 switch (cmd) {
1163 case SIOCADDRT:
1164 err = ip6_route_add(&rtmsg);
1165 break;
1166 case SIOCDELRT:
1167 err = ip6_route_del(&rtmsg);
1168 break;
1169 default:
1170 err = -EINVAL;
1171 }
1172 rtnl_unlock();
1173
1174 return err;
1175 };
1176
1177 return -EINVAL;
1178 }
1179
1180 /*
1181 * Drop the packet on the floor
1182 */
1183
1184 int ip6_pkt_discard(struct sk_buff *skb)
1185 {
1186 IP6_INC_STATS(Ip6OutNoRoutes);
1187 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
1188 kfree_skb(skb);
1189 return 0;
1190 }
1191
1192 /*
1193 * Add address
1194 */
1195
1196 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1197 {
1198 struct rt6_info *rt;
1199
1200 rt = dst_alloc(&ip6_dst_ops);
1201 if (rt == NULL)
1202 return -ENOMEM;
1203
1204 rt->u.dst.flags = DST_HOST;
1205 rt->u.dst.input = ip6_input;
1206 rt->u.dst.output = ip6_output;
1207 rt->rt6i_dev = dev_get_by_name("lo");
1208 rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1209 rt->u.dst.advmss = max(rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1210 if (rt->u.dst.advmss > 65535-20)
1211 rt->u.dst.advmss = 65535;
1212 rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1213 rt->u.dst.obsolete = -1;
1214
1215 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1216 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1217 if (rt->rt6i_nexthop == NULL) {
1218 dst_free((struct dst_entry *) rt);
1219 return -ENOMEM;
1220 }
1221
1222 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1223 rt->rt6i_dst.plen = 128;
1224 rt6_ins(rt);
1225
1226 return 0;
1227 }
1228
1229 /* Delete address. Warning: you should check that this address
1230 disappeared before calling this function.
1231 */
1232
1233 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1234 {
1235 struct rt6_info *rt;
1236 int err = -ENOENT;
1237
1238 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1239 if (rt) {
1240 if (rt->rt6i_dst.plen == 128)
1241 err = ip6_del_rt(rt);
1242 else
1243 dst_release(&rt->u.dst);
1244 }
1245
1246 return err;
1247 }
1248
1249 #ifdef CONFIG_RT6_POLICY
1250
1251 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1252 {
1253 struct flow_filter *frule;
1254 struct pkt_filter *filter;
1255 int res = 1;
1256
1257 if ((frule = rt->rt6i_filter) == NULL)
1258 goto out;
1259
1260 if (frule->type != FLR_INPUT) {
1261 res = 0;
1262 goto out;
1263 }
1264
1265 for (filter = frule->u.filter; filter; filter = filter->next) {
1266 __u32 *word;
1267
1268 word = (__u32 *) skb->h.raw;
1269 word += filter->offset;
1270
1271 if ((*word ^ filter->value) & filter->mask) {
1272 res = 0;
1273 break;
1274 }
1275 }
1276
1277 out:
1278 return res;
1279 }
1280
1281 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1282 {
1283 struct flow_filter *frule;
1284 int res = 1;
1285
1286 if ((frule = rt->rt6i_filter) == NULL)
1287 goto out;
1288
1289 if (frule->type != FLR_INPUT) {
1290 res = 0;
1291 goto out;
1292 }
1293
1294 if (frule->u.sk != sk)
1295 res = 0;
1296 out:
1297 return res;
1298 }
1299
1300 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1301 struct in6_addr *daddr,
1302 struct in6_addr *saddr,
1303 struct fl_acc_args *args)
1304 {
1305 struct flow_rule *frule;
1306 struct rt6_info *nrt = NULL;
1307 struct pol_chain *pol;
1308
1309 for (pol = rt6_pol_list; pol; pol = pol->next) {
1310 struct fib6_node *fn;
1311 struct rt6_info *sprt;
1312
1313 fn = fib6_lookup(pol->rules, daddr, saddr);
1314
1315 do {
1316 for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1317 int res;
1318
1319 frule = sprt->rt6i_flowr;
1320 #if RT6_DEBUG >= 2
1321 if (frule == NULL) {
1322 printk(KERN_DEBUG "NULL flowr\n");
1323 goto error;
1324 }
1325 #endif
1326 res = frule->ops->accept(rt, sprt, args, &nrt);
1327
1328 switch (res) {
1329 case FLOWR_SELECT:
1330 goto found;
1331 case FLOWR_CLEAR:
1332 goto next_policy;
1333 case FLOWR_NODECISION:
1334 break;
1335 default:
1336 goto error;
1337 };
1338 }
1339
1340 fn = fn->parent;
1341
1342 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1343
1344 next_policy:
1345 }
1346
1347 error:
1348 dst_clone(&ip6_null_entry.u.dst);
1349 return &ip6_null_entry;
1350
1351 found:
1352 if (nrt == NULL)
1353 goto error;
1354
1355 nrt->rt6i_flags |= RTF_CACHE;
1356 dst_clone(&nrt->u.dst);
1357 err = rt6_ins(nrt);
1358 if (err)
1359 nrt->u.dst.error = err;
1360 return nrt;
1361 }
1362 #endif
1363
1364 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1365 {
1366 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1367 rt != &ip6_null_entry) {
1368 RT6_TRACE("deleted by ifdown %p\n", rt);
1369 return -1;
1370 }
1371 return 0;
1372 }
1373
1374 void rt6_ifdown(struct net_device *dev)
1375 {
1376 write_lock_bh(&rt6_lock);
1377 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1378 write_unlock_bh(&rt6_lock);
1379 }
1380
/* Argument handed to rt6_mtu_change_route() through fib6_clean_tree(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device the change applies to */
	unsigned int		mtu;	/* MTU value to apply */
};
1386
1387 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1388 {
1389 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1390
1391 /* In IPv6 pmtu discovery is not optional,
1392 so that RTAX_MTU lock cannot disable it.
1393 We still use this lock to block changes
1394 caused by addrconf/ndisc.
1395 */
1396 if (rt->rt6i_dev == arg->dev &&
1397 !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
1398 rt->u.dst.pmtu = arg->mtu;
1399 rt->u.dst.advmss = max(arg->mtu - 60, ip6_rt_min_advmss);
1400 if (rt->u.dst.advmss > 65535-20)
1401 rt->u.dst.advmss = 65535;
1402 return 0;
1403 }
1404
1405 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1406 {
1407 struct rt6_mtu_change_arg arg;
1408
1409 arg.dev = dev;
1410 arg.mtu = mtu;
1411 read_lock_bh(&rt6_lock);
1412 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1413 read_unlock_bh(&rt6_lock);
1414 }
1415
1416 #ifdef CONFIG_RTNETLINK
1417
1418 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1419 struct in6_rtmsg *rtmsg)
1420 {
1421 memset(rtmsg, 0, sizeof(*rtmsg));
1422
1423 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1424 rtmsg->rtmsg_src_len = r->rtm_src_len;
1425 rtmsg->rtmsg_flags = RTF_UP;
1426 if (r->rtm_type == RTN_UNREACHABLE)
1427 rtmsg->rtmsg_flags |= RTF_REJECT;
1428
1429 if (rta[RTA_GATEWAY-1]) {
1430 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1431 return -EINVAL;
1432 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1433 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1434 }
1435 if (rta[RTA_DST-1]) {
1436 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1437 return -EINVAL;
1438 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1439 }
1440 if (rta[RTA_SRC-1]) {
1441 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1442 return -EINVAL;
1443 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1444 }
1445 if (rta[RTA_OIF-1]) {
1446 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1447 return -EINVAL;
1448 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1449 }
1450 if (rta[RTA_PRIORITY-1]) {
1451 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1452 return -EINVAL;
1453 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1454 }
1455 return 0;
1456 }
1457
1458 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1459 {
1460 struct rtmsg *r = NLMSG_DATA(nlh);
1461 struct in6_rtmsg rtmsg;
1462
1463 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1464 return -EINVAL;
1465 return ip6_route_del(&rtmsg);
1466 }
1467
1468 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1469 {
1470 struct rtmsg *r = NLMSG_DATA(nlh);
1471 struct in6_rtmsg rtmsg;
1472
1473 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1474 return -EINVAL;
1475 return ip6_route_add(&rtmsg);
1476 }
1477
/* Per-call state of a table dump, passed to the walker via w->args. */
struct rt6_rtnl_dump_arg {
	struct sk_buff		*skb;	/* buffer the routes are written to */
	struct netlink_callback	*cb;	/* dump callback state of the request */
};
1483
1484 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1485 struct in6_addr *dst,
1486 struct in6_addr *src,
1487 int iif,
1488 int type, u32 pid, u32 seq)
1489 {
1490 struct rtmsg *rtm;
1491 struct nlmsghdr *nlh;
1492 unsigned char *b = skb->tail;
1493 struct rta_cacheinfo ci;
1494
1495 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1496 rtm = NLMSG_DATA(nlh);
1497 rtm->rtm_family = AF_INET6;
1498 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1499 rtm->rtm_src_len = rt->rt6i_src.plen;
1500 rtm->rtm_tos = 0;
1501 rtm->rtm_table = RT_TABLE_MAIN;
1502 if (rt->rt6i_flags&RTF_REJECT)
1503 rtm->rtm_type = RTN_UNREACHABLE;
1504 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1505 rtm->rtm_type = RTN_LOCAL;
1506 else
1507 rtm->rtm_type = RTN_UNICAST;
1508 rtm->rtm_flags = 0;
1509 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1510 rtm->rtm_protocol = RTPROT_BOOT;
1511 if (rt->rt6i_flags&RTF_DYNAMIC)
1512 rtm->rtm_protocol = RTPROT_REDIRECT;
1513 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1514 rtm->rtm_protocol = RTPROT_KERNEL;
1515 else if (rt->rt6i_flags&RTF_DEFAULT)
1516 rtm->rtm_protocol = RTPROT_RA;
1517
1518 if (rt->rt6i_flags&RTF_CACHE)
1519 rtm->rtm_flags |= RTM_F_CLONED;
1520
1521 if (dst) {
1522 RTA_PUT(skb, RTA_DST, 16, dst);
1523 rtm->rtm_dst_len = 128;
1524 } else if (rtm->rtm_dst_len)
1525 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1526 #ifdef CONFIG_IPV6_SUBTREES
1527 if (src) {
1528 RTA_PUT(skb, RTA_SRC, 16, src);
1529 rtm->rtm_src_len = 128;
1530 } else if (rtm->rtm_src_len)
1531 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1532 #endif
1533 if (iif)
1534 RTA_PUT(skb, RTA_IIF, 4, &iif);
1535 else if (dst) {
1536 struct in6_addr saddr_buf;
1537 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1538 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1539 }
1540 if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
1541 goto rtattr_failure;
1542 if (rt->u.dst.neighbour)
1543 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1544 if (rt->u.dst.dev)
1545 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1546 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1547 ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1548 if (rt->rt6i_expires)
1549 ci.rta_expires = rt->rt6i_expires - jiffies;
1550 else
1551 ci.rta_expires = 0;
1552 ci.rta_used = rt->u.dst.__use;
1553 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1554 ci.rta_error = rt->u.dst.error;
1555 ci.rta_id = 0;
1556 ci.rta_ts = 0;
1557 ci.rta_tsage = 0;
1558 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1559 nlh->nlmsg_len = skb->tail - b;
1560 return skb->len;
1561
1562 nlmsg_failure:
1563 rtattr_failure:
1564 skb_trim(skb, b - skb->data);
1565 return -1;
1566 }
1567
1568 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1569 {
1570 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1571
1572 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1573 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq);
1574 }
1575
1576 static int fib6_dump_node(struct fib6_walker_t *w)
1577 {
1578 int res;
1579 struct rt6_info *rt;
1580
1581 for (rt = w->leaf; rt; rt = rt->u.next) {
1582 res = rt6_dump_route(rt, w->args);
1583 if (res < 0) {
1584 /* Frame is full, suspend walking */
1585 w->leaf = rt;
1586 return 1;
1587 }
1588 BUG_TRAP(res!=0);
1589 }
1590 w->leaf = NULL;
1591 return 0;
1592 }
1593
1594 static void fib6_dump_end(struct netlink_callback *cb)
1595 {
1596 struct fib6_walker_t *w = (void*)cb->args[0];
1597
1598 if (w) {
1599 cb->args[0] = 0;
1600 fib6_walker_unlink(w);
1601 kfree(w);
1602 }
1603 if (cb->args[1]) {
1604 cb->done = (void*)cb->args[1];
1605 cb->args[1] = 0;
1606 }
1607 }
1608
1609 static int fib6_dump_done(struct netlink_callback *cb)
1610 {
1611 fib6_dump_end(cb);
1612 return cb->done(cb);
1613 }
1614
1615 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1616 {
1617 struct rt6_rtnl_dump_arg arg;
1618 struct fib6_walker_t *w;
1619 int res;
1620
1621 arg.skb = skb;
1622 arg.cb = cb;
1623
1624 w = (void*)cb->args[0];
1625 if (w == NULL) {
1626 /* New dump:
1627 *
1628 * 1. hook callback destructor.
1629 */
1630 cb->args[1] = (long)cb->done;
1631 cb->done = fib6_dump_done;
1632
1633 /*
1634 * 2. allocate and initialize walker.
1635 */
1636 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1637 if (w == NULL)
1638 return -ENOMEM;
1639 RT6_TRACE("dump<%p", w);
1640 memset(w, 0, sizeof(*w));
1641 w->root = &ip6_routing_table;
1642 w->func = fib6_dump_node;
1643 w->args = &arg;
1644 cb->args[0] = (long)w;
1645 read_lock_bh(&rt6_lock);
1646 res = fib6_walk(w);
1647 read_unlock_bh(&rt6_lock);
1648 } else {
1649 w->args = &arg;
1650 read_lock_bh(&rt6_lock);
1651 res = fib6_walk_continue(w);
1652 read_unlock_bh(&rt6_lock);
1653 }
1654 #if RT6_DEBUG >= 3
1655 if (res <= 0 && skb->len == 0)
1656 RT6_TRACE("%p>dump end\n", w);
1657 #endif
1658 res = res < 0 ? res : skb->len;
1659 /* res < 0 is an error. (really, impossible)
1660 res == 0 means that dump is complete, but skb still can contain data.
1661 res > 0 dump is not complete, but frame is full.
1662 */
1663 /* Destroy walker, if dump of this table is complete. */
1664 if (res <= 0)
1665 fib6_dump_end(cb);
1666 return res;
1667 }
1668
1669 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1670 {
1671 struct rtattr **rta = arg;
1672 int iif = 0;
1673 int err;
1674 struct sk_buff *skb;
1675 struct flowi fl;
1676 struct rt6_info *rt;
1677
1678 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1679 if (skb == NULL)
1680 return -ENOBUFS;
1681
1682 /* Reserve room for dummy headers, this skb can pass
1683 through good chunk of routing engine.
1684 */
1685 skb->mac.raw = skb->data;
1686 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1687
1688 fl.proto = 0;
1689 fl.nl_u.ip6_u.daddr = NULL;
1690 fl.nl_u.ip6_u.saddr = NULL;
1691 fl.uli_u.icmpt.type = 0;
1692 fl.uli_u.icmpt.code = 0;
1693 if (rta[RTA_SRC-1])
1694 fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
1695 if (rta[RTA_DST-1])
1696 fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
1697
1698 if (rta[RTA_IIF-1])
1699 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1700
1701 if (iif) {
1702 struct net_device *dev;
1703 dev = __dev_get_by_index(iif);
1704 if (!dev)
1705 return -ENODEV;
1706 }
1707
1708 fl.oif = 0;
1709 if (rta[RTA_OIF-1])
1710 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1711
1712 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1713
1714 skb->dst = &rt->u.dst;
1715
1716 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1717 err = rt6_fill_node(skb, rt,
1718 fl.nl_u.ip6_u.daddr,
1719 fl.nl_u.ip6_u.saddr,
1720 iif,
1721 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq);
1722 if (err < 0)
1723 return -EMSGSIZE;
1724
1725 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1726 if (err < 0)
1727 return err;
1728 return 0;
1729 }
1730
1731 void inet6_rt_notify(int event, struct rt6_info *rt)
1732 {
1733 struct sk_buff *skb;
1734 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1735
1736 skb = alloc_skb(size, gfp_any());
1737 if (!skb) {
1738 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1739 return;
1740 }
1741 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0) < 0) {
1742 kfree_skb(skb);
1743 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1744 return;
1745 }
1746 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1747 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1748 }
1749
1750 #endif
1751
1752 /*
1753 * /proc
1754 */
1755
1756 #ifdef CONFIG_PROC_FS
1757
/*
 * Length in characters of one line written by rt6_info_route():
 * three 32-digit addresses, two 4-character prefix-length fields,
 * and the trailing metric/refcnt/use/flags/device fields with the
 * newline.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char	*buffer;	/* output buffer */
	int	offset;		/* read offset requested by the caller */
	int	length;		/* number of bytes requested */
	int	skip;		/* routes skipped so far */
	int	len;		/* bytes written to buffer so far */
};
1768
1769 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1770 {
1771 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1772 int i;
1773
1774 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1775 arg->skip++;
1776 return 0;
1777 }
1778
1779 if (arg->len >= arg->length)
1780 return 0;
1781
1782 for (i=0; i<16; i++) {
1783 sprintf(arg->buffer + arg->len, "%02x",
1784 rt->rt6i_dst.addr.s6_addr[i]);
1785 arg->len += 2;
1786 }
1787 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1788 rt->rt6i_dst.plen);
1789
1790 #ifdef CONFIG_IPV6_SUBTREES
1791 for (i=0; i<16; i++) {
1792 sprintf(arg->buffer + arg->len, "%02x",
1793 rt->rt6i_src.addr.s6_addr[i]);
1794 arg->len += 2;
1795 }
1796 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1797 rt->rt6i_src.plen);
1798 #else
1799 sprintf(arg->buffer + arg->len,
1800 "00000000000000000000000000000000 00 ");
1801 arg->len += 36;
1802 #endif
1803
1804 if (rt->rt6i_nexthop) {
1805 for (i=0; i<16; i++) {
1806 sprintf(arg->buffer + arg->len, "%02x",
1807 rt->rt6i_nexthop->primary_key[i]);
1808 arg->len += 2;
1809 }
1810 } else {
1811 sprintf(arg->buffer + arg->len,
1812 "00000000000000000000000000000000");
1813 arg->len += 32;
1814 }
1815 arg->len += sprintf(arg->buffer + arg->len,
1816 " %08x %08x %08x %08x %8s\n",
1817 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1818 rt->u.dst.__use, rt->rt6i_flags,
1819 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1820 return 0;
1821 }
1822
1823 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1824 {
1825 struct rt6_proc_arg arg;
1826 arg.buffer = buffer;
1827 arg.offset = offset;
1828 arg.length = length;
1829 arg.skip = 0;
1830 arg.len = 0;
1831
1832 read_lock_bh(&rt6_lock);
1833 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1834 read_unlock_bh(&rt6_lock);
1835
1836 *start = buffer;
1837 if (offset)
1838 *start += offset % RT6_INFO_LEN;
1839
1840 arg.len -= offset % RT6_INFO_LEN;
1841
1842 if (arg.len > length)
1843 arg.len = length;
1844 if (arg.len < 0)
1845 arg.len = 0;
1846
1847 return arg.len;
1848 }
1849
1850 extern struct rt6_statistics rt6_stats;
1851
1852 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
1853 {
1854 int len;
1855
1856 len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
1857 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1858 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1859 rt6_stats.fib_rt_cache,
1860 atomic_read(&ip6_dst_ops.entries));
1861
1862 len -= offset;
1863
1864 if (len > length)
1865 len = length;
1866 if(len < 0)
1867 len = 0;
1868
1869 *start = buffer + offset;
1870
1871 return len;
1872 }
1873 #endif /* CONFIG_PROC_FS */
1874
1875 #ifdef CONFIG_SYSCTL
1876
1877 static int flush_delay;
1878
1879 static
1880 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1881 void *buffer, size_t *lenp)
1882 {
1883 if (write) {
1884 proc_dointvec(ctl, write, filp, buffer, lenp);
1885 if (flush_delay < 0)
1886 flush_delay = 0;
1887 fib6_run_gc((unsigned long)flush_delay);
1888 return 0;
1889 } else
1890 return -EINVAL;
1891 }
1892
1893 ctl_table ipv6_route_table[] = {
1894 {NET_IPV6_ROUTE_FLUSH, "flush",
1895 &flush_delay, sizeof(int), 0644, NULL,
1896 &ipv6_sysctl_rtcache_flush},
1897 {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
1898 &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
1899 &proc_dointvec},
1900 {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
1901 &ip6_rt_max_size, sizeof(int), 0644, NULL,
1902 &proc_dointvec},
1903 {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
1904 &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
1905 &proc_dointvec_jiffies, &sysctl_jiffies},
1906 {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
1907 &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
1908 &proc_dointvec_jiffies, &sysctl_jiffies},
1909 {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
1910 &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
1911 &proc_dointvec_jiffies, &sysctl_jiffies},
1912 {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
1913 &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
1914 &proc_dointvec_jiffies, &sysctl_jiffies},
1915 {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
1916 &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
1917 &proc_dointvec_jiffies, &sysctl_jiffies},
1918 {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
1919 &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
1920 &proc_dointvec_jiffies, &sysctl_jiffies},
1921 {0}
1922 };
1923
1924 #endif
1925
1926
1927 void __init ip6_route_init(void)
1928 {
1929 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1930 sizeof(struct rt6_info),
1931 0, SLAB_HWCACHE_ALIGN,
1932 NULL, NULL);
1933 fib6_init();
1934 #ifdef CONFIG_PROC_FS
1935 proc_net_create("ipv6_route", 0, rt6_proc_info);
1936 proc_net_create("rt6_stats", 0, rt6_proc_stats);
1937 #endif
1938 }
1939
1940 #ifdef MODULE
1941 void ip6_route_cleanup(void)
1942 {
1943 #ifdef CONFIG_PROC_FS
1944 proc_net_remove("ipv6_route");
1945 proc_net_remove("rt6_stats");
1946 #endif
1947
1948 rt6_ifdown(NULL);
1949 fib6_gc_cleanup();
1950 }
1951 #endif /* MODULE */
1952
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.