1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dhinds@allegro.stanford.edu>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * Alan Cox : device private ioctl copies fields back.
24 * Alan Cox : Transmit queue code does relevant stunts to
25 * keep the queue safe.
26 * Alan Cox : Fixed double lock.
27 * Alan Cox : Fixed promisc NULL pointer trap
28 * ???????? : Support the full private ioctl range
29 * Alan Cox : Moved ioctl permission check into drivers
30 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
31 * Alan Cox : 100 backlog just doesn't cut it when
32 * you start doing multicast video 8)
33 * Alan Cox : Rewrote net_bh and list manager.
34 * Alan Cox : Fix ETH_P_ALL echoback lengths.
35 * Alan Cox : Took out transmit every packet pass
36 * Saved a few bytes in the ioctl handler
37 * Alan Cox : Network driver sets packet type before calling netif_rx. Saves
38 * a function call a packet.
39 * Alan Cox : Hashed net_bh()
40 * Richard Kooijman: Timestamp fixes.
41 * Alan Cox : Wrong field in SIOCGIFDSTADDR
42 * Alan Cox : Device lock protection.
43 * Alan Cox : Fixed nasty side effect of device close changes.
44 * Rudi Cilibrasi : Pass the right thing to set_mac_address()
45 * Dave Miller : 32bit quantity for the device lock to make it work out
46 * on a Sparc.
47 * Bjorn Ekwall : Added KERNELD hack.
48 * Alan Cox : Cleaned up the backlog initialise.
49 * Craig Metz : SIOCGIFCONF fix if space for under
50 * 1 device.
51 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
52 * is no device open function.
53 * Andi Kleen : Fix error reporting for SIOCGIFCONF
54 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
55 * Cyrus Durgin : Cleaned for KMOD
56 * Adam Sulmicki : Bug Fix : Network Device Unload
57 * A network device unload needs to purge
58 * the backlog queue.
59 * Paul Rusty Russell : SIOCSIFNAME
60 * Pekka Riikonen : Netdev boot-time settings code
61 * Andrew Morton : Make unregister_netdevice wait indefinitely on dev->refcnt
62 * J Hadi Salim : - Backlog queue sampling
63 * - netif_rx() feedback
64 */
65
66 #include <asm/uaccess.h>
67 #include <asm/system.h>
68 #include <asm/bitops.h>
69 #include <linux/config.h>
70 #include <linux/types.h>
71 #include <linux/kernel.h>
72 #include <linux/sched.h>
73 #include <linux/string.h>
74 #include <linux/mm.h>
75 #include <linux/socket.h>
76 #include <linux/sockios.h>
77 #include <linux/errno.h>
78 #include <linux/interrupt.h>
79 #include <linux/if_ether.h>
80 #include <linux/netdevice.h>
81 #include <linux/etherdevice.h>
82 #include <linux/notifier.h>
83 #include <linux/skbuff.h>
84 #include <linux/brlock.h>
85 #include <net/sock.h>
86 #include <linux/rtnetlink.h>
87 #include <linux/proc_fs.h>
88 #include <linux/stat.h>
89 #include <linux/if_bridge.h>
90 #include <linux/divert.h>
91 #include <net/dst.h>
92 #include <net/pkt_sched.h>
93 #include <net/profile.h>
94 #include <linux/init.h>
95 #include <linux/kmod.h>
96 #include <linux/module.h>
97 #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
98 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
99 #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
100 #ifdef CONFIG_PLIP
101 extern int plip_init(void);
102 #endif
103
104 /* This define, if set, will randomly drop a packet when congestion
105 * is more than moderate. It helps fairness in the multi-interface
106 * case when one of them is a hog, but it kills performance for the
107 * single interface case so it is off now by default.
108 */
109 #undef RAND_LIE
110
111 /* Setting this will sample the queue lengths and thus congestion
112 * via a timer instead of as each packet is received.
113 */
114 #undef OFFLINE_SAMPLE
115
116 NET_PROFILE_DEFINE(dev_queue_xmit)
117 NET_PROFILE_DEFINE(softnet_process)
118
/* Printable names for the interface port (media) types. */
const char *if_port_text[] = {
	"unknown", "BNC", "10baseT", "AUI",
	"100baseT", "100baseTX", "100baseFX"
};
128
129 /*
130 * The list of packet types we will receive (as opposed to discard)
131 * and the routines to invoke.
132 *
133 * Why 16. Because with 16 the only overlap we get on a hash of the
134 * low nibble of the protocol value is RARP/SNAP/X.25.
135 *
136 * 0800 IP
137 * 0001 802.3
138 * 0002 AX.25
139 * 0004 802.2
140 * 8035 RARP
141 * 0005 SNAP
142 * 0805 X.25
143 * 0806 ARP
144 * 8137 IPX
145 * 0009 Localtalk
146 * 86DD IPv6
147 */
148
/* Protocol handlers, hashed 16 ways on the low nibble of the protocol value. */
static struct packet_type *ptype_base[16];

/* Handlers registered with type ETH_P_ALL (taps). */
static struct packet_type *ptype_all = NULL;
151
#ifdef OFFLINE_SAMPLE
/* Timer-driven backlog sampling: samp_timer fires sample_queue(). */
static void sample_queue(unsigned long dummy);
static struct timer_list samp_timer = { function: sample_queue };
#endif

#ifdef CONFIG_HOTPLUG
static int net_run_sbin_hotplug(struct net_device *dev, char *action);
#else
/* Without CONFIG_HOTPLUG the helper collapses to the constant 0. */
#define net_run_sbin_hotplug(dev, action) ({ 0; })
#endif

/*
 *	Our notifier list. Every netdevice event (NETDEV_UP, NETDEV_DOWN,
 *	NETDEV_CHANGE, ...) raised in this file is sent down this chain.
 */

static struct notifier_block *netdev_chain=NULL;

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler. There is one entry per CPU;
 *	netif_rx() picks the entry for the CPU it is running on.
 */
struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;

#ifdef CONFIG_NET_FASTROUTE
int netdev_fastroute;
/* Count of registered packet handlers that carry private data (pt->data). */
int netdev_fastroute_obstacles;
#endif
179
180
181 /******************************************************************************************
182
183 Protocol management and registration routines
184
185 *******************************************************************************************/
186
/*
 *	Number of ETH_P_ALL handlers currently registered. Kept as a
 *	counter so the transmit path can test it cheaply before walking
 *	the tap list.
 */
int netdev_nit = 0;
192
/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in the hash buckets, and the walk over protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now; do not change it.
 *	Explanation: if a packet-mangling protocol handler were
 *	first on the list, it could not tell that the packet
 *	is cloned and should be copied-on-write, so it would
 *	modify it and subsequent readers would get a broken packet.
 *							--ANK (980803)
 */
208
209 /**
210 * dev_add_pack - add packet handler
211 * @pt: packet type declaration
212 *
213 * Add a protocol handler to the networking stack. The passed &packet_type
214 * is linked into kernel lists and may not be freed until it has been
215 * removed from the kernel lists.
216 */
217
218 void dev_add_pack(struct packet_type *pt)
219 {
220 int hash;
221
222 br_write_lock_bh(BR_NETPROTO_LOCK);
223
224 #ifdef CONFIG_NET_FASTROUTE
225 /* Hack to detect packet socket */
226 if (pt->data) {
227 netdev_fastroute_obstacles++;
228 dev_clear_fastroute(pt->dev);
229 }
230 #endif
231 if (pt->type == htons(ETH_P_ALL)) {
232 netdev_nit++;
233 pt->next=ptype_all;
234 ptype_all=pt;
235 } else {
236 hash=ntohs(pt->type)&15;
237 pt->next = ptype_base[hash];
238 ptype_base[hash] = pt;
239 }
240 br_write_unlock_bh(BR_NETPROTO_LOCK);
241 }
242
243
244 /**
245 * dev_remove_pack - remove packet handler
246 * @pt: packet type declaration
247 *
248 * Remove a protocol handler that was previously added to the kernel
249 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
250 * from the kernel lists and can be freed or reused once this function
251 * returns.
252 */
253
254 void dev_remove_pack(struct packet_type *pt)
255 {
256 struct packet_type **pt1;
257
258 br_write_lock_bh(BR_NETPROTO_LOCK);
259
260 if (pt->type == htons(ETH_P_ALL)) {
261 netdev_nit--;
262 pt1=&ptype_all;
263 } else {
264 pt1=&ptype_base[ntohs(pt->type)&15];
265 }
266
267 for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
268 if (pt == (*pt1)) {
269 *pt1 = pt->next;
270 #ifdef CONFIG_NET_FASTROUTE
271 if (pt->data)
272 netdev_fastroute_obstacles--;
273 #endif
274 br_write_unlock_bh(BR_NETPROTO_LOCK);
275 return;
276 }
277 }
278 br_write_unlock_bh(BR_NETPROTO_LOCK);
279 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
280 }
281
282 /******************************************************************************
283
284 Device Boot-time Settings Routines
285
286 *******************************************************************************/
287
288 /* Boot time configuration table */
289 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
290
291 /**
292 * netdev_boot_setup_add - add new setup entry
293 * @name: name of the device
294 * @map: configured settings for the device
295 *
296 * Adds new setup entry to the dev_boot_setup list. The function
297 * returns 0 on error and 1 on success. This is a generic routine to
298 * all netdevices.
299 */
300 int netdev_boot_setup_add(char *name, struct ifmap *map)
301 {
302 struct netdev_boot_setup *s;
303 int i;
304
305 s = dev_boot_setup;
306 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
307 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
308 memset(s[i].name, 0, sizeof(s[i].name));
309 strcpy(s[i].name, name);
310 memcpy(&s[i].map, map, sizeof(s[i].map));
311 break;
312 }
313 }
314
315 if (i >= NETDEV_BOOT_SETUP_MAX)
316 return 0;
317
318 return 1;
319 }
320
321 /**
322 * netdev_boot_setup_check - check boot time settings
323 * @dev: the netdevice
324 *
325 * Check boot time settings for the device.
326 * The found settings are set for the device to be used
327 * later in the device probing.
328 * Returns 0 if no settings found, 1 if they are.
329 */
330 int netdev_boot_setup_check(struct net_device *dev)
331 {
332 struct netdev_boot_setup *s;
333 int i;
334
335 s = dev_boot_setup;
336 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
337 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
338 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
339 dev->irq = s[i].map.irq;
340 dev->base_addr = s[i].map.base_addr;
341 dev->mem_start = s[i].map.mem_start;
342 dev->mem_end = s[i].map.mem_end;
343 return 1;
344 }
345 }
346 return 0;
347 }
348
349 /*
350 * Saves at boot time configured settings for any netdevice.
351 */
352 static int __init netdev_boot_setup(char *str)
353 {
354 int ints[5];
355 struct ifmap map;
356
357 str = get_options(str, ARRAY_SIZE(ints), ints);
358 if (!str || !*str)
359 return 0;
360
361 /* Save settings */
362 memset(&map, -1, sizeof(map));
363 if (ints[0] > 0)
364 map.irq = ints[1];
365 if (ints[0] > 1)
366 map.base_addr = ints[2];
367 if (ints[0] > 2)
368 map.mem_start = ints[3];
369 if (ints[0] > 3)
370 map.mem_end = ints[4];
371
372 /* Add new entry to the list */
373 return netdev_boot_setup_add(str, &map);
374 }
375
376 __setup("netdev=", netdev_boot_setup);
377
378 /*****************************************************************************************
379
380 Device Interface Subroutines
381
382 ******************************************************************************************/
383
384 /**
385 * __dev_get_by_name - find a device by its name
386 * @name: name to find
387 *
388 * Find an interface by name. Must be called under RTNL semaphore
389 * or @dev_base_lock. If the name is found a pointer to the device
390 * is returned. If the name is not found then %NULL is returned. The
391 * reference counters are not incremented so the caller must be
392 * careful with locks.
393 */
394
395
396 struct net_device *__dev_get_by_name(const char *name)
397 {
398 struct net_device *dev;
399
400 for (dev = dev_base; dev != NULL; dev = dev->next) {
401 if (strcmp(dev->name, name) == 0)
402 return dev;
403 }
404 return NULL;
405 }
406
407 /**
408 * dev_get_by_name - find a device by its name
409 * @name: name to find
410 *
411 * Find an interface by name. This can be called from any
412 * context and does its own locking. The returned handle has
413 * the usage count incremented and the caller must use dev_put() to
414 * release it when it is no longer needed. %NULL is returned if no
415 * matching device is found.
416 */
417
418 struct net_device *dev_get_by_name(const char *name)
419 {
420 struct net_device *dev;
421
422 read_lock(&dev_base_lock);
423 dev = __dev_get_by_name(name);
424 if (dev)
425 dev_hold(dev);
426 read_unlock(&dev_base_lock);
427 return dev;
428 }
429
430 /*
431 Return value is changed to int to prevent illegal usage in future.
432 It is still legal to use to check for device existance.
433
434 User should understand, that the result returned by this function
435 is meaningless, if it was not issued under rtnl semaphore.
436 */
437
438 /**
439 * dev_get - test if a device exists
440 * @name: name to test for
441 *
442 * Test if a name exists. Returns true if the name is found. In order
443 * to be sure the name is not allocated or removed during the test the
444 * caller must hold the rtnl semaphore.
445 *
446 * This function primarily exists for back compatibility with older
447 * drivers.
448 */
449
450 int dev_get(const char *name)
451 {
452 struct net_device *dev;
453
454 read_lock(&dev_base_lock);
455 dev = __dev_get_by_name(name);
456 read_unlock(&dev_base_lock);
457 return dev != NULL;
458 }
459
460 /**
461 * __dev_get_by_index - find a device by its ifindex
462 * @ifindex: index of device
463 *
464 * Search for an interface by index. Returns %NULL if the device
465 * is not found or a pointer to the device. The device has not
466 * had its reference counter increased so the caller must be careful
467 * about locking. The caller must hold either the RTNL semaphore
468 * or @dev_base_lock.
469 */
470
471 struct net_device * __dev_get_by_index(int ifindex)
472 {
473 struct net_device *dev;
474
475 for (dev = dev_base; dev != NULL; dev = dev->next) {
476 if (dev->ifindex == ifindex)
477 return dev;
478 }
479 return NULL;
480 }
481
482
483 /**
484 * dev_get_by_index - find a device by its ifindex
485 * @ifindex: index of device
486 *
487 * Search for an interface by index. Returns NULL if the device
488 * is not found or a pointer to the device. The device returned has
489 * had a reference added and the pointer is safe until the user calls
490 * dev_put to indicate they have finished with it.
491 */
492
493 struct net_device * dev_get_by_index(int ifindex)
494 {
495 struct net_device *dev;
496
497 read_lock(&dev_base_lock);
498 dev = __dev_get_by_index(ifindex);
499 if (dev)
500 dev_hold(dev);
501 read_unlock(&dev_base_lock);
502 return dev;
503 }
504
505 /**
506 * dev_getbyhwaddr - find a device by its hardware addres
507 * @type: media type of device
508 * @ha: hardware address
509 *
510 * Search for an interface by MAC address. Returns NULL if the device
511 * is not found or a pointer to the device. The caller must hold the
512 * rtnl semaphore. The returned device has not had its ref count increased
513 * and the caller must therefore be careful about locking
514 *
515 * BUGS:
516 * If the API was consistent this would be __dev_get_by_hwaddr
517 */
518
519 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
520 {
521 struct net_device *dev;
522
523 ASSERT_RTNL();
524
525 for (dev = dev_base; dev != NULL; dev = dev->next) {
526 if (dev->type == type &&
527 memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
528 return dev;
529 }
530 return NULL;
531 }
532
533 /**
534 * dev_alloc_name - allocate a name for a device
535 * @dev: device
536 * @name: name format string
537 *
538 * Passed a format string - eg "lt%d" it will try and find a suitable
539 * id. Not efficient for many devices, not called a lot. The caller
540 * must hold the dev_base or rtnl lock while allocating the name and
541 * adding the device in order to avoid duplicates. Returns the number
542 * of the unit assigned or a negative errno code.
543 */
544
545 int dev_alloc_name(struct net_device *dev, const char *name)
546 {
547 int i;
548 char buf[32];
549
550 /*
551 * If you need over 100 please also fix the algorithm...
552 */
553 for (i = 0; i < 100; i++) {
554 sprintf(buf,name,i);
555 if (__dev_get_by_name(buf) == NULL) {
556 strcpy(dev->name, buf);
557 return i;
558 }
559 }
560 return -ENFILE; /* Over 100 of the things .. bail out! */
561 }
562
563 /**
564 * dev_alloc - allocate a network device and name
565 * @name: name format string
566 * @err: error return pointer
567 *
568 * Passed a format string, eg. "lt%d", it will allocate a network device
569 * and space for the name. %NULL is returned if no memory is available.
570 * If the allocation succeeds then the name is assigned and the
571 * device pointer returned. %NULL is returned if the name allocation
572 * failed. The cause of an error is returned as a negative errno code
573 * in the variable @err points to.
574 *
575 * The caller must hold the @dev_base or RTNL locks when doing this in
576 * order to avoid duplicate name allocations.
577 */
578
579 struct net_device *dev_alloc(const char *name, int *err)
580 {
581 struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
582 if (dev == NULL) {
583 *err = -ENOBUFS;
584 return NULL;
585 }
586 memset(dev, 0, sizeof(struct net_device));
587 *err = dev_alloc_name(dev, name);
588 if (*err < 0) {
589 kfree(dev);
590 return NULL;
591 }
592 return dev;
593 }
594
595 /**
596 * netdev_state_change - device changes state
597 * @dev: device to cause notification
598 *
599 * Called to indicate a device has changed state. This function calls
600 * the notifier chains for netdev_chain and sends a NEWLINK message
601 * to the routing socket.
602 */
603
604 void netdev_state_change(struct net_device *dev)
605 {
606 if (dev->flags&IFF_UP) {
607 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
608 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
609 }
610 }
611
612
#ifdef CONFIG_KMOD

/**
 *	dev_load 	- load a network module
 *	@name: name of interface
 *
 *	When no interface called @name exists and the calling process has
 *	CAP_SYS_MODULE, ask for a module of that name to be loaded.
 *	Kernels built without module loading get an empty stub instead.
 */

void dev_load(const char *name)
{
	if (dev_get(name))
		return;		/* already present */
	if (!capable(CAP_SYS_MODULE))
		return;

	request_module(name);
}

#else

extern inline void dev_load(const char *unused){;}

#endif
635
636 static int default_rebuild_header(struct sk_buff *skb)
637 {
638 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
639 kfree_skb(skb);
640 return 1;
641 }
642
643 /**
644 * dev_open - prepare an interface for use.
645 * @dev: device to open
646 *
647 * Takes a device from down to up state. The device's private open
648 * function is invoked and then the multicast lists are loaded. Finally
649 * the device is moved into the up state and a %NETDEV_UP message is
650 * sent to the netdev notifier chain.
651 *
652 * Calling this function on an active interface is a nop. On a failure
653 * a negative errno code is returned.
654 */
655
656 int dev_open(struct net_device *dev)
657 {
658 int ret = 0;
659
660 /*
661 * Is it already up?
662 */
663
664 if (dev->flags&IFF_UP)
665 return 0;
666
667 /*
668 * Is it even present?
669 */
670 if (!netif_device_present(dev))
671 return -ENODEV;
672
673 /*
674 * Call device private open method
675 */
676 if (try_inc_mod_count(dev->owner)) {
677 if (dev->open) {
678 ret = dev->open(dev);
679 if (ret != 0 && dev->owner)
680 __MOD_DEC_USE_COUNT(dev->owner);
681 }
682 } else {
683 ret = -ENODEV;
684 }
685
686 /*
687 * If it went open OK then:
688 */
689
690 if (ret == 0)
691 {
692 /*
693 * Set the flags.
694 */
695 dev->flags |= IFF_UP;
696
697 set_bit(__LINK_STATE_START, &dev->state);
698
699 /*
700 * Initialize multicasting status
701 */
702 dev_mc_upload(dev);
703
704 /*
705 * Wakeup transmit queue engine
706 */
707 dev_activate(dev);
708
709 /*
710 * ... and announce new interface.
711 */
712 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
713 }
714 return(ret);
715 }
716
717 #ifdef CONFIG_NET_FASTROUTE
718
719 static void dev_do_clear_fastroute(struct net_device *dev)
720 {
721 if (dev->accept_fastpath) {
722 int i;
723
724 for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
725 struct dst_entry *dst;
726
727 write_lock_irq(&dev->fastpath_lock);
728 dst = dev->fastpath[i];
729 dev->fastpath[i] = NULL;
730 write_unlock_irq(&dev->fastpath_lock);
731
732 dst_release(dst);
733 }
734 }
735 }
736
737 void dev_clear_fastroute(struct net_device *dev)
738 {
739 if (dev) {
740 dev_do_clear_fastroute(dev);
741 } else {
742 read_lock(&dev_base_lock);
743 for (dev = dev_base; dev; dev = dev->next)
744 dev_do_clear_fastroute(dev);
745 read_unlock(&dev_base_lock);
746 }
747 }
748 #endif
749
750 /**
751 * dev_close - shutdown an interface.
752 * @dev: device to shutdown
753 *
754 * This function moves an active device into down state. A
755 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
756 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
757 * chain.
758 */
759
760 int dev_close(struct net_device *dev)
761 {
762 if (!(dev->flags&IFF_UP))
763 return 0;
764
765 /*
766 * Tell people we are going down, so that they can
767 * prepare to death, when device is still operating.
768 */
769 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
770
771 dev_deactivate(dev);
772
773 clear_bit(__LINK_STATE_START, &dev->state);
774
775 /*
776 * Call the device specific close. This cannot fail.
777 * Only if device is UP
778 *
779 * We allow it to be called even after a DETACH hot-plug
780 * event.
781 */
782
783 if (dev->stop)
784 dev->stop(dev);
785
786 /*
787 * Device is now down.
788 */
789
790 dev->flags &= ~IFF_UP;
791 #ifdef CONFIG_NET_FASTROUTE
792 dev_clear_fastroute(dev);
793 #endif
794
795 /*
796 * Tell people we are down
797 */
798 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
799
800 /*
801 * Drop the module refcount
802 */
803 if (dev->owner)
804 __MOD_DEC_USE_COUNT(dev->owner);
805
806 return(0);
807 }
808
809
810 /*
811 * Device change register/unregister. These are not inline or static
812 * as we export them to the world.
813 */
814
815 /**
816 * register_netdevice_notifier - register a network notifier block
817 * @nb: notifier
818 *
819 * Register a notifier to be called when network device events occur.
820 * The notifier passed is linked into the kernel structures and must
821 * not be reused until it has been unregistered. A negative errno code
822 * is returned on a failure.
823 */
824
825 int register_netdevice_notifier(struct notifier_block *nb)
826 {
827 return notifier_chain_register(&netdev_chain, nb);
828 }
829
830 /**
831 * unregister_netdevice_notifier - unregister a network notifier block
832 * @nb: notifier
833 *
834 * Unregister a notifier previously registered by
835 * register_netdevice_notifier(). The notifier is unlinked into the
836 * kernel structures and may then be reused. A negative errno code
837 * is returned on a failure.
838 */
839
840 int unregister_netdevice_notifier(struct notifier_block *nb)
841 {
842 return notifier_chain_unregister(&netdev_chain,nb);
843 }
844
845 /*
846 * Support routine. Sends outgoing frames to any network
847 * taps currently in use.
848 */
849
850 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
851 {
852 struct packet_type *ptype;
853 get_fast_time(&skb->stamp);
854
855 br_read_lock(BR_NETPROTO_LOCK);
856 for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
857 {
858 /* Never send packets back to the socket
859 * they originated from - MvS (miquels@drinkel.ow.org)
860 */
861 if ((ptype->dev == dev || !ptype->dev) &&
862 ((struct sock *)ptype->data != skb->sk))
863 {
864 struct sk_buff *skb2;
865 if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
866 break;
867
868 /* skb->nh should be correctly
869 set by sender, so that the second statement is
870 just protection against buggy protocols.
871 */
872 skb2->mac.raw = skb2->data;
873
874 if (skb2->nh.raw < skb2->data || skb2->nh.raw >= skb2->tail) {
875 if (net_ratelimit())
876 printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
877 skb2->nh.raw = skb2->data;
878 if (dev->hard_header)
879 skb2->nh.raw += dev->hard_header_len;
880 }
881
882 skb2->h.raw = skb2->nh.raw;
883 skb2->pkt_type = PACKET_OUTGOING;
884 ptype->func(skb2, skb->dev, ptype);
885 }
886 }
887 br_read_unlock(BR_NETPROTO_LOCK);
888 }
889
890 /**
891 * dev_queue_xmit - transmit a buffer
892 * @skb: buffer to transmit
893 *
894 * Queue a buffer for transmission to a network device. The caller must
895 * have set the device and priority and built the buffer before calling this
896 * function. The function can be called from an interrupt.
897 *
898 * A negative errno code is returned on a failure. A success does not
899 * guarantee the frame will be transmitted as it may be dropped due
900 * to congestion or traffic shaping.
901 */
902
903 int dev_queue_xmit(struct sk_buff *skb)
904 {
905 struct net_device *dev = skb->dev;
906 struct Qdisc *q;
907
908 /* Grab device queue */
909 spin_lock_bh(&dev->queue_lock);
910 q = dev->qdisc;
911 if (q->enqueue) {
912 int ret = q->enqueue(skb, q);
913
914 qdisc_run(dev);
915
916 spin_unlock_bh(&dev->queue_lock);
917 return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
918 }
919
920 /* The device has no queue. Common case for software devices:
921 loopback, all the sorts of tunnels...
922
923 Really, it is unlikely that xmit_lock protection is necessary here.
924 (f.e. loopback and IP tunnels are clean ignoring statistics counters.)
925 However, it is possible, that they rely on protection
926 made by us here.
927
928 Check this and shot the lock. It is not prone from deadlocks.
929 Either shot noqueue qdisc, it is even simpler 8)
930 */
931 if (dev->flags&IFF_UP) {
932 int cpu = smp_processor_id();
933
934 if (dev->xmit_lock_owner != cpu) {
935 spin_unlock(&dev->queue_lock);
936 spin_lock(&dev->xmit_lock);
937 dev->xmit_lock_owner = cpu;
938
939 if (!netif_queue_stopped(dev)) {
940 if (netdev_nit)
941 dev_queue_xmit_nit(skb,dev);
942
943 if (dev->hard_start_xmit(skb, dev) == 0) {
944 dev->xmit_lock_owner = -1;
945 spin_unlock_bh(&dev->xmit_lock);
946 return 0;
947 }
948 }
949 dev->xmit_lock_owner = -1;
950 spin_unlock_bh(&dev->xmit_lock);
951 if (net_ratelimit())
952 printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
953 kfree_skb(skb);
954 return -ENETDOWN;
955 } else {
956 /* Recursion is detected! It is possible, unfortunately */
957 if (net_ratelimit())
958 printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
959 }
960 }
961 spin_unlock_bh(&dev->queue_lock);
962
963 kfree_skb(skb);
964 return -ENETDOWN;
965 }
966
967
968 /*=======================================================================
969 Receiver routines
970 =======================================================================*/
971
/* Longest per-CPU input queue netif_rx() accepts before throttling. */
int netdev_max_backlog = 300;

/* These numbers were picked by intuition and some experimentation;
 * if you have a more scientific way of choosing them, please go
 * ahead and fix things.
 */
int no_cong_thresh = 10;
int no_cong = 20;	/* average backlog above this: low congestion */
int lo_cong = 100;	/* average backlog above this: moderate congestion */
int mod_cong = 290;	/* average backlog above this: high congestion */
981
982 struct netif_rx_stats netdev_rx_stat[NR_CPUS];
983
984
985 #ifdef CONFIG_NET_HW_FLOWCONTROL
/* Raised when a CPU's input queue becomes throttled in netif_rx() and
 * lowered when it is unthrottled; reaching zero triggers netdev_wakeup().
 */
atomic_t netdev_dropping = ATOMIC_INIT(0);
/* Bitmap of allocated slots. Bit 0 starts out set, so slot 0 is never
 * handed out and 0 can serve as netdev_register_fc()'s "no slot" result.
 */
static unsigned long netdev_fc_mask = 1;
/* Bitmap of slots whose stimul callback netdev_wakeup() should run. */
unsigned long netdev_fc_xoff = 0;
/* Protects the two bitmaps and the slot table. */
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;

/* One callback/device pair per flow-control slot. */
static struct
{
	void (*stimul)(struct net_device *);
	struct net_device *dev;
} netdev_fc_slots[32];
996
997 int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
998 {
999 int bit = 0;
1000 unsigned long flags;
1001
1002 spin_lock_irqsave(&netdev_fc_lock, flags);
1003 if (netdev_fc_mask != ~0UL) {
1004 bit = ffz(netdev_fc_mask);
1005 netdev_fc_slots[bit].stimul = stimul;
1006 netdev_fc_slots[bit].dev = dev;
1007 set_bit(bit, &netdev_fc_mask);
1008 clear_bit(bit, &netdev_fc_xoff);
1009 }
1010 spin_unlock_irqrestore(&netdev_fc_lock, flags);
1011 return bit;
1012 }
1013
1014 void netdev_unregister_fc(int bit)
1015 {
1016 unsigned long flags;
1017
1018 spin_lock_irqsave(&netdev_fc_lock, flags);
1019 if (bit > 0) {
1020 netdev_fc_slots[bit].stimul = NULL;
1021 netdev_fc_slots[bit].dev = NULL;
1022 clear_bit(bit, &netdev_fc_mask);
1023 clear_bit(bit, &netdev_fc_xoff);
1024 }
1025 spin_unlock_irqrestore(&netdev_fc_lock, flags);
1026 }
1027
1028 static void netdev_wakeup(void)
1029 {
1030 unsigned long xoff;
1031
1032 spin_lock(&netdev_fc_lock);
1033 xoff = netdev_fc_xoff;
1034 netdev_fc_xoff = 0;
1035 while (xoff) {
1036 int i = ffz(~xoff);
1037 xoff &= ~(1<<i);
1038 netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
1039 }
1040 spin_unlock(&netdev_fc_lock);
1041 }
1042 #endif
1043
1044 static void get_sample_stats(int cpu)
1045 {
1046 #ifdef RAND_LIE
1047 unsigned long rd;
1048 int rq;
1049 #endif
1050 int blog = softnet_data[cpu].input_pkt_queue.qlen;
1051 int avg_blog = softnet_data[cpu].avg_blog;
1052
1053 avg_blog = (avg_blog >> 1)+ (blog >> 1);
1054
1055 if (avg_blog > mod_cong) {
1056 /* Above moderate congestion levels. */
1057 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1058 #ifdef RAND_LIE
1059 rd = net_random();
1060 rq = rd % netdev_max_backlog;
1061 if (rq < avg_blog) /* unlucky bastard */
1062 softnet_data[cpu].cng_level = NET_RX_DROP;
1063 #endif
1064 } else if (avg_blog > lo_cong) {
1065 softnet_data[cpu].cng_level = NET_RX_CN_MOD;
1066 #ifdef RAND_LIE
1067 rd = net_random();
1068 rq = rd % netdev_max_backlog;
1069 if (rq < avg_blog) /* unlucky bastard */
1070 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1071 #endif
1072 } else if (avg_blog > no_cong)
1073 softnet_data[cpu].cng_level = NET_RX_CN_LOW;
1074 else /* no congestion */
1075 softnet_data[cpu].cng_level = NET_RX_SUCCESS;
1076
1077 softnet_data[cpu].avg_blog = avg_blog;
1078 }
1079
#ifdef OFFLINE_SAMPLE
/*
 * Timer handler for offline sampling: refresh the congestion estimate
 * of the CPU it runs on and re-arm samp_timer one tick ahead.
 */
static void sample_queue(unsigned long dummy)
{
	/* 10 ms or 1 ms -- i dont care -- JHS */
	int expires = 1;

	get_sample_stats(smp_processor_id());

	expires += jiffies;
	mod_timer(&samp_timer, expires);
}
#endif
1092
1093
1094 /**
1095 * netif_rx - post buffer to the network code
1096 * @skb: buffer to post
1097 *
1098 * This function receives a packet from a device driver and queues it for
1099 * the upper (protocol) levels to process. It always succeeds. The buffer
1100 * may be dropped during processing for congestion control or by the
1101 * protocol layers.
1102 *
1103 * return values:
1104 * NET_RX_SUCCESS (no congestion)
1105 * NET_RX_CN_LOW (low congestion)
1106 * NET_RX_CN_MOD (moderate congestion)
1107 * NET_RX_CN_HIGH (high congestion)
1108 * NET_RX_DROP (packet was dropped)
1109 *
1110 *
1111 */
1112
int netif_rx(struct sk_buff *skb)
{
	int this_cpu = smp_processor_id();
	struct softnet_data *queue;
	unsigned long flags;

	/* Timestamp the packet here unless a timestamp is already present. */
	if (skb->stamp.tv_sec == 0)
		get_fast_time(&skb->stamp);

	/* The code is arranged so that the path is shortest when the
	   CPU is congested but still operating.
	 */
	queue = &softnet_data[this_cpu];

	/* The per-CPU queue and counters are touched with local IRQs off. */
	local_irq_save(flags);

	netdev_rx_stat[this_cpu].total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
			/* Backlog is non-empty: while throttled, keep dropping. */
			if (queue->throttle)
				goto drop;

enqueue:
			/* The queued skb keeps a reference on its device;
			 * net_rx_action() drops it after delivery.
			 */
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue,skb);
			__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
			local_irq_restore(flags);
#ifndef OFFLINE_SAMPLE
			/* Without the offline sampling timer, update the
			 * congestion statistics inline on every enqueue.
			 */
			get_sample_stats(this_cpu);
#endif
			return softnet_data[this_cpu].cng_level;
		}

		/* Backlog is empty: leave throttled state (if set) and
		 * accept the packet.
		 */
		if (queue->throttle) {
			queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
			if (atomic_dec_and_test(&netdev_dropping))
				netdev_wakeup();
#endif
		}
		goto enqueue;
	}

	/* Backlog exceeded netdev_max_backlog: enter throttled state once
	 * and account for it.
	 */
	if (queue->throttle == 0) {
		queue->throttle = 1;
		netdev_rx_stat[this_cpu].throttled++;
#ifdef CONFIG_NET_HW_FLOWCONTROL
		atomic_inc(&netdev_dropping);
#endif
	}

drop:
	netdev_rx_stat[this_cpu].dropped++;
	local_irq_restore(flags);

	/* The skb is consumed even on the drop path. */
	kfree_skb(skb);
	return NET_RX_DROP;
}
1171
1172 /* Deliver skb to an old protocol, which is not threaded well
1173 or which do not understand shared skbs.
1174 */
1175 static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
1176 {
1177 static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
1178 int ret = NET_RX_DROP;
1179
1180
1181 if (!last) {
1182 skb = skb_clone(skb, GFP_ATOMIC);
1183 if (skb == NULL)
1184 return ret;
1185 }
1186
1187 /* The assumption (correct one) is that old protocols
1188 did not depened on BHs different of NET_BH and TIMER_BH.
1189 */
1190
1191 /* Emulate NET_BH with special spinlock */
1192 spin_lock(&net_bh_lock);
1193
1194 /* Disable timers and wait for all timers completion */
1195 tasklet_disable(bh_task_vec+TIMER_BH);
1196
1197 ret = pt->func(skb, skb->dev, pt);
1198
1199 tasklet_enable(bh_task_vec+TIMER_BH);
1200 spin_unlock(&net_bh_lock);
1201 return ret;
1202 }
1203
1204 /* Reparent skb to master device. This function is called
1205 * only from net_rx_action under BR_NETPROTO_LOCK. It is misuse
1206 * of BR_NETPROTO_LOCK, but it is OK for now.
1207 */
1208 static __inline__ void skb_bond(struct sk_buff *skb)
1209 {
1210 struct net_device *dev = skb->dev;
1211
1212 if (dev->master) {
1213 dev_hold(dev->master);
1214 skb->dev = dev->master;
1215 dev_put(dev);
1216 }
1217 }
1218
/*
 * Transmit-side softirq handler for the current CPU.  It does two things:
 *  1. frees every skb found on this CPU's completion_queue;
 *  2. runs the queueing discipline of every device found on this CPU's
 *     output_queue, or reschedules the device if its queue lock is busy.
 * The @h argument is not used.
 */
static void net_tx_action(struct softirq_action *h)
{
	int cpu = smp_processor_id();

	if (softnet_data[cpu].completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole list with interrupts off, then free the
		 * buffers with interrupts enabled again.
		 */
		local_irq_disable();
		clist = softnet_data[cpu].completion_queue;
		softnet_data[cpu].completion_queue = NULL;
		local_irq_enable();

		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			/* Buffers on this list are expected to have no users left. */
			BUG_TRAP(atomic_read(&skb->users) == 0);
			__kfree_skb(skb);
		}
	}

	if (softnet_data[cpu].output_queue) {
		struct net_device *head;

		/* Same pattern: grab the private list atomically w.r.t. IRQs. */
		local_irq_disable();
		head = softnet_data[cpu].output_queue;
		softnet_data[cpu].output_queue = NULL;
		local_irq_enable();

		while (head != NULL) {
			struct net_device *dev = head;
			/* Read the link before clearing SCHED below. */
			head = head->next_sched;

			smp_mb__before_clear_bit();
			clear_bit(__LINK_STATE_SCHED, &dev->state);

			/* Never spin here: if the queue lock is held elsewhere,
			 * put the device back on the schedule instead.
			 */
			if (spin_trylock(&dev->queue_lock)) {
				qdisc_run(dev);
				spin_unlock(&dev->queue_lock);
			} else {
				netif_schedule(dev);
			}
		}
	}
}
1264
1265 /**
1266 * net_call_rx_atomic
1267 * @fn: function to call
1268 *
1269 * Make a function call that is atomic with respect to the protocol
1270 * layers.
1271 */
1272
void net_call_rx_atomic(void (*fn)(void))
{
	/* net_rx_action() delivers packets under the read side of
	 * BR_NETPROTO_LOCK, so holding the write side (with BHs disabled)
	 * keeps @fn from running concurrently with that delivery.
	 */
	br_write_lock_bh(BR_NETPROTO_LOCK);
	fn();
	br_write_unlock_bh(BR_NETPROTO_LOCK);
}
1279
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
/*
 * Hook called by handle_bridge() for frames received on a device whose
 * br_port is set.  NULL until a handler is installed (presumably by the
 * bridge code -- the assignment is not in this part of the file);
 * net_rx_action() checks for NULL before using it.
 */
void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
#endif
1283
1284 static int __inline__ handle_bridge(struct sk_buff *skb,
1285 struct packet_type *pt_prev)
1286 {
1287 int ret = NET_RX_DROP;
1288
1289 if (pt_prev) {
1290 if (!pt_prev->data)
1291 ret = deliver_to_old_ones(pt_prev, skb, 0);
1292 else {
1293 atomic_inc(&skb->users);
1294 ret = pt_prev->func(skb, skb->dev, pt_prev);
1295 }
1296 }
1297
1298 br_handle_frame_hook(skb);
1299 return ret;
1300 }
1301
1302
1303 #ifdef CONFIG_NET_DIVERT
1304 static inline void handle_diverter(struct sk_buff *skb)
1305 {
1306 /* if diversion is supported on device, then divert */
1307 if (skb->dev->divert && skb->dev->divert->divert)
1308 divert_frame(skb);
1309 }
1310 #endif /* CONFIG_NET_DIVERT */
1311
1312
/*
 * Receive-side softirq handler for the current CPU.
 *
 * Drains this CPU's input_pkt_queue (filled by netif_rx()) and delivers
 * each packet to the matching packet_type handlers: first the ptype_all
 * taps, then -- unless the bridge takes the frame -- the handlers hashed
 * in ptype_base for the packet's protocol.  Delivery is "one behind":
 * pt_prev remembers the previous match so that the last handler can be
 * given the skb itself without taking an extra reference or clone.
 *
 * Processing stops early (and the softirq is re-raised) once more than
 * netdev_max_backlog packets were handled or more than one jiffy passed.
 * The @h argument is not used.
 */
static void net_rx_action(struct softirq_action *h)
{
	int this_cpu = smp_processor_id();
	struct softnet_data *queue = &softnet_data[this_cpu];
	unsigned long start_time = jiffies;
	int bugdet = netdev_max_backlog;	/* packet budget for this run */

	/* Read side; writers (net_call_rx_atomic, netdev_set_master) are
	 * thereby excluded while packets are being delivered.
	 */
	br_read_lock(BR_NETPROTO_LOCK);

	for (;;) {
		struct sk_buff *skb;
		struct net_device *rx_dev;

		/* netif_rx() enqueues with local IRQs off, so dequeue likewise. */
		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		local_irq_enable();

		if (skb == NULL)
			break;

		/* May switch skb->dev (and the device reference) to the master. */
		skb_bond(skb);

		/* Remember the device whose reference must be dropped below;
		 * handlers may change or free the skb.
		 */
		rx_dev = skb->dev;

#ifdef CONFIG_NET_FASTROUTE
		if (skb->pkt_type == PACKET_FASTROUTE) {
			netdev_rx_stat[this_cpu].fastroute_deferred_out++;
			dev_queue_xmit(skb);
			dev_put(rx_dev);
			continue;
		}
#endif
		skb->h.raw = skb->nh.raw = skb->data;
		{
			struct packet_type *ptype, *pt_prev;
			unsigned short type = skb->protocol;

			/* Pass 1: taps registered for all protocols. */
			pt_prev = NULL;
			for (ptype = ptype_all; ptype; ptype = ptype->next) {
				if (!ptype->dev || ptype->dev == skb->dev) {
					if (pt_prev) {
						if (!pt_prev->data) {
							deliver_to_old_ones(pt_prev, skb, 0);
						} else {
							atomic_inc(&skb->users);
							pt_prev->func(skb,
								      skb->dev,
								      pt_prev);
						}
					}
					pt_prev = ptype;
				}
			}

#ifdef CONFIG_NET_DIVERT
			/* NOTE(review): handle_diverter() repeats this same
			 * test, so the check here looks redundant.
			 */
			if (skb->dev->divert && skb->dev->divert->divert)
				handle_diverter(skb);
#endif /* CONFIG_NET_DIVERT */


#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
			/* Frames from bridge ports go to the bridge hook and
			 * skip the per-protocol handlers below.
			 */
			if (skb->dev->br_port != NULL &&
			    br_handle_frame_hook != NULL) {
				handle_bridge(skb, pt_prev);
				dev_put(rx_dev);
				continue;
			}
#endif

			/* Pass 2: handlers for this protocol, hashed on the
			 * low four bits of the host-order protocol number.
			 */
			for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
				if (ptype->type == type &&
				    (!ptype->dev || ptype->dev == skb->dev)) {
					if (pt_prev) {
						if (!pt_prev->data)
							deliver_to_old_ones(pt_prev, skb, 0);
						else {
							atomic_inc(&skb->users);
							pt_prev->func(skb,
								      skb->dev,
								      pt_prev);
						}
					}
					pt_prev = ptype;
				}
			}

			/* The last matching handler consumes the skb itself
			 * (no extra reference); with no match it is freed.
			 */
			if (pt_prev) {
				if (!pt_prev->data)
					deliver_to_old_ones(pt_prev, skb, 1);
				else
					pt_prev->func(skb, skb->dev, pt_prev);
			} else
				kfree_skb(skb);
		}

		/* Drop the device reference taken in netif_rx()/skb_bond(). */
		dev_put(rx_dev);

		/* Out of budget or out of time: yield and get rescheduled. */
		if (bugdet-- < 0 || jiffies - start_time > 1)
			goto softnet_break;

#ifdef CONFIG_NET_HW_FLOWCONTROL
		/* NOTE(review): netdev_dropping is decremented here even when
		 * it does not reach zero, but throttle is cleared only in the
		 * zero case; the throttle test after the loop could then
		 * decrement it a second time -- confirm the intended
		 * accounting.  throttle is also written here without IRQs
		 * disabled, unlike in netif_rx().
		 */
		if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
			if (atomic_dec_and_test(&netdev_dropping)) {
				queue->throttle = 0;
				netdev_wakeup();
				goto softnet_break;
			}
		}
#endif

	}
	br_read_unlock(BR_NETPROTO_LOCK);

	/* Queue fully drained: leave throttled state. */
	local_irq_disable();
	if (queue->throttle) {
		queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
		if (atomic_dec_and_test(&netdev_dropping))
			netdev_wakeup();
#endif
	}
	local_irq_enable();

	/* NOTE(review): no matching NET_PROFILE_ENTER is visible in this
	 * function.
	 */
	NET_PROFILE_LEAVE(softnet_process);
	return;

softnet_break:
	br_read_unlock(BR_NETPROTO_LOCK);

	/* Account the squeeze and ask for another softirq run. */
	local_irq_disable();
	netdev_rx_stat[this_cpu].time_squeeze++;
	__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
	local_irq_enable();

	NET_PROFILE_LEAVE(softnet_process);
	return;
}
1450
1451 static gifconf_func_t * gifconf_list [NPROTO];
1452
1453 /**
1454 * register_gifconf - register a SIOCGIF handler
1455 * @family: Address family
1456 * @gifconf: Function handler
1457 *
1458 * Register protocol dependent address dumping routines. The handler
1459 * that is passed must not be freed or reused until it has been replaced
1460 * by another handler.
1461 */
1462
1463 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1464 {
1465 if (family>=NPROTO)
1466 return -EINVAL;
1467 gifconf_list[family] = gifconf;
1468 return 0;
1469 }
1470
1471
1472 /*
1473 * Map an interface index to its name (SIOCGIFNAME)
1474 */
1475
1476 /*
1477 * We need this ioctl for efficient implementation of the
1478 * if_indextoname() function required by the IPv6 API. Without
1479 * it, we would have to search all the interfaces to find a
1480 * match. --pb
1481 */
1482
1483 static int dev_ifname(struct ifreq *arg)
1484 {
1485 struct net_device *dev;
1486 struct ifreq ifr;
1487
1488 /*
1489 * Fetch the caller's info block.
1490 */
1491
1492 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1493 return -EFAULT;
1494
1495 read_lock(&dev_base_lock);
1496 dev = __dev_get_by_index(ifr.ifr_ifindex);
1497 if (!dev) {
1498 read_unlock(&dev_base_lock);
1499 return -ENODEV;
1500 }
1501
1502 strcpy(ifr.ifr_name, dev->name);
1503 read_unlock(&dev_base_lock);
1504
1505 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1506 return -EFAULT;
1507 return 0;
1508 }
1509
1510 /*
1511 * Perform a SIOCGIFCONF call. This structure will change
1512 * size eventually, and there is nothing I can do about it.
1513 * Thus we will need a 'compatibility mode'.
1514 */
1515
1516 static int dev_ifconf(char *arg)
1517 {
1518 struct ifconf ifc;
1519 struct net_device *dev;
1520 char *pos;
1521 int len;
1522 int total;
1523 int i;
1524
1525 /*
1526 * Fetch the caller's info block.
1527 */
1528
1529 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1530 return -EFAULT;
1531
1532 pos = ifc.ifc_buf;
1533 len = ifc.ifc_len;
1534
1535 /*
1536 * Loop over the interfaces, and write an info block for each.
1537 */
1538
1539 total = 0;
1540 for (dev = dev_base; dev != NULL; dev = dev->next) {
1541 for (i=0; i<NPROTO; i++) {
1542 if (gifconf_list[i]) {
1543 int done;
1544 if (pos==NULL) {
1545 done = gifconf_list[i](dev, NULL, 0);
1546 } else {
1547 done = gifconf_list[i](dev, pos+total, len-total);
1548 }
1549 if (done<0) {
1550 return -EFAULT;
1551 }
1552 total += done;
1553 }
1554 }
1555 }
1556
1557 /*
1558 * All done. Write the updated control block back to the caller.
1559 */
1560 ifc.ifc_len = total;
1561
1562 if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
1563 return -EFAULT;
1564
1565 /*
1566 * Both BSD and Solaris return 0 here, so we do too.
1567 */
1568 return 0;
1569 }
1570
1571 /*
1572 * This is invoked by the /proc filesystem handler to display a device
1573 * in detail.
1574 */
1575
1576 #ifdef CONFIG_PROC_FS
1577
1578 static int sprintf_stats(char *buffer, struct net_device *dev)
1579 {
1580 struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
1581 int size;
1582
1583 if (stats)
1584 size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1585 dev->name,
1586 stats->rx_bytes,
1587 stats->rx_packets, stats->rx_errors,
1588 stats->rx_dropped + stats->rx_missed_errors,
1589 stats->rx_fifo_errors,
1590 stats->rx_length_errors + stats->rx_over_errors
1591 + stats->rx_crc_errors + stats->rx_frame_errors,
1592 stats->rx_compressed, stats->multicast,
1593 stats->tx_bytes,
1594 stats->tx_packets, stats->tx_errors, stats->tx_dropped,
1595 stats->tx_fifo_errors, stats->collisions,
1596 stats->tx_carrier_errors + stats->tx_aborted_errors
1597 + stats->tx_window_errors + stats->tx_heartbeat_errors,
1598 stats->tx_compressed);
1599 else
1600 size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
1601
1602 return size;
1603 }
1604
1605 /*
1606 * Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
1607 * to create /proc/net/dev
1608 */
1609
1610 static int dev_get_info(char *buffer, char **start, off_t offset, int length)
1611 {
1612 int len = 0;
1613 off_t begin = 0;
1614 off_t pos = 0;
1615 int size;
1616 struct net_device *dev;
1617
1618
1619 size = sprintf(buffer,
1620 "Inter-| Receive | Transmit\n"
1621 " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
1622
1623 pos += size;
1624 len += size;
1625
1626
1627 read_lock(&dev_base_lock);
1628 for (dev = dev_base; dev != NULL; dev = dev->next) {
1629 size = sprintf_stats(buffer+len, dev);
1630 len += size;
1631 pos = begin + len;
1632
1633 if (pos < offset) {
1634 len = 0;
1635 begin = pos;
1636 }
1637 if (pos > offset + length)
1638 break;
1639 }
1640 read_unlock(&dev_base_lock);
1641
1642 *start = buffer + (offset - begin); /* Start of wanted data */
1643 len -= (offset - begin); /* Start slop */
1644 if (len > length)
1645 len = length; /* Ending slop */
1646 if (len < 0)
1647 len = 0;
1648 return len;
1649 }
1650
/*
 * /proc read handler that prints one line of nine hexadecimal receive
 * counters (netdev_rx_stat) per CPU.  The whole text is generated on every
 * call; @offset/@length select the part handed back, and *@eof is always
 * set.  @data is not used.
 */
static int dev_proc_stats(char *buffer, char **start, off_t offset,
			  int length, int *eof, void *data)
{
	int i, lcpu;
	int len=0;

	for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
		/* netdev_rx_stat is indexed by the mapped CPU number. */
		i = cpu_logical_map(lcpu);
		len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
			       netdev_rx_stat[i].total,
			       netdev_rx_stat[i].dropped,
			       netdev_rx_stat[i].time_squeeze,
			       netdev_rx_stat[i].throttled,
			       netdev_rx_stat[i].fastroute_hit,
			       netdev_rx_stat[i].fastroute_success,
			       netdev_rx_stat[i].fastroute_defer,
			       netdev_rx_stat[i].fastroute_deferred_out,
			       /* The last column currently reports cpu_collision. */
#if 0
			       netdev_rx_stat[i].fastroute_latency_reduction
#else
			       netdev_rx_stat[i].cpu_collision
#endif
			       );
	}

	/* Bytes remaining after the requested offset, clamped to [0, length]. */
	len -= offset;

	if (len > length)
		len = length;
	if (len < 0)
		len = 0;

	*start = buffer + offset;
	*eof = 1;

	return len;
}
1688
1689 #endif /* CONFIG_PROC_FS */
1690
1691
1692 #ifdef WIRELESS_EXT
1693 #ifdef CONFIG_PROC_FS
1694
1695 /*
1696 * Print one entry of /proc/net/wireless
1697 * This is a clone of /proc/net/dev (just above)
1698 */
1699 static int sprintf_wireless_stats(char *buffer, struct net_device *dev)
1700 {
1701 /* Get stats from the driver */
1702 struct iw_statistics *stats = (dev->get_wireless_stats ?
1703 dev->get_wireless_stats(dev) :
1704 (struct iw_statistics *) NULL);
1705 int size;
1706
1707 if (stats != (struct iw_statistics *) NULL) {
1708 size = sprintf(buffer,
1709 "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d\n",
1710 dev->name,
1711 stats->status,
1712 stats->qual.qual,
1713 stats->qual.updated & 1 ? '.' : ' ',
1714 stats->qual.level,
1715 stats->qual.updated & 2 ? '.' : ' ',
1716 stats->qual.noise,
1717 stats->qual.updated & 4 ? '.' : ' ',
1718 stats->discard.nwid,
1719 stats->discard.code,
1720 stats->discard.misc);
1721 stats->qual.updated = 0;
1722 }
1723 else
1724 size = 0;
1725
1726 return size;
1727 }
1728
1729 /*
1730 * Print info for /proc/net/wireless (print all entries)
1731 * This is a clone of /proc/net/dev (just above)
1732 */
1733 static int dev_get_wireless_info(char * buffer, char **start, off_t offset,
1734 int length)
1735 {
1736 int len = 0;
1737 off_t begin = 0;
1738 off_t pos = 0;
1739 int size;
1740
1741 struct net_device * dev;
1742
1743 size = sprintf(buffer,
1744 "Inter-| sta-| Quality | Discarded packets\n"
1745 " face | tus | link level noise | nwid crypt misc\n"
1746 );
1747
1748 pos += size;
1749 len += size;
1750
1751 read_lock(&dev_base_lock);
1752 for (dev = dev_base; dev != NULL; dev = dev->next) {
1753 size = sprintf_wireless_stats(buffer + len, dev);
1754 len += size;
1755 pos = begin + len;
1756
1757 if (pos < offset) {
1758 len = 0;
1759 begin = pos;
1760 }
1761 if (pos > offset + length)
1762 break;
1763 }
1764 read_unlock(&dev_base_lock);
1765
1766 *start = buffer + (offset - begin); /* Start of wanted data */
1767 len -= (offset - begin); /* Start slop */
1768 if (len > length)
1769 len = length; /* Ending slop */
1770 if (len < 0)
1771 len = 0;
1772
1773 return len;
1774 }
1775 #endif /* CONFIG_PROC_FS */
1776 #endif /* WIRELESS_EXT */
1777
1778 /**
1779 * netdev_set_master - set up master/slave pair
1780 * @slave: slave device
1781 * @master: new master device
1782 *
1783 * Changes the master device of the slave. Pass %NULL to break the
1784 * bonding. The caller must hold the RTNL semaphore. On a failure
1785 * a negative errno code is returned. On success the reference counts
1786 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
1787 * function returns zero.
1788 */
1789
1790 int netdev_set_master(struct net_device *slave, struct net_device *master)
1791 {
1792 struct net_device *old = slave->master;
1793
1794 ASSERT_RTNL();
1795
1796 if (master) {
1797 if (old)
1798 return -EBUSY;
1799 dev_hold(master);
1800 }
1801
1802 br_write_lock_bh(BR_NETPROTO_LOCK);
1803 slave->master = master;
1804 br_write_unlock_bh(BR_NETPROTO_LOCK);
1805
1806 if (old)
1807 dev_put(old);
1808
1809 if (master)
1810 slave->flags |= IFF_SLAVE;
1811 else
1812 slave->flags &= ~IFF_SLAVE;
1813
1814 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
1815 return 0;
1816 }
1817
1818 /**
1819 * dev_set_promiscuity - update promiscuity count on a device
1820 * @dev: device
1821 * @inc: modifier
1822 *
 *	Add or remove promiscuity from a device. While the count in the device
1824 * remains above zero the interface remains promiscuous. Once it hits zero
1825 * the device reverts back to normal filtering operation. A negative inc
1826 * value is used to drop promiscuity on the device.
1827 */
1828
1829 void dev_set_promiscuity(struct net_device *dev, int inc)
1830 {
1831 unsigned short old_flags = dev->flags;
1832
1833 dev->flags |= IFF_PROMISC;
1834 if ((dev->promiscuity += inc) == 0)
1835 dev->flags &= ~IFF_PROMISC;
1836 if (dev->flags^old_flags) {
1837 #ifdef CONFIG_NET_FASTROUTE
1838 if (dev->flags&IFF_PROMISC) {
1839 netdev_fastroute_obstacles++;
1840 dev_clear_fastroute(dev);
1841 } else
1842 netdev_fastroute_obstacles--;
1843 #endif
1844 dev_mc_upload(dev);
1845 printk(KERN_INFO "device %s %s promiscuous mode\n",
1846 dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
1847 }
1848 }
1849
1850 /**
1851 * dev_set_allmulti - update allmulti count on a device
1852 * @dev: device
1853 * @inc: modifier
1854 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface keeps receiving
 *	all multicast frames. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
1860 */
1861
1862 void dev_set_allmulti(struct net_device *dev, int inc)
1863 {
1864 unsigned short old_flags = dev->flags;
1865
1866 dev->flags |= IFF_ALLMULTI;
1867 if ((dev->allmulti += inc) == 0)
1868 dev->flags &= ~IFF_ALLMULTI;
1869 if (dev->flags^old_flags)
1870 dev_mc_upload(dev);
1871 }
1872
/*
 * Apply a new set of interface flags to @dev (used for SIOCSIFFLAGS).
 *
 * Directly settable flags are copied from @flags; IFF_UP is handled by
 * opening/closing the device; IFF_PROMISC and IFF_ALLMULTI are turned into
 * +1/-1 adjustments of the corresponding counters, with the user-requested
 * state tracked in dev->gflags.  Notifier and rtnetlink messages are sent
 * for the resulting changes.
 *
 * Returns 0, or the error from dev_open()/dev_close() when the IFF_UP
 * transition failed (the remaining flag handling is still performed).
 */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret;
	int old_flags = dev->flags;

	/*
	 *	Set the flags on our device.
	 *	Only the bits in the first mask are taken from the caller; the
	 *	bits in the second mask keep their current value here.
	 */

	dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
			       IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
				       (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	dev_mc_upload(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags^flags)&IFF_UP)	/* Bit is different  ? */
	{
		/* Was up -> close it; was down -> open it. */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (ret == 0)
			dev_mc_upload(dev);
	}

	/* Tell listeners when a running device changed any flag other than
	 * the ones handled separately.
	 */
	if (dev->flags&IFF_UP &&
	    ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);

	/* gflags remembers what the user asked for, so the counter is
	 * adjusted only when that request toggles.
	 */
	if ((flags^dev->gflags)&IFF_PROMISC) {
		int inc = (flags&IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC, when
	   IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags^dev->gflags)&IFF_ALLMULTI) {
		int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Report the set of changed flag bits over rtnetlink. */
	if (old_flags^dev->flags)
		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);

	return ret;
}
1932
1933 /*
1934 * Perform the SIOCxIFxxx calls.
1935 */
1936
1937 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
1938 {
1939 struct net_device *dev;
1940 int err;
1941
1942 if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
1943 return -ENODEV;
1944
1945 switch(cmd)
1946 {
1947 case SIOCGIFFLAGS: /* Get interface flags */
1948 ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
1949 |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
1950 if (netif_running(dev) && netif_carrier_ok(dev))
1951 ifr->ifr_flags |= IFF_RUNNING;
1952 return 0;
1953
1954 case SIOCSIFFLAGS: /* Set interface flags */
1955 return dev_change_flags(dev, ifr->ifr_flags);
1956
1957 case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
1958 ifr->ifr_metric = 0;
1959 return 0;
1960
1961 case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
1962 return -EOPNOTSUPP;
1963
1964 case SIOCGIFMTU: /* Get the MTU of a device */
1965 ifr->ifr_mtu = dev->mtu;
1966 return 0;
1967
1968 case SIOCSIFMTU: /* Set the MTU of a device */
1969 if (ifr->ifr_mtu == dev->mtu)
1970 return 0;
1971
1972 /*
1973 * MTU must be positive.
1974 */
1975
1976 if (ifr->ifr_mtu<0)
1977 return -EINVAL;
1978
1979 if (!netif_device_present(dev))
1980 return -ENODEV;
1981
1982 if (dev->change_mtu)
1983 err = dev->change_mtu(dev, ifr->ifr_mtu);
1984 else {
1985 dev->mtu = ifr->ifr_mtu;
1986 err = 0;
1987 }
1988 if (!err && dev->flags&IFF_UP)
1989 notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
1990 return err;
1991
1992 case SIOCGIFHWADDR:
1993 memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
1994 ifr->ifr_hwaddr.sa_family=dev->type;
1995 return 0;
1996
1997 case SIOCSIFHWADDR:
1998 if (dev->set_mac_address == NULL)
1999 return -EOPNOTSUPP;
2000 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2001 return -EINVAL;
2002 if (!netif_device_present(dev))
2003 return -ENODEV;
2004 err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
2005 if (!err)
2006 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2007 return err;
2008
2009 case SIOCSIFHWBROADCAST:
2010 if (ifr->ifr_hwaddr.sa_family!=dev->type)
2011 return -EINVAL;
2012 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
2013 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2014 return 0;
2015
2016 case SIOCGIFMAP:
2017 ifr->ifr_map.mem_start=dev->mem_start;
2018 ifr->ifr_map.mem_end=dev->mem_end;
2019 ifr->ifr_map.base_addr=dev->base_addr;
2020 ifr->ifr_map.irq=dev->irq;
2021 ifr->ifr_map.dma=dev->dma;
2022 ifr->ifr_map.port=dev->if_port;
2023 return 0;
2024
2025 case SIOCSIFMAP:
2026 if (dev->set_config) {
2027 if (!netif_device_present(dev))
2028 return -ENODEV;
2029 return dev->set_config(dev,&ifr->ifr_map);
2030 }
2031 return -EOPNOTSUPP;
2032
2033 case SIOCADDMULTI:
2034 if (dev->set_multicast_list == NULL ||
2035 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2036 return -EINVAL;
2037 if (!netif_device_present(dev))
2038 return -ENODEV;
2039 dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
2040 return 0;
2041
2042 case SIOCDELMULTI:
2043 if (dev->set_multicast_list == NULL ||
2044 ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
2045 return -EINVAL;
2046 if (!netif_device_present(dev))
2047 return -ENODEV;
2048 dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
2049 return 0;
2050
2051 case SIOCGIFINDEX:
2052 ifr->ifr_ifindex = dev->ifindex;
2053 return 0;
2054
2055 case SIOCGIFTXQLEN:
2056 ifr->ifr_qlen = dev->tx_queue_len;
2057 return 0;
2058
2059 case SIOCSIFTXQLEN:
2060 if (ifr->ifr_qlen<0)
2061 return -EINVAL;
2062 dev->tx_queue_len = ifr->ifr_qlen;
2063 return 0;
2064
2065 case SIOCSIFNAME:
2066 if (dev->flags&IFF_UP)
2067 return -EBUSY;
2068 if (__dev_get_by_name(ifr->ifr_newname))
2069 return -EEXIST;
2070 memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
2071 dev->name[IFNAMSIZ-1] = 0;
2072 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
2073 return 0;
2074
2075 /*
2076 * Unknown or private ioctl
2077 */
2078
2079 default:
2080 if ((cmd >= SIOCDEVPRIVATE &&
2081 cmd <= SIOCDEVPRIVATE + 15) ||
2082 cmd == SIOCETHTOOL) {
2083 if (dev->do_ioctl) {
2084 if (!netif_device_present(dev))
2085 return -ENODEV;
2086 return dev->do_ioctl(dev, ifr, cmd);
2087 }
2088 return -EOPNOTSUPP;
2089 }
2090
2091 #ifdef WIRELESS_EXT
2092 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2093 if (dev->do_ioctl) {
2094 if (!netif_device_present(dev))
2095 return -ENODEV;
2096 return dev->do_ioctl(dev, ifr, cmd);
2097 }
2098 return -EOPNOTSUPP;
2099 }
2100 #endif /* WIRELESS_EXT */
2101
2102 }
2103 return -EINVAL;
2104 }
2105
2106 /*
2107 * This function handles all "interface"-type I/O control requests. The actual
2108 * 'doing' part of this is dev_ifsioc above.
2109 */
2110
2111 /**
2112 * dev_ioctl - network device ioctl
2113 * @cmd: command to issue
2114 * @arg: pointer to a struct ifreq in user space
2115 *
2116 * Issue ioctl functions to devices. This is normally called by the
2117 * user space syscall interfaces but can sometimes be useful for
2118 * other purposes. The return value is the return from the syscall if
2119 * positive or a negative errno code on error.
2120 */
2121
int dev_ioctl(unsigned int cmd, void *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_shlock();
		ret = dev_ifconf((char *) arg);
		rtnl_shunlock();
		return ret;
	}
	/* SIOCGIFNAME looks the device up by index and does its own copies. */
	if (cmd == SIOCGIFNAME) {
		return dev_ifname((struct ifreq *)arg);
	}

	/* Everything else works on a kernel copy of the caller's ifreq. */
	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	/* Never trust the user to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/* Cut the name at the first ':' so that the lookup uses the part
	 * before it; the position is remembered so the ':' can be put back
	 * before the ifreq is returned.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch(cmd)
	{
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */

		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(ifr.ifr_name);
			/* Only the device list read lock is held here. */
			read_lock(&dev_base_lock);
			ret = dev_ifsioc(&ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				/* Hand back the name exactly as it was passed in. */
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
					return -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */

		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSIFNAME:
		case SIOCETHTOOL:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(ifr.ifr_name);
			dev_probe_lock();
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			dev_probe_unlock();
			/* The ifreq is not copied back for this group. */
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but currently
			   do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space. Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */

		default:
			/* Device-private range: no capability check is made
			 * here (the file's change log says permission checks
			 * were moved into the drivers); the ifreq is copied
			 * back on success.
			 */
			if (cmd >= SIOCDEVPRIVATE &&
			    cmd <= SIOCDEVPRIVATE + 15) {
				dev_load(ifr.ifr_name);
				dev_probe_lock();
				rtnl_lock();
				ret = dev_ifsioc(&ifr, cmd);
				rtnl_unlock();
				dev_probe_unlock();
				if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
					return -EFAULT;
				return ret;
			}
#ifdef WIRELESS_EXT
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
				/* If command is `set a parameter', or
				 * `get the encoding parameters', check if
				 * the user has the right to do it */
				if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
					if(!capable(CAP_NET_ADMIN))
						return -EPERM;
				}
				dev_load(ifr.ifr_name);
				rtnl_lock();
				ret = dev_ifsioc(&ifr, cmd);
				rtnl_unlock();
				/* Only "get" requests hand the ifreq back. */
				if (!ret && IW_IS_GET(cmd) &&
				    copy_to_user(arg, &ifr, sizeof(struct ifreq)))
					return -EFAULT;
				return ret;
			}
#endif	/* WIRELESS_EXT */
			return -EINVAL;
	}
}
2262
2263
2264 /**
2265 * dev_new_index - allocate an ifindex
2266 *
2267 * Returns a suitable unique value for a new device interface
2268 * number. The caller must hold the rtnl semaphore or the
2269 * dev_base_lock to be sure it remains unique.
2270 */
2271
2272 int dev_new_index(void)
2273 {
2274 static int ifindex;
2275 for (;;) {
2276 if (++ifindex <= 0)
2277 ifindex=1;
2278 if (__dev_get_by_index(ifindex) == NULL)
2279 return ifindex;
2280 }
2281 }
2282
/*
 * Non-zero during early boot.  While it is set, register_netdevice() only
 * chains the device onto dev_base and defers its initialization.
 * (Where the flag is cleared is outside this part of the file.)
 */
static int dev_boot_phase = 1;
2284
2285 /**
2286 * register_netdevice - register a network device
2287 * @dev: device to register
2288 *
2289 * Take a completed network device structure and add it to the kernel
2290 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2291 * chain. 0 is returned on success. A negative errno code is returned
2292 * on a failure to set up the device, or if the name is a duplicate.
2293 *
2294 * Callers must hold the rtnl semaphore. See the comment at the
2295 * end of Space.c for details about the locking. You may want
2296 * register_netdev() instead of this.
2297 *
2298 * BUGS:
2299 * The locking appears insufficient to guarantee two parallel registers
2300 * will not get the same name.
2301 */
2302
2303 int register_netdevice(struct net_device *dev)
2304 {
2305 struct net_device *d, **dp;
2306 #ifdef CONFIG_NET_DIVERT
2307 int ret;
2308 #endif
2309
2310 spin_lock_init(&dev->queue_lock);
2311 spin_lock_init(&dev->xmit_lock);
2312 dev->xmit_lock_owner = -1;
2313 #ifdef CONFIG_NET_FASTROUTE
2314 dev->fastpath_lock=RW_LOCK_UNLOCKED;
2315 #endif
2316
2317 if (dev_boot_phase) {
2318 #ifdef CONFIG_NET_DIVERT
2319 ret = alloc_divert_blk(dev);
2320 if (ret)
2321 return ret;
2322 #endif /* CONFIG_NET_DIVERT */
2323
2324 /* This is NOT bug, but I am not sure, that all the
2325 devices, initialized before netdev module is started
2326 are sane.
2327
2328 Now they are chained to device boot list
2329 and probed later. If a module is initialized
2330 before netdev, but assumes that dev->init
2331 is really called by register_netdev(), it will fail.
2332
2333 So that this message should be printed for a while.
2334 */
2335 printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name);
2336
2337 /* Check for existence, and append to tail of chain */
2338 for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
2339 if (d == dev || strcmp(d->name, dev->name) == 0) {
2340 return -EEXIST;
2341 }
2342 }
2343 dev->next = NULL;
2344 write_lock_bh(&dev_base_lock);
2345 *dp = dev;
2346 dev_hold(dev);
2347 write_unlock_bh(&dev_base_lock);
2348
2349 /*
2350 * Default initial state at registry is that the
2351 * device is present.
2352 */
2353
2354 set_bit(__LINK_STATE_PRESENT, &dev->state);
2355
2356 return 0;
2357 }
2358
2359 #ifdef CONFIG_NET_DIVERT
2360 ret = alloc_divert_blk(dev);
2361 if (ret)
2362 return ret;
2363 #endif /* CONFIG_NET_DIVERT */
2364
2365 dev->iflink = -1;
2366
2367 /* Init, if this function is available */
2368 if (dev->init && dev->init(dev) != 0)
2369 return -EIO;
2370
2371 dev->ifindex = dev_new_index();
2372 if (dev->iflink == -1)
2373 dev->iflink = dev->ifindex;
2374
2375 /* Check for existence, and append to tail of chain */
2376 for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
2377 if (d == dev || strcmp(d->name, dev->name) == 0) {
2378 return -EEXIST;
2379 }
2380 }
2381 /*
2382 * nil rebuild_header routine,
2383 * that should be never called and used as just bug trap.
2384 */
2385
2386 if (dev->rebuild_header == NULL)
2387 dev->rebuild_header = default_rebuild_header;
2388
2389 /*
2390 * Default initial state at registry is that the
2391 * device is present.
2392 */
2393
2394 set_bit(__LINK_STATE_PRESENT, &dev->state);
2395
2396 dev->next = NULL;
2397 dev_init_scheduler(dev);
2398 write_lock_bh(&dev_base_lock);
2399 *dp = dev;
2400 dev_hold(dev);
2401 dev->deadbeaf = 0;
2402 write_unlock_bh(&dev_base_lock);
2403
2404 /* Notify protocols, that a new device appeared. */
2405 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2406
2407 net_run_sbin_hotplug(dev, "register");
2408
2409 return 0;
2410 }
2411
2412 /**
2413 * netdev_finish_unregister - complete unregistration
2414 * @dev: device
2415 *
2416 * Destroy and free a dead device. A value of zero is returned on
2417 * success.
2418 */
2419
2420 int netdev_finish_unregister(struct net_device *dev)
2421 {
2422 BUG_TRAP(dev->ip_ptr==NULL);
2423 BUG_TRAP(dev->ip6_ptr==NULL);
2424 BUG_TRAP(dev->dn_ptr==NULL);
2425
2426 if (!dev->deadbeaf) {
2427 printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
2428 return 0;
2429 }
2430 #ifdef NET_REFCNT_DEBUG
2431 printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
2432 (dev->features & NETIF_F_DYNALLOC)?"":", old style");
2433 #endif
2434 if (dev->destructor)
2435 dev->destructor(dev);
2436 if (dev->features & NETIF_F_DYNALLOC)
2437 kfree(dev);
2438 return 0;
2439 }
2440
2441 /**
2442 * unregister_netdevice - remove device from the kernel
2443 * @dev: device
2444 *
2445 * This function shuts down a device interface and removes it
2446 * from the kernel tables. On success 0 is returned, on a failure
2447 * a negative errno code is returned.
2448 *
2449 * Callers must hold the rtnl semaphore. See the comment at the
2450 * end of Space.c for details about the locking. You may want
2451 * unregister_netdev() instead of this.
2452 */
2453
2454 int unregister_netdevice(struct net_device *dev)
2455 {
2456 unsigned long now, warning_time;
2457 struct net_device *d, **dp;
2458
2459 /* If device is running, close it first. */
2460 if (dev->flags & IFF_UP)
2461 dev_close(dev);
2462
2463 BUG_TRAP(dev->deadbeaf==0);
2464 dev->deadbeaf = 1;
2465
2466 /* And unlink it from device chain. */
2467 for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
2468 if (d == dev) {
2469 write_lock_bh(&dev_base_lock);
2470 *dp = d->next;
2471 write_unlock_bh(&dev_base_lock);
2472 break;
2473 }
2474 }
2475 if (d == NULL) {
2476 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
2477 return -ENODEV;
2478 }
2479
2480 /* Synchronize to net_rx_action. */
2481 br_write_lock_bh(BR_NETPROTO_LOCK);
2482 br_write_unlock_bh(BR_NETPROTO_LOCK);
2483
2484 if (dev_boot_phase == 0) {
2485 #ifdef CONFIG_NET_FASTROUTE
2486 dev_clear_fastroute(dev);
2487 #endif
2488
2489 /* Shutdown queueing discipline. */
2490 dev_shutdown(dev);
2491
2492 net_run_sbin_hotplug(dev, "unregister");
2493
2494 /* Notify protocols, that we are about to destroy
2495 this device. They should clean all the things.
2496 */
2497 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2498
2499 /*
2500 * Flush the multicast chain
2501 */
2502 dev_mc_discard(dev);
2503 }
2504
2505 if (dev->uninit)
2506 dev->uninit(dev);
2507
2508 /* Notifier chain MUST detach us from master device. */
2509 BUG_TRAP(dev->master==NULL);
2510
2511 #ifdef CONFIG_NET_DIVERT
2512 free_divert_blk(dev);
2513 #endif
2514
2515 if (dev->features & NETIF_F_DYNALLOC) {
2516 #ifdef NET_REFCNT_DEBUG
2517 if (atomic_read(&dev->refcnt) != 1)
2518 printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
2519 #endif
2520 dev_put(dev);
2521 return 0;
2522 }
2523
2524 /* Last reference is our one */
2525 if (atomic_read(&dev->refcnt) == 1) {
2526 dev_put(dev);
2527 return 0;
2528 }
2529
2530 #ifdef NET_REFCNT_DEBUG
2531 printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
2532 #endif
2533
2534 /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
2535 it means that someone in the kernel still has a reference
2536 to this device and we cannot release it.
2537
2538 "New style" devices have destructors, hence we can return from this
2539 function and destructor will do all the work later. As of kernel 2.4.0
2540 there are very few "New Style" devices.
2541
2542 "Old style" devices expect that the device is free of any references
2543 upon exit from this function.
2544 We cannot return from this function until all such references have
2545 fallen away. This is because the caller of this function will probably
2546 immediately kfree(*dev) and then be unloaded via sys_delete_module.
2547
2548 So, we linger until all references fall away. The duration of the
2549 linger is basically unbounded! It is driven by, for example, the
2550 current setting of sysctl_ipfrag_time.
2551
2552 After 1 second, we start to rebroadcast unregister notifications
2553 in hope that careless clients will release the device.
2554
2555 */
2556
2557 now = warning_time = jiffies;
2558 while (atomic_read(&dev->refcnt) != 1) {
2559 if ((jiffies - now) > 1*HZ) {
2560 /* Rebroadcast unregister notification */
2561 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2562 }
2563 current->state = TASK_INTERRUPTIBLE;
2564 schedule_timeout(HZ/4);
2565 current->state = TASK_RUNNING;
2566 if ((jiffies - warning_time) > 10*HZ) {
2567 printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
2568 "become free. Usage count = %d\n",
2569 dev->name, atomic_read(&dev->refcnt));
2570 warning_time = jiffies;
2571 }
2572 }
2573 dev_put(dev);
2574 return 0;
2575 }
2576
2577
2578 /*
2579 * Initialize the DEV module. At boot time this walks the device list and
2580 * unhooks any devices that fail to initialise (normally hardware not
2581 * present) and leaves us with a valid list of present and active devices.
2582 *
2583 */
2584
2585 extern void net_device_init(void);
2586 extern void ip_auto_config(void);
2587 #ifdef CONFIG_NET_DIVERT
2588 extern void dv_init(void);
2589 #endif /* CONFIG_NET_DIVERT */
2590
2591
2592 /*
2593 * Callers must hold the rtnl semaphore. See the comment at the
2594 * end of Space.c for details about the locking.
2595 */
2596 int __init net_dev_init(void)
2597 {
2598 struct net_device *dev, **dp;
2599 int i;
2600
2601 #ifdef CONFIG_NET_SCHED
2602 pktsched_init();
2603 #endif
2604
2605 #ifdef CONFIG_NET_DIVERT
2606 dv_init();
2607 #endif /* CONFIG_NET_DIVERT */
2608
2609 /*
2610 * Initialise the packet receive queues.
2611 */
2612
2613 for (i = 0; i < NR_CPUS; i++) {
2614 struct softnet_data *queue;
2615
2616 queue = &softnet_data[i];
2617 skb_queue_head_init(&queue->input_pkt_queue);
2618 queue->throttle = 0;
2619 queue->cng_level = 0;
2620 queue->avg_blog = 10; /* arbitrary non-zero */
2621 queue->completion_queue = NULL;
2622 }
2623
2624 #ifdef CONFIG_NET_PROFILE
2625 net_profile_init();
2626 NET_PROFILE_REGISTER(dev_queue_xmit);
2627 NET_PROFILE_REGISTER(softnet_process);
2628 #endif
2629
2630 #ifdef OFFLINE_SAMPLE
2631 samp_timer.expires = jiffies + (10 * HZ);
2632 add_timer(&samp_timer);
2633 #endif
2634
2635 /*
2636 * Add the devices.
2637 * If the call to dev->init fails, the dev is removed
2638 * from the chain disconnecting the device until the
2639 * next reboot.
2640 *
2641 * NB At boot phase networking is dead. No locking is required.
2642 * But we still preserve dev_base_lock for sanity.
2643 */
2644
2645 dp = &dev_base;
2646 while ((dev = *dp) != NULL) {
2647 spin_lock_init(&dev->queue_lock);
2648 spin_lock_init(&dev->xmit_lock);
2649 #ifdef CONFIG_NET_FASTROUTE
2650 dev->fastpath_lock = RW_LOCK_UNLOCKED;
2651 #endif
2652 dev->xmit_lock_owner = -1;
2653 dev->iflink = -1;
2654 dev_hold(dev);
2655
2656 /*
2657 * Allocate name. If the init() fails
2658 * the name will be reissued correctly.
2659 */
2660 if (strchr(dev->name, '%'))
2661 dev_alloc_name(dev, dev->name);
2662
2663 /*
2664 * Check boot time settings for the device.
2665 */
2666 netdev_boot_setup_check(dev);
2667
2668 if (dev->init && dev->init(dev)) {
2669 /*
2670 * It failed to come up. It will be unhooked later.
2671 * dev_alloc_name can now advance to next suitable
2672 * name that is checked next.
2673 */
2674 dev->deadbeaf = 1;
2675 dp = &dev->next;
2676 } else {
2677 dp = &dev->next;
2678 dev->ifindex = dev_new_index();
2679 if (dev->iflink == -1)
2680 dev->iflink = dev->ifindex;
2681 if (dev->rebuild_header == NULL)
2682 dev->rebuild_header = default_rebuild_header;
2683 dev_init_scheduler(dev);
2684 set_bit(__LINK_STATE_PRESENT, &dev->state);
2685 }
2686 }
2687
2688 /*
2689 * Unhook devices that failed to come up
2690 */
2691 dp = &dev_base;
2692 while ((dev = *dp) != NULL) {
2693 if (dev->deadbeaf) {
2694 write_lock_bh(&dev_base_lock);
2695 *dp = dev->next;
2696 write_unlock_bh(&dev_base_lock);
2697 dev_put(dev);
2698 } else {
2699 dp = &dev->next;
2700 }
2701 }
2702
2703 #ifdef CONFIG_PROC_FS
2704 proc_net_create("dev", 0, dev_get_info);
2705 create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
2706 #ifdef WIRELESS_EXT
2707 proc_net_create("wireless", 0, dev_get_wireless_info);
2708 #endif /* WIRELESS_EXT */
2709 #endif /* CONFIG_PROC_FS */
2710
2711 dev_boot_phase = 0;
2712
2713 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
2714 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
2715
2716 dst_init();
2717 dev_mcast_init();
2718
2719 /*
2720 * Initialise network devices
2721 */
2722
2723 net_device_init();
2724
2725 return 0;
2726 }
2727
2728 #ifdef CONFIG_HOTPLUG
2729
2730 /* Notify userspace when a netdevice event occurs,
2731 * by running '/sbin/hotplug net' with certain
2732 * environment variables set.
2733 */
2734
2735 static int net_run_sbin_hotplug(struct net_device *dev, char *action)
2736 {
2737 char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
2738 int i;
2739
2740 sprintf(ifname, "INTERFACE=%s", dev->name);
2741 sprintf(action_str, "ACTION=%s", action);
2742
2743 i = 0;
2744 argv[i++] = hotplug_path;
2745 argv[i++] = "net";
2746 argv[i] = 0;
2747
2748 i = 0;
2749 /* minimal command environment */
2750 envp [i++] = "HOME=/";
2751 envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
2752 envp [i++] = ifname;
2753 envp [i++] = action_str;
2754 envp [i] = 0;
2755
2756 return call_usermodehelper(argv [0], argv, envp);
2757 }
2758 #endif
2759
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.