~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/core/dev.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *      NET3    Protocol independent device support routines.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  *      Derived from the non IP parts of dev.c 1.0.19
 10  *              Authors:        Ross Biro, <bir7@leland.Stanford.Edu>
 11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
 13  *
 14  *      Additional Authors:
 15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
 16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
 17  *              David Hinds <dhinds@allegro.stanford.edu>
 18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 19  *              Adam Sulmicki <adam@cfar.umd.edu>
 20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 21  *
 22  *      Changes:
 23  *              Alan Cox        :       device private ioctl copies fields back.
 24  *              Alan Cox        :       Transmit queue code does relevant stunts to
 25  *                                      keep the queue safe.
 26  *              Alan Cox        :       Fixed double lock.
 27  *              Alan Cox        :       Fixed promisc NULL pointer trap
 28  *              ????????        :       Support the full private ioctl range
 29  *              Alan Cox        :       Moved ioctl permission check into drivers
 30  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
 31  *              Alan Cox        :       100 backlog just doesn't cut it when
 32  *                                      you start doing multicast video 8)
 33  *              Alan Cox        :       Rewrote net_bh and list manager.
 34  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
 35  *              Alan Cox        :       Took out transmit every packet pass
 36  *                                      Saved a few bytes in the ioctl handler
 37  *              Alan Cox        :       Network driver sets packet type before calling netif_rx. Saves
 38  *                                      a function call a packet.
 39  *              Alan Cox        :       Hashed net_bh()
 40  *              Richard Kooijman:       Timestamp fixes.
 41  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
 42  *              Alan Cox        :       Device lock protection.
 43  *              Alan Cox        :       Fixed nasty side effect of device close changes.
 44  *              Rudi Cilibrasi  :       Pass the right thing to set_mac_address()
 45  *              Dave Miller     :       32bit quantity for the device lock to make it work out
 46  *                                      on a Sparc.
 47  *              Bjorn Ekwall    :       Added KERNELD hack.
 48  *              Alan Cox        :       Cleaned up the backlog initialise.
 49  *              Craig Metz      :       SIOCGIFCONF fix if space for under
 50  *                                      1 device.
 51  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
 52  *                                      is no device open function.
 53  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
 54  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
 55  *              Cyrus Durgin    :       Cleaned for KMOD
 56  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
 57  *                                      A network device unload needs to purge
 58  *                                      the backlog queue.
 59  *      Paul Rusty Russell      :       SIOCSIFNAME
 60  *              Pekka Riikonen  :       Netdev boot-time settings code
 61  *              Andrew Morton   :       Make unregister_netdevice wait indefinitely on dev->refcnt
 62  *              J Hadi Salim    :       - Backlog queue sampling
 63  *                                      - netif_rx() feedback   
 64  */
 65 
 66 #include <asm/uaccess.h>
 67 #include <asm/system.h>
 68 #include <asm/bitops.h>
 69 #include <linux/config.h>
 70 #include <linux/types.h>
 71 #include <linux/kernel.h>
 72 #include <linux/sched.h>
 73 #include <linux/string.h>
 74 #include <linux/mm.h>
 75 #include <linux/socket.h>
 76 #include <linux/sockios.h>
 77 #include <linux/errno.h>
 78 #include <linux/interrupt.h>
 79 #include <linux/if_ether.h>
 80 #include <linux/netdevice.h>
 81 #include <linux/etherdevice.h>
 82 #include <linux/notifier.h>
 83 #include <linux/skbuff.h>
 84 #include <linux/brlock.h>
 85 #include <net/sock.h>
 86 #include <linux/rtnetlink.h>
 87 #include <linux/proc_fs.h>
 88 #include <linux/stat.h>
 89 #include <linux/if_bridge.h>
 90 #include <linux/divert.h>
 91 #include <net/dst.h>
 92 #include <net/pkt_sched.h>
 93 #include <net/profile.h>
 94 #include <linux/init.h>
 95 #include <linux/kmod.h>
 96 #include <linux/module.h>
 97 #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO)
 98 #include <linux/wireless.h>             /* Note : will define WIRELESS_EXT */
 99 #endif  /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
100 #ifdef CONFIG_PLIP
101 extern int plip_init(void);
102 #endif
103 
104 /* This define, if set, will randomly drop a packet when congestion
105  * is more than moderate.  It helps fairness in the multi-interface
106  * case when one of them is a hog, but it kills performance for the
107  * single interface case so it is off now by default.
108  */
109 #undef RAND_LIE
110 
111 /* Setting this will sample the queue lengths and thus congestion
112  * via a timer instead of as each packet is received.
113  */
114 #undef OFFLINE_SAMPLE
115 
116 NET_PROFILE_DEFINE(dev_queue_xmit)
117 NET_PROFILE_DEFINE(softnet_process)
118 
/*
 * Printable names for interface port types.  The strings are looked up
 * by position, so the order of the entries is significant.
 * NOTE(review): presumably indexed by the device's if_port value --
 * confirm against the callers.
 */
const char *if_port_text[] = {
	"unknown",	/* 0 */
	"BNC",		/* 1 */
	"10baseT",	/* 2 */
	"AUI",		/* 3 */
	"100baseT",	/* 4 */
	"100baseTX",	/* 5 */
	"100baseFX"	/* 6 */
};
128 
/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16?  Because with 16 buckets the only overlap we get on a hash
 *	of the low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *		0800	IP
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

/* 16 way hashed list, bucket chosen by (protocol value & 15) */
static struct packet_type *ptype_base[16];
/* Taps: handlers registered for ETH_P_ALL; empty (NULL) at start */
static struct packet_type *ptype_all;
151 
#ifdef OFFLINE_SAMPLE
/*
 * Only built when OFFLINE_SAMPLE is defined: forward declaration of the
 * sampling routine and the timer whose handler it is.
 */
static void sample_queue(unsigned long dummy);
static struct timer_list samp_timer = { function: sample_queue };
#endif

#ifdef CONFIG_HOTPLUG
static int net_run_sbin_hotplug(struct net_device *dev, char *action);
#else
/* Without CONFIG_HOTPLUG the call collapses to an expression yielding 0. */
#define net_run_sbin_hotplug(dev, action) ({ 0; })
#endif
162 
/*
 *	Our notifier list.  Starts out empty; blocks are linked in and out
 *	through register_netdevice_notifier() and
 *	unregister_netdevice_notifier().
 */

static struct notifier_block *netdev_chain;
168 
/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.  The array holds NR_CPUS entries.
 */
struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;

#ifdef CONFIG_NET_FASTROUTE
int netdev_fastroute;
/*
 * Incremented by dev_add_pack() and decremented by dev_remove_pack() for
 * every packet_type whose ->data is set.
 */
int netdev_fastroute_obstacles;
#endif
179 
180 
181 /******************************************************************************************
182 
183                 Protocol management and registration routines
184 
185 *******************************************************************************************/
186 
/*
 *	For efficiency: running count of ETH_P_ALL handlers.  It is bumped
 *	in dev_add_pack() and dropped in dev_remove_pack().
 */

int netdev_nit = 0;
192 
/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in their hash buckets, and the walk over protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now, do not change it.
 *	Explanation follows: if a protocol handler that mangles packets
 *	is the first on the list, it is not able to sense that the packet
 *	is cloned and should be copied-on-write, so it will
 *	change it and subsequent readers will get a broken packet.
 *							--ANK (980803)
 */
208 
209 /**
210  *      dev_add_pack - add packet handler
211  *      @pt: packet type declaration
212  * 
213  *      Add a protocol handler to the networking stack. The passed &packet_type
214  *      is linked into kernel lists and may not be freed until it has been
215  *      removed from the kernel lists.
216  */
217  
218 void dev_add_pack(struct packet_type *pt)
219 {
220         int hash;
221 
222         br_write_lock_bh(BR_NETPROTO_LOCK);
223 
224 #ifdef CONFIG_NET_FASTROUTE
225         /* Hack to detect packet socket */
226         if (pt->data) {
227                 netdev_fastroute_obstacles++;
228                 dev_clear_fastroute(pt->dev);
229         }
230 #endif
231         if (pt->type == htons(ETH_P_ALL)) {
232                 netdev_nit++;
233                 pt->next=ptype_all;
234                 ptype_all=pt;
235         } else {
236                 hash=ntohs(pt->type)&15;
237                 pt->next = ptype_base[hash];
238                 ptype_base[hash] = pt;
239         }
240         br_write_unlock_bh(BR_NETPROTO_LOCK);
241 }
242 
243 
244 /**
245  *      dev_remove_pack  - remove packet handler
246  *      @pt: packet type declaration
247  * 
248  *      Remove a protocol handler that was previously added to the kernel
249  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
250  *      from the kernel lists and can be freed or reused once this function
251  *      returns.
252  */
253  
254 void dev_remove_pack(struct packet_type *pt)
255 {
256         struct packet_type **pt1;
257 
258         br_write_lock_bh(BR_NETPROTO_LOCK);
259 
260         if (pt->type == htons(ETH_P_ALL)) {
261                 netdev_nit--;
262                 pt1=&ptype_all;
263         } else {
264                 pt1=&ptype_base[ntohs(pt->type)&15];
265         }
266 
267         for (; (*pt1) != NULL; pt1 = &((*pt1)->next)) {
268                 if (pt == (*pt1)) {
269                         *pt1 = pt->next;
270 #ifdef CONFIG_NET_FASTROUTE
271                         if (pt->data)
272                                 netdev_fastroute_obstacles--;
273 #endif
274                         br_write_unlock_bh(BR_NETPROTO_LOCK);
275                         return;
276                 }
277         }
278         br_write_unlock_bh(BR_NETPROTO_LOCK);
279         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
280 }
281 
282 /******************************************************************************
283 
284                       Device Boot-time Settings Routines
285 
286 *******************************************************************************/
287 
288 /* Boot time configuration table */
289 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
290 
291 /**
292  *      netdev_boot_setup_add   - add new setup entry
293  *      @name: name of the device
294  *      @map: configured settings for the device
295  *
296  *      Adds new setup entry to the dev_boot_setup list.  The function
297  *      returns 0 on error and 1 on success.  This is a generic routine to
298  *      all netdevices.
299  */
300 int netdev_boot_setup_add(char *name, struct ifmap *map)
301 {
302         struct netdev_boot_setup *s;
303         int i;
304 
305         s = dev_boot_setup;
306         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
307                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
308                         memset(s[i].name, 0, sizeof(s[i].name));
309                         strcpy(s[i].name, name);
310                         memcpy(&s[i].map, map, sizeof(s[i].map));
311                         break;
312                 }
313         }
314 
315         if (i >= NETDEV_BOOT_SETUP_MAX)
316                 return 0;
317 
318         return 1;
319 }
320 
321 /**
322  *      netdev_boot_setup_check - check boot time settings
323  *      @dev: the netdevice
324  *
325  *      Check boot time settings for the device.
326  *      The found settings are set for the device to be used
327  *      later in the device probing.
328  *      Returns 0 if no settings found, 1 if they are.
329  */
330 int netdev_boot_setup_check(struct net_device *dev)
331 {
332         struct netdev_boot_setup *s;
333         int i;
334 
335         s = dev_boot_setup;
336         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
337                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
338                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
339                         dev->irq        = s[i].map.irq;
340                         dev->base_addr  = s[i].map.base_addr;
341                         dev->mem_start  = s[i].map.mem_start;
342                         dev->mem_end    = s[i].map.mem_end;
343                         return 1;
344                 }
345         }
346         return 0;
347 }
348 
349 /*
350  * Saves at boot time configured settings for any netdevice.
351  */
352 static int __init netdev_boot_setup(char *str)
353 {
354         int ints[5];
355         struct ifmap map;
356 
357         str = get_options(str, ARRAY_SIZE(ints), ints);
358         if (!str || !*str)
359                 return 0;
360 
361         /* Save settings */
362         memset(&map, -1, sizeof(map));
363         if (ints[0] > 0)
364                 map.irq = ints[1];
365         if (ints[0] > 1)
366                 map.base_addr = ints[2];
367         if (ints[0] > 2)
368                 map.mem_start = ints[3];
369         if (ints[0] > 3)
370                 map.mem_end = ints[4];
371 
372         /* Add new entry to the list */ 
373         return netdev_boot_setup_add(str, &map);
374 }
375 
376 __setup("netdev=", netdev_boot_setup);
377 
378 /*****************************************************************************************
379 
380                             Device Interface Subroutines
381 
382 ******************************************************************************************/
383 
384 /**
385  *      __dev_get_by_name       - find a device by its name 
386  *      @name: name to find
387  *
388  *      Find an interface by name. Must be called under RTNL semaphore
389  *      or @dev_base_lock. If the name is found a pointer to the device
390  *      is returned. If the name is not found then %NULL is returned. The
391  *      reference counters are not incremented so the caller must be
392  *      careful with locks.
393  */
394  
395 
396 struct net_device *__dev_get_by_name(const char *name)
397 {
398         struct net_device *dev;
399 
400         for (dev = dev_base; dev != NULL; dev = dev->next) {
401                 if (strcmp(dev->name, name) == 0)
402                         return dev;
403         }
404         return NULL;
405 }
406 
407 /**
408  *      dev_get_by_name         - find a device by its name
409  *      @name: name to find
410  *
411  *      Find an interface by name. This can be called from any 
412  *      context and does its own locking. The returned handle has
413  *      the usage count incremented and the caller must use dev_put() to
414  *      release it when it is no longer needed. %NULL is returned if no
415  *      matching device is found.
416  */
417 
418 struct net_device *dev_get_by_name(const char *name)
419 {
420         struct net_device *dev;
421 
422         read_lock(&dev_base_lock);
423         dev = __dev_get_by_name(name);
424         if (dev)
425                 dev_hold(dev);
426         read_unlock(&dev_base_lock);
427         return dev;
428 }
429 
/*
   The return value was changed to int to prevent illegal usage in future.
   It is still legal to use it to check for device existence.

   Users should understand that the result returned by this function
   is meaningless if it was not obtained under the rtnl semaphore.
 */
437 
438 /**
439  *      dev_get -       test if a device exists
440  *      @name:  name to test for
441  *
442  *      Test if a name exists. Returns true if the name is found. In order
443  *      to be sure the name is not allocated or removed during the test the
444  *      caller must hold the rtnl semaphore.
445  *
446  *      This function primarily exists for back compatibility with older
447  *      drivers. 
448  */
449  
450 int dev_get(const char *name)
451 {
452         struct net_device *dev;
453 
454         read_lock(&dev_base_lock);
455         dev = __dev_get_by_name(name);
456         read_unlock(&dev_base_lock);
457         return dev != NULL;
458 }
459 
460 /**
461  *      __dev_get_by_index - find a device by its ifindex
462  *      @ifindex: index of device
463  *
464  *      Search for an interface by index. Returns %NULL if the device
465  *      is not found or a pointer to the device. The device has not
466  *      had its reference counter increased so the caller must be careful
467  *      about locking. The caller must hold either the RTNL semaphore
468  *      or @dev_base_lock.
469  */
470 
471 struct net_device * __dev_get_by_index(int ifindex)
472 {
473         struct net_device *dev;
474 
475         for (dev = dev_base; dev != NULL; dev = dev->next) {
476                 if (dev->ifindex == ifindex)
477                         return dev;
478         }
479         return NULL;
480 }
481 
482 
483 /**
484  *      dev_get_by_index - find a device by its ifindex
485  *      @ifindex: index of device
486  *
487  *      Search for an interface by index. Returns NULL if the device
488  *      is not found or a pointer to the device. The device returned has 
489  *      had a reference added and the pointer is safe until the user calls
490  *      dev_put to indicate they have finished with it.
491  */
492 
493 struct net_device * dev_get_by_index(int ifindex)
494 {
495         struct net_device *dev;
496 
497         read_lock(&dev_base_lock);
498         dev = __dev_get_by_index(ifindex);
499         if (dev)
500                 dev_hold(dev);
501         read_unlock(&dev_base_lock);
502         return dev;
503 }
504 
505 /**
506  *      dev_getbyhwaddr - find a device by its hardware addres
507  *      @type: media type of device
508  *      @ha: hardware address
509  *
510  *      Search for an interface by MAC address. Returns NULL if the device
511  *      is not found or a pointer to the device. The caller must hold the
512  *      rtnl semaphore. The returned device has not had its ref count increased
513  *      and the caller must therefore be careful about locking
514  *
515  *      BUGS:
516  *      If the API was consistent this would be __dev_get_by_hwaddr
517  */
518 
519 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
520 {
521         struct net_device *dev;
522 
523         ASSERT_RTNL();
524 
525         for (dev = dev_base; dev != NULL; dev = dev->next) {
526                 if (dev->type == type &&
527                     memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
528                         return dev;
529         }
530         return NULL;
531 }
532 
533 /**
534  *      dev_alloc_name - allocate a name for a device
535  *      @dev: device 
536  *      @name: name format string
537  *
538  *      Passed a format string - eg "lt%d" it will try and find a suitable
539  *      id. Not efficient for many devices, not called a lot. The caller
540  *      must hold the dev_base or rtnl lock while allocating the name and
541  *      adding the device in order to avoid duplicates. Returns the number
542  *      of the unit assigned or a negative errno code.
543  */
544 
545 int dev_alloc_name(struct net_device *dev, const char *name)
546 {
547         int i;
548         char buf[32];
549 
550         /*
551          *      If you need over 100 please also fix the algorithm...
552          */
553         for (i = 0; i < 100; i++) {
554                 sprintf(buf,name,i);
555                 if (__dev_get_by_name(buf) == NULL) {
556                         strcpy(dev->name, buf);
557                         return i;
558                 }
559         }
560         return -ENFILE; /* Over 100 of the things .. bail out! */
561 }
562 
563 /**
564  *      dev_alloc - allocate a network device and name
565  *      @name: name format string
566  *      @err: error return pointer
567  *
568  *      Passed a format string, eg. "lt%d", it will allocate a network device
569  *      and space for the name. %NULL is returned if no memory is available.
570  *      If the allocation succeeds then the name is assigned and the 
571  *      device pointer returned. %NULL is returned if the name allocation
572  *      failed. The cause of an error is returned as a negative errno code
573  *      in the variable @err points to.
574  *
575  *      The caller must hold the @dev_base or RTNL locks when doing this in
576  *      order to avoid duplicate name allocations.
577  */
578 
579 struct net_device *dev_alloc(const char *name, int *err)
580 {
581         struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
582         if (dev == NULL) {
583                 *err = -ENOBUFS;
584                 return NULL;
585         }
586         memset(dev, 0, sizeof(struct net_device));
587         *err = dev_alloc_name(dev, name);
588         if (*err < 0) {
589                 kfree(dev);
590                 return NULL;
591         }
592         return dev;
593 }
594 
595 /**
596  *      netdev_state_change - device changes state
597  *      @dev: device to cause notification
598  *
599  *      Called to indicate a device has changed state. This function calls
600  *      the notifier chains for netdev_chain and sends a NEWLINK message
601  *      to the routing socket.
602  */
603  
604 void netdev_state_change(struct net_device *dev)
605 {
606         if (dev->flags&IFF_UP) {
607                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
608                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
609         }
610 }
611 
612 
#ifdef CONFIG_KMOD

/**
 *	dev_load	- load a network module
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges (%CAP_SYS_MODULE) this function requests the module
 *	named after the interface. If module loading is not available in
 *	this kernel then it becomes a nop.
 */

void dev_load(const char *name)
{
	/* Already there: nothing to load. */
	if (dev_get(name))
		return;

	if (capable(CAP_SYS_MODULE))
		request_module(name);
}

#else

/* No module loader configured: empty stand-in. */
extern inline void dev_load(const char *unused) { }

#endif
635 
636 static int default_rebuild_header(struct sk_buff *skb)
637 {
638         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
639         kfree_skb(skb);
640         return 1;
641 }
642 
643 /**
644  *      dev_open        - prepare an interface for use. 
645  *      @dev:   device to open
646  *
647  *      Takes a device from down to up state. The device's private open
648  *      function is invoked and then the multicast lists are loaded. Finally
649  *      the device is moved into the up state and a %NETDEV_UP message is
650  *      sent to the netdev notifier chain.
651  *
652  *      Calling this function on an active interface is a nop. On a failure
653  *      a negative errno code is returned.
654  */
655  
656 int dev_open(struct net_device *dev)
657 {
658         int ret = 0;
659 
660         /*
661          *      Is it already up?
662          */
663 
664         if (dev->flags&IFF_UP)
665                 return 0;
666 
667         /*
668          *      Is it even present?
669          */
670         if (!netif_device_present(dev))
671                 return -ENODEV;
672 
673         /*
674          *      Call device private open method
675          */
676         if (try_inc_mod_count(dev->owner)) {
677                 if (dev->open) {
678                         ret = dev->open(dev);
679                         if (ret != 0 && dev->owner)
680                                 __MOD_DEC_USE_COUNT(dev->owner);
681                 }
682         } else {
683                 ret = -ENODEV;
684         }
685 
686         /*
687          *      If it went open OK then:
688          */
689          
690         if (ret == 0) 
691         {
692                 /*
693                  *      Set the flags.
694                  */
695                 dev->flags |= IFF_UP;
696 
697                 set_bit(__LINK_STATE_START, &dev->state);
698 
699                 /*
700                  *      Initialize multicasting status 
701                  */
702                 dev_mc_upload(dev);
703 
704                 /*
705                  *      Wakeup transmit queue engine
706                  */
707                 dev_activate(dev);
708 
709                 /*
710                  *      ... and announce new interface.
711                  */
712                 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
713         }
714         return(ret);
715 }
716 
717 #ifdef CONFIG_NET_FASTROUTE
718 
719 static void dev_do_clear_fastroute(struct net_device *dev)
720 {
721         if (dev->accept_fastpath) {
722                 int i;
723 
724                 for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
725                         struct dst_entry *dst;
726 
727                         write_lock_irq(&dev->fastpath_lock);
728                         dst = dev->fastpath[i];
729                         dev->fastpath[i] = NULL;
730                         write_unlock_irq(&dev->fastpath_lock);
731 
732                         dst_release(dst);
733                 }
734         }
735 }
736 
737 void dev_clear_fastroute(struct net_device *dev)
738 {
739         if (dev) {
740                 dev_do_clear_fastroute(dev);
741         } else {
742                 read_lock(&dev_base_lock);
743                 for (dev = dev_base; dev; dev = dev->next)
744                         dev_do_clear_fastroute(dev);
745                 read_unlock(&dev_base_lock);
746         }
747 }
748 #endif
749 
750 /**
751  *      dev_close - shutdown an interface.
752  *      @dev: device to shutdown
753  *
754  *      This function moves an active device into down state. A 
755  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
756  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
757  *      chain.
758  */
759  
760 int dev_close(struct net_device *dev)
761 {
762         if (!(dev->flags&IFF_UP))
763                 return 0;
764 
765         /*
766          *      Tell people we are going down, so that they can
767          *      prepare to death, when device is still operating.
768          */
769         notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
770 
771         dev_deactivate(dev);
772 
773         clear_bit(__LINK_STATE_START, &dev->state);
774 
775         /*
776          *      Call the device specific close. This cannot fail.
777          *      Only if device is UP
778          *
779          *      We allow it to be called even after a DETACH hot-plug
780          *      event.
781          */
782          
783         if (dev->stop)
784                 dev->stop(dev);
785 
786         /*
787          *      Device is now down.
788          */
789 
790         dev->flags &= ~IFF_UP;
791 #ifdef CONFIG_NET_FASTROUTE
792         dev_clear_fastroute(dev);
793 #endif
794 
795         /*
796          *      Tell people we are down
797          */
798         notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
799 
800         /*
801          * Drop the module refcount
802          */
803         if (dev->owner)
804                 __MOD_DEC_USE_COUNT(dev->owner);
805 
806         return(0);
807 }
808 
809 
810 /*
811  *      Device change register/unregister. These are not inline or static
812  *      as we export them to the world.
813  */
814  
815 /**
816  *      register_netdevice_notifier - register a network notifier block
817  *      @nb: notifier
818  *
819  *      Register a notifier to be called when network device events occur.
820  *      The notifier passed is linked into the kernel structures and must
821  *      not be reused until it has been unregistered. A negative errno code
822  *      is returned on a failure.
823  */
824 
825 int register_netdevice_notifier(struct notifier_block *nb)
826 {
827         return notifier_chain_register(&netdev_chain, nb);
828 }
829 
830 /**
831  *      unregister_netdevice_notifier - unregister a network notifier block
832  *      @nb: notifier
833  *
834  *      Unregister a notifier previously registered by
835  *      register_netdevice_notifier(). The notifier is unlinked into the
836  *      kernel structures and may then be reused. A negative errno code
837  *      is returned on a failure.
838  */
839 
840 int unregister_netdevice_notifier(struct notifier_block *nb)
841 {
842         return notifier_chain_unregister(&netdev_chain,nb);
843 }
844 
845 /*
846  *      Support routine. Sends outgoing frames to any network
847  *      taps currently in use.
848  */
849 
850 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
851 {
852         struct packet_type *ptype;
853         get_fast_time(&skb->stamp);
854 
855         br_read_lock(BR_NETPROTO_LOCK);
856         for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) 
857         {
858                 /* Never send packets back to the socket
859                  * they originated from - MvS (miquels@drinkel.ow.org)
860                  */
861                 if ((ptype->dev == dev || !ptype->dev) &&
862                         ((struct sock *)ptype->data != skb->sk))
863                 {
864                         struct sk_buff *skb2;
865                         if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
866                                 break;
867 
868                         /* skb->nh should be correctly
869                            set by sender, so that the second statement is
870                            just protection against buggy protocols.
871                          */
872                         skb2->mac.raw = skb2->data;
873 
874                         if (skb2->nh.raw < skb2->data || skb2->nh.raw >= skb2->tail) {
875                                 if (net_ratelimit())
876                                         printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name);
877                                 skb2->nh.raw = skb2->data;
878                                 if (dev->hard_header)
879                                         skb2->nh.raw += dev->hard_header_len;
880                         }
881 
882                         skb2->h.raw = skb2->nh.raw;
883                         skb2->pkt_type = PACKET_OUTGOING;
884                         ptype->func(skb2, skb->dev, ptype);
885                 }
886         }
887         br_read_unlock(BR_NETPROTO_LOCK);
888 }
889 
890 /**
891  *      dev_queue_xmit - transmit a buffer
892  *      @skb: buffer to transmit
893  *      
894  *      Queue a buffer for transmission to a network device. The caller must
895  *      have set the device and priority and built the buffer before calling this 
896  *      function. The function can be called from an interrupt.
897  *
898  *      A negative errno code is returned on a failure. A success does not
899  *      guarantee the frame will be transmitted as it may be dropped due
900  *      to congestion or traffic shaping.
901  */
902  
903 int dev_queue_xmit(struct sk_buff *skb)
904 {
905         struct net_device *dev = skb->dev;
906         struct Qdisc  *q;
907 
908         /* Grab device queue */
909         spin_lock_bh(&dev->queue_lock);
910         q = dev->qdisc;
911         if (q->enqueue) {
912                 int ret = q->enqueue(skb, q);
913 
914                 qdisc_run(dev);
915 
916                 spin_unlock_bh(&dev->queue_lock);
917                 return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
918         }
919 
920         /* The device has no queue. Common case for software devices:
921            loopback, all the sorts of tunnels...
922 
923            Really, it is unlikely that xmit_lock protection is necessary here.
924            (f.e. loopback and IP tunnels are clean ignoring statistics counters.)
925            However, it is possible, that they rely on protection
926            made by us here.
927 
928            Check this and shot the lock. It is not prone from deadlocks.
929            Either shot noqueue qdisc, it is even simpler 8)
930          */
931         if (dev->flags&IFF_UP) {
932                 int cpu = smp_processor_id();
933 
934                 if (dev->xmit_lock_owner != cpu) {
935                         spin_unlock(&dev->queue_lock);
936                         spin_lock(&dev->xmit_lock);
937                         dev->xmit_lock_owner = cpu;
938 
939                         if (!netif_queue_stopped(dev)) {
940                                 if (netdev_nit)
941                                         dev_queue_xmit_nit(skb,dev);
942 
943                                 if (dev->hard_start_xmit(skb, dev) == 0) {
944                                         dev->xmit_lock_owner = -1;
945                                         spin_unlock_bh(&dev->xmit_lock);
946                                         return 0;
947                                 }
948                         }
949                         dev->xmit_lock_owner = -1;
950                         spin_unlock_bh(&dev->xmit_lock);
951                         if (net_ratelimit())
952                                 printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
953                         kfree_skb(skb);
954                         return -ENETDOWN;
955                 } else {
956                         /* Recursion is detected! It is possible, unfortunately */
957                         if (net_ratelimit())
958                                 printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
959                 }
960         }
961         spin_unlock_bh(&dev->queue_lock);
962 
963         kfree_skb(skb);
964         return -ENETDOWN;
965 }
966 
967 
968 /*=======================================================================
969                         Receiver routines
970   =======================================================================*/
971 
972 int netdev_max_backlog = 300;      /* netif_rx() drops once a CPU's input queue is longer than this; also net_rx_action()'s starting budget */
973 /* These numbers are selected based on intuition and some
974  * experimentation; if you have a more scientific way of doing this
975  * please go ahead and fix things.
976  */
977 int no_cong_thresh = 10;   /* input queue length below which net_rx_action() lifts throttling (CONFIG_NET_HW_FLOWCONTROL only) */
978 int no_cong = 20;          /* get_sample_stats(): averaged backlog above this reports NET_RX_CN_LOW */
979 int lo_cong = 100;         /* get_sample_stats(): averaged backlog above this reports NET_RX_CN_MOD */
980 int mod_cong = 290;        /* get_sample_stats(): averaged backlog above this reports NET_RX_CN_HIGH */
981 
982 struct netif_rx_stats netdev_rx_stat[NR_CPUS];     /* per-CPU receive counters, indexed by CPU number */
983 
984 
985 #ifdef CONFIG_NET_HW_FLOWCONTROL
986 atomic_t netdev_dropping = ATOMIC_INIT(0);
987 static unsigned long netdev_fc_mask = 1;
988 unsigned long netdev_fc_xoff = 0;
989 spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
990 
991 static struct
992 {
993         void (*stimul)(struct net_device *);
994         struct net_device *dev;
995 } netdev_fc_slots[32];
996 
997 int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
998 {
999         int bit = 0;
1000         unsigned long flags;
1001 
1002         spin_lock_irqsave(&netdev_fc_lock, flags);
1003         if (netdev_fc_mask != ~0UL) {
1004                 bit = ffz(netdev_fc_mask);
1005                 netdev_fc_slots[bit].stimul = stimul;
1006                 netdev_fc_slots[bit].dev = dev;
1007                 set_bit(bit, &netdev_fc_mask);
1008                 clear_bit(bit, &netdev_fc_xoff);
1009         }
1010         spin_unlock_irqrestore(&netdev_fc_lock, flags);
1011         return bit;
1012 }
1013 
1014 void netdev_unregister_fc(int bit)
1015 {
1016         unsigned long flags;
1017 
1018         spin_lock_irqsave(&netdev_fc_lock, flags);
1019         if (bit > 0) {
1020                 netdev_fc_slots[bit].stimul = NULL;
1021                 netdev_fc_slots[bit].dev = NULL;
1022                 clear_bit(bit, &netdev_fc_mask);
1023                 clear_bit(bit, &netdev_fc_xoff);
1024         }
1025         spin_unlock_irqrestore(&netdev_fc_lock, flags);
1026 }
1027 
1028 static void netdev_wakeup(void)
1029 {
1030         unsigned long xoff;
1031 
1032         spin_lock(&netdev_fc_lock);
1033         xoff = netdev_fc_xoff;
1034         netdev_fc_xoff = 0;
1035         while (xoff) {
1036                 int i = ffz(~xoff);
1037                 xoff &= ~(1<<i);
1038                 netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
1039         }
1040         spin_unlock(&netdev_fc_lock);
1041 }
1042 #endif
1043 
1044 static void get_sample_stats(int cpu)
1045 {
1046 #ifdef RAND_LIE
1047         unsigned long rd;
1048         int rq;
1049 #endif
1050         int blog = softnet_data[cpu].input_pkt_queue.qlen;
1051         int avg_blog = softnet_data[cpu].avg_blog;
1052 
1053         avg_blog = (avg_blog >> 1)+ (blog >> 1);
1054 
1055         if (avg_blog > mod_cong) {
1056                 /* Above moderate congestion levels. */
1057                 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1058 #ifdef RAND_LIE
1059                 rd = net_random();
1060                 rq = rd % netdev_max_backlog;
1061                 if (rq < avg_blog) /* unlucky bastard */
1062                         softnet_data[cpu].cng_level = NET_RX_DROP;
1063 #endif
1064         } else if (avg_blog > lo_cong) {
1065                 softnet_data[cpu].cng_level = NET_RX_CN_MOD;
1066 #ifdef RAND_LIE
1067                 rd = net_random();
1068                 rq = rd % netdev_max_backlog;
1069                         if (rq < avg_blog) /* unlucky bastard */
1070                                 softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
1071 #endif
1072         } else if (avg_blog > no_cong) 
1073                 softnet_data[cpu].cng_level = NET_RX_CN_LOW;
1074         else  /* no congestion */
1075                 softnet_data[cpu].cng_level = NET_RX_SUCCESS;
1076 
1077         softnet_data[cpu].avg_blog = avg_blog;
1078 }
1079 
1080 #ifdef OFFLINE_SAMPLE
1081 static void sample_queue(unsigned long dummy)
1082 {
1083 /* 10 ms 0r 1ms -- i dont care -- JHS */
1084         int next_tick = 1;
1085         int cpu = smp_processor_id();
1086 
1087         get_sample_stats(cpu);
1088         next_tick += jiffies;
1089         mod_timer(&samp_timer, next_tick);
1090 }
1091 #endif
1092 
1093 
1094 /**
1095  *      netif_rx        -       post buffer to the network code
1096  *      @skb: buffer to post
1097  *
1098  *      This function receives a packet from a device driver and queues it for
1099  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1100  *      may be dropped during processing for congestion control or by the 
1101  *      protocol layers.
1102  *      
1103  *      return values:
1104  *      NET_RX_SUCCESS  (no congestion)           
1105  *      NET_RX_CN_LOW     (low congestion) 
1106  *      NET_RX_CN_MOD     (moderate congestion)
1107  *      NET_RX_CN_HIGH    (high congestion) 
1108  *      NET_RX_DROP    (packet was dropped)
1109  *      
1110  *      
1111  */
1112 
1113 int netif_rx(struct sk_buff *skb)
1114 {
1115         int this_cpu = smp_processor_id();
1116         struct softnet_data *queue;
1117         unsigned long flags;
1118 
1119         if (skb->stamp.tv_sec == 0)
1120                 get_fast_time(&skb->stamp);
1121 
1122         /* The code is rearranged so that the path is the most
1123            short when CPU is congested, but is still operating.
1124          */
1125         queue = &softnet_data[this_cpu];
1126 
1127         local_irq_save(flags);
1128 
1129         netdev_rx_stat[this_cpu].total++;
1130         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1131                 if (queue->input_pkt_queue.qlen) {
1132                         if (queue->throttle)
1133                                 goto drop;
1134 
1135 enqueue:
1136                         dev_hold(skb->dev);
1137                         __skb_queue_tail(&queue->input_pkt_queue,skb);
1138                         __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
1139                         local_irq_restore(flags);
1140 #ifndef OFFLINE_SAMPLE
1141                         get_sample_stats(this_cpu);
1142 #endif
1143                         return softnet_data[this_cpu].cng_level;
1144                 }
1145 
1146                 if (queue->throttle) {
1147                         queue->throttle = 0;
1148 #ifdef CONFIG_NET_HW_FLOWCONTROL
1149                         if (atomic_dec_and_test(&netdev_dropping))
1150                                 netdev_wakeup();
1151 #endif
1152                 }
1153                 goto enqueue;
1154         }
1155 
1156         if (queue->throttle == 0) {
1157                 queue->throttle = 1;
1158                 netdev_rx_stat[this_cpu].throttled++;
1159 #ifdef CONFIG_NET_HW_FLOWCONTROL
1160                 atomic_inc(&netdev_dropping);
1161 #endif
1162         }
1163 
1164 drop:
1165         netdev_rx_stat[this_cpu].dropped++;
1166         local_irq_restore(flags);
1167 
1168         kfree_skb(skb);
1169         return NET_RX_DROP;
1170 }
1171 
1172 /* Deliver skb to an old protocol, which is not threaded well
1173    or which do not understand shared skbs.
1174  */
1175 static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
1176 {
1177         static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
1178         int ret = NET_RX_DROP;
1179 
1180 
1181         if (!last) {
1182                 skb = skb_clone(skb, GFP_ATOMIC);
1183                 if (skb == NULL)
1184                         return ret;
1185         }
1186 
1187         /* The assumption (correct one) is that old protocols
1188            did not depened on BHs different of NET_BH and TIMER_BH.
1189          */
1190 
1191         /* Emulate NET_BH with special spinlock */
1192         spin_lock(&net_bh_lock);
1193 
1194         /* Disable timers and wait for all timers completion */
1195         tasklet_disable(bh_task_vec+TIMER_BH);
1196 
1197         ret = pt->func(skb, skb->dev, pt);
1198 
1199         tasklet_enable(bh_task_vec+TIMER_BH);
1200         spin_unlock(&net_bh_lock);
1201         return ret;
1202 }
1203 
1204 /* Reparent skb to master device. This function is called
1205  * only from net_rx_action under BR_NETPROTO_LOCK. It is misuse
1206  * of BR_NETPROTO_LOCK, but it is OK for now.
1207  */
1208 static __inline__ void skb_bond(struct sk_buff *skb)
1209 {
1210         struct net_device *dev = skb->dev;
1211         
1212         if (dev->master) {
1213                 dev_hold(dev->master);
1214                 skb->dev = dev->master;
1215                 dev_put(dev);
1216         }
1217 }
1218 
1219 static void net_tx_action(struct softirq_action *h)
1220 {
1221         int cpu = smp_processor_id();
1222 
1223         if (softnet_data[cpu].completion_queue) {
1224                 struct sk_buff *clist;
1225 
1226                 local_irq_disable();
1227                 clist = softnet_data[cpu].completion_queue;
1228                 softnet_data[cpu].completion_queue = NULL;
1229                 local_irq_enable();
1230 
1231                 while (clist != NULL) {
1232                         struct sk_buff *skb = clist;
1233                         clist = clist->next;
1234 
1235                         BUG_TRAP(atomic_read(&skb->users) == 0);
1236                         __kfree_skb(skb);
1237                 }
1238         }
1239 
1240         if (softnet_data[cpu].output_queue) {
1241                 struct net_device *head;
1242 
1243                 local_irq_disable();
1244                 head = softnet_data[cpu].output_queue;
1245                 softnet_data[cpu].output_queue = NULL;
1246                 local_irq_enable();
1247 
1248                 while (head != NULL) {
1249                         struct net_device *dev = head;
1250                         head = head->next_sched;
1251 
1252                         smp_mb__before_clear_bit();
1253                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1254 
1255                         if (spin_trylock(&dev->queue_lock)) {
1256                                 qdisc_run(dev);
1257                                 spin_unlock(&dev->queue_lock);
1258                         } else {
1259                                 netif_schedule(dev);
1260                         }
1261                 }
1262         }
1263 }
1264 
1265 /**
1266  *      net_call_rx_atomic - run a callback with protocol processing excluded
1267  *      @fn: function to call; it takes no arguments and returns nothing
1268  *
1269  *      Calls @fn while holding BR_NETPROTO_LOCK for writing (the _bh lock
1270  *      variant), making the call atomic with respect to the protocol layers.
      *      The receive and tap paths in this file (net_rx_action() and
      *      dev_queue_xmit_nit()) take the same lock for reading.
1271  */
1272  
1273 void net_call_rx_atomic(void (*fn)(void))
1274 {
1275         br_write_lock_bh(BR_NETPROTO_LOCK);
1276         fn();
1277         br_write_unlock_bh(BR_NETPROTO_LOCK);
1278 }
1279 
1280 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1281 void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
1282 #endif
1283 
1284 static int __inline__ handle_bridge(struct sk_buff *skb,
1285                                      struct packet_type *pt_prev)
1286 {
1287         int ret = NET_RX_DROP;
1288 
1289         if (pt_prev) {
1290                 if (!pt_prev->data)
1291                         ret = deliver_to_old_ones(pt_prev, skb, 0);
1292                 else {
1293                         atomic_inc(&skb->users);
1294                         ret = pt_prev->func(skb, skb->dev, pt_prev);
1295                 }
1296         }
1297 
1298         br_handle_frame_hook(skb);
1299         return ret;
1300 }
1301 
1302 
#ifdef CONFIG_NET_DIVERT
/*
 * Hand @skb to divert_frame() when its device has a divert block and
 * diversion is switched on in it; otherwise do nothing.
 */
static inline void handle_diverter(struct sk_buff *skb)
{
	if (!skb->dev->divert)
		return;
	if (!skb->dev->divert->divert)
		return;

	divert_frame(skb);
}
#endif   /* CONFIG_NET_DIVERT */
1311 
1312 
/*
 *      net_rx_action - drain this CPU's input packet queue
 *      @h: softirq action descriptor (unused in the body)
 *
 *      Presumably installed as the NET_RX_SOFTIRQ handler; the registration
 *      is not visible in this part of the file, but netif_rx() raises that
 *      softirq after queueing and this function re-raises it when it stops
 *      early.
 *
 *      Each buffer taken from softnet_data[cpu].input_pkt_queue is offered
 *      to the ptype_all taps, optionally to the diverter and the bridge
 *      hook, and then to the ptype_base[] handlers matching skb->protocol.
 *      Delivery is deferred by one match (pt_prev): every handler but the
 *      last gets an extra reference or a clone, and the last one gets the
 *      buffer itself. A buffer nobody wants is freed.
 *
 *      The loop stops when the queue is empty, when more than
 *      netdev_max_backlog packets were handled, or when more than one
 *      jiffy has elapsed. In the last two cases the softirq is re-raised.
 */
1313 static void net_rx_action(struct softirq_action *h)
1314 {
1315         int this_cpu = smp_processor_id();
1316         struct softnet_data *queue = &softnet_data[this_cpu];
1317         unsigned long start_time = jiffies;
1318         int bugdet = netdev_max_backlog;     /* packet budget for this run ("bugdet" is the original spelling) */
1319 
1320         br_read_lock(BR_NETPROTO_LOCK);
1321 
1322         for (;;) {
1323                 struct sk_buff *skb;
1324                 struct net_device *rx_dev;
1325 
                     /* netif_rx() fills this queue with interrupts disabled, so dequeue the same way. */
1326                 local_irq_disable();
1327                 skb = __skb_dequeue(&queue->input_pkt_queue);
1328                 local_irq_enable();
1329 
1330                 if (skb == NULL)
1331                         break;
1332 
1333                 skb_bond(skb);
1334 
                     /* Remember the device whose reference travels with the skb; it is dropped with dev_put() below. */
1335                 rx_dev = skb->dev;
1336 
1337 #ifdef CONFIG_NET_FASTROUTE
1338                 if (skb->pkt_type == PACKET_FASTROUTE) {
1339                         netdev_rx_stat[this_cpu].fastroute_deferred_out++;
1340                         dev_queue_xmit(skb);
1341                         dev_put(rx_dev);
1342                         continue;
1343                 }
1344 #endif
1345                 skb->h.raw = skb->nh.raw = skb->data;
1346                 {
1347                         struct packet_type *ptype, *pt_prev;
1348                         unsigned short type = skb->protocol;
1349 
                             /* Pass 1: taps on ptype_all. Delivery lags one match behind via pt_prev. */
1350                         pt_prev = NULL;
1351                         for (ptype = ptype_all; ptype; ptype = ptype->next) {
1352                                 if (!ptype->dev || ptype->dev == skb->dev) {
1353                                         if (pt_prev) {
1354                                                 if (!pt_prev->data) {
                                                             /* old-style handler: gets a clone (last == 0) */
1355                                                         deliver_to_old_ones(pt_prev, skb, 0);
1356                                                 } else {
                                                             /* skb stays in use here, so the handler gets its own reference */
1357                                                         atomic_inc(&skb->users);
1358                                                         pt_prev->func(skb,
1359                                                                       skb->dev,
1360                                                                       pt_prev);
1361                                                 }
1362                                         }
1363                                         pt_prev = ptype;
1364                                 }
1365                         }
1366 
1367 #ifdef CONFIG_NET_DIVERT
1368                         if (skb->dev->divert && skb->dev->divert->divert)
1369                                 handle_diverter(skb);
1370 #endif /* CONFIG_NET_DIVERT */
1371 
1372                         
1373 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
                             /* Frames arriving on a bridge port go to the bridge hook and skip the per-protocol handlers. */
1374                         if (skb->dev->br_port != NULL &&
1375                             br_handle_frame_hook != NULL) {
1376                                 handle_bridge(skb, pt_prev);
1377                                 dev_put(rx_dev);
1378                                 continue;
1379                         }
1380 #endif
1381 
                             /* Pass 2: handlers in the ptype_base[] bucket picked by the low 4 bits of the host-order protocol. */
1382                         for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
1383                                 if (ptype->type == type &&
1384                                     (!ptype->dev || ptype->dev == skb->dev)) {
1385                                         if (pt_prev) {
1386                                                 if (!pt_prev->data)
1387                                                         deliver_to_old_ones(pt_prev, skb, 0);
1388                                                 else {
1389                                                         atomic_inc(&skb->users);
1390                                                         pt_prev->func(skb,
1391                                                                       skb->dev,
1392                                                                       pt_prev);
1393                                                 }
1394                                         }
1395                                         pt_prev = ptype;
1396                                 }
1397                         }
1398 
                             /* The final handler receives the skb itself (no clone, no extra reference); unclaimed skbs are freed. */
1399                         if (pt_prev) {
1400                                 if (!pt_prev->data)
1401                                         deliver_to_old_ones(pt_prev, skb, 1);
1402                                 else
1403                                         pt_prev->func(skb, skb->dev, pt_prev);
1404                         } else
1405                                 kfree_skb(skb);
1406                 }
1407 
1408                 dev_put(rx_dev);
1409 
                     /* Stop when the packet budget is used up or more than one jiffy has passed. */
1410                 if (bugdet-- < 0 || jiffies - start_time > 1)
1411                         goto softnet_break;
1412 
1413 #ifdef CONFIG_NET_HW_FLOWCONTROL
             /*
              * Queue has drained below no_cong_thresh: lift throttling and wake
              * the flow-controlled devices.
              * NOTE(review): when atomic_dec_and_test() does not reach zero the
              * counter has still been decremented while queue->throttle stays
              * set, so a later un-throttle path would decrement it again --
              * confirm whether this is intended.
              */
1414         if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
1415                 if (atomic_dec_and_test(&netdev_dropping)) {
1416                         queue->throttle = 0;
1417                         netdev_wakeup();
1418                         goto softnet_break;
1419                 }
1420         }
1421 #endif
1422 
1423         }
1424         br_read_unlock(BR_NETPROTO_LOCK);
1425 
             /* Queue is empty: end any throttling that netif_rx() started. */
1426         local_irq_disable();
1427         if (queue->throttle) {
1428                 queue->throttle = 0;
1429 #ifdef CONFIG_NET_HW_FLOWCONTROL
1430                 if (atomic_dec_and_test(&netdev_dropping))
1431                         netdev_wakeup();
1432 #endif
1433         }
1434         local_irq_enable();
1435 
1436         NET_PROFILE_LEAVE(softnet_process);
1437         return;
1438 
1439 softnet_break:
1440         br_read_unlock(BR_NETPROTO_LOCK);
1441 
             /* Stopped early: count the squeeze and re-raise the softirq so the rest of the queue is handled later. */
1442         local_irq_disable();
1443         netdev_rx_stat[this_cpu].time_squeeze++;
1444         __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
1445         local_irq_enable();
1446 
1447         NET_PROFILE_LEAVE(softnet_process);
1448         return;
1449 }
1450 
1451 static gifconf_func_t * gifconf_list [NPROTO];
1452 
1453 /**
1454  *      register_gifconf        -       register a SIOCGIF handler
1455  *      @family: Address family
1456  *      @gifconf: Function handler
1457  *
1458  *      Register protocol dependent address dumping routines. The handler
1459  *      that is passed must not be freed or reused until it has been replaced
1460  *      by another handler.
1461  */
1462  
1463 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1464 {
1465         if (family>=NPROTO)
1466                 return -EINVAL;
1467         gifconf_list[family] = gifconf;
1468         return 0;
1469 }
1470 
1471 
1472 /*
1473  *      Map an interface index to its name (SIOCGIFNAME)
1474  */
1475 
1476 /*
1477  *      We need this ioctl for efficient implementation of the
1478  *      if_indextoname() function required by the IPv6 API.  Without
1479  *      it, we would have to search all the interfaces to find a
1480  *      match.  --pb
1481  */
1482 
1483 static int dev_ifname(struct ifreq *arg)
1484 {
1485         struct net_device *dev;
1486         struct ifreq ifr;
1487 
1488         /*
1489          *      Fetch the caller's info block. 
1490          */
1491         
1492         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1493                 return -EFAULT;
1494 
1495         read_lock(&dev_base_lock);
1496         dev = __dev_get_by_index(ifr.ifr_ifindex);
1497         if (!dev) {
1498                 read_unlock(&dev_base_lock);
1499                 return -ENODEV;
1500         }
1501 
1502         strcpy(ifr.ifr_name, dev->name);
1503         read_unlock(&dev_base_lock);
1504 
1505         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1506                 return -EFAULT;
1507         return 0;
1508 }
1509 
1510 /*
1511  *      Perform a SIOCGIFCONF call. This structure will change
1512  *      size eventually, and there is nothing I can do about it.
1513  *      Thus we will need a 'compatibility mode'.
1514  */
1515 
1516 static int dev_ifconf(char *arg)
1517 {
1518         struct ifconf ifc;
1519         struct net_device *dev;
1520         char *pos;
1521         int len;
1522         int total;
1523         int i;
1524 
1525         /*
1526          *      Fetch the caller's info block. 
1527          */
1528         
1529         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1530                 return -EFAULT;
1531 
1532         pos = ifc.ifc_buf;
1533         len = ifc.ifc_len;
1534 
1535         /*
1536          *      Loop over the interfaces, and write an info block for each. 
1537          */
1538 
1539         total = 0;
1540         for (dev = dev_base; dev != NULL; dev = dev->next) {
1541                 for (i=0; i<NPROTO; i++) {
1542                         if (gifconf_list[i]) {
1543                                 int done;
1544                                 if (pos==NULL) {
1545                                         done = gifconf_list[i](dev, NULL, 0);
1546                                 } else {
1547                                         done = gifconf_list[i](dev, pos+total, len-total);
1548                                 }
1549                                 if (done<0) {
1550                                         return -EFAULT;
1551                                 }
1552                                 total += done;
1553                         }
1554                 }
1555         }
1556 
1557         /*
1558          *      All done.  Write the updated control block back to the caller. 
1559          */
1560         ifc.ifc_len = total;
1561 
1562         if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
1563                 return -EFAULT; 
1564 
1565         /* 
1566          *      Both BSD and Solaris return 0 here, so we do too.
1567          */
1568         return 0;
1569 }
1570 
1571 /*
1572  *      This is invoked by the /proc filesystem handler to display a device
1573  *      in detail.
1574  */
1575 
1576 #ifdef CONFIG_PROC_FS
1577 
1578 static int sprintf_stats(char *buffer, struct net_device *dev)
1579 {
1580         struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
1581         int size;
1582         
1583         if (stats)
1584                 size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1585                    dev->name,
1586                    stats->rx_bytes,
1587                    stats->rx_packets, stats->rx_errors,
1588                    stats->rx_dropped + stats->rx_missed_errors,
1589                    stats->rx_fifo_errors,
1590                    stats->rx_length_errors + stats->rx_over_errors
1591                    + stats->rx_crc_errors + stats->rx_frame_errors,
1592                    stats->rx_compressed, stats->multicast,
1593                    stats->tx_bytes,
1594                    stats->tx_packets, stats->tx_errors, stats->tx_dropped,
1595                    stats->tx_fifo_errors, stats->collisions,
1596                    stats->tx_carrier_errors + stats->tx_aborted_errors
1597                    + stats->tx_window_errors + stats->tx_heartbeat_errors,
1598                    stats->tx_compressed);
1599         else
1600                 size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
1601 
1602         return size;
1603 }
1604 
1605 /*
1606  *      Called from the PROCfs module. This now uses the new arbitrary sized /proc/net interface
1607  *      to create /proc/net/dev
1608  */
1609  
1610 static int dev_get_info(char *buffer, char **start, off_t offset, int length)
1611 {
1612         int len = 0;
1613         off_t begin = 0;
1614         off_t pos = 0;
1615         int size;
1616         struct net_device *dev;
1617 
1618 
1619         size = sprintf(buffer, 
1620                 "Inter-|   Receive                                                |  Transmit\n"
1621                 " face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed\n");
1622         
1623         pos += size;
1624         len += size;
1625         
1626 
1627         read_lock(&dev_base_lock);
1628         for (dev = dev_base; dev != NULL; dev = dev->next) {
1629                 size = sprintf_stats(buffer+len, dev);
1630                 len += size;
1631                 pos = begin + len;
1632                                 
1633                 if (pos < offset) {
1634                         len = 0;
1635                         begin = pos;
1636                 }
1637                 if (pos > offset + length)
1638                         break;
1639         }
1640         read_unlock(&dev_base_lock);
1641 
1642         *start = buffer + (offset - begin);     /* Start of wanted data */
1643         len -= (offset - begin);                /* Start slop */
1644         if (len > length)
1645                 len = length;                   /* Ending slop */
1646         if (len < 0)
1647                 len = 0;
1648         return len;
1649 }
1650 
1651 static int dev_proc_stats(char *buffer, char **start, off_t offset,
1652                           int length, int *eof, void *data)
1653 {
1654         int i, lcpu;
1655         int len=0;
1656 
1657         for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
1658                 i = cpu_logical_map(lcpu);
1659                 len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
1660                                netdev_rx_stat[i].total,
1661                                netdev_rx_stat[i].dropped,
1662                                netdev_rx_stat[i].time_squeeze,
1663                                netdev_rx_stat[i].throttled,
1664                                netdev_rx_stat[i].fastroute_hit,
1665                                netdev_rx_stat[i].fastroute_success,
1666                                netdev_rx_stat[i].fastroute_defer,
1667                                netdev_rx_stat[i].fastroute_deferred_out,
1668 #if 0
1669                                netdev_rx_stat[i].fastroute_latency_reduction
1670 #else
1671                                netdev_rx_stat[i].cpu_collision
1672 #endif
1673                                );
1674         }
1675 
1676         len -= offset;
1677 
1678         if (len > length)
1679                 len = length;
1680         if (len < 0)
1681                 len = 0;
1682 
1683         *start = buffer + offset;
1684         *eof = 1;
1685 
1686         return len;
1687 }
1688 
1689 #endif  /* CONFIG_PROC_FS */
1690 
1691 
1692 #ifdef WIRELESS_EXT
1693 #ifdef CONFIG_PROC_FS
1694 
1695 /*
1696  * Print one entry of /proc/net/wireless
1697  * This is a clone of /proc/net/dev (just above)
1698  */
1699 static int sprintf_wireless_stats(char *buffer, struct net_device *dev)
1700 {
1701         /* Get stats from the driver */
1702         struct iw_statistics *stats = (dev->get_wireless_stats ?
1703                                        dev->get_wireless_stats(dev) :
1704                                        (struct iw_statistics *) NULL);
1705         int size;
1706 
1707         if (stats != (struct iw_statistics *) NULL) {
1708                 size = sprintf(buffer,
1709                                "%6s: %04x  %3d%c  %3d%c  %3d%c  %6d %6d %6d\n",
1710                                dev->name,
1711                                stats->status,
1712                                stats->qual.qual,
1713                                stats->qual.updated & 1 ? '.' : ' ',
1714                                stats->qual.level,
1715                                stats->qual.updated & 2 ? '.' : ' ',
1716                                stats->qual.noise,
1717                                stats->qual.updated & 4 ? '.' : ' ',
1718                                stats->discard.nwid,
1719                                stats->discard.code,
1720                                stats->discard.misc);
1721                 stats->qual.updated = 0;
1722         }
1723         else
1724                 size = 0;
1725 
1726         return size;
1727 }
1728 
1729 /*
1730  * Print info for /proc/net/wireless (print all entries)
1731  * This is a clone of /proc/net/dev (just above)
1732  */
1733 static int dev_get_wireless_info(char * buffer, char **start, off_t offset,
1734                           int length)
1735 {
1736         int             len = 0;
1737         off_t           begin = 0;
1738         off_t           pos = 0;
1739         int             size;
1740         
1741         struct net_device *     dev;
1742 
1743         size = sprintf(buffer,
1744                        "Inter-| sta-|   Quality        |   Discarded packets\n"
1745                        " face | tus | link level noise |  nwid  crypt   misc\n"
1746                         );
1747         
1748         pos += size;
1749         len += size;
1750 
1751         read_lock(&dev_base_lock);
1752         for (dev = dev_base; dev != NULL; dev = dev->next) {
1753                 size = sprintf_wireless_stats(buffer + len, dev);
1754                 len += size;
1755                 pos = begin + len;
1756 
1757                 if (pos < offset) {
1758                         len = 0;
1759                         begin = pos;
1760                 }
1761                 if (pos > offset + length)
1762                         break;
1763         }
1764         read_unlock(&dev_base_lock);
1765 
1766         *start = buffer + (offset - begin);     /* Start of wanted data */
1767         len -= (offset - begin);                /* Start slop */
1768         if (len > length)
1769                 len = length;                   /* Ending slop */
1770         if (len < 0)
1771                 len = 0;
1772 
1773         return len;
1774 }
1775 #endif  /* CONFIG_PROC_FS */
1776 #endif  /* WIRELESS_EXT */
1777 
1778 /**
1779  *      netdev_set_master       -       set up master/slave pair
1780  *      @slave: slave device
1781  *      @master: new master device
1782  *
1783  *      Changes the master device of the slave. Pass %NULL to break the
1784  *      bonding. The caller must hold the RTNL semaphore. On a failure
1785  *      a negative errno code is returned. On success the reference counts
1786  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
1787  *      function returns zero.
1788  */
1789  
1790 int netdev_set_master(struct net_device *slave, struct net_device *master)
1791 {
1792         struct net_device *old = slave->master;
1793 
1794         ASSERT_RTNL();
1795 
1796         if (master) {
1797                 if (old)
1798                         return -EBUSY;
1799                 dev_hold(master);
1800         }
1801 
1802         br_write_lock_bh(BR_NETPROTO_LOCK);
1803         slave->master = master;
1804         br_write_unlock_bh(BR_NETPROTO_LOCK);
1805 
1806         if (old)
1807                 dev_put(old);
1808 
1809         if (master)
1810                 slave->flags |= IFF_SLAVE;
1811         else
1812                 slave->flags &= ~IFF_SLAVE;
1813 
1814         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
1815         return 0;
1816 }
1817 
1818 /**
1819  *      dev_set_promiscuity     - update promiscuity count on a device
1820  *      @dev: device
1821  *      @inc: modifier
1822  *
1823  *      Add or remove promsicuity from a device. While the count in the device
1824  *      remains above zero the interface remains promiscuous. Once it hits zero
1825  *      the device reverts back to normal filtering operation. A negative inc
1826  *      value is used to drop promiscuity on the device.
1827  */
1828  
1829 void dev_set_promiscuity(struct net_device *dev, int inc)
1830 {
1831         unsigned short old_flags = dev->flags;
1832 
1833         dev->flags |= IFF_PROMISC;
1834         if ((dev->promiscuity += inc) == 0)
1835                 dev->flags &= ~IFF_PROMISC;
1836         if (dev->flags^old_flags) {
1837 #ifdef CONFIG_NET_FASTROUTE
1838                 if (dev->flags&IFF_PROMISC) {
1839                         netdev_fastroute_obstacles++;
1840                         dev_clear_fastroute(dev);
1841                 } else
1842                         netdev_fastroute_obstacles--;
1843 #endif
1844                 dev_mc_upload(dev);
1845                 printk(KERN_INFO "device %s %s promiscuous mode\n",
1846                        dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
1847         }
1848 }
1849 
1850 /**
1851  *      dev_set_allmulti        - update allmulti count on a device
1852  *      @dev: device
1853  *      @inc: modifier
1854  *
1855  *      Add or remove reception of all multicast frames to a device. While the
1856  *      count in the device remains above zero the interface remains listening
1857  *      to all interfaces. Once it hits zero the device reverts back to normal
1858  *      filtering operation. A negative @inc value is used to drop the counter
1859  *      when releasing a resource needing all multicasts.
1860  */
1861 
1862 void dev_set_allmulti(struct net_device *dev, int inc)
1863 {
1864         unsigned short old_flags = dev->flags;
1865 
1866         dev->flags |= IFF_ALLMULTI;
1867         if ((dev->allmulti += inc) == 0)
1868                 dev->flags &= ~IFF_ALLMULTI;
1869         if (dev->flags^old_flags)
1870                 dev_mc_upload(dev);
1871 }
1872 
1873 int dev_change_flags(struct net_device *dev, unsigned flags)
1874 {
1875         int ret;
1876         int old_flags = dev->flags;
1877 
1878         /*
1879          *      Set the flags on our device.
1880          */
1881 
1882         dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
1883                                IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
1884                                        (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
1885 
1886         /*
1887          *      Load in the correct multicast list now the flags have changed.
1888          */                             
1889 
1890         dev_mc_upload(dev);
1891 
1892         /*
1893          *      Have we downed the interface. We handle IFF_UP ourselves
1894          *      according to user attempts to set it, rather than blindly
1895          *      setting it.
1896          */
1897 
1898         ret = 0;
1899         if ((old_flags^flags)&IFF_UP)   /* Bit is different  ? */
1900         {
1901                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
1902 
1903                 if (ret == 0) 
1904                         dev_mc_upload(dev);
1905         }
1906 
1907         if (dev->flags&IFF_UP &&
1908             ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
1909                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
1910 
1911         if ((flags^dev->gflags)&IFF_PROMISC) {
1912                 int inc = (flags&IFF_PROMISC) ? +1 : -1;
1913                 dev->gflags ^= IFF_PROMISC;
1914                 dev_set_promiscuity(dev, inc);
1915         }
1916 
1917         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
1918            is important. Some (broken) drivers set IFF_PROMISC, when
1919            IFF_ALLMULTI is requested not asking us and not reporting.
1920          */
1921         if ((flags^dev->gflags)&IFF_ALLMULTI) {
1922                 int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
1923                 dev->gflags ^= IFF_ALLMULTI;
1924                 dev_set_allmulti(dev, inc);
1925         }
1926 
1927         if (old_flags^dev->flags)
1928                 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
1929 
1930         return ret;
1931 }
1932 
1933 /*
1934  *      Perform the SIOCxIFxxx calls. 
1935  */
1936  
1937 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
1938 {
1939         struct net_device *dev;
1940         int err;
1941 
1942         if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
1943                 return -ENODEV;
1944 
1945         switch(cmd) 
1946         {
1947                 case SIOCGIFFLAGS:      /* Get interface flags */
1948                         ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
1949                                 |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
1950                         if (netif_running(dev) && netif_carrier_ok(dev))
1951                                 ifr->ifr_flags |= IFF_RUNNING;
1952                         return 0;
1953 
1954                 case SIOCSIFFLAGS:      /* Set interface flags */
1955                         return dev_change_flags(dev, ifr->ifr_flags);
1956                 
1957                 case SIOCGIFMETRIC:     /* Get the metric on the interface (currently unused) */
1958                         ifr->ifr_metric = 0;
1959                         return 0;
1960                         
1961                 case SIOCSIFMETRIC:     /* Set the metric on the interface (currently unused) */
1962                         return -EOPNOTSUPP;
1963         
1964                 case SIOCGIFMTU:        /* Get the MTU of a device */
1965                         ifr->ifr_mtu = dev->mtu;
1966                         return 0;
1967         
1968                 case SIOCSIFMTU:        /* Set the MTU of a device */
1969                         if (ifr->ifr_mtu == dev->mtu)
1970                                 return 0;
1971 
1972                         /*
1973                          *      MTU must be positive.
1974                          */
1975                          
1976                         if (ifr->ifr_mtu<0)
1977                                 return -EINVAL;
1978 
1979                         if (!netif_device_present(dev))
1980                                 return -ENODEV;
1981 
1982                         if (dev->change_mtu)
1983                                 err = dev->change_mtu(dev, ifr->ifr_mtu);
1984                         else {
1985                                 dev->mtu = ifr->ifr_mtu;
1986                                 err = 0;
1987                         }
1988                         if (!err && dev->flags&IFF_UP)
1989                                 notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
1990                         return err;
1991 
1992                 case SIOCGIFHWADDR:
1993                         memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
1994                         ifr->ifr_hwaddr.sa_family=dev->type;
1995                         return 0;
1996                                 
1997                 case SIOCSIFHWADDR:
1998                         if (dev->set_mac_address == NULL)
1999                                 return -EOPNOTSUPP;
2000                         if (ifr->ifr_hwaddr.sa_family!=dev->type)
2001                                 return -EINVAL;
2002                         if (!netif_device_present(dev))
2003                                 return -ENODEV;
2004                         err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
2005                         if (!err)
2006                                 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2007                         return err;
2008                         
2009                 case SIOCSIFHWBROADCAST:
2010                         if (ifr->ifr_hwaddr.sa_family!=dev->type)
2011                                 return -EINVAL;
2012                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
2013                         notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2014                         return 0;
2015 
2016                 case SIOCGIFMAP:
2017                         ifr->ifr_map.mem_start=dev->mem_start;
2018                         ifr->ifr_map.mem_end=dev->mem_end;
2019                         ifr->ifr_map.base_addr=dev->base_addr;
2020                         ifr->ifr_map.irq=dev->irq;
2021                         ifr->ifr_map.dma=dev->dma;
2022                         ifr->ifr_map.port=dev->if_port;
2023                         return 0;
2024                         
2025                 case SIOCSIFMAP:
2026                         if (dev->set_config) {
2027                                 if (!netif_device_present(dev))
2028                                         return -ENODEV;
2029                                 return dev->set_config(dev,&ifr->ifr_map);
2030                         }
2031                         return -EOPNOTSUPP;
2032                         
2033                 case SIOCADDMULTI:
2034                         if (dev->set_multicast_list == NULL ||
2035                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2036                                 return -EINVAL;
2037                         if (!netif_device_present(dev))
2038                                 return -ENODEV;
2039                         dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
2040                         return 0;
2041 
2042                 case SIOCDELMULTI:
2043                         if (dev->set_multicast_list == NULL ||
2044                             ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
2045                                 return -EINVAL;
2046                         if (!netif_device_present(dev))
2047                                 return -ENODEV;
2048                         dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
2049                         return 0;
2050 
2051                 case SIOCGIFINDEX:
2052                         ifr->ifr_ifindex = dev->ifindex;
2053                         return 0;
2054 
2055                 case SIOCGIFTXQLEN:
2056                         ifr->ifr_qlen = dev->tx_queue_len;
2057                         return 0;
2058 
2059                 case SIOCSIFTXQLEN:
2060                         if (ifr->ifr_qlen<0)
2061                                 return -EINVAL;
2062                         dev->tx_queue_len = ifr->ifr_qlen;
2063                         return 0;
2064 
2065                 case SIOCSIFNAME:
2066                         if (dev->flags&IFF_UP)
2067                                 return -EBUSY;
2068                         if (__dev_get_by_name(ifr->ifr_newname))
2069                                 return -EEXIST;
2070                         memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
2071                         dev->name[IFNAMSIZ-1] = 0;
2072                         notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
2073                         return 0;
2074 
2075                 /*
2076                  *      Unknown or private ioctl
2077                  */
2078 
2079                 default:
2080                         if ((cmd >= SIOCDEVPRIVATE &&
2081                             cmd <= SIOCDEVPRIVATE + 15) ||
2082                             cmd == SIOCETHTOOL) {
2083                                 if (dev->do_ioctl) {
2084                                         if (!netif_device_present(dev))
2085                                                 return -ENODEV;
2086                                         return dev->do_ioctl(dev, ifr, cmd);
2087                                 }
2088                                 return -EOPNOTSUPP;
2089                         }
2090 
2091 #ifdef WIRELESS_EXT
2092                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2093                                 if (dev->do_ioctl) {
2094                                         if (!netif_device_present(dev))
2095                                                 return -ENODEV;
2096                                         return dev->do_ioctl(dev, ifr, cmd);
2097                                 }
2098                                 return -EOPNOTSUPP;
2099                         }
2100 #endif  /* WIRELESS_EXT */
2101 
2102         }
2103         return -EINVAL;
2104 }
2105 
2106 /*
2107  *      This function handles all "interface"-type I/O control requests. The actual
2108  *      'doing' part of this is dev_ifsioc above.
2109  */
2110 
2111 /**
2112  *      dev_ioctl       -       network device ioctl
2113  *      @cmd: command to issue
2114  *      @arg: pointer to a struct ifreq in user space
2115  *
2116  *      Issue ioctl functions to devices. This is normally called by the
2117  *      user space syscall interfaces but can sometimes be useful for 
2118  *      other purposes. The return value is the return from the syscall if
2119  *      positive or a negative errno code on error.
2120  */
2121 
2122 int dev_ioctl(unsigned int cmd, void *arg)
2123 {
2124         struct ifreq ifr;
2125         int ret;
2126         char *colon;
2127 
2128         /* One special case: SIOCGIFCONF takes ifconf argument
2129            and requires shared lock, because it sleeps writing
2130            to user space.
2131          */
2132            
2133         if (cmd == SIOCGIFCONF) {
2134                 rtnl_shlock();
2135                 ret = dev_ifconf((char *) arg);
2136                 rtnl_shunlock();
2137                 return ret;
2138         }
2139         if (cmd == SIOCGIFNAME) {
2140                 return dev_ifname((struct ifreq *)arg);
2141         }
2142 
2143         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2144                 return -EFAULT;
2145 
2146         ifr.ifr_name[IFNAMSIZ-1] = 0;
2147 
2148         colon = strchr(ifr.ifr_name, ':');
2149         if (colon)
2150                 *colon = 0;
2151 
2152         /*
2153          *      See which interface the caller is talking about. 
2154          */
2155          
2156         switch(cmd) 
2157         {
2158                 /*
2159                  *      These ioctl calls:
2160                  *      - can be done by all.
2161                  *      - atomic and do not require locking.
2162                  *      - return a value
2163                  */
2164                  
2165                 case SIOCGIFFLAGS:
2166                 case SIOCGIFMETRIC:
2167                 case SIOCGIFMTU:
2168                 case SIOCGIFHWADDR:
2169                 case SIOCGIFSLAVE:
2170                 case SIOCGIFMAP:
2171                 case SIOCGIFINDEX:
2172                 case SIOCGIFTXQLEN:
2173                         dev_load(ifr.ifr_name);
2174                         read_lock(&dev_base_lock);
2175                         ret = dev_ifsioc(&ifr, cmd);
2176                         read_unlock(&dev_base_lock);
2177                         if (!ret) {
2178                                 if (colon)
2179                                         *colon = ':';
2180                                 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2181                                         return -EFAULT;
2182                         }
2183                         return ret;
2184 
2185                 /*
2186                  *      These ioctl calls:
2187                  *      - require superuser power.
2188                  *      - require strict serialization.
2189                  *      - do not return a value
2190                  */
2191                  
2192                 case SIOCSIFFLAGS:
2193                 case SIOCSIFMETRIC:
2194                 case SIOCSIFMTU:
2195                 case SIOCSIFMAP:
2196                 case SIOCSIFHWADDR:
2197                 case SIOCSIFSLAVE:
2198                 case SIOCADDMULTI:
2199                 case SIOCDELMULTI:
2200                 case SIOCSIFHWBROADCAST:
2201                 case SIOCSIFTXQLEN:
2202                 case SIOCSIFNAME:
2203                 case SIOCETHTOOL:
2204                         if (!capable(CAP_NET_ADMIN))
2205                                 return -EPERM;
2206                         dev_load(ifr.ifr_name);
2207                         dev_probe_lock();
2208                         rtnl_lock();
2209                         ret = dev_ifsioc(&ifr, cmd);
2210                         rtnl_unlock();
2211                         dev_probe_unlock();
2212                         return ret;
2213         
2214                 case SIOCGIFMEM:
2215                         /* Get the per device memory space. We can add this but currently
2216                            do not support it */
2217                 case SIOCSIFMEM:
2218                         /* Set the per device memory buffer space. Not applicable in our case */
2219                 case SIOCSIFLINK:
2220                         return -EINVAL;
2221 
2222                 /*
2223                  *      Unknown or private ioctl.
2224                  */     
2225                  
2226                 default:
2227                         if (cmd >= SIOCDEVPRIVATE &&
2228                             cmd <= SIOCDEVPRIVATE + 15) {
2229                                 dev_load(ifr.ifr_name);
2230                                 dev_probe_lock();
2231                                 rtnl_lock();
2232                                 ret = dev_ifsioc(&ifr, cmd);
2233                                 rtnl_unlock();
2234                                 dev_probe_unlock();
2235                                 if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2236                                         return -EFAULT;
2237                                 return ret;
2238                         }
2239 #ifdef WIRELESS_EXT
2240                         /* Take care of Wireless Extensions */
2241                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2242                                 /* If command is `set a parameter', or
2243                                  * `get the encoding parameters', check if
2244                                  * the user has the right to do it */
2245                                 if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
2246                                         if(!capable(CAP_NET_ADMIN))
2247                                                 return -EPERM;
2248                                 }
2249                                 dev_load(ifr.ifr_name);
2250                                 rtnl_lock();
2251                                 ret = dev_ifsioc(&ifr, cmd);
2252                                 rtnl_unlock();
2253                                 if (!ret && IW_IS_GET(cmd) &&
2254                                     copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2255                                         return -EFAULT;
2256                                 return ret;
2257                         }
2258 #endif  /* WIRELESS_EXT */
2259                         return -EINVAL;
2260         }
2261 }
2262 
2263 
2264 /**
2265  *      dev_new_index   -       allocate an ifindex
2266  *
2267  *      Returns a suitable unique value for a new device interface
2268  *      number.  The caller must hold the rtnl semaphore or the
2269  *      dev_base_lock to be sure it remains unique.
2270  */
2271  
2272 int dev_new_index(void)
2273 {
2274         static int ifindex;
2275         for (;;) {
2276                 if (++ifindex <= 0)
2277                         ifindex=1;
2278                 if (__dev_get_by_index(ifindex) == NULL)
2279                         return ifindex;
2280         }
2281 }
2282 
2283 static int dev_boot_phase = 1;
2284 
2285 /**
2286  *      register_netdevice      - register a network device
2287  *      @dev: device to register
2288  *      
2289  *      Take a completed network device structure and add it to the kernel
2290  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2291  *      chain. 0 is returned on success. A negative errno code is returned
2292  *      on a failure to set up the device, or if the name is a duplicate.
2293  *
2294  *      Callers must hold the rtnl semaphore.  See the comment at the
2295  *      end of Space.c for details about the locking.  You may want
2296  *      register_netdev() instead of this.
2297  *
2298  *      BUGS:
2299  *      The locking appears insufficient to guarantee two parallel registers
2300  *      will not get the same name.
2301  */
2302 
2303 int register_netdevice(struct net_device *dev)
2304 {
2305         struct net_device *d, **dp;
2306 #ifdef CONFIG_NET_DIVERT
2307         int ret;
2308 #endif
2309 
2310         spin_lock_init(&dev->queue_lock);
2311         spin_lock_init(&dev->xmit_lock);
2312         dev->xmit_lock_owner = -1;
2313 #ifdef CONFIG_NET_FASTROUTE
2314         dev->fastpath_lock=RW_LOCK_UNLOCKED;
2315 #endif
2316 
2317         if (dev_boot_phase) {
2318 #ifdef CONFIG_NET_DIVERT
2319                 ret = alloc_divert_blk(dev);
2320                 if (ret)
2321                         return ret;
2322 #endif /* CONFIG_NET_DIVERT */
2323                 
2324                 /* This is NOT bug, but I am not sure, that all the
2325                    devices, initialized before netdev module is started
2326                    are sane. 
2327 
2328                    Now they are chained to device boot list
2329                    and probed later. If a module is initialized
2330                    before netdev, but assumes that dev->init
2331                    is really called by register_netdev(), it will fail.
2332 
2333                    So that this message should be printed for a while.
2334                  */
2335                 printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name);
2336 
2337                 /* Check for existence, and append to tail of chain */
2338                 for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
2339                         if (d == dev || strcmp(d->name, dev->name) == 0) {
2340                                 return -EEXIST;
2341                         }
2342                 }
2343                 dev->next = NULL;
2344                 write_lock_bh(&dev_base_lock);
2345                 *dp = dev;
2346                 dev_hold(dev);
2347                 write_unlock_bh(&dev_base_lock);
2348 
2349                 /*
2350                  *      Default initial state at registry is that the
2351                  *      device is present.
2352                  */
2353 
2354                 set_bit(__LINK_STATE_PRESENT, &dev->state);
2355 
2356                 return 0;
2357         }
2358 
2359 #ifdef CONFIG_NET_DIVERT
2360         ret = alloc_divert_blk(dev);
2361         if (ret)
2362                 return ret;
2363 #endif /* CONFIG_NET_DIVERT */
2364         
2365         dev->iflink = -1;
2366 
2367         /* Init, if this function is available */
2368         if (dev->init && dev->init(dev) != 0)
2369                 return -EIO;
2370 
2371         dev->ifindex = dev_new_index();
2372         if (dev->iflink == -1)
2373                 dev->iflink = dev->ifindex;
2374 
2375         /* Check for existence, and append to tail of chain */
2376         for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
2377                 if (d == dev || strcmp(d->name, dev->name) == 0) {
2378                         return -EEXIST;
2379                 }
2380         }
2381         /*
2382          *      nil rebuild_header routine,
2383          *      that should be never called and used as just bug trap.
2384          */
2385 
2386         if (dev->rebuild_header == NULL)
2387                 dev->rebuild_header = default_rebuild_header;
2388 
2389         /*
2390          *      Default initial state at registry is that the
2391          *      device is present.
2392          */
2393 
2394         set_bit(__LINK_STATE_PRESENT, &dev->state);
2395 
2396         dev->next = NULL;
2397         dev_init_scheduler(dev);
2398         write_lock_bh(&dev_base_lock);
2399         *dp = dev;
2400         dev_hold(dev);
2401         dev->deadbeaf = 0;
2402         write_unlock_bh(&dev_base_lock);
2403 
2404         /* Notify protocols, that a new device appeared. */
2405         notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2406 
2407         net_run_sbin_hotplug(dev, "register");
2408 
2409         return 0;
2410 }
2411 
2412 /**
2413  *      netdev_finish_unregister - complete unregistration
2414  *      @dev: device
2415  *
2416  *      Destroy and free a dead device. A value of zero is returned on
2417  *      success.
2418  */
2419  
2420 int netdev_finish_unregister(struct net_device *dev)
2421 {
2422         BUG_TRAP(dev->ip_ptr==NULL);
2423         BUG_TRAP(dev->ip6_ptr==NULL);
2424         BUG_TRAP(dev->dn_ptr==NULL);
2425 
2426         if (!dev->deadbeaf) {
2427                 printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
2428                 return 0;
2429         }
2430 #ifdef NET_REFCNT_DEBUG
2431         printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
2432                (dev->features & NETIF_F_DYNALLOC)?"":", old style");
2433 #endif
2434         if (dev->destructor)
2435                 dev->destructor(dev);
2436         if (dev->features & NETIF_F_DYNALLOC)
2437                 kfree(dev);
2438         return 0;
2439 }
2440 
2441 /**
2442  *      unregister_netdevice - remove device from the kernel
2443  *      @dev: device
2444  *
2445  *      This function shuts down a device interface and removes it
2446  *      from the kernel tables. On success 0 is returned, on a failure
2447  *      a negative errno code is returned.
2448  *
2449  *      Callers must hold the rtnl semaphore.  See the comment at the
2450  *      end of Space.c for details about the locking.  You may want
2451  *      unregister_netdev() instead of this.
2452  */
2453 
2454 int unregister_netdevice(struct net_device *dev)
2455 {
2456         unsigned long now, warning_time;
2457         struct net_device *d, **dp;
2458 
2459         /* If device is running, close it first. */
2460         if (dev->flags & IFF_UP)
2461                 dev_close(dev);
2462 
2463         BUG_TRAP(dev->deadbeaf==0);
2464         dev->deadbeaf = 1;
2465 
2466         /* And unlink it from device chain. */
2467         for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
2468                 if (d == dev) {
2469                         write_lock_bh(&dev_base_lock);
2470                         *dp = d->next;
2471                         write_unlock_bh(&dev_base_lock);
2472                         break;
2473                 }
2474         }
2475         if (d == NULL) {
2476                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
2477                 return -ENODEV;
2478         }
2479 
2480         /* Synchronize to net_rx_action. */
2481         br_write_lock_bh(BR_NETPROTO_LOCK);
2482         br_write_unlock_bh(BR_NETPROTO_LOCK);
2483 
2484         if (dev_boot_phase == 0) {
2485 #ifdef CONFIG_NET_FASTROUTE
2486                 dev_clear_fastroute(dev);
2487 #endif
2488 
2489                 /* Shutdown queueing discipline. */
2490                 dev_shutdown(dev);
2491 
2492                 net_run_sbin_hotplug(dev, "unregister");
2493 
2494                 /* Notify protocols, that we are about to destroy
2495                    this device. They should clean all the things.
2496                  */
2497                 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2498 
2499                 /*
2500                  *      Flush the multicast chain
2501                  */
2502                 dev_mc_discard(dev);
2503         }
2504 
2505         if (dev->uninit)
2506                 dev->uninit(dev);
2507 
2508         /* Notifier chain MUST detach us from master device. */
2509         BUG_TRAP(dev->master==NULL);
2510 
2511 #ifdef CONFIG_NET_DIVERT
2512         free_divert_blk(dev);
2513 #endif
2514 
2515         if (dev->features & NETIF_F_DYNALLOC) {
2516 #ifdef NET_REFCNT_DEBUG
2517                 if (atomic_read(&dev->refcnt) != 1)
2518                         printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
2519 #endif
2520                 dev_put(dev);
2521                 return 0;
2522         }
2523 
2524         /* Last reference is our one */
2525         if (atomic_read(&dev->refcnt) == 1) {
2526                 dev_put(dev);
2527                 return 0;
2528         }
2529 
2530 #ifdef NET_REFCNT_DEBUG
2531         printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
2532 #endif
2533 
2534         /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
2535            it means that someone in the kernel still has a reference
2536            to this device and we cannot release it.
2537 
2538            "New style" devices have destructors, hence we can return from this
2539            function and destructor will do all the work later.  As of kernel 2.4.0
2540            there are very few "New Style" devices.
2541 
2542            "Old style" devices expect that the device is free of any references
2543            upon exit from this function.
2544            We cannot return from this function until all such references have
2545            fallen away.  This is because the caller of this function will probably
2546            immediately kfree(*dev) and then be unloaded via sys_delete_module.
2547 
2548            So, we linger until all references fall away.  The duration of the
2549            linger is basically unbounded! It is driven by, for example, the
2550            current setting of sysctl_ipfrag_time.
2551 
2552            After 1 second, we start to rebroadcast unregister notifications
2553            in hope that careless clients will release the device.
2554 
2555          */
2556 
2557         now = warning_time = jiffies;
2558         while (atomic_read(&dev->refcnt) != 1) {
2559                 if ((jiffies - now) > 1*HZ) {
2560                         /* Rebroadcast unregister notification */
2561                         notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
2562                 }
2563                 current->state = TASK_INTERRUPTIBLE;
2564                 schedule_timeout(HZ/4);
2565                 current->state = TASK_RUNNING;
2566                 if ((jiffies - warning_time) > 10*HZ) {
2567                         printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
2568                                         "become free. Usage count = %d\n",
2569                                         dev->name, atomic_read(&dev->refcnt));
2570                         warning_time = jiffies;
2571                 }
2572         }
2573         dev_put(dev);
2574         return 0;
2575 }
2576 
2577 
2578 /*
2579  *      Initialize the DEV module. At boot time this walks the device list and
2580  *      unhooks any devices that fail to initialise (normally hardware not 
2581  *      present) and leaves us with a valid list of present and active devices.
2582  *
2583  */
2584 
2585 extern void net_device_init(void);
2586 extern void ip_auto_config(void);
2587 #ifdef CONFIG_NET_DIVERT
2588 extern void dv_init(void);
2589 #endif /* CONFIG_NET_DIVERT */
2590 
2591 
2592 /*
2593  *       Callers must hold the rtnl semaphore.  See the comment at the
2594  *       end of Space.c for details about the locking.
2595  */
2596 int __init net_dev_init(void)
2597 {
2598         struct net_device *dev, **dp;
2599         int i;
2600 
2601 #ifdef CONFIG_NET_SCHED
2602         pktsched_init();
2603 #endif
2604 
2605 #ifdef CONFIG_NET_DIVERT
2606         dv_init();
2607 #endif /* CONFIG_NET_DIVERT */
2608         
2609         /*
2610          *      Initialise the packet receive queues.
2611          */
2612 
2613         for (i = 0; i < NR_CPUS; i++) {
2614                 struct softnet_data *queue;
2615 
2616                 queue = &softnet_data[i];
2617                 skb_queue_head_init(&queue->input_pkt_queue);
2618                 queue->throttle = 0;
2619                 queue->cng_level = 0;
2620                 queue->avg_blog = 10; /* arbitrary non-zero */
2621                 queue->completion_queue = NULL;
2622         }
2623         
2624 #ifdef CONFIG_NET_PROFILE
2625         net_profile_init();
2626         NET_PROFILE_REGISTER(dev_queue_xmit);
2627         NET_PROFILE_REGISTER(softnet_process);
2628 #endif
2629 
2630 #ifdef OFFLINE_SAMPLE
2631         samp_timer.expires = jiffies + (10 * HZ);
2632         add_timer(&samp_timer);
2633 #endif
2634 
2635         /*
2636          *      Add the devices.
2637          *      If the call to dev->init fails, the dev is removed
2638          *      from the chain disconnecting the device until the
2639          *      next reboot.
2640          *
2641          *      NB At boot phase networking is dead. No locking is required.
2642          *      But we still preserve dev_base_lock for sanity.
2643          */
2644 
2645         dp = &dev_base;
2646         while ((dev = *dp) != NULL) {
2647                 spin_lock_init(&dev->queue_lock);
2648                 spin_lock_init(&dev->xmit_lock);
2649 #ifdef CONFIG_NET_FASTROUTE
2650                 dev->fastpath_lock = RW_LOCK_UNLOCKED;
2651 #endif
2652                 dev->xmit_lock_owner = -1;
2653                 dev->iflink = -1;
2654                 dev_hold(dev);
2655 
2656                 /*
2657                  * Allocate name. If the init() fails
2658                  * the name will be reissued correctly.
2659                  */
2660                 if (strchr(dev->name, '%'))
2661                         dev_alloc_name(dev, dev->name);
2662 
2663                 /* 
2664                  * Check boot time settings for the device.
2665                  */
2666                 netdev_boot_setup_check(dev);
2667 
2668                 if (dev->init && dev->init(dev)) {
2669                         /*
2670                          * It failed to come up. It will be unhooked later.
2671                          * dev_alloc_name can now advance to next suitable
2672                          * name that is checked next.
2673                          */
2674                         dev->deadbeaf = 1;
2675                         dp = &dev->next;
2676                 } else {
2677                         dp = &dev->next;
2678                         dev->ifindex = dev_new_index();
2679                         if (dev->iflink == -1)
2680                                 dev->iflink = dev->ifindex;
2681                         if (dev->rebuild_header == NULL)
2682                                 dev->rebuild_header = default_rebuild_header;
2683                         dev_init_scheduler(dev);
2684                         set_bit(__LINK_STATE_PRESENT, &dev->state);
2685                 }
2686         }
2687 
2688         /*
2689          * Unhook devices that failed to come up
2690          */
2691         dp = &dev_base;
2692         while ((dev = *dp) != NULL) {
2693                 if (dev->deadbeaf) {
2694                         write_lock_bh(&dev_base_lock);
2695                         *dp = dev->next;
2696                         write_unlock_bh(&dev_base_lock);
2697                         dev_put(dev);
2698                 } else {
2699                         dp = &dev->next;
2700                 }
2701         }
2702 
2703 #ifdef CONFIG_PROC_FS
2704         proc_net_create("dev", 0, dev_get_info);
2705         create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
2706 #ifdef WIRELESS_EXT
2707         proc_net_create("wireless", 0, dev_get_wireless_info);
2708 #endif  /* WIRELESS_EXT */
2709 #endif  /* CONFIG_PROC_FS */
2710 
2711         dev_boot_phase = 0;
2712 
2713         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
2714         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
2715 
2716         dst_init();
2717         dev_mcast_init();
2718 
2719         /*
2720          *      Initialise network devices
2721          */
2722          
2723         net_device_init();
2724 
2725         return 0;
2726 }
2727 
2728 #ifdef CONFIG_HOTPLUG
2729 
2730 /* Notify userspace when a netdevice event occurs,
2731  * by running '/sbin/hotplug net' with certain
2732  * environment variables set.
2733  */
2734 
2735 static int net_run_sbin_hotplug(struct net_device *dev, char *action)
2736 {
2737         char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
2738         int i;
2739 
2740         sprintf(ifname, "INTERFACE=%s", dev->name);
2741         sprintf(action_str, "ACTION=%s", action);
2742 
2743         i = 0;
2744         argv[i++] = hotplug_path;
2745         argv[i++] = "net";
2746         argv[i] = 0;
2747 
2748         i = 0;
2749         /* minimal command environment */
2750         envp [i++] = "HOME=/";
2751         envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
2752         envp [i++] = ifname;
2753         envp [i++] = action_str;
2754         envp [i] = 0;
2755         
2756         return call_usermodehelper(argv [0], argv, envp);
2757 }
2758 #endif
2759 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.