~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/sched/sch_generic.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * net/sched/sch_generic.c      Generic packet scheduler routines.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10  *              Jamal Hadi Salim, <hadi@nortelnetworks.com> 990601
 11  *              - Ingress support
 12  */
 13 
 14 #include <asm/uaccess.h>
 15 #include <asm/system.h>
 16 #include <asm/bitops.h>
 17 #include <linux/config.h>
 18 #include <linux/types.h>
 19 #include <linux/kernel.h>
 20 #include <linux/sched.h>
 21 #include <linux/string.h>
 22 #include <linux/mm.h>
 23 #include <linux/socket.h>
 24 #include <linux/sockios.h>
 25 #include <linux/in.h>
 26 #include <linux/errno.h>
 27 #include <linux/interrupt.h>
 28 #include <linux/netdevice.h>
 29 #include <linux/skbuff.h>
 30 #include <linux/rtnetlink.h>
 31 #include <linux/init.h>
 32 #include <net/sock.h>
 33 #include <net/pkt_sched.h>
 34 
 35 /* Main transmission queue. */
 36 
/* qdisc_tree_lock: main qdisc structure lock.

   Modifications to data that participate in scheduling must
   additionally be protected by the dev->queue_lock spinlock.

   The idea is the following:
   - enqueue and dequeue are serialized via the top-level device
     spinlock dev->queue_lock.
   - tree walking is protected by read_lock(qdisc_tree_lock),
     and this lock is used only in process context.
   - updates to the tree are made only under the rtnl semaphore,
     hence this lock may be taken without disabling local BHs.

   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
 */
rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
 54 
/*
   dev->queue_lock serializes queue accesses for this device
   AND the dev->qdisc pointer itself.

   dev->xmit_lock serializes accesses to the device driver.

   dev->queue_lock and dev->xmit_lock are mutually exclusive:
   if one is grabbed, the other must be free.
 */
 64 
 65 
 66 /* Kick device.
 67    Note, that this procedure can be called by a watchdog timer, so that
 68    we do not check dev->tbusy flag here.
 69 
 70    Returns:  0  - queue is empty.
 71             >0  - queue is not empty, but throttled.
 72             <0  - queue is not empty. Device is throttled, if dev->tbusy != 0.
 73 
 74    NOTE: Called under dev->queue_lock with locally disabled BH.
 75 */
 76 
 77 int qdisc_restart(struct net_device *dev)
 78 {
 79         struct Qdisc *q = dev->qdisc;
 80         struct sk_buff *skb;
 81 
 82         /* Dequeue packet */
 83         if ((skb = q->dequeue(q)) != NULL) {
 84                 if (spin_trylock(&dev->xmit_lock)) {
 85                         /* Remember that the driver is grabbed by us. */
 86                         dev->xmit_lock_owner = smp_processor_id();
 87 
 88                         /* And release queue */
 89                         spin_unlock(&dev->queue_lock);
 90 
 91                         if (!netif_queue_stopped(dev)) {
 92                                 if (netdev_nit)
 93                                         dev_queue_xmit_nit(skb, dev);
 94 
 95                                 if (dev->hard_start_xmit(skb, dev) == 0) {
 96                                         dev->xmit_lock_owner = -1;
 97                                         spin_unlock(&dev->xmit_lock);
 98 
 99                                         spin_lock(&dev->queue_lock);
100                                         return -1;
101                                 }
102                         }
103 
104                         /* Release the driver */
105                         dev->xmit_lock_owner = -1;
106                         spin_unlock(&dev->xmit_lock);
107                         spin_lock(&dev->queue_lock);
108                         q = dev->qdisc;
109                 } else {
110                         /* So, someone grabbed the driver. */
111 
112                         /* It may be transient configuration error,
113                            when hard_start_xmit() recurses. We detect
114                            it by checking xmit owner and drop the
115                            packet when deadloop is detected.
116                          */
117                         if (dev->xmit_lock_owner == smp_processor_id()) {
118                                 kfree_skb(skb);
119                                 if (net_ratelimit())
120                                         printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
121                                 return -1;
122                         }
123                         netdev_rx_stat[smp_processor_id()].cpu_collision++;
124                 }
125 
126                 /* Device kicked us out :(
127                    This is possible in three cases:
128 
129                    0. driver is locked
130                    1. fastroute is enabled
131                    2. device cannot determine busy state
132                       before start of transmission (f.e. dialout)
133                    3. device is buggy (ppp)
134                  */
135 
136                 q->ops->requeue(skb, q);
137                 netif_schedule(dev);
138                 return 1;
139         }
140         return q->q.qlen;
141 }
142 
143 static void dev_watchdog(unsigned long arg)
144 {
145         struct net_device *dev = (struct net_device *)arg;
146 
147         spin_lock(&dev->xmit_lock);
148         if (dev->qdisc != &noop_qdisc) {
149                 if (netif_device_present(dev) &&
150                     netif_running(dev) &&
151                     netif_carrier_ok(dev)) {
152                         if (netif_queue_stopped(dev) &&
153                             (jiffies - dev->trans_start) > dev->watchdog_timeo) {
154                                 printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
155                                 dev->tx_timeout(dev);
156                         }
157                         if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
158                                 dev_hold(dev);
159                 }
160         }
161         spin_unlock(&dev->xmit_lock);
162 
163         dev_put(dev);
164 }
165 
166 static void dev_watchdog_init(struct net_device *dev)
167 {
168         init_timer(&dev->watchdog_timer);
169         dev->watchdog_timer.data = (unsigned long)dev;
170         dev->watchdog_timer.function = dev_watchdog;
171 }
172 
173 void __netdev_watchdog_up(struct net_device *dev)
174 {
175         if (dev->tx_timeout) {
176                 if (dev->watchdog_timeo <= 0)
177                         dev->watchdog_timeo = 5*HZ;
178                 if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
179                         dev_hold(dev);
180         }
181 }
182 
/* Run __netdev_watchdog_up() for dev while holding dev->xmit_lock
   with BHs disabled.
 */
static void dev_watchdog_up(struct net_device *dev)
{
        spin_lock_bh(&dev->xmit_lock);
        __netdev_watchdog_up(dev);
        spin_unlock_bh(&dev->xmit_lock);
}
189 
190 static void dev_watchdog_down(struct net_device *dev)
191 {
192         spin_lock_bh(&dev->xmit_lock);
193         if (del_timer(&dev->watchdog_timer))
194                 __dev_put(dev);
195         spin_unlock_bh(&dev->xmit_lock);
196 }
197 
198 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
199    under all circumstances. It is difficult to invent anything faster or
200    cheaper.
201  */
202 
203 static int
204 noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
205 {
206         kfree_skb(skb);
207         return NET_XMIT_CN;
208 }
209 
210 static struct sk_buff *
211 noop_dequeue(struct Qdisc * qdisc)
212 {
213         return NULL;
214 }
215 
216 static int
217 noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
218 {
219         if (net_ratelimit())
220                 printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
221         kfree_skb(skb);
222         return NET_XMIT_CN;
223 }
224 
225 struct Qdisc_ops noop_qdisc_ops =
226 {
227         NULL,
228         NULL,
229         "noop",
230         0,
231 
232         noop_enqueue,
233         noop_dequeue,
234         noop_requeue,
235 };
236 
237 struct Qdisc noop_qdisc =
238 {
239         noop_enqueue,
240         noop_dequeue,
241         TCQ_F_BUILTIN,
242         &noop_qdisc_ops,        
243 };
244 
245 
246 struct Qdisc_ops noqueue_qdisc_ops =
247 {
248         NULL,
249         NULL,
250         "noqueue",
251         0,
252 
253         noop_enqueue,
254         noop_dequeue,
255         noop_requeue,
256 
257 };
258 
259 struct Qdisc noqueue_qdisc =
260 {
261         NULL,
262         noop_dequeue,
263         TCQ_F_BUILTIN,
264         &noqueue_qdisc_ops,
265 };
266 
267 
268 static const u8 prio2band[TC_PRIO_MAX+1] =
269 { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
270 
271 /* 3-band FIFO queue: old style, but should be a bit faster than
272    generic prio+fifo combination.
273  */
274 
275 static int
276 pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
277 {
278         struct sk_buff_head *list;
279 
280         list = ((struct sk_buff_head*)qdisc->data) +
281                 prio2band[skb->priority&TC_PRIO_MAX];
282 
283         if (list->qlen <= skb->dev->tx_queue_len) {
284                 __skb_queue_tail(list, skb);
285                 qdisc->q.qlen++;
286                 return 0;
287         }
288         qdisc->stats.drops++;
289         kfree_skb(skb);
290         return NET_XMIT_DROP;
291 }
292 
293 static struct sk_buff *
294 pfifo_fast_dequeue(struct Qdisc* qdisc)
295 {
296         int prio;
297         struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
298         struct sk_buff *skb;
299 
300         for (prio = 0; prio < 3; prio++, list++) {
301                 skb = __skb_dequeue(list);
302                 if (skb) {
303                         qdisc->q.qlen--;
304                         return skb;
305                 }
306         }
307         return NULL;
308 }
309 
310 static int
311 pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
312 {
313         struct sk_buff_head *list;
314 
315         list = ((struct sk_buff_head*)qdisc->data) +
316                 prio2band[skb->priority&TC_PRIO_MAX];
317 
318         __skb_queue_head(list, skb);
319         qdisc->q.qlen++;
320         return 0;
321 }
322 
323 static void
324 pfifo_fast_reset(struct Qdisc* qdisc)
325 {
326         int prio;
327         struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
328 
329         for (prio=0; prio < 3; prio++)
330                 skb_queue_purge(list+prio);
331         qdisc->q.qlen = 0;
332 }
333 
334 static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
335 {
336         int i;
337         struct sk_buff_head *list;
338 
339         list = ((struct sk_buff_head*)qdisc->data);
340 
341         for (i=0; i<3; i++)
342                 skb_queue_head_init(list+i);
343 
344         return 0;
345 }
346 
347 static struct Qdisc_ops pfifo_fast_ops =
348 {
349         NULL,
350         NULL,
351         "pfifo_fast",
352         3 * sizeof(struct sk_buff_head),
353 
354         pfifo_fast_enqueue,
355         pfifo_fast_dequeue,
356         pfifo_fast_requeue,
357         NULL,
358 
359         pfifo_fast_init,
360         pfifo_fast_reset,
361 };
362 
363 struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
364 {
365         struct Qdisc *sch;
366         int size = sizeof(*sch) + ops->priv_size;
367 
368         sch = kmalloc(size, GFP_KERNEL);
369         if (!sch)
370                 return NULL;
371         memset(sch, 0, size);
372 
373         skb_queue_head_init(&sch->q);
374         sch->ops = ops;
375         sch->enqueue = ops->enqueue;
376         sch->dequeue = ops->dequeue;
377         sch->dev = dev;
378         sch->stats.lock = &dev->queue_lock;
379         atomic_set(&sch->refcnt, 1);
380         if (!ops->init || ops->init(sch, NULL) == 0)
381                 return sch;
382 
383         kfree(sch);
384         return NULL;
385 }
386 
387 /* Under dev->queue_lock and BH! */
388 
389 void qdisc_reset(struct Qdisc *qdisc)
390 {
391         struct Qdisc_ops *ops = qdisc->ops;
392 
393         if (ops->reset)
394                 ops->reset(qdisc);
395 }
396 
397 /* Under dev->queue_lock and BH! */
398 
399 void qdisc_destroy(struct Qdisc *qdisc)
400 {
401         struct Qdisc_ops *ops = qdisc->ops;
402         struct net_device *dev;
403 
404         if (!atomic_dec_and_test(&qdisc->refcnt))
405                 return;
406 
407         dev = qdisc->dev;
408 
409 #ifdef CONFIG_NET_SCHED
410         if (dev) {
411                 struct Qdisc *q, **qp;
412                 for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
413                         if (q == qdisc) {
414                                 *qp = q->next;
415                                 break;
416                         }
417                 }
418         }
419 #ifdef CONFIG_NET_ESTIMATOR
420         qdisc_kill_estimator(&qdisc->stats);
421 #endif
422 #endif
423         if (ops->reset)
424                 ops->reset(qdisc);
425         if (ops->destroy)
426                 ops->destroy(qdisc);
427         if (!(qdisc->flags&TCQ_F_BUILTIN))
428                 kfree(qdisc);
429 }
430 
431 
432 void dev_activate(struct net_device *dev)
433 {
434         /* No queueing discipline is attached to device;
435            create default one i.e. pfifo_fast for devices,
436            which need queueing and noqueue_qdisc for
437            virtual interfaces
438          */
439 
440         if (dev->qdisc_sleeping == &noop_qdisc) {
441                 struct Qdisc *qdisc;
442                 if (dev->tx_queue_len) {
443                         qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
444                         if (qdisc == NULL) {
445                                 printk(KERN_INFO "%s: activation failed\n", dev->name);
446                                 return;
447                         }
448                 } else {
449                         qdisc =  &noqueue_qdisc;
450                 }
451                 write_lock(&qdisc_tree_lock);
452                 dev->qdisc_sleeping = qdisc;
453                 write_unlock(&qdisc_tree_lock);
454         }
455 
456         spin_lock_bh(&dev->queue_lock);
457         if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) {
458                 dev->trans_start = jiffies;
459                 dev_watchdog_up(dev);
460         }
461         spin_unlock_bh(&dev->queue_lock);
462 }
463 
464 void dev_deactivate(struct net_device *dev)
465 {
466         struct Qdisc *qdisc;
467 
468         spin_lock_bh(&dev->queue_lock);
469         qdisc = dev->qdisc;
470         dev->qdisc = &noop_qdisc;
471 
472         qdisc_reset(qdisc);
473 
474         spin_unlock_bh(&dev->queue_lock);
475 
476         dev_watchdog_down(dev);
477 
478         while (test_bit(__LINK_STATE_SCHED, &dev->state)) {
479                 current->policy |= SCHED_YIELD;
480                 schedule();
481         }
482 
483         spin_unlock_wait(&dev->xmit_lock);
484 }
485 
486 void dev_init_scheduler(struct net_device *dev)
487 {
488         write_lock(&qdisc_tree_lock);
489         spin_lock_bh(&dev->queue_lock);
490         dev->qdisc = &noop_qdisc;
491         spin_unlock_bh(&dev->queue_lock);
492         dev->qdisc_sleeping = &noop_qdisc;
493         dev->qdisc_list = NULL;
494         write_unlock(&qdisc_tree_lock);
495 
496         dev_watchdog_init(dev);
497 }
498 
499 void dev_shutdown(struct net_device *dev)
500 {
501         struct Qdisc *qdisc;
502 
503         write_lock(&qdisc_tree_lock);
504         spin_lock_bh(&dev->queue_lock);
505         qdisc = dev->qdisc_sleeping;
506         dev->qdisc = &noop_qdisc;
507         dev->qdisc_sleeping = &noop_qdisc;
508         qdisc_destroy(qdisc);
509 #if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
510         if ((qdisc = dev->qdisc_ingress) != NULL) {
511                 dev->qdisc_ingress = NULL;
512                 qdisc_destroy(qdisc);
513         }
514 #endif
515         BUG_TRAP(dev->qdisc_list == NULL);
516         BUG_TRAP(!timer_pending(&dev->watchdog_timer));
517         dev->qdisc_list = NULL;
518         spin_unlock_bh(&dev->queue_lock);
519         write_unlock(&qdisc_tree_lock);
520 }
521 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.