1 /*
2 * net/sched/sch_generic.c Generic packet scheduler routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * Jamal Hadi Salim, <hadi@nortelnetworks.com> 990601
11 * - Ingress support
12 */
13
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <asm/bitops.h>
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/init.h>
32 #include <net/sock.h>
33 #include <net/pkt_sched.h>
34
35 /* Main transmission queue. */
36
/* Main qdisc structure lock.

   This lock protects the qdisc tree itself.  Modifications to data
   that participates in scheduling must additionally be protected by
   the dev->queue_lock spinlock.

   The idea is the following:
   - enqueue and dequeue are serialized via the top level device
     spinlock dev->queue_lock.
   - tree walking is protected by read_lock(qdisc_tree_lock),
     and this lock is used only in process context.
   - updates to the tree are made only under the rtnl semaphore,
     hence this lock may be taken without disabling local BHs.

   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
 */
rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
54
/*
   dev->queue_lock serializes queue accesses for this device
   AND the dev->qdisc pointer itself.

   dev->xmit_lock serializes accesses to the device driver.

   dev->queue_lock and dev->xmit_lock are mutually exclusive:
   if one is grabbed, the other must be free.
 */
64
65
66 /* Kick device.
67 Note, that this procedure can be called by a watchdog timer, so that
68 we do not check dev->tbusy flag here.
69
70 Returns: 0 - queue is empty.
71 >0 - queue is not empty, but throttled.
72 <0 - queue is not empty. Device is throttled, if dev->tbusy != 0.
73
74 NOTE: Called under dev->queue_lock with locally disabled BH.
75 */
76
77 int qdisc_restart(struct net_device *dev)
78 {
79 struct Qdisc *q = dev->qdisc;
80 struct sk_buff *skb;
81
82 /* Dequeue packet */
83 if ((skb = q->dequeue(q)) != NULL) {
84 if (spin_trylock(&dev->xmit_lock)) {
85 /* Remember that the driver is grabbed by us. */
86 dev->xmit_lock_owner = smp_processor_id();
87
88 /* And release queue */
89 spin_unlock(&dev->queue_lock);
90
91 if (!netif_queue_stopped(dev)) {
92 if (netdev_nit)
93 dev_queue_xmit_nit(skb, dev);
94
95 if (dev->hard_start_xmit(skb, dev) == 0) {
96 dev->xmit_lock_owner = -1;
97 spin_unlock(&dev->xmit_lock);
98
99 spin_lock(&dev->queue_lock);
100 return -1;
101 }
102 }
103
104 /* Release the driver */
105 dev->xmit_lock_owner = -1;
106 spin_unlock(&dev->xmit_lock);
107 spin_lock(&dev->queue_lock);
108 q = dev->qdisc;
109 } else {
110 /* So, someone grabbed the driver. */
111
112 /* It may be transient configuration error,
113 when hard_start_xmit() recurses. We detect
114 it by checking xmit owner and drop the
115 packet when deadloop is detected.
116 */
117 if (dev->xmit_lock_owner == smp_processor_id()) {
118 kfree_skb(skb);
119 if (net_ratelimit())
120 printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
121 return -1;
122 }
123 netdev_rx_stat[smp_processor_id()].cpu_collision++;
124 }
125
126 /* Device kicked us out :(
127 This is possible in three cases:
128
129 0. driver is locked
130 1. fastroute is enabled
131 2. device cannot determine busy state
132 before start of transmission (f.e. dialout)
133 3. device is buggy (ppp)
134 */
135
136 q->ops->requeue(skb, q);
137 netif_schedule(dev);
138 return 1;
139 }
140 return q->q.qlen;
141 }
142
143 static void dev_watchdog(unsigned long arg)
144 {
145 struct net_device *dev = (struct net_device *)arg;
146
147 spin_lock(&dev->xmit_lock);
148 if (dev->qdisc != &noop_qdisc) {
149 if (netif_device_present(dev) &&
150 netif_running(dev) &&
151 netif_carrier_ok(dev)) {
152 if (netif_queue_stopped(dev) &&
153 (jiffies - dev->trans_start) > dev->watchdog_timeo) {
154 printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
155 dev->tx_timeout(dev);
156 }
157 if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
158 dev_hold(dev);
159 }
160 }
161 spin_unlock(&dev->xmit_lock);
162
163 dev_put(dev);
164 }
165
166 static void dev_watchdog_init(struct net_device *dev)
167 {
168 init_timer(&dev->watchdog_timer);
169 dev->watchdog_timer.data = (unsigned long)dev;
170 dev->watchdog_timer.function = dev_watchdog;
171 }
172
173 void __netdev_watchdog_up(struct net_device *dev)
174 {
175 if (dev->tx_timeout) {
176 if (dev->watchdog_timeo <= 0)
177 dev->watchdog_timeo = 5*HZ;
178 if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
179 dev_hold(dev);
180 }
181 }
182
183 static void dev_watchdog_up(struct net_device *dev)
184 {
185 spin_lock_bh(&dev->xmit_lock);
186 __netdev_watchdog_up(dev);
187 spin_unlock_bh(&dev->xmit_lock);
188 }
189
190 static void dev_watchdog_down(struct net_device *dev)
191 {
192 spin_lock_bh(&dev->xmit_lock);
193 if (del_timer(&dev->watchdog_timer))
194 __dev_put(dev);
195 spin_unlock_bh(&dev->xmit_lock);
196 }
197
198 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
199 under all circumstances. It is difficult to invent anything faster or
200 cheaper.
201 */
202
203 static int
204 noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
205 {
206 kfree_skb(skb);
207 return NET_XMIT_CN;
208 }
209
210 static struct sk_buff *
211 noop_dequeue(struct Qdisc * qdisc)
212 {
213 return NULL;
214 }
215
216 static int
217 noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
218 {
219 if (net_ratelimit())
220 printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
221 kfree_skb(skb);
222 return NET_XMIT_CN;
223 }
224
/* Operations table for the "noop" discipline.

   Positional initializer: the field meanings noted below are inferred
   from how the values are used in this file -- confirm against the
   declaration of struct Qdisc_ops.  Members not listed are
   zero-initialized.
 */
struct Qdisc_ops noop_qdisc_ops =
{
	NULL,
	NULL,
	"noop",			/* discipline name */
	0,			/* presumably priv_size: no private data */

	noop_enqueue,		/* frees every packet */
	noop_dequeue,		/* always returns NULL */
	noop_requeue,		/* logs and frees the packet */
};
236
/* The built-in "noop" qdisc instance.  This file installs it as
   dev->qdisc and dev->qdisc_sleeping whenever a device has no active
   discipline (see dev_init_scheduler(), dev_deactivate() and
   dev_shutdown()).

   Positional initializer -- field order presumably enqueue, dequeue,
   flags, ops; confirm against the declaration of struct Qdisc.
 */
struct Qdisc noop_qdisc =
{
	noop_enqueue,
	noop_dequeue,
	TCQ_F_BUILTIN,		/* qdisc_destroy() does not kfree() built-in qdiscs */
	&noop_qdisc_ops,
};
244
245
/* Operations table for the "noqueue" discipline.  It reuses the noop
   handlers and differs from noop_qdisc_ops only in its name.

   Positional initializer: the field meanings noted below are inferred
   from how the values are used in this file -- confirm against the
   declaration of struct Qdisc_ops.
 */
struct Qdisc_ops noqueue_qdisc_ops =
{
	NULL,
	NULL,
	"noqueue",		/* discipline name */
	0,			/* presumably priv_size: no private data */

	noop_enqueue,
	noop_dequeue,
	noop_requeue,

};
258
/* The built-in "noqueue" qdisc instance.  dev_activate() attaches it
   to devices whose tx_queue_len is zero.

   Unlike noop_qdisc, the first member (presumably the enqueue hook --
   confirm against struct Qdisc) is NULL here.
 */
struct Qdisc noqueue_qdisc =
{
	NULL,
	noop_dequeue,
	TCQ_F_BUILTIN,		/* qdisc_destroy() does not kfree() built-in qdiscs */
	&noqueue_qdisc_ops,
};
266
267
/* Maps (skb->priority & TC_PRIO_MAX) to one of the three pfifo_fast
   bands.  pfifo_fast_dequeue() scans the bands in increasing index
   order, so band 0 is served first and band 2 last.
 */
static const u8 prio2band[TC_PRIO_MAX+1] =
{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
270
271 /* 3-band FIFO queue: old style, but should be a bit faster than
272 generic prio+fifo combination.
273 */
274
275 static int
276 pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
277 {
278 struct sk_buff_head *list;
279
280 list = ((struct sk_buff_head*)qdisc->data) +
281 prio2band[skb->priority&TC_PRIO_MAX];
282
283 if (list->qlen <= skb->dev->tx_queue_len) {
284 __skb_queue_tail(list, skb);
285 qdisc->q.qlen++;
286 return 0;
287 }
288 qdisc->stats.drops++;
289 kfree_skb(skb);
290 return NET_XMIT_DROP;
291 }
292
293 static struct sk_buff *
294 pfifo_fast_dequeue(struct Qdisc* qdisc)
295 {
296 int prio;
297 struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
298 struct sk_buff *skb;
299
300 for (prio = 0; prio < 3; prio++, list++) {
301 skb = __skb_dequeue(list);
302 if (skb) {
303 qdisc->q.qlen--;
304 return skb;
305 }
306 }
307 return NULL;
308 }
309
310 static int
311 pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
312 {
313 struct sk_buff_head *list;
314
315 list = ((struct sk_buff_head*)qdisc->data) +
316 prio2band[skb->priority&TC_PRIO_MAX];
317
318 __skb_queue_head(list, skb);
319 qdisc->q.qlen++;
320 return 0;
321 }
322
323 static void
324 pfifo_fast_reset(struct Qdisc* qdisc)
325 {
326 int prio;
327 struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
328
329 for (prio=0; prio < 3; prio++)
330 skb_queue_purge(list+prio);
331 qdisc->q.qlen = 0;
332 }
333
334 static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
335 {
336 int i;
337 struct sk_buff_head *list;
338
339 list = ((struct sk_buff_head*)qdisc->data);
340
341 for (i=0; i<3; i++)
342 skb_queue_head_init(list+i);
343
344 return 0;
345 }
346
/* Operations table for the default "pfifo_fast" discipline, which
   dev_activate() attaches to devices with a non-zero tx_queue_len.

   Positional initializer: the field meanings noted below are inferred
   from how the values are used in this file -- confirm against the
   declaration of struct Qdisc_ops.  Members not listed are
   zero-initialized.
 */
static struct Qdisc_ops pfifo_fast_ops =
{
	NULL,
	NULL,
	"pfifo_fast",				/* discipline name */
	3 * sizeof(struct sk_buff_head),	/* presumably priv_size: one queue head per band */

	pfifo_fast_enqueue,
	pfifo_fast_dequeue,
	pfifo_fast_requeue,
	NULL,					/* unset hook -- TODO confirm which member this is */

	pfifo_fast_init,
	pfifo_fast_reset,
};
362
363 struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
364 {
365 struct Qdisc *sch;
366 int size = sizeof(*sch) + ops->priv_size;
367
368 sch = kmalloc(size, GFP_KERNEL);
369 if (!sch)
370 return NULL;
371 memset(sch, 0, size);
372
373 skb_queue_head_init(&sch->q);
374 sch->ops = ops;
375 sch->enqueue = ops->enqueue;
376 sch->dequeue = ops->dequeue;
377 sch->dev = dev;
378 sch->stats.lock = &dev->queue_lock;
379 atomic_set(&sch->refcnt, 1);
380 if (!ops->init || ops->init(sch, NULL) == 0)
381 return sch;
382
383 kfree(sch);
384 return NULL;
385 }
386
387 /* Under dev->queue_lock and BH! */
388
389 void qdisc_reset(struct Qdisc *qdisc)
390 {
391 struct Qdisc_ops *ops = qdisc->ops;
392
393 if (ops->reset)
394 ops->reset(qdisc);
395 }
396
397 /* Under dev->queue_lock and BH! */
398
399 void qdisc_destroy(struct Qdisc *qdisc)
400 {
401 struct Qdisc_ops *ops = qdisc->ops;
402 struct net_device *dev;
403
404 if (!atomic_dec_and_test(&qdisc->refcnt))
405 return;
406
407 dev = qdisc->dev;
408
409 #ifdef CONFIG_NET_SCHED
410 if (dev) {
411 struct Qdisc *q, **qp;
412 for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
413 if (q == qdisc) {
414 *qp = q->next;
415 break;
416 }
417 }
418 }
419 #ifdef CONFIG_NET_ESTIMATOR
420 qdisc_kill_estimator(&qdisc->stats);
421 #endif
422 #endif
423 if (ops->reset)
424 ops->reset(qdisc);
425 if (ops->destroy)
426 ops->destroy(qdisc);
427 if (!(qdisc->flags&TCQ_F_BUILTIN))
428 kfree(qdisc);
429 }
430
431
432 void dev_activate(struct net_device *dev)
433 {
434 /* No queueing discipline is attached to device;
435 create default one i.e. pfifo_fast for devices,
436 which need queueing and noqueue_qdisc for
437 virtual interfaces
438 */
439
440 if (dev->qdisc_sleeping == &noop_qdisc) {
441 struct Qdisc *qdisc;
442 if (dev->tx_queue_len) {
443 qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
444 if (qdisc == NULL) {
445 printk(KERN_INFO "%s: activation failed\n", dev->name);
446 return;
447 }
448 } else {
449 qdisc = &noqueue_qdisc;
450 }
451 write_lock(&qdisc_tree_lock);
452 dev->qdisc_sleeping = qdisc;
453 write_unlock(&qdisc_tree_lock);
454 }
455
456 spin_lock_bh(&dev->queue_lock);
457 if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) {
458 dev->trans_start = jiffies;
459 dev_watchdog_up(dev);
460 }
461 spin_unlock_bh(&dev->queue_lock);
462 }
463
464 void dev_deactivate(struct net_device *dev)
465 {
466 struct Qdisc *qdisc;
467
468 spin_lock_bh(&dev->queue_lock);
469 qdisc = dev->qdisc;
470 dev->qdisc = &noop_qdisc;
471
472 qdisc_reset(qdisc);
473
474 spin_unlock_bh(&dev->queue_lock);
475
476 dev_watchdog_down(dev);
477
478 while (test_bit(__LINK_STATE_SCHED, &dev->state)) {
479 current->policy |= SCHED_YIELD;
480 schedule();
481 }
482
483 spin_unlock_wait(&dev->xmit_lock);
484 }
485
486 void dev_init_scheduler(struct net_device *dev)
487 {
488 write_lock(&qdisc_tree_lock);
489 spin_lock_bh(&dev->queue_lock);
490 dev->qdisc = &noop_qdisc;
491 spin_unlock_bh(&dev->queue_lock);
492 dev->qdisc_sleeping = &noop_qdisc;
493 dev->qdisc_list = NULL;
494 write_unlock(&qdisc_tree_lock);
495
496 dev_watchdog_init(dev);
497 }
498
499 void dev_shutdown(struct net_device *dev)
500 {
501 struct Qdisc *qdisc;
502
503 write_lock(&qdisc_tree_lock);
504 spin_lock_bh(&dev->queue_lock);
505 qdisc = dev->qdisc_sleeping;
506 dev->qdisc = &noop_qdisc;
507 dev->qdisc_sleeping = &noop_qdisc;
508 qdisc_destroy(qdisc);
509 #if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
510 if ((qdisc = dev->qdisc_ingress) != NULL) {
511 dev->qdisc_ingress = NULL;
512 qdisc_destroy(qdisc);
513 }
514 #endif
515 BUG_TRAP(dev->qdisc_list == NULL);
516 BUG_TRAP(!timer_pending(&dev->watchdog_timer));
517 dev->qdisc_list = NULL;
518 spin_unlock_bh(&dev->queue_lock);
519 write_unlock(&qdisc_tree_lock);
520 }
521
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */