~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/sched/sch_tbf.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * net/sched/sch_tbf.c  Token Bucket Filter queue.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10  *
 11  */
 12 
 13 #include <linux/config.h>
 14 #include <linux/module.h>
 15 #include <asm/uaccess.h>
 16 #include <asm/system.h>
 17 #include <asm/bitops.h>
 18 #include <linux/types.h>
 19 #include <linux/kernel.h>
 20 #include <linux/sched.h>
 21 #include <linux/string.h>
 22 #include <linux/mm.h>
 23 #include <linux/socket.h>
 24 #include <linux/sockios.h>
 25 #include <linux/in.h>
 26 #include <linux/errno.h>
 27 #include <linux/interrupt.h>
 28 #include <linux/if_ether.h>
 29 #include <linux/inet.h>
 30 #include <linux/netdevice.h>
 31 #include <linux/etherdevice.h>
 32 #include <linux/notifier.h>
 33 #include <net/ip.h>
 34 #include <net/route.h>
 35 #include <linux/skbuff.h>
 36 #include <net/sock.h>
 37 #include <net/pkt_sched.h>
 38 
 39 
 40 /*      Simple Token Bucket Filter.
 41         =======================================
 42 
 43         SOURCE.
 44         -------
 45 
 46         None.
 47 
 48         Description.
 49         ------------
 50 
 51         A data flow obeys TBF with rate R and depth B, if for any
 52         time interval t_i...t_f the number of transmitted bits
 53         does not exceed B + R*(t_f-t_i).
 54 
 55         Packetized version of this definition:
 56         The sequence of packets of sizes s_i served at moments t_i
 57         obeys TBF, if for any i<=k:
 58 
 59         s_i+....+s_k <= B + R*(t_k - t_i)
 60 
 61         Algorithm.
 62         ----------
 63         
 64         Let N(t_i) be B/R initially and N(t) grow continuously with time as:
 65 
 66         N(t+delta) = min{B/R, N(t) + delta}
 67 
 68         If the first packet in queue has length S, it may be
 69         transmited only at the time t_* when S/R <= N(t_*),
 70         and in this case N(t) jumps:
 71 
 72         N(t_* + 0) = N(t_* - 0) - S/R.
 73 
 74 
 75 
 76         Actually, QoS requires two TBF to be applied to a data stream.
 77         One of them controls steady state burst size, another
 78         one with rate P (peak rate) and depth M (equal to link MTU)
 79         limits bursts at a smaller time scale.
 80 
 81         It is easy to see that P>R, and B>M. If P is infinity, this double
 82         TBF is equivalent to a single one.
 83 
 84         When TBF works in reshaping mode, latency is estimated as:
 85 
 86         lat = max ((L-B)/R, (L-M)/P)
 87 
 88 
 89         NOTES.
 90         ------
 91 
 92         If TBF throttles, it starts a watchdog timer, which will wake it up
 93         when it is ready to transmit.
 94         Note that the minimal timer resolution is 1/HZ.
 95         If no new packets arrive during this period,
 96         or if the device is not awaken by EOI for some previous packet,
 97         TBF can stop its activity for 1/HZ.
 98 
 99 
100         This means, that with depth B, the maximal rate is
101 
102         R_crit = B*HZ
103 
104         F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
105 
106         Note that the peak rate TBF is much more tough: with MTU 1500
107         P_crit = 150Kbytes/sec. So, if you need greater peak
108         rates, use alpha with HZ=1000 :-)
109 */
110 
111 struct tbf_sched_data
112 {
113 /* Parameters */
114         u32             limit;          /* Maximal length of backlog: bytes */
115         u32             buffer;         /* Token bucket depth/rate: MUST BE >= MTU/B */
116         u32             mtu;
117         u32             max_size;
118         struct qdisc_rate_table *R_tab;
119         struct qdisc_rate_table *P_tab;
120 
121 /* Variables */
122         long    tokens;                 /* Current number of B tokens */
123         long    ptokens;                /* Current number of P tokens */
124         psched_time_t   t_c;            /* Time check-point */
125         struct timer_list wd_timer;     /* Watchdog timer */
126 };
127 
128 #define L2T(q,L)   ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
129 #define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log])
130 
131 static int
132 tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
133 {
134         struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
135 
136         if (skb->len > q->max_size)
137                 goto drop;
138         __skb_queue_tail(&sch->q, skb);
139         if ((sch->stats.backlog += skb->len) <= q->limit) {
140                 sch->stats.bytes += skb->len;
141                 sch->stats.packets++;
142                 return 0;
143         }
144 
145         /* Drop action: undo the things that we just did,
146          * i.e. make tail drop
147          */
148 
149         __skb_unlink(skb, &sch->q);
150         sch->stats.backlog -= skb->len;
151 
152 drop:
153         sch->stats.drops++;
154 #ifdef CONFIG_NET_CLS_POLICE
155         if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
156 #endif
157                 kfree_skb(skb);
158         return NET_XMIT_DROP;
159 }
160 
161 static int
162 tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
163 {
164         __skb_queue_head(&sch->q, skb);
165         sch->stats.backlog += skb->len;
166         return 0;
167 }
168 
169 static int
170 tbf_drop(struct Qdisc* sch)
171 {
172         struct sk_buff *skb;
173 
174         skb = __skb_dequeue_tail(&sch->q);
175         if (skb) {
176                 sch->stats.backlog -= skb->len;
177                 sch->stats.drops++;
178                 kfree_skb(skb);
179                 return 1;
180         }
181         return 0;
182 }
183 
184 static void tbf_watchdog(unsigned long arg)
185 {
186         struct Qdisc *sch = (struct Qdisc*)arg;
187 
188         sch->flags &= ~TCQ_F_THROTTLED;
189         netif_schedule(sch->dev);
190 }
191 
192 static struct sk_buff *
193 tbf_dequeue(struct Qdisc* sch)
194 {
195         struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
196         struct sk_buff *skb;
197         
198         skb = __skb_dequeue(&sch->q);
199 
200         if (skb) {
201                 psched_time_t now;
202                 long toks;
203                 long ptoks = 0;
204 
205                 PSCHED_GET_TIME(now);
206 
207                 toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer, 0);
208 
209                 if (q->P_tab) {
210                         ptoks = toks + q->ptokens;
211                         if (ptoks > (long)q->mtu)
212                                 ptoks = q->mtu;
213                         ptoks -= L2T_P(q, skb->len);
214                 }
215                 toks += q->tokens;
216                 if (toks > (long)q->buffer)
217                         toks = q->buffer;
218                 toks -= L2T(q, skb->len);
219 
220                 if ((toks|ptoks) >= 0) {
221                         q->t_c = now;
222                         q->tokens = toks;
223                         q->ptokens = ptoks;
224                         sch->stats.backlog -= skb->len;
225                         sch->flags &= ~TCQ_F_THROTTLED;
226                         return skb;
227                 }
228 
229                 if (!netif_queue_stopped(sch->dev)) {
230                         long delay = PSCHED_US2JIFFIE(max(-toks, -ptoks));
231 
232                         if (delay == 0)
233                                 delay = 1;
234 
235                         mod_timer(&q->wd_timer, jiffies+delay);
236                 }
237 
238                 /* Maybe we have a shorter packet in the queue,
239                    which can be sent now. It sounds cool,
240                    but, however, this is wrong in principle.
241                    We MUST NOT reorder packets under these circumstances.
242 
243                    Really, if we split the flow into independent
244                    subflows, it would be a very good solution.
245                    This is the main idea of all FQ algorithms
246                    (cf. CSZ, HPFQ, HFSC)
247                  */
248                 __skb_queue_head(&sch->q, skb);
249 
250                 sch->flags |= TCQ_F_THROTTLED;
251                 sch->stats.overlimits++;
252         }
253         return NULL;
254 }
255 
256 
257 static void
258 tbf_reset(struct Qdisc* sch)
259 {
260         struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
261 
262         skb_queue_purge(&sch->q);
263         sch->stats.backlog = 0;
264         PSCHED_GET_TIME(q->t_c);
265         q->tokens = q->buffer;
266         q->ptokens = q->mtu;
267         sch->flags &= ~TCQ_F_THROTTLED;
268         del_timer(&q->wd_timer);
269 }
270 
271 static int tbf_change(struct Qdisc* sch, struct rtattr *opt)
272 {
273         int err = -EINVAL;
274         struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
275         struct rtattr *tb[TCA_TBF_PTAB];
276         struct tc_tbf_qopt *qopt;
277         struct qdisc_rate_table *rtab = NULL;
278         struct qdisc_rate_table *ptab = NULL;
279         int max_size;
280 
281         if (rtattr_parse(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
282             tb[TCA_TBF_PARMS-1] == NULL ||
283             RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt))
284                 goto done;
285 
286         qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]);
287         rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]);
288         if (rtab == NULL)
289                 goto done;
290 
291         if (qopt->peakrate.rate) {
292                 if (qopt->peakrate.rate > qopt->rate.rate)
293                         ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]);
294                 if (ptab == NULL)
295                         goto done;
296         }
297 
298         max_size = psched_mtu(sch->dev);
299         if (ptab) {
300                 int n = max_size>>qopt->peakrate.cell_log;
301                 while (n>0 && ptab->data[n-1] > qopt->mtu) {
302                         max_size -= (1<<qopt->peakrate.cell_log);
303                         n--;
304                 }
305         }
306         if (rtab->data[max_size>>qopt->rate.cell_log] > qopt->buffer)
307                 goto done;
308 
309         sch_tree_lock(sch);
310         q->limit = qopt->limit;
311         q->mtu = qopt->mtu;
312         q->max_size = max_size;
313         q->buffer = qopt->buffer;
314         q->tokens = q->buffer;
315         q->ptokens = q->mtu;
316         rtab = xchg(&q->R_tab, rtab);
317         ptab = xchg(&q->P_tab, ptab);
318         sch_tree_unlock(sch);
319         err = 0;
320 done:
321         if (rtab)
322                 qdisc_put_rtab(rtab);
323         if (ptab)
324                 qdisc_put_rtab(ptab);
325         return err;
326 }
327 
328 static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
329 {
330         int err;
331         struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
332         
333         if (opt == NULL)
334                 return -EINVAL;
335         
336         MOD_INC_USE_COUNT;
337         
338         PSCHED_GET_TIME(q->t_c);
339         init_timer(&q->wd_timer);
340         q->wd_timer.function = tbf_watchdog;
341         q->wd_timer.data = (unsigned long)sch;
342         
343         if ((err = tbf_change(sch, opt)) != 0) {
344                 MOD_DEC_USE_COUNT;
345         }
346         return err;
347 }
348 
349 static void tbf_destroy(struct Qdisc *sch)
350 {
351         struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
352 
353         del_timer(&q->wd_timer);
354 
355         if (q->P_tab)
356                 qdisc_put_rtab(q->P_tab);
357         if (q->R_tab)
358                 qdisc_put_rtab(q->R_tab);
359 
360         MOD_DEC_USE_COUNT;
361 }
362 
363 #ifdef CONFIG_RTNETLINK
364 static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
365 {
366         struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data;
367         unsigned char    *b = skb->tail;
368         struct rtattr *rta;
369         struct tc_tbf_qopt opt;
370         
371         rta = (struct rtattr*)b;
372         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
373         
374         opt.limit = q->limit;
375         opt.rate = q->R_tab->rate;
376         if (q->P_tab)
377                 opt.peakrate = q->P_tab->rate;
378         else
379                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
380         opt.mtu = q->mtu;
381         opt.buffer = q->buffer;
382         RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
383         rta->rta_len = skb->tail - b;
384 
385         return skb->len;
386 
387 rtattr_failure:
388         skb_trim(skb, b - skb->data);
389         return -1;
390 }
391 #endif
392 
393 struct Qdisc_ops tbf_qdisc_ops =
394 {
395         NULL,
396         NULL,
397         "tbf",
398         sizeof(struct tbf_sched_data),
399 
400         tbf_enqueue,
401         tbf_dequeue,
402         tbf_requeue,
403         tbf_drop,
404 
405         tbf_init,
406         tbf_reset,
407         tbf_destroy,
408         tbf_change,
409 
410 #ifdef CONFIG_RTNETLINK
411         tbf_dump,
412 #endif
413 };
414 
415 
416 #ifdef MODULE
417 int init_module(void)
418 {
419         return register_qdisc(&tbf_qdisc_ops);
420 }
421 
422 void cleanup_module(void) 
423 {
424         unregister_qdisc(&tbf_qdisc_ops);
425 }
426 #endif
427 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.