~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/sched/sch_csz.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * net/sched/sch_csz.c  Clark-Shenker-Zhang scheduler.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10  *
 11  */
 12 
 13 #include <linux/config.h>
 14 #include <linux/module.h>
 15 #include <asm/uaccess.h>
 16 #include <asm/system.h>
 17 #include <asm/bitops.h>
 18 #include <linux/types.h>
 19 #include <linux/kernel.h>
 20 #include <linux/sched.h>
 21 #include <linux/string.h>
 22 #include <linux/mm.h>
 23 #include <linux/socket.h>
 24 #include <linux/sockios.h>
 25 #include <linux/in.h>
 26 #include <linux/errno.h>
 27 #include <linux/interrupt.h>
 28 #include <linux/if_ether.h>
 29 #include <linux/inet.h>
 30 #include <linux/netdevice.h>
 31 #include <linux/etherdevice.h>
 32 #include <linux/notifier.h>
 33 #include <net/ip.h>
 34 #include <net/route.h>
 35 #include <linux/skbuff.h>
 36 #include <net/sock.h>
 37 #include <net/pkt_sched.h>
 38 
 39 
 40 /*      Clark-Shenker-Zhang algorithm.
 41         =======================================
 42 
 43         SOURCE.
 44 
 45         David D. Clark, Scott Shenker and Lixia Zhang
 46         "Supporting Real-Time Applications in an Integrated Services Packet
 47         Network: Architecture and Mechanism".
 48 
 49         CBQ presents a flexible universal algorithm for packet scheduling,
 50         but it has pretty poor delay characteristics.
 51         Round-robin scheduling and link-sharing goals
 52         apparently contradict minimization of network delay and jitter.
 53         Moreover, correct handling of predictive flows seems to be
 54         impossible in CBQ.
 55 
 56         CSZ presents a more precise but less flexible and less efficient
 57         approach. As I understand it, the main idea is to create
 58         WFQ flows for each guaranteed service and to allocate
 59         the rest of bandwith to dummy flow-0. Flow-0 comprises
 60         the predictive services and the best effort traffic;
 61         it is handled by a priority scheduler with the highest
 62         priority band allocated for predictive services, and the rest ---
 63         to the best effort packets.
 64 
 65         Note that in CSZ flows are NOT limited to their bandwidth.  It
 66         is supposed that the flow passed admission control at the edge
 67         of the QoS network and it doesn't need further shaping. Any
 68         attempt to improve the flow or to shape it to a token bucket
 69         at intermediate hops will introduce undesired delays and raise
 70         jitter.
 71 
 72         At the moment CSZ is the only scheduler that provides
 73         true guaranteed service. Another schemes (including CBQ)
 74         do not provide guaranteed delay and randomize jitter.
 75         There is a proof (Sally Floyd), that delay
 76         can be estimated by a IntServ compliant formula.
 77         This result is true formally, but it is wrong in principle.
 78         It takes into account only round-robin delays,
 79         ignoring delays introduced by link sharing i.e. overlimiting.
 80         Note that temporary overlimits are inevitable because
 81         real links are not ideal, and the real algorithm must take this
 82         into account.
 83 
 84         ALGORITHM.
 85 
 86         --- Notations.
 87 
 88         $B$ is link bandwidth (bits/sec).
 89 
 90         $I$ is set of all flows, including flow $0$.
 91         Every flow $a \in I$ has associated bandwidth slice $r_a < 1$ and
 92         $\sum_{a \in I} r_a = 1$.
 93 
 94         --- Flow model.
 95 
 96         Let $m_a$ is the number of backlogged bits in flow $a$.
 97         The flow is {\em active}, if $m_a > 0$.
 98         This number is a discontinuous function of time;
 99         when a packet $i$ arrives:
100         \[
101         m_a(t_i+0) - m_a(t_i-0) = L^i,
102         \]
103         where $L^i$ is the length of the arrived packet.
104         The flow queue is drained continuously until $m_a == 0$:
105         \[
106         {d m_a \over dt} = - { B r_a \over \sum_{b \in A} r_b}.
107         \]
108         I.e. flow rates are their allocated rates proportionally
109         scaled to take all available link bandwidth. Apparently,
110         it is not the only possible policy. F.e. CBQ classes
111         without borrowing would be modelled by:
112         \[
113         {d m_a \over dt} = - B r_a .
114         \]
115         More complicated hierarchical bandwidth allocation
116         policies are possible, but unfortunately, the basic
117         flow equations have a simple solution only for proportional
118         scaling.
119 
120         --- Departure times.
121 
122         We calculate the time until the last bit of packet is sent:
123         \[
124         E_a^i(t) = { m_a(t_i) - \delta_a(t) \over r_a },
125         \]
126         where $\delta_a(t)$ is number of bits drained since $t_i$.
127         We have to evaluate $E_a^i$ for all queued packets,
128         then find the packet with minimal $E_a^i$ and send it.
129 
130         This sounds good, but direct implementation of the algorithm
131         is absolutely infeasible. Luckily, if flow rates
132         are scaled proportionally, the equations have a simple solution.
133         
134         The differential equation for $E_a^i$ is
135         \[
136         {d E_a^i (t) \over dt } = - { d \delta_a(t) \over dt} { 1 \over r_a} =
137         { B \over \sum_{b \in A} r_b}
138         \]
139         with initial condition
140         \[
141         E_a^i (t_i) = { m_a(t_i) \over r_a } .
142         \]
143 
144         Let's introduce an auxiliary function $R(t)$:
145 
146         --- Round number.
147 
148         Consider the following model: we rotate over active flows,
149         sending $r_a B$ bits from every flow, so that we send
150         $B \sum_{a \in A} r_a$ bits per round, that takes
151         $\sum_{a \in A} r_a$ seconds.
152         
153         Hence, $R(t)$ (round number) is a monotonically increasing
154         linear function of time when $A$ is not changed
155         \[
156         { d R(t) \over dt } = { 1 \over \sum_{a \in A} r_a }
157         \]
158         and it is continuous when $A$ changes.
159 
160         The central observation is that the quantity
161         $F_a^i = R(t) + E_a^i(t)/B$ does not depend on time at all!
162         $R(t)$ does not depend on flow, so that $F_a^i$ can be
163         calculated only once on packet arrival, and we need not
164         recalculate $E$ numbers and resorting queues.
165         The number $F_a^i$ is called finish number of the packet.
166         It is just the value of $R(t)$ when the last bit of packet
167         is sent out.
168 
169         Maximal finish number on flow is called finish number of flow
170         and minimal one is "start number of flow".
171         Apparently, flow is active if and only if $F_a \leq R$.
172 
173         When a packet of length $L_i$ bit arrives to flow $a$ at time $t_i$,
174         we calculate $F_a^i$ as:
175 
176         If flow was inactive ($F_a < R$):
177         $F_a^i = R(t) + {L_i \over B r_a}$
178         otherwise
179         $F_a^i = F_a + {L_i \over B r_a}$
180 
181         These equations complete the algorithm specification.
182 
183         It looks pretty hairy, but there is a simple
184         procedure for solving these equations.
185         See procedure csz_update(), that is a generalization of
186         the algorithm from S. Keshav's thesis Chapter 3
187         "Efficient Implementation of Fair Queeing".
188 
189         NOTES.
190 
191         * We implement only the simplest variant of CSZ,
192         when flow-0 is a explicit 4band priority fifo.
193         This is bad, but we need a "peek" operation in addition
194         to "dequeue" to implement complete CSZ.
195         I do not want to do that, unless it is absolutely
196         necessary.
197         
198         * A primitive support for token bucket filtering
199         presents itself too. It directly contradicts CSZ, but
200         even though the Internet is on the globe ... :-)
201         "the edges of the network" really exist.
202         
203         BUGS.
204 
205         * Fixed point arithmetic is overcomplicated, suboptimal and even
206         wrong. Check it later.  */
207 
208 
209 /* This number is arbitrary */
210 
211 #define CSZ_GUARANTEED          16
212 #define CSZ_FLOWS               (CSZ_GUARANTEED+4)
213 
214 struct csz_head
215 {
216         struct csz_head         *snext;
217         struct csz_head         *sprev;
218         struct csz_head         *fnext;
219         struct csz_head         *fprev;
220 };
221 
222 struct csz_flow
223 {
224         struct csz_head         *snext;
225         struct csz_head         *sprev;
226         struct csz_head         *fnext;
227         struct csz_head         *fprev;
228 
229 /* Parameters */
230         struct tc_ratespec      rate;
231         struct tc_ratespec      slice;
232         u32                     *L_tab; /* Lookup table for L/(B*r_a) values */
233         unsigned long           limit;  /* Maximal length of queue */
234 #ifdef CSZ_PLUS_TBF
235         struct tc_ratespec      peakrate;
236         __u32                   buffer; /* Depth of token bucket, normalized
237                                            as L/(B*r_a) */
238         __u32                   mtu;
239 #endif
240 
241 /* Variables */
242 #ifdef CSZ_PLUS_TBF
243         unsigned long           tokens; /* Tokens number: usecs */
244         psched_time_t           t_tbf;
245         unsigned long           R_tbf;
246         int                     throttled;
247 #endif
248         unsigned                peeked;
249         unsigned long           start;  /* Finish number of the first skb */
250         unsigned long           finish; /* Finish number of the flow */
251 
252         struct sk_buff_head     q;      /* FIFO queue */
253 };
254 
255 #define L2R(f,L) ((f)->L_tab[(L)>>(f)->slice.cell_log])
256 
257 struct csz_sched_data
258 {
259 /* Parameters */
260         unsigned char   rate_log;       /* fixed point position for rate;
261                                          * really we need not it */
262         unsigned char   R_log;          /* fixed point position for round number */
263         unsigned char   delta_log;      /* 1<<delta_log is maximal timeout in usecs;
264                                          * 21 <-> 2.1sec is MAXIMAL value */
265 
266 /* Variables */
267         struct tcf_proto *filter_list;
268         u8      prio2band[TC_PRIO_MAX+1];
269 #ifdef CSZ_PLUS_TBF
270         struct timer_list wd_timer;
271         long            wd_expires;
272 #endif
273         psched_time_t   t_c;            /* Time check-point */
274         unsigned long   R_c;            /* R-number check-point */
275         unsigned long   rate;           /* Current sum of rates of active flows */
276         struct csz_head s;              /* Flows sorted by "start" */
277         struct csz_head f;              /* Flows sorted by "finish"     */
278 
279         struct sk_buff_head     other[4];/* Predicted (0) and the best efforts
280                                             classes (1,2,3) */
281         struct csz_flow flow[CSZ_GUARANTEED]; /* Array of flows */
282 };
283 
284 /* These routines (csz_insert_finish and csz_insert_start) are
285    the most time consuming part of all the algorithm.
286 
287    We insert to sorted list, so that time
288    is linear with respect to number of active flows in the worst case.
289    Note that we have not very large number of guaranteed flows,
290    so that logarithmic algorithms (heap etc.) are useless,
291    they are slower than linear one when length of list <= 32.
292 
293    Heap would take sence if we used WFQ for best efforts
294    flows, but SFQ is better choice in this case.
295  */
296 
297 
298 /* Insert flow "this" to the list "b" before
299    flow with greater finish number.
300  */
301 
302 #if 0
303 /* Scan forward */
304 extern __inline__ void csz_insert_finish(struct csz_head *b,
305                                          struct csz_flow *this)
306 {
307         struct csz_head *f = b->fnext;
308         unsigned long finish = this->finish;
309 
310         while (f != b) {
311                 if (((struct csz_flow*)f)->finish - finish > 0)
312                         break;
313                 f = f->fnext;
314         }
315         this->fnext = f;
316         this->fprev = f->fprev;
317         this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this;
318 }
319 #else
320 /* Scan backward */
321 extern __inline__ void csz_insert_finish(struct csz_head *b,
322                                          struct csz_flow *this)
323 {
324         struct csz_head *f = b->fprev;
325         unsigned long finish = this->finish;
326 
327         while (f != b) {
328                 if (((struct csz_flow*)f)->finish - finish <= 0)
329                         break;
330                 f = f->fprev;
331         }
332         this->fnext = f->fnext;
333         this->fprev = f;
334         this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this;
335 }
336 #endif
337 
338 /* Insert flow "this" to the list "b" before
339    flow with greater start number.
340  */
341 
342 extern __inline__ void csz_insert_start(struct csz_head *b,
343                                         struct csz_flow *this)
344 {
345         struct csz_head *f = b->snext;
346         unsigned long start = this->start;
347 
348         while (f != b) {
349                 if (((struct csz_flow*)f)->start - start > 0)
350                         break;
351                 f = f->snext;
352         }
353         this->snext = f;
354         this->sprev = f->sprev;
355         this->snext->sprev = this->sprev->snext = (struct csz_head*)this;
356 }
357 
358 
359 /* Calculate and return current round number.
360    It is another time consuming part, but
361    it is impossible to avoid it.
362 
363    It costs O(N) that make all the algorithm useful only
364    to play with closest to ideal fluid model.
365 
366    There exist less academic, but more practical modifications,
367    which might have even better characteristics (WF2Q+, HPFQ, HFSC)
368  */
369 
370 static unsigned long csz_update(struct Qdisc *sch)
371 {
372         struct csz_sched_data   *q = (struct csz_sched_data*)sch->data;
373         struct csz_flow         *a;
374         unsigned long           F;
375         unsigned long           tmp;
376         psched_time_t           now;
377         unsigned long           delay;
378         unsigned long           R_c;
379 
380         PSCHED_GET_TIME(now);
381         delay = PSCHED_TDIFF_SAFE(now, q->t_c, 0, goto do_reset);
382 
383         if (delay>>q->delta_log) {
384 do_reset:
385                 /* Delta is too large.
386                    It is possible if MTU/BW > 1<<q->delta_log
387                    (i.e. configuration error) or because of hardware
388                    fault. We have no choice...
389                  */
390                 qdisc_reset(sch);
391                 return 0;
392         }
393 
394         q->t_c = now;
395 
396         for (;;) {
397                 a = (struct csz_flow*)q->f.fnext;
398 
399                 /* No more active flows. Reset R and exit. */
400                 if (a == (struct csz_flow*)&q->f) {
401 #ifdef CSZ_DEBUG
402                         if (q->rate) {
403                                 printk("csz_update: rate!=0 on inactive csz\n");
404                                 q->rate = 0;
405                         }
406 #endif
407                         q->R_c = 0;
408                         return 0;
409                 }
410 
411                 F = a->finish;
412 
413 #ifdef CSZ_DEBUG
414                 if (q->rate == 0) {
415                         printk("csz_update: rate=0 on active csz\n");
416                         goto do_reset;
417                 }
418 #endif
419 
420                 /*
421                  *           tmp = (t - q->t_c)/q->rate;
422                  */
423 
424                 tmp = ((delay<<(31-q->delta_log))/q->rate)>>(31-q->delta_log+q->R_log);
425 
426                 tmp += q->R_c;
427 
428                 /* OK, this flow (and all flows with greater
429                    finish numbers) is still active */
430                 if (F - tmp > 0)
431                         break;
432 
433                 /* It is more not active */
434 
435                 a->fprev->fnext = a->fnext;
436                 a->fnext->fprev = a->fprev;
437 
438                 /*
439                  * q->t_c += (F - q->R_c)*q->rate
440                  */
441 
442                 tmp = ((F-q->R_c)*q->rate)<<q->R_log;
443                 R_c = F;
444                 q->rate -= a->slice.rate;
445 
446                 if ((long)(delay - tmp) >= 0) {
447                         delay -= tmp;
448                         continue;
449                 }
450                 delay = 0;
451         }
452 
453         q->R_c = tmp;
454         return tmp;
455 }
456 
457 unsigned csz_classify(struct sk_buff *skb, struct csz_sched_data *q)
458 {
459         return CSZ_GUARANTEED;
460 }
461 
462 static int
463 csz_enqueue(struct sk_buff *skb, struct Qdisc* sch)
464 {
465         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
466         unsigned flow_id = csz_classify(skb, q);
467         unsigned long R;
468         int prio = 0;
469         struct csz_flow *this;
470 
471         if (flow_id >= CSZ_GUARANTEED) {
472                 prio = flow_id - CSZ_GUARANTEED;
473                 flow_id = 0;
474         }
475 
476         this = &q->flow[flow_id];
477         if (this->q.qlen >= this->limit || this->L_tab == NULL) {
478                 sch->stats.drops++;
479                 kfree_skb(skb);
480                 return NET_XMIT_DROP;
481         }
482 
483         R = csz_update(sch);
484 
485         if ((long)(this->finish - R) >= 0) {
486                 /* It was active */
487                 this->finish += L2R(this,skb->len);
488         } else {
489                 /* It is inactive; activate it */
490                 this->finish = R + L2R(this,skb->len);
491                 q->rate += this->slice.rate;
492                 csz_insert_finish(&q->f, this);
493         }
494 
495         /* If this flow was empty, remember start number
496            and insert it into start queue */
497         if (this->q.qlen == 0) {
498                 this->start = this->finish;
499                 csz_insert_start(&q->s, this);
500         }
501         if (flow_id)
502                 skb_queue_tail(&this->q, skb);
503         else
504                 skb_queue_tail(&q->other[prio], skb);
505         sch->q.qlen++;
506         sch->stats.bytes += skb->len;
507         sch->stats.packets++;
508         return 0;
509 }
510 
511 static __inline__ struct sk_buff *
512 skb_dequeue_best(struct csz_sched_data * q)
513 {
514         int i;
515         struct sk_buff *skb;
516 
517         for (i=0; i<4; i++) {
518                 skb = skb_dequeue(&q->other[i]);
519                 if (skb) {
520                         q->flow[0].q.qlen--;
521                         return skb;
522                 }
523         }
524         return NULL;
525 }
526 
527 static __inline__ struct sk_buff *
528 skb_peek_best(struct csz_sched_data * q)
529 {
530         int i;
531         struct sk_buff *skb;
532 
533         for (i=0; i<4; i++) {
534                 skb = skb_peek(&q->other[i]);
535                 if (skb)
536                         return skb;
537         }
538         return NULL;
539 }
540 
541 #ifdef CSZ_PLUS_TBF
542 
543 static void csz_watchdog(unsigned long arg)
544 {
545         struct Qdisc *sch = (struct Qdisc*)arg;
546 
547         qdisc_wakeup(sch->dev);
548 }
549 
550 static __inline__ void
551 csz_move_queue(struct csz_flow *this, long delta)
552 {
553         this->fprev->fnext = this->fnext;
554         this->fnext->fprev = this->fprev;
555 
556         this->start += delta;
557         this->finish += delta;
558 
559         csz_insert_finish(this);
560 }
561 
562 static __inline__ int csz_enough_tokens(struct csz_sched_data *q,
563                                         struct csz_flow *this,
564                                         struct sk_buff *skb)
565 {
566         long toks;
567         long shift;
568         psched_time_t now;
569 
570         PSCHED_GET_TIME(now);
571 
572         toks = PSCHED_TDIFF(now, t_tbf) + this->tokens - L2R(q,this,skb->len);
573 
574         shift = 0;
575         if (this->throttled) {
576                 /* Remember aposteriory delay */
577 
578                 unsigned long R = csz_update(q);
579                 shift = R - this->R_tbf;
580                 this->R_tbf = R;
581         }
582 
583         if (toks >= 0) {
584                 /* Now we have enough tokens to proceed */
585 
586                 this->tokens = toks <= this->depth ? toks : this->depth;
587                 this->t_tbf = now;
588         
589                 if (!this->throttled)
590                         return 1;
591 
592                 /* Flow was throttled. Update its start&finish numbers
593                    with delay calculated aposteriori.
594                  */
595 
596                 this->throttled = 0;
597                 if (shift > 0)
598                         csz_move_queue(this, shift);
599                 return 1;
600         }
601 
602         if (!this->throttled) {
603                 /* Flow has just been throttled; remember
604                    current round number to calculate aposteriori delay
605                  */
606                 this->throttled = 1;
607                 this->R_tbf = csz_update(q);
608         }
609 
610         /* Move all the queue to the time when it will be allowed to send.
611            We should translate time to round number, but it is impossible,
612            so that we made the most conservative estimate i.e. we suppose
613            that only this flow is active and, hence, R = t.
614            Really toks <= R <= toks/r_a.
615 
616            This apriory shift in R will be adjusted later to reflect
617            real delay. We cannot avoid it because of:
618            - throttled flow continues to be active from the viewpoint
619              of CSZ, so that it would acquire the highest priority,
620              if you not adjusted start numbers.
621            - Eventually, finish number would become less than round
622              number and flow were declared inactive.
623          */
624 
625         toks = -toks;
626 
627         /* Remeber, that we should start watchdog */
628         if (toks < q->wd_expires)
629                 q->wd_expires = toks;
630 
631         toks >>= q->R_log;
632         shift += toks;
633         if (shift > 0) {
634                 this->R_tbf += toks;
635                 csz_move_queue(this, shift);
636         }
637         csz_insert_start(this);
638         return 0;
639 }
640 #endif
641 
642 
643 static struct sk_buff *
644 csz_dequeue(struct Qdisc* sch)
645 {
646         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
647         struct sk_buff *skb;
648         struct csz_flow *this;
649 
650 #ifdef CSZ_PLUS_TBF
651         q->wd_expires = 0;
652 #endif
653         this = (struct csz_flow*)q->s.snext;
654 
655         while (this != (struct csz_flow*)&q->s) {
656 
657                 /* First of all: unlink from start list */
658                 this->sprev->snext = this->snext;
659                 this->snext->sprev = this->sprev;
660 
661                 if (this != &q->flow[0]) {      /* Guaranteed flow */
662                         skb = __skb_dequeue(&this->q);
663                         if (skb) {
664 #ifdef CSZ_PLUS_TBF
665                                 if (this->depth) {
666                                         if (!csz_enough_tokens(q, this, skb))
667                                                 continue;
668                                 }
669 #endif
670                                 if (this->q.qlen) {
671                                         struct sk_buff *nskb = skb_peek(&this->q);
672                                         this->start += L2R(this,nskb->len);
673                                         csz_insert_start(&q->s, this);
674                                 }
675                                 sch->q.qlen--;
676                                 return skb;
677                         }
678                 } else {        /* Predicted or best effort flow */
679                         skb = skb_dequeue_best(q);
680                         if (skb) {
681                                 unsigned peeked = this->peeked;
682                                 this->peeked = 0;
683 
684                                 if (--this->q.qlen) {
685                                         struct sk_buff *nskb;
686                                         unsigned dequeued = L2R(this,skb->len);
687 
688                                         /* We got not the same thing that
689                                            peeked earlier; adjust start number
690                                            */
691                                         if (peeked != dequeued && peeked)
692                                                 this->start += dequeued - peeked;
693 
694                                         nskb = skb_peek_best(q);
695                                         peeked = L2R(this,nskb->len);
696                                         this->start += peeked;
697                                         this->peeked = peeked;
698                                         csz_insert_start(&q->s, this);
699                                 }
700                                 sch->q.qlen--;
701                                 return skb;
702                         }
703                 }
704         }
705 #ifdef CSZ_PLUS_TBF
706         /* We are about to return no skb.
707            Schedule watchdog timer, if it occurred because of shaping.
708          */
709         if (q->wd_expires) {
710                 unsigned long delay = PSCHED_US2JIFFIE(q->wd_expires);
711                 del_timer(&q->wd_timer);
712                 if (delay == 0)
713                         delay = 1;
714                 q->wd_timer.expires = jiffies + delay;
715                 add_timer(&q->wd_timer);
716                 sch->stats.overlimits++;
717         }
718 #endif
719         return NULL;
720 }
721 
722 static void
723 csz_reset(struct Qdisc* sch)
724 {
725         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
726         int    i;
727 
728         for (i=0; i<4; i++)
729                 skb_queue_purge(&q->other[i]);
730 
731         for (i=0; i<CSZ_GUARANTEED; i++) {
732                 struct csz_flow *this = q->flow + i;
733                 skb_queue_purge(&this->q);
734                 this->snext = this->sprev =
735                 this->fnext = this->fprev = (struct csz_head*)this;
736                 this->start = this->finish = 0;
737         }
738         q->s.snext = q->s.sprev = &q->s;
739         q->f.fnext = q->f.fprev = &q->f;
740         q->R_c = 0;
741 #ifdef CSZ_PLUS_TBF
742         PSCHED_GET_TIME(&q->t_tbf);
743         q->tokens = q->depth;
744         del_timer(&q->wd_timer);
745 #endif
746         sch->q.qlen = 0;
747 }
748 
749 static void
750 csz_destroy(struct Qdisc* sch)
751 {
752         MOD_DEC_USE_COUNT;
753 }
754 
755 static int csz_init(struct Qdisc *sch, struct rtattr *opt)
756 {
757         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
758         struct rtattr *tb[TCA_CSZ_PTAB];
759         struct tc_csz_qopt *qopt;
760         int    i;
761 
762         rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
763         if (tb[TCA_CSZ_PARMS-1] == NULL ||
764             RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*qopt))
765                 return -EINVAL;
766         qopt = RTA_DATA(tb[TCA_CSZ_PARMS-1]);
767 
768         q->R_log = qopt->R_log;
769         q->delta_log = qopt->delta_log;
770         for (i=0; i<=TC_PRIO_MAX; i++) {
771                 if (qopt->priomap[i] >= CSZ_FLOWS)
772                         return -EINVAL;
773                 q->prio2band[i] = qopt->priomap[i];
774         }
775 
776         for (i=0; i<4; i++)
777                 skb_queue_head_init(&q->other[i]);
778 
779         for (i=0; i<CSZ_GUARANTEED; i++) {
780                 struct csz_flow *this = q->flow + i;
781                 skb_queue_head_init(&this->q);
782                 this->snext = this->sprev =
783                 this->fnext = this->fprev = (struct csz_head*)this;
784                 this->start = this->finish = 0;
785         }
786         q->s.snext = q->s.sprev = &q->s;
787         q->f.fnext = q->f.fprev = &q->f;
788         q->R_c = 0;
789 #ifdef CSZ_PLUS_TBF
790         init_timer(&q->wd_timer);
791         q->wd_timer.data = (unsigned long)sch;
792         q->wd_timer.function = csz_watchdog;
793 #endif
794         MOD_INC_USE_COUNT;
795         return 0;
796 }
797 
798 #ifdef CONFIG_RTNETLINK
799 static int csz_dump(struct Qdisc *sch, struct sk_buff *skb)
800 {
801         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
802         unsigned char    *b = skb->tail;
803         struct rtattr *rta;
804         struct tc_csz_qopt opt;
805 
806         rta = (struct rtattr*)b;
807         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
808 
809         opt.flows = CSZ_FLOWS;
810         memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
811         RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt);
812         rta->rta_len = skb->tail - b;
813 
814         return skb->len;
815 
816 rtattr_failure:
817         skb_trim(skb, b - skb->data);
818         return -1;
819 }
820 #endif
821 
822 
823 static int csz_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
824                      struct Qdisc **old)
825 {
826         return -EINVAL;
827 }
828 
829 static struct Qdisc * csz_leaf(struct Qdisc *sch, unsigned long cl)
830 {
831         return NULL;
832 }
833 
834 
835 static unsigned long csz_get(struct Qdisc *sch, u32 classid)
836 {
837         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
838         unsigned long band = TC_H_MIN(classid) - 1;
839 
840         if (band >= CSZ_FLOWS)
841                 return 0;
842 
843         if (band < CSZ_GUARANTEED && q->flow[band].L_tab == NULL)
844                 return 0;
845 
846         return band+1;
847 }
848 
849 static unsigned long csz_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
850 {
851         return csz_get(sch, classid);
852 }
853 
854 
855 static void csz_put(struct Qdisc *sch, unsigned long cl)
856 {
857         return;
858 }
859 
860 static int csz_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg)
861 {
862         unsigned long cl = *arg;
863         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
864         struct rtattr *opt = tca[TCA_OPTIONS-1];
865         struct rtattr *tb[TCA_CSZ_PTAB];
866         struct tc_csz_copt *copt;
867 
868         rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
869         if (tb[TCA_CSZ_PARMS-1] == NULL ||
870             RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*copt))
871                 return -EINVAL;
872         copt = RTA_DATA(tb[TCA_CSZ_PARMS-1]);
873 
874         if (tb[TCA_CSZ_RTAB-1] &&
875             RTA_PAYLOAD(tb[TCA_CSZ_RTAB-1]) < 1024)
876                 return -EINVAL;
877 
878         if (cl) {
879                 struct csz_flow *a;
880                 cl--;
881                 if (cl >= CSZ_FLOWS)
882                         return -ENOENT;
883                 if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL)
884                         return -EINVAL;
885 
886                 a = &q->flow[cl];
887 
888                 spin_lock_bh(&sch->dev->queue_lock);
889 #if 0
890                 a->rate_log = copt->rate_log;
891 #endif
892 #ifdef CSZ_PLUS_TBF
893                 a->limit = copt->limit;
894                 a->rate = copt->rate;
895                 a->buffer = copt->buffer;
896                 a->mtu = copt->mtu;
897 #endif
898 
899                 if (tb[TCA_CSZ_RTAB-1])
900                         memcpy(a->L_tab, RTA_DATA(tb[TCA_CSZ_RTAB-1]), 1024);
901 
902                 spin_unlock_bh(&sch->dev->queue_lock);
903                 return 0;
904         }
905         /* NI */
906         return 0;
907 }
908 
909 static int csz_delete(struct Qdisc *sch, unsigned long cl)
910 {
911         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
912         struct csz_flow *a;
913 
914         cl--;
915 
916         if (cl >= CSZ_FLOWS)
917                 return -ENOENT;
918         if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL)
919                 return -EINVAL;
920 
921         a = &q->flow[cl];
922 
923         spin_lock_bh(&sch->dev->queue_lock);
924         a->fprev->fnext = a->fnext;
925         a->fnext->fprev = a->fprev;
926         a->sprev->snext = a->snext;
927         a->snext->sprev = a->sprev;
928         a->start = a->finish = 0;
929         kfree(xchg(&q->flow[cl].L_tab, NULL));
930         spin_unlock_bh(&sch->dev->queue_lock);
931 
932         return 0;
933 }
934 
935 #ifdef CONFIG_RTNETLINK
936 static int csz_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm)
937 {
938         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
939         unsigned char    *b = skb->tail;
940         struct rtattr *rta;
941         struct tc_csz_copt opt;
942 
943         tcm->tcm_handle = sch->handle|cl;
944 
945         cl--;
946 
947         if (cl > CSZ_FLOWS)
948                 goto rtattr_failure;
949 
950         if (cl < CSZ_GUARANTEED) {
951                 struct csz_flow *f = &q->flow[cl];
952 
953                 if (f->L_tab == NULL)
954                         goto rtattr_failure;
955 
956                 rta = (struct rtattr*)b;
957                 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
958 
959                 opt.limit = f->limit;
960                 opt.rate = f->rate;
961                 opt.slice = f->slice;
962                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
963 #ifdef CSZ_PLUS_TBF
964                 opt.buffer = f->buffer;
965                 opt.mtu = f->mtu;
966 #else
967                 opt.buffer = 0;
968                 opt.mtu = 0;
969 #endif
970 
971                 RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt);
972                 rta->rta_len = skb->tail - b;
973         }
974 
975         return skb->len;
976 
977 rtattr_failure:
978         skb_trim(skb, b - skb->data);
979         return -1;
980 }
981 #endif
982 
983 static void csz_walk(struct Qdisc *sch, struct qdisc_walker *arg)
984 {
985         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
986         int prio = 0;
987 
988         if (arg->stop)
989                 return;
990 
991         for (prio = 0; prio < CSZ_FLOWS; prio++) {
992                 if (arg->count < arg->skip) {
993                         arg->count++;
994                         continue;
995                 }
996                 if (prio < CSZ_GUARANTEED && q->flow[prio].L_tab == NULL) {
997                         arg->count++;
998                         continue;
999                 }
1000                 if (arg->fn(sch, prio+1, arg) < 0) {
1001                         arg->stop = 1;
1002                         break;
1003                 }
1004                 arg->count++;
1005         }
1006 }
1007 
1008 static struct tcf_proto ** csz_find_tcf(struct Qdisc *sch, unsigned long cl)
1009 {
1010         struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
1011 
1012         if (cl)
1013                 return NULL;
1014 
1015         return &q->filter_list;
1016 }
1017 
1018 struct Qdisc_class_ops csz_class_ops =
1019 {
1020         csz_graft,
1021         csz_leaf,
1022 
1023         csz_get,
1024         csz_put,
1025         csz_change,
1026         csz_delete,
1027         csz_walk,
1028 
1029         csz_find_tcf,
1030         csz_bind,
1031         csz_put,
1032 
1033 #ifdef CONFIG_RTNETLINK
1034         csz_dump_class,
1035 #endif
1036 };
1037 
1038 struct Qdisc_ops csz_qdisc_ops =
1039 {
1040         NULL,
1041         &csz_class_ops,
1042         "csz",
1043         sizeof(struct csz_sched_data),
1044 
1045         csz_enqueue,
1046         csz_dequeue,
1047         NULL,
1048         NULL,
1049 
1050         csz_init,
1051         csz_reset,
1052         csz_destroy,
1053         NULL /* csz_change */,
1054 
1055 #ifdef CONFIG_RTNETLINK
1056         csz_dump,
1057 #endif
1058 };
1059 
1060 
1061 #ifdef MODULE
1062 int init_module(void)
1063 {
1064         return register_qdisc(&csz_qdisc_ops);
1065 }
1066 
1067 void cleanup_module(void) 
1068 {
1069         unregister_qdisc(&csz_qdisc_ops);
1070 }
1071 #endif
1072 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.