~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/sched/estimator.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * net/sched/estimator.c        Simple rate estimator.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10  */
 11 
 12 #include <asm/uaccess.h>
 13 #include <asm/system.h>
 14 #include <asm/bitops.h>
 15 #include <linux/types.h>
 16 #include <linux/kernel.h>
 17 #include <linux/sched.h>
 18 #include <linux/string.h>
 19 #include <linux/mm.h>
 20 #include <linux/socket.h>
 21 #include <linux/sockios.h>
 22 #include <linux/in.h>
 23 #include <linux/errno.h>
 24 #include <linux/interrupt.h>
 25 #include <linux/netdevice.h>
 26 #include <linux/skbuff.h>
 27 #include <linux/rtnetlink.h>
 28 #include <linux/init.h>
 29 #include <linux/proc_fs.h>
 30 #include <net/sock.h>
 31 #include <net/pkt_sched.h>
 32 
 33 /*
 34    This code is NOT intended to be used for statistics collection,
 35    its purpose is to provide a base for statistical multiplexing
 36    for controlled load service.
 37    If you need only statistics, run a user level daemon which
 38    periodically reads byte counters.
 39 
 40    Unfortunately, rate estimation is not a very easy task.
 41    F.e. I did not find a simple way to estimate the current peak rate
 42    and even failed to formulate the problem 8)8)
 43 
 44    So I preferred not to built an estimator into the scheduler,
 45    but run this task separately.
 46    Ideally, it should be kernel thread(s), but for now it runs
 47    from timers, which puts apparent top bounds on the number of rated
 48    flows, has minimal overhead on small, but is enough
 49    to handle controlled load service, sets of aggregates.
 50 
 51    We measure rate over A=(1<<interval) seconds and evaluate EWMA:
 52 
 53    avrate = avrate*(1-W) + rate*W
 54 
 55    where W is chosen as negative power of 2: W = 2^(-ewma_log)
 56 
 57    The resulting time constant is:
 58 
 59    T = A/(-ln(1-W))
 60 
 61 
 62    NOTES.
 63 
 64    * The stored value for avbps is scaled by 2^5, so that maximal
 65      rate is ~1Gbit, avpps is scaled by 2^10.
 66 
 67    * Minimal interval is HZ/4=250msec (it is the greatest common divisor
 68      for HZ=100 and HZ=1024 8)), maximal interval
 69      is (HZ/4)*2^EST_MAX_INTERVAL = 8sec. Shorter intervals
 70      are too expensive, longer ones can be implemented
 71      at user level painlessly.
 72  */
 73 
 74 #if (HZ%4) != 0
 75 #error Bad HZ value.
 76 #endif
 77 
 78 #define EST_MAX_INTERVAL        5
 79 
 80 struct qdisc_estimator
 81 {
 82         struct qdisc_estimator  *next;
 83         struct tc_stats         *stats;
 84         unsigned                interval;
 85         int                     ewma_log;
 86         u64                     last_bytes;
 87         u32                     last_packets;
 88         u32                     avpps;
 89         u32                     avbps;
 90 };
 91 
 92 struct qdisc_estimator_head
 93 {
 94         struct timer_list       timer;
 95         struct qdisc_estimator  *list;
 96 };
 97 
 98 static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
 99 
100 /* Estimator array lock */
101 static rwlock_t est_lock = RW_LOCK_UNLOCKED;
102 
103 static void est_timer(unsigned long arg)
104 {
105         int idx = (int)arg;
106         struct qdisc_estimator *e;
107 
108         read_lock(&est_lock);
109         for (e = elist[idx].list; e; e = e->next) {
110                 struct tc_stats *st = e->stats;
111                 u64 nbytes;
112                 u32 npackets;
113                 u32 rate;
114 
115                 spin_lock(st->lock);
116                 nbytes = st->bytes;
117                 npackets = st->packets;
118                 rate = (nbytes - e->last_bytes)<<(7 - idx);
119                 e->last_bytes = nbytes;
120                 e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
121                 st->bps = (e->avbps+0xF)>>5;
122 
123                 rate = (npackets - e->last_packets)<<(12 - idx);
124                 e->last_packets = npackets;
125                 e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
126                 e->stats->pps = (e->avpps+0x1FF)>>10;
127                 spin_unlock(st->lock);
128         }
129 
130         mod_timer(&elist[idx].timer, jiffies + ((HZ/4)<<idx));
131         read_unlock(&est_lock);
132 }
133 
134 int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt)
135 {
136         struct qdisc_estimator *est;
137         struct tc_estimator *parm = RTA_DATA(opt);
138 
139         if (RTA_PAYLOAD(opt) < sizeof(*parm))
140                 return -EINVAL;
141 
142         if (parm->interval < -2 || parm->interval > 3)
143                 return -EINVAL;
144 
145         est = kmalloc(sizeof(*est), GFP_KERNEL);
146         if (est == NULL)
147                 return -ENOBUFS;
148 
149         memset(est, 0, sizeof(*est));
150         est->interval = parm->interval + 2;
151         est->stats = stats;
152         est->ewma_log = parm->ewma_log;
153         est->last_bytes = stats->bytes;
154         est->avbps = stats->bps<<5;
155         est->last_packets = stats->packets;
156         est->avpps = stats->pps<<10;
157 
158         est->next = elist[est->interval].list;
159         if (est->next == NULL) {
160                 init_timer(&elist[est->interval].timer);
161                 elist[est->interval].timer.data = est->interval;
162                 elist[est->interval].timer.expires = jiffies + ((HZ/4)<<est->interval);
163                 elist[est->interval].timer.function = est_timer;
164                 add_timer(&elist[est->interval].timer);
165         }
166         write_lock_bh(&est_lock);
167         elist[est->interval].list = est;
168         write_unlock_bh(&est_lock);
169         return 0;
170 }
171 
172 void qdisc_kill_estimator(struct tc_stats *stats)
173 {
174         int idx;
175         struct qdisc_estimator *est, **pest;
176 
177         for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
178                 int killed = 0;
179                 pest = &elist[idx].list;
180                 while ((est=*pest) != NULL) {
181                         if (est->stats != stats) {
182                                 pest = &est->next;
183                                 continue;
184                         }
185 
186                         write_lock_bh(&est_lock);
187                         *pest = est->next;
188                         write_unlock_bh(&est_lock);
189 
190                         kfree(est);
191                         killed++;
192                 }
193                 if (killed && elist[idx].list == NULL)
194                         del_timer(&elist[idx].timer);
195         }
196 }
197 
198 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.