~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/ipv4/tcp_timer.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              Implementation of the Transmission Control Protocol(TCP).
  7  *
  8  * Version:     $Id: tcp_timer.c,v 1.80 2000/10/03 07:29:01 anton Exp $
  9  *
 10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
 13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
 14  *              Florian La Roche, <flla@stud.uni-sb.de>
 15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
 17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
 19  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 20  *              Jorge Cwik, <jorge@laser.satlink.net>
 21  */
 22 
 23 #include <net/tcp.h>
 24 
 25 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
 26 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
 27 int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
 28 int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
 29 int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
 30 int sysctl_tcp_retries1 = TCP_RETR1;
 31 int sysctl_tcp_retries2 = TCP_RETR2;
 32 int sysctl_tcp_orphan_retries;
 33 
 34 static void tcp_write_timer(unsigned long);
 35 static void tcp_delack_timer(unsigned long);
 36 static void tcp_keepalive_timer (unsigned long data);
 37 
 38 const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
 39 
 40 /*
 41  * Using different timers for retransmit, delayed acks and probes
 42  * We may wish use just one timer maintaining a list of expire jiffies 
 43  * to optimize.
 44  */
 45 
 46 void tcp_init_xmit_timers(struct sock *sk)
 47 {
 48         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 49 
 50         init_timer(&tp->retransmit_timer);
 51         tp->retransmit_timer.function=&tcp_write_timer;
 52         tp->retransmit_timer.data = (unsigned long) sk;
 53         tp->pending = 0;
 54 
 55         init_timer(&tp->delack_timer);
 56         tp->delack_timer.function=&tcp_delack_timer;
 57         tp->delack_timer.data = (unsigned long) sk;
 58         tp->ack.pending = 0;
 59 
 60         init_timer(&sk->timer);
 61         sk->timer.function=&tcp_keepalive_timer;
 62         sk->timer.data = (unsigned long) sk;
 63 }
 64 
 65 void tcp_clear_xmit_timers(struct sock *sk)
 66 {
 67         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 68 
 69         tp->pending = 0;
 70         if (timer_pending(&tp->retransmit_timer) &&
 71             del_timer(&tp->retransmit_timer))
 72                 __sock_put(sk);
 73 
 74         tp->ack.pending = 0;
 75         tp->ack.blocked = 0;
 76         if (timer_pending(&tp->delack_timer) &&
 77             del_timer(&tp->delack_timer))
 78                 __sock_put(sk);
 79 
 80         if(timer_pending(&sk->timer) && del_timer(&sk->timer))
 81                 __sock_put(sk);
 82 }
 83 
 84 static void tcp_write_err(struct sock *sk)
 85 {
 86         sk->err = sk->err_soft ? : ETIMEDOUT;
 87         sk->error_report(sk);
 88 
 89         tcp_done(sk);
 90         NET_INC_STATS_BH(TCPAbortOnTimeout);
 91 }
 92 
 93 /* Do not allow orphaned sockets to eat all our resources.
 94  * This is direct violation of TCP specs, but it is required
 95  * to prevent DoS attacks. It is called when a retransmission timeout
 96  * or zero probe timeout occurs on orphaned socket.
 97  *
 98  * Criterium is still not confirmed experimentally and may change.
 99  * We kill the socket, if:
100  * 1. If number of orphaned sockets exceeds an administratively configured
101  *    limit.
102  * 2. If we have strong memory pressure.
103  */
104 static int tcp_out_of_resources(struct sock *sk, int do_reset)
105 {
106         struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
107         int orphans = atomic_read(&tcp_orphan_count);
108 
109         /* If peer does not open window for long time, or did not transmit 
110          * anything for long time, penalize it. */
111         if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
112                 orphans <<= 1;
113 
114         /* If some dubious ICMP arrived, penalize even more. */
115         if (sk->err_soft)
116                 orphans <<= 1;
117 
118         if (orphans >= sysctl_tcp_max_orphans ||
119             (sk->wmem_queued > SOCK_MIN_SNDBUF &&
120              atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
121                 if (net_ratelimit())
122                         printk(KERN_INFO "Out of socket memory\n");
123 
124                 /* Catch exceptional cases, when connection requires reset.
125                  *      1. Last segment was sent recently. */
126                 if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
127                     /*  2. Window is closed. */
128                     (!tp->snd_wnd && !tp->packets_out))
129                         do_reset = 1;
130                 if (do_reset)
131                         tcp_send_active_reset(sk, GFP_ATOMIC);
132                 tcp_done(sk);
133                 NET_INC_STATS_BH(TCPAbortOnMemory);
134                 return 1;
135         }
136         return 0;
137 }
138 
139 /* Calculate maximal number or retries on an orphaned socket. */
140 static int tcp_orphan_retries(struct sock *sk, int alive)
141 {
142         int retries = sysctl_tcp_orphan_retries; /* May be zero. */
143 
144         /* We know from an ICMP that something is wrong. */
145         if (sk->err_soft && !alive)
146                 retries = 0;
147 
148         /* However, if socket sent something recently, select some safe
149          * number of retries. 8 corresponds to >100 seconds with minimal
150          * RTO of 200msec. */
151         if (retries == 0 && alive)
152                 retries = 8;
153         return retries;
154 }
155 
156 /* A write timeout has occurred. Process the after effects. */
157 static int tcp_write_timeout(struct sock *sk)
158 {
159         struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
160         int retry_until;
161 
162         if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
163                 if (tp->retransmits)
164                         dst_negative_advice(&sk->dst_cache);
165                 retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
166         } else {
167                 if (tp->retransmits >= sysctl_tcp_retries1) {
168                         /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
169                            hole detection. :-(
170 
171                            It is place to make it. It is not made. I do not want
172                            to make it. It is disguisting. It does not work in any
173                            case. Let me to cite the same draft, which requires for
174                            us to implement this:
175 
176    "The one security concern raised by this memo is that ICMP black holes
177    are often caused by over-zealous security administrators who block
178    all ICMP messages.  It is vitally important that those who design and
179    deploy security systems understand the impact of strict filtering on
180    upper-layer protocols.  The safest web site in the world is worthless
181    if most TCP implementations cannot transfer data from it.  It would
182    be far nicer to have all of the black holes fixed rather than fixing
183    all of the TCP implementations."
184 
185                            Golden words :-).
186                    */
187 
188                         dst_negative_advice(&sk->dst_cache);
189                 }
190 
191                 retry_until = sysctl_tcp_retries2;
192                 if (sk->dead) {
193                         int alive = (tp->rto < TCP_RTO_MAX);
194  
195                         retry_until = tcp_orphan_retries(sk, alive);
196 
197                         if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
198                                 return 1;
199                 }
200         }
201 
202         if (tp->retransmits >= retry_until) {
203                 /* Has it gone just too far? */
204                 tcp_write_err(sk);
205                 return 1;
206         }
207         return 0;
208 }
209 
210 static void tcp_delack_timer(unsigned long data)
211 {
212         struct sock *sk = (struct sock*)data;
213         struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
214 
215         bh_lock_sock(sk);
216         if (sk->lock.users) {
217                 /* Try again later. */
218                 tp->ack.blocked = 1;
219                 NET_INC_STATS_BH(DelayedACKLocked);
220                 if (!mod_timer(&tp->delack_timer, jiffies + TCP_DELACK_MIN))
221                         sock_hold(sk);
222                 goto out_unlock;
223         }
224 
225         tcp_mem_reclaim(sk);
226 
227         if (sk->state == TCP_CLOSE || !(tp->ack.pending&TCP_ACK_TIMER))
228                 goto out;
229 
230         if ((long)(tp->ack.timeout - jiffies) > 0) {
231                 if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
232                         sock_hold(sk);
233                 goto out;
234         }
235         tp->ack.pending &= ~TCP_ACK_TIMER;
236 
237         if (skb_queue_len(&tp->ucopy.prequeue)) {
238                 struct sk_buff *skb;
239 
240                 net_statistics[smp_processor_id()*2].TCPSchedulerFailed += skb_queue_len(&tp->ucopy.prequeue);
241 
242                 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
243                         sk->backlog_rcv(sk, skb);
244 
245                 tp->ucopy.memory = 0;
246         }
247 
248         if (tcp_ack_scheduled(tp)) {
249                 if (!tp->ack.pingpong) {
250                         /* Delayed ACK missed: inflate ATO. */
251                         tp->ack.ato = min(tp->ack.ato<<1, tp->rto);
252                 } else {
253                         /* Delayed ACK missed: leave pingpong mode and
254                          * deflate ATO.
255                          */
256                         tp->ack.pingpong = 0;
257                         tp->ack.ato = TCP_ATO_MIN;
258                 }
259                 tcp_send_ack(sk);
260                 NET_INC_STATS_BH(DelayedACKs);
261         }
262         TCP_CHECK_TIMER(sk);
263 
264 out:
265         if (tcp_memory_pressure)
266                 tcp_mem_reclaim(sk);
267 out_unlock:
268         bh_unlock_sock(sk);
269         sock_put(sk);
270 }
271 
272 static void tcp_probe_timer(struct sock *sk)
273 {
274         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
275         int max_probes;
276 
277         if (tp->packets_out || !tp->send_head) {
278                 tp->probes_out = 0;
279                 return;
280         }
281 
282         /* *WARNING* RFC 1122 forbids this
283          *
284          * It doesn't AFAIK, because we kill the retransmit timer -AK
285          *
286          * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
287          * this behaviour in Solaris down as a bug fix. [AC]
288          *
289          * Let me to explain. probes_out is zeroed by incoming ACKs
290          * even if they advertise zero window. Hence, connection is killed only
291          * if we received no ACKs for normal connection timeout. It is not killed
292          * only because window stays zero for some time, window may be zero
293          * until armageddon and even later. We are in full accordance
294          * with RFCs, only probe timer combines both retransmission timeout
295          * and probe timeout in one bottle.                             --ANK
296          */
297         max_probes = sysctl_tcp_retries2;
298 
299         if (sk->dead) {
300                 int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
301  
302                 max_probes = tcp_orphan_retries(sk, alive);
303 
304                 if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
305                         return;
306         }
307 
308         if (tp->probes_out > max_probes) {
309                 tcp_write_err(sk);
310         } else {
311                 /* Only send another probe if we didn't close things up. */
312                 tcp_send_probe0(sk);
313         }
314 }
315 
316 /*
317  *      The TCP retransmit timer.
318  */
319 
320 static void tcp_retransmit_timer(struct sock *sk)
321 {
322         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
323 
324         if (tp->packets_out == 0)
325                 goto out;
326 
327         BUG_TRAP(!skb_queue_empty(&sk->write_queue));
328 
329         if (tcp_write_timeout(sk))
330                 goto out;
331 
332         if (tp->retransmits == 0) {
333                 if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
334                         if (tp->sack_ok) {
335                                 if (tp->ca_state == TCP_CA_Recovery)
336                                         NET_INC_STATS_BH(TCPSackRecoveryFail);
337                                 else
338                                         NET_INC_STATS_BH(TCPSackFailures);
339                         } else {
340                                 if (tp->ca_state == TCP_CA_Recovery)
341                                         NET_INC_STATS_BH(TCPRenoRecoveryFail);
342                                 else
343                                         NET_INC_STATS_BH(TCPRenoFailures);
344                         }
345                 } else if (tp->ca_state == TCP_CA_Loss) {
346                         NET_INC_STATS_BH(TCPLossFailures);
347                 } else {
348                         NET_INC_STATS_BH(TCPTimeouts);
349                 }
350         }
351 
352         tcp_enter_loss(sk, 0);
353 
354         if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) {
355                 /* Retransmission failed because of local congestion,
356                  * do not backoff.
357                  */
358                 if (!tp->retransmits)
359                         tp->retransmits=1;
360                 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
361                                      min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
362                 goto out;
363         }
364 
365         /* Increase the timeout each time we retransmit.  Note that
366          * we do not increase the rtt estimate.  rto is initialized
367          * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
368          * that doubling rto each time is the least we can get away with.
369          * In KA9Q, Karn uses this for the first few times, and then
370          * goes to quadratic.  netBSD doubles, but only goes up to *64,
371          * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
372          * defined in the protocol as the maximum possible RTT.  I guess
373          * we'll have to use something other than TCP to talk to the
374          * University of Mars.
375          *
376          * PAWS allows us longer timeouts and large windows, so once
377          * implemented ftp to mars will work nicely. We will have to fix
378          * the 120 second clamps though!
379          */
380         tp->backoff++;
381         tp->retransmits++;
382         tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
383         tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
384         if (tp->retransmits > sysctl_tcp_retries1)
385                 __sk_dst_reset(sk);
386 
387 out:;
388 }
389 
390 static void tcp_write_timer(unsigned long data)
391 {
392         struct sock *sk = (struct sock*)data;
393         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
394         int event;
395 
396         bh_lock_sock(sk);
397         if (sk->lock.users) {
398                 /* Try again later */
399                 if (!mod_timer(&tp->retransmit_timer, jiffies + (HZ/20)))
400                         sock_hold(sk);
401                 goto out_unlock;
402         }
403 
404         if (sk->state == TCP_CLOSE || !tp->pending)
405                 goto out;
406 
407         if ((long)(tp->timeout - jiffies) > 0) {
408                 if (!mod_timer(&tp->retransmit_timer, tp->timeout))
409                         sock_hold(sk);
410                 goto out;
411         }
412 
413         event = tp->pending;
414         tp->pending = 0;
415 
416         switch (event) {
417         case TCP_TIME_RETRANS:
418                 tcp_retransmit_timer(sk);
419                 break;
420         case TCP_TIME_PROBE0:
421                 tcp_probe_timer(sk);
422                 break;
423         }
424         TCP_CHECK_TIMER(sk);
425 
426 out:
427         tcp_mem_reclaim(sk);
428 out_unlock:
429         bh_unlock_sock(sk);
430         sock_put(sk);
431 }
432 
433 /*
434  *      Timer for listening sockets
435  */
436 
437 static void tcp_synack_timer(struct sock *sk)
438 {
439         struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
440         struct tcp_listen_opt *lopt = tp->listen_opt;
441         int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
442         int thresh = max_retries;
443         unsigned long now = jiffies;
444         struct open_request **reqp, *req;
445         int i, budget;
446 
447         if (lopt == NULL || lopt->qlen == 0)
448                 return;
449 
450         /* Normally all the openreqs are young and become mature
451          * (i.e. converted to established socket) for first timeout.
452          * If synack was not acknowledged for 3 seconds, it means
453          * one of the following things: synack was lost, ack was lost,
454          * rtt is high or nobody planned to ack (i.e. synflood).
455          * When server is a bit loaded, queue is populated with old
456          * open requests, reducing effective size of queue.
457          * When server is well loaded, queue size reduces to zero
458          * after several minutes of work. It is not synflood,
459          * it is normal operation. The solution is pruning
460          * too old entries overriding normal timeout, when
461          * situation becomes dangerous.
462          *
463          * Essentially, we reserve half of room for young
464          * embrions; and abort old ones without pity, if old
465          * ones are about to clog our table.
466          */
467         if (lopt->qlen>>(lopt->max_qlen_log-1)) {
468                 int young = (lopt->qlen_young<<1);
469 
470                 while (thresh > 2) {
471                         if (lopt->qlen < young)
472                                 break;
473                         thresh--;
474                         young <<= 1;
475                 }
476         }
477 
478         if (tp->defer_accept)
479                 max_retries = tp->defer_accept;
480 
481         budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
482         i = lopt->clock_hand;
483 
484         do {
485                 reqp=&lopt->syn_table[i];
486                 while ((req = *reqp) != NULL) {
487                         if ((long)(now - req->expires) >= 0) {
488                                 if ((req->retrans < thresh ||
489                                      (req->acked && req->retrans < max_retries))
490                                     && !req->class->rtx_syn_ack(sk, req, NULL)) {
491                                         unsigned long timeo;
492 
493                                         if (req->retrans++ == 0)
494                                                 lopt->qlen_young--;
495                                         timeo = min((TCP_TIMEOUT_INIT << req->retrans),
496                                                     TCP_RTO_MAX);
497                                         req->expires = now + timeo;
498                                         reqp = &req->dl_next;
499                                         continue;
500                                 }
501 
502                                 /* Drop this request */
503                                 write_lock(&tp->syn_wait_lock);
504                                 *reqp = req->dl_next;
505                                 write_unlock(&tp->syn_wait_lock);
506                                 lopt->qlen--;
507                                 if (req->retrans == 0)
508                                         lopt->qlen_young--;
509                                 tcp_openreq_free(req);
510                                 continue;
511                         }
512                         reqp = &req->dl_next;
513                 }
514 
515                 i = (i+1)&(TCP_SYNQ_HSIZE-1);
516 
517         } while (--budget > 0);
518 
519         lopt->clock_hand = i;
520 
521         if (lopt->qlen)
522                 tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
523 }
524 
525 void tcp_delete_keepalive_timer (struct sock *sk)
526 {
527         if (timer_pending(&sk->timer) && del_timer (&sk->timer))
528                 __sock_put(sk);
529 }
530 
531 void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
532 {
533         if (!mod_timer(&sk->timer, jiffies+len))
534                 sock_hold(sk);
535 }
536 
537 void tcp_set_keepalive(struct sock *sk, int val)
538 {
539         if ((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))
540                 return;
541 
542         if (val && !sk->keepopen)
543                 tcp_reset_keepalive_timer(sk, keepalive_time_when(&sk->tp_pinfo.af_tcp));
544         else if (!val)
545                 tcp_delete_keepalive_timer(sk);
546 }
547 
548 
549 static void tcp_keepalive_timer (unsigned long data)
550 {
551         struct sock *sk = (struct sock *) data;
552         struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
553         __u32 elapsed;
554 
555         /* Only process if socket is not in use. */
556         bh_lock_sock(sk);
557         if (sk->lock.users) {
558                 /* Try again later. */ 
559                 tcp_reset_keepalive_timer (sk, HZ/20);
560                 goto out;
561         }
562 
563         if (sk->state == TCP_LISTEN) {
564                 tcp_synack_timer(sk);
565                 goto out;
566         }
567 
568         if (sk->state == TCP_FIN_WAIT2 && sk->dead) {
569                 if (tp->linger2 >= 0) {
570                         int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
571 
572                         if (tmo > 0) {
573                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
574                                 goto out;
575                         }
576                 }
577                 tcp_send_active_reset(sk, GFP_ATOMIC);
578                 goto death;
579         }
580 
581         if (!sk->keepopen || sk->state == TCP_CLOSE)
582                 goto out;
583 
584         elapsed = keepalive_time_when(tp);
585 
586         /* It is alive without keepalive 8) */
587         if (tp->packets_out || tp->send_head)
588                 goto resched;
589 
590         elapsed = tcp_time_stamp - tp->rcv_tstamp;
591 
592         if (elapsed >= keepalive_time_when(tp)) {
593                 if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
594                      (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
595                         tcp_send_active_reset(sk, GFP_ATOMIC);
596                         tcp_write_err(sk);
597                         goto out;
598                 }
599                 if (tcp_write_wakeup(sk) <= 0) {
600                         tp->probes_out++;
601                         elapsed = keepalive_intvl_when(tp);
602                 } else {
603                         /* If keepalive was lost due to local congestion,
604                          * try harder.
605                          */
606                         elapsed = TCP_RESOURCE_PROBE_INTERVAL;
607                 }
608         } else {
609                 /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
610                 elapsed = keepalive_time_when(tp) - elapsed;
611         }
612 
613         TCP_CHECK_TIMER(sk);
614         tcp_mem_reclaim(sk);
615 
616 resched:
617         tcp_reset_keepalive_timer (sk, elapsed);
618         goto out;
619 
620 death:  
621         tcp_done(sk);
622 
623 out:
624         bh_unlock_sock(sk);
625         sock_put(sk);
626 }
627 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.