| 1 | /* |
| 2 | * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * Copyright (c) 1982, 1986, 1993 |
| 30 | * The Regents of the University of California. All rights reserved. |
| 31 | * |
| 32 | * Redistribution and use in source and binary forms, with or without |
| 33 | * modification, are permitted provided that the following conditions |
| 34 | * are met: |
| 35 | * 1. Redistributions of source code must retain the above copyright |
| 36 | * notice, this list of conditions and the following disclaimer. |
| 37 | * 2. Redistributions in binary form must reproduce the above copyright |
| 38 | * notice, this list of conditions and the following disclaimer in the |
| 39 | * documentation and/or other materials provided with the distribution. |
| 40 | * 3. All advertising materials mentioning features or use of this software |
| 41 | * must display the following acknowledgement: |
| 42 | * This product includes software developed by the University of |
| 43 | * California, Berkeley and its contributors. |
| 44 | * 4. Neither the name of the University nor the names of its contributors |
| 45 | * may be used to endorse or promote products derived from this software |
| 46 | * without specific prior written permission. |
| 47 | * |
| 48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 58 | * SUCH DAMAGE. |
| 59 | * |
| 60 | * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 |
| 61 | * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.18 1999/12/29 04:41:03 peter Exp $ |
| 62 | */ |
| 63 | |
| 64 | #ifndef _NETINET_TCP_TIMER_H_ |
| 65 | #define _NETINET_TCP_TIMER_H_ |
| 66 | #include <sys/appleapiopts.h> |
| 67 | |
| 68 | #ifdef BSD_KERNEL_PRIVATE |
| 69 | #include <kern/thread_call.h> |
| 70 | #endif /* BSD_KERNEL_PRIVATE */ |
| 71 | |
| 72 | /* Timer count for the external definition; keep the external definition the same for binary compatibility */ |
| 73 | #define TCPT_NTIMERS_EXT 4 /* NOTE(review): TCPT_DELACK_EXT is defined as 4 in this file, i.e. not below this count -- confirm that is intended */ |
| 74 | |
| 75 | /* |
| 76 | * Definitions of the TCP timers. |
| 77 | * |
| 78 | * The TCPT_PTO timer is used for probing for a tail loss in a send window. |
| 79 | * If this probe gets acknowledged using SACK, it will allow the connection |
| 80 | * to enter fast-recovery instead of hitting a retransmit timeout. A probe |
| 81 | * timeout will send the last unacknowledged segment to generate more acks |
| 82 | * with SACK information which can be used for fast-retransmitting the lost |
| 83 | * packets. This will fire in the order of 10ms. |
| 84 | * |
| 85 | * The TCPT_REXMT timer is used to force retransmissions. |
| 86 | * The TCP has the TCPT_REXMT timer set whenever segments |
| 87 | * have been sent for which ACKs are expected but not yet |
| 88 | * received. If an ACK is received which advances tp->snd_una, |
| 89 | * then the retransmit timer is cleared (if there are no more |
| 90 | * outstanding segments) or reset to the base value (if there |
| 91 | * are more ACKs expected). Whenever the retransmit timer goes off, |
| 92 | * we retransmit one unacknowledged segment, and do a backoff |
| 93 | * on the retransmit timer. |
| 94 | * |
| 95 | * The TCPT_DELACK timer is used for transmitting delayed acknowledgements |
| 96 | * if an acknowledgement was delayed in anticipation of a new segment. |
| 97 | * |
| 98 | * The TCPT_PERSIST timer is used to keep window size information |
| 99 | * flowing even if the window goes shut. If all previous transmissions |
| 100 | * have been acknowledged (so that there are no retransmissions in progress), |
| 101 | * and the window is too small to bother sending anything, then we start |
| 102 | * the TCPT_PERSIST timer. When it expires, if the window is nonzero, |
| 103 | * we go to transmit state. Otherwise, at intervals send a single byte |
| 104 | * into the peer's window to force him to update our window information. |
| 105 | * We do this at most as often as TCPTV_PERSMIN time intervals, |
| 106 | * but no more frequently than the current estimate of round-trip |
| 107 | * packet time. The TCPT_PERSIST timer is cleared whenever we receive |
| 108 | * a window update from the peer. |
| 109 | * |
| 110 | * The TCPT_KEEP timer is used to keep connections alive. If a |
| 111 | * connection is idle (no segments received) for TCPTV_KEEP_INIT amount |
| 112 | * of time, but not yet established, then we drop the connection. |
| 113 | * Once the connection is established, if the connection is idle for |
| 114 | * TCPTV_KEEP_IDLE time (and keepalives have been enabled on the socket), |
| 115 | * we begin to probe the connection. We force the peer to send us a |
| 116 | * segment by sending: |
| 117 | * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> |
| 118 | * This segment is (deliberately) outside the window, and should elicit |
| 119 | * an ack segment in response from the peer. If, despite the TCPT_KEEP |
| 120 | * initiated segments we cannot elicit a response from a peer in |
| 121 | * TCPT_MAXIDLE amount of time probing, then we drop the connection. |
| 122 | * |
| 123 | * The TCPT_2MSL timer is used for keeping the connection in Time-wait state |
| 124 | * before fully closing it so that the connection 4-tuple can be reused. |
| 125 | */ |
| 126 | #ifdef BSD_KERNEL_PRIVATE |
| 127 | |
| 128 | #define TCPT_PTO 0 /* probe timeout (tail loss probe); indices below TCPT_REXMT form the 10 ms class, see IS_TIMER_HZ_10MS */ |
| 129 | #define TCPT_DELAYFR 1 /* Delay recovery if there is reordering */ |
| 130 | #define TCPT_REXMT 2 /* retransmit; first index of the 100 ms class, see IS_TIMER_HZ_100MS */ |
| 131 | #define TCPT_DELACK 3 /* delayed ack */ |
| 132 | #define TCPT_PERSIST 4 /* persist (window probe) timer; this and every higher index is in the 500 ms class, see IS_TIMER_HZ_500MS */ |
| 133 | #define TCPT_KEEP 5 /* keep alive */ |
| 134 | #define TCPT_2MSL 6 /* 2*msl quiet time timer */ |
| 135 | #if MPTCP |
| 136 | #define TCPT_JACK_RXMT 7 /* retransmit timer for join ack */ |
| 137 | #define TCPT_CELLICON 8 /* Timer to check for cell-activity */ |
| 138 | #define TCPT_MAX 8 /* highest timer index when MPTCP is enabled */ |
| 139 | #else /* !MPTCP */ |
| 140 | #define TCPT_MAX 6 /* highest timer index without MPTCP */ |
| 141 | #endif /* MPTCP */ |
| 142 | |
| 143 | #define TCPT_NONE (TCPT_MAX + 1) /* one past the highest timer index; presumably means "no timer" -- confirm against users */ |
| 144 | #define TCPT_NTIMERS (TCPT_MAX + 1) /* number of timer indices (0..TCPT_MAX) */ |
| 145 | |
| 146 | /* External definitions: same index order as the TCPT_* values used when BSD_KERNEL_PRIVATE is not defined */ |
| 147 | #define TCPT_REXMT_EXT 0 /* retransmit */ |
| 148 | #define TCPT_PERSIST_EXT 1 /* persist */ |
| 149 | #define TCPT_KEEP_EXT 2 /* keep alive */ |
| 150 | #define TCPT_2MSL_EXT 3 /* 2*msl quiet time timer */ |
| 151 | #define TCPT_DELACK_EXT 4 /* delayed ack; NOTE(review): equals TCPT_NTIMERS_EXT (4) -- confirm it is never used to index a TCPT_NTIMERS_EXT-sized array */ |
| 152 | |
| 153 | #else /* !BSD_KERNEL_PRIVATE */ |
| 154 | #define TCPT_REXMT 0 /* retransmit */ |
| 155 | #define TCPT_PERSIST 1 /* persist (window probe) timer */ |
| 156 | #define TCPT_KEEP 2 /* keep alive */ |
| 157 | #define TCPT_2MSL 3 /* 2*msl quiet time timer */ |
| 158 | #define TCPT_DELACK 4 /* delayed ack timer */ |
| 159 | #if MPTCP |
| 160 | #define TCPT_JACK_RXMT 5 /* retransmit timer for join ack */ |
| 161 | #define TCPT_MAX 5 /* highest timer index when MPTCP is enabled */ |
| 162 | #else /* !MPTCP */ |
| 163 | #define TCPT_MAX 4 /* highest timer index without MPTCP */ |
| 164 | #endif /* MPTCP */ |
| 165 | #define TCPT_NONE (TCPT_MAX + 1) /* one past the highest timer index */ |
| 166 | #define TCPT_NTIMERS (TCPT_MAX + 1) /* number of timer indices (0..TCPT_MAX) */ |
| 167 | |
| 168 | #endif /* BSD_KERNEL_PRIVATE */ |
| 169 | |
| 170 | #ifdef BSD_KERNEL_PRIVATE |
| 171 | /* |
| 172 | * Time constants, in TCP clock ticks. Going by the "100 ms" and "200 ms" comments on TCP_RETRANSHZ/10 and TCP_RETRANSHZ/5 in this file, N*TCP_RETRANSHZ is N seconds (TCP_RETRANSHZ itself is defined elsewhere). |
| 173 | */ |
| 174 | #define TCPTV_MSL ( 15*TCP_RETRANSHZ) /* max seg lifetime */ |
| 175 | #define TCPTV_SRTTBASE 0 /* base roundtrip time; if 0, no idea yet */ |
| 176 | #define TCPTV_RTOBASE ( 1*TCP_RETRANSHZ) /* assumed RTO if no info */ |
| 177 | #define TCPTV_SRTTDFLT ( 1*TCP_RETRANSHZ) /* assumed RTT if no info */ |
| 178 | #define TCPTV_PERSMIN ( 5*TCP_RETRANSHZ) /* minimum persist interval */ |
| 179 | #define TCPTV_PERSMAX ( 60*TCP_RETRANSHZ) /* maximum persist interval */ |
| 180 | |
| 181 | extern int tcptv_persmin_val; /* NOTE(review): presumably the run-time value used in place of TCPTV_PERSMIN -- confirm */ |
| 182 | |
| 183 | #define TCPTV_KEEP_INIT ( 75*TCP_RETRANSHZ) /* connect keep alive */ |
| 184 | #define TCPTV_KEEP_IDLE (120*60*TCP_RETRANSHZ) /* time before probing (120 minutes) */ |
| 185 | #define TCPTV_KEEPINTVL ( 75*TCP_RETRANSHZ) /* default probe interval */ |
| 186 | #define TCPTV_KEEPCNT 8 /* max probes before drop */ |
| 187 | |
| 188 | #define TCPTV_REXMTMAX ( 64*TCP_RETRANSHZ ) /* max REXMT value */ |
| 189 | #define TCPTV_REXMTMIN ( TCP_RETRANSHZ/33 ) /* min REXMT for non-local connections (the 30 ms minimum described in the TCPTV_REXMTSLOP comment) */ |
| 190 | |
| 191 | #define TCPTV_FINWAIT2 ( 60*TCP_RETRANSHZ) /* timeout to get out of FIN_WAIT_2 */ |
| 192 | |
| 193 | /* |
| 194 | * Window for counting received bytes to see if ack-stretching |
| 195 | * can start (default 100 ms) |
| 196 | */ |
| 197 | #define TCPTV_UNACKWIN ( TCP_RETRANSHZ/10 ) |
| 198 | |
| 199 | /* Receiver idle time (TCP_RETRANSHZ/5, i.e. 200 ms); avoid ack-stretching after this idle time */ |
| 200 | #define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 ) |
| 201 | |
| 202 | /* |
| 203 | * No ack stretching during slow-start, until we see some packets. |
| 204 | * By the time the receiver gets 512 packets, the sender's cwnd |
| 205 | * should open by a few hundred packets considering the |
| 206 | * slow-start progression. |
| 207 | */ |
| 208 | #define TCP_RCV_SS_PKTCOUNT 512 |
| 209 | |
| 210 | #define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ |
| 211 | |
| 212 | #define TCP_LINGERTIME 120 /* linger at most 2 minutes; a plain count of seconds, not scaled by TCP_RETRANSHZ */ |
| 213 | |
| 214 | #define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ |
| 215 | |
| 216 | #ifdef TCPTIMERS /* timer name table, only emitted when the includer defines TCPTIMERS */ |
| 217 | static char *tcptimers[] = /* NOTE(review): order matches the TCPT_*_EXT indices (0..4), not the kernel-private TCPT_* values where TCPT_PTO is 0 -- confirm how it is indexed */ |
| 218 | { "REXMT" , "PERSIST" , "KEEP" , "2MSL" , "DELACK" }; |
| 219 | #endif /* TCPTIMERS */ |
| 220 | |
| 221 | /* |
| 222 | * Persist, keep, 2msl and the MPTCP timers (every index >= TCPT_PERSIST) are |
| 223 | * slow timers which can be coalesced at a higher granularity (500 ms). |
| 224 | * |
| 225 | * Rexmt and delayed ack timers are considered as fast timers which run |
| 226 | * in the order of 100ms. |
| 227 | * |
| 228 | * Probe timeout and the delay-recovery timer (every index < TCPT_REXMT) are quick timers which run in the order of 10ms. |
| 229 | */ |
| 230 | #define IS_TIMER_HZ_500MS(i) ((i) >= TCPT_PERSIST) /* slow class */ |
| 231 | #define IS_TIMER_HZ_100MS(i) ((i) >= TCPT_REXMT && (i) < TCPT_PERSIST) /* fast class; evaluates i twice */ |
| 232 | #define IS_TIMER_HZ_10MS(i) ((i) < TCPT_REXMT) /* quick class */ |
| 233 | |
| 234 | struct tcptimerlist; /* forward declaration; the full definition follows tcptimerentry */ |
| 235 | |
| 236 | struct tcptimerentry { /* one timer-list entry; OFFSET_FROM_START reads it as (tp)->tentry */ |
| 237 | LIST_ENTRY(tcptimerentry) le; /* links for timer list */ |
| 238 | uint32_t timer_start; /* tcp clock when the timer was started */ |
| 239 | uint16_t index; /* index of lowest timer that needs to run first */ |
| 240 | uint16_t mode; /* Bit-wise OR of timers that are active */ |
| 241 | uint32_t runtime; /* deadline at which the first timer has to fire */ |
| 242 | }; |
| 243 | |
| 244 | LIST_HEAD(timerlisthead, tcptimerentry); /* list head type for chains of tcptimerentry */ |
| 245 | |
| 246 | struct tcptimerlist { /* a list of tcptimerentry items plus its lock, call entry and scheduling state */ |
| 247 | struct timerlisthead lhead; /* head of the list */ |
| 248 | lck_mtx_t mtx; /* lock to protect the list */ |
| 249 | lck_grp_t *mtx_grp; /* mutex group definition */ |
| 250 | thread_call_t call; /* call entry */ |
| 251 | uint32_t runtime; /* time at which this list is going to run */ |
| 252 | uint32_t schedtime; /* time at which this list was scheduled */ |
| 253 | uint32_t entries; /* Number of entries on the list */ |
| 254 | uint32_t maxentries; /* Max number of entries at any time */ |
| 255 | |
| 256 | /* State flags, followed by the mode (granularity) bookkeeping */ |
| 257 | boolean_t running; /* Set when timer list is being processed */ |
| 258 | boolean_t scheduled; /* set when the timer is scheduled */ |
| 259 | #define TCP_TIMERLIST_10MS_MODE 0x1 /* mode flag bits; presumably the values held in mode and pref_mode -- confirm */ |
| 260 | #define TCP_TIMERLIST_100MS_MODE 0x2 |
| 261 | #define TCP_TIMERLIST_500MS_MODE 0x4 |
| 262 | uint32_t mode; /* Current mode of the timer */ |
| 263 | uint32_t pref_mode; /* Preferred mode set by a connection */ |
| 264 | uint32_t pref_offset; /* Preferred offset set by a connection */ |
| 265 | uint32_t idleruns; /* Number of times the list has been idle in fast mode */ |
| 266 | struct tcptimerentry *next_te; /* next timer entry pointer to process */ |
| 267 | u_int16_t probe_if_index; /* Interface index that needs to send probes */ |
| 268 | }; |
| 269 | |
| 270 | /* Number of idle runs allowed for the TCP timer list in fast (100 ms) or quick (10 ms) modes */ |
| 271 | #define TCP_FASTMODE_IDLERUN_MAX 10 |
| 272 | |
| 273 | /* |
| 274 | * Minimum retransmit timeout is set to 30ms (TCPTV_REXMTMIN). We add a slop of |
| 275 | * 200 ms to the retransmit value to account for processing |
| 276 | * variance and delayed ack. This extra 200ms will help to avoid |
| 277 | * spurious retransmits by taking into consideration the receivers |
| 278 | * that wait for delayed ack timer instead of generating an ack |
| 279 | * for every two packets. |
| 280 | * |
| 281 | * On a local link, the minimum retransmit timeout is 100ms and |
| 282 | * variance is set to 0. This will make the sender a little bit more |
| 283 | * aggressive on local link. When the connection is not established yet, |
| 284 | * there is no need to add an extra 200ms to retransmit timeout because |
| 285 | * the initial value is high (1s) and delayed ack is not a problem in |
| 286 | * that case. |
| 287 | */ |
| 288 | #define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 ) /* extra 200 ms slop; note that TCPT_RANGESET adds the tcp_rexmt_slop variable, not this constant */ |
| 289 | |
/*
 * Macro to decide when the retransmit slop (described in the
 * TCPTV_REXMTSLOP comment) should be added: true when the connection
 * state is numerically at or above TCPS_ESTABLISHED.
 *
 * tp is a pointer to a structure with a t_state member.  The parameter
 * is parenthesised so that any pointer expression (for example &cb or a
 * conditional expression) can be passed safely.
 */
#define TCP_ADD_REXMTSLOP(tp) ((tp)->t_state >= TCPS_ESTABLISHED)
| 292 | |
| 293 | #define TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { /* tv = value (+ tcp_rexmt_slop when addslop is nonzero), clamped to [tvmin, tvmax] */ \ |
| 294 | (tv) = ((addslop) ? tcp_rexmt_slop : 0) + (value); \ |
| 295 | if ((uint32_t)(tv) < (uint32_t)(tvmin)) /* bounds are compared as unsigned 32-bit values */ \ |
| 296 | (tv) = (tvmin); \ |
| 297 | else if ((uint32_t)(tv) > (uint32_t)(tvmax)) \ |
| 298 | (tv) = (tvmax); \ |
| 299 | } while(0) /* tv, tvmin and tvmax may each be evaluated more than once */ |
| 300 | |
| 301 | #define TCP_CONN_KEEPIDLE(tp) /* t_keepidle when it is nonzero and SO_KEEPALIVE is set on the socket, else the global tcp_keepidle */ \ |
| 302 | ((tp)->t_keepidle && \ |
| 303 | ((tp)->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ? /* the && binds tighter than ?:, so both tests form the condition */ \ |
| 304 | (tp)->t_keepidle : (uint32_t)tcp_keepidle) |
| 305 | #define TCP_CONN_KEEPINIT(tp) /* t_keepinit when > 0, else the global tcp_keepinit */ \ |
| 306 | (((tp)->t_keepinit > 0) ? (tp)->t_keepinit : (uint32_t)tcp_keepinit) |
| 307 | #define TCP_CONN_KEEPCNT(tp) /* t_keepcnt when > 0, else the global tcp_keepcnt */ \ |
| 308 | (((tp)->t_keepcnt > 0) ? (tp)->t_keepcnt : (uint32_t)tcp_keepcnt) |
| 309 | #define TCP_CONN_KEEPINTVL(tp) /* t_keepintvl when > 0, else the global tcp_keepintvl */ \ |
| 310 | (((tp)->t_keepintvl > 0) ? (tp)->t_keepintvl : (uint32_t)tcp_keepintvl) |
| 311 | #define TCP_CONN_MAXIDLE(tp) /* probe count times probe interval; evaluates tp several times */ \ |
| 312 | (TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp)) |
| 313 | |
/*
 * Idle timeout for a connection: the current retransmit timeout
 * (t_rxtcur) plus tcp_rexmt_slop, except that the slop is left out when
 * TCP_ADD_REXMTSLOP(tp) is true.
 * NOTE(review): presumably t_rxtcur already contains the slop in that
 * case because TCPT_RANGESET added it -- confirm against the callers.
 *
 * tp is a pointer expression; it is parenthesised at every use and is
 * evaluated more than once, so do not pass an expression with side
 * effects.
 */
#define TCP_IDLETIMEOUT(tp) \
	((TCP_ADD_REXMTSLOP((tp)) ? 0 : tcp_rexmt_slop) + (tp)->t_rxtcur)
| 316 | |
| 317 | TAILQ_HEAD(tcptailq, tcpcb); /* tail-queue head type for struct tcpcb */ |
| 318 | |
| 319 | extern int tcp_keepinit; /* time to establish connection */ |
| 320 | extern int tcp_keepidle; /* time before keepalive probes begin */ |
| 321 | extern int tcp_keepintvl; /* time between keepalive probes */ |
| 322 | extern int tcp_keepcnt; /* number of keepalives */ |
| 323 | extern int tcp_delack; /* delayed ack timer */ |
| 324 | extern int tcp_maxpersistidle; /* NOTE(review): not described in this header -- document at the definition */ |
| 325 | extern int tcp_msl; /* NOTE(review): presumably the run-time maximum segment lifetime, cf. TCPTV_MSL -- confirm */ |
| 326 | extern int tcp_ttl; /* time to live for TCP segs */ |
| 327 | extern int tcp_backoff[]; /* NOTE(review): presumably the retransmit backoff table -- confirm */ |
| 328 | extern int tcp_rexmt_slop; /* slop added by TCPT_RANGESET and TCP_IDLETIMEOUT */ |
| 329 | extern u_int32_t tcp_max_persist_timeout; /* Maximum persistence for Zero Window Probes */ |
| 330 | |
| 331 | #define OFFSET_FROM_START(tp, off) ((tcp_now + (off)) - (tp)->tentry.timer_start) /* turns an offset from the current tcp_now into an offset from the entry's timer_start; tcp_now is declared elsewhere */ |
| 332 | |
| 333 | #endif /* BSD_KERNEL_PRIVATE */ |
| 334 | #endif /* !_NETINET_TCP_TIMER_H_ */ |
| 335 | |