/*
 * Copyright (c) 2013-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include "tcp_includes.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/ip_var.h>

static int tcp_cubic_init(struct tcpcb *tp);
static int tcp_cubic_cleanup(struct tcpcb *tp);
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp);
static void tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_pre_fr(struct tcpcb *tp);
static void tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_after_timeout(struct tcpcb *tp);
static int tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_switch_cc(struct tcpcb *tp);
static uint32_t tcp_cubic_update(struct tcpcb *tp, uint32_t rtt);
static inline void tcp_cubic_clear_state(struct tcpcb *tp);

extern float cbrtf(float x);

struct tcp_cc_algo tcp_cc_cubic = {
	.name = "cubic",
	.init = tcp_cubic_init,
	.cleanup = tcp_cubic_cleanup,
	.cwnd_init = tcp_cubic_cwnd_init_or_reset,
	.congestion_avd = tcp_cubic_congestion_avd,
	.ack_rcvd = tcp_cubic_ack_rcvd,
	.pre_fr = tcp_cubic_pre_fr,
	.post_fr = tcp_cubic_post_fr,
	.after_idle = tcp_cubic_cwnd_init_or_reset,
	.after_timeout = tcp_cubic_after_timeout,
	.delay_ack = tcp_cubic_delay_ack,
	.switch_to = tcp_cubic_switch_cc
};

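/*
 * tcp_cubic_coeff is the cubic scaling constant C in the window equation
 * W(t) = C*(t-K)^3 + W(last_max) used below (0.4, as in RFC 8312).
 * tcp_cubic_beta is the multiplicative-decrease factor; tcp_cubic_backoff
 * works out to (1 - tcp_cubic_beta) in both the default and the
 * RFC-compliant configuration (see tcp_cubic_init() below).
 */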
static float tcp_cubic_backoff = 0.2f; /* multiplicative decrease factor */
static float tcp_cubic_coeff = 0.4f;
static float tcp_cubic_fast_convergence_factor = 0.875f;

static float tcp_cubic_beta = 0.8f;

static int
tcp_cubic_init(struct tcpcb *tp)
{
	os_atomic_inc(&tcp_cc_cubic.num_sockets, relaxed);

	if (tcp_cubic_rfc_compliant) {
		tcp_cubic_backoff = 0.3f; /* multiplicative decrease factor */
		tcp_cubic_fast_convergence_factor = 0.85f;
		tcp_cubic_beta = 0.7f;
	} else {
		tcp_cubic_backoff = 0.2f; /* multiplicative decrease factor */
		tcp_cubic_fast_convergence_factor = 0.875f;
		tcp_cubic_beta = 0.8f;
	}

	VERIFY(tp->t_ccstate != NULL);
	tcp_cubic_clear_state(tp);
	return 0;
}

static int
tcp_cubic_cleanup(struct tcpcb *tp)
{
#pragma unused(tp)
	os_atomic_dec(&tcp_cc_cubic.num_sockets, relaxed);
	return 0;
}

/*
 * Initialize the congestion window at the beginning of a connection or
 * after idle time
 */
static void
tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	tcp_cubic_clear_state(tp);
	tcp_cc_cwnd_init_or_reset(tp);
	tp->t_pipeack = 0;
	tcp_clear_pipeack_state(tp);

	/* Start counting bytes for RFC 3465 again */
	tp->t_bytes_acked = 0;

	/*
	 * slow start threshold could get initialized to a lower value
	 * when there is a cached value in the route metrics. In this case,
	 * the connection can enter congestion avoidance without any packet
	 * loss and Cubic will enter steady-state too early. It is better
	 * to always probe to find the initial slow-start threshold.
	 */
	if (tp->t_inpcb->inp_stat->txbytes <= tcp_initial_cwnd(tp) &&
	    tp->snd_ssthresh < (TCP_MAXWIN << TCP_MAX_WINSHIFT)) {
		tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
	}

	/* Initialize cubic last max to be same as ssthresh */
	tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
}

/*
 * Compute the target congestion window for the next RTT according to
 * cubic equation when an ack is received.
 *
 * W(t) = C(t-K)^3 + W(last_max)
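 *
 * where C is tcp_cubic_coeff, t is the time since the start of the current
 * congestion-avoidance epoch (plus one RTT, since we want the window one
 * RTT from now) and K is the time the cubic function takes to grow the
 * window back to W(last_max) after a backoff:
 *
 *     K = cbrt((W(last_max) - W(epoch_start)) / (C * MSS))
 *
 * cub_epoch_period stores K scaled to TCP_RETRANSHZ ticks.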
 */
static uint32_t
tcp_cubic_update(struct tcpcb *tp, uint32_t rtt)
{
	struct tcp_globals *globals = tcp_get_globals(tp);
	float K, var;
	uint32_t elapsed_time, win;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_ccstate->cub_last_max == 0) {
		tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
	}

	if (tp->t_ccstate->cub_epoch_start == 0) {
		/*
		 * This is the beginning of a new epoch, initialize some of
		 * the variables that we need to use for computing the
		 * congestion window later.
		 */
		tp->t_ccstate->cub_epoch_start = tcp_globals_now(globals);
		if (tp->t_ccstate->cub_epoch_start == 0) {
			tp->t_ccstate->cub_epoch_start = 1;
		}
		if (win < tp->t_ccstate->cub_last_max) {
			/*
			 * Compute cubic epoch period, this is the time
			 * period that the window will take to increase to
			 * last_max again after backoff due to loss.
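			 *
			 * For example, with the default coefficient of 0.4,
			 * dropping from a last_max of 100 MSS to a window of
			 * 80 MSS gives K = cbrt(20 / 0.4), roughly 3.7
			 * seconds.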
			 */
			if (tcp_cubic_minor_fixes) {
				K = ((float)tp->t_ccstate->cub_last_max - win) / tp->t_maxseg / tcp_cubic_coeff;
			} else {
				K = (tp->t_ccstate->cub_last_max - win) / tp->t_maxseg / tcp_cubic_coeff;
			}
			K = cbrtf(K);
			tp->t_ccstate->cub_epoch_period = K * TCP_RETRANSHZ;
			/* Origin point */
			tp->t_ccstate->cub_origin_point = tp->t_ccstate->cub_last_max;
		} else {
			tp->t_ccstate->cub_epoch_period = 0;
			tp->t_ccstate->cub_origin_point = win;
		}
	}

	VERIFY(tp->t_ccstate->cub_origin_point > 0);
	/*
	 * Compute the target window for the next RTT using smoothed RTT
	 * as an estimate for next RTT.
	 */
	elapsed_time = timer_diff(tcp_globals_now(globals), 0, tp->t_ccstate->cub_epoch_start, 0);

	if (tcp_cubic_use_minrtt) {
		elapsed_time += max(tcp_cubic_use_minrtt, rtt);
	} else {
		elapsed_time += rtt;
	}
	var = (elapsed_time - tp->t_ccstate->cub_epoch_period) / TCP_RETRANSHZ;
	var = var * var * var * (tcp_cubic_coeff * tp->t_maxseg);

	return (uint32_t)(tp->t_ccstate->cub_origin_point + var);
}

/*
 * Standard TCP utilizes bandwidth well in low RTT and low BDP connections
 * even when there is some packet loss. Enabling TCP mode will help Cubic
 * to achieve this kind of utilization.
 *
 * But if there is a bottleneck link in the path with a fixed size queue
 * and fixed bandwidth, TCP Cubic will help to reduce packet loss at this
 * link because of the steady-state behavior. Using average and mean
 * absolute deviation of W(lastmax), we try to detect if the congestion
 * window is close to the bottleneck bandwidth. In that case, disabling
 * TCP mode will help to minimize packet loss at this link.
 *
 * Disable TCP mode if the W(lastmax) (the window where previous packet
 * loss happened) is within a small range from the average last max
 * calculated.
 */
#define TCP_CUBIC_ENABLE_TCPMODE(_tp_) \
	((!soissrcrealtime((_tp_)->t_inpcb->inp_socket) && \
	(_tp_)->t_ccstate->cub_mean_dev > (tp->t_maxseg << 1)) ? 1 : 0)
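/*
 * In other words, when tcp_cubic_minor_fixes is off, the TCP-friendly window
 * below is used only for non-realtime sockets whose W(lastmax) samples still
 * fluctuate: the mean absolute deviation must exceed two segments, e.g.
 * 2896 bytes with a 1448-byte MSS.
 */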

/*
 * Compute the window growth if standard TCP (AIMD) was used with
 * a backoff of 0.5 and additive increase of 1 packet per RTT.
 *
 * TCP window at time t can be calculated using the following equation
 * with tcp_beta_cubic
 *
 * W(t) <- Wmax * tcp_beta_cubic + 3 * ((1 - tcp_beta_cubic)/(1 + tcp_beta_cubic)) * t/RTT
 *
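 * For example, with the RFC-compliant tcp_beta_cubic of 0.7 the additive
 * term works out to 3 * 0.3 / 1.7, i.e. roughly 0.53 segments per RTT.
 *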
 */
static uint32_t
tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th)
{
	if (tp->t_ccstate->cub_tcp_win == 0) {
		/* Start of the epoch, we set the tcp_win to whatever Cubic decided
		 * at the beginning of the epoch.
		 */
		tp->t_ccstate->cub_tcp_win = min(tp->snd_cwnd, tp->snd_wnd);
		if (tcp_cubic_minor_fixes) {
			tp->t_ccstate->cub_tcp_bytes_acked = BYTES_ACKED(th, tp);
		} else {
			tp->t_ccstate->cub_tcp_bytes_acked = 0;
		}
	} else {
		tp->t_ccstate->cub_tcp_bytes_acked += BYTES_ACKED(th, tp);

		if (tcp_cubic_minor_fixes) {
			/*
			 * Increase by ai_factor * MSS, once per RTT. Counting bytes_acked
			 * against the snd_cwnd represents exactly one RTT at full rate.
			 */
			while (tp->t_ccstate->cub_tcp_bytes_acked >= tp->snd_cwnd) {
				/* Enough bytes have been ACK'd for TCP to do AIMD */
				tp->t_ccstate->cub_tcp_bytes_acked -= tp->snd_cwnd;

				if (tp->snd_cwnd >= tp->t_ccstate->cub_last_max || !tcp_cubic_rfc_compliant) {
					tp->t_ccstate->cub_tcp_win += tp->t_maxseg;
				} else {
					/* Increase-rate from Section 4.2, RFC 8312 */
					float ai_factor = (float)3 * (1 - tcp_cubic_beta) / (1 + tcp_cubic_beta);

					tp->t_ccstate->cub_tcp_win += (uint32_t)(tp->t_maxseg * ai_factor);
				}
			}
		} else {
			if (tp->t_ccstate->cub_tcp_bytes_acked >= tp->t_ccstate->cub_tcp_win) {
				tp->t_ccstate->cub_tcp_bytes_acked -= tp->t_ccstate->cub_tcp_win;
				tp->t_ccstate->cub_tcp_win += tp->t_maxseg;
			}
		}
	}
	return tp->t_ccstate->cub_tcp_win;
}

/*
 * Handle an in-sequence ack during congestion avoidance phase.
 */
static void
tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
{
	uint32_t cubic_target_win, tcp_win, rtt;
	uint64_t incr_win = UINT32_MAX;

	/* Do not increase congestion window in non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) {
		return;
	}

	tp->t_bytes_acked += BYTES_ACKED(th, tp);

	rtt = get_base_rtt(tp);
	/*
	 * First compute cubic window. If cubic variables are not
	 * initialized (after coming out of recovery), this call will
	 * initialize them.
	 */
	cubic_target_win = tcp_cubic_update(tp, rtt);

	/* Compute TCP window if a multiplicative decrease of 0.2 is used */
	tcp_win = tcp_cubic_tcpwin(tp, th);

	if (tp->snd_cwnd < tcp_win && tcp_cubic_minor_fixes == 0 && TCP_CUBIC_ENABLE_TCPMODE(tp)) {
		/* this connection is in TCP-friendly region */
		if (tp->t_bytes_acked >= tp->snd_cwnd) {
			tp->t_bytes_acked -= tp->snd_cwnd;
			tp->snd_cwnd = min(tcp_win, TCP_MAXWIN << tp->snd_scale);
		}
	} else {
		if (cubic_target_win > tp->snd_cwnd) {
			/*
			 * The target win is computed for the next RTT.
			 * To reach this value, cwnd will have to be updated
			 * one segment at a time. Compute how many bytes
			 * need to be acknowledged before we can increase
			 * the cwnd by one segment.
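			 *
			 * For example, if cwnd is 100 segments and the cubic
			 * target is 104 segments, incr_win works out to
			 * 100 * MSS / 4 = 25 MSS, so cwnd grows by one
			 * segment per 25 MSS acknowledged and reaches the
			 * target after roughly one cwnd worth of ACKs
			 * (about one RTT).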
			 */
			incr_win = (uint64_t)tp->snd_cwnd * tp->t_maxseg;
			incr_win /= (cubic_target_win - tp->snd_cwnd);
			if (!tcp_cubic_minor_fixes) {
				if (incr_win > 0 && tp->t_bytes_acked >= incr_win) {
					tp->t_bytes_acked -= incr_win;
					tp->snd_cwnd =
					    min((tp->snd_cwnd + tp->t_maxseg),
					    TCP_MAXWIN << tp->snd_scale);
				}
			}
		}
	}

	if (tcp_cubic_minor_fixes) {
		tcp_win = tcp_round_to(tcp_win, tp->t_maxseg);

		if (tp->snd_cwnd < tcp_win) {
			uint64_t tcp_incr_win;

			tcp_incr_win = (uint64_t)tp->snd_cwnd * tp->t_maxseg;
			tcp_incr_win /= (tcp_win - tp->snd_cwnd);

			if (tcp_incr_win < incr_win) {
				/* this connection is in TCP-friendly region */
				incr_win = tcp_incr_win;
			}
		}

		if (incr_win > 0 && tp->t_bytes_acked >= incr_win) {
			tp->t_bytes_acked -= incr_win;
			tp->snd_cwnd = min(tp->snd_cwnd + tp->t_maxseg, TCP_MAXWIN << tp->snd_scale);
		}
	}
}

static void
tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
{
	/* Do not increase the congestion window in non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) {
		return;
	}

	if (tp->snd_cwnd >= tp->snd_ssthresh) {
		/* Congestion avoidance phase */
		tcp_cubic_congestion_avd(tp, th);
	} else {
		/*
		 * Use 2*SMSS as limit on increment as suggested
		 * by RFC 3465 section 2.3
		 */
		uint32_t acked, abc_lim, incr;

		acked = BYTES_ACKED(th, tp);
		if (tcp_cubic_minor_fixes) {
			/*
			 * Maximum burst-size is limited to the initial congestion-window.
			 * We know that the network can survive this kind of burst.
			 */
			abc_lim = tcp_initial_cwnd(tp);
		} else {
			abc_lim = (tp->snd_nxt == tp->snd_max) ? 2 * tp->t_maxseg : tp->t_maxseg;
		}
		incr = min(acked, abc_lim);

		tp->snd_cwnd += incr;
		tp->snd_cwnd = min(tp->snd_cwnd, TCP_MAXWIN << tp->snd_scale);
	}
}

static void
tcp_cubic_pre_fr(struct tcpcb *tp)
{
	uint32_t win, avg;
	int32_t dev;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
		tp->t_lossflightsize = tp->snd_max - tp->snd_una;
		if (tcp_flow_control_response) {
			win = max(tp->t_pipeack, tp->t_lossflightsize);
		} else {
			win = (max(tp->t_pipeack, tp->t_lossflightsize)) >> 1;
		}
	} else {
		tp->t_lossflightsize = 0;
	}
	/*
	 * Note the congestion window at which packet loss occurred as
	 * cub_last_max.
	 *
	 * If the congestion window is less than the last max window when
	 * loss occurred, it indicates that capacity available in the
	 * network has gone down. This can happen if a new flow has started
	 * and it is capturing some of the bandwidth. To reach convergence
	 * quickly, backoff a little more.
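	 *
	 * For example, with the default convergence factor of 0.875, a loss
	 * at 80 MSS while the previous last_max was 100 MSS records a new
	 * last_max of 0.875 * 80 = 70 MSS rather than 80 MSS.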
	 */
	if (win < tp->t_ccstate->cub_last_max && tcp_cubic_minor_fixes) {
		tp->t_ccstate->cub_last_max = (uint32_t)((float)win * tcp_cubic_fast_convergence_factor);
	} else {
		tp->t_ccstate->cub_last_max = win;
	}

	if (tp->t_ccstate->cub_last_max == 0) {
		/*
		 * If last_max is zero because snd_wnd is zero or for
		 * any other reason, initialize it to the amount of data
		 * in flight
		 */
		tp->t_ccstate->cub_last_max = tp->snd_max - tp->snd_una;
	}

	/*
	 * Compute average and mean absolute deviation of the
	 * window at which packet loss occurred.
	 */
	if (tp->t_ccstate->cub_avg_lastmax == 0) {
		tp->t_ccstate->cub_avg_lastmax = tp->t_ccstate->cub_last_max;
	} else {
		/*
		 * Average is computed by taking 63 parts of
		 * history and one part of the most recent value
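		 *
		 *   cub_avg_lastmax <- (63 * cub_avg_lastmax + cub_last_max) / 64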
		 */
		avg = tp->t_ccstate->cub_avg_lastmax;
		avg = (avg << 6) - avg;
		tp->t_ccstate->cub_avg_lastmax =
		    (avg + tp->t_ccstate->cub_last_max) >> 6;
	}

	/* Calculate deviation from average */
	dev = tp->t_ccstate->cub_avg_lastmax - tp->t_ccstate->cub_last_max;

	/* Take the absolute value */
	if (dev < 0) {
		dev = -dev;
	}

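	/*
	 * Track the mean absolute deviation with the same kind of filter,
	 * seeded by the first sample:
	 *
	 *   cub_mean_dev <- (15 * cub_mean_dev + dev) / 16
	 */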
	if (tp->t_ccstate->cub_mean_dev == 0) {
		tp->t_ccstate->cub_mean_dev = dev;
	} else {
		dev = dev + ((tp->t_ccstate->cub_mean_dev << 4)
		    - tp->t_ccstate->cub_mean_dev);
		tp->t_ccstate->cub_mean_dev = dev >> 4;
	}

	/* Backoff congestion window by tcp_cubic_backoff factor */
	win = (uint32_t)(win - (win * tcp_cubic_backoff));
	win = tcp_round_to(win, tp->t_maxseg);
	if (win < 2 * tp->t_maxseg) {
		win = 2 * tp->t_maxseg;
	}
	tp->snd_ssthresh = win;
	tcp_cc_resize_sndbuf(tp);
}

static void
tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th)
{
	uint32_t flight_size = 0;
	uint32_t ack;

	if (th != NULL) {
		ack = th->th_ack;
	} else {
		ack = tp->snd_una;
	}

	if (SEQ_LEQ(ack, tp->snd_max) && (!tcp_cubic_minor_fixes || tcp_flow_control_response)) {
		flight_size = tp->snd_max - ack;
	} else if (tcp_cubic_minor_fixes) {
		/*
		 * Cubic Minor Fixes: snd_max - th_ack is a very very bad estimate
		 * of the flight size. Either the app is sending at full speed and
		 * flight_size *is* snd_ssthresh, or the app is not sending at full
		 * speed and congestion-window validation would have kicked in earlier.
		 *
		 * Except that for the latter, snd_ssthresh is way too high.
		 * When we exit recovery we will burst a lot of data out...
		 *
		 * So, tcp_flow_control_response brings us back to the old behavior.
		 * Too many feature-flags...
		 */
		flight_size = tp->snd_ssthresh;
	}

	/*
	 * Cubic Minor Fixes: t_lossflightsize is always 0, because of
	 * EXIT_FASTRECOVERY. This here is basically dead code...
	 */
	if (SACK_ENABLED(tp) && tp->t_lossflightsize > 0 && !tcp_cubic_minor_fixes) {
		uint32_t total_rxt_size = 0, ncwnd;
		/*
		 * When SACK is enabled, the number of retransmitted bytes
		 * can be counted more accurately.
		 */
		total_rxt_size = tcp_rxtseg_total_size(tp);
		ncwnd = max(tp->t_pipeack, tp->t_lossflightsize);
		if (total_rxt_size <= ncwnd) {
			ncwnd = ncwnd - total_rxt_size;
		}

		/*
		 * To avoid sending a large burst at the end of recovery
		 * set a max limit on ncwnd
		 */
		ncwnd = min(ncwnd, (tp->t_maxseg << 6));
		ncwnd = ncwnd >> 1;
		flight_size = max(ncwnd, flight_size);
	}
	/*
	 * Complete ack. The current window was inflated for fast recovery.
	 * It has to be deflated post recovery.
	 *
	 * Window inflation should have left us with approx snd_ssthresh
	 * outstanding data. If the flight size is zero or one segment,
	 * make the congestion window at least as big as 2 segments to
	 * avoid delayed acknowledgements. This is according to RFC 6582.
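	 *
	 * For example, if only two segments are still outstanding while
	 * snd_ssthresh is 40 segments, cwnd restarts at three segments
	 * instead of bursting 40 segments worth of data at once.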
	 */
	if (flight_size < tp->snd_ssthresh) {
		tp->snd_cwnd = max(flight_size, tp->t_maxseg) + tp->t_maxseg;
	} else {
		tp->snd_cwnd = tp->snd_ssthresh;
	}

	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
}

static void
tcp_cubic_after_timeout(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	/*
	 * Avoid adjusting the congestion window due to SYN retransmissions.
	 * If more than one byte (i.e. more than the SYN) is outstanding,
	 * the window still needs to be adjusted.
	 */
	if (tp->t_state < TCPS_ESTABLISHED &&
	    ((int)(tp->snd_max - tp->snd_una) <= 1)) {
		return;
	}

	if (!IN_FASTRECOVERY(tp)) {
		tcp_cubic_clear_state(tp);
		tcp_cubic_pre_fr(tp);
	}

	/*
	 * Close the congestion window down to one segment as a retransmit
	 * timeout might indicate severe congestion.
	 */
	tp->snd_cwnd = tp->t_maxseg;
}

static int
tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th)
{
	return tcp_cc_delay_ack(tp, th);
}

/*
 * When switching from a different CC it is better for Cubic to start
 * fresh. The state required for Cubic calculation might be stale and it
 * might not represent the current state of the network. If it starts as
 * a new connection it will probe and learn the existing network conditions.
 */
static void
tcp_cubic_switch_cc(struct tcpcb *tp)
{
	tcp_cubic_cwnd_init_or_reset(tp);

	os_atomic_inc(&tcp_cc_cubic.num_sockets, relaxed);
}

static inline void
tcp_cubic_clear_state(struct tcpcb *tp)
{
	tp->t_ccstate->cub_last_max = 0;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_origin_point = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
	tp->t_ccstate->cub_epoch_period = 0;
}