/*
 * Copyright (c) 2013-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include "tcp_includes.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/syslog.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/ip_var.h>

static int tcp_cubic_init(struct tcpcb *tp);
static int tcp_cubic_cleanup(struct tcpcb *tp);
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp);
static void tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_pre_fr(struct tcpcb *tp);
static void tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_after_timeout(struct tcpcb *tp);
static int tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_switch_cc(struct tcpcb *tp);
static uint32_t tcp_cubic_update(struct tcpcb *tp, uint32_t rtt);
static inline void tcp_cubic_clear_state(struct tcpcb *tp);

extern float cbrtf(float x);

struct tcp_cc_algo tcp_cc_cubic = {
	.name = "cubic",
	.init = tcp_cubic_init,
	.cleanup = tcp_cubic_cleanup,
	.cwnd_init = tcp_cubic_cwnd_init_or_reset,
	.congestion_avd = tcp_cubic_congestion_avd,
	.ack_rcvd = tcp_cubic_ack_rcvd,
	.pre_fr = tcp_cubic_pre_fr,
	.post_fr = tcp_cubic_post_fr,
	.after_idle = tcp_cubic_cwnd_init_or_reset,
	.after_timeout = tcp_cubic_after_timeout,
	.delay_ack = tcp_cubic_delay_ack,
	.switch_to = tcp_cubic_switch_cc
};

static float tcp_cubic_backoff = 0.2f; /* multiplicative decrease factor */
static float tcp_cubic_coeff = 0.4f;
static float tcp_cubic_fast_convergence_factor = 0.875f;

static float tcp_cubic_beta = 0.8f;

static int
tcp_cubic_init(struct tcpcb *tp)
{
	os_atomic_inc(&tcp_cc_cubic.num_sockets, relaxed);

	if (tcp_cubic_rfc_compliant) {
		tcp_cubic_backoff = 0.3f; /* multiplicative decrease factor */
		tcp_cubic_fast_convergence_factor = 0.85f;
		tcp_cubic_beta = 0.7f;
	} else {
		tcp_cubic_backoff = 0.2f; /* multiplicative decrease factor */
		tcp_cubic_fast_convergence_factor = 0.875f;
		tcp_cubic_beta = 0.8f;
	}

	VERIFY(tp->t_ccstate != NULL);
	tcp_cubic_clear_state(tp);
	return 0;
}

static int
tcp_cubic_cleanup(struct tcpcb *tp)
{
#pragma unused(tp)
	os_atomic_dec(&tcp_cc_cubic.num_sockets, relaxed);
	return 0;
}

/*
 * Initialize the congestion window at the beginning of a connection or
 * after idle time.
 */
static void
tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	tcp_cubic_clear_state(tp);
	tcp_cc_cwnd_init_or_reset(tp);
	tp->t_pipeack = 0;
	tcp_clear_pipeack_state(tp);

	/* Start counting bytes for RFC 3465 again */
	tp->t_bytes_acked = 0;

	/*
	 * The slow start threshold could get initialized to a lower value
	 * when there is a cached value in the route metrics. In this case,
	 * the connection can enter congestion avoidance without any packet
	 * loss and Cubic will enter steady-state too early. It is better
	 * to always probe to find the initial slow-start threshold.
	 */
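	/*
	 * For reference: TCP_MAXWIN << TCP_MAX_WINSHIFT is 65535 << 14,
	 * about 1 GiB, which effectively means "no threshold" until the
	 * first loss establishes a real one.
	 */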
	if (tp->t_inpcb->inp_stat->txbytes <= tcp_initial_cwnd(tp) &&
	    tp->snd_ssthresh < (TCP_MAXWIN << TCP_MAX_WINSHIFT)) {
		tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
	}

	/* Initialize cubic last max to be same as ssthresh */
	tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
}

/*
 * Compute the target congestion window for the next RTT according to
 * the CUBIC equation when an ack is received.
 *
 * W(t) = C * (t - K)^3 + W(last_max)
 */
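/*
 * Illustrative example (numbers chosen for exposition, not taken from
 * a real connection): with C = tcp_cubic_coeff = 0.4 and windows in MSS
 * units, last_max = 100 and a post-backoff window of 80 give
 * K = cbrt((100 - 80) / 0.4) = cbrt(50), roughly 3.7 seconds. W(t)
 * grows concavely back to last_max during the first K seconds of the
 * epoch and convexly past it afterwards.
 */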
static uint32_t
tcp_cubic_update(struct tcpcb *tp, uint32_t rtt)
{
	struct tcp_globals *globals = tcp_get_globals(tp);
	float K, var;
	uint32_t elapsed_time, win;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_ccstate->cub_last_max == 0) {
		tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
	}

	if (tp->t_ccstate->cub_epoch_start == 0) {
		/*
		 * This is the beginning of a new epoch, initialize some of
		 * the variables that we need to use for computing the
		 * congestion window later.
		 */
		tp->t_ccstate->cub_epoch_start = tcp_globals_now(globals);
		if (tp->t_ccstate->cub_epoch_start == 0) {
			tp->t_ccstate->cub_epoch_start = 1;
		}
		if (win < tp->t_ccstate->cub_last_max) {
			/*
			 * Compute the cubic epoch period: the time that
			 * the window will take to increase to last_max
			 * again after backing off due to loss.
			 */
			if (tcp_cubic_minor_fixes) {
				K = ((float)tp->t_ccstate->cub_last_max - win) / tp->t_maxseg / tcp_cubic_coeff;
			} else {
				K = (tp->t_ccstate->cub_last_max - win) / tp->t_maxseg / tcp_cubic_coeff;
			}
			K = cbrtf(K);
			tp->t_ccstate->cub_epoch_period = K * TCP_RETRANSHZ;
			/* Origin point */
			tp->t_ccstate->cub_origin_point = tp->t_ccstate->cub_last_max;
		} else {
			tp->t_ccstate->cub_epoch_period = 0;
			tp->t_ccstate->cub_origin_point = win;
		}
	}

	VERIFY(tp->t_ccstate->cub_origin_point > 0);
	/*
	 * Compute the target window for the next RTT, using the supplied
	 * RTT as an estimate for the next RTT.
	 */
	elapsed_time = timer_diff(tcp_globals_now(globals), 0, tp->t_ccstate->cub_epoch_start, 0);

	if (tcp_cubic_use_minrtt) {
		elapsed_time += max(tcp_cubic_use_minrtt, rtt);
	} else {
		elapsed_time += rtt;
	}
	var = (elapsed_time - tp->t_ccstate->cub_epoch_period) / TCP_RETRANSHZ;
	var = var * var * var * (tcp_cubic_coeff * tp->t_maxseg);
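	/*
	 * Intended arithmetic: var is C * MSS * (t - K)^3 bytes, with
	 * (t - K) in seconds and t including the RTT estimate added above.
	 * The cubed term is negative while the epoch is younger than K
	 * (target below the origin point, concave region) and positive
	 * once t exceeds K (convex region).
	 */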

	return (uint32_t)(tp->t_ccstate->cub_origin_point + var);
}

/*
 * Standard TCP utilizes bandwidth well in low-RTT and low-BDP connections
 * even when there is some packet loss. Enabling TCP mode will help Cubic
 * to achieve this kind of utilization.
 *
 * But if there is a bottleneck link in the path with a fixed-size queue
 * and fixed bandwidth, TCP Cubic will help to reduce packet loss at this
 * link because of its steady-state behavior. Using the average and mean
 * absolute deviation of W(lastmax), we try to detect if the congestion
 * window is close to the bottleneck bandwidth. In that case, disabling
 * TCP mode will help to minimize packet loss at this link.
 *
 * Disable TCP mode if W(lastmax) (the window at which the previous
 * packet loss happened) is within a small range of the calculated
 * average last max.
 */
#define TCP_CUBIC_ENABLE_TCPMODE(_tp_) \
	((!soissrcrealtime((_tp_)->t_inpcb->inp_socket) && \
	(_tp_)->t_ccstate->cub_mean_dev > ((_tp_)->t_maxseg << 1)) ? 1 : 0)
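/*
 * For example, with a 1448-byte MSS, TCP mode stays enabled only while
 * the mean absolute deviation of the last-max window exceeds 2896 bytes,
 * i.e. while the loss points still fluctuate. Once they settle near a
 * fixed bottleneck queue, the deviation shrinks and TCP mode is disabled.
 */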

/*
 * Compute the window growth if standard TCP (AIMD) was used with
 * a backoff of 0.5 and additive increase of 1 packet per RTT.
 *
 * The TCP-friendly window at time t is estimated with the following
 * equation from Section 4.2 of RFC 8312, using tcp_cubic_beta as the
 * multiplicative decrease factor:
 *
 * W_est(t) = W(last_max) * tcp_cubic_beta +
 *     3 * ((1 - tcp_cubic_beta) / (1 + tcp_cubic_beta)) * t/RTT
 */
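/*
 * Illustrative values: with the default tcp_cubic_beta of 0.8 the
 * additive-increase factor is 3 * 0.2 / 1.8, about 0.33 MSS per RTT;
 * with the RFC-compliant 0.7 it is 3 * 0.3 / 1.7, about 0.53 MSS per RTT.
 */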
static uint32_t
tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th)
{
	if (tp->t_ccstate->cub_tcp_win == 0) {
		/*
		 * Start of the epoch: set tcp_win to whatever Cubic
		 * decided at the beginning of the epoch.
		 */
		tp->t_ccstate->cub_tcp_win = min(tp->snd_cwnd, tp->snd_wnd);
		if (tcp_cubic_minor_fixes) {
			tp->t_ccstate->cub_tcp_bytes_acked = BYTES_ACKED(th, tp);
		} else {
			tp->t_ccstate->cub_tcp_bytes_acked = 0;
		}
	} else {
		tp->t_ccstate->cub_tcp_bytes_acked += BYTES_ACKED(th, tp);

		if (tcp_cubic_minor_fixes) {
			/*
			 * Increase by ai_factor * MSS, once per RTT. Counting bytes_acked
			 * against the snd_cwnd represents exactly one RTT at full rate.
			 */
			while (tp->t_ccstate->cub_tcp_bytes_acked >= tp->snd_cwnd) {
				/* Enough bytes have been ACK'd for TCP to do AIMD */
				tp->t_ccstate->cub_tcp_bytes_acked -= tp->snd_cwnd;

				if (tp->snd_cwnd >= tp->t_ccstate->cub_last_max || !tcp_cubic_rfc_compliant) {
					tp->t_ccstate->cub_tcp_win += tp->t_maxseg;
				} else {
					/* Increase-rate from Section 4.2, RFC 8312 */
					float ai_factor = (float)3 * (1 - tcp_cubic_beta) / (1 + tcp_cubic_beta);

					tp->t_ccstate->cub_tcp_win += (uint32_t)(tp->t_maxseg * ai_factor);
				}
			}
		} else {
			if (tp->t_ccstate->cub_tcp_bytes_acked >= tp->t_ccstate->cub_tcp_win) {
				tp->t_ccstate->cub_tcp_bytes_acked -= tp->t_ccstate->cub_tcp_win;
				tp->t_ccstate->cub_tcp_win += tp->t_maxseg;
			}
		}
	}
	return tp->t_ccstate->cub_tcp_win;
}

/*
 * Handle an in-sequence ack during congestion avoidance phase.
 */
static void
tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
{
	uint32_t cubic_target_win, tcp_win, rtt;
	uint64_t incr_win = UINT32_MAX;

	/* Do not increase congestion window in non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) {
		return;
	}

	tp->t_bytes_acked += BYTES_ACKED(th, tp);

	rtt = get_base_rtt(tp);
	/*
	 * First compute cubic window. If cubic variables are not
	 * initialized (after coming out of recovery), this call will
	 * initialize them.
	 */
	cubic_target_win = tcp_cubic_update(tp, rtt);

	/* Compute TCP window if a multiplicative decrease of 0.2 is used */
	tcp_win = tcp_cubic_tcpwin(tp, th);

	if (tp->snd_cwnd < tcp_win && tcp_cubic_minor_fixes == 0 && TCP_CUBIC_ENABLE_TCPMODE(tp)) {
		/* this connection is in TCP-friendly region */
		if (tp->t_bytes_acked >= tp->snd_cwnd) {
			tp->t_bytes_acked -= tp->snd_cwnd;
			tp->snd_cwnd = min(tcp_win, TCP_MAXWIN << tp->snd_scale);
		}
	} else {
		if (cubic_target_win > tp->snd_cwnd) {
			/*
			 * The target win is computed for the next RTT.
			 * To reach this value, cwnd will have to be updated
			 * one segment at a time. Compute how many bytes
			 * need to be acknowledged before we can increase
			 * the cwnd by one segment.
			 */
			incr_win = (uint64_t)tp->snd_cwnd * tp->t_maxseg;
			incr_win /= (cubic_target_win - tp->snd_cwnd);
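			/*
			 * For example, if snd_cwnd is 100 * MSS and the
			 * target is 110 * MSS, incr_win is 10 * MSS: each
			 * 10 MSS worth of ACKed data grows cwnd by one MSS,
			 * so a full window of ACKs (about one RTT) closes
			 * the gap to the target.
			 */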
			if (!tcp_cubic_minor_fixes) {
				if (incr_win > 0 && tp->t_bytes_acked >= incr_win) {
					tp->t_bytes_acked -= incr_win;
					tp->snd_cwnd =
					    min((tp->snd_cwnd + tp->t_maxseg),
					    TCP_MAXWIN << tp->snd_scale);
				}
			}
		}
	}

	if (tcp_cubic_minor_fixes) {
		tcp_win = tcp_round_to(tcp_win, tp->t_maxseg);

		if (tp->snd_cwnd < tcp_win) {
			uint64_t tcp_incr_win;

			tcp_incr_win = (uint64_t)tp->snd_cwnd * tp->t_maxseg;
			tcp_incr_win /= (tcp_win - tp->snd_cwnd);

			if (tcp_incr_win < incr_win) {
				/* this connection is in TCP-friendly region */
				incr_win = tcp_incr_win;
			}
		}

		if (incr_win > 0 && tp->t_bytes_acked >= incr_win) {
			tp->t_bytes_acked -= incr_win;
			tp->snd_cwnd = min(tp->snd_cwnd + tp->t_maxseg, TCP_MAXWIN << tp->snd_scale);
		}
	}
}

static void
tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
{
	/* Do not increase the congestion window in non-validated phase */
	if (tcp_cc_is_cwnd_nonvalidated(tp) != 0) {
		return;
	}

	if (tp->snd_cwnd >= tp->snd_ssthresh) {
		/* Congestion avoidance phase */
		tcp_cubic_congestion_avd(tp, th);
	} else {
		/*
		 * Use 2*SMSS as limit on increment as suggested
		 * by RFC 3465 section 2.3
		 */
		uint32_t acked, abc_lim, incr;

		acked = BYTES_ACKED(th, tp);
		if (tcp_cubic_minor_fixes) {
			/*
			 * Maximum burst-size is limited to the initial congestion-window.
			 * We know that the network can survive this kind of burst.
			 */
			abc_lim = tcp_initial_cwnd(tp);
		} else {
			abc_lim = (tp->snd_nxt == tp->snd_max) ? 2 * tp->t_maxseg : tp->t_maxseg;
		}
		incr = min(acked, abc_lim);

		tp->snd_cwnd += incr;
		tp->snd_cwnd = min(tp->snd_cwnd, TCP_MAXWIN << tp->snd_scale);
	}
}

static void
tcp_cubic_pre_fr(struct tcpcb *tp)
{
	uint32_t win, avg;
	int32_t dev;

	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
		tp->t_lossflightsize = tp->snd_max - tp->snd_una;
		if (tcp_flow_control_response) {
			win = max(tp->t_pipeack, tp->t_lossflightsize);
		} else {
			win = (max(tp->t_pipeack, tp->t_lossflightsize)) >> 1;
		}
	} else {
		tp->t_lossflightsize = 0;
	}
	/*
	 * Note the congestion window at which packet loss occurred as
	 * cub_last_max.
	 *
	 * If the congestion window is less than the last max window when
	 * loss occurred, it indicates that capacity available in the
	 * network has gone down. This can happen if a new flow has started
	 * and it is capturing some of the bandwidth. To reach convergence
	 * quickly, backoff a little more.
	 */
	if (win < tp->t_ccstate->cub_last_max && tcp_cubic_minor_fixes) {
		tp->t_ccstate->cub_last_max = (uint32_t)((float)win * tcp_cubic_fast_convergence_factor);
	} else {
		tp->t_ccstate->cub_last_max = win;
	}

	if (tp->t_ccstate->cub_last_max == 0) {
		/*
		 * If last_max is zero because snd_wnd is zero or for
		 * any other reason, initialize it to the amount of data
		 * in flight.
		 */
		tp->t_ccstate->cub_last_max = tp->snd_max - tp->snd_una;
	}

	/*
	 * Compute average and mean absolute deviation of the
	 * window at which packet loss occurred.
	 */
	if (tp->t_ccstate->cub_avg_lastmax == 0) {
		tp->t_ccstate->cub_avg_lastmax = tp->t_ccstate->cub_last_max;
	} else {
		/*
		 * Average is computed by taking 63 parts of
		 * history and one part of the most recent value
		 */
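		/* Equivalent arithmetic: avg <- (63 * avg + last_max) / 64 */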
		avg = tp->t_ccstate->cub_avg_lastmax;
		avg = (avg << 6) - avg;
		tp->t_ccstate->cub_avg_lastmax =
		    (avg + tp->t_ccstate->cub_last_max) >> 6;
	}

	/* Calculate deviation from average */
	dev = tp->t_ccstate->cub_avg_lastmax - tp->t_ccstate->cub_last_max;

	/* Take the absolute value */
	if (dev < 0) {
		dev = -dev;
	}

	if (tp->t_ccstate->cub_mean_dev == 0) {
		tp->t_ccstate->cub_mean_dev = dev;
	} else {
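		/*
		 * Exponentially weighted moving average with gain 1/16:
		 * mean_dev <- (15 * mean_dev + dev) / 16
		 */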
		dev = dev + ((tp->t_ccstate->cub_mean_dev << 4)
		    - tp->t_ccstate->cub_mean_dev);
		tp->t_ccstate->cub_mean_dev = dev >> 4;
	}

	/* Backoff congestion window by tcp_cubic_backoff factor */
	win = (uint32_t)(win - (win * tcp_cubic_backoff));
	win = tcp_round_to(win, tp->t_maxseg);
	if (win < 2 * tp->t_maxseg) {
		win = 2 * tp->t_maxseg;
	}
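	/*
	 * For example, with the default tcp_cubic_backoff of 0.2 the window
	 * backs off to 80% of its pre-loss value (70% when RFC-compliant),
	 * rounded to a multiple of the MSS with a floor of 2 * MSS.
	 */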
	tp->snd_ssthresh = win;
	tcp_cc_resize_sndbuf(tp);
}

static void
tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th)
{
	uint32_t flight_size = 0;
	uint32_t ack;

	if (th != NULL) {
		ack = th->th_ack;
	} else {
		ack = tp->snd_una;
	}

	if (SEQ_LEQ(ack, tp->snd_max) && (!tcp_cubic_minor_fixes || tcp_flow_control_response)) {
		flight_size = tp->snd_max - ack;
	} else if (tcp_cubic_minor_fixes) {
		/*
		 * Cubic Minor Fixes: snd_max - th_ack is a very bad estimate
		 * of the flight size. Either the app is sending at full speed and
		 * flight_size *is* snd_ssthresh, or the app is not sending at full
		 * speed and congestion-window validation would have kicked in earlier.
		 *
		 * Except that for the latter, snd_ssthresh is way too high.
		 * When we exit recovery we will burst a lot of data out...
		 *
		 * So, tcp_flow_control_response brings us back to the old behavior.
		 * Too many feature-flags...
		 */
		flight_size = tp->snd_ssthresh;
	}

	/*
	 * Cubic Minor Fixes: t_lossflightsize is always 0, because of
	 * EXIT_FASTRECOVERY. This here is basically dead code...
	 */
	if (SACK_ENABLED(tp) && tp->t_lossflightsize > 0 && !tcp_cubic_minor_fixes) {
		uint32_t total_rxt_size = 0, ncwnd;
		/*
		 * When SACK is enabled, the number of retransmitted bytes
		 * can be counted more accurately.
		 */
		total_rxt_size = tcp_rxtseg_total_size(tp);
		ncwnd = max(tp->t_pipeack, tp->t_lossflightsize);
		if (total_rxt_size <= ncwnd) {
			ncwnd = ncwnd - total_rxt_size;
		}

		/*
		 * To avoid sending a large burst at the end of recovery
		 * set a max limit on ncwnd
		 */
		ncwnd = min(ncwnd, (tp->t_maxseg << 6));
		ncwnd = ncwnd >> 1;
		flight_size = max(ncwnd, flight_size);
	}
	/*
	 * Complete ack. The current window was inflated for fast recovery.
	 * It has to be deflated post recovery.
	 *
	 * Window inflation should have left us with approximately
	 * snd_ssthresh outstanding data. If the flight size is zero or one
	 * segment, make the congestion window at least two segments to
	 * avoid delayed acknowledgements. This is according to RFC 6582.
	 */
	if (flight_size < tp->snd_ssthresh) {
		tp->snd_cwnd = max(flight_size, tp->t_maxseg) + tp->t_maxseg;
	} else {
		tp->snd_cwnd = tp->snd_ssthresh;
	}
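	/*
	 * For example, if snd_ssthresh is 20 * MSS but only 5 * MSS is
	 * still in flight when recovery ends, cwnd becomes 6 * MSS, so the
	 * sender ramps back up gradually instead of bursting the difference
	 * at once.
	 */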

	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
}

static void
tcp_cubic_after_timeout(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);

	/*
	 * Avoid adjusting the congestion window due to SYN retransmissions.
	 * If more than one byte (the SYN) is outstanding, the window still
	 * needs to be adjusted.
	 */
	if (tp->t_state < TCPS_ESTABLISHED &&
	    ((int)(tp->snd_max - tp->snd_una) <= 1)) {
		return;
	}

	if (!IN_FASTRECOVERY(tp)) {
		tcp_cubic_clear_state(tp);
		tcp_cubic_pre_fr(tp);
	}

	/*
	 * Close the congestion window down to one segment as a retransmit
	 * timeout might indicate severe congestion.
	 */
	tp->snd_cwnd = tp->t_maxseg;
}

static int
tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th)
{
	return tcp_cc_delay_ack(tp, th);
}

/*
 * When switching from a different CC it is better for Cubic to start
 * fresh. The state required for the Cubic calculation might be stale and
 * might not represent the current state of the network. If it starts as
 * a new connection, it will probe and learn the existing network
 * conditions.
 */
static void
tcp_cubic_switch_cc(struct tcpcb *tp)
{
	tcp_cubic_cwnd_init_or_reset(tp);

	os_atomic_inc(&tcp_cc_cubic.num_sockets, relaxed);
}

static inline void
tcp_cubic_clear_state(struct tcpcb *tp)
{
	tp->t_ccstate->cub_last_max = 0;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_origin_point = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
	tp->t_ccstate->cub_epoch_period = 0;
}