1 | /* |
2 | * Copyright (c) 2012-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <sys/param.h> |
30 | #include <sys/systm.h> |
31 | #include <sys/kernel.h> |
32 | #include <sys/mbuf.h> |
33 | #include <sys/mcache.h> |
34 | #include <sys/syslog.h> |
35 | #include <sys/socket.h> |
36 | #include <sys/socketvar.h> |
37 | #include <sys/protosw.h> |
38 | #include <sys/proc_internal.h> |
39 | |
40 | #include <mach/boolean.h> |
41 | #include <kern/zalloc.h> |
42 | #include <kern/locks.h> |
43 | |
44 | #include <netinet/mp_pcb.h> |
45 | #include <netinet/mptcp_var.h> |
46 | #include <netinet6/in6_pcb.h> |
47 | |
48 | static LCK_GRP_DECLARE(mp_lock_grp, "multipath" ); |
49 | static LCK_ATTR_DECLARE(mp_lock_attr, 0, 0); |
50 | static LCK_MTX_DECLARE_ATTR(mp_lock, &mp_lock_grp, &mp_lock_attr); |
51 | static LCK_MTX_DECLARE_ATTR(mp_timeout_lock, &mp_lock_grp, &mp_lock_attr); |
52 | |
53 | static TAILQ_HEAD(, mppcbinfo) mppi_head = TAILQ_HEAD_INITIALIZER(mppi_head); |
54 | |
55 | static boolean_t mp_timeout_run; /* MP timer is scheduled to run */ |
56 | static boolean_t mp_garbage_collecting; |
57 | static boolean_t mp_ticking; |
58 | static void mp_sched_timeout(void); |
59 | static void mp_timeout(void *); |
60 | |
61 | static void |
62 | mpp_lock_assert_held(struct mppcb *mp) |
63 | { |
64 | #if !MACH_ASSERT |
65 | #pragma unused(mp) |
66 | #endif |
67 | LCK_MTX_ASSERT(&mp->mpp_lock, LCK_MTX_ASSERT_OWNED); |
68 | } |
69 | |
70 | static void |
71 | mp_timeout(void *arg) |
72 | { |
73 | #pragma unused(arg) |
74 | struct mppcbinfo *mppi; |
75 | boolean_t t, gc; |
76 | uint32_t t_act = 0; |
77 | uint32_t gc_act = 0; |
78 | |
79 | /* |
80 | * Update coarse-grained networking timestamp (in sec.); the idea |
81 | * is to piggy-back on the timeout callout to update the counter |
82 | * returnable via net_uptime(). |
83 | */ |
84 | net_update_uptime(); |
85 | |
86 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
87 | gc = mp_garbage_collecting; |
88 | mp_garbage_collecting = FALSE; |
89 | |
90 | t = mp_ticking; |
91 | mp_ticking = FALSE; |
92 | |
93 | if (gc || t) { |
94 | lck_mtx_unlock(lck: &mp_timeout_lock); |
95 | |
96 | lck_mtx_lock(lck: &mp_lock); |
97 | TAILQ_FOREACH(mppi, &mppi_head, mppi_entry) { |
98 | if ((gc && mppi->mppi_gc != NULL) || |
99 | (t && mppi->mppi_timer != NULL)) { |
100 | lck_mtx_lock(lck: &mppi->mppi_lock); |
101 | if (gc && mppi->mppi_gc != NULL) { |
102 | gc_act += mppi->mppi_gc(mppi); |
103 | } |
104 | if (t && mppi->mppi_timer != NULL) { |
105 | t_act += mppi->mppi_timer(mppi); |
106 | } |
107 | lck_mtx_unlock(lck: &mppi->mppi_lock); |
108 | } |
109 | } |
110 | lck_mtx_unlock(lck: &mp_lock); |
111 | |
112 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
113 | } |
114 | |
115 | /* lock was dropped above, so check first before overriding */ |
116 | if (!mp_garbage_collecting) { |
117 | mp_garbage_collecting = (gc_act != 0); |
118 | } |
119 | if (!mp_ticking) { |
120 | mp_ticking = (t_act != 0); |
121 | } |
122 | |
123 | /* re-arm the timer if there's work to do */ |
124 | mp_timeout_run = FALSE; |
125 | mp_sched_timeout(); |
126 | lck_mtx_unlock(lck: &mp_timeout_lock); |
127 | } |
128 | |
129 | static void |
130 | mp_sched_timeout(void) |
131 | { |
132 | LCK_MTX_ASSERT(&mp_timeout_lock, LCK_MTX_ASSERT_OWNED); |
133 | |
134 | if (!mp_timeout_run && (mp_garbage_collecting || mp_ticking)) { |
135 | lck_mtx_convert_spin(lck: &mp_timeout_lock); |
136 | mp_timeout_run = TRUE; |
137 | timeout(mp_timeout, NULL, ticks: hz); |
138 | } |
139 | } |
140 | |
141 | void |
142 | mp_gc_sched(void) |
143 | { |
144 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
145 | mp_garbage_collecting = TRUE; |
146 | mp_sched_timeout(); |
147 | lck_mtx_unlock(lck: &mp_timeout_lock); |
148 | } |
149 | |
150 | void |
151 | mptcp_timer_sched(void) |
152 | { |
153 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
154 | mp_ticking = TRUE; |
155 | mp_sched_timeout(); |
156 | lck_mtx_unlock(lck: &mp_timeout_lock); |
157 | } |
158 | |
159 | void |
160 | mp_pcbinfo_attach(struct mppcbinfo *mppi) |
161 | { |
162 | struct mppcbinfo *mppi0; |
163 | |
164 | lck_mtx_lock(lck: &mp_lock); |
165 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
166 | if (mppi0 == mppi) { |
167 | panic("%s: mppi %p already in the list" , |
168 | __func__, mppi); |
169 | /* NOTREACHED */ |
170 | } |
171 | } |
172 | TAILQ_INSERT_TAIL(&mppi_head, mppi, mppi_entry); |
173 | lck_mtx_unlock(lck: &mp_lock); |
174 | } |
175 | |
176 | int |
177 | mp_pcbinfo_detach(struct mppcbinfo *mppi) |
178 | { |
179 | struct mppcbinfo *mppi0; |
180 | int error = 0; |
181 | |
182 | lck_mtx_lock(lck: &mp_lock); |
183 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
184 | if (mppi0 == mppi) { |
185 | break; |
186 | } |
187 | } |
188 | if (mppi0 != NULL) { |
189 | TAILQ_REMOVE(&mppi_head, mppi0, mppi_entry); |
190 | } else { |
191 | error = ENXIO; |
192 | } |
193 | lck_mtx_unlock(lck: &mp_lock); |
194 | |
195 | return error; |
196 | } |
197 | |
198 | int |
199 | mp_pcballoc(struct socket *so, struct mppcbinfo *mppi) |
200 | { |
201 | struct mppcb *mpp = NULL; |
202 | int error; |
203 | |
204 | VERIFY(mpsotomppcb(so) == NULL); |
205 | |
206 | mpp = mppi->mppi_alloc(); |
207 | lck_mtx_init(lck: &mpp->mpp_lock, grp: mppi->mppi_lock_grp, attr: &mppi->mppi_lock_attr); |
208 | mpp->mpp_pcbinfo = mppi; |
209 | mpp->mpp_state = MPPCB_STATE_INUSE; |
210 | mpp->mpp_socket = so; |
211 | so->so_pcb = mpp; |
212 | |
213 | error = mptcp_session_create(mpp); |
214 | if (error) { |
215 | lck_mtx_destroy(lck: &mpp->mpp_lock, grp: mppi->mppi_lock_grp); |
216 | mppi->mppi_free(mpp); |
217 | return error; |
218 | } |
219 | |
220 | lck_mtx_lock(lck: &mppi->mppi_lock); |
221 | mpp->mpp_flags |= MPP_ATTACHED; |
222 | TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry); |
223 | mppi->mppi_count++; |
224 | |
225 | lck_mtx_unlock(lck: &mppi->mppi_lock); |
226 | |
227 | return 0; |
228 | } |
229 | |
230 | void |
231 | mp_pcbdetach(struct socket *mp_so) |
232 | { |
233 | struct mppcb *mpp = mpsotomppcb(mp_so); |
234 | |
235 | mpp->mpp_state = MPPCB_STATE_DEAD; |
236 | |
237 | mp_gc_sched(); |
238 | } |
239 | |
240 | void |
241 | mptcp_pcbdispose(struct mppcb *mpp) |
242 | { |
243 | struct mppcbinfo *mppi = mpp->mpp_pcbinfo; |
244 | struct socket *mp_so = mpp->mpp_socket; |
245 | |
246 | VERIFY(mppi != NULL); |
247 | |
248 | LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED); |
249 | mpp_lock_assert_held(mp: mpp); |
250 | |
251 | VERIFY(mpp->mpp_state == MPPCB_STATE_DEAD); |
252 | VERIFY(mpp->mpp_flags & MPP_ATTACHED); |
253 | |
254 | mpp->mpp_flags &= ~MPP_ATTACHED; |
255 | TAILQ_REMOVE(&mppi->mppi_pcbs, mpp, mpp_entry); |
256 | VERIFY(mppi->mppi_count != 0); |
257 | mppi->mppi_count--; |
258 | |
259 | if (mppi->mppi_count == 0) { |
260 | if (mptcp_cellicon_refcount) { |
261 | os_log_error(mptcp_log_handle, "%s: No more MPTCP-flows, but cell icon counter is %u\n" , |
262 | __func__, mptcp_cellicon_refcount); |
263 | mptcp_clear_cellicon(); |
264 | mptcp_cellicon_refcount = 0; |
265 | } |
266 | } |
267 | |
268 | VERIFY(mpp->mpp_inside == 0); |
269 | mpp_unlock(mp: mpp); |
270 | |
271 | #if NECP |
272 | necp_mppcb_dispose(mpp); |
273 | #endif /* NECP */ |
274 | |
275 | sofreelastref(mp_so, 0); |
276 | if (mp_so->so_rcv.sb_cc > 0 || mp_so->so_snd.sb_cc > 0) { |
277 | /* |
278 | * selthreadclear() already called |
279 | * during sofreelastref() above. |
280 | */ |
281 | sbrelease(sb: &mp_so->so_rcv); |
282 | sbrelease(sb: &mp_so->so_snd); |
283 | } |
284 | |
285 | lck_mtx_destroy(lck: &mpp->mpp_lock, grp: mppi->mppi_lock_grp); |
286 | |
287 | VERIFY(mpp->mpp_socket != NULL); |
288 | VERIFY(mpp->mpp_socket->so_usecount == 0); |
289 | mpp->mpp_socket->so_pcb = NULL; |
290 | mpp->mpp_socket = NULL; |
291 | mppi->mppi_free(mpp); |
292 | } |
293 | |
294 | static int |
295 | mp_getaddr_v4(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
296 | { |
297 | struct mptses *mpte = mpsotompte(so: mp_so); |
298 | struct sockaddr_in *sin; |
299 | |
300 | /* |
301 | * Do the malloc first in case it blocks. |
302 | */ |
303 | sin = (struct sockaddr_in *)alloc_sockaddr(size: sizeof(*sin), |
304 | flags: Z_WAITOK | Z_NOFAIL); |
305 | |
306 | sin->sin_family = AF_INET; |
307 | |
308 | if (!peer) { |
309 | sin->sin_port = mpte->__mpte_src_v4.sin_port; |
310 | sin->sin_addr = mpte->__mpte_src_v4.sin_addr; |
311 | } else { |
312 | sin->sin_port = mpte->__mpte_dst_v4.sin_port; |
313 | sin->sin_addr = mpte->__mpte_dst_v4.sin_addr; |
314 | } |
315 | |
316 | *nam = (struct sockaddr *)sin; |
317 | return 0; |
318 | } |
319 | |
320 | static int |
321 | mp_getaddr_v6(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
322 | { |
323 | struct mptses *mpte = mpsotompte(so: mp_so); |
324 | struct in6_addr addr; |
325 | in_port_t port; |
326 | uint32_t ifscope; |
327 | |
328 | if (!peer) { |
329 | port = mpte->__mpte_src_v6.sin6_port; |
330 | addr = mpte->__mpte_src_v6.sin6_addr; |
331 | ifscope = mpte->__mpte_src_v6.sin6_scope_id; |
332 | } else { |
333 | port = mpte->__mpte_dst_v6.sin6_port; |
334 | addr = mpte->__mpte_dst_v6.sin6_addr; |
335 | ifscope = mpte->__mpte_dst_v6.sin6_scope_id; |
336 | } |
337 | |
338 | *nam = in6_sockaddr(port, addr_p: &addr, ifscope); |
339 | if (*nam == NULL) { |
340 | return ENOBUFS; |
341 | } |
342 | |
343 | return 0; |
344 | } |
345 | |
346 | int |
347 | mp_getsockaddr(struct socket *mp_so, struct sockaddr **nam) |
348 | { |
349 | struct mptses *mpte = mpsotompte(so: mp_so); |
350 | |
351 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) { |
352 | return mp_getaddr_v4(mp_so, nam, false); |
353 | } else if (mpte->mpte_src.sa_family == AF_INET6) { |
354 | return mp_getaddr_v6(mp_so, nam, false); |
355 | } else { |
356 | return EINVAL; |
357 | } |
358 | } |
359 | |
360 | int |
361 | mp_getpeeraddr(struct socket *mp_so, struct sockaddr **nam) |
362 | { |
363 | struct mptses *mpte = mpsotompte(so: mp_so); |
364 | |
365 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) { |
366 | return mp_getaddr_v4(mp_so, nam, true); |
367 | } else if (mpte->mpte_src.sa_family == AF_INET6) { |
368 | return mp_getaddr_v6(mp_so, nam, true); |
369 | } else { |
370 | return EINVAL; |
371 | } |
372 | } |
373 | |