1 | /* |
2 | * Copyright (c) 2012-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <sys/param.h> |
30 | #include <sys/systm.h> |
31 | #include <sys/kernel.h> |
32 | #include <sys/mbuf.h> |
33 | #include <sys/mcache.h> |
34 | #include <sys/syslog.h> |
35 | #include <sys/socket.h> |
36 | #include <sys/socketvar.h> |
37 | #include <sys/protosw.h> |
38 | #include <sys/proc_internal.h> |
39 | |
40 | #include <mach/boolean.h> |
41 | #include <kern/zalloc.h> |
42 | #include <kern/locks.h> |
43 | |
44 | #include <netinet/mp_pcb.h> |
45 | #include <netinet/mptcp_var.h> |
46 | #include <netinet6/in6_pcb.h> |
47 | |
/*
 * Lock group and attributes shared by the two global mutexes below;
 * allocated once in mp_pcbinit().
 */
static lck_grp_t *mp_lock_grp;
static lck_attr_t *mp_lock_attr;
static lck_grp_attr_t *mp_lock_grp_attr;
/* held while walking or modifying mppi_head */
decl_lck_mtx_data(static, mp_lock); /* global MULTIPATH lock */
/* held while reading or writing the three timer flags below */
decl_lck_mtx_data(static, mp_timeout_lock);

/* every mppcbinfo registered through mp_pcbinfo_attach() */
static TAILQ_HEAD(, mppcbinfo) mppi_head = TAILQ_HEAD_INITIALIZER(mppi_head);

static boolean_t mp_timeout_run; /* MP timer is scheduled to run */
/* a garbage-collection pass has been requested (see mp_gc_sched()) */
static boolean_t mp_garbage_collecting;
/* a timer pass has been requested (see mptcp_timer_sched()) */
static boolean_t mp_ticking;
static void mp_sched_timeout(void);
static void mp_timeout(void *);
61 | |
62 | void |
63 | mp_pcbinit(void) |
64 | { |
65 | static int mp_initialized = 0; |
66 | |
67 | VERIFY(!mp_initialized); |
68 | mp_initialized = 1; |
69 | |
70 | mp_lock_grp_attr = lck_grp_attr_alloc_init(); |
71 | mp_lock_grp = lck_grp_alloc_init("multipath" , mp_lock_grp_attr); |
72 | mp_lock_attr = lck_attr_alloc_init(); |
73 | lck_mtx_init(&mp_lock, mp_lock_grp, mp_lock_attr); |
74 | lck_mtx_init(&mp_timeout_lock, mp_lock_grp, mp_lock_attr); |
75 | } |
76 | |
/*
 * Callout handler armed by mp_sched_timeout().
 *
 * Snapshots and clears the pending garbage-collection and timer
 * requests, then, for every registered mppcbinfo, invokes its mppi_gc
 * and/or mppi_timer callback (when set) with both mp_lock and that
 * mppcbinfo's mppi_lock held.  The callbacks' return values are summed;
 * a non-zero sum causes the corresponding request flag to be raised
 * again so that the callout is re-armed.
 *
 * The argument is unused.
 */
static void
mp_timeout(void *arg)
{
#pragma unused(arg)
	struct mppcbinfo *mppi;
	boolean_t t, gc;
	uint32_t t_act = 0;	/* sum of mppi_timer() return values */
	uint32_t gc_act = 0;	/* sum of mppi_gc() return values */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	/* take a private copy of the requests and reset the shared flags */
	lck_mtx_lock_spin(&mp_timeout_lock);
	gc = mp_garbage_collecting;
	mp_garbage_collecting = FALSE;

	t = mp_ticking;
	mp_ticking = FALSE;

	if (gc || t) {
		/* the callbacks run without mp_timeout_lock held */
		lck_mtx_unlock(&mp_timeout_lock);

		lck_mtx_lock(&mp_lock);
		TAILQ_FOREACH(mppi, &mppi_head, mppi_entry) {
			/* only take mppi_lock if a callback will be called */
			if ((gc && mppi->mppi_gc != NULL) ||
			    (t && mppi->mppi_timer != NULL)) {
				lck_mtx_lock(&mppi->mppi_lock);
				if (gc && mppi->mppi_gc != NULL)
					gc_act += mppi->mppi_gc(mppi);
				if (t && mppi->mppi_timer != NULL)
					t_act += mppi->mppi_timer(mppi);
				lck_mtx_unlock(&mppi->mppi_lock);
			}
		}
		lck_mtx_unlock(&mp_lock);

		lck_mtx_lock_spin(&mp_timeout_lock);
	}

	/*
	 * The lock was dropped above, so a new request may have been
	 * posted in the meantime; check first before overriding.
	 */
	if (!mp_garbage_collecting)
		mp_garbage_collecting = (gc_act != 0);
	if (!mp_ticking)
		mp_ticking = (t_act != 0);

	/*
	 * Re-arm the timer if there's work to do; mp_timeout_run must be
	 * cleared first or mp_sched_timeout() would not schedule anything.
	 */
	mp_timeout_run = FALSE;
	mp_sched_timeout();
	lck_mtx_unlock(&mp_timeout_lock);
}
131 | |
132 | static void |
133 | mp_sched_timeout(void) |
134 | { |
135 | LCK_MTX_ASSERT(&mp_timeout_lock, LCK_MTX_ASSERT_OWNED); |
136 | |
137 | if (!mp_timeout_run && (mp_garbage_collecting || mp_ticking)) { |
138 | lck_mtx_convert_spin(&mp_timeout_lock); |
139 | mp_timeout_run = TRUE; |
140 | timeout(mp_timeout, NULL, hz); |
141 | } |
142 | } |
143 | |
144 | void |
145 | mp_gc_sched(void) |
146 | { |
147 | lck_mtx_lock_spin(&mp_timeout_lock); |
148 | mp_garbage_collecting = TRUE; |
149 | mp_sched_timeout(); |
150 | lck_mtx_unlock(&mp_timeout_lock); |
151 | } |
152 | |
153 | void |
154 | mptcp_timer_sched(void) |
155 | { |
156 | lck_mtx_lock_spin(&mp_timeout_lock); |
157 | mp_ticking = TRUE; |
158 | mp_sched_timeout(); |
159 | lck_mtx_unlock(&mp_timeout_lock); |
160 | } |
161 | |
162 | void |
163 | mp_pcbinfo_attach(struct mppcbinfo *mppi) |
164 | { |
165 | struct mppcbinfo *mppi0; |
166 | |
167 | lck_mtx_lock(&mp_lock); |
168 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
169 | if (mppi0 == mppi) { |
170 | panic("%s: mppi %p already in the list\n" , |
171 | __func__, mppi); |
172 | /* NOTREACHED */ |
173 | } |
174 | } |
175 | TAILQ_INSERT_TAIL(&mppi_head, mppi, mppi_entry); |
176 | lck_mtx_unlock(&mp_lock); |
177 | } |
178 | |
179 | int |
180 | mp_pcbinfo_detach(struct mppcbinfo *mppi) |
181 | { |
182 | struct mppcbinfo *mppi0; |
183 | int error = 0; |
184 | |
185 | lck_mtx_lock(&mp_lock); |
186 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
187 | if (mppi0 == mppi) |
188 | break; |
189 | } |
190 | if (mppi0 != NULL) |
191 | TAILQ_REMOVE(&mppi_head, mppi0, mppi_entry); |
192 | else |
193 | error = ENXIO; |
194 | lck_mtx_unlock(&mp_lock); |
195 | |
196 | return (error); |
197 | } |
198 | |
199 | int |
200 | mp_pcballoc(struct socket *so, struct mppcbinfo *mppi) |
201 | { |
202 | struct mppcb *mpp = NULL; |
203 | int error; |
204 | |
205 | VERIFY(mpsotomppcb(so) == NULL); |
206 | |
207 | mpp = zalloc(mppi->mppi_zone); |
208 | if (mpp == NULL) { |
209 | return (ENOBUFS); |
210 | } |
211 | |
212 | bzero(mpp, mppi->mppi_size); |
213 | lck_mtx_init(&mpp->mpp_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr); |
214 | mpp->mpp_pcbinfo = mppi; |
215 | mpp->mpp_state = MPPCB_STATE_INUSE; |
216 | mpp->mpp_socket = so; |
217 | so->so_pcb = mpp; |
218 | |
219 | error = mptcp_sescreate(mpp); |
220 | if (error) { |
221 | lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp); |
222 | zfree(mppi->mppi_zone, mpp); |
223 | return (error); |
224 | } |
225 | |
226 | lck_mtx_lock(&mppi->mppi_lock); |
227 | mpp->mpp_flags |= MPP_ATTACHED; |
228 | TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry); |
229 | mppi->mppi_count++; |
230 | lck_mtx_unlock(&mppi->mppi_lock); |
231 | |
232 | return (0); |
233 | } |
234 | |
235 | void |
236 | mp_pcbdetach(struct socket *mp_so) |
237 | { |
238 | struct mppcb *mpp = mpsotomppcb(mp_so); |
239 | |
240 | mpp->mpp_state = MPPCB_STATE_DEAD; |
241 | if (!(mp_so->so_flags & SOF_PCBCLEARING)) |
242 | mp_so->so_flags |= SOF_PCBCLEARING; |
243 | |
244 | mp_gc_sched(); |
245 | } |
246 | |
/*
 * Final disposal of a multipath PCB.
 *
 * Unlinks the PCB from its pcbinfo's list, releases and destroys the
 * PCB lock, severs the link to the socket in both directions and
 * returns the PCB to its zone.
 *
 * Preconditions (asserted below): the caller holds both
 * mppi->mppi_lock and the PCB lock, the PCB is in MPPCB_STATE_DEAD and
 * still has MPP_ATTACHED set, and its socket's so_usecount is 0.
 * The PCB lock is released (and destroyed) by this function; the
 * pcbinfo lock is left held.
 */
void
mp_pcbdispose(struct mppcb *mpp)
{
	struct mppcbinfo *mppi = mpp->mpp_pcbinfo;

	VERIFY(mppi != NULL);

	LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
	mpp_lock_assert_held(mpp);

	VERIFY(mpp->mpp_state == MPPCB_STATE_DEAD);
	VERIFY(mpp->mpp_flags & MPP_ATTACHED);

	/* take the PCB off the pcbinfo's list and fix up the count */
	mpp->mpp_flags &= ~MPP_ATTACHED;
	TAILQ_REMOVE(&mppi->mppi_pcbs, mpp, mpp_entry);
	VERIFY(mppi->mppi_count != 0);
	mppi->mppi_count--;

	/* the lock must be dropped before it can be destroyed below */
	mpp_unlock(mpp);

#if NECP
	necp_mppcb_dispose(mpp);
#endif /* NECP */

	lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp);

	/* break the socket <-> PCB association before freeing the PCB */
	VERIFY(mpp->mpp_socket != NULL);
	VERIFY(mpp->mpp_socket->so_usecount == 0);
	mpp->mpp_socket->so_pcb = NULL;
	mpp->mpp_socket = NULL;

	zfree(mppi->mppi_zone, mpp);
}
280 | |
281 | static int |
282 | mp_getaddr_v4(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
283 | { |
284 | struct mptses *mpte = mpsotompte(mp_so); |
285 | struct sockaddr_in *sin; |
286 | |
287 | /* |
288 | * Do the malloc first in case it blocks. |
289 | */ |
290 | MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); |
291 | if (sin == NULL) |
292 | return (ENOBUFS); |
293 | bzero(sin, sizeof (*sin)); |
294 | sin->sin_family = AF_INET; |
295 | sin->sin_len = sizeof (*sin); |
296 | |
297 | if (!peer) { |
298 | sin->sin_port = mpte->__mpte_src_v4.sin_port; |
299 | sin->sin_addr = mpte->__mpte_src_v4.sin_addr; |
300 | } else { |
301 | sin->sin_port = mpte->__mpte_dst_v4.sin_port; |
302 | sin->sin_addr = mpte->__mpte_dst_v4.sin_addr; |
303 | } |
304 | |
305 | *nam = (struct sockaddr *)sin; |
306 | return (0); |
307 | } |
308 | |
309 | static int |
310 | mp_getaddr_v6(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
311 | { |
312 | struct mptses *mpte = mpsotompte(mp_so); |
313 | struct in6_addr addr; |
314 | in_port_t port; |
315 | |
316 | if (!peer) { |
317 | port = mpte->__mpte_src_v6.sin6_port; |
318 | addr = mpte->__mpte_src_v6.sin6_addr; |
319 | } else { |
320 | port = mpte->__mpte_dst_v6.sin6_port; |
321 | addr = mpte->__mpte_dst_v6.sin6_addr; |
322 | } |
323 | |
324 | *nam = in6_sockaddr(port, &addr); |
325 | if (*nam == NULL) |
326 | return (ENOBUFS); |
327 | |
328 | return (0); |
329 | } |
330 | |
331 | int |
332 | mp_getsockaddr(struct socket *mp_so, struct sockaddr **nam) |
333 | { |
334 | struct mptses *mpte = mpsotompte(mp_so); |
335 | |
336 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) |
337 | return mp_getaddr_v4(mp_so, nam, false); |
338 | else if (mpte->mpte_src.sa_family == AF_INET6) |
339 | return mp_getaddr_v6(mp_so, nam, false); |
340 | else |
341 | return (EINVAL); |
342 | } |
343 | |
344 | int |
345 | mp_getpeeraddr(struct socket *mp_so, struct sockaddr **nam) |
346 | { |
347 | struct mptses *mpte = mpsotompte(mp_so); |
348 | |
349 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) |
350 | return mp_getaddr_v4(mp_so, nam, true); |
351 | else if (mpte->mpte_src.sa_family == AF_INET6) |
352 | return mp_getaddr_v6(mp_so, nam, true); |
353 | else |
354 | return (EINVAL); |
355 | } |
356 | |