| 1 | /* |
| 2 | * Copyright (c) 2012-2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #include <sys/param.h> |
| 30 | #include <sys/systm.h> |
| 31 | #include <sys/kernel.h> |
| 32 | #include <sys/mbuf.h> |
| 33 | #include <sys/mcache.h> |
| 34 | #include <sys/syslog.h> |
| 35 | #include <sys/socket.h> |
| 36 | #include <sys/socketvar.h> |
| 37 | #include <sys/protosw.h> |
| 38 | #include <sys/proc_internal.h> |
| 39 | |
| 40 | #include <mach/boolean.h> |
| 41 | #include <kern/zalloc.h> |
| 42 | #include <kern/locks.h> |
| 43 | |
| 44 | #include <netinet/mp_pcb.h> |
| 45 | #include <netinet/mptcp_var.h> |
| 46 | #include <netinet6/in6_pcb.h> |
| 47 | |
| 48 | static LCK_GRP_DECLARE(mp_lock_grp, "multipath" ); |
| 49 | static LCK_ATTR_DECLARE(mp_lock_attr, 0, 0); |
| 50 | static LCK_MTX_DECLARE_ATTR(mp_lock, &mp_lock_grp, &mp_lock_attr); |
| 51 | static LCK_MTX_DECLARE_ATTR(mp_timeout_lock, &mp_lock_grp, &mp_lock_attr); |
| 52 | |
| 53 | static TAILQ_HEAD(, mppcbinfo) mppi_head = TAILQ_HEAD_INITIALIZER(mppi_head); |
| 54 | |
| 55 | static boolean_t mp_timeout_run; /* MP timer is scheduled to run */ |
| 56 | static boolean_t mp_garbage_collecting; |
| 57 | static boolean_t mp_ticking; |
| 58 | static void mp_sched_timeout(void); |
| 59 | static void mp_timeout(void *); |
| 60 | |
| 61 | static void |
| 62 | mpp_lock_assert_held(struct mppcb *mp) |
| 63 | { |
| 64 | #if !MACH_ASSERT |
| 65 | #pragma unused(mp) |
| 66 | #endif |
| 67 | LCK_MTX_ASSERT(&mp->mpp_lock, LCK_MTX_ASSERT_OWNED); |
| 68 | } |
| 69 | |
| 70 | static void |
| 71 | mp_timeout(void *arg) |
| 72 | { |
| 73 | #pragma unused(arg) |
| 74 | struct mppcbinfo *mppi; |
| 75 | boolean_t t, gc; |
| 76 | uint32_t t_act = 0; |
| 77 | uint32_t gc_act = 0; |
| 78 | |
| 79 | /* |
| 80 | * Update coarse-grained networking timestamp (in sec.); the idea |
| 81 | * is to piggy-back on the timeout callout to update the counter |
| 82 | * returnable via net_uptime(). |
| 83 | */ |
| 84 | net_update_uptime(); |
| 85 | |
| 86 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
| 87 | gc = mp_garbage_collecting; |
| 88 | mp_garbage_collecting = FALSE; |
| 89 | |
| 90 | t = mp_ticking; |
| 91 | mp_ticking = FALSE; |
| 92 | |
| 93 | if (gc || t) { |
| 94 | lck_mtx_unlock(lck: &mp_timeout_lock); |
| 95 | |
| 96 | lck_mtx_lock(lck: &mp_lock); |
| 97 | TAILQ_FOREACH(mppi, &mppi_head, mppi_entry) { |
| 98 | if ((gc && mppi->mppi_gc != NULL) || |
| 99 | (t && mppi->mppi_timer != NULL)) { |
| 100 | lck_mtx_lock(lck: &mppi->mppi_lock); |
| 101 | if (gc && mppi->mppi_gc != NULL) { |
| 102 | gc_act += mppi->mppi_gc(mppi); |
| 103 | } |
| 104 | if (t && mppi->mppi_timer != NULL) { |
| 105 | t_act += mppi->mppi_timer(mppi); |
| 106 | } |
| 107 | lck_mtx_unlock(lck: &mppi->mppi_lock); |
| 108 | } |
| 109 | } |
| 110 | lck_mtx_unlock(lck: &mp_lock); |
| 111 | |
| 112 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
| 113 | } |
| 114 | |
| 115 | /* lock was dropped above, so check first before overriding */ |
| 116 | if (!mp_garbage_collecting) { |
| 117 | mp_garbage_collecting = (gc_act != 0); |
| 118 | } |
| 119 | if (!mp_ticking) { |
| 120 | mp_ticking = (t_act != 0); |
| 121 | } |
| 122 | |
| 123 | /* re-arm the timer if there's work to do */ |
| 124 | mp_timeout_run = FALSE; |
| 125 | mp_sched_timeout(); |
| 126 | lck_mtx_unlock(lck: &mp_timeout_lock); |
| 127 | } |
| 128 | |
| 129 | static void |
| 130 | mp_sched_timeout(void) |
| 131 | { |
| 132 | LCK_MTX_ASSERT(&mp_timeout_lock, LCK_MTX_ASSERT_OWNED); |
| 133 | |
| 134 | if (!mp_timeout_run && (mp_garbage_collecting || mp_ticking)) { |
| 135 | lck_mtx_convert_spin(lck: &mp_timeout_lock); |
| 136 | mp_timeout_run = TRUE; |
| 137 | timeout(mp_timeout, NULL, ticks: hz); |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | void |
| 142 | mp_gc_sched(void) |
| 143 | { |
| 144 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
| 145 | mp_garbage_collecting = TRUE; |
| 146 | mp_sched_timeout(); |
| 147 | lck_mtx_unlock(lck: &mp_timeout_lock); |
| 148 | } |
| 149 | |
| 150 | void |
| 151 | mptcp_timer_sched(void) |
| 152 | { |
| 153 | lck_mtx_lock_spin(lck: &mp_timeout_lock); |
| 154 | mp_ticking = TRUE; |
| 155 | mp_sched_timeout(); |
| 156 | lck_mtx_unlock(lck: &mp_timeout_lock); |
| 157 | } |
| 158 | |
| 159 | void |
| 160 | mp_pcbinfo_attach(struct mppcbinfo *mppi) |
| 161 | { |
| 162 | struct mppcbinfo *mppi0; |
| 163 | |
| 164 | lck_mtx_lock(lck: &mp_lock); |
| 165 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
| 166 | if (mppi0 == mppi) { |
| 167 | panic("%s: mppi %p already in the list" , |
| 168 | __func__, mppi); |
| 169 | /* NOTREACHED */ |
| 170 | } |
| 171 | } |
| 172 | TAILQ_INSERT_TAIL(&mppi_head, mppi, mppi_entry); |
| 173 | lck_mtx_unlock(lck: &mp_lock); |
| 174 | } |
| 175 | |
| 176 | int |
| 177 | mp_pcbinfo_detach(struct mppcbinfo *mppi) |
| 178 | { |
| 179 | struct mppcbinfo *mppi0; |
| 180 | int error = 0; |
| 181 | |
| 182 | lck_mtx_lock(lck: &mp_lock); |
| 183 | TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { |
| 184 | if (mppi0 == mppi) { |
| 185 | break; |
| 186 | } |
| 187 | } |
| 188 | if (mppi0 != NULL) { |
| 189 | TAILQ_REMOVE(&mppi_head, mppi0, mppi_entry); |
| 190 | } else { |
| 191 | error = ENXIO; |
| 192 | } |
| 193 | lck_mtx_unlock(lck: &mp_lock); |
| 194 | |
| 195 | return error; |
| 196 | } |
| 197 | |
| 198 | int |
| 199 | mp_pcballoc(struct socket *so, struct mppcbinfo *mppi) |
| 200 | { |
| 201 | struct mppcb *mpp = NULL; |
| 202 | int error; |
| 203 | |
| 204 | VERIFY(mpsotomppcb(so) == NULL); |
| 205 | |
| 206 | mpp = mppi->mppi_alloc(); |
| 207 | lck_mtx_init(lck: &mpp->mpp_lock, grp: mppi->mppi_lock_grp, attr: &mppi->mppi_lock_attr); |
| 208 | mpp->mpp_pcbinfo = mppi; |
| 209 | mpp->mpp_state = MPPCB_STATE_INUSE; |
| 210 | mpp->mpp_socket = so; |
| 211 | so->so_pcb = mpp; |
| 212 | |
| 213 | error = mptcp_session_create(mpp); |
| 214 | if (error) { |
| 215 | lck_mtx_destroy(lck: &mpp->mpp_lock, grp: mppi->mppi_lock_grp); |
| 216 | mppi->mppi_free(mpp); |
| 217 | return error; |
| 218 | } |
| 219 | |
| 220 | lck_mtx_lock(lck: &mppi->mppi_lock); |
| 221 | mpp->mpp_flags |= MPP_ATTACHED; |
| 222 | TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry); |
| 223 | mppi->mppi_count++; |
| 224 | |
| 225 | lck_mtx_unlock(lck: &mppi->mppi_lock); |
| 226 | |
| 227 | return 0; |
| 228 | } |
| 229 | |
| 230 | void |
| 231 | mp_pcbdetach(struct socket *mp_so) |
| 232 | { |
| 233 | struct mppcb *mpp = mpsotomppcb(mp_so); |
| 234 | |
| 235 | mpp->mpp_state = MPPCB_STATE_DEAD; |
| 236 | |
| 237 | mp_gc_sched(); |
| 238 | } |
| 239 | |
| 240 | void |
| 241 | mptcp_pcbdispose(struct mppcb *mpp) |
| 242 | { |
| 243 | struct mppcbinfo *mppi = mpp->mpp_pcbinfo; |
| 244 | struct socket *mp_so = mpp->mpp_socket; |
| 245 | |
| 246 | VERIFY(mppi != NULL); |
| 247 | |
| 248 | LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED); |
| 249 | mpp_lock_assert_held(mp: mpp); |
| 250 | |
| 251 | VERIFY(mpp->mpp_state == MPPCB_STATE_DEAD); |
| 252 | VERIFY(mpp->mpp_flags & MPP_ATTACHED); |
| 253 | |
| 254 | mpp->mpp_flags &= ~MPP_ATTACHED; |
| 255 | TAILQ_REMOVE(&mppi->mppi_pcbs, mpp, mpp_entry); |
| 256 | VERIFY(mppi->mppi_count != 0); |
| 257 | mppi->mppi_count--; |
| 258 | |
| 259 | if (mppi->mppi_count == 0) { |
| 260 | if (mptcp_cellicon_refcount) { |
| 261 | os_log_error(mptcp_log_handle, "%s: No more MPTCP-flows, but cell icon counter is %u\n" , |
| 262 | __func__, mptcp_cellicon_refcount); |
| 263 | mptcp_clear_cellicon(); |
| 264 | mptcp_cellicon_refcount = 0; |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | VERIFY(mpp->mpp_inside == 0); |
| 269 | mpp_unlock(mp: mpp); |
| 270 | |
| 271 | #if NECP |
| 272 | necp_mppcb_dispose(mpp); |
| 273 | #endif /* NECP */ |
| 274 | |
| 275 | sofreelastref(mp_so, 0); |
| 276 | if (mp_so->so_rcv.sb_cc > 0 || mp_so->so_snd.sb_cc > 0) { |
| 277 | /* |
| 278 | * selthreadclear() already called |
| 279 | * during sofreelastref() above. |
| 280 | */ |
| 281 | sbrelease(sb: &mp_so->so_rcv); |
| 282 | sbrelease(sb: &mp_so->so_snd); |
| 283 | } |
| 284 | |
| 285 | lck_mtx_destroy(lck: &mpp->mpp_lock, grp: mppi->mppi_lock_grp); |
| 286 | |
| 287 | VERIFY(mpp->mpp_socket != NULL); |
| 288 | VERIFY(mpp->mpp_socket->so_usecount == 0); |
| 289 | mpp->mpp_socket->so_pcb = NULL; |
| 290 | mpp->mpp_socket = NULL; |
| 291 | mppi->mppi_free(mpp); |
| 292 | } |
| 293 | |
| 294 | static int |
| 295 | mp_getaddr_v4(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
| 296 | { |
| 297 | struct mptses *mpte = mpsotompte(so: mp_so); |
| 298 | struct sockaddr_in *sin; |
| 299 | |
| 300 | /* |
| 301 | * Do the malloc first in case it blocks. |
| 302 | */ |
| 303 | sin = (struct sockaddr_in *)alloc_sockaddr(size: sizeof(*sin), |
| 304 | flags: Z_WAITOK | Z_NOFAIL); |
| 305 | |
| 306 | sin->sin_family = AF_INET; |
| 307 | |
| 308 | if (!peer) { |
| 309 | sin->sin_port = mpte->__mpte_src_v4.sin_port; |
| 310 | sin->sin_addr = mpte->__mpte_src_v4.sin_addr; |
| 311 | } else { |
| 312 | sin->sin_port = mpte->__mpte_dst_v4.sin_port; |
| 313 | sin->sin_addr = mpte->__mpte_dst_v4.sin_addr; |
| 314 | } |
| 315 | |
| 316 | *nam = (struct sockaddr *)sin; |
| 317 | return 0; |
| 318 | } |
| 319 | |
| 320 | static int |
| 321 | mp_getaddr_v6(struct socket *mp_so, struct sockaddr **nam, boolean_t peer) |
| 322 | { |
| 323 | struct mptses *mpte = mpsotompte(so: mp_so); |
| 324 | struct in6_addr addr; |
| 325 | in_port_t port; |
| 326 | uint32_t ifscope; |
| 327 | |
| 328 | if (!peer) { |
| 329 | port = mpte->__mpte_src_v6.sin6_port; |
| 330 | addr = mpte->__mpte_src_v6.sin6_addr; |
| 331 | ifscope = mpte->__mpte_src_v6.sin6_scope_id; |
| 332 | } else { |
| 333 | port = mpte->__mpte_dst_v6.sin6_port; |
| 334 | addr = mpte->__mpte_dst_v6.sin6_addr; |
| 335 | ifscope = mpte->__mpte_dst_v6.sin6_scope_id; |
| 336 | } |
| 337 | |
| 338 | *nam = in6_sockaddr(port, addr_p: &addr, ifscope); |
| 339 | if (*nam == NULL) { |
| 340 | return ENOBUFS; |
| 341 | } |
| 342 | |
| 343 | return 0; |
| 344 | } |
| 345 | |
| 346 | int |
| 347 | mp_getsockaddr(struct socket *mp_so, struct sockaddr **nam) |
| 348 | { |
| 349 | struct mptses *mpte = mpsotompte(so: mp_so); |
| 350 | |
| 351 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) { |
| 352 | return mp_getaddr_v4(mp_so, nam, false); |
| 353 | } else if (mpte->mpte_src.sa_family == AF_INET6) { |
| 354 | return mp_getaddr_v6(mp_so, nam, false); |
| 355 | } else { |
| 356 | return EINVAL; |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | int |
| 361 | mp_getpeeraddr(struct socket *mp_so, struct sockaddr **nam) |
| 362 | { |
| 363 | struct mptses *mpte = mpsotompte(so: mp_so); |
| 364 | |
| 365 | if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) { |
| 366 | return mp_getaddr_v4(mp_so, nam, true); |
| 367 | } else if (mpte->mpte_src.sa_family == AF_INET6) { |
| 368 | return mp_getaddr_v6(mp_so, nam, true); |
| 369 | } else { |
| 370 | return EINVAL; |
| 371 | } |
| 372 | } |
| 373 | |