| 1 | /* | 
|---|
| 2 | * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. | 
|---|
| 3 | * | 
|---|
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | 
|---|
| 5 | * | 
|---|
| 6 | * This file contains Original Code and/or Modifications of Original Code | 
|---|
| 7 | * as defined in and that are subject to the Apple Public Source License | 
|---|
| 8 | * Version 2.0 (the 'License'). You may not use this file except in | 
|---|
| 9 | * compliance with the License. The rights granted to you under the License | 
|---|
| 10 | * may not be used to create, or enable the creation or redistribution of, | 
|---|
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to | 
|---|
| 12 | * circumvent, violate, or enable the circumvention or violation of, any | 
|---|
| 13 | * terms of an Apple operating system software license agreement. | 
|---|
| 14 | * | 
|---|
| 15 | * Please obtain a copy of the License at | 
|---|
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | 
|---|
| 17 | * | 
|---|
| 18 | * The Original Code and all software distributed under the License are | 
|---|
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | 
|---|
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | 
|---|
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | 
|---|
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | 
|---|
| 23 | * Please see the License for the specific language governing rights and | 
|---|
| 24 | * limitations under the License. | 
|---|
| 25 | * | 
|---|
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | 
|---|
| 27 | */ | 
|---|
| 28 | /* | 
|---|
| 29 | * @OSF_COPYRIGHT@ | 
|---|
| 30 | */ | 
|---|
| 31 | /* | 
|---|
| 32 | * Mach Operating System | 
|---|
| 33 | * Copyright (c) 1991,1990,1989 Carnegie Mellon University | 
|---|
| 34 | * All Rights Reserved. | 
|---|
| 35 | * | 
|---|
| 36 | * Permission to use, copy, modify and distribute this software and its | 
|---|
| 37 | * documentation is hereby granted, provided that both the copyright | 
|---|
| 38 | * notice and this permission notice appear in all copies of the | 
|---|
| 39 | * software, derivative works or modified versions, and any portions | 
|---|
| 40 | * thereof, and that both notices appear in supporting documentation. | 
|---|
| 41 | * | 
|---|
| 42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | 
|---|
| 43 | * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | 
|---|
| 44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | 
|---|
| 45 | * | 
|---|
| 46 | * Carnegie Mellon requests users of this software to return to | 
|---|
| 47 | * | 
|---|
| 48 | *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU | 
|---|
| 49 | *  School of Computer Science | 
|---|
| 50 | *  Carnegie Mellon University | 
|---|
| 51 | *  Pittsburgh PA 15213-3890 | 
|---|
| 52 | * | 
|---|
| 53 | * any improvements or extensions that they make and grant Carnegie Mellon | 
|---|
| 54 | * the rights to redistribute these changes. | 
|---|
| 55 | */ | 
|---|
| 56 | /* | 
|---|
| 57 | */ | 
|---|
| 58 | /* | 
|---|
| 59 | *	File:	ipc/ipc_pset.c | 
|---|
| 60 | *	Author:	Rich Draves | 
|---|
| 61 | *	Date:	1989 | 
|---|
| 62 | * | 
|---|
| 63 | *	Functions to manipulate IPC port sets. | 
|---|
| 64 | */ | 
|---|
| 65 |  | 
|---|
| 66 | #include <mach/port.h> | 
|---|
| 67 | #include <mach/kern_return.h> | 
|---|
| 68 | #include <mach/message.h> | 
|---|
| 69 | #include <ipc/ipc_mqueue.h> | 
|---|
| 70 | #include <ipc/ipc_object.h> | 
|---|
| 71 | #include <ipc/ipc_pset.h> | 
|---|
| 72 | #include <ipc/ipc_right.h> | 
|---|
| 73 | #include <ipc/ipc_space.h> | 
|---|
| 74 | #include <ipc/ipc_port.h> | 
|---|
| 75 | #include <ipc/ipc_kmsg.h> | 
|---|
| 76 | #include <kern/policy_internal.h> | 
|---|
| 77 |  | 
|---|
| 78 | #include <kern/kern_types.h> | 
|---|
| 79 |  | 
|---|
| 80 | #include <vm/vm_map.h> | 
|---|
| 81 | #include <libkern/section_keywords.h> | 
|---|
| 82 | #include <pthread/priority_private.h> | 
|---|
| 83 |  | 
|---|
| 84 | /* processor_set stole ipc_pset_init */ | 
|---|
| 85 | static void | 
|---|
| 86 | ipc_port_set_init(ipc_pset_t pset, mach_port_name_t name, int policy) | 
|---|
| 87 | { | 
|---|
| 88 | waitq_init(waitq: &pset->ips_wqset, type: WQT_PORT_SET, policy: policy | SYNC_POLICY_FIFO); | 
|---|
| 89 | klist_init(list: &pset->ips_klist); | 
|---|
| 90 | pset->ips_wqset.wqset_index = MACH_PORT_INDEX(name); | 
|---|
| 91 | } | 
|---|
| 92 |  | 
|---|
| 93 | /* | 
|---|
| 94 | *	Routine:	ipc_pset_alloc | 
|---|
| 95 | *	Purpose: | 
|---|
| 96 | *		Allocate a port set. | 
|---|
| 97 | *	Conditions: | 
|---|
| 98 | *		Nothing locked.  If successful, the port set is returned | 
|---|
| 99 | *		locked.  (The caller doesn't have a reference.) | 
|---|
| 100 | *	Returns: | 
|---|
| 101 | *		KERN_SUCCESS		The port set is allocated. | 
|---|
| 102 | *		KERN_INVALID_TASK	The space is dead. | 
|---|
| 103 | *		KERN_NO_SPACE		No room for an entry in the space. | 
|---|
| 104 | */ | 
|---|
| 105 |  | 
|---|
| 106 | kern_return_t | 
|---|
| 107 | ipc_pset_alloc( | 
|---|
| 108 | ipc_space_t             space, | 
|---|
| 109 | mach_port_name_t        *namep, | 
|---|
| 110 | ipc_pset_t              *psetp) | 
|---|
| 111 | { | 
|---|
| 112 | ipc_pset_t pset; | 
|---|
| 113 | mach_port_name_t name; | 
|---|
| 114 | kern_return_t kr; | 
|---|
| 115 |  | 
|---|
| 116 | kr = ipc_object_alloc(space, IOT_PORT_SET, | 
|---|
| 117 | MACH_PORT_TYPE_PORT_SET, urefs: 0, | 
|---|
| 118 | namep: &name, objectp: (ipc_object_t *) &pset); | 
|---|
| 119 | if (kr != KERN_SUCCESS) { | 
|---|
| 120 | return kr; | 
|---|
| 121 | } | 
|---|
| 122 | /* space is locked */ | 
|---|
| 123 |  | 
|---|
| 124 | ipc_port_set_init(pset, name, SYNC_POLICY_INIT_LOCKED); | 
|---|
| 125 | /* port set is locked */ | 
|---|
| 126 |  | 
|---|
| 127 | is_write_unlock(space); | 
|---|
| 128 |  | 
|---|
| 129 | *namep = name; | 
|---|
| 130 | *psetp = pset; | 
|---|
| 131 | return KERN_SUCCESS; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | /* | 
|---|
| 135 | *	Routine:	ipc_pset_alloc_name | 
|---|
| 136 | *	Purpose: | 
|---|
| 137 | *		Allocate a port set, with a specific name. | 
|---|
| 138 | *	Conditions: | 
|---|
| 139 | *		Nothing locked.  If successful, the port set is returned | 
|---|
| 140 | *		locked.  (The caller doesn't have a reference.) | 
|---|
| 141 | *	Returns: | 
|---|
| 142 | *		KERN_SUCCESS		The port set is allocated. | 
|---|
| 143 | *		KERN_INVALID_TASK	The space is dead. | 
|---|
| 144 | *		KERN_NAME_EXISTS	The name already denotes a right. | 
|---|
| 145 | */ | 
|---|
| 146 |  | 
|---|
| 147 | kern_return_t | 
|---|
| 148 | ipc_pset_alloc_name( | 
|---|
| 149 | ipc_space_t             space, | 
|---|
| 150 | mach_port_name_t        name, | 
|---|
| 151 | ipc_pset_t              *psetp) | 
|---|
| 152 | { | 
|---|
| 153 | return ipc_object_alloc_name(space, IOT_PORT_SET, | 
|---|
| 154 | MACH_PORT_TYPE_PORT_SET, urefs: 0, | 
|---|
| 155 | name, objectp: (ipc_object_t *)psetp, finish_init: ^(ipc_object_t object){ | 
|---|
| 156 | ipc_port_set_init(ips_object_to_pset(object), name, | 
|---|
| 157 | SYNC_POLICY_INIT_LOCKED); | 
|---|
| 158 | }); | 
|---|
| 159 | } | 
|---|
| 160 |  | 
|---|
| 161 |  | 
|---|
| 162 | /* | 
|---|
| 163 | *	Routine:	ipc_pset_alloc_special | 
|---|
| 164 | *	Purpose: | 
|---|
| 165 | *		Allocate a port set in a special space. | 
|---|
| 166 | *		The new port set is returned with one ref. | 
|---|
| 167 | *		If unsuccessful, IPS_NULL is returned. | 
|---|
| 168 | *	Conditions: | 
|---|
| 169 | *		Nothing locked. | 
|---|
| 170 | */ | 
|---|
| 171 | ipc_pset_t | 
|---|
| 172 | ipc_pset_alloc_special( | 
|---|
| 173 | __assert_only ipc_space_t space) | 
|---|
| 174 | { | 
|---|
| 175 | ipc_pset_t pset; | 
|---|
| 176 |  | 
|---|
| 177 | assert(space != IS_NULL); | 
|---|
| 178 | assert(!is_active(space)); | 
|---|
| 179 |  | 
|---|
| 180 | pset = ips_object_to_pset(io_alloc(IOT_PORT_SET, Z_WAITOK | Z_ZERO)); | 
|---|
| 181 | if (pset == IPS_NULL) { | 
|---|
| 182 | return IPS_NULL; | 
|---|
| 183 | } | 
|---|
| 184 |  | 
|---|
| 185 | os_atomic_init(&pset->ips_object.io_bits, io_makebits(IOT_PORT_SET)); | 
|---|
| 186 | os_atomic_init(&pset->ips_object.io_references, 1); | 
|---|
| 187 |  | 
|---|
| 188 | ipc_port_set_init(pset, MACH_PORT_SPECIAL_DEFAULT, policy: 0); | 
|---|
| 189 |  | 
|---|
| 190 | return pset; | 
|---|
| 191 | } | 
|---|
| 192 |  | 
|---|
| 193 |  | 
|---|
| 194 | /* | 
|---|
| 195 | *	Routine:	ipc_pset_destroy | 
|---|
| 196 | *	Purpose: | 
|---|
| 197 | *		Destroys a port_set. | 
|---|
| 198 | *	Conditions: | 
|---|
| 199 | *		The port_set is locked and alive. | 
|---|
| 200 | *		The caller has a reference, which is consumed. | 
|---|
| 201 | *		Afterwards, the port_set is unlocked and dead. | 
|---|
| 202 | */ | 
|---|
| 203 |  | 
|---|
| 204 | void | 
|---|
| 205 | ipc_pset_destroy( | 
|---|
| 206 | ipc_space_t     space, | 
|---|
| 207 | ipc_pset_t      pset) | 
|---|
| 208 | { | 
|---|
| 209 | waitq_link_list_t free_l = { }; | 
|---|
| 210 |  | 
|---|
| 211 | assert(ips_active(pset)); | 
|---|
| 212 |  | 
|---|
| 213 | io_bits_andnot(ips_to_object(pset), IO_BITS_ACTIVE); | 
|---|
| 214 |  | 
|---|
| 215 | /* | 
|---|
| 216 | * Set all waiters on the portset running to | 
|---|
| 217 | * discover the change. | 
|---|
| 218 | * | 
|---|
| 219 | * Then under the same lock hold, deinit the waitq-set, | 
|---|
| 220 | * which will remove all the member message queues, | 
|---|
| 221 | * linkages and clean up preposts. | 
|---|
| 222 | */ | 
|---|
| 223 | ipc_mqueue_changed(space, waitq: &pset->ips_wqset); | 
|---|
| 224 | waitq_invalidate(wq: &pset->ips_wqset); | 
|---|
| 225 | waitq_set_unlink_all_locked(wqset: &pset->ips_wqset, free_l: &free_l); | 
|---|
| 226 |  | 
|---|
| 227 | ips_mq_unlock(pset); | 
|---|
| 228 |  | 
|---|
| 229 | ips_release(pset);       /* consume the ref our caller gave us */ | 
|---|
| 230 |  | 
|---|
| 231 | waitq_link_free_list(type: WQT_PORT_SET, list: &free_l); | 
|---|
| 232 | } | 
|---|
| 233 |  | 
|---|
| 234 | /* | 
|---|
| 235 | *	Routine:	ipc_pset_finalize | 
|---|
| 236 | *	Purpose: | 
|---|
| 237 | *		Called on last reference deallocate to | 
|---|
| 238 | *		free any remaining data associated with the pset. | 
|---|
| 239 | *	Conditions: | 
|---|
| 240 | *		Nothing locked. | 
|---|
| 241 | */ | 
|---|
| 242 | void | 
|---|
| 243 | ipc_pset_finalize( | 
|---|
| 244 | ipc_pset_t              pset) | 
|---|
| 245 | { | 
|---|
| 246 | waitq_deinit(waitq: &pset->ips_wqset); | 
|---|
| 247 | } | 
|---|
| 248 |  | 
|---|
| 249 |  | 
|---|
| 250 | #pragma mark - kevent support | 
|---|
| 251 |  | 
|---|
| 252 | /* | 
|---|
| 253 | * Kqueue EVFILT_MACHPORT support | 
|---|
| 254 | * | 
|---|
| 255 | * - kn_ipc_{port,pset} points to the monitored ipc port or pset. If the knote | 
|---|
| 256 | *   is using a kqwl, it is eligible to participate in sync IPC overrides. | 
|---|
| 257 | * | 
|---|
| 258 | *   For the first such sync IPC message in the port, we set up the port's | 
|---|
| 259 | *   turnstile to directly push on the kqwl's turnstile (which is in turn set up | 
|---|
| 260 | *   during filt_machportattach). If userspace responds to the message, the | 
|---|
| 261 | *   turnstile push is severed at the point of reply. If userspace returns without | 
|---|
| 262 | *   responding to the message, we sever the turnstile push at the | 
|---|
| 263 | *   point of reenabling the knote to deliver the next message. This is why the | 
|---|
| 264 | *   knote needs to remember the port. For more details, see also | 
|---|
| 265 | *   filt_machport_turnstile_complete. | 
|---|
| 266 | * | 
|---|
| 267 | *   If there are multiple other sync IPC messages in the port, messages 2 to n | 
|---|
| 268 | *   redirect their turnstile push to the kqwl through an intermediary "knote" | 
|---|
| 269 | *   turnstile which in turn, pushes on the kqwl turnstile. This knote turnstile | 
|---|
| 270 | *   is stored in the kn_hook. See also filt_machport_turnstile_prepare_lazily. | 
|---|
| 271 | * | 
|---|
| 272 | * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer | 
|---|
| 273 | *   that can be used to direct-deliver messages when | 
|---|
| 274 | *   MACH_RCV_MSG is set in kn_sfflags | 
|---|
| 275 | * | 
|---|
| 276 | * - (in/out) ext[1] holds a mach_msg_size_t representing the size | 
|---|
| 277 | *   of the userspace buffer held in ext[0]. | 
|---|
| 278 | * | 
|---|
| 279 | * - (out)    ext[2] is used to deliver qos information | 
|---|
| 280 | *   about the send queue to userspace. | 
|---|
| 281 | * | 
|---|
| 282 | * - (abused) ext[3] is used in kernel to hold a reference to the first port | 
|---|
| 283 | *   with a turnstile that participates in sync IPC override. For more details, | 
|---|
| 284 | *   see filt_machport_stash_port | 
|---|
| 285 | * | 
|---|
| 286 | * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor | 
|---|
| 287 | *   of turnstiles for rights copied out as part of direct message delivery | 
|---|
| 288 | *   when they can participate in sync IPC override. | 
|---|
| 289 | * | 
|---|
| 290 | *   It is used to atomically neuter the sync IPC override when the knote is | 
|---|
| 291 | *   re-enabled. | 
|---|
| 292 | * | 
|---|
| 293 | */ | 
|---|
| 294 |  | 
|---|
| 295 | #include <sys/event.h> | 
|---|
| 296 | #include <sys/errno.h> | 
|---|
| 297 |  | 
|---|
| 298 | static int | 
|---|
| 299 | filt_pset_filter_result(ipc_pset_t pset) | 
|---|
| 300 | { | 
|---|
| 301 | ips_mq_lock_held(pset); | 
|---|
| 302 |  | 
|---|
| 303 | if (!waitq_is_valid(wq: &pset->ips_wqset)) { | 
|---|
| 304 | return 0; | 
|---|
| 305 | } | 
|---|
| 306 |  | 
|---|
| 307 | return waitq_set_first_prepost(wqset: &pset->ips_wqset, flags: WQS_PREPOST_PEEK) ? | 
|---|
| 308 | FILTER_ACTIVE : 0; | 
|---|
| 309 | } | 
|---|
| 310 |  | 
|---|
| 311 | static int | 
|---|
| 312 | filt_port_filter_result(struct knote *kn, ipc_port_t port) | 
|---|
| 313 | { | 
|---|
| 314 | struct kqueue *kqwl = knote_get_kq(kn); | 
|---|
| 315 | ipc_kmsg_t first; | 
|---|
| 316 | int result = 0; | 
|---|
| 317 |  | 
|---|
| 318 | ip_mq_lock_held(port); | 
|---|
| 319 |  | 
|---|
| 320 | if (kn->kn_sfflags & MACH_RCV_MSG) { | 
|---|
| 321 | result = FILTER_RESET_EVENT_QOS; | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | if (!waitq_is_valid(wq: &port->ip_waitq)) { | 
|---|
| 325 | return result; | 
|---|
| 326 | } | 
|---|
| 327 |  | 
|---|
| 328 | if (port->ip_kernel_iotier_override != kqueue_get_iotier_override(kqu: kqwl)) { | 
|---|
| 329 | kqueue_set_iotier_override(kqu: kqwl, iotier_override: port->ip_kernel_iotier_override); | 
|---|
| 330 | result |= FILTER_ADJUST_EVENT_IOTIER_BIT; | 
|---|
| 331 | } | 
|---|
| 332 |  | 
|---|
| 333 | first = ipc_kmsg_queue_first(&port->ip_messages.imq_messages); | 
|---|
| 334 | if (!first) { | 
|---|
| 335 | return result; | 
|---|
| 336 | } | 
|---|
| 337 |  | 
|---|
| 338 | result = FILTER_ACTIVE; | 
|---|
| 339 | if (kn->kn_sfflags & MACH_RCV_MSG) { | 
|---|
| 340 | result |= FILTER_ADJUST_EVENT_QOS(first->ikm_qos_override); | 
|---|
| 341 | } | 
|---|
| 342 |  | 
|---|
| 343 | #if CONFIG_PREADOPT_TG | 
|---|
| 344 | struct thread_group *tg = ipc_kmsg_get_thread_group(kmsg: first); | 
|---|
| 345 | if (tg) { | 
|---|
| 346 | struct kqueue *kq = knote_get_kq(kn); | 
|---|
| 347 | kqueue_set_preadopted_thread_group(kq, tg, | 
|---|
| 348 | qos: first->ikm_qos_override); | 
|---|
| 349 | } | 
|---|
| 350 | #endif | 
|---|
| 351 |  | 
|---|
| 352 | return result; | 
|---|
| 353 | } | 
|---|
| 354 |  | 
|---|
| 355 | struct turnstile * | 
|---|
| 356 | filt_ipc_kqueue_turnstile(struct knote *kn) | 
|---|
| 357 | { | 
|---|
| 358 | assert(kn->kn_filter == EVFILT_MACHPORT || kn->kn_filter == EVFILT_WORKLOOP); | 
|---|
| 359 | return kqueue_turnstile(knote_get_kq(kn)); | 
|---|
| 360 | } | 
|---|
| 361 |  | 
|---|
| 362 | bool | 
|---|
| 363 | filt_machport_kqueue_has_turnstile(struct knote *kn) | 
|---|
| 364 | { | 
|---|
| 365 | assert(kn->kn_filter == EVFILT_MACHPORT); | 
|---|
| 366 | return ((kn->kn_sfflags & MACH_RCV_MSG) || (kn->kn_sfflags & MACH_RCV_SYNC_PEEK)) | 
|---|
| 367 | && (kn->kn_flags & EV_DISPATCH); | 
|---|
| 368 | } | 
|---|
| 369 |  | 
|---|
| 370 | /* | 
|---|
| 371 | * Stashes a port that participate to sync IPC override on the knote until the | 
|---|
| 372 | * knote is re-enabled. | 
|---|
| 373 | * | 
|---|
| 374 | * It returns: | 
|---|
| 375 | * - the turnstile to use as an inheritor for the stashed port | 
|---|
| 376 | * - the kind of stash that happened as PORT_SYNC_* value among: | 
|---|
| 377 | *   o not stashed (no sync IPC support) | 
|---|
| 378 | *   o stashed in the knote (in kn_ext[3]) | 
|---|
| 379 | *   o to be hooked to the kn_hook knote | 
|---|
| 380 | */ | 
|---|
| 381 | struct turnstile * | 
|---|
| 382 | filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link) | 
|---|
| 383 | { | 
|---|
| 384 | struct turnstile *ts = TURNSTILE_NULL; | 
|---|
| 385 |  | 
|---|
| 386 | if (kn->kn_filter == EVFILT_WORKLOOP) { | 
|---|
| 387 | assert(kn->kn_ipc_port == NULL); | 
|---|
| 388 | kn->kn_ipc_port = port; | 
|---|
| 389 | ip_reference(port); | 
|---|
| 390 | if (link) { | 
|---|
| 391 | *link = PORT_SYNC_LINK_WORKLOOP_KNOTE; | 
|---|
| 392 | } | 
|---|
| 393 | ts = filt_ipc_kqueue_turnstile(kn); | 
|---|
| 394 | } else if (!filt_machport_kqueue_has_turnstile(kn)) { | 
|---|
| 395 | if (link) { | 
|---|
| 396 | *link = PORT_SYNC_LINK_NO_LINKAGE; | 
|---|
| 397 | } | 
|---|
| 398 | } else if (kn->kn_ext[3] == 0) { | 
|---|
| 399 | ip_reference(port); | 
|---|
| 400 | kn->kn_ext[3] = (uintptr_t)port; | 
|---|
| 401 | ts = filt_ipc_kqueue_turnstile(kn); | 
|---|
| 402 | if (link) { | 
|---|
| 403 | *link = PORT_SYNC_LINK_WORKLOOP_KNOTE; | 
|---|
| 404 | } | 
|---|
| 405 | } else { | 
|---|
| 406 | ts = (struct turnstile *)knote_kn_hook_get_raw(kn); | 
|---|
| 407 | if (link) { | 
|---|
| 408 | *link = PORT_SYNC_LINK_WORKLOOP_STASH; | 
|---|
| 409 | } | 
|---|
| 410 | } | 
|---|
| 411 |  | 
|---|
| 412 | return ts; | 
|---|
| 413 | } | 
|---|
| 414 |  | 
|---|
| 415 | /* | 
|---|
| 416 | * Lazily prepare a turnstile so that filt_machport_stash_port() | 
|---|
| 417 | * can be called with the mqueue lock held. | 
|---|
| 418 | * | 
|---|
| 419 | * It will allocate a turnstile in kn_hook if: | 
|---|
| 420 | * - the knote supports sync IPC override, | 
|---|
| 421 | * - we already stashed a port in kn_ext[3], | 
|---|
| 422 | * - the object that will be copied out has a chance to ask to be stashed. | 
|---|
| 423 | * | 
|---|
| 424 | * It is setup so that its inheritor is the workloop turnstile that has been | 
|---|
| 425 | * allocated when this knote was attached. | 
|---|
| 426 | */ | 
|---|
| 427 | void | 
|---|
| 428 | filt_machport_turnstile_prepare_lazily( | 
|---|
| 429 | struct knote *kn, | 
|---|
| 430 | mach_msg_type_name_t msgt_name, | 
|---|
| 431 | ipc_port_t port) | 
|---|
| 432 | { | 
|---|
| 433 | /* This is called from within filt_machportprocess */ | 
|---|
| 434 | assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED)); | 
|---|
| 435 |  | 
|---|
| 436 | if (!filt_machport_kqueue_has_turnstile(kn)) { | 
|---|
| 437 | return; | 
|---|
| 438 | } | 
|---|
| 439 |  | 
|---|
| 440 | if (kn->kn_ext[3] == 0 || knote_kn_hook_get_raw(kn)) { | 
|---|
| 441 | return; | 
|---|
| 442 | } | 
|---|
| 443 |  | 
|---|
| 444 | struct turnstile *ts = filt_ipc_kqueue_turnstile(kn); | 
|---|
| 445 | if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) || | 
|---|
| 446 | (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) { | 
|---|
| 447 | struct turnstile *kn_ts = turnstile_alloc(); | 
|---|
| 448 | struct turnstile *ts_store; | 
|---|
| 449 | kn_ts = turnstile_prepare(proprietor: (uintptr_t)kn, tstore: &ts_store, turnstile: kn_ts, type: TURNSTILE_KNOTE); | 
|---|
| 450 | knote_kn_hook_set_raw(kn, kn_hook: ts_store); | 
|---|
| 451 |  | 
|---|
| 452 | turnstile_update_inheritor(turnstile: kn_ts, new_inheritor: ts, | 
|---|
| 453 | flags: TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); | 
|---|
| 454 | turnstile_cleanup(); | 
|---|
| 455 | } | 
|---|
| 456 | } | 
|---|
| 457 |  | 
|---|
| 458 | static void | 
|---|
| 459 | filt_machport_turnstile_complete_port(struct knote *kn, ipc_port_t port) | 
|---|
| 460 | { | 
|---|
| 461 | struct turnstile *ts = TURNSTILE_NULL; | 
|---|
| 462 |  | 
|---|
| 463 | ip_mq_lock(port); | 
|---|
| 464 | if (port->ip_specialreply) { | 
|---|
| 465 | /* | 
|---|
| 466 | * If the reply has been sent to the special reply port already, | 
|---|
| 467 | * then the special reply port may already be reused to do something | 
|---|
| 468 | * entirely different. | 
|---|
| 469 | * | 
|---|
| 470 | * However, the only reason for it to still point to this knote is | 
|---|
| 471 | * that it's still waiting for a reply, so when this is the case, | 
|---|
| 472 | * neuter the linkage. | 
|---|
| 473 | */ | 
|---|
| 474 | if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE && | 
|---|
| 475 | port->ip_sync_inheritor_knote == kn) { | 
|---|
| 476 | ipc_port_adjust_special_reply_port_locked(special_reply_port: port, NULL, | 
|---|
| 477 | flags: (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE); | 
|---|
| 478 | /* port unlocked */ | 
|---|
| 479 | } else { | 
|---|
| 480 | ip_mq_unlock(port); | 
|---|
| 481 | } | 
|---|
| 482 | } else { | 
|---|
| 483 | /* | 
|---|
| 484 | * For receive rights, if their IMQ_KNOTE() is still this | 
|---|
| 485 | * knote, then sever the link. | 
|---|
| 486 | */ | 
|---|
| 487 | if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE && | 
|---|
| 488 | port->ip_messages.imq_inheritor_knote == kn) { | 
|---|
| 489 | ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL); | 
|---|
| 490 | ts = port_send_turnstile(port); | 
|---|
| 491 | } | 
|---|
| 492 | if (ts) { | 
|---|
| 493 | turnstile_reference(turnstile: ts); | 
|---|
| 494 | turnstile_update_inheritor(turnstile: ts, TURNSTILE_INHERITOR_NULL, | 
|---|
| 495 | flags: TURNSTILE_IMMEDIATE_UPDATE); | 
|---|
| 496 | } | 
|---|
| 497 | ip_mq_unlock(port); | 
|---|
| 498 |  | 
|---|
| 499 | if (ts) { | 
|---|
| 500 | turnstile_update_inheritor_complete(turnstile: ts, | 
|---|
| 501 | flags: TURNSTILE_INTERLOCK_NOT_HELD); | 
|---|
| 502 | turnstile_deallocate(turnstile: ts); | 
|---|
| 503 | } | 
|---|
| 504 | } | 
|---|
| 505 |  | 
|---|
| 506 | ip_release(port); | 
|---|
| 507 | } | 
|---|
| 508 |  | 
|---|
| 509 | void | 
|---|
| 510 | filt_wldetach_sync_ipc(struct knote *kn) | 
|---|
| 511 | { | 
|---|
| 512 | ipc_port_t port = kn->kn_ipc_port; | 
|---|
| 513 | filt_machport_turnstile_complete_port(kn, port); | 
|---|
| 514 | kn->kn_ipc_port = IP_NULL; | 
|---|
| 515 | } | 
|---|
| 516 |  | 
|---|
| 517 | /* | 
|---|
| 518 | * Other half of filt_machport_turnstile_prepare_lazily() | 
|---|
| 519 | * | 
|---|
| 520 | * This is serialized by the knote state machine. | 
|---|
| 521 | */ | 
|---|
| 522 | static void | 
|---|
| 523 | filt_machport_turnstile_complete(struct knote *kn) | 
|---|
| 524 | { | 
|---|
| 525 | if (kn->kn_ext[3]) { | 
|---|
| 526 | ipc_port_t port = (ipc_port_t)kn->kn_ext[3]; | 
|---|
| 527 | filt_machport_turnstile_complete_port(kn, port); | 
|---|
| 528 | kn->kn_ext[3] = 0; | 
|---|
| 529 | } | 
|---|
| 530 |  | 
|---|
| 531 | struct turnstile *ts = knote_kn_hook_get_raw(kn); | 
|---|
| 532 | if (ts) { | 
|---|
| 533 | turnstile_update_inheritor(turnstile: ts, TURNSTILE_INHERITOR_NULL, | 
|---|
| 534 | flags: TURNSTILE_IMMEDIATE_UPDATE); | 
|---|
| 535 | turnstile_update_inheritor_complete(turnstile: ts, flags: TURNSTILE_INTERLOCK_HELD); | 
|---|
| 536 |  | 
|---|
| 537 | struct turnstile *ts_store = ts; | 
|---|
| 538 | turnstile_complete(proprietor: (uintptr_t)kn, tstore: (struct turnstile **)&ts_store, turnstile: &ts, type: TURNSTILE_KNOTE); | 
|---|
| 539 | knote_kn_hook_set_raw(kn, kn_hook: ts_store); | 
|---|
| 540 |  | 
|---|
| 541 | turnstile_cleanup(); | 
|---|
| 542 |  | 
|---|
| 543 | assert(ts); | 
|---|
| 544 | turnstile_deallocate(turnstile: ts); | 
|---|
| 545 | } | 
|---|
| 546 | } | 
|---|
| 547 |  | 
|---|
| 548 | static void | 
|---|
| 549 | filt_machport_link(struct klist *klist, struct knote *kn) | 
|---|
| 550 | { | 
|---|
| 551 | struct knote *hd = SLIST_FIRST(klist); | 
|---|
| 552 |  | 
|---|
| 553 | if (hd && filt_machport_kqueue_has_turnstile(kn)) { | 
|---|
| 554 | SLIST_INSERT_AFTER(hd, kn, kn_selnext); | 
|---|
| 555 | } else { | 
|---|
| 556 | SLIST_INSERT_HEAD(klist, kn, kn_selnext); | 
|---|
| 557 | } | 
|---|
| 558 | } | 
|---|
| 559 |  | 
|---|
| 560 | static void | 
|---|
| 561 | filt_machport_unlink(struct klist *klist, struct knote *kn) | 
|---|
| 562 | { | 
|---|
| 563 | struct knote **knprev; | 
|---|
| 564 |  | 
|---|
| 565 | KNOTE_DETACH(klist, kn); | 
|---|
| 566 |  | 
|---|
| 567 | /* make sure the first knote is a knote we can push on */ | 
|---|
| 568 | SLIST_FOREACH_PREVPTR(kn, knprev, klist, kn_selnext) { | 
|---|
| 569 | if (filt_machport_kqueue_has_turnstile(kn)) { | 
|---|
| 570 | *knprev = SLIST_NEXT(kn, kn_selnext); | 
|---|
| 571 | SLIST_INSERT_HEAD(klist, kn, kn_selnext); | 
|---|
| 572 | break; | 
|---|
| 573 | } | 
|---|
| 574 | } | 
|---|
| 575 | } | 
|---|
| 576 |  | 
|---|
| 577 | int | 
|---|
| 578 | filt_wlattach_sync_ipc(struct knote *kn) | 
|---|
| 579 | { | 
|---|
| 580 | mach_port_name_t name = (mach_port_name_t)kn->kn_id; | 
|---|
| 581 | ipc_space_t space = current_space(); | 
|---|
| 582 | ipc_entry_bits_t bits; | 
|---|
| 583 | ipc_object_t object; | 
|---|
| 584 | ipc_port_t port = IP_NULL; | 
|---|
| 585 | int error = 0; | 
|---|
| 586 |  | 
|---|
| 587 | if (ipc_right_lookup_read(space, name, bitsp: &bits, objectp: &object) != KERN_SUCCESS) { | 
|---|
| 588 | return ENOENT; | 
|---|
| 589 | } | 
|---|
| 590 | /* object is locked and active */ | 
|---|
| 591 |  | 
|---|
| 592 | if (bits & MACH_PORT_TYPE_RECEIVE) { | 
|---|
| 593 | port = ip_object_to_port(object); | 
|---|
| 594 | if (port->ip_specialreply) { | 
|---|
| 595 | error = ENOENT; | 
|---|
| 596 | } | 
|---|
| 597 | } else if (bits & MACH_PORT_TYPE_SEND_ONCE) { | 
|---|
| 598 | port = ip_object_to_port(object); | 
|---|
| 599 | if (!port->ip_specialreply) { | 
|---|
| 600 | error = ENOENT; | 
|---|
| 601 | } | 
|---|
| 602 | } else { | 
|---|
| 603 | error = ENOENT; | 
|---|
| 604 | } | 
|---|
| 605 | if (error) { | 
|---|
| 606 | io_unlock(object); | 
|---|
| 607 | return error; | 
|---|
| 608 | } | 
|---|
| 609 |  | 
|---|
| 610 | if (port->ip_sync_link_state == PORT_SYNC_LINK_ANY) { | 
|---|
| 611 | io_unlock(object); | 
|---|
| 612 | /* | 
|---|
| 613 | * We cannot start a sync IPC inheritance chain, only further one | 
|---|
| 614 | * Note: this can also happen if the inheritance chain broke | 
|---|
| 615 | * because the original requestor died. | 
|---|
| 616 | */ | 
|---|
| 617 | return ENOENT; | 
|---|
| 618 | } | 
|---|
| 619 |  | 
|---|
| 620 | if (port->ip_specialreply) { | 
|---|
| 621 | ipc_port_adjust_special_reply_port_locked(special_reply_port: port, kn, | 
|---|
| 622 | IPC_PORT_ADJUST_SR_LINK_WORKLOOP, FALSE); | 
|---|
| 623 | } else { | 
|---|
| 624 | ipc_port_adjust_port_locked(port, kn, FALSE); | 
|---|
| 625 | } | 
|---|
| 626 |  | 
|---|
| 627 | /* make sure the port was stashed */ | 
|---|
| 628 | assert(kn->kn_ipc_port == port); | 
|---|
| 629 |  | 
|---|
| 630 | /* port has been unlocked by ipc_port_adjust_* */ | 
|---|
| 631 |  | 
|---|
| 632 | return 0; | 
|---|
| 633 | } | 
|---|
| 634 |  | 
|---|
| 635 | static int | 
|---|
| 636 | filt_psetattach(struct knote *kn, ipc_pset_t pset) | 
|---|
| 637 | { | 
|---|
| 638 | int result = 0; | 
|---|
| 639 |  | 
|---|
| 640 | ips_reference(pset); | 
|---|
| 641 | kn->kn_ipc_pset = pset; | 
|---|
| 642 |  | 
|---|
| 643 | filt_machport_link(klist: &pset->ips_klist, kn); | 
|---|
| 644 | result = filt_pset_filter_result(pset); | 
|---|
| 645 | ips_mq_unlock(pset); | 
|---|
| 646 |  | 
|---|
| 647 | return result; | 
|---|
| 648 | } | 
|---|
| 649 |  | 
|---|
| 650 | static int | 
|---|
| 651 | filt_portattach(struct knote *kn, ipc_port_t port) | 
|---|
| 652 | { | 
|---|
| 653 | struct turnstile *send_turnstile = TURNSTILE_NULL; | 
|---|
| 654 | int result = 0; | 
|---|
| 655 |  | 
|---|
| 656 | if (port->ip_specialreply) { | 
|---|
| 657 | /* | 
|---|
| 658 | * Registering for kevents on special reply ports | 
|---|
| 659 | * isn't supported for two reasons: | 
|---|
| 660 | * | 
|---|
| 661 | * 1. it really makes very little sense for a port that | 
|---|
| 662 | *    is supposed to be used synchronously | 
|---|
| 663 | * | 
|---|
| 664 | * 2. their ports's ip_klist field will be used to | 
|---|
| 665 | *    store the receive turnstile, so we can't possibly | 
|---|
| 666 | *    attach them anyway. | 
|---|
| 667 | */ | 
|---|
| 668 | ip_mq_unlock(port); | 
|---|
| 669 | knote_set_error(kn, ENOTSUP); | 
|---|
| 670 | return 0; | 
|---|
| 671 | } | 
|---|
| 672 |  | 
|---|
| 673 | ip_reference(port); | 
|---|
| 674 | kn->kn_ipc_port = port; | 
|---|
| 675 | if (port->ip_sync_link_state != PORT_SYNC_LINK_ANY) { | 
|---|
| 676 | /* | 
|---|
| 677 | * We're attaching a port that used to have an IMQ_KNOTE, | 
|---|
| 678 | * clobber this state, we'll fixup its turnstile inheritor below. | 
|---|
| 679 | */ | 
|---|
| 680 | ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL); | 
|---|
| 681 | } | 
|---|
| 682 |  | 
|---|
| 683 | filt_machport_link(klist: &port->ip_klist, kn); | 
|---|
| 684 | result = filt_port_filter_result(kn, port); | 
|---|
| 685 |  | 
|---|
| 686 | /* | 
|---|
| 687 | * Update the port's turnstile inheritor | 
|---|
| 688 | * | 
|---|
| 689 | * Unlike filt_machportdetach(), we don't have to care about races for | 
|---|
| 690 | * turnstile_workloop_pusher_info(): filt_machport_link() doesn't affect | 
|---|
| 691 | * already pushing knotes, and if the current one becomes the new | 
|---|
| 692 | * pusher, it'll only be visible when turnstile_workloop_pusher_info() | 
|---|
| 693 | * returns. | 
|---|
| 694 | */ | 
|---|
| 695 | send_turnstile = port_send_turnstile(port); | 
|---|
| 696 | if (send_turnstile) { | 
|---|
| 697 | turnstile_reference(turnstile: send_turnstile); | 
|---|
| 698 | ipc_port_send_update_inheritor(port, turnstile: send_turnstile, | 
|---|
| 699 | flags: TURNSTILE_IMMEDIATE_UPDATE); | 
|---|
| 700 |  | 
|---|
| 701 | /* | 
|---|
| 702 | * rdar://problem/48861190 | 
|---|
| 703 | * | 
|---|
| 704 | * When a listener connection resumes a peer, | 
|---|
| 705 | * updating the inheritor above has moved the push | 
|---|
| 706 | * from the current thread to the workloop. | 
|---|
| 707 | * | 
|---|
| 708 | * However, we haven't told the workloop yet | 
|---|
| 709 | * that it needs a thread request, and we risk | 
|---|
| 710 | * to be preeempted as soon as we drop the space | 
|---|
| 711 | * lock below. | 
|---|
| 712 | * | 
|---|
| 713 | * To avoid this disable preemption and let kevent | 
|---|
| 714 | * reenable it after it takes the kqlock. | 
|---|
| 715 | */ | 
|---|
| 716 | disable_preemption(); | 
|---|
| 717 | result |= FILTER_THREADREQ_NODEFEER; | 
|---|
| 718 | } | 
|---|
| 719 |  | 
|---|
| 720 | ip_mq_unlock(port); | 
|---|
| 721 |  | 
|---|
| 722 | if (send_turnstile) { | 
|---|
| 723 | turnstile_update_inheritor_complete(turnstile: send_turnstile, | 
|---|
| 724 | flags: TURNSTILE_INTERLOCK_NOT_HELD); | 
|---|
| 725 | turnstile_deallocate_safe(turnstile: send_turnstile); | 
|---|
| 726 | } | 
|---|
| 727 |  | 
|---|
| 728 | return result; | 
|---|
| 729 | } | 
|---|
| 730 |  | 
|---|
| 731 | static int | 
|---|
| 732 | filt_machportattach(struct knote *kn, __unused struct kevent_qos_s *kev) | 
|---|
| 733 | { | 
|---|
| 734 | mach_port_name_t name = (mach_port_name_t)kn->kn_id; | 
|---|
| 735 | ipc_space_t space = current_space(); | 
|---|
| 736 | ipc_entry_bits_t bits; | 
|---|
| 737 | ipc_object_t object; | 
|---|
| 738 | kern_return_t kr; | 
|---|
| 739 |  | 
|---|
| 740 | kn->kn_flags &= ~EV_EOF; | 
|---|
| 741 | kn->kn_ext[3] = 0; | 
|---|
| 742 |  | 
|---|
| 743 | if (filt_machport_kqueue_has_turnstile(kn)) { | 
|---|
| 744 | /* | 
|---|
| 745 | * If the filter is likely to support sync IPC override, | 
|---|
| 746 | * and it happens to be attaching to a workloop, | 
|---|
| 747 | * make sure the workloop has an allocated turnstile. | 
|---|
| 748 | */ | 
|---|
| 749 | kqueue_alloc_turnstile(knote_get_kq(kn)); | 
|---|
| 750 | } | 
|---|
| 751 |  | 
|---|
| 752 | kr = ipc_right_lookup_read(space, name, bitsp: &bits, objectp: &object); | 
|---|
| 753 |  | 
|---|
| 754 | if (kr != KERN_SUCCESS) { | 
|---|
| 755 | knote_set_error(kn, ENOENT); | 
|---|
| 756 | return 0; | 
|---|
| 757 | } | 
|---|
| 758 | /* object is locked and active */ | 
|---|
| 759 |  | 
|---|
| 760 | if (bits & MACH_PORT_TYPE_PORT_SET) { | 
|---|
| 761 | kn->kn_filtid = EVFILTID_MACH_PORT_SET; | 
|---|
| 762 | return filt_psetattach(kn, ips_object_to_pset(object)); | 
|---|
| 763 | } | 
|---|
| 764 |  | 
|---|
| 765 | if (bits & MACH_PORT_TYPE_RECEIVE) { | 
|---|
| 766 | kn->kn_filtid = EVFILTID_MACH_PORT; | 
|---|
| 767 | return filt_portattach(kn, ip_object_to_port(object)); | 
|---|
| 768 | } | 
|---|
| 769 |  | 
|---|
| 770 | io_unlock(object); | 
|---|
| 771 | knote_set_error(kn, ENOTSUP); | 
|---|
| 772 | return 0; | 
|---|
| 773 | } | 
|---|
| 774 |  | 
|---|
| 775 | static void | 
|---|
| 776 | filt_psetdetach(struct knote *kn) | 
|---|
| 777 | { | 
|---|
| 778 | ipc_pset_t pset = kn->kn_ipc_pset; | 
|---|
| 779 |  | 
|---|
| 780 | filt_machport_turnstile_complete(kn); | 
|---|
| 781 |  | 
|---|
| 782 | ips_mq_lock(pset); | 
|---|
| 783 |  | 
|---|
| 784 | if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) { | 
|---|
| 785 | /* | 
|---|
| 786 | * ipc_mqueue_changed() already unhooked this knote from the waitq, | 
|---|
| 787 | */ | 
|---|
| 788 | } else { | 
|---|
| 789 | filt_machport_unlink(klist: &pset->ips_klist, kn); | 
|---|
| 790 | } | 
|---|
| 791 |  | 
|---|
| 792 | kn->kn_ipc_pset = IPS_NULL; | 
|---|
| 793 | ips_mq_unlock(pset); | 
|---|
| 794 | ips_release(pset); | 
|---|
| 795 | } | 
|---|
| 796 |  | 
|---|
| 797 | static void | 
|---|
| 798 | filt_portdetach(struct knote *kn) | 
|---|
| 799 | { | 
|---|
| 800 | ipc_port_t port = kn->kn_ipc_port; | 
|---|
| 801 | struct turnstile *send_turnstile = TURNSTILE_NULL; | 
|---|
| 802 |  | 
|---|
| 803 | filt_machport_turnstile_complete(kn); | 
|---|
| 804 |  | 
|---|
| 805 | ip_mq_lock(port); | 
|---|
| 806 | if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) { | 
|---|
| 807 | /* | 
|---|
| 808 | * ipc_mqueue_changed() already unhooked this knote from the waitq, | 
|---|
| 809 | */ | 
|---|
| 810 | } else { | 
|---|
| 811 | /* | 
|---|
| 812 | * When the knote being detached is the first one in the list, | 
|---|
| 813 | * then unlinking the knote *and* updating the turnstile inheritor | 
|---|
| 814 | * need to happen atomically with respect to the callers of | 
|---|
| 815 | * turnstile_workloop_pusher_info(). | 
|---|
| 816 | * | 
|---|
| 817 | * The caller of turnstile_workloop_pusher_info() will use the kq req | 
|---|
| 818 | * lock (and hence the kqlock), so we just need to hold the kqlock too. | 
|---|
| 819 | */ | 
|---|
| 820 | assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY); | 
|---|
| 821 | if (kn == SLIST_FIRST(&port->ip_klist)) { | 
|---|
| 822 | send_turnstile = port_send_turnstile(port); | 
|---|
| 823 | } | 
|---|
| 824 | filt_machport_unlink(klist: &port->ip_klist, kn); | 
|---|
| 825 | struct kqueue *kq = knote_get_kq(kn); | 
|---|
| 826 | kqueue_set_iotier_override(kqu: kq, THROTTLE_LEVEL_END); | 
|---|
| 827 | } | 
|---|
| 828 |  | 
|---|
| 829 | if (send_turnstile) { | 
|---|
| 830 | turnstile_reference(turnstile: send_turnstile); | 
|---|
| 831 | ipc_port_send_update_inheritor(port, turnstile: send_turnstile, | 
|---|
| 832 | flags: TURNSTILE_IMMEDIATE_UPDATE); | 
|---|
| 833 | } | 
|---|
| 834 |  | 
|---|
| 835 | /* Clear the knote pointer once the knote has been removed from turnstile */ | 
|---|
| 836 | kn->kn_ipc_port = IP_NULL; | 
|---|
| 837 | ip_mq_unlock(port); | 
|---|
| 838 |  | 
|---|
| 839 | if (send_turnstile) { | 
|---|
| 840 | turnstile_update_inheritor_complete(turnstile: send_turnstile, | 
|---|
| 841 | flags: TURNSTILE_INTERLOCK_NOT_HELD); | 
|---|
| 842 | turnstile_deallocate(turnstile: send_turnstile); | 
|---|
| 843 | } | 
|---|
| 844 |  | 
|---|
| 845 | ip_release(port); | 
|---|
| 846 | } | 
|---|
| 847 |  | 
|---|
| 848 | /* | 
|---|
| 849 | * filt_{pset,port}event - deliver events into the mach port filter | 
|---|
| 850 | * | 
|---|
| 851 | * Mach port message arrival events are currently only posted via the | 
|---|
| 852 | * kqueue filter routine for ports. | 
|---|
| 853 | * | 
|---|
| 854 | * If there is a message at the head of the queue, | 
|---|
| 855 | * we indicate that the knote should go active.  If | 
|---|
| 856 | * the message is to be direct-received, we adjust the | 
|---|
| 857 | * QoS of the knote according to the requested and override | 
|---|
| 858 | * QoS of that first message. | 
|---|
| 859 | * | 
|---|
| 860 | * When the knote is for a port-set, the hint is non 0 | 
|---|
| 861 | * and is the waitq which is posting. | 
|---|
| 862 | */ | 
|---|
| 863 | static int | 
|---|
| 864 | filt_psetevent(struct knote *kn __unused, long hint __assert_only) | 
|---|
| 865 | { | 
|---|
| 866 | /* | 
|---|
| 867 | * When called for a port-set, | 
|---|
| 868 | * the posting port waitq is locked. | 
|---|
| 869 | * | 
|---|
| 870 | * waitq_set_first_prepost() | 
|---|
| 871 | * in filt_machport_filter_result() | 
|---|
| 872 | * would try to lock it and be very sad. | 
|---|
| 873 | * | 
|---|
| 874 | * Just trust what we know to be true. | 
|---|
| 875 | */ | 
|---|
| 876 | assert(hint != 0); | 
|---|
| 877 | return FILTER_ACTIVE; | 
|---|
| 878 | } | 
|---|
| 879 |  | 
|---|
| 880 | static int | 
|---|
| 881 | filt_portevent(struct knote *kn, long hint __assert_only) | 
|---|
| 882 | { | 
|---|
| 883 | assert(hint == 0); | 
|---|
| 884 | return filt_port_filter_result(kn, port: kn->kn_ipc_port); | 
|---|
| 885 | } | 
|---|
| 886 |  | 
|---|
| 887 | void | 
|---|
| 888 | ipc_pset_prepost(struct waitq_set *wqs, struct waitq *waitq) | 
|---|
| 889 | { | 
|---|
| 890 | KNOTE(&ips_from_waitq(wqs)->ips_klist, (long)waitq); | 
|---|
| 891 | } | 
|---|
| 892 |  | 
|---|
| 893 | static void | 
|---|
| 894 | filt_machporttouch(struct knote *kn, struct kevent_qos_s *kev) | 
|---|
| 895 | { | 
|---|
| 896 | /* | 
|---|
| 897 | * Specificying MACH_RCV_MSG or MACH_RCV_SYNC_PEEK during attach results in | 
|---|
| 898 | * allocation of a turnstile. Modifying the filter flags to include these | 
|---|
| 899 | * flags later, without a turnstile being allocated, leads to | 
|---|
| 900 | * inconsistencies. | 
|---|
| 901 | */ | 
|---|
| 902 | if ((kn->kn_sfflags ^ kev->fflags) & (MACH_RCV_MSG | MACH_RCV_SYNC_PEEK)) { | 
|---|
| 903 | kev->flags |= EV_ERROR; | 
|---|
| 904 | kev->data = EINVAL; | 
|---|
| 905 | return; | 
|---|
| 906 | } | 
|---|
| 907 |  | 
|---|
| 908 | /* copy in new settings and save off new input fflags */ | 
|---|
| 909 | kn->kn_sfflags = kev->fflags; | 
|---|
| 910 | kn->kn_ext[0] = kev->ext[0]; | 
|---|
| 911 | kn->kn_ext[1] = kev->ext[1]; | 
|---|
| 912 |  | 
|---|
| 913 | if (kev->flags & EV_ENABLE) { | 
|---|
| 914 | /* | 
|---|
| 915 | * If the knote is being enabled, make sure there's no lingering | 
|---|
| 916 | * IPC overrides from the previous message delivery. | 
|---|
| 917 | */ | 
|---|
| 918 | filt_machport_turnstile_complete(kn); | 
|---|
| 919 | } | 
|---|
| 920 | } | 
|---|
| 921 |  | 
|---|
| 922 | static int | 
|---|
| 923 | filt_psettouch(struct knote *kn, struct kevent_qos_s *kev) | 
|---|
| 924 | { | 
|---|
| 925 | ipc_pset_t pset = kn->kn_ipc_pset; | 
|---|
| 926 | int result = 0; | 
|---|
| 927 |  | 
|---|
| 928 | filt_machporttouch(kn, kev); | 
|---|
| 929 | if (kev->flags & EV_ERROR) { | 
|---|
| 930 | return 0; | 
|---|
| 931 | } | 
|---|
| 932 |  | 
|---|
| 933 | ips_mq_lock(pset); | 
|---|
| 934 | result = filt_pset_filter_result(pset); | 
|---|
| 935 | ips_mq_unlock(pset); | 
|---|
| 936 |  | 
|---|
| 937 | return result; | 
|---|
| 938 | } | 
|---|
| 939 |  | 
|---|
| 940 | static int | 
|---|
| 941 | filt_porttouch(struct knote *kn, struct kevent_qos_s *kev) | 
|---|
| 942 | { | 
|---|
| 943 | ipc_port_t port = kn->kn_ipc_port; | 
|---|
| 944 | int result = 0; | 
|---|
| 945 |  | 
|---|
| 946 | filt_machporttouch(kn, kev); | 
|---|
| 947 | if (kev->flags & EV_ERROR) { | 
|---|
| 948 | return 0; | 
|---|
| 949 | } | 
|---|
| 950 |  | 
|---|
| 951 | ip_mq_lock(port); | 
|---|
| 952 | result = filt_port_filter_result(kn, port); | 
|---|
| 953 | ip_mq_unlock(port); | 
|---|
| 954 |  | 
|---|
| 955 | return result; | 
|---|
| 956 | } | 
|---|
| 957 |  | 
|---|
| 958 | static int | 
|---|
| 959 | filt_machportprocess( | 
|---|
| 960 | struct knote           *kn, | 
|---|
| 961 | struct kevent_qos_s    *kev, | 
|---|
| 962 | ipc_object_t            object, | 
|---|
| 963 | ipc_object_type_t       otype) | 
|---|
| 964 | { | 
|---|
| 965 | thread_t self = current_thread(); | 
|---|
| 966 | kevent_ctx_t kectx = NULL; | 
|---|
| 967 |  | 
|---|
| 968 | wait_result_t wresult; | 
|---|
| 969 | mach_msg_option64_t option64; | 
|---|
| 970 | mach_vm_address_t msg_addr; | 
|---|
| 971 | mach_msg_size_t max_msg_size, cpout_aux_size, cpout_msg_size; | 
|---|
| 972 | uint32_t ppri; | 
|---|
| 973 | mach_msg_qos_t oqos; | 
|---|
| 974 |  | 
|---|
| 975 | int result = FILTER_ACTIVE; | 
|---|
| 976 |  | 
|---|
| 977 | /* Capture current state */ | 
|---|
| 978 | knote_fill_kevent(kn, kev, MACH_PORT_NULL); | 
|---|
| 979 |  | 
|---|
| 980 | /* Clear port reference, use ext3 as size of msg aux data */ | 
|---|
| 981 | kev->ext[3] = 0; | 
|---|
| 982 |  | 
|---|
| 983 | /* If already deallocated/moved return one last EOF event */ | 
|---|
| 984 | if (kev->flags & EV_EOF) { | 
|---|
| 985 | return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS; | 
|---|
| 986 | } | 
|---|
| 987 |  | 
|---|
| 988 | /* | 
|---|
| 989 | * Only honor supported receive options. If no options are | 
|---|
| 990 | * provided, just force a MACH_RCV_LARGE to detect the | 
|---|
| 991 | * name of the port and sizeof the waiting message. | 
|---|
| 992 | * | 
|---|
| 993 | * Extend kn_sfflags to 64 bits. | 
|---|
| 994 | */ | 
|---|
| 995 | option64 = (mach_msg_option64_t)kn->kn_sfflags & (MACH_RCV_MSG | | 
|---|
| 996 | MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY | | 
|---|
| 997 | MACH_RCV_TRAILER_MASK | MACH_RCV_VOUCHER | MACH_MSG_STRICT_REPLY); | 
|---|
| 998 |  | 
|---|
| 999 | if (option64 & MACH_RCV_MSG) { | 
|---|
| 1000 | msg_addr = (mach_vm_address_t) kn->kn_ext[0]; | 
|---|
| 1001 | max_msg_size = (mach_msg_size_t) kn->kn_ext[1]; | 
|---|
| 1002 |  | 
|---|
| 1003 | /* | 
|---|
| 1004 | * Copy out the incoming message as vector, and append aux data | 
|---|
| 1005 | * immediately after the message proper (if any) and report its | 
|---|
| 1006 | * size on ext3. | 
|---|
| 1007 | */ | 
|---|
| 1008 | option64 |= (MACH64_MSG_VECTOR | MACH64_RCV_LINEAR_VECTOR); | 
|---|
| 1009 |  | 
|---|
| 1010 | /* | 
|---|
| 1011 | * If the kevent didn't specify a buffer and length, carve a buffer | 
|---|
| 1012 | * from the filter processing data according to the flags. | 
|---|
| 1013 | */ | 
|---|
| 1014 | if (max_msg_size == 0) { | 
|---|
| 1015 | kectx = kevent_get_context(thread: self); | 
|---|
| 1016 | msg_addr  = (mach_vm_address_t)kectx->kec_data_out; | 
|---|
| 1017 | max_msg_size  = (mach_msg_size_t)kectx->kec_data_resid; | 
|---|
| 1018 | option64 |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY); | 
|---|
| 1019 | /* Receive vector linearly onto stack */ | 
|---|
| 1020 | if (kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) { | 
|---|
| 1021 | option64 |= MACH64_RCV_STACK; | 
|---|
| 1022 | } | 
|---|
| 1023 | } | 
|---|
| 1024 | } else { | 
|---|
| 1025 | /* just detect the port name (if a set) and size of the first message */ | 
|---|
| 1026 | option64 = MACH_RCV_LARGE; | 
|---|
| 1027 | msg_addr = 0; | 
|---|
| 1028 | max_msg_size = 0; | 
|---|
| 1029 | } | 
|---|
| 1030 |  | 
|---|
| 1031 | /* | 
|---|
| 1032 | * Set up to receive a message or the notification of a | 
|---|
| 1033 | * too large message.  But never allow this call to wait. | 
|---|
| 1034 | * If the user provided aditional options, like trailer | 
|---|
| 1035 | * options, pass those through here.  But we don't support | 
|---|
| 1036 | * scatter lists through this interface. | 
|---|
| 1037 | * | 
|---|
| 1038 | * Note: while in filt_machportprocess(), | 
|---|
| 1039 | *       the knote has a reference on `object` that we can borrow. | 
|---|
| 1040 | */ | 
|---|
| 1041 | self->ith_object = object; | 
|---|
| 1042 |  | 
|---|
| 1043 | /* Using msg_addr as combined buffer for message proper and aux */ | 
|---|
| 1044 | self->ith_msg_addr = msg_addr; | 
|---|
| 1045 | self->ith_max_msize = max_msg_size; | 
|---|
| 1046 | self->ith_msize = 0; | 
|---|
| 1047 |  | 
|---|
| 1048 | self->ith_aux_addr = 0; | 
|---|
| 1049 | self->ith_max_asize = 0; | 
|---|
| 1050 | self->ith_asize = 0; | 
|---|
| 1051 |  | 
|---|
| 1052 | self->ith_option = option64; | 
|---|
| 1053 | self->ith_receiver_name = MACH_PORT_NULL; | 
|---|
| 1054 | option64 |= MACH_RCV_TIMEOUT; // never wait | 
|---|
| 1055 | self->ith_state = MACH_RCV_IN_PROGRESS; | 
|---|
| 1056 | self->ith_knote = kn; | 
|---|
| 1057 |  | 
|---|
| 1058 | ipc_object_lock(object, type: otype); | 
|---|
| 1059 |  | 
|---|
| 1060 | wresult = ipc_mqueue_receive_on_thread_and_unlock( | 
|---|
| 1061 | io_waitq(object), | 
|---|
| 1062 | option64, | 
|---|
| 1063 | max_size: self->ith_max_msize,       /* max msg suze */ | 
|---|
| 1064 | max_aux_size: 0,                         /* max aux size 0, using combined buffer */ | 
|---|
| 1065 | rcv_timeout: 0,                         /* immediate timeout */ | 
|---|
| 1066 | THREAD_INTERRUPTIBLE, | 
|---|
| 1067 | thread: self); | 
|---|
| 1068 | /* port unlocked */ | 
|---|
| 1069 |  | 
|---|
| 1070 | /* If we timed out, or the process is exiting, just zero.  */ | 
|---|
| 1071 | if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) { | 
|---|
| 1072 | assert(self->turnstile != TURNSTILE_NULL); | 
|---|
| 1073 | self->ith_knote = ITH_KNOTE_NULL; | 
|---|
| 1074 | return 0; | 
|---|
| 1075 | } | 
|---|
| 1076 |  | 
|---|
| 1077 | assert(wresult == THREAD_NOT_WAITING); | 
|---|
| 1078 | assert(self->ith_state != MACH_RCV_IN_PROGRESS); | 
|---|
| 1079 |  | 
|---|
| 1080 | /* | 
|---|
| 1081 | * If we weren't attempting to receive a message | 
|---|
| 1082 | * directly, we need to return the port name in | 
|---|
| 1083 | * the kevent structure. | 
|---|
| 1084 | */ | 
|---|
| 1085 | if ((option64 & MACH_RCV_MSG) != MACH_RCV_MSG) { | 
|---|
| 1086 | assert(self->ith_state == MACH_RCV_TOO_LARGE); | 
|---|
| 1087 | assert(self->ith_kmsg == IKM_NULL); | 
|---|
| 1088 | kev->data = self->ith_receiver_name; | 
|---|
| 1089 | self->ith_knote = ITH_KNOTE_NULL; | 
|---|
| 1090 | return result; | 
|---|
| 1091 | } | 
|---|
| 1092 |  | 
|---|
| 1093 | #if CONFIG_PREADOPT_TG | 
|---|
| 1094 | /* If we're the first EVFILT_MACHPORT knote that is being processed for this | 
|---|
| 1095 | * kqwl, then make sure to preadopt the thread group from the kmsg we're | 
|---|
| 1096 | * about to receive. This is to make sure that we fix up the preadoption | 
|---|
| 1097 | * thread group correctly on the receive side for the first message. | 
|---|
| 1098 | */ | 
|---|
| 1099 | struct kqueue *kq = knote_get_kq(kn); | 
|---|
| 1100 |  | 
|---|
| 1101 | if (self->ith_kmsg) { | 
|---|
| 1102 | struct thread_group *tg = ipc_kmsg_get_thread_group(kmsg: self->ith_kmsg); | 
|---|
| 1103 |  | 
|---|
| 1104 | kqueue_process_preadopt_thread_group(t: self, kq, tg); | 
|---|
| 1105 | } | 
|---|
| 1106 | #endif | 
|---|
| 1107 | if (otype == IOT_PORT) { | 
|---|
| 1108 | ipc_port_t port = ip_object_to_port(object); | 
|---|
| 1109 | struct kqueue *kqwl = knote_get_kq(kn); | 
|---|
| 1110 | if (port->ip_kernel_iotier_override != kqueue_get_iotier_override(kqu: kqwl)) { | 
|---|
| 1111 | /* | 
|---|
| 1112 | * Lock the port to make sure port->ip_kernel_iotier_override does | 
|---|
| 1113 | * not change while updating the kqueue override, else kqueue could | 
|---|
| 1114 | * have old iotier value. | 
|---|
| 1115 | */ | 
|---|
| 1116 | ip_mq_lock(port); | 
|---|
| 1117 | kqueue_set_iotier_override(kqu: kqwl, iotier_override: port->ip_kernel_iotier_override); | 
|---|
| 1118 | ip_mq_unlock(port); | 
|---|
| 1119 | result |= FILTER_ADJUST_EVENT_IOTIER_BIT; | 
|---|
| 1120 | } | 
|---|
| 1121 | } | 
|---|
| 1122 |  | 
|---|
| 1123 | /* | 
|---|
| 1124 | * Attempt to receive the message directly, returning | 
|---|
| 1125 | * the results in the fflags field. | 
|---|
| 1126 | */ | 
|---|
| 1127 | io_reference(object); | 
|---|
| 1128 | kev->fflags = mach_msg_receive_results_kevent(size: &cpout_msg_size, | 
|---|
| 1129 | aux_size: &cpout_aux_size, ppri: &ppri, oqos: &oqos); | 
|---|
| 1130 |  | 
|---|
| 1131 | /* kmsg and object reference consumed */ | 
|---|
| 1132 |  | 
|---|
| 1133 | /* | 
|---|
| 1134 | * if the user asked for the identity of ports containing a | 
|---|
| 1135 | * a too-large message, return it in the data field (as we | 
|---|
| 1136 | * do for messages we didn't try to receive). | 
|---|
| 1137 | */ | 
|---|
| 1138 | if (kev->fflags == MACH_RCV_TOO_LARGE) { | 
|---|
| 1139 | kev->ext[1] = self->ith_msize; | 
|---|
| 1140 | kev->ext[3] = self->ith_asize;  /* Only lower 32 bits of ext3 are used */ | 
|---|
| 1141 | if (option64 & MACH_RCV_LARGE_IDENTITY) { | 
|---|
| 1142 | kev->data = self->ith_receiver_name; | 
|---|
| 1143 | } else { | 
|---|
| 1144 | kev->data = MACH_PORT_NULL; | 
|---|
| 1145 | } | 
|---|
| 1146 | } else { | 
|---|
| 1147 | kev->ext[1] = cpout_msg_size; | 
|---|
| 1148 | kev->ext[3] = cpout_aux_size; /* Only lower 32 bits of ext3 are used */ | 
|---|
| 1149 | kev->data = MACH_PORT_NULL; | 
|---|
| 1150 | } | 
|---|
| 1151 |  | 
|---|
| 1152 | /* | 
|---|
| 1153 | * If we used a data buffer carved out from the filt_process data, | 
|---|
| 1154 | * store the address used in the knote and adjust the residual and | 
|---|
| 1155 | * other parameters for future use. | 
|---|
| 1156 | */ | 
|---|
| 1157 | if (kectx) { | 
|---|
| 1158 | assert(kectx->kec_data_resid >= cpout_msg_size + cpout_aux_size); | 
|---|
| 1159 | kectx->kec_data_resid -= cpout_msg_size + cpout_aux_size; | 
|---|
| 1160 | if ((kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) == 0) { | 
|---|
| 1161 | kev->ext[0] = kectx->kec_data_out; | 
|---|
| 1162 | kectx->kec_data_out += cpout_msg_size + cpout_aux_size; | 
|---|
| 1163 | } else { | 
|---|
| 1164 | assert(option64 & MACH64_RCV_STACK); | 
|---|
| 1165 | kev->ext[0] = kectx->kec_data_out + kectx->kec_data_resid; | 
|---|
| 1166 | } | 
|---|
| 1167 | } | 
|---|
| 1168 |  | 
|---|
| 1169 | /* | 
|---|
| 1170 | * Apply message-based QoS values to output kevent as prescribed. | 
|---|
| 1171 | * The kev->ext[2] field gets (msg-qos << 32) | (override-qos). | 
|---|
| 1172 | */ | 
|---|
| 1173 | if (kev->fflags == MACH_MSG_SUCCESS) { | 
|---|
| 1174 | kev->ext[2] = ((uint64_t)ppri << 32) | | 
|---|
| 1175 | _pthread_priority_make_from_thread_qos(qos: oqos, relpri: 0, flags: 0); | 
|---|
| 1176 | } | 
|---|
| 1177 |  | 
|---|
| 1178 | self->ith_knote = ITH_KNOTE_NULL; | 
|---|
| 1179 | return result; | 
|---|
| 1180 | } | 
|---|
| 1181 |  | 
|---|
| 1182 | static int | 
|---|
| 1183 | filt_psetprocess(struct knote *kn, struct kevent_qos_s *kev) | 
|---|
| 1184 | { | 
|---|
| 1185 | ipc_object_t io = ips_to_object(kn->kn_ipc_pset); | 
|---|
| 1186 |  | 
|---|
| 1187 | return filt_machportprocess(kn, kev, object: io, IOT_PORT_SET); | 
|---|
| 1188 | } | 
|---|
| 1189 |  | 
|---|
| 1190 | static int | 
|---|
| 1191 | filt_portprocess(struct knote *kn, struct kevent_qos_s *kev) | 
|---|
| 1192 | { | 
|---|
| 1193 | ipc_object_t io = ip_to_object(kn->kn_ipc_port); | 
|---|
| 1194 |  | 
|---|
| 1195 | return filt_machportprocess(kn, kev, object: io, IOT_PORT); | 
|---|
| 1196 | } | 
|---|
| 1197 |  | 
|---|
| 1198 | static void | 
|---|
| 1199 | filt_machportsanitizedcopyout(struct knote *kn, struct kevent_qos_s *kev) | 
|---|
| 1200 | { | 
|---|
| 1201 | *kev = *(struct kevent_qos_s *)&kn->kn_kevent; | 
|---|
| 1202 |  | 
|---|
| 1203 | // We may have stashed the address to the port that is pushing on the sync | 
|---|
| 1204 | // IPC so clear it out. | 
|---|
| 1205 | kev->ext[3] = 0; | 
|---|
| 1206 | } | 
|---|
| 1207 |  | 
|---|
| 1208 | const struct filterops machport_attach_filtops = { | 
|---|
| 1209 | .f_adjusts_qos = true, | 
|---|
| 1210 | .f_extended_codes = true, | 
|---|
| 1211 | .f_attach = filt_machportattach, | 
|---|
| 1212 | .f_sanitized_copyout = filt_machportsanitizedcopyout, | 
|---|
| 1213 | }; | 
|---|
| 1214 |  | 
|---|
| 1215 | const struct filterops mach_port_filtops = { | 
|---|
| 1216 | .f_adjusts_qos = true, | 
|---|
| 1217 | .f_extended_codes = true, | 
|---|
| 1218 | .f_detach = filt_portdetach, | 
|---|
| 1219 | .f_event = filt_portevent, | 
|---|
| 1220 | .f_touch = filt_porttouch, | 
|---|
| 1221 | .f_process = filt_portprocess, | 
|---|
| 1222 | .f_sanitized_copyout = filt_machportsanitizedcopyout, | 
|---|
| 1223 | }; | 
|---|
| 1224 |  | 
|---|
| 1225 | const struct filterops mach_port_set_filtops = { | 
|---|
| 1226 | .f_adjusts_qos = true, | 
|---|
| 1227 | .f_extended_codes = true, | 
|---|
| 1228 | .f_detach = filt_psetdetach, | 
|---|
| 1229 | .f_event = filt_psetevent, | 
|---|
| 1230 | .f_touch = filt_psettouch, | 
|---|
| 1231 | .f_process = filt_psetprocess, | 
|---|
| 1232 | .f_sanitized_copyout = filt_machportsanitizedcopyout, | 
|---|
| 1233 | }; | 
|---|
| 1234 |  | 
|---|