1 | /* |
2 | * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | /* |
57 | */ |
58 | /* |
59 | * File: ipc/ipc_pset.c |
60 | * Author: Rich Draves |
61 | * Date: 1989 |
62 | * |
63 | * Functions to manipulate IPC port sets. |
64 | */ |
65 | |
66 | #include <mach/port.h> |
67 | #include <mach/kern_return.h> |
68 | #include <mach/message.h> |
69 | #include <ipc/ipc_mqueue.h> |
70 | #include <ipc/ipc_object.h> |
71 | #include <ipc/ipc_pset.h> |
72 | #include <ipc/ipc_right.h> |
73 | #include <ipc/ipc_space.h> |
74 | #include <ipc/ipc_port.h> |
75 | |
76 | #include <kern/kern_types.h> |
77 | |
78 | #include <vm/vm_map.h> |
79 | #include <libkern/section_keywords.h> |
80 | |
81 | /* |
82 | * Routine: ipc_pset_alloc |
83 | * Purpose: |
84 | * Allocate a port set. |
85 | * Conditions: |
86 | * Nothing locked. If successful, the port set is returned |
87 | * locked. (The caller doesn't have a reference.) |
88 | * Returns: |
89 | * KERN_SUCCESS The port set is allocated. |
90 | * KERN_INVALID_TASK The space is dead. |
91 | * KERN_NO_SPACE No room for an entry in the space. |
92 | * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. |
93 | */ |
94 | |
95 | kern_return_t |
96 | ipc_pset_alloc( |
97 | ipc_space_t space, |
98 | mach_port_name_t *namep, |
99 | ipc_pset_t *psetp) |
100 | { |
101 | ipc_pset_t pset; |
102 | mach_port_name_t name; |
103 | kern_return_t kr; |
104 | |
105 | kr = ipc_object_alloc(space, IOT_PORT_SET, |
106 | MACH_PORT_TYPE_PORT_SET, 0, |
107 | &name, (ipc_object_t *) &pset); |
108 | if (kr != KERN_SUCCESS) { |
109 | return kr; |
110 | } |
111 | /* pset and space are locked */ |
112 | |
113 | ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); |
114 | is_write_unlock(space); |
115 | |
116 | *namep = name; |
117 | *psetp = pset; |
118 | return KERN_SUCCESS; |
119 | } |
120 | |
121 | /* |
122 | * Routine: ipc_pset_alloc_name |
123 | * Purpose: |
124 | * Allocate a port set, with a specific name. |
125 | * Conditions: |
126 | * Nothing locked. If successful, the port set is returned |
127 | * locked. (The caller doesn't have a reference.) |
128 | * Returns: |
129 | * KERN_SUCCESS The port set is allocated. |
130 | * KERN_INVALID_TASK The space is dead. |
131 | * KERN_NAME_EXISTS The name already denotes a right. |
132 | * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. |
133 | */ |
134 | |
135 | kern_return_t |
136 | ipc_pset_alloc_name( |
137 | ipc_space_t space, |
138 | mach_port_name_t name, |
139 | ipc_pset_t *psetp) |
140 | { |
141 | ipc_pset_t pset; |
142 | kern_return_t kr; |
143 | |
144 | kr = ipc_object_alloc_name(space, IOT_PORT_SET, |
145 | MACH_PORT_TYPE_PORT_SET, 0, |
146 | name, (ipc_object_t *) &pset); |
147 | if (kr != KERN_SUCCESS) { |
148 | return kr; |
149 | } |
150 | /* pset is locked */ |
151 | |
152 | ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); |
153 | |
154 | *psetp = pset; |
155 | return KERN_SUCCESS; |
156 | } |
157 | |
158 | |
159 | /* |
160 | * Routine: ipc_pset_alloc_special |
161 | * Purpose: |
162 | * Allocate a port set in a special space. |
163 | * The new port set is returned with one ref. |
164 | * If unsuccessful, IPS_NULL is returned. |
165 | * Conditions: |
166 | * Nothing locked. |
167 | */ |
168 | ipc_pset_t |
169 | ipc_pset_alloc_special( |
170 | __assert_only ipc_space_t space) |
171 | { |
172 | ipc_pset_t pset; |
173 | |
174 | assert(space != IS_NULL); |
175 | assert(space->is_table == IE_NULL); |
176 | assert(!is_active(space)); |
177 | |
178 | __IGNORE_WCASTALIGN(pset = (ipc_pset_t)io_alloc(IOT_PORT_SET)); |
179 | if (pset == IPS_NULL) { |
180 | return IPS_NULL; |
181 | } |
182 | |
183 | bzero((char *)pset, sizeof(*pset)); |
184 | |
185 | io_lock_init(&pset->ips_object); |
186 | pset->ips_references = 1; |
187 | pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0); |
188 | |
189 | ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); |
190 | |
191 | return pset; |
192 | } |
193 | |
194 | |
195 | /* |
196 | * Routine: ipc_pset_member |
197 | * Purpose: |
198 | * Checks to see if a port is a member of a pset |
199 | * Conditions: |
200 | * Both port and port set are locked. |
201 | * The port must be active. |
202 | */ |
203 | boolean_t |
204 | ipc_pset_member( |
205 | ipc_pset_t pset, |
206 | ipc_port_t port) |
207 | { |
208 | assert(ip_active(port)); |
209 | |
210 | return (ipc_mqueue_member(&port->ip_messages, &pset->ips_messages)); |
211 | } |
212 | |
213 | |
214 | /* |
215 | * Routine: ipc_pset_add |
216 | * Purpose: |
217 | * Puts a port into a port set. |
218 | * Conditions: |
219 | * Both port and port set are locked and active. |
220 | * The owner of the port set is also receiver for the port. |
221 | */ |
222 | |
223 | kern_return_t |
224 | ipc_pset_add( |
225 | ipc_pset_t pset, |
226 | ipc_port_t port, |
227 | uint64_t *reserved_link, |
228 | uint64_t *reserved_prepost) |
229 | { |
230 | kern_return_t kr; |
231 | |
232 | assert(ips_active(pset)); |
233 | assert(ip_active(port)); |
234 | |
235 | kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages, |
236 | reserved_link, reserved_prepost); |
237 | |
238 | return kr; |
239 | } |
240 | |
241 | |
242 | |
243 | /* |
244 | * Routine: ipc_pset_remove |
245 | * Purpose: |
246 | * Removes a port from a port set. |
247 | * The port set loses a reference. |
248 | * Conditions: |
249 | * Both port and port set are locked. |
250 | * The port must be active. |
251 | */ |
252 | |
253 | kern_return_t |
254 | ipc_pset_remove( |
255 | ipc_pset_t pset, |
256 | ipc_port_t port) |
257 | { |
258 | kern_return_t kr; |
259 | |
260 | assert(ip_active(port)); |
261 | |
262 | if (port->ip_in_pset == 0) |
263 | return KERN_NOT_IN_SET; |
264 | |
265 | kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages); |
266 | |
267 | return kr; |
268 | } |
269 | |
270 | /* |
271 | * Routine: ipc_pset_lazy_allocate |
272 | * Purpose: |
273 | * lazily initialize the wqset of a port set. |
274 | * Conditions: |
275 | * Nothing locked. |
276 | */ |
277 | |
278 | kern_return_t |
279 | ipc_pset_lazy_allocate( |
280 | ipc_space_t space, |
281 | mach_port_name_t psname) |
282 | { |
283 | kern_return_t kr; |
284 | ipc_entry_t entry; |
285 | ipc_object_t psobj; |
286 | ipc_pset_t pset; |
287 | |
288 | kr = ipc_right_lookup_read(space, psname, &entry); |
289 | if (kr != KERN_SUCCESS) |
290 | return kr; |
291 | |
292 | /* space is read-locked and active */ |
293 | if ((entry->ie_bits & MACH_PORT_TYPE_PORT_SET) == 0) { |
294 | is_read_unlock(space); |
295 | kr = KERN_INVALID_RIGHT; |
296 | return kr; |
297 | } |
298 | |
299 | psobj = entry->ie_object; |
300 | __IGNORE_WCASTALIGN(pset = (ipc_pset_t) psobj); |
301 | assert(pset != NULL); |
302 | ipc_mqueue_t set_mqueue = &pset->ips_messages; |
303 | struct waitq_set *wqset = &set_mqueue->imq_set_queue; |
304 | |
305 | io_reference(psobj); |
306 | is_read_unlock(space); |
307 | |
308 | /* |
309 | * lazily initialize the wqset to avoid |
310 | * possible allocation while linking |
311 | * under spinlocks. |
312 | */ |
313 | waitq_set_lazy_init_link(wqset); |
314 | io_release(psobj); |
315 | |
316 | return KERN_SUCCESS; |
317 | } |
318 | |
319 | /* |
320 | * Routine: ipc_pset_remove_from_all |
321 | * Purpose: |
322 | * Removes a port from all its port sets. |
323 | * Conditions: |
324 | * port is locked and active. |
325 | */ |
326 | |
327 | kern_return_t |
328 | ipc_pset_remove_from_all( |
329 | ipc_port_t port) |
330 | { |
331 | if (port->ip_in_pset == 0) |
332 | return KERN_NOT_IN_SET; |
333 | |
334 | /* |
335 | * Remove the port's mqueue from all sets |
336 | */ |
337 | ipc_mqueue_remove_from_all(&port->ip_messages); |
338 | return KERN_SUCCESS; |
339 | } |
340 | |
341 | |
342 | /* |
343 | * Routine: ipc_pset_destroy |
344 | * Purpose: |
345 | * Destroys a port_set. |
346 | * Conditions: |
347 | * The port_set is locked and alive. |
348 | * The caller has a reference, which is consumed. |
349 | * Afterwards, the port_set is unlocked and dead. |
350 | */ |
351 | |
352 | void |
353 | ipc_pset_destroy( |
354 | ipc_pset_t pset) |
355 | { |
356 | assert(ips_active(pset)); |
357 | |
358 | pset->ips_object.io_bits &= ~IO_BITS_ACTIVE; |
359 | |
360 | /* |
361 | * remove all the member message queues |
362 | * AND remove this message queue from any containing sets |
363 | */ |
364 | ipc_mqueue_remove_all(&pset->ips_messages); |
365 | |
366 | /* |
367 | * Set all waiters on the portset running to |
368 | * discover the change. |
369 | */ |
370 | imq_lock(&pset->ips_messages); |
371 | ipc_mqueue_changed(&pset->ips_messages); |
372 | imq_unlock(&pset->ips_messages); |
373 | |
374 | ipc_mqueue_deinit(&pset->ips_messages); |
375 | |
376 | ips_unlock(pset); |
377 | ips_release(pset); /* consume the ref our caller gave us */ |
378 | } |
379 | |
380 | /* |
381 | * Kqueue EVFILT_MACHPORT support |
382 | * |
383 | * - kn_ptr.p_mqueue points to the monitored mqueue |
384 | * |
385 | * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer |
386 | * that can be used to direct-deliver messages when |
387 | * MACH_RCV_MSG is set in kn_sfflags |
388 | * |
389 | * - (in/out) ext[1] holds a mach_msg_size_t representing the size |
390 | * of the userspace buffer held in ext[0]. |
391 | * |
392 | * - (out) ext[2] is used to deliver qos information |
393 | * about the send queue to userspace. |
394 | * |
395 | * - (abused) ext[3] is used in kernel to hold a reference to the first port |
396 | * with a turnstile that participates in sync IPC override. |
397 | * |
398 | * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor |
399 | * of turnstiles for rights copied out as part of direct message delivery |
400 | * when they can participate in sync IPC override. |
401 | * |
402 | * It is used to atomically neuter the sync IPC override when the knote is |
403 | * re-enabled. |
404 | * |
405 | */ |
406 | |
407 | #include <sys/event.h> |
408 | #include <sys/errno.h> |
409 | |
410 | static int |
411 | filt_machport_adjust_qos(struct knote *kn, ipc_kmsg_t first) |
412 | { |
413 | if (kn->kn_sfflags & MACH_RCV_MSG) { |
414 | int qos = _pthread_priority_thread_qos(first->ikm_qos_override); |
415 | return FILTER_ADJUST_EVENT_QOS(qos); |
416 | } |
417 | return 0; |
418 | } |
419 | |
420 | struct turnstile * |
421 | filt_machport_kqueue_turnstile(struct knote *kn) |
422 | { |
423 | if ((kn->kn_sfflags & MACH_RCV_MSG) && (kn->kn_status & KN_DISPATCH)) { |
424 | return kqueue_turnstile(knote_get_kq(kn)); |
425 | } |
426 | return TURNSTILE_NULL; |
427 | } |
428 | |
429 | /* |
430 | * Stashes a port that participates in sync IPC override until the knote |
431 | * is re-enabled. |
432 | * |
433 | * It returns: |
434 | * - the turnstile to use as an inheritor for the stashed port |
435 | * - the kind of stash that happened as PORT_SYNC_* value among: |
436 | * o not stashed (no sync IPC support) |
437 | * o stashed in the knote (in kn_ext[3]) |
438 | * o to be hooked to the kn_hook knote |
439 | */ |
440 | struct turnstile * |
441 | filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link) |
442 | { |
443 | struct turnstile *ts = filt_machport_kqueue_turnstile(kn); |
444 | |
445 | if (!ts) { |
446 | if (link) *link = PORT_SYNC_LINK_NO_LINKAGE; |
447 | } else if (kn->kn_ext[3] == 0) { |
448 | ip_reference(port); |
449 | kn->kn_ext[3] = (uintptr_t)port; |
450 | if (link) *link = PORT_SYNC_LINK_WORKLOOP_KNOTE; |
451 | } else { |
452 | ts = (struct turnstile *)kn->kn_hook; |
453 | if (link) *link = PORT_SYNC_LINK_WORKLOOP_STASH; |
454 | } |
455 | |
456 | return ts; |
457 | } |
458 | |
459 | struct turnstile * |
460 | filt_machport_stashed_special_reply_port_turnstile(ipc_port_t port) |
461 | { |
462 | struct knote *kn = port->ip_sync_inheritor_knote; |
463 | |
464 | assert(port->ip_specialreply); |
465 | assert(port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE); |
466 | if (kn->kn_ext[3] == (uint64_t)port) { |
467 | return kqueue_turnstile(knote_get_kq(kn)); |
468 | } |
469 | return kn->kn_hook; |
470 | } |
471 | |
472 | /* |
473 | * Lazily prepare a turnstile so that filt_machport_stash_port() |
474 | * can be called with the mqueue lock held. |
475 | * |
476 | * It will allocate a turnstile in kn_hook if: |
477 | * - the knote supports sync IPC override, |
478 | * - we already stashed a port in kn_ext[3], |
479 | * - the object that will be copied out has a chance to ask to be stashed. |
480 | * |
481 | * It is setup so that its inheritor is the workloop turnstile that has been |
482 | * allocated when this knote was attached. |
483 | */ |
484 | void |
485 | filt_machport_turnstile_prepare_lazily( |
486 | struct knote *kn, |
487 | mach_msg_type_name_t msgt_name, |
488 | ipc_port_t port) |
489 | { |
490 | /* This is called from within filt_machportprocess */ |
491 | assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED)); |
492 | |
493 | struct turnstile *ts = filt_machport_kqueue_turnstile(kn); |
494 | if (ts == TURNSTILE_NULL || kn->kn_ext[3] == 0 || kn->kn_hook) |
495 | return; |
496 | |
497 | if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) || |
498 | (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) { |
499 | struct turnstile *kn_ts = turnstile_alloc(); |
500 | kn_ts = turnstile_prepare((uintptr_t)kn, |
501 | (struct turnstile **)&kn->kn_hook, kn_ts, TURNSTILE_KNOTE); |
502 | turnstile_update_inheritor(kn_ts, ts, |
503 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); |
504 | turnstile_cleanup(); |
505 | } |
506 | } |
507 | |
/*
 * Other half of filt_machport_turnstile_prepare_lazily()
 *
 * Tears down the sync IPC override state hanging off a knote.  Two
 * independent pieces of state are handled, in this order:
 *
 *   1. kn_ext[3]: when non-zero it is interpreted as a referenced
 *      ipc_port_t.  Its linkage to this knote / kqueue is severed (see the
 *      two cases below), the port reference is released and the slot is
 *      zeroed.
 *
 *   2. kn_hook: when set it is interpreted as the knote turnstile.  Its
 *      inheritor is cleared, then it is completed and deallocated.
 *
 * This is serialized by the knote state machine.
 */
static void
filt_machport_turnstile_complete(struct knote *kn)
{
	struct turnstile *ts = TURNSTILE_NULL;

	if (kn->kn_ext[3]) {
		ipc_port_t port = (ipc_port_t)kn->kn_ext[3];
		ipc_mqueue_t mqueue = &port->ip_messages;

		ip_lock(port);
		if (port->ip_specialreply) {
			/*
			 * If the reply has been sent to the special reply port already,
			 * then the special reply port may already be reused to do something
			 * entirely different.
			 *
			 * However, the only reason for it to still point to this knote is
			 * that it's still waiting for a reply, so when this is the case,
			 * neuter the linkage.
			 */
			if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
			    port->ip_sync_inheritor_knote == kn) {
				/*
				 * NOTE(review): this branch never calls ip_unlock(), so
				 * ipc_port_adjust_special_reply_port_locked() is presumably
				 * responsible for dropping the port lock - confirm.
				 */
				ipc_port_adjust_special_reply_port_locked(port, NULL,
				    (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE);
			} else {
				ip_unlock(port);
			}
		} else {
			struct turnstile *kq_ts = kqueue_turnstile(knote_get_kq(kn));

			/*
			 * For receive rights, if their IMQ_INHERITOR() is still this
			 * workloop, then sever the link.
			 *
			 * It has a theoretical hole: if the port is sent again to a new
			 * receive right that is also monitored by the same kqueue,
			 * we would sever the link incorrectly.
			 *
			 * However this would be a REALLY cumbersome thing to do.
			 */
			imq_lock(mqueue);
			if (!IMQ_KLIST_VALID(mqueue) && IMQ_INHERITOR(mqueue) == kq_ts) {
				/*
				 * Drop the turnstile that was recorded as inheritor and
				 * turn the klist back into a plain, valid klist.
				 */
				turnstile_deallocate_safe(kq_ts);
				klist_init(&mqueue->imq_klist);
				ts = port_send_turnstile(port);
			}
			if (ts) {
				/*
				 * The port has a send turnstile: clear its inheritor while
				 * the locks are held, and keep it alive with a reference so
				 * the update can be completed after unlocking.
				 */
				turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
				    TURNSTILE_IMMEDIATE_UPDATE);
				turnstile_reference(ts);
			}
			imq_unlock(mqueue);
			ip_unlock(port);

			if (ts) {
				/* finish the inheritor update now that the locks are dropped */
				turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
				turnstile_deallocate(ts);
			}
		}

		/* release the port reference that kn_ext[3] was holding */
		ip_release(port);
		kn->kn_ext[3] = 0;
	}

	if (kn->kn_hook) {
		ts = kn->kn_hook;

		/* detach the knote turnstile from its inheritor before completing it */
		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
		    TURNSTILE_IMMEDIATE_UPDATE);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

		turnstile_complete((uintptr_t)kn, (struct turnstile **)&kn->kn_hook, &ts);
		turnstile_cleanup();

		/* turnstile_complete() hands a turnstile back through &ts; free it */
		assert(ts);
		turnstile_deallocate(ts);
	}
}
591 | |
/*
 * filt_machportattach - attach an EVFILT_MACHPORT knote to a port or port set
 *
 * The Mach port name is taken from kn->kn_kevent.ident and looked up in
 * the current space.  What happens next depends on the kind of right:
 *
 *   - Port set: the set's wait queue is linked to the knote with
 *     knote_link_waitq() and the knote is added to the mqueue klist.
 *     The result stays 0.
 *
 *   - Receive right: the knote is added to the port's mqueue klist, the
 *     port's send turnstile (if it has one) gets `turnstile` as inheritor,
 *     and the result is FILTER_ACTIVE (plus any QoS adjustment) when a
 *     message is already queued.
 *
 *   - Any other right: ENOTSUP.  A failed lookup yields ENOENT.
 *
 * On success a reference on the port set / port is left held and
 * kn->kn_ptr.p_mqueue points at its mqueue.
 *
 * Returns the filter result on success.  On failure the error is recorded
 * with knote_set_error() and 0 is returned.
 */
static int
filt_machportattach(
	struct knote *kn,
	__unused struct kevent_internal_s *kev)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_kevent.ident;
	/* reserved up front, before any lock is taken; released before returning */
	uint64_t wq_link_id = waitq_link_reserve(NULL);
	ipc_space_t space = current_space();
	ipc_kmsg_t first;
	/* kqueue turnstile; stays TURNSTILE_NULL unless MACH_RCV_MSG and KN_DISPATCH are both set */
	struct turnstile *turnstile = TURNSTILE_NULL;
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	int error;
	int result = 0;
	kern_return_t kr;
	ipc_entry_t entry;
	ipc_mqueue_t mqueue;

	/* start from a clean state: no EOF, no stashed port */
	kn->kn_flags &= ~EV_EOF;
	kn->kn_ext[3] = 0;

	if ((kn->kn_sfflags & MACH_RCV_MSG) && (kn->kn_status & KN_DISPATCH)) {
		/*
		 * If the filter is likely to support sync IPC override,
		 * and it happens to be attaching to a workloop,
		 * make sure the workloop has an allocated turnstile.
		 */
		turnstile = kqueue_alloc_turnstile(knote_get_kq(kn));
	}

	kr = ipc_right_lookup_read(space, name, &entry);

	/*
	 * Re-entered via goto after the space lock had to be dropped and the
	 * name looked up again, when that second lookup failed or no longer
	 * names a port set.
	 */
check_lookup:
	if (kr == KERN_SUCCESS) {
		/* space is read-locked and active */

		if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) {
			ipc_pset_t pset;

			if (knote_link_waitqset_should_lazy_alloc(kn)) {
				is_read_unlock(space);

				/*
				 * We need to link the portset of the kn,
				 * to ensure that the link is allocated before taking
				 * any spinlocks.
				 */
				knote_link_waitqset_lazy_alloc(kn);

				/*
				 * We had to drop the space lock because knote_link_waitqset_lazy_alloc()
				 * could have allocated memory. The ipc_right_lookup_read()
				 * function returns with the space locked, so we need to revalidate state.
				 */
				kr = ipc_right_lookup_read(space, name, &entry);
				if (!(kr == KERN_SUCCESS) || !(entry->ie_bits & MACH_PORT_TYPE_PORT_SET)) {
					goto check_lookup;
				}
			}

			__IGNORE_WCASTALIGN(pset = (ipc_pset_t)entry->ie_object);
			mqueue = &pset->ips_messages;
			/* reference kept for as long as the knote stays attached */
			ips_reference(pset);

			imq_lock(mqueue);
			kn->kn_ptr.p_mqueue = mqueue;

			/*
			 * Bind the portset wait queue directly to knote/kqueue.
			 * This allows us to just use wait_queue foo to effect a wakeup,
			 * rather than having to call knote() from the Mach code on each
			 * message.  We still attach the knote to the mqueue klist for
			 * NOTE_REVOKE purposes only.
			 */
			error = knote_link_waitq(kn, &mqueue->imq_wait_queue, &wq_link_id);
			if (!error) {
				assert(IMQ_KLIST_VALID(mqueue));
				KNOTE_ATTACH(&mqueue->imq_klist, kn);
				imq_unlock(mqueue);
			} else {
				/* linking failed: undo the pointer and the reference taken above */
				kn->kn_ptr.p_mqueue = IMQ_NULL;
				imq_unlock(mqueue);
				ips_release(pset);
			}

			is_read_unlock(space);

			/*
			 * linked knotes are marked stay-active and therefore don't
			 * need an indication of their fired state to be returned
			 * from the attach operation.
			 */

		} else if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
			ipc_port_t port;

			__IGNORE_WCASTALIGN(port = (ipc_port_t)entry->ie_object);
			mqueue = &port->ip_messages;
			/* reference kept for as long as the knote stays attached */
			ip_reference(port);

			/*
			 * attach knote to port and determine result
			 * If the filter requested direct message receipt,
			 * we may need to adjust the qos of the knote to
			 * reflect the requested and override qos of the
			 * first message in the queue.
			 */
			imq_lock(mqueue);
			kn->kn_ptr.p_mqueue = mqueue;
			if (!IMQ_KLIST_VALID(mqueue)) {
				/*
				 * We're attaching a port that used to have an IMQ_INHERITOR,
				 * clobber this state, and set the inheritor of its turnstile
				 * to the kqueue it's now attached to.
				 */
				turnstile_deallocate_safe(IMQ_INHERITOR(mqueue));
				klist_init(&mqueue->imq_klist);
			}
			KNOTE_ATTACH(&mqueue->imq_klist, kn);

			/* Update the port's turnstile inheritor */
			send_turnstile = port_send_turnstile(port);
			if (send_turnstile) {
				/* hold a reference so the update can be completed after unlocking */
				turnstile_reference(send_turnstile);
				turnstile_update_inheritor(send_turnstile, turnstile,
				    (TURNSTILE_INHERITOR_TURNSTILE | TURNSTILE_IMMEDIATE_UPDATE));
			}

			/* a message is already queued: report the knote as active right away */
			if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) {
				result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first);
			}
			imq_unlock(mqueue);
			is_read_unlock(space);
			if (send_turnstile) {
				/* finish the inheritor update with all locks dropped */
				turnstile_update_inheritor_complete(send_turnstile,
				    TURNSTILE_INTERLOCK_NOT_HELD);
				turnstile_deallocate(send_turnstile);
			}

			error = 0;
		} else {
			/* neither a port set nor a receive right */
			is_read_unlock(space);
			error = ENOTSUP;
		}
	} else {
		/* the name could not be looked up in this space */
		error = ENOENT;
	}

	/* give back the link reserved at the top of the function */
	waitq_link_release(wq_link_id);

	/* bail out on errors */
	if (error) {
		knote_set_error(kn, error);
		return 0;
	}

	return result;
}
750 | |
/*
 * Recover the enclosing IPC object from a pointer to its message queue.
 *
 * NOT proud of these - we should have a stricter relationship between mqueue and ipc object
 *
 *   mqueue_to_pset(mq)    subtracts the offset of ips_messages within
 *                         struct ipc_pset from the mqueue address.
 *   mqueue_to_port(mq)    subtracts the offset of ip_messages within
 *                         struct ipc_port from the mqueue address.
 *   mqueue_to_object(mq)  steps back by one ipc_object_t element from the
 *                         mqueue address.
 *
 * The argument is parenthesized in every expansion so that an expression
 * argument (e.g. a conditional or pointer arithmetic) is converted as a
 * whole rather than having the cast bind to its first operand only.
 */
#define mqueue_to_pset(mq)      ((ipc_pset_t)((uintptr_t)(mq) - offsetof(struct ipc_pset, ips_messages)))
#define mqueue_to_port(mq)      ((ipc_port_t)((uintptr_t)(mq) - offsetof(struct ipc_port, ip_messages)))
#define mqueue_to_object(mq)    (((ipc_object_t)(mq)) - 1)
755 | |
756 | |
757 | static void |
758 | filt_machportdetach( |
759 | struct knote *kn) |
760 | { |
761 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; |
762 | ipc_object_t object = mqueue_to_object(mqueue); |
763 | struct turnstile *send_turnstile = TURNSTILE_NULL; |
764 | |
765 | filt_machport_turnstile_complete(kn); |
766 | |
767 | imq_lock(mqueue); |
768 | if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) { |
769 | /* |
770 | * ipc_mqueue_changed() already unhooked this knote from the mqueue, |
771 | */ |
772 | } else { |
773 | assert(IMQ_KLIST_VALID(mqueue)); |
774 | KNOTE_DETACH(&mqueue->imq_klist, kn); |
775 | } |
776 | |
777 | if (io_otype(object) == IOT_PORT) { |
778 | ipc_port_t port = ip_from_mq(mqueue); |
779 | |
780 | send_turnstile = port_send_turnstile(port); |
781 | if (send_turnstile) { |
782 | turnstile_reference(send_turnstile); |
783 | turnstile_update_inheritor(send_turnstile, |
784 | ipc_port_get_inheritor(port), |
785 | TURNSTILE_INHERITOR_TURNSTILE | TURNSTILE_IMMEDIATE_UPDATE); |
786 | } |
787 | } |
788 | |
789 | /* Clear the knote pointer once the knote has been removed from turnstile */ |
790 | kn->kn_ptr.p_mqueue = IMQ_NULL; |
791 | imq_unlock(mqueue); |
792 | |
793 | if (send_turnstile) { |
794 | turnstile_update_inheritor_complete(send_turnstile, |
795 | TURNSTILE_INTERLOCK_NOT_HELD); |
796 | turnstile_deallocate(send_turnstile); |
797 | } |
798 | |
799 | if (io_otype(object) == IOT_PORT_SET) { |
800 | /* |
801 | * Unlink the portset wait queue from knote/kqueue. |
802 | * JMM - Does this need to be atomic under the mq lock? |
803 | */ |
804 | (void)knote_unlink_waitq(kn, &mqueue->imq_wait_queue); |
805 | } |
806 | io_release(object); |
807 | } |
808 | |
809 | /* |
810 | * filt_machportevent - deliver events into the mach port filter |
811 | * |
812 | * Mach port message arrival events are currently only posted via the |
813 | * kqueue filter routine for ports. Port sets are marked stay-active |
814 | * and the wait queue code will break any kqueue waiters out to go |
815 | * poll the stay-queued knotes again. |
816 | * |
817 | * If there is a message at the head of the queue, |
818 | * we indicate that the knote should go active. If |
819 | * the message is to be direct-received, we adjust the |
820 | * QoS of the knote according to the requested and override |
821 | * QoS of that first message. |
822 | * |
823 | * NOTE_REVOKE events are a legacy way to indicate that the port/portset |
824 | * was deallocated or left the current Mach portspace (modern technique |
825 | * is with an EV_VANISHED protocol). If we see NOTE_REVOKE, deliver an |
826 | * EV_EOF event for these changes (hopefully it will get delivered before |
827 | * the port name recycles to the same generation count and someone tries |
828 | * to re-register a kevent for it or the events are udata-specific - |
829 | * avoiding a conflict). |
830 | */ |
831 | static int |
832 | filt_machportevent( |
833 | struct knote *kn, |
834 | long hint) |
835 | { |
836 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; |
837 | ipc_kmsg_t first; |
838 | int result = 0; |
839 | |
840 | /* mqueue locked by caller */ |
841 | assert(imq_held(mqueue)); |
842 | |
843 | if (hint == NOTE_REVOKE) { |
844 | kn->kn_flags |= EV_EOF | EV_ONESHOT; |
845 | result = FILTER_ACTIVE | FILTER_RESET_EVENT_QOS; |
846 | } else if (imq_is_valid(mqueue)) { |
847 | assert(!imq_is_set(mqueue)); |
848 | if ((first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { |
849 | result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); |
850 | } |
851 | } |
852 | |
853 | return result; |
854 | } |
855 | |
856 | static int |
857 | filt_machporttouch( |
858 | struct knote *kn, |
859 | struct kevent_internal_s *kev) |
860 | { |
861 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; |
862 | ipc_kmsg_t first; |
863 | int result = 0; |
864 | |
865 | /* copy in new settings and save off new input fflags */ |
866 | kn->kn_sfflags = kev->fflags; |
867 | kn->kn_ext[0] = kev->ext[0]; |
868 | kn->kn_ext[1] = kev->ext[1]; |
869 | |
870 | if (kev->flags & EV_ENABLE) { |
871 | /* |
872 | * If the knote is being enabled, make sure there's no lingering |
873 | * IPC overrides from the previous message delivery. |
874 | */ |
875 | filt_machport_turnstile_complete(kn); |
876 | } |
877 | |
878 | /* |
879 | * If the mqueue is a valid port and there is a message |
880 | * that will be direct-received from the knote, update |
881 | * the knote qos based on the first message and trigger |
882 | * the event. If there are no more messages, reset the |
883 | * QoS to the value provided by the kevent. |
884 | */ |
885 | imq_lock(mqueue); |
886 | if (imq_is_valid(mqueue) && !imq_is_set(mqueue) && |
887 | (first = ipc_kmsg_queue_first(&mqueue->imq_messages)) != IKM_NULL) { |
888 | result = FILTER_ACTIVE | filt_machport_adjust_qos(kn, first); |
889 | } else if (kn->kn_sfflags & MACH_RCV_MSG) { |
890 | result = FILTER_RESET_EVENT_QOS; |
891 | } |
892 | imq_unlock(mqueue); |
893 | |
894 | return result; |
895 | } |
896 | |
897 | static int |
898 | filt_machportprocess( |
899 | struct knote *kn, |
900 | struct filt_process_s *process_data, |
901 | struct kevent_internal_s *kev) |
902 | { |
903 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; |
904 | ipc_object_t object = mqueue_to_object(mqueue); |
905 | thread_t self = current_thread(); |
906 | boolean_t used_filtprocess_data = FALSE; |
907 | |
908 | wait_result_t wresult; |
909 | mach_msg_option_t option; |
910 | mach_vm_address_t addr; |
911 | mach_msg_size_t size; |
912 | |
913 | /* Capture current state */ |
914 | *kev = kn->kn_kevent; |
915 | kev->ext[3] = 0; /* hide our port reference from userspace */ |
916 | |
917 | /* If already deallocated/moved return one last EOF event */ |
918 | if (kev->flags & EV_EOF) { |
919 | return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS; |
920 | } |
921 | |
922 | /* |
923 | * Only honor supported receive options. If no options are |
924 | * provided, just force a MACH_RCV_TOO_LARGE to detect the |
925 | * name of the port and sizeof the waiting message. |
926 | */ |
927 | option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_LARGE_IDENTITY| |
928 | MACH_RCV_TRAILER_MASK|MACH_RCV_VOUCHER); |
929 | |
930 | if (option & MACH_RCV_MSG) { |
931 | addr = (mach_vm_address_t) kn->kn_ext[0]; |
932 | size = (mach_msg_size_t) kn->kn_ext[1]; |
933 | |
934 | /* |
935 | * If the kevent didn't specify a buffer and length, carve a buffer |
936 | * from the filter processing data according to the flags. |
937 | */ |
938 | if (size == 0 && process_data != NULL) { |
939 | used_filtprocess_data = TRUE; |
940 | |
941 | addr = (mach_vm_address_t)process_data->fp_data_out; |
942 | size = (mach_msg_size_t)process_data->fp_data_resid; |
943 | option |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY); |
944 | if (process_data->fp_flags & KEVENT_FLAG_STACK_DATA) |
945 | option |= MACH_RCV_STACK; |
946 | } |
947 | } else { |
948 | /* just detect the port name (if a set) and size of the first message */ |
949 | option = MACH_RCV_LARGE; |
950 | addr = 0; |
951 | size = 0; |
952 | } |
953 | |
954 | imq_lock(mqueue); |
955 | |
956 | /* just use the reference from here on out */ |
957 | io_reference(object); |
958 | |
959 | /* |
960 | * Set up to receive a message or the notification of a |
961 | * too large message. But never allow this call to wait. |
962 | * If the user provided aditional options, like trailer |
963 | * options, pass those through here. But we don't support |
964 | * scatter lists through this interface. |
965 | */ |
966 | self->ith_object = object; |
967 | self->ith_msg_addr = addr; |
968 | self->ith_rsize = size; |
969 | self->ith_msize = 0; |
970 | self->ith_option = option; |
971 | self->ith_receiver_name = MACH_PORT_NULL; |
972 | self->ith_continuation = NULL; |
973 | option |= MACH_RCV_TIMEOUT; // never wait |
974 | self->ith_state = MACH_RCV_IN_PROGRESS; |
975 | self->ith_knote = kn; |
976 | |
977 | wresult = ipc_mqueue_receive_on_thread( |
978 | mqueue, |
979 | option, |
980 | size, /* max_size */ |
981 | 0, /* immediate timeout */ |
982 | THREAD_INTERRUPTIBLE, |
983 | self); |
984 | /* mqueue unlocked */ |
985 | |
986 | /* |
987 | * If we timed out, or the process is exiting, just release the |
988 | * reference on the ipc_object and return zero. |
989 | */ |
990 | if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) { |
991 | assert(self->turnstile != TURNSTILE_NULL); |
992 | io_release(object); |
993 | return 0; |
994 | } |
995 | |
996 | assert(wresult == THREAD_NOT_WAITING); |
997 | assert(self->ith_state != MACH_RCV_IN_PROGRESS); |
998 | |
999 | /* |
1000 | * If we weren't attempting to receive a message |
1001 | * directly, we need to return the port name in |
1002 | * the kevent structure. |
1003 | */ |
1004 | if ((option & MACH_RCV_MSG) != MACH_RCV_MSG) { |
1005 | assert(self->ith_state == MACH_RCV_TOO_LARGE); |
1006 | assert(self->ith_kmsg == IKM_NULL); |
1007 | kev->data = self->ith_receiver_name; |
1008 | io_release(object); |
1009 | return FILTER_ACTIVE; |
1010 | } |
1011 | |
1012 | /* |
1013 | * Attempt to receive the message directly, returning |
1014 | * the results in the fflags field. |
1015 | */ |
1016 | kev->fflags = mach_msg_receive_results(&size); |
1017 | |
1018 | /* kmsg and object reference consumed */ |
1019 | |
1020 | /* |
	 * If the user asked for the identity of ports containing a
	 * too-large message, return it in the data field (as we
1023 | * do for messages we didn't try to receive). |
1024 | */ |
1025 | if (kev->fflags == MACH_RCV_TOO_LARGE) { |
1026 | kev->ext[1] = self->ith_msize; |
1027 | if (option & MACH_RCV_LARGE_IDENTITY) |
1028 | kev->data = self->ith_receiver_name; |
1029 | else |
1030 | kev->data = MACH_PORT_NULL; |
1031 | } else { |
1032 | kev->ext[1] = size; |
1033 | kev->data = MACH_PORT_NULL; |
1034 | } |
1035 | |
1036 | /* |
1037 | * If we used a data buffer carved out from the filt_process data, |
1038 | * store the address used in the knote and adjust the residual and |
1039 | * other parameters for future use. |
1040 | */ |
1041 | if (used_filtprocess_data) { |
1042 | assert(process_data->fp_data_resid >= size); |
1043 | process_data->fp_data_resid -= size; |
1044 | if ((process_data->fp_flags & KEVENT_FLAG_STACK_DATA) == 0) { |
1045 | kev->ext[0] = process_data->fp_data_out; |
1046 | process_data->fp_data_out += size; |
1047 | } else { |
1048 | assert(option & MACH_RCV_STACK); |
1049 | kev->ext[0] = process_data->fp_data_out + |
1050 | process_data->fp_data_resid; |
1051 | } |
1052 | } |
1053 | |
1054 | /* |
1055 | * Apply message-based QoS values to output kevent as prescribed. |
1056 | * The kev->ext[2] field gets (msg-qos << 32) | (override-qos). |
1057 | * |
1058 | * The mach_msg_receive_results() call saved off the message |
1059 | * QoS values in the continuation save area on successful receive. |
1060 | */ |
1061 | if (kev->fflags == MACH_MSG_SUCCESS) { |
1062 | kev->ext[2] = ((uint64_t)self->ith_qos << 32) | |
1063 | (uint64_t)self->ith_qos_override; |
1064 | } |
1065 | |
1066 | return FILTER_ACTIVE; |
1067 | } |
1068 | |
1069 | /* |
1070 | * Peek to see if the message queue associated with the knote has any |
1071 | * events. This pre-hook is called when a filter uses the stay- |
1072 | * on-queue mechanism (as the knote_link_waitq mechanism does for |
1073 | * portsets) and someone calls select() against the containing kqueue. |
1074 | * |
1075 | * Just peek at the pre-post status of the portset's wait queue |
1076 | * to determine if it has anything interesting. We can do it |
1077 | * without holding the lock, as it is just a snapshot in time |
1078 | * (if this is used as part of really waiting for events, we |
1079 | * will catch changes in this status when the event gets posted |
1080 | * up to the knote's kqueue). |
1081 | */ |
1082 | static int |
1083 | filt_machportpeek(struct knote *kn) |
1084 | { |
1085 | ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; |
1086 | |
1087 | return ipc_mqueue_set_peek(mqueue) ? FILTER_ACTIVE : 0; |
1088 | } |
1089 | |
1090 | SECURITY_READ_ONLY_EARLY(struct filterops) machport_filtops = { |
1091 | .f_adjusts_qos = true, |
1092 | .f_extended_codes = true, |
1093 | .f_attach = filt_machportattach, |
1094 | .f_detach = filt_machportdetach, |
1095 | .f_event = filt_machportevent, |
1096 | .f_touch = filt_machporttouch, |
1097 | .f_process = filt_machportprocess, |
1098 | .f_peek = filt_machportpeek, |
1099 | }; |
1100 | |