1/*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62/*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68#include <os/log.h>
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/domain.h>
73#include <sys/fcntl.h>
74#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75#include <sys/file_internal.h>
76#include <sys/guarded.h>
77#include <sys/filedesc.h>
78#include <sys/lock.h>
79#include <sys/mbuf.h>
80#include <sys/namei.h>
81#include <sys/proc_internal.h>
82#include <sys/kauth.h>
83#include <sys/protosw.h>
84#include <sys/socket.h>
85#include <sys/socketvar.h>
86#include <sys/stat.h>
87#include <sys/sysctl.h>
88#include <sys/un.h>
89#include <sys/unpcb.h>
90#include <sys/vnode_internal.h>
91#include <sys/kdebug.h>
92#include <sys/mcache.h>
93
94#include <kern/zalloc.h>
95#include <kern/locks.h>
96#include <kern/task.h>
97
98#include <net/sockaddr_utils.h>
99
100#if __has_ptrcheck
101#include <machine/trap.h>
102#endif /* __has_ptrcheck */
103
104#if CONFIG_MACF
105#include <security/mac_framework.h>
106#endif /* CONFIG_MACF */
107
108#include <mach/vm_param.h>
109
/*
 * 64-bit alignment helpers.  ROUNDUP64() passes a size through P2ROUNDUP()
 * with sizeof(u_int64_t) as the alignment argument; ADVANCE64() moves a
 * pointer forward by that rounded byte count.  Both are guarded so that a
 * definition supplied earlier takes precedence.
 */
#if !defined(ROUNDUP64)
#define ROUNDUP64(x)            P2ROUNDUP((x), sizeof (u_int64_t))
#endif /* !ROUNDUP64 */

#if !defined(ADVANCE64)
#define ADVANCE64(p, n)         (void*)((char *)(p) + ROUNDUP64(n))
#endif /* !ADVANCE64 */

/*
 * Upper bound on the number of file descriptors carried by one mbuf.
 */
#define UIPC_MAX_CMSG_FD        512
122
123ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
124static unp_gen_t unp_gencnt;
125static u_int unp_count;
126
127static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
128static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
129static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
130
131static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
132static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
133static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
134
135static u_int disconnect_in_progress;
136
137static struct unp_head unp_shead, unp_dhead;
138static int unp_defer;
139static thread_call_t unp_gc_tcall;
140static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
141
142SYSCTL_DECL(_net_local);
143
144static int unp_rights; /* file descriptors in flight */
145static int unp_disposed; /* discarded file descriptors */
146
147SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
148
149#define ULEF_CONNECTION 0x01
150uint32_t unp_log_enable_flags = 0;
151
152SYSCTL_UINT(_net_local, OID_AUTO, log, CTLFLAG_RD | CTLFLAG_LOCKED,
153 &unp_log_enable_flags, 0, "");
154
155
156/*
157 * mDNSResponder tracing. When enabled, endpoints connected to
158 * /var/run/mDNSResponder will be traced; during each send on
159 * the traced socket, we log the PID and process name of the
160 * sending process. We also print out a bit of info related
161 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
162 * of mDNSResponder stays the same.
163 */
164#define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
165
166static int unpst_tracemdns; /* enable tracing */
167
168#define MDNS_IPC_MSG_HDR_VERSION_1 1
169
170struct mdns_ipc_msg_hdr {
171 uint32_t version;
172 uint32_t datalen;
173 uint32_t ipc_flags;
174 uint32_t op;
175 union {
176 void *context;
177 uint32_t u32[2];
178 } __attribute__((packed));
179 uint32_t reg_index;
180} __attribute__((packed));
181
182/*
183 * Unix communications domain.
184 *
185 * TODO:
186 * SEQPACKET, RDM
187 * rethink name space problems
188 * need a proper out-of-band
189 * lock pushdown
190 */
191static struct sockaddr sun_noname = {
192 .sa_len = sizeof(struct sockaddr),
193 .sa_family = AF_LOCAL,
194 .sa_data = {
195 0, 0, 0, 0, 0, 0, 0,
196 0, 0, 0, 0, 0, 0, 0
197 }
198};
199
200static ino_t unp_ino; /* prototype for fake inode numbers */
201
202static int unp_attach(struct socket *);
203static void unp_detach(struct unpcb *);
204static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
205static int unp_connect(struct socket *, struct sockaddr *, proc_t);
206static void unp_disconnect(struct unpcb *);
207static void unp_shutdown(struct unpcb *);
208static void unp_drop(struct unpcb *, int);
209static void unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
210static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
211static void unp_mark(struct fileglob *, __unused void *);
212static void unp_discard(struct fileglob *, void *);
213static int unp_internalize(struct mbuf *, proc_t);
214static int unp_listen(struct unpcb *, proc_t);
215static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
216static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
217
218__startup_func
219static void
220unp_gc_setup(void)
221{
222 unp_gc_tcall = thread_call_allocate_with_options(func: unp_gc,
223 NULL, pri: THREAD_CALL_PRIORITY_KERNEL,
224 options: THREAD_CALL_OPTIONS_ONCE);
225}
226STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
227
228static void
229unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
230{
231 if (so < conn_so) {
232 socket_lock(so: conn_so, refcount: 1);
233 } else {
234 struct unpcb *unp = sotounpcb(so);
235 unp->unp_flags |= UNP_DONTDISCONNECT;
236 unp->rw_thrcount++;
237 socket_unlock(so, refcount: 0);
238
239 /* Get the locks in the correct order */
240 socket_lock(so: conn_so, refcount: 1);
241 socket_lock(so, refcount: 0);
242 unp->rw_thrcount--;
243 if (unp->rw_thrcount == 0) {
244 unp->unp_flags &= ~UNP_DONTDISCONNECT;
245 wakeup(chan: unp);
246 }
247 }
248}
249
250static int
251uipc_abort(struct socket *so)
252{
253 struct unpcb *unp = sotounpcb(so);
254
255 if (unp == 0) {
256 return EINVAL;
257 }
258 unp_drop(unp, ECONNABORTED);
259 unp_detach(unp);
260 sofree(so);
261 return 0;
262}
263
264static int
265uipc_accept(struct socket *so, struct sockaddr **nam)
266{
267 struct unpcb *unp = sotounpcb(so);
268
269 if (unp == 0) {
270 return EINVAL;
271 }
272
273 /*
274 * Pass back name of connected socket,
275 * if it was bound and we are still connected
276 * (our peer may have closed already!).
277 */
278 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
279 *nam = dup_sockaddr(SA(unp->unp_conn->unp_addr), canwait: 1);
280 } else {
281 if (unp_log_enable_flags & ULEF_CONNECTION) {
282 os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
283 __func__, unp->unp_gencnt);
284 }
285 *nam = dup_sockaddr(SA(&sun_noname), canwait: 1);
286 }
287 return 0;
288}
289
290/*
291 * Returns: 0 Success
292 * EISCONN
293 * unp_attach:
294 */
295static int
296uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
297{
298 struct unpcb *unp = sotounpcb(so);
299
300 if (unp != 0) {
301 return EISCONN;
302 }
303 return unp_attach(so);
304}
305
306static int
307uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
308{
309 struct unpcb *unp = sotounpcb(so);
310
311 if (unp == 0) {
312 return EINVAL;
313 }
314
315 return unp_bind(unp, nam, p);
316}
317
318/*
319 * Returns: 0 Success
320 * EINVAL
321 * unp_connect:??? [See elsewhere in this file]
322 */
323static int
324uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
325{
326 struct unpcb *unp = sotounpcb(so);
327
328 if (unp == 0) {
329 return EINVAL;
330 }
331 return unp_connect(so, nam, p);
332}
333
334/*
335 * Returns: 0 Success
336 * EINVAL
337 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
338 * unp_connect2:EINVAL Invalid argument
339 */
340static int
341uipc_connect2(struct socket *so1, struct socket *so2)
342{
343 struct unpcb *unp = sotounpcb(so1);
344
345 if (unp == 0) {
346 return EINVAL;
347 }
348
349 return unp_connect2(so: so1, so2);
350}
351
352/* control is EOPNOTSUPP */
353
354static int
355uipc_detach(struct socket *so)
356{
357 struct unpcb *unp = sotounpcb(so);
358
359 if (unp == 0) {
360 return EINVAL;
361 }
362
363 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
364 unp_detach(unp);
365 return 0;
366}
367
368static int
369uipc_disconnect(struct socket *so)
370{
371 struct unpcb *unp = sotounpcb(so);
372
373 if (unp == 0) {
374 return EINVAL;
375 }
376 unp_disconnect(unp);
377 return 0;
378}
379
380/*
381 * Returns: 0 Success
382 * EINVAL
383 */
384static int
385uipc_listen(struct socket *so, __unused proc_t p)
386{
387 struct unpcb *unp = sotounpcb(so);
388
389 if (unp == 0 || unp->unp_vnode == 0) {
390 return EINVAL;
391 }
392 return unp_listen(unp, p);
393}
394
395static int
396uipc_peeraddr(struct socket *so, struct sockaddr **nam)
397{
398 struct unpcb *unp = sotounpcb(so);
399 struct socket *so2;
400
401 if (unp == NULL) {
402 return EINVAL;
403 }
404 so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
405 if (so2 != NULL) {
406 unp_get_locks_in_order(so, conn_so: so2);
407 }
408
409 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
410 *nam = dup_sockaddr(SA(unp->unp_conn->unp_addr), canwait: 1);
411 } else {
412 *nam = dup_sockaddr(SA(&sun_noname), canwait: 1);
413 }
414 if (so2 != NULL) {
415 socket_unlock(so: so2, refcount: 1);
416 }
417 return 0;
418}
419
420static int
421uipc_rcvd(struct socket *so, __unused int flags)
422{
423 struct unpcb *unp = sotounpcb(so);
424 struct socket *so2;
425
426 if (unp == 0) {
427 return EINVAL;
428 }
429 switch (so->so_type) {
430 case SOCK_DGRAM:
431 panic("uipc_rcvd DGRAM?");
432 /*NOTREACHED*/
433
434 case SOCK_STREAM:
435#define rcv (&so->so_rcv)
436#define snd (&so2->so_snd)
437 if (unp->unp_conn == 0) {
438 break;
439 }
440
441 so2 = unp->unp_conn->unp_socket;
442 unp_get_locks_in_order(so, conn_so: so2);
443 /*
444 * Adjust backpressure on sender
445 * and wakeup any waiting to write.
446 */
447 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
448 unp->unp_mbcnt = rcv->sb_mbcnt;
449 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
450 unp->unp_cc = rcv->sb_cc;
451 if (sb_notify(sb: &so2->so_snd)) {
452 sowakeup(so: so2, sb: &so2->so_snd, so2: so);
453 }
454
455 socket_unlock(so: so2, refcount: 1);
456
457#undef snd
458#undef rcv
459 break;
460
461 default:
462 panic("uipc_rcvd unknown socktype");
463 }
464 return 0;
465}
466
467/* pru_rcvoob is EOPNOTSUPP */
468
469/*
470 * Returns: 0 Success
471 * EINVAL
472 * EOPNOTSUPP
473 * EPIPE
474 * ENOTCONN
475 * EISCONN
476 * unp_internalize:EINVAL
477 * unp_internalize:EBADF
478 * unp_connect:EAFNOSUPPORT Address family not supported
479 * unp_connect:EINVAL Invalid argument
480 * unp_connect:ENOTSOCK Not a socket
481 * unp_connect:ECONNREFUSED Connection refused
482 * unp_connect:EISCONN Socket is connected
483 * unp_connect:EPROTOTYPE Protocol wrong type for socket
484 * unp_connect:???
485 * sbappendaddr:ENOBUFS [5th argument, contents modified]
486 * sbappendaddr:??? [whatever a filter author chooses]
487 */
/*
 * pru_send: queue mbuf chain m (and optional control) on the peer's
 * receive buffer.
 *
 * so       sending socket
 * flags    PRUS_OOB is rejected with EOPNOTSUPP; PRUS_EOF additionally
 *          shuts down the sending side after the send
 * m        data to send; on paths that reach an append call m is set to
 *          NULL afterwards, on the remaining paths it is freed at
 *          "release"
 * nam      optional destination; when given, a connection is made with
 *          unp_connect() (datagram sockets disconnect again afterwards)
 * control  optional control mbuf, internalized with unp_internalize()
 * p        process passed to unp_internalize()/unp_connect() and used for
 *          mDNS trace logging
 *
 * Whatever is still referenced by m/control at "release" is freed with
 * m_freem().
 */
488static int
489uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
490 struct mbuf *control, proc_t p)
491{
492 int error = 0;
493 struct unpcb *unp = sotounpcb(so);
494 struct socket *so2;
 /* Captured up front: m is set to NULL after the append paths below. */
495 int32_t len = m_pktlen(m);
496
497 if (unp == 0) {
498 error = EINVAL;
499 goto release;
500 }
501 if (flags & PRUS_OOB) {
502 error = EOPNOTSUPP;
503 goto release;
504 }
505
506 if (control) {
507 /* release lock to avoid deadlock (4436174) */
508 socket_unlock(so, refcount: 0);
509 error = unp_internalize(control, p);
510 socket_lock(so, refcount: 0);
511 if (error) {
512 goto release;
513 }
514 }
515
516 switch (so->so_type) {
517 case SOCK_DGRAM:
518 {
519 struct sockaddr *from;
520
 /* An explicit destination is only accepted on an unconnected socket. */
521 if (nam) {
522 if (unp->unp_conn) {
523 error = EISCONN;
524 break;
525 }
526 error = unp_connect(so, nam, p);
527 if (error) {
528 so->so_state &= ~SS_ISCONNECTING;
529 break;
530 }
531 } else {
532 if (unp->unp_conn == 0) {
533 error = ENOTCONN;
534 break;
535 }
536 }
537
 /* The peer lock is only taken (and later dropped) when the peer is a different socket. */
538 so2 = unp->unp_conn->unp_socket;
539 if (so != so2) {
540 unp_get_locks_in_order(so, conn_so: so2);
541 }
542
543 if (unp->unp_addr) {
544 from = SA(unp->unp_addr);
545 } else {
546 from = &sun_noname;
547 }
548 /*
549 * sbappendaddr() will fail when the receiver runs out of
550 * space; in contrast to SOCK_STREAM, we will lose messages
551 * for the SOCK_DGRAM case when the receiver's queue overflows.
552 * SB_UNIX on the socket buffer implies that the callee will
553 * not free the control message, if any, because we would need
554 * to call unp_dispose() on it.
555 */
556 if (sbappendaddr(sb: &so2->so_rcv, asa: from, m0: m, control, error_out: &error)) {
557 control = NULL;
558 if (sb_notify(sb: &so2->so_rcv)) {
559 sowakeup(so: so2, sb: &so2->so_rcv, so2: so);
560 }
561 so2->so_tc_stats[0].rxpackets += 1;
562 so2->so_tc_stats[0].rxbytes += len;
563 } else if (control != NULL && error == 0) {
564 /* A socket filter took control; don't touch it */
565 control = NULL;
566 }
567
568 if (so != so2) {
569 socket_unlock(so: so2, refcount: 1);
570 }
571
 /* m is not freed at "release" once sbappendaddr() has been called. */
572 m = NULL;
 /* Undo the connection made above for this one send. */
573 if (nam) {
574 unp_disconnect(unp);
575 }
576 break;
577 }
578
579 case SOCK_STREAM: {
580 int didreceive = 0;
581#define rcv (&so2->so_rcv)
582#define snd (&so->so_snd)
583 /* Connect if not connected yet. */
584 /*
585 * Note: A better implementation would complain
586 * if not equal to the peer's address.
587 */
588 if ((so->so_state & SS_ISCONNECTED) == 0) {
589 if (nam) {
590 error = unp_connect(so, nam, p);
591 if (error) {
592 so->so_state &= ~SS_ISCONNECTING;
593 break; /* XXX */
594 }
595 } else {
596 error = ENOTCONN;
597 break;
598 }
599 }
600
601 if (so->so_state & SS_CANTSENDMORE) {
602 error = EPIPE;
603 break;
604 }
605 if (unp->unp_conn == 0) {
606 panic("uipc_send connected but no connection? "
607 "socket state: %x socket flags: %x socket flags1: %x.",
608 so->so_state, so->so_flags, so->so_flags1);
609 }
610
611 so2 = unp->unp_conn->unp_socket;
612 unp_get_locks_in_order(so, conn_so: so2);
613
614 /* Check socket state again as we might have unlocked the socket
615 * while trying to get the locks in order
616 */
617
618 if ((so->so_state & SS_CANTSENDMORE)) {
619 error = EPIPE;
620 socket_unlock(so: so2, refcount: 1);
621 break;
622 }
623
 /*
  * Optional mDNSResponder tracing: copy a header-sized prefix of the
  * data and log the op when the version field matches.
  */
624 if (unp->unp_flags & UNP_TRACE_MDNS) {
625 struct mdns_ipc_msg_hdr hdr;
626
627 if (mbuf_copydata(mbuf: m, offset: 0, length: sizeof(hdr), out_data: &hdr) == 0 &&
628 hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
629 os_log(OS_LOG_DEFAULT,
630 "%s[mDNSResponder] pid=%d (%s): op=0x%x",
631 __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
632 }
633 }
634
635 /*
636 * Send to paired receive port, and then reduce send buffer
637 * hiwater marks to maintain backpressure. Wake up readers.
638 * SB_UNIX flag will allow new record to be appended to the
639 * receiver's queue even when it is already full. It is
640 * possible, however, that append might fail. In that case,
641 * we will need to call unp_dispose() on the control message;
642 * the callee will not free it since SB_UNIX is set.
643 */
644 didreceive = control ?
645 sbappendcontrol(rcv, m0: m, control, error_out: &error) : sbappend(rcv, m);
646
 /*
  * Charge the growth of the peer's receive buffer against our send
  * limits; sb_hiwat is clamped at zero instead of being allowed to
  * go below it.
  */
647 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
648 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
649 if ((int32_t)snd->sb_hiwat >=
650 (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
651 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
652 } else {
653 snd->sb_hiwat = 0;
654 }
655 unp->unp_conn->unp_cc = rcv->sb_cc;
656 if (didreceive) {
657 control = NULL;
658 if (sb_notify(sb: &so2->so_rcv)) {
659 sowakeup(so: so2, sb: &so2->so_rcv, so2: so);
660 }
661 so2->so_tc_stats[0].rxpackets += 1;
662 so2->so_tc_stats[0].rxbytes += len;
663 } else if (control != NULL && error == 0) {
664 /* A socket filter took control; don't touch it */
665 control = NULL;
666 }
667
668 socket_unlock(so: so2, refcount: 1);
669 m = NULL;
670#undef snd
671#undef rcv
672 }
673 break;
674
675 default:
676 panic("uipc_send unknown socktype");
677 }
678
 /*
  * NOTE(review): every "break" out of the switch above lands here,
  * including the error breaks, so the tx counters are bumped even when
  * nothing was queued -- confirm this is intended.
  */
679 so->so_tc_stats[0].txpackets += 1;
680 so->so_tc_stats[0].txbytes += len;
681
682 /*
683 * SEND_EOF is equivalent to a SEND followed by
684 * a SHUTDOWN.
685 */
686 if (flags & PRUS_EOF) {
687 socantsendmore(so);
688 unp_shutdown(unp);
689 }
690
 /*
  * control is still ours and the send failed: run unp_dispose() on it
  * with the socket unlocked; the mbuf itself is freed at "release".
  */
691 if (control && error != 0) {
692 socket_unlock(so, refcount: 0);
693 unp_dispose(m: control);
694 socket_lock(so, refcount: 0);
695 }
696
697release:
698 if (control) {
699 m_freem(control);
700 }
701 if (m) {
702 m_freem(m);
703 }
704 return error;
705}
706
707static int
708uipc_sense(struct socket *so, void *ub, int isstat64)
709{
710 struct unpcb *unp = sotounpcb(so);
711 struct socket *so2;
712 blksize_t blksize;
713
714 if (unp == 0) {
715 return EINVAL;
716 }
717
718 blksize = so->so_snd.sb_hiwat;
719 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
720 so2 = unp->unp_conn->unp_socket;
721 blksize += so2->so_rcv.sb_cc;
722 }
723 if (unp->unp_ino == 0) {
724 unp->unp_ino = unp_ino++;
725 }
726
727 if (isstat64 != 0) {
728 struct stat64 *sb64;
729
730 sb64 = (struct stat64 *)ub;
731 sb64->st_blksize = blksize;
732 sb64->st_dev = NODEV;
733 sb64->st_ino = (ino64_t)unp->unp_ino;
734 } else {
735 struct stat *sb;
736
737 sb = (struct stat *)ub;
738 sb->st_blksize = blksize;
739 sb->st_dev = NODEV;
740 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
741 }
742
743 return 0;
744}
745
746/*
747 * Returns: 0 Success
748 * EINVAL
749 *
750 * Notes: This is not strictly correct, as unp_shutdown() also calls
751 * socantrcvmore(). These should maybe both be conditionalized
752 * on the 'how' argument in soshutdown() as called from the
753 * shutdown() system call.
754 */
755static int
756uipc_shutdown(struct socket *so)
757{
758 struct unpcb *unp = sotounpcb(so);
759
760 if (unp == 0) {
761 return EINVAL;
762 }
763 socantsendmore(so);
764 unp_shutdown(unp);
765 return 0;
766}
767
768/*
769 * Returns: 0 Success
770 * EINVAL Invalid argument
771 */
772static int
773uipc_sockaddr(struct socket *so, struct sockaddr **nam)
774{
775 struct unpcb *unp = sotounpcb(so);
776
777 if (unp == NULL) {
778 return EINVAL;
779 }
780 if (unp->unp_addr != NULL) {
781 *nam = dup_sockaddr(SA(unp->unp_addr), canwait: 1);
782 } else {
783 *nam = dup_sockaddr(SA(&sun_noname), canwait: 1);
784 }
785 return 0;
786}
787
788struct pr_usrreqs uipc_usrreqs = {
789 .pru_abort = uipc_abort,
790 .pru_accept = uipc_accept,
791 .pru_attach = uipc_attach,
792 .pru_bind = uipc_bind,
793 .pru_connect = uipc_connect,
794 .pru_connect2 = uipc_connect2,
795 .pru_detach = uipc_detach,
796 .pru_disconnect = uipc_disconnect,
797 .pru_listen = uipc_listen,
798 .pru_peeraddr = uipc_peeraddr,
799 .pru_rcvd = uipc_rcvd,
800 .pru_send = uipc_send,
801 .pru_sense = uipc_sense,
802 .pru_shutdown = uipc_shutdown,
803 .pru_sockaddr = uipc_sockaddr,
804 .pru_sosend = sosend,
805 .pru_soreceive = soreceive,
806};
807
808int
809uipc_ctloutput(struct socket *so, struct sockopt *sopt)
810{
811 struct unpcb *unp = sotounpcb(so);
812 int error = 0;
813 pid_t peerpid;
814 proc_t p;
815 task_t t __single;
816 struct socket *peerso;
817
818 switch (sopt->sopt_dir) {
819 case SOPT_GET:
820 switch (sopt->sopt_name) {
821 case LOCAL_PEERCRED:
822 if (unp->unp_flags & UNP_HAVEPC) {
823 error = sooptcopyout(sopt, data: &unp->unp_peercred,
824 len: sizeof(unp->unp_peercred));
825 } else {
826 if (so->so_type == SOCK_STREAM) {
827 error = ENOTCONN;
828 } else {
829 error = EINVAL;
830 }
831 }
832 break;
833 case LOCAL_PEERPID:
834 case LOCAL_PEEREPID:
835 if (unp->unp_conn == NULL) {
836 error = ENOTCONN;
837 break;
838 }
839 peerso = unp->unp_conn->unp_socket;
840 if (peerso == NULL) {
841 panic("peer is connected but has no socket?");
842 }
843 unp_get_locks_in_order(so, conn_so: peerso);
844 if (sopt->sopt_name == LOCAL_PEEREPID &&
845 peerso->so_flags & SOF_DELEGATED) {
846 peerpid = peerso->e_pid;
847 } else {
848 peerpid = peerso->last_pid;
849 }
850 socket_unlock(so: peerso, refcount: 1);
851 error = sooptcopyout(sopt, data: &peerpid, len: sizeof(peerpid));
852 break;
853 case LOCAL_PEERUUID:
854 case LOCAL_PEEREUUID:
855 if (unp->unp_conn == NULL) {
856 error = ENOTCONN;
857 break;
858 }
859 peerso = unp->unp_conn->unp_socket;
860 if (peerso == NULL) {
861 panic("peer is connected but has no socket?");
862 }
863 unp_get_locks_in_order(so, conn_so: peerso);
864 if (sopt->sopt_name == LOCAL_PEEREUUID &&
865 peerso->so_flags & SOF_DELEGATED) {
866 error = sooptcopyout(sopt, data: &peerso->e_uuid,
867 len: sizeof(peerso->e_uuid));
868 } else {
869 error = sooptcopyout(sopt, data: &peerso->last_uuid,
870 len: sizeof(peerso->last_uuid));
871 }
872 socket_unlock(so: peerso, refcount: 1);
873 break;
874 case LOCAL_PEERTOKEN:
875 if (unp->unp_conn == NULL) {
876 error = ENOTCONN;
877 break;
878 }
879 peerso = unp->unp_conn->unp_socket;
880 if (peerso == NULL) {
881 panic("peer is connected but has no socket?");
882 }
883 unp_get_locks_in_order(so, conn_so: peerso);
884 peerpid = peerso->last_pid;
885 p = proc_find(pid: peerpid);
886 if (p != PROC_NULL) {
887 t = proc_task(p);
888 if (t != TASK_NULL) {
889 audit_token_t peertoken;
890 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
891 if (task_info(task: t, TASK_AUDIT_TOKEN, task_info_out: (task_info_t)&peertoken, task_info_count: &count) == KERN_SUCCESS) {
892 error = sooptcopyout(sopt, data: &peertoken, len: sizeof(peertoken));
893 } else {
894 error = EINVAL;
895 }
896 } else {
897 error = EINVAL;
898 }
899 proc_rele(p);
900 } else {
901 error = EINVAL;
902 }
903 socket_unlock(so: peerso, refcount: 1);
904 break;
905 default:
906 error = EOPNOTSUPP;
907 break;
908 }
909 break;
910 case SOPT_SET:
911 default:
912 error = EOPNOTSUPP;
913 break;
914 }
915
916 return error;
917}
918
919/*
920 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
921 * for stream sockets, although the total for sender and receiver is
922 * actually only PIPSIZ.
923 * Datagram sockets really use the sendspace as the maximum datagram size,
924 * and don't really want to reserve the sendspace. Their recvspace should
925 * be large enough for at least one max-size datagram plus address.
926 */
927#ifndef PIPSIZ
928#define PIPSIZ 8192
929#endif
930static u_int32_t unpst_sendspace = PIPSIZ;
931static u_int32_t unpst_recvspace = PIPSIZ;
932static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
933static u_int32_t unpdg_recvspace = 4 * 1024;
934
935SYSCTL_DECL(_net_local_stream);
936SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
937 &unpst_sendspace, 0, "");
938SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
939 &unpst_recvspace, 0, "");
940SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
941 &unpst_tracemdns, 0, "");
942SYSCTL_DECL(_net_local_dgram);
943SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
944 &unpdg_sendspace, 0, "");
945SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
946 &unpdg_recvspace, 0, "");
947
948/*
949 * Returns: 0 Success
950 * ENOBUFS
951 * soreserve:ENOBUFS
952 */
953static int
954unp_attach(struct socket *so)
955{
956 struct unpcb *unp;
957 int error = 0;
958
959 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
960 switch (so->so_type) {
961 case SOCK_STREAM:
962 error = soreserve(so, sndcc: unpst_sendspace, rcvcc: unpst_recvspace);
963 break;
964
965 case SOCK_DGRAM:
966 /*
967 * By default soreserve() will set the low water
968 * mark to MCLBYTES which is too high given our
969 * default sendspace. Override it here to something
970 * sensible.
971 */
972 so->so_snd.sb_lowat = 1;
973 error = soreserve(so, sndcc: unpdg_sendspace, rcvcc: unpdg_recvspace);
974 break;
975
976 default:
977 panic("unp_attach");
978 }
979 if (error) {
980 return error;
981 }
982 }
983 unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
984
985 lck_mtx_init(lck: &unp->unp_mtx, grp: &unp_mtx_grp, attr: &unp_mtx_attr);
986
987 lck_rw_lock_exclusive(lck: &unp_list_mtx);
988 LIST_INIT(&unp->unp_refs);
989 unp->unp_socket = so;
990 unp->unp_gencnt = ++unp_gencnt;
991 unp_count++;
992 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
993 &unp_dhead : &unp_shead, unp, unp_link);
994 lck_rw_done(lck: &unp_list_mtx);
995 so->so_pcb = (caddr_t)unp;
996 /*
997 * Mark AF_UNIX socket buffers accordingly so that:
998 *
999 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
1000 * the lack of space; this essentially loosens the sbspace() check,
1001 * since there is disconnect between sosend() and uipc_send() with
1002 * respect to flow control that might result in our dropping the
1003 * data in uipc_send(). By setting this, we allow for slightly
1004 * more records to be appended to the receiving socket to avoid
1005 * losing data (which we can't afford in the SOCK_STREAM case).
1006 * Flow control still takes place since we adjust the sender's
1007 * hiwat during each send. This doesn't affect the SOCK_DGRAM
1008 * case and append would still fail when the queue overflows.
1009 *
1010 * b. In the presence of control messages containing internalized
1011 * file descriptors, the append routines will not free them since
1012 * we'd need to undo the work first via unp_dispose().
1013 */
1014 so->so_rcv.sb_flags |= SB_UNIX;
1015 so->so_snd.sb_flags |= SB_UNIX;
1016 return 0;
1017}
1018
1019static void
1020unp_detach(struct unpcb *unp)
1021{
1022 int so_locked = 1;
1023
1024 lck_rw_lock_exclusive(lck: &unp_list_mtx);
1025 LIST_REMOVE(unp, unp_link);
1026 --unp_count;
1027 ++unp_gencnt;
1028 lck_rw_done(lck: &unp_list_mtx);
1029 if (unp->unp_vnode) {
1030 struct vnode *tvp = NULL;
1031 socket_unlock(so: unp->unp_socket, refcount: 0);
1032
1033 /* Holding unp_connect_lock will avoid a race between
1034 * a thread closing the listening socket and a thread
1035 * connecting to it.
1036 */
1037 lck_mtx_lock(lck: &unp_connect_lock);
1038 socket_lock(so: unp->unp_socket, refcount: 0);
1039 if (unp->unp_vnode) {
1040 tvp = unp->unp_vnode;
1041 unp->unp_vnode->v_socket = NULL;
1042 unp->unp_vnode = NULL;
1043 }
1044 lck_mtx_unlock(lck: &unp_connect_lock);
1045 if (tvp != NULL) {
1046 vnode_rele(vp: tvp); /* drop the usecount */
1047 }
1048 }
1049 if (unp->unp_conn) {
1050 unp_disconnect(unp);
1051 }
1052 while (unp->unp_refs.lh_first) {
1053 struct unpcb *unp2 = NULL;
1054
1055 /* This datagram socket is connected to one or more
1056 * sockets. In order to avoid a race condition between removing
1057 * this reference and closing the connected socket, we need
1058 * to check disconnect_in_progress
1059 */
1060 if (so_locked == 1) {
1061 socket_unlock(so: unp->unp_socket, refcount: 0);
1062 so_locked = 0;
1063 }
1064 lck_mtx_lock(lck: &unp_disconnect_lock);
1065 while (disconnect_in_progress != 0) {
1066 (void)msleep(chan: (caddr_t)&disconnect_in_progress, mtx: &unp_disconnect_lock,
1067 PSOCK, wmesg: "disconnect", NULL);
1068 }
1069 disconnect_in_progress = 1;
1070 lck_mtx_unlock(lck: &unp_disconnect_lock);
1071
1072 /* Now we are sure that any unpcb socket disconnect is not happening */
1073 if (unp->unp_refs.lh_first != NULL) {
1074 unp2 = unp->unp_refs.lh_first;
1075 socket_lock(so: unp2->unp_socket, refcount: 1);
1076 }
1077
1078 lck_mtx_lock(lck: &unp_disconnect_lock);
1079 disconnect_in_progress = 0;
1080 wakeup(chan: &disconnect_in_progress);
1081 lck_mtx_unlock(lck: &unp_disconnect_lock);
1082
1083 if (unp2 != NULL) {
1084 /* We already locked this socket and have a reference on it */
1085 unp_drop(unp2, ECONNRESET);
1086 socket_unlock(so: unp2->unp_socket, refcount: 1);
1087 }
1088 }
1089
1090 if (so_locked == 0) {
1091 socket_lock(so: unp->unp_socket, refcount: 0);
1092 so_locked = 1;
1093 }
1094 soisdisconnected(so: unp->unp_socket);
1095 /* makes sure we're getting dealloced */
1096 unp->unp_socket->so_flags |= SOF_PCBCLEARING;
1097}
1098
1099/*
1100 * Returns: 0 Success
1101 * EAFNOSUPPORT
1102 * EINVAL
1103 * EADDRINUSE
1104 * namei:??? [anything namei can return]
1105 * vnode_authorize:??? [anything vnode_authorize can return]
1106 *
1107 * Notes: p at this point is the current process, as this function is
1108 * only called by sobind().
1109 */
1110static int
1111unp_bind(
1112 struct unpcb *unp,
1113 struct sockaddr *nam,
1114 proc_t p)
1115{
1116 struct sockaddr_un *soun = SUN(nam);
1117 struct vnode *vp __single, *dvp;
1118 struct vnode_attr va;
1119 vfs_context_t ctx = vfs_context_current();
1120 int error, namelen;
1121 struct nameidata nd;
1122 struct socket *so = unp->unp_socket;
1123 char buf[SOCK_MAXADDRLEN];
1124
1125 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1126 return EAFNOSUPPORT;
1127 }
1128
1129 /*
1130 * Check if the socket is already bound to an address
1131 */
1132 if (unp->unp_vnode != NULL) {
1133 return EINVAL;
1134 }
1135 /*
1136 * Check if the socket may have been shut down
1137 */
1138 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1139 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1140 return EINVAL;
1141 }
1142
1143 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
1144 if (namelen <= 0) {
1145 return EINVAL;
1146 }
1147 /*
1148 * Note: sun_path is not a zero terminated "C" string
1149 */
1150 if (namelen >= SOCK_MAXADDRLEN) {
1151 return EINVAL;
1152 }
1153 bcopy(src: soun->sun_path, dst: buf, n: namelen);
1154 buf[namelen] = 0;
1155
1156 socket_unlock(so, refcount: 0);
1157
1158 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1159 CAST_USER_ADDR_T(buf), ctx);
1160 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1161 error = namei(ndp: &nd);
1162 if (error) {
1163 socket_lock(so, refcount: 0);
1164 return error;
1165 }
1166 dvp = nd.ni_dvp;
1167 vp = nd.ni_vp;
1168
1169 if (vp != NULL) {
1170 /*
1171 * need to do this before the vnode_put of dvp
1172 * since we may have to release an fs_nodelock
1173 */
1174 nameidone(&nd);
1175
1176 vnode_put(vp: dvp);
1177 vnode_put(vp);
1178
1179 socket_lock(so, refcount: 0);
1180 return EADDRINUSE;
1181 }
1182
1183 VATTR_INIT(&va);
1184 VATTR_SET(&va, va_type, VSOCK);
1185 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));
1186
1187#if CONFIG_MACF
1188 error = mac_vnode_check_create(ctx,
1189 dvp: nd.ni_dvp, cnp: &nd.ni_cnd, vap: &va);
1190
1191 if (error == 0)
1192#endif /* CONFIG_MACF */
1193#if CONFIG_MACF_SOCKET_SUBSET
1194 error = mac_vnode_check_uipc_bind(ctx,
1195 dvp: nd.ni_dvp, cnp: &nd.ni_cnd, vap: &va);
1196
1197 if (error == 0)
1198#endif /* MAC_SOCKET_SUBSET */
1199 /* authorize before creating */
1200 error = vnode_authorize(vp: dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1201
1202 if (!error) {
1203 /* create the socket */
1204 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1205 }
1206
1207 nameidone(&nd);
1208 vnode_put(vp: dvp);
1209
1210 if (error) {
1211 socket_lock(so, refcount: 0);
1212 return error;
1213 }
1214
1215 socket_lock(so, refcount: 0);
1216
1217 if (unp->unp_vnode != NULL) {
1218 vnode_put(vp); /* drop the iocount */
1219 return EINVAL;
1220 }
1221
1222 error = vnode_ref(vp); /* gain a longterm reference */
1223 if (error) {
1224 vnode_put(vp); /* drop the iocount */
1225 return error;
1226 }
1227
1228 vp->v_socket = unp->unp_socket;
1229 unp->unp_vnode = vp;
1230 unp->unp_addr = SUN(dup_sockaddr(nam, 1));
1231 vnode_put(vp); /* drop the iocount */
1232
1233 return 0;
1234}
1235
1236
1237/*
1238 * Returns: 0 Success
1239 * EAFNOSUPPORT Address family not supported
1240 * EINVAL Invalid argument
1241 * ENOTSOCK Not a socket
1242 * ECONNREFUSED Connection refused
1243 * EPROTOTYPE Protocol wrong type for socket
1244 * EISCONN Socket is connected
1245 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1246 * unp_connect2:EINVAL Invalid argument
1247 * namei:??? [anything namei can return]
1248 * vnode_authorize:???? [anything vnode_authorize can return]
1249 *
1250 * Notes: p at this point is the current process, as this function is
1251 * only called by sosend(), sendfile(), and soconnectlock().
1252 */
1253static int
1254unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1255{
1256 struct sockaddr_un *soun = SUN(nam);
1257 struct vnode *vp;
1258 struct socket *so2, *so3, *list_so = NULL;
1259 struct unpcb *unp, *unp2, *unp3;
1260 vfs_context_t ctx = vfs_context_current();
1261 int error, len;
1262 struct nameidata nd;
1263 char buf[SOCK_MAXADDRLEN];
1264
1265 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1266 return EAFNOSUPPORT;
1267 }
1268
1269 unp = sotounpcb(so);
1270 so2 = so3 = NULL;
1271
1272 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1273 if (len <= 0) {
1274 return EINVAL;
1275 }
1276 /*
1277 * Note: sun_path is not a zero terminated "C" string
1278 */
1279 if (len >= SOCK_MAXADDRLEN) {
1280 return EINVAL;
1281 }
1282
1283 soisconnecting(so);
1284
1285 bcopy(src: soun->sun_path, dst: buf, n: len);
1286 buf[len] = 0;
1287
1288 socket_unlock(so, refcount: 0);
1289
1290 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1291 CAST_USER_ADDR_T(buf), ctx);
1292 error = namei(ndp: &nd);
1293 if (error) {
1294 socket_lock(so, refcount: 0);
1295 return error;
1296 }
1297 nameidone(&nd);
1298 vp = nd.ni_vp;
1299 if (vp->v_type != VSOCK) {
1300 error = ENOTSOCK;
1301 socket_lock(so, refcount: 0);
1302 goto out;
1303 }
1304
1305#if CONFIG_MACF_SOCKET_SUBSET
1306 error = mac_vnode_check_uipc_connect(ctx, vp, so);
1307 if (error) {
1308 socket_lock(so, refcount: 0);
1309 goto out;
1310 }
1311#endif /* MAC_SOCKET_SUBSET */
1312
1313 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1314 if (error) {
1315 socket_lock(so, refcount: 0);
1316 goto out;
1317 }
1318
1319 lck_mtx_lock(lck: &unp_connect_lock);
1320
1321 if (vp->v_socket == 0) {
1322 lck_mtx_unlock(lck: &unp_connect_lock);
1323 error = ECONNREFUSED;
1324 socket_lock(so, refcount: 0);
1325 goto out;
1326 }
1327
1328 socket_lock(so: vp->v_socket, refcount: 1); /* Get a reference on the listening socket */
1329 so2 = vp->v_socket;
1330 lck_mtx_unlock(lck: &unp_connect_lock);
1331
1332
1333 if (so2->so_pcb == NULL) {
1334 error = ECONNREFUSED;
1335 if (so != so2) {
1336 socket_unlock(so: so2, refcount: 1);
1337 socket_lock(so, refcount: 0);
1338 } else {
1339 /* Release the reference held for the listen socket */
1340 VERIFY(so2->so_usecount > 0);
1341 so2->so_usecount--;
1342 }
1343 goto out;
1344 }
1345
1346 if (so < so2) {
1347 socket_unlock(so: so2, refcount: 0);
1348 socket_lock(so, refcount: 0);
1349 socket_lock(so: so2, refcount: 0);
1350 } else if (so > so2) {
1351 socket_lock(so, refcount: 0);
1352 }
1353 /*
1354 * Check if socket was connected while we were trying to
1355 * get the socket locks in order.
1356 * XXX - probably shouldn't return an error for SOCK_DGRAM
1357 */
1358 if ((so->so_state & SS_ISCONNECTED) != 0) {
1359 error = EISCONN;
1360 goto decref_out;
1361 }
1362
1363 if (so->so_type != so2->so_type) {
1364 error = EPROTOTYPE;
1365 goto decref_out;
1366 }
1367
1368 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1369 /* Release the incoming socket but keep a reference */
1370 socket_unlock(so, refcount: 0);
1371
1372 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1373 (so3 = sonewconn(head: so2, connstatus: 0, from: nam)) == 0) {
1374 error = ECONNREFUSED;
1375 if (so != so2) {
1376 socket_unlock(so: so2, refcount: 1);
1377 socket_lock(so, refcount: 0);
1378 } else {
1379 socket_lock(so, refcount: 0);
1380 /* Release the reference held for
1381 * listen socket.
1382 */
1383 VERIFY(so2->so_usecount > 0);
1384 so2->so_usecount--;
1385 }
1386 goto out;
1387 }
1388 unp2 = sotounpcb(so2);
1389 unp3 = sotounpcb(so3);
1390 if (unp2->unp_addr) {
1391 unp3->unp_addr = SUN(dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1));
1392 }
1393
1394 /*
1395 * unp_peercred management:
1396 *
1397 * The connecter's (client's) credentials are copied
1398 * from its process structure at the time of connect()
1399 * (which is now).
1400 */
1401 cru2x(cr: vfs_context_ucred(ctx), xcr: &unp3->unp_peercred);
1402 unp3->unp_flags |= UNP_HAVEPC;
1403 /*
1404 * The receiver's (server's) credentials are copied
1405 * from the unp_peercred member of socket on which the
1406 * former called listen(); unp_listen() cached that
1407 * process's credentials at that time so we can use
1408 * them now.
1409 */
1410 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1411 ("unp_connect: listener without cached peercred"));
1412
1413 /* Here we need to have both so and so2 locks and so2
1414 * is already locked. Lock ordering is required.
1415 */
1416 if (so < so2) {
1417 socket_unlock(so: so2, refcount: 0);
1418 socket_lock(so, refcount: 0);
1419 socket_lock(so: so2, refcount: 0);
1420 } else {
1421 socket_lock(so, refcount: 0);
1422 }
1423
1424 /* Check again if the socket state changed when its lock was released */
1425 if ((so->so_state & SS_ISCONNECTED) != 0) {
1426 error = EISCONN;
1427 socket_unlock(so: so2, refcount: 1);
1428 socket_lock(so: so3, refcount: 0);
1429 sofreelastref(so3, 1);
1430 goto out;
1431 }
1432 memcpy(dst: &unp->unp_peercred, src: &unp2->unp_peercred,
1433 n: sizeof(unp->unp_peercred));
1434 unp->unp_flags |= UNP_HAVEPC;
1435
1436 /* Hold the reference on listening socket until the end */
1437 socket_unlock(so: so2, refcount: 0);
1438 list_so = so2;
1439
1440 /* Lock ordering doesn't matter because so3 was just created */
1441 socket_lock(so: so3, refcount: 1);
1442 so2 = so3;
1443
1444 /*
1445 * Enable tracing for mDNSResponder endpoints. (The use
1446 * of sizeof instead of strlen below takes the null
1447 * terminating character into account.)
1448 */
1449 if (unpst_tracemdns &&
1450 !strncmp(s1: soun->sun_path, MDNSRESPONDER_PATH,
1451 n: sizeof(MDNSRESPONDER_PATH))) {
1452 unp->unp_flags |= UNP_TRACE_MDNS;
1453 unp2->unp_flags |= UNP_TRACE_MDNS;
1454 }
1455 }
1456
1457 error = unp_connect2(so, so2);
1458
1459decref_out:
1460 if (so2 != NULL) {
1461 if (so != so2) {
1462 socket_unlock(so: so2, refcount: 1);
1463 } else {
1464 /* Release the extra reference held for the listen socket.
1465 * This is possible only for SOCK_DGRAM sockets. We refuse
1466 * connecting to the same socket for SOCK_STREAM sockets.
1467 */
1468 VERIFY(so2->so_usecount > 0);
1469 so2->so_usecount--;
1470 }
1471 }
1472
1473 if (list_so != NULL) {
1474 socket_lock(so: list_so, refcount: 0);
1475 socket_unlock(so: list_so, refcount: 1);
1476 }
1477
1478out:
1479 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1480 vnode_put(vp);
1481 return error;
1482}
1483
1484/*
1485 * Returns: 0 Success
1486 * EPROTOTYPE Protocol wrong type for socket
1487 * EINVAL Invalid argument
1488 */
1489int
1490unp_connect2(struct socket *so, struct socket *so2)
1491{
1492 struct unpcb *unp = sotounpcb(so);
1493 struct unpcb *unp2;
1494
1495 if (so2->so_type != so->so_type) {
1496 return EPROTOTYPE;
1497 }
1498
1499 unp2 = sotounpcb(so2);
1500
1501 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1502 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1503
1504 /* Verify both sockets are still opened */
1505 if (unp == 0 || unp2 == 0) {
1506 return EINVAL;
1507 }
1508
1509 unp->unp_conn = unp2;
1510 so2->so_usecount++;
1511
1512 switch (so->so_type) {
1513 case SOCK_DGRAM:
1514 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1515
1516 if (so != so2) {
1517 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1518 /* Keep an extra reference on so2 that will be dropped
1519 * soon after getting the locks in order
1520 */
1521 socket_unlock(so: so2, refcount: 0);
1522 soisconnected(so);
1523 unp_get_locks_in_order(so, conn_so: so2);
1524 VERIFY(so2->so_usecount > 0);
1525 so2->so_usecount--;
1526 } else {
1527 soisconnected(so);
1528 }
1529
1530 break;
1531
1532 case SOCK_STREAM:
1533 /* This takes care of socketpair */
1534 if (!(unp->unp_flags & UNP_HAVEPC) &&
1535 !(unp2->unp_flags & UNP_HAVEPC)) {
1536 cru2x(cr: kauth_cred_get(), xcr: &unp->unp_peercred);
1537 unp->unp_flags |= UNP_HAVEPC;
1538
1539 cru2x(cr: kauth_cred_get(), xcr: &unp2->unp_peercred);
1540 unp2->unp_flags |= UNP_HAVEPC;
1541 }
1542 unp2->unp_conn = unp;
1543 so->so_usecount++;
1544
1545 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1546 socket_unlock(so, refcount: 0);
1547 soisconnected(so: so2);
1548
1549 /* Keep an extra reference on so2, that will be dropped soon after
1550 * getting the locks in order again.
1551 */
1552 socket_unlock(so: so2, refcount: 0);
1553
1554 socket_lock(so, refcount: 0);
1555 soisconnected(so);
1556
1557 unp_get_locks_in_order(so, conn_so: so2);
1558 /* Decrement the extra reference left before */
1559 VERIFY(so2->so_usecount > 0);
1560 so2->so_usecount--;
1561 break;
1562
1563 default:
1564 panic("unknown socket type %d in unp_connect2", so->so_type);
1565 }
1566 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1567 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1568 return 0;
1569}
1570
1571static void
1572unp_disconnect(struct unpcb *unp)
1573{
1574 struct unpcb *unp2 = NULL;
1575 struct socket *so2 = NULL, *so;
1576 struct socket *waitso;
1577 int so_locked = 1, strdisconn = 0;
1578
1579 so = unp->unp_socket;
1580 if (unp->unp_conn == NULL) {
1581 return;
1582 }
1583 lck_mtx_lock(lck: &unp_disconnect_lock);
1584 while (disconnect_in_progress != 0) {
1585 if (so_locked == 1) {
1586 socket_unlock(so, refcount: 0);
1587 so_locked = 0;
1588 }
1589 (void)msleep(chan: (caddr_t)&disconnect_in_progress, mtx: &unp_disconnect_lock,
1590 PSOCK, wmesg: "disconnect", NULL);
1591 }
1592 disconnect_in_progress = 1;
1593 lck_mtx_unlock(lck: &unp_disconnect_lock);
1594
1595 if (so_locked == 0) {
1596 socket_lock(so, refcount: 0);
1597 so_locked = 1;
1598 }
1599
1600 unp2 = unp->unp_conn;
1601
1602 if (unp2 == 0 || unp2->unp_socket == NULL) {
1603 goto out;
1604 }
1605 so2 = unp2->unp_socket;
1606
1607try_again:
1608 if (so == so2) {
1609 if (so_locked == 0) {
1610 socket_lock(so, refcount: 0);
1611 }
1612 waitso = so;
1613 } else if (so < so2) {
1614 if (so_locked == 0) {
1615 socket_lock(so, refcount: 0);
1616 }
1617 socket_lock(so: so2, refcount: 1);
1618 waitso = so2;
1619 } else {
1620 if (so_locked == 1) {
1621 socket_unlock(so, refcount: 0);
1622 }
1623 socket_lock(so: so2, refcount: 1);
1624 socket_lock(so, refcount: 0);
1625 waitso = so;
1626 }
1627 so_locked = 1;
1628
1629 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1630 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1631
1632 /* Check for the UNP_DONTDISCONNECT flag, if it
1633 * is set, release both sockets and go to sleep
1634 */
1635
1636 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1637 if (so != so2) {
1638 socket_unlock(so: so2, refcount: 1);
1639 }
1640 so_locked = 0;
1641
1642 (void)msleep(chan: waitso->so_pcb, mtx: &unp->unp_mtx,
1643 PSOCK | PDROP, wmesg: "unpdisconnect", NULL);
1644 goto try_again;
1645 }
1646
1647 if (unp->unp_conn == NULL) {
1648 panic("unp_conn became NULL after sleep");
1649 }
1650
1651 unp->unp_conn = NULL;
1652 VERIFY(so2->so_usecount > 0);
1653 so2->so_usecount--;
1654
1655 if (unp->unp_flags & UNP_TRACE_MDNS) {
1656 unp->unp_flags &= ~UNP_TRACE_MDNS;
1657 }
1658
1659 switch (unp->unp_socket->so_type) {
1660 case SOCK_DGRAM:
1661 LIST_REMOVE(unp, unp_reflink);
1662 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1663 if (so != so2) {
1664 socket_unlock(so: so2, refcount: 1);
1665 }
1666 break;
1667
1668 case SOCK_STREAM:
1669 unp2->unp_conn = NULL;
1670 VERIFY(so->so_usecount > 0);
1671 so->so_usecount--;
1672
1673 /*
1674 * Set the socket state correctly but do a wakeup later when
1675 * we release all locks except the socket lock, this will avoid
1676 * a deadlock.
1677 */
1678 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1679 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1680
1681 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1682 unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1683
1684 if (unp2->unp_flags & UNP_TRACE_MDNS) {
1685 unp2->unp_flags &= ~UNP_TRACE_MDNS;
1686 }
1687
1688 strdisconn = 1;
1689 break;
1690 default:
1691 panic("unknown socket type %d", so->so_type);
1692 }
1693out:
1694 lck_mtx_lock(lck: &unp_disconnect_lock);
1695 disconnect_in_progress = 0;
1696 wakeup(chan: &disconnect_in_progress);
1697 lck_mtx_unlock(lck: &unp_disconnect_lock);
1698
1699 if (strdisconn) {
1700 socket_unlock(so, refcount: 0);
1701 soisdisconnected(so: so2);
1702 socket_unlock(so: so2, refcount: 1);
1703
1704 socket_lock(so, refcount: 0);
1705 soisdisconnected(so);
1706 }
1707 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1708 return;
1709}
1710
1711/*
1712 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1713 * The unpcb_compat data structure is passed to user space and must not change.
1714 */
1715static void
1716unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1717{
1718#if defined(__LP64__)
1719 cp->unp_link.le_next = (u_int32_t)
1720 VM_KERNEL_ADDRHASH(up->unp_link.le_next);
1721 cp->unp_link.le_prev = (u_int32_t)
1722 VM_KERNEL_ADDRHASH(up->unp_link.le_prev);
1723#else
1724 cp->unp_link.le_next = (struct unpcb_compat *)
1725 VM_KERNEL_ADDRHASH(up->unp_link.le_next);
1726 cp->unp_link.le_prev = (struct unpcb_compat **)
1727 VM_KERNEL_ADDRHASH(up->unp_link.le_prev);
1728#endif
1729 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1730 VM_KERNEL_ADDRHASH(up->unp_socket);
1731 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1732 VM_KERNEL_ADDRHASH(up->unp_vnode);
1733 cp->unp_ino = up->unp_ino;
1734 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1735 VM_KERNEL_ADDRHASH(up->unp_conn);
1736 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_refs.lh_first);
1737#if defined(__LP64__)
1738 cp->unp_reflink.le_next =
1739 (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_reflink.le_next);
1740 cp->unp_reflink.le_prev =
1741 (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_reflink.le_prev);
1742#else
1743 cp->unp_reflink.le_next =
1744 (struct unpcb_compat *)VM_KERNEL_ADDRHASH(up->unp_reflink.le_next);
1745 cp->unp_reflink.le_prev =
1746 (struct unpcb_compat **)VM_KERNEL_ADDRHASH(up->unp_reflink.le_prev);
1747#endif
1748 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1749 VM_KERNEL_ADDRHASH(up->unp_addr);
1750 cp->unp_cc = up->unp_cc;
1751 cp->unp_mbcnt = up->unp_mbcnt;
1752 cp->unp_gencnt = up->unp_gencnt;
1753}
1754
1755static int
1756unp_pcblist SYSCTL_HANDLER_ARGS
1757{
1758#pragma unused(oidp,arg2)
1759 int error, i, n;
1760 struct unpcb *unp, **unp_list __bidi_indexable;
1761 size_t unp_list_len;
1762 unp_gen_t gencnt;
1763 struct xunpgen xug;
1764 struct unp_head *head;
1765
1766 lck_rw_lock_shared(lck: &unp_list_mtx);
1767 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1768
1769 /*
1770 * The process of preparing the PCB list is too time-consuming and
1771 * resource-intensive to repeat twice on every request.
1772 */
1773 if (req->oldptr == USER_ADDR_NULL) {
1774 n = unp_count;
1775 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1776 sizeof(struct xunpcb);
1777 lck_rw_done(lck: &unp_list_mtx);
1778 return 0;
1779 }
1780
1781 if (req->newptr != USER_ADDR_NULL) {
1782 lck_rw_done(lck: &unp_list_mtx);
1783 return EPERM;
1784 }
1785
1786 /*
1787 * OK, now we're committed to doing something.
1788 */
1789 gencnt = unp_gencnt;
1790 n = unp_count;
1791
1792 bzero(s: &xug, n: sizeof(xug));
1793 xug.xug_len = sizeof(xug);
1794 xug.xug_count = n;
1795 xug.xug_gen = gencnt;
1796 xug.xug_sogen = so_gencnt;
1797 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1798 if (error) {
1799 lck_rw_done(lck: &unp_list_mtx);
1800 return error;
1801 }
1802
1803 /*
1804 * We are done if there is no pcb
1805 */
1806 if (n == 0) {
1807 lck_rw_done(lck: &unp_list_mtx);
1808 return 0;
1809 }
1810
1811 unp_list_len = n;
1812 unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
1813 if (unp_list == 0) {
1814 lck_rw_done(lck: &unp_list_mtx);
1815 return ENOMEM;
1816 }
1817
1818 for (unp = head->lh_first, i = 0; unp && i < n;
1819 unp = unp->unp_link.le_next) {
1820 if (unp->unp_gencnt <= gencnt) {
1821 unp_list[i++] = unp;
1822 }
1823 }
1824 n = i; /* in case we lost some during malloc */
1825
1826 error = 0;
1827 for (i = 0; i < n; i++) {
1828 unp = unp_list[i];
1829 if (unp->unp_gencnt <= gencnt) {
1830 struct xunpcb xu;
1831
1832 bzero(s: &xu, n: sizeof(xu));
1833 xu.xu_len = sizeof(xu);
1834 xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1835 VM_KERNEL_ADDRHASH(unp);
1836 /*
1837 * XXX - need more locking here to protect against
1838 * connect/disconnect races for SMP.
1839 */
1840 if (unp->unp_addr) {
1841 struct sockaddr_un *dst __single = &xu.xu_au.xuu_addr;
1842 SOCKADDR_COPY(unp->unp_addr, dst, unp->unp_addr->sun_len);
1843 }
1844 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1845 struct sockaddr_un *dst __single = &xu.xu_cau.xuu_caddr;
1846 SOCKADDR_COPY(unp->unp_conn->unp_addr, dst, unp->unp_conn->unp_addr->sun_len);
1847 }
1848 unpcb_to_compat(up: unp, cp: &xu.xu_unp);
1849 sotoxsocket(so: unp->unp_socket, xso: &xu.xu_socket);
1850 error = SYSCTL_OUT(req, &xu, sizeof(xu));
1851 }
1852 }
1853 if (!error) {
1854 /*
1855 * Give the user an updated idea of our state.
1856 * If the generation differs from what we told
1857 * her before, she knows that something happened
1858 * while we were processing this request, and it
1859 * might be necessary to retry.
1860 */
1861 bzero(s: &xug, n: sizeof(xug));
1862 xug.xug_len = sizeof(xug);
1863 xug.xug_gen = unp_gencnt;
1864 xug.xug_sogen = so_gencnt;
1865 xug.xug_count = unp_count;
1866 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1867 }
1868 kfree_type(struct unpcb *, unp_list_len, unp_list);
1869 lck_rw_done(lck: &unp_list_mtx);
1870 return error;
1871}
1872
1873const caddr_t SYSCTL_SOCK_DGRAM_ARG = __unsafe_forge_single(caddr_t, SOCK_DGRAM);
1874const caddr_t SYSCTL_SOCK_STREAM_ARG = __unsafe_forge_single(caddr_t, SOCK_STREAM);
1875
1876SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
1877 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1878 SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist, "S,xunpcb",
1879 "List of active local datagram sockets");
1880SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
1881 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1882 SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist, "S,xunpcb",
1883 "List of active local stream sockets");
1884
1885#if XNU_TARGET_OS_OSX
1886
1887static int
1888unp_pcblist64 SYSCTL_HANDLER_ARGS
1889{
1890#pragma unused(oidp,arg2)
1891 int error, i, n;
1892 struct unpcb *unp, **unp_list;
1893 unp_gen_t gencnt;
1894 struct xunpgen xug;
1895 struct unp_head *head;
1896
1897 lck_rw_lock_shared(lck: &unp_list_mtx);
1898 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1899
1900 /*
1901 * The process of preparing the PCB list is too time-consuming and
1902 * resource-intensive to repeat twice on every request.
1903 */
1904 if (req->oldptr == USER_ADDR_NULL) {
1905 n = unp_count;
1906 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1907 (sizeof(struct xunpcb64));
1908 lck_rw_done(lck: &unp_list_mtx);
1909 return 0;
1910 }
1911
1912 if (req->newptr != USER_ADDR_NULL) {
1913 lck_rw_done(lck: &unp_list_mtx);
1914 return EPERM;
1915 }
1916
1917 /*
1918 * OK, now we're committed to doing something.
1919 */
1920 gencnt = unp_gencnt;
1921 n = unp_count;
1922
1923 bzero(s: &xug, n: sizeof(xug));
1924 xug.xug_len = sizeof(xug);
1925 xug.xug_count = n;
1926 xug.xug_gen = gencnt;
1927 xug.xug_sogen = so_gencnt;
1928 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1929 if (error) {
1930 lck_rw_done(lck: &unp_list_mtx);
1931 return error;
1932 }
1933
1934 /*
1935 * We are done if there is no pcb
1936 */
1937 if (n == 0) {
1938 lck_rw_done(lck: &unp_list_mtx);
1939 return 0;
1940 }
1941
1942 size_t unp_list_len = n;
1943 unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
1944 if (unp_list == 0) {
1945 lck_rw_done(lck: &unp_list_mtx);
1946 return ENOMEM;
1947 }
1948
1949 for (unp = head->lh_first, i = 0; unp && i < n;
1950 unp = unp->unp_link.le_next) {
1951 if (unp->unp_gencnt <= gencnt) {
1952 unp_list[i++] = unp;
1953 }
1954 }
1955 n = i; /* in case we lost some during malloc */
1956
1957 error = 0;
1958 for (i = 0; i < n; i++) {
1959 unp = unp_list[i];
1960 if (unp->unp_gencnt <= gencnt) {
1961 struct xunpcb64 xu;
1962 size_t xu_len = sizeof(struct xunpcb64);
1963
1964 bzero(s: &xu, n: xu_len);
1965 xu.xu_len = (u_int32_t)xu_len;
1966 xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRHASH(unp);
1967 xu.xunp_link.le_next = (u_int64_t)
1968 VM_KERNEL_ADDRHASH(unp->unp_link.le_next);
1969 xu.xunp_link.le_prev = (u_int64_t)
1970 VM_KERNEL_ADDRHASH(unp->unp_link.le_prev);
1971 xu.xunp_socket = (u_int64_t)
1972 VM_KERNEL_ADDRHASH(unp->unp_socket);
1973 xu.xunp_vnode = (u_int64_t)
1974 VM_KERNEL_ADDRHASH(unp->unp_vnode);
1975 xu.xunp_ino = unp->unp_ino;
1976 xu.xunp_conn = (u_int64_t)
1977 VM_KERNEL_ADDRHASH(unp->unp_conn);
1978 xu.xunp_refs = (u_int64_t)
1979 VM_KERNEL_ADDRHASH(unp->unp_refs.lh_first);
1980 xu.xunp_reflink.le_next = (u_int64_t)
1981 VM_KERNEL_ADDRHASH(unp->unp_reflink.le_next);
1982 xu.xunp_reflink.le_prev = (u_int64_t)
1983 VM_KERNEL_ADDRHASH(unp->unp_reflink.le_prev);
1984 xu.xunp_cc = unp->unp_cc;
1985 xu.xunp_mbcnt = unp->unp_mbcnt;
1986 xu.xunp_gencnt = unp->unp_gencnt;
1987
1988 if (unp->unp_socket) {
1989 sotoxsocket64(so: unp->unp_socket, xso: &xu.xu_socket);
1990 }
1991
1992 /*
1993 * XXX - need more locking here to protect against
1994 * connect/disconnect races for SMP.
1995 */
1996 if (unp->unp_addr) {
1997 bcopy(src: unp->unp_addr, dst: &xu.xu_au,
1998 n: unp->unp_addr->sun_len);
1999 }
2000 if (unp->unp_conn && unp->unp_conn->unp_addr) {
2001 bcopy(src: unp->unp_conn->unp_addr,
2002 dst: &xu.xu_cau,
2003 n: unp->unp_conn->unp_addr->sun_len);
2004 }
2005
2006 error = SYSCTL_OUT(req, &xu, xu_len);
2007 }
2008 }
2009 if (!error) {
2010 /*
2011 * Give the user an updated idea of our state.
2012 * If the generation differs from what we told
2013 * her before, she knows that something happened
2014 * while we were processing this request, and it
2015 * might be necessary to retry.
2016 */
2017 bzero(s: &xug, n: sizeof(xug));
2018 xug.xug_len = sizeof(xug);
2019 xug.xug_gen = unp_gencnt;
2020 xug.xug_sogen = so_gencnt;
2021 xug.xug_count = unp_count;
2022 error = SYSCTL_OUT(req, &xug, sizeof(xug));
2023 }
2024 kfree_type(struct unpcb *, unp_list_len, unp_list);
2025 lck_rw_done(lck: &unp_list_mtx);
2026 return error;
2027}
2028
2029SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
2030 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
2031 SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
2032 "List of active local datagram sockets 64 bit");
2033SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
2034 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
2035 SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
2036 "List of active local stream sockets 64 bit");
2037
2038#endif /* XNU_TARGET_OS_OSX */
2039
2040static int
2041unp_pcblist_n SYSCTL_HANDLER_ARGS
2042{
2043#pragma unused(oidp,arg2)
2044 int error = 0;
2045 int i, n;
2046 struct unpcb *unp;
2047 unp_gen_t gencnt;
2048 struct xunpgen xug;
2049 struct unp_head *head;
2050 void *buf __single = NULL;
2051 size_t item_size = ROUNDUP64(sizeof(struct xunpcb_n)) +
2052 ROUNDUP64(sizeof(struct xsocket_n)) +
2053 2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
2054 ROUNDUP64(sizeof(struct xsockstat_n));
2055
2056 buf = kalloc_data(item_size, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2057
2058 lck_rw_lock_shared(lck: &unp_list_mtx);
2059
2060 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
2061
2062 /*
2063 * The process of preparing the PCB list is too time-consuming and
2064 * resource-intensive to repeat twice on every request.
2065 */
2066 if (req->oldptr == USER_ADDR_NULL) {
2067 n = unp_count;
2068 req->oldidx = 2 * sizeof(xug) + (n + n / 8) * item_size;
2069 goto done;
2070 }
2071
2072 if (req->newptr != USER_ADDR_NULL) {
2073 error = EPERM;
2074 goto done;
2075 }
2076
2077 /*
2078 * OK, now we're committed to doing something.
2079 */
2080 gencnt = unp_gencnt;
2081 n = unp_count;
2082
2083 bzero(s: &xug, n: sizeof(xug));
2084 xug.xug_len = sizeof(xug);
2085 xug.xug_count = n;
2086 xug.xug_gen = gencnt;
2087 xug.xug_sogen = so_gencnt;
2088 error = SYSCTL_OUT(req, &xug, sizeof(xug));
2089 if (error != 0) {
2090 goto done;
2091 }
2092
2093 /*
2094 * We are done if there is no pcb
2095 */
2096 if (n == 0) {
2097 goto done;
2098 }
2099
2100 for (i = 0, unp = head->lh_first;
2101 i < n && unp != NULL;
2102 i++, unp = unp->unp_link.le_next) {
2103 struct xunpcb_n *xu = (struct xunpcb_n *)buf;
2104 struct xsocket_n *xso = (struct xsocket_n *)
2105 ADVANCE64(xu, sizeof(*xu));
2106 struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
2107 ADVANCE64(xso, sizeof(*xso));
2108 struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
2109 ADVANCE64(xsbrcv, sizeof(*xsbrcv));
2110 struct xsockstat_n *xsostats = (struct xsockstat_n *)
2111 ADVANCE64(xsbsnd, sizeof(*xsbsnd));
2112
2113 if (unp->unp_gencnt > gencnt) {
2114 continue;
2115 }
2116
2117 bzero(s: buf, n: item_size);
2118
2119 xu->xunp_len = sizeof(struct xunpcb_n);
2120 xu->xunp_kind = XSO_UNPCB;
2121 xu->xunp_unpp = (uint64_t)VM_KERNEL_ADDRHASH(unp);
2122 xu->xunp_vnode = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_vnode);
2123 xu->xunp_ino = unp->unp_ino;
2124 xu->xunp_conn = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_conn);
2125 xu->xunp_refs = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_refs.lh_first);
2126 xu->xunp_reflink = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_reflink.le_next);
2127 xu->xunp_cc = unp->unp_cc;
2128 xu->xunp_mbcnt = unp->unp_mbcnt;
2129 xu->xunp_flags = unp->unp_flags;
2130 xu->xunp_gencnt = unp->unp_gencnt;
2131
2132 if (unp->unp_addr) {
2133 struct sockaddr_un *dst __single = &xu->xu_au.xuu_addr;
2134 SOCKADDR_COPY(unp->unp_addr, dst, unp->unp_addr->sun_len);
2135 }
2136 if (unp->unp_conn && unp->unp_conn->unp_addr) {
2137 struct sockaddr_un *dst __single = &xu->xu_cau.xuu_caddr;
2138 SOCKADDR_COPY(unp->unp_conn->unp_addr, dst, unp->unp_conn->unp_addr->sun_len);
2139 }
2140 sotoxsocket_n(unp->unp_socket, xso);
2141 sbtoxsockbuf_n(unp->unp_socket ?
2142 &unp->unp_socket->so_rcv : NULL, xsbrcv);
2143 sbtoxsockbuf_n(unp->unp_socket ?
2144 &unp->unp_socket->so_snd : NULL, xsbsnd);
2145 sbtoxsockstat_n(unp->unp_socket, xsostats);
2146
2147 error = SYSCTL_OUT(req, buf, item_size);
2148 if (error != 0) {
2149 break;
2150 }
2151 }
2152 if (error == 0) {
2153 /*
2154 * Give the user an updated idea of our state.
2155 * If the generation differs from what we told
2156 * her before, she knows that something happened
2157 * while we were processing this request, and it
2158 * might be necessary to retry.
2159 */
2160 bzero(s: &xug, n: sizeof(xug));
2161 xug.xug_len = sizeof(xug);
2162 xug.xug_gen = unp_gencnt;
2163 xug.xug_sogen = so_gencnt;
2164 xug.xug_count = unp_count;
2165 error = SYSCTL_OUT(req, &xug, sizeof(xug));
2166 }
2167done:
2168 lck_rw_done(lck: &unp_list_mtx);
2169 kfree_data(buf, item_size);
2170 return error;
2171}
2172
2173SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist_n,
2174 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
2175 SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
2176 "List of active local datagram sockets");
2177SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist_n,
2178 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
2179 SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
2180 "List of active local stream sockets");
2181
2182static void
2183unp_shutdown(struct unpcb *unp)
2184{
2185 struct socket *so = unp->unp_socket;
2186 struct socket *so2;
2187 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
2188 so2 = unp->unp_conn->unp_socket;
2189 unp_get_locks_in_order(so, conn_so: so2);
2190 socantrcvmore(so: so2);
2191 socket_unlock(so: so2, refcount: 1);
2192 }
2193}
2194
2195static void
2196unp_drop(struct unpcb *unp, int errno)
2197{
2198 struct socket *so = unp->unp_socket;
2199
2200 so->so_error = (u_short)errno;
2201 unp_disconnect(unp);
2202}
2203
2204/*
2205 * fg_insertuipc_mark
2206 *
2207 * Description: Mark fileglob for insertion onto message queue if needed
2208 * Also takes fileglob reference
2209 *
2210 * Parameters: fg Fileglob pointer to insert
2211 *
2212 * Returns: true, if the fileglob needs to be inserted onto msg queue
2213 *
2214 * Locks: Takes and drops fg_lock, potentially many times
2215 */
2216static boolean_t
2217fg_insertuipc_mark(struct fileglob * fg)
2218{
2219 boolean_t insert = FALSE;
2220
2221 lck_mtx_lock_spin(lck: &fg->fg_lock);
2222 while (fg->fg_lflags & FG_RMMSGQ) {
2223 lck_mtx_convert_spin(lck: &fg->fg_lock);
2224
2225 fg->fg_lflags |= FG_WRMMSGQ;
2226 msleep(chan: &fg->fg_lflags, mtx: &fg->fg_lock, pri: 0, wmesg: "fg_insertuipc", NULL);
2227 }
2228
2229 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
2230 fg->fg_msgcount++;
2231 if (fg->fg_msgcount == 1) {
2232 fg->fg_lflags |= FG_INSMSGQ;
2233 insert = TRUE;
2234 }
2235 lck_mtx_unlock(lck: &fg->fg_lock);
2236 return insert;
2237}
2238
2239/*
2240 * fg_insertuipc
2241 *
2242 * Description: Insert marked fileglob onto message queue
2243 *
2244 * Parameters: fg Fileglob pointer to insert
2245 *
2246 * Returns: void
2247 *
2248 * Locks: Takes and drops fg_lock & uipc_lock
2249 * DO NOT call this function with proc_fdlock held as unp_gc()
2250 * can potentially try to acquire proc_fdlock, which can result
2251 * in a deadlock.
2252 */
2253static void
2254fg_insertuipc(struct fileglob * fg)
2255{
2256 if (fg->fg_lflags & FG_INSMSGQ) {
2257 lck_mtx_lock(lck: &uipc_lock);
2258 LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
2259 lck_mtx_unlock(lck: &uipc_lock);
2260 lck_mtx_lock(lck: &fg->fg_lock);
2261 fg->fg_lflags &= ~FG_INSMSGQ;
2262 if (fg->fg_lflags & FG_WINSMSGQ) {
2263 fg->fg_lflags &= ~FG_WINSMSGQ;
2264 wakeup(chan: &fg->fg_lflags);
2265 }
2266 lck_mtx_unlock(lck: &fg->fg_lock);
2267 }
2268}
2269
2270/*
2271 * fg_removeuipc_mark
2272 *
2273 * Description: Mark the fileglob for removal from message queue if needed
2274 * Also releases fileglob message queue reference
2275 *
2276 * Parameters: fg Fileglob pointer to remove
2277 *
2278 * Returns: true, if the fileglob needs to be removed from msg queue
2279 *
2280 * Locks: Takes and drops fg_lock, potentially many times
2281 */
2282static boolean_t
2283fg_removeuipc_mark(struct fileglob * fg)
2284{
2285 boolean_t remove = FALSE;
2286
2287 lck_mtx_lock_spin(lck: &fg->fg_lock);
2288 while (fg->fg_lflags & FG_INSMSGQ) {
2289 lck_mtx_convert_spin(lck: &fg->fg_lock);
2290
2291 fg->fg_lflags |= FG_WINSMSGQ;
2292 msleep(chan: &fg->fg_lflags, mtx: &fg->fg_lock, pri: 0, wmesg: "fg_removeuipc", NULL);
2293 }
2294 fg->fg_msgcount--;
2295 if (fg->fg_msgcount == 0) {
2296 fg->fg_lflags |= FG_RMMSGQ;
2297 remove = TRUE;
2298 }
2299 lck_mtx_unlock(lck: &fg->fg_lock);
2300 return remove;
2301}
2302
2303/*
2304 * fg_removeuipc
2305 *
2306 * Description: Remove marked fileglob from message queue
2307 *
2308 * Parameters: fg Fileglob pointer to remove
2309 *
2310 * Returns: void
2311 *
2312 * Locks: Takes and drops fg_lock & uipc_lock
2313 * DO NOT call this function with proc_fdlock held as unp_gc()
2314 * can potentially try to acquire proc_fdlock, which can result
2315 * in a deadlock.
2316 */
2317static void
2318fg_removeuipc(struct fileglob * fg)
2319{
2320 if (fg->fg_lflags & FG_RMMSGQ) {
2321 lck_mtx_lock(lck: &uipc_lock);
2322 LIST_REMOVE(fg, f_msglist);
2323 lck_mtx_unlock(lck: &uipc_lock);
2324 lck_mtx_lock(lck: &fg->fg_lock);
2325 fg->fg_lflags &= ~FG_RMMSGQ;
2326 if (fg->fg_lflags & FG_WRMMSGQ) {
2327 fg->fg_lflags &= ~FG_WRMMSGQ;
2328 wakeup(chan: &fg->fg_lflags);
2329 }
2330 lck_mtx_unlock(lck: &fg->fg_lock);
2331 }
2332}
2333
2334/*
2335 * Returns: 0 Success
2336 * EMSGSIZE The new fd's will not fit
2337 * ENOBUFS Cannot alloc struct fileproc
2338 */
2339int
2340unp_externalize(struct mbuf *rights)
2341{
2342 proc_t p = current_proc();
2343 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
2344 struct fileglob **rp = (struct fileglob **)(cm + 1);
2345 const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
2346 int *fds __bidi_indexable;
2347 int error = 0;
2348
2349 fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
2350 if (fds == NULL) {
2351 error = ENOMEM;
2352 goto out;
2353 }
2354
2355 /*
2356 * Step 1:
2357 * Allocate all the fds, and if it doesn't fit,
2358 * then fail and discard everything.
2359 */
2360 proc_fdlock(p);
2361
2362 if (fdt_available_locked(p, n: newfds)) {
2363 for (int i = 0; i < newfds; i++) {
2364 error = fdalloc(p, want: 0, result: &fds[i]);
2365 if (error) {
2366 while (i-- > 0) {
2367 fdrelse(p, fd: fds[i]);
2368 }
2369 break;
2370 }
2371 }
2372 } else {
2373 error = EMSGSIZE;
2374 }
2375
2376 proc_fdunlock(p);
2377
2378 if (error) {
2379 goto out;
2380 }
2381
2382 /*
2383 * Step 2:
2384 * At this point we are commited, and can't fail anymore.
2385 * Allocate all the fileprocs, and remove the files
2386 * from the queue.
2387 *
2388 * Until we call procfdtbl_releasefd(), fds are in flux
2389 * and can't be closed.
2390 */
2391 for (int i = 0; i < newfds; i++) {
2392 struct fileproc *fp = NULL;
2393
2394 fp = fileproc_alloc_init();
2395 fp->fp_glob = rp[i];
2396 if (fg_removeuipc_mark(fg: rp[i])) {
2397 fg_removeuipc(fg: rp[i]);
2398 }
2399
2400 proc_fdlock(p);
2401 procfdtbl_releasefd(p, fd: fds[i], fp);
2402 proc_fdunlock(p);
2403 }
2404
2405 /*
2406 * Step 3:
2407 * Return the fds into `cm`.
2408 * Handle the fact ints and pointers do not have the same size.
2409 */
2410 int *fds_out = (int *)(cm + 1);
2411 memcpy(dst: fds_out, src: fds, n: newfds * sizeof(int));
2412 if (sizeof(struct fileglob *) != sizeof(int)) {
2413 bzero(s: fds_out + newfds,
2414 n: newfds * (sizeof(struct fileglob *) - sizeof(int)));
2415 }
2416 OSAddAtomic(-newfds, &unp_rights);
2417
2418out:
2419 if (error) {
2420 for (int i = 0; i < newfds; i++) {
2421 unp_discard(rp[i], p);
2422 }
2423 bzero(s: rp, n: newfds * sizeof(struct fileglob *));
2424 }
2425
2426 kfree_data(fds, newfds * sizeof(int));
2427 return error;
2428}
2429
2430void
2431unp_init(void)
2432{
2433 _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2434 LIST_INIT(&unp_dhead);
2435 LIST_INIT(&unp_shead);
2436}
2437
/*
 * Smaller of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#if !defined(MIN)
#define MIN(a, b)       ((a) < (b) ? (a) : (b))
#endif /* !defined(MIN) */
2441
2442/*
2443 * Returns: 0 Success
2444 * EINVAL
2445 * EBADF
2446 */
2447static int
2448unp_internalize(struct mbuf *control, proc_t p)
2449{
2450 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
2451 int *fds;
2452 struct fileglob **rp;
2453 struct fileproc *fp;
2454 int i, error;
2455 int oldfds;
2456 uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];
2457
2458 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
2459 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
2460 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
2461 return EINVAL;
2462 }
2463 oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
2464 bzero(s: fg_ins, n: sizeof(fg_ins));
2465
2466 proc_fdlock(p);
2467 fds = (int *)(cm + 1);
2468
2469 for (i = 0; i < oldfds; i++) {
2470 struct fileproc *tmpfp;
2471 if ((tmpfp = fp_get_noref_locked(p, fd: fds[i])) == NULL) {
2472 proc_fdunlock(p);
2473 return EBADF;
2474 } else if (!fg_sendable(fg: tmpfp->fp_glob)) {
2475 proc_fdunlock(p);
2476 return EINVAL;
2477 } else if (fp_isguarded(fp: tmpfp, GUARD_SOCKET_IPC)) {
2478 error = fp_guard_exception(p,
2479 fd: fds[i], fp: tmpfp, attribs: kGUARD_EXC_SOCKET_IPC);
2480 proc_fdunlock(p);
2481 return error;
2482 }
2483 }
2484 rp = (struct fileglob **)(cm + 1);
2485
2486 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
2487 * and doing them in-order would result in stomping over unprocessed fd's
2488 */
2489 for (i = (oldfds - 1); i >= 0; i--) {
2490 fp = fp_get_noref_locked(p, fd: fds[i]);
2491 if (fg_insertuipc_mark(fg: fp->fp_glob)) {
2492 fg_ins[i / 8] |= 0x80 >> (i % 8);
2493 }
2494 rp[i] = fp->fp_glob;
2495 }
2496 proc_fdunlock(p);
2497
2498 for (i = 0; i < oldfds; i++) {
2499 if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
2500 VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
2501 fg_insertuipc(fg: rp[i]);
2502 }
2503 (void) OSAddAtomic(1, &unp_rights);
2504 }
2505
2506 return 0;
2507}
2508
2509static void
2510unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
2511{
2512#pragma unused(arg0, arg1)
2513 struct fileglob *fg;
2514 struct socket *so;
2515 static struct fileglob **extra_ref;
2516 struct fileglob **fpp;
2517 int nunref, i;
2518
2519restart:
2520 lck_mtx_lock(lck: &uipc_lock);
2521 unp_defer = 0;
2522 /*
2523 * before going through all this, set all FDs to
2524 * be NOT defered and NOT externally accessible
2525 */
2526 LIST_FOREACH(fg, &unp_msghead, f_msglist) {
2527 os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
2528 }
2529 do {
2530 LIST_FOREACH(fg, &unp_msghead, f_msglist) {
2531 lck_mtx_lock(lck: &fg->fg_lock);
2532 /*
2533 * If the file is not open, skip it
2534 */
2535 if (os_ref_get_count_raw(rc: &fg->fg_count) == 0) {
2536 lck_mtx_unlock(lck: &fg->fg_lock);
2537 continue;
2538 }
2539 /*
2540 * If we already marked it as 'defer' in a
2541 * previous pass, then try process it this time
2542 * and un-mark it
2543 */
2544 if (fg->fg_flag & FDEFER) {
2545 os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
2546 unp_defer--;
2547 } else {
2548 /*
2549 * if it's not defered, then check if it's
2550 * already marked.. if so skip it
2551 */
2552 if (fg->fg_flag & FMARK) {
2553 lck_mtx_unlock(lck: &fg->fg_lock);
2554 continue;
2555 }
2556 /*
2557 * If all references are from messages
2558 * in transit, then skip it. it's not
2559 * externally accessible.
2560 */
2561 if (os_ref_get_count_raw(rc: &fg->fg_count) ==
2562 fg->fg_msgcount) {
2563 lck_mtx_unlock(lck: &fg->fg_lock);
2564 continue;
2565 }
2566 /*
2567 * If it got this far then it must be
2568 * externally accessible.
2569 */
2570 os_atomic_or(&fg->fg_flag, FMARK, relaxed);
2571 }
2572 /*
2573 * either it was defered, or it is externally
2574 * accessible and not already marked so.
2575 * Now check if it is possibly one of OUR sockets.
2576 */
2577 if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
2578 (so = (struct socket *)fg_get_data(fg)) == 0) {
2579 lck_mtx_unlock(lck: &fg->fg_lock);
2580 continue;
2581 }
2582 if (so->so_proto->pr_domain != localdomain ||
2583 (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
2584 lck_mtx_unlock(lck: &fg->fg_lock);
2585 continue;
2586 }
2587 /*
2588 * So, Ok, it's one of our sockets and it IS externally
2589 * accessible (or was defered). Now we look
2590 * to see if we hold any file descriptors in its
2591 * message buffers. Follow those links and mark them
2592 * as accessible too.
2593 *
2594 * In case a file is passed onto itself we need to
2595 * release the file lock.
2596 */
2597 lck_mtx_unlock(lck: &fg->fg_lock);
2598 /*
2599 * It's safe to lock the socket after dropping fg_lock
2600 * because the socket isn't going away at this point.
2601 *
2602 * If we couldn't lock the socket or the socket buffer,
2603 * then it's because someone holding one of these
2604 * locks is stuck in unp_{internalize,externalize}().
2605 * Yield to that process and restart the garbage
2606 * collection.
2607 */
2608 if (!socket_try_lock(so)) {
2609 lck_mtx_unlock(lck: &uipc_lock);
2610 goto restart;
2611 }
2612 so->so_usecount++;
2613 /*
2614 * Lock the receive socket buffer so that we can
2615 * iterate over its mbuf list.
2616 */
2617 if (sblock(sb: &so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
2618 socket_unlock(so, refcount: 1);
2619 lck_mtx_unlock(lck: &uipc_lock);
2620 goto restart;
2621 }
2622 VERIFY(so->so_rcv.sb_flags & SB_LOCK);
2623 socket_unlock(so, refcount: 0);
2624 unp_scan(so->so_rcv.sb_mb, unp_mark, arg: 0);
2625 socket_lock(so, refcount: 0);
2626 sbunlock(sb: &so->so_rcv, TRUE);
2627 /*
2628 * Unlock and release the reference acquired above.
2629 */
2630 socket_unlock(so, refcount: 1);
2631 }
2632 } while (unp_defer);
2633 /*
2634 * We grab an extra reference to each of the file table entries
2635 * that are not otherwise accessible and then free the rights
2636 * that are stored in messages on them.
2637 *
2638 * Here, we first take an extra reference to each inaccessible
2639 * descriptor. Then, we call sorflush ourself, since we know
2640 * it is a Unix domain socket anyhow. After we destroy all the
2641 * rights carried in messages, we do a last closef to get rid
2642 * of our extra reference. This is the last close, and the
2643 * unp_detach etc will shut down the socket.
2644 *
2645 * 91/09/19, bsy@cs.cmu.edu
2646 */
2647 size_t extra_ref_size = nfiles;
2648 extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
2649 if (extra_ref == NULL) {
2650 lck_mtx_unlock(lck: &uipc_lock);
2651 return;
2652 }
2653 nunref = 0;
2654 fpp = extra_ref;
2655 LIST_FOREACH(fg, &unp_msghead, f_msglist) {
2656 lck_mtx_lock(lck: &fg->fg_lock);
2657 /*
2658 * If it's not open, skip it
2659 */
2660 if (os_ref_get_count_raw(rc: &fg->fg_count) == 0) {
2661 lck_mtx_unlock(lck: &fg->fg_lock);
2662 continue;
2663 }
2664 /*
2665 * If all refs are from msgs, and it's not marked accessible
2666 * then it must be referenced from some unreachable cycle
2667 * of (shut-down) FDs, so include it in our
2668 * list of FDs to remove
2669 */
2670 if (fg->fg_flag & FMARK) {
2671 lck_mtx_unlock(lck: &fg->fg_lock);
2672 continue;
2673 }
2674 if (os_ref_get_count_raw(rc: &fg->fg_count) == fg->fg_msgcount) {
2675 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
2676 *fpp++ = fg;
2677 nunref++;
2678 }
2679 lck_mtx_unlock(lck: &fg->fg_lock);
2680 }
2681 lck_mtx_unlock(lck: &uipc_lock);
2682
2683 /*
2684 * for each FD on our hit list, do the following two things
2685 */
2686 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2687 struct fileglob *tfg;
2688
2689 tfg = *fpp;
2690
2691 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
2692 so = (struct socket *)fg_get_data(fg: tfg);
2693
2694 if (so) {
2695 socket_lock(so, refcount: 0);
2696 sorflush(so);
2697 socket_unlock(so, refcount: 0);
2698 }
2699 }
2700 }
2701 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2702 fg_drop(PROC_NULL, fg: *fpp);
2703 }
2704
2705 kfree_type(struct fileglob *, extra_ref_size, extra_ref);
2706}
2707
2708void
2709unp_dispose(struct mbuf *m)
2710{
2711 if (m) {
2712 unp_scan(m, unp_discard, NULL);
2713 }
2714}
2715
2716/*
2717 * Returns: 0 Success
2718 */
2719static int
2720unp_listen(struct unpcb *unp, proc_t p)
2721{
2722 kauth_cred_t safecred __single = kauth_cred_proc_ref(procp: p);
2723 cru2x(cr: safecred, xcr: &unp->unp_peercred);
2724 kauth_cred_unref(&safecred);
2725 unp->unp_flags |= UNP_HAVEPCCACHED;
2726 return 0;
2727}
2728
2729static void
2730unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2731{
2732 struct mbuf *m;
2733 struct fileglob **rp;
2734 struct cmsghdr *cm;
2735 int i;
2736 int qfds;
2737
2738 while (m0) {
2739 for (m = m0; m; m = m->m_next) {
2740 if (m->m_type == MT_CONTROL &&
2741 (size_t)m->m_len >= sizeof(*cm)) {
2742 cm = mtod(m, struct cmsghdr *);
2743 if (cm->cmsg_level != SOL_SOCKET ||
2744 cm->cmsg_type != SCM_RIGHTS) {
2745 continue;
2746 }
2747 qfds = (cm->cmsg_len - sizeof(*cm)) /
2748 sizeof(int);
2749 rp = (struct fileglob **)(cm + 1);
2750 for (i = 0; i < qfds; i++) {
2751 (*op)(*rp++, arg);
2752 }
2753 break; /* XXX, but saves time */
2754 }
2755 }
2756 m0 = m0->m_act;
2757 }
2758}
2759
2760static void
2761unp_mark(struct fileglob *fg, __unused void *arg)
2762{
2763 uint32_t oflags, nflags;
2764
2765 os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
2766 if (oflags & FMARK) {
2767 os_atomic_rmw_loop_give_up(return );
2768 }
2769 nflags = oflags | FMARK | FDEFER;
2770 });
2771
2772 unp_defer++;
2773}
2774
2775static void
2776unp_discard(struct fileglob *fg, void *p)
2777{
2778 if (p == NULL) {
2779 p = current_proc(); /* XXX */
2780 }
2781 (void) OSAddAtomic(1, &unp_disposed);
2782 if (fg_removeuipc_mark(fg)) {
2783 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2784 fg_removeuipc(fg);
2785 }
2786 (void) OSAddAtomic(-1, &unp_rights);
2787
2788 (void) fg_drop(p, fg);
2789}
2790
2791int
2792unp_lock(struct socket *so, int refcount, void * lr)
2793{
2794 void * lr_saved __single;
2795 if (lr == 0) {
2796 lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
2797 } else {
2798 lr_saved = lr;
2799 }
2800
2801 if (so->so_pcb) {
2802 lck_mtx_lock(lck: &((struct unpcb *)so->so_pcb)->unp_mtx);
2803 } else {
2804 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
2805 so, lr_saved, so->so_usecount);
2806 }
2807
2808 if (so->so_usecount < 0) {
2809 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
2810 so, so->so_pcb, lr_saved, so->so_usecount);
2811 }
2812
2813 if (refcount) {
2814 VERIFY(so->so_usecount > 0);
2815 so->so_usecount++;
2816 }
2817 so->lock_lr[so->next_lock_lr] = lr_saved;
2818 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2819 return 0;
2820}
2821
2822int
2823unp_unlock(struct socket *so, int refcount, void * lr)
2824{
2825 void * lr_saved __single;
2826 lck_mtx_t * mutex_held = NULL;
2827 struct unpcb *unp __single = sotounpcb(so);
2828
2829 if (lr == 0) {
2830 lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
2831 } else {
2832 lr_saved = lr;
2833 }
2834
2835 if (refcount) {
2836 so->so_usecount--;
2837 }
2838
2839 if (so->so_usecount < 0) {
2840 panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
2841 }
2842 if (so->so_pcb == NULL) {
2843 panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
2844 } else {
2845 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2846 }
2847 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2848 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2849 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2850
2851 if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2852 sofreelastref(so, 1);
2853
2854 if (unp->unp_addr != NULL) {
2855 free_sockaddr(unp->unp_addr);
2856 }
2857
2858 lck_mtx_unlock(lck: mutex_held);
2859
2860 lck_mtx_destroy(lck: &unp->unp_mtx, grp: &unp_mtx_grp);
2861 zfree(unp_zone, unp);
2862 thread_call_enter(call: unp_gc_tcall);
2863 } else {
2864 lck_mtx_unlock(lck: mutex_held);
2865 }
2866
2867 return 0;
2868}
2869
2870lck_mtx_t *
2871unp_getlock(struct socket *so, __unused int flags)
2872{
2873 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2874
2875
2876 if (so->so_pcb) {
2877 if (so->so_usecount < 0) {
2878 panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2879 }
2880 return &unp->unp_mtx;
2881 } else {
2882 panic("unp_getlock: so=%p NULL so_pcb", so);
2883 return so->so_proto->pr_domain->dom_mtx;
2884 }
2885}
2886