uipc_usrreq.c source code [xnu/bsd/kern/uipc_usrreq.c]

1	/*
2	* Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* Copyright (c) 1982, 1986, 1989, 1991, 1993
30	* The Regents of the University of California. All rights reserved.
31	*
32	* Redistribution and use in source and binary forms, with or without
33	* modification, are permitted provided that the following conditions
34	* are met:
35	* 1. Redistributions of source code must retain the above copyright
36	* notice, this list of conditions and the following disclaimer.
37	* 2. Redistributions in binary form must reproduce the above copyright
38	* notice, this list of conditions and the following disclaimer in the
39	* documentation and/or other materials provided with the distribution.
40	* 3. All advertising materials mentioning features or use of this software
41	* must display the following acknowledgement:
42	* This product includes software developed by the University of
43	* California, Berkeley and its contributors.
44	* 4. Neither the name of the University nor the names of its contributors
45	* may be used to endorse or promote products derived from this software
46	* without specific prior written permission.
47	*
48	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58	* SUCH DAMAGE.
59	*
60	* From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61	*/
62	/*
63	* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64	* support for mandatory and extensible security protections. This notice
65	* is included in support of clause 2.2 (b) of the Apple Public License,
66	* Version 2.0.
67	*/
68
69	#include <sys/param.h>
70	#include <sys/systm.h>
71	#include <sys/kernel.h>
72	#include <sys/domain.h>
73	#include <sys/fcntl.h>
74	#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75	#include <sys/file_internal.h>
76	#include <sys/guarded.h>
77	#include <sys/filedesc.h>
78	#include <sys/lock.h>
79	#include <sys/mbuf.h>
80	#include <sys/namei.h>
81	#include <sys/proc_internal.h>
82	#include <sys/kauth.h>
83	#include <sys/protosw.h>
84	#include <sys/socket.h>
85	#include <sys/socketvar.h>
86	#include <sys/stat.h>
87	#include <sys/sysctl.h>
88	#include <sys/un.h>
89	#include <sys/unpcb.h>
90	#include <sys/vnode_internal.h>
91	#include <sys/kdebug.h>
92	#include <sys/mcache.h>
93
94	#include <kern/zalloc.h>
95	#include <kern/locks.h>
96
97	#if CONFIG_MACF
98	#include <security/mac_framework.h>
99	#endif /* CONFIG_MACF */
100
101	#include <mach/vm_param.h>
102
103	/*
104	* Maximum number of FDs that can be passed in an mbuf
105	*/
106	#define UIPC_MAX_CMSG_FD 512
107
108	#define f_msgcount f_fglob->fg_msgcount
109	#define f_cred f_fglob->fg_cred
110	#define f_ops f_fglob->fg_ops
111	#define f_offset f_fglob->fg_offset
112	#define f_data f_fglob->fg_data
113	struct zone *unp_zone;
114	static unp_gen_t unp_gencnt;
115	static u_int unp_count;
116
117	static lck_attr_t *unp_mtx_attr;
118	static lck_grp_t *unp_mtx_grp;
119	static lck_grp_attr_t *unp_mtx_grp_attr;
120	static lck_rw_t *unp_list_mtx;
121
122	static lck_mtx_t *unp_disconnect_lock;
123	static lck_mtx_t *unp_connect_lock;
124	static u_int disconnect_in_progress;
125
126	extern lck_mtx_t *uipc_lock;
127	static struct unp_head unp_shead, unp_dhead;
128
/*
 * mDNSResponder tracing.  When enabled, endpoints connected to
 * /var/run/mDNSResponder will be traced; during each send on
 * the traced socket, we log the PID and process name of the
 * sending process.  We also print out a bit of info related
 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
 * of mDNSResponder stays the same.
 */
#define	MDNSRESPONDER_PATH	"/var/run/mDNSResponder"

static int unpst_tracemdns;	/* enable tracing */

#define	MDNS_IPC_MSG_HDR_VERSION_1	1

/*
 * Local mirror of the header that starts every mDNSResponder IPC
 * message.  The layout is packed, so the structure is 28 bytes with
 * reg_index at offset 24 regardless of pointer width (the union is
 * always at least as large as u32[2]).
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
154
155	/*
156	* Unix communications domain.
157	*
158	* TODO:
159	* SEQPACKET, RDM
160	* rethink name space problems
161	* need a proper out-of-band
162	* lock pushdown
163	*/
164	static struct sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { `0` } };
165	static ino_t unp_ino; / prototype for fake inode numbers /
166
167	static int unp_attach(struct socket *);
168	static void unp_detach(struct unpcb *);
169	static int unp_bind(struct unpcb , struct* sockaddr *, proc_t);
170	static int unp_connect(struct socket , struct* sockaddr *, proc_t);
171	static void unp_disconnect(struct unpcb *);
172	static void unp_shutdown(struct unpcb *);
173	static void unp_drop(struct unpcb , int*);
174	__private_extern__ void unp_gc(void);
175	static void unp_scan(struct mbuf , void* ()(struct* fileglob , void* arg), void* *arg);
176	static void unp_mark(struct fileglob , __unused void* *);
177	static void unp_discard(struct fileglob , void* *);
178	static int unp_internalize(struct mbuf *, proc_t);
179	static int unp_listen(struct unpcb *, proc_t);
180	static void unpcb_to_compat(struct unpcb , struct* unpcb_compat *);
181	static void unp_get_locks_in_order(struct socket so, struct* socket *conn_so);
182
183	static void
184	unp_get_locks_in_order(struct socket so, struct* socket *conn_so)
185	{
186	if (so < conn_so) {
187	socket_lock(conn_so, `1`);
188	} else {
189	struct unpcb *unp = sotounpcb(so);
190	unp->unp_flags \|= UNP_DONTDISCONNECT;
191	unp->rw_thrcount++;
192	socket_unlock(so, `0`);
193
194	/ Get the locks in the correct order /
195	socket_lock(conn_so, `1`);
196	socket_lock(so, `0`);
197	unp->rw_thrcount--;
198	if (unp->rw_thrcount == `0`) {
199	unp->unp_flags &= ~UNP_DONTDISCONNECT;
200	wakeup(unp);
201	}
202	}
203	}
204
205	static int
206	uipc_abort(struct socket *so)
207	{
208	struct unpcb *unp = sotounpcb(so);
209
210	if (unp == `0`)
211	return (EINVAL);
212	unp_drop(unp, ECONNABORTED);
213	unp_detach(unp);
214	sofree(so);
215	return (`0`);
216	}
217
218	static int
219	uipc_accept(struct socket so, struct* sockaddr **nam)
220	{
221	struct unpcb *unp = sotounpcb(so);
222
223	if (unp == `0`)
224	return (EINVAL);
225
226	/*
227	* Pass back name of connected socket,
228	* if it was bound and we are still connected
229	* (our peer may have closed already!).
230	*/
231	if (unp->unp_conn && unp->unp_conn->unp_addr) {
232	nam = dup_sockaddr((struct* sockaddr *)
233	unp->unp_conn->unp_addr, `1`);
234	} else {
235	nam = dup_sockaddr((struct* sockaddr *)&sun_noname, `1`);
236	}
237	return (`0`);
238	}
239
240	/*
241	* Returns: 0 Success
242	* EISCONN
243	* unp_attach:
244	*/
245	static int
246	uipc_attach(struct socket so, __unused int* proto, __unused proc_t p)
247	{
248	struct unpcb *unp = sotounpcb(so);
249
250	if (unp != `0`)
251	return (EISCONN);
252	return (unp_attach(so));
253	}
254
255	static int
256	uipc_bind(struct socket so, struct* sockaddr *nam, proc_t p)
257	{
258	struct unpcb *unp = sotounpcb(so);
259
260	if (unp == `0`)
261	return (EINVAL);
262
263	return (unp_bind(unp, nam, p));
264	}
265
266	/*
267	* Returns: 0 Success
268	* EINVAL
269	* unp_connect:??? [See elsewhere in this file]
270	*/
271	static int
272	uipc_connect(struct socket so, struct* sockaddr *nam, proc_t p)
273	{
274	struct unpcb *unp = sotounpcb(so);
275
276	if (unp == `0`)
277	return (EINVAL);
278	return (unp_connect(so, nam, p));
279	}
280
281	/*
282	* Returns: 0 Success
283	* EINVAL
284	* unp_connect2:EPROTOTYPE Protocol wrong type for socket
285	* unp_connect2:EINVAL Invalid argument
286	*/
287	static int
288	uipc_connect2(struct socket so1, struct* socket *so2)
289	{
290	struct unpcb *unp = sotounpcb(so1);
291
292	if (unp == `0`)
293	return (EINVAL);
294
295	return (unp_connect2(so1, so2));
296	}
297
298	/ control is EOPNOTSUPP /
299
300	static int
301	uipc_detach(struct socket *so)
302	{
303	struct unpcb *unp = sotounpcb(so);
304
305	if (unp == `0`)
306	return (EINVAL);
307
308	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
309	unp_detach(unp);
310	return (`0`);
311	}
312
313	static int
314	uipc_disconnect(struct socket *so)
315	{
316	struct unpcb *unp = sotounpcb(so);
317
318	if (unp == `0`)
319	return (EINVAL);
320	unp_disconnect(unp);
321	return (`0`);
322	}
323
324	/*
325	* Returns: 0 Success
326	* EINVAL
327	*/
328	static int
329	uipc_listen(struct socket *so, __unused proc_t p)
330	{
331	struct unpcb *unp = sotounpcb(so);
332
333	if (unp == `0` \|\| unp->unp_vnode == `0`)
334	return (EINVAL);
335	return (unp_listen(unp, p));
336	}
337
338	static int
339	uipc_peeraddr(struct socket so, struct* sockaddr **nam)
340	{
341	struct unpcb *unp = sotounpcb(so);
342
343	if (unp == NULL)
344	return (EINVAL);
345	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
346	nam = dup_sockaddr((struct* sockaddr *)
347	unp->unp_conn->unp_addr, `1`);
348	} else {
349	nam = dup_sockaddr((struct* sockaddr *)&sun_noname, `1`);
350	}
351	return (`0`);
352	}
353
354	static int
355	uipc_rcvd(struct socket so, __unused int* flags)
356	{
357	struct unpcb *unp = sotounpcb(so);
358	struct socket *so2;
359
360	if (unp == `0`)
361	return (EINVAL);
362	switch (so->so_type) {
363	case SOCK_DGRAM:
364	panic("uipc_rcvd DGRAM?");
365	/NOTREACHED/
366
367	case SOCK_STREAM:
368	#define rcv (&so->so_rcv)
369	#define snd (&so2->so_snd)
370	if (unp->unp_conn == `0`)
371	break;
372
373	so2 = unp->unp_conn->unp_socket;
374	unp_get_locks_in_order(so, so2);
375	/*
376	* Adjust backpressure on sender
377	* and wakeup any waiting to write.
378	*/
379	snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
380	unp->unp_mbcnt = rcv->sb_mbcnt;
381	snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
382	unp->unp_cc = rcv->sb_cc;
383	sowwakeup(so2);
384
385	socket_unlock(so2, `1`);
386
387	#undef snd
388	#undef rcv
389	break;
390
391	default:
392	panic("uipc_rcvd unknown socktype");
393	}
394	return (`0`);
395	}
396
397	/ pru_rcvoob is EOPNOTSUPP /
398
399	/*
400	* Returns: 0 Success
401	* EINVAL
402	* EOPNOTSUPP
403	* EPIPE
404	* ENOTCONN
405	* EISCONN
406	* unp_internalize:EINVAL
407	* unp_internalize:EBADF
408	* unp_connect:EAFNOSUPPORT Address family not supported
409	* unp_connect:EINVAL Invalid argument
410	* unp_connect:ENOTSOCK Not a socket
411	* unp_connect:ECONNREFUSED Connection refused
412	* unp_connect:EISCONN Socket is connected
413	* unp_connect:EPROTOTYPE Protocol wrong type for socket
414	* unp_connect:???
415	* sbappendaddr:ENOBUFS [5th argument, contents modified]
416	* sbappendaddr:??? [whatever a filter author chooses]
417	*/
418	static int
419	uipc_send(struct socket so, int* flags, struct mbuf m, struct* sockaddr *nam,
420	struct mbuf *control, proc_t p)
421	{
422	int error = `0`;
423	struct unpcb *unp = sotounpcb(so);
424	struct socket *so2;
425
426	if (unp == `0`) {
427	error = EINVAL;
428	goto release;
429	}
430	if (flags & PRUS_OOB) {
431	error = EOPNOTSUPP;
432	goto release;
433	}
434
435	if (control) {
436	/ release lock to avoid deadlock (4436174) /
437	socket_unlock(so, `0`);
438	error = unp_internalize(control, p);
439	socket_lock(so, `0`);
440	if (error)
441	goto release;
442	}
443
444	switch (so->so_type) {
445	case SOCK_DGRAM:
446	{
447	struct sockaddr *from;
448
449	if (nam) {
450	if (unp->unp_conn) {
451	error = EISCONN;
452	break;
453	}
454	error = unp_connect(so, nam, p);
455	if (error)
456	break;
457	} else {
458	if (unp->unp_conn == `0`) {
459	error = ENOTCONN;
460	break;
461	}
462	}
463
464	so2 = unp->unp_conn->unp_socket;
465	if (so != so2)
466	unp_get_locks_in_order(so, so2);
467
468	if (unp->unp_addr)
469	from = (struct sockaddr *)unp->unp_addr;
470	else
471	from = &sun_noname;
472	/*
473	* sbappendaddr() will fail when the receiver runs out of
474	* space; in contrast to SOCK_STREAM, we will lose messages
475	* for the SOCK_DGRAM case when the receiver's queue overflows.
476	* SB_UNIX on the socket buffer implies that the callee will
477	* not free the control message, if any, because we would need
478	* to call unp_dispose() on it.
479	*/
480	if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
481	control = NULL;
482	sorwakeup(so2);
483	} else if (control != NULL && error == `0`) {
484	/ A socket filter took control; don't touch it /
485	control = NULL;
486	}
487
488	if (so != so2)
489	socket_unlock(so2, `1`);
490
491	m = NULL;
492	if (nam)
493	unp_disconnect(unp);
494	break;
495	}
496
497	case SOCK_STREAM: {
498	int didreceive = `0`;
499	#define rcv (&so2->so_rcv)
500	#define snd (&so->so_snd)
501	/ Connect if not connected yet. /
502	/*
503	* Note: A better implementation would complain
504	* if not equal to the peer's address.
505	*/
506	if ((so->so_state & SS_ISCONNECTED) == `0`) {
507	if (nam) {
508	error = unp_connect(so, nam, p);
509	if (error)
510	break; / XXX /
511	} else {
512	error = ENOTCONN;
513	break;
514	}
515	}
516
517	if (so->so_state & SS_CANTSENDMORE) {
518	error = EPIPE;
519	break;
520	}
521	if (unp->unp_conn == `0`)
522	panic("uipc_send connected but no connection?");
523
524	so2 = unp->unp_conn->unp_socket;
525	unp_get_locks_in_order(so, so2);
526
527	/ Check socket state again as we might have unlocked the socket*
528	* while trying to get the locks in order
529	*/
530
531	if ((so->so_state & SS_CANTSENDMORE)) {
532	error = EPIPE;
533	socket_unlock(so2, `1`);
534	break;
535	}
536
537	if (unp->unp_flags & UNP_TRACE_MDNS) {
538	struct mdns_ipc_msg_hdr hdr;
539
540	if (mbuf_copydata(m, `0`, sizeof (hdr), &hdr) == `0` &&
541	hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
542	printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
543	__func__, p->p_pid, p->p_comm, ntohl(hdr.op));
544	}
545	}
546
547	/*
548	* Send to paired receive port, and then reduce send buffer
549	* hiwater marks to maintain backpressure. Wake up readers.
550	* SB_UNIX flag will allow new record to be appended to the
551	* receiver's queue even when it is already full. It is
552	* possible, however, that append might fail. In that case,
553	* we will need to call unp_dispose() on the control message;
554	* the callee will not free it since SB_UNIX is set.
555	*/
556	didreceive = control ?
557	sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
558
559	snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
560	unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
561	if ((int32_t)snd->sb_hiwat >=
562	(int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
563	snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
564	} else {
565	snd->sb_hiwat = `0`;
566	}
567	unp->unp_conn->unp_cc = rcv->sb_cc;
568	if (didreceive) {
569	control = NULL;
570	sorwakeup(so2);
571	} else if (control != NULL && error == `0`) {
572	/ A socket filter took control; don't touch it /
573	control = NULL;
574	}
575
576	socket_unlock(so2, `1`);
577	m = NULL;
578	#undef snd
579	#undef rcv
580	}
581	break;
582
583	default:
584	panic("uipc_send unknown socktype");
585	}
586
587	/*
588	* SEND_EOF is equivalent to a SEND followed by
589	* a SHUTDOWN.
590	*/
591	if (flags & PRUS_EOF) {
592	socantsendmore(so);
593	unp_shutdown(unp);
594	}
595
596	if (control && error != `0`) {
597	socket_unlock(so, `0`);
598	unp_dispose(control);
599	socket_lock(so, `0`);
600	}
601
602	release:
603	if (control)
604	m_freem(control);
605	if (m)
606	m_freem(m);
607	return (error);
608	}
609
610	static int
611	uipc_sense(struct socket so, void* ub, int* isstat64)
612	{
613	struct unpcb *unp = sotounpcb(so);
614	struct socket *so2;
615	blksize_t blksize;
616
617	if (unp == `0`)
618	return (EINVAL);
619
620	blksize = so->so_snd.sb_hiwat;
621	if (so->so_type == SOCK_STREAM && unp->unp_conn != `0`) {
622	so2 = unp->unp_conn->unp_socket;
623	blksize += so2->so_rcv.sb_cc;
624	}
625	if (unp->unp_ino == `0`)
626	unp->unp_ino = unp_ino++;
627
628	if (isstat64 != `0`) {
629	struct stat64 *sb64;
630
631	sb64 = (struct stat64 *)ub;
632	sb64->st_blksize = blksize;
633	sb64->st_dev = NODEV;
634	sb64->st_ino = (ino64_t)unp->unp_ino;
635	} else {
636	struct stat *sb;
637
638	sb = (struct stat *)ub;
639	sb->st_blksize = blksize;
640	sb->st_dev = NODEV;
641	sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
642	}
643
644	return (`0`);
645	}
646
647	/*
648	* Returns: 0 Success
649	* EINVAL
650	*
651	* Notes: This is not strictly correct, as unp_shutdown() also calls
652	* socantrcvmore(). These should maybe both be conditionalized
653	* on the 'how' argument in soshutdown() as called from the
654	* shutdown() system call.
655	*/
656	static int
657	uipc_shutdown(struct socket *so)
658	{
659	struct unpcb *unp = sotounpcb(so);
660
661	if (unp == `0`)
662	return (EINVAL);
663	socantsendmore(so);
664	unp_shutdown(unp);
665	return (`0`);
666	}
667
668	/*
669	* Returns: 0 Success
670	* EINVAL Invalid argument
671	*/
672	static int
673	uipc_sockaddr(struct socket so, struct* sockaddr **nam)
674	{
675	struct unpcb *unp = sotounpcb(so);
676
677	if (unp == NULL)
678	return (EINVAL);
679	if (unp->unp_addr != NULL) {
680	nam = dup_sockaddr((struct* sockaddr *)unp->unp_addr, `1`);
681	} else {
682	nam = dup_sockaddr((struct* sockaddr *)&sun_noname, `1`);
683	}
684	return (`0`);
685	}
686
687	struct pr_usrreqs uipc_usrreqs = {
688	.pru_abort = uipc_abort,
689	.pru_accept = uipc_accept,
690	.pru_attach = uipc_attach,
691	.pru_bind = uipc_bind,
692	.pru_connect = uipc_connect,
693	.pru_connect2 = uipc_connect2,
694	.pru_detach = uipc_detach,
695	.pru_disconnect = uipc_disconnect,
696	.pru_listen = uipc_listen,
697	.pru_peeraddr = uipc_peeraddr,
698	.pru_rcvd = uipc_rcvd,
699	.pru_send = uipc_send,
700	.pru_sense = uipc_sense,
701	.pru_shutdown = uipc_shutdown,
702	.pru_sockaddr = uipc_sockaddr,
703	.pru_sosend = sosend,
704	.pru_soreceive = soreceive,
705	};
706
707	int
708	uipc_ctloutput(struct socket so, struct* sockopt *sopt)
709	{
710	struct unpcb *unp = sotounpcb(so);
711	int error = `0`;
712	pid_t peerpid;
713	struct socket *peerso;
714
715	switch (sopt->sopt_dir) {
716	case SOPT_GET:
717	switch (sopt->sopt_name) {
718	case LOCAL_PEERCRED:
719	if (unp->unp_flags & UNP_HAVEPC) {
720	error = sooptcopyout(sopt, &unp->unp_peercred,
721	sizeof (unp->unp_peercred));
722	} else {
723	if (so->so_type == SOCK_STREAM)
724	error = ENOTCONN;
725	else
726	error = EINVAL;
727	}
728	break;
729	case LOCAL_PEERPID:
730	case LOCAL_PEEREPID:
731	if (unp->unp_conn == NULL) {
732	error = ENOTCONN;
733	break;
734	}
735	peerso = unp->unp_conn->unp_socket;
736	if (peerso == NULL)
737	panic("peer is connected but has no socket?");
738	unp_get_locks_in_order(so, peerso);
739	if (sopt->sopt_name == LOCAL_PEEREPID &&
740	peerso->so_flags & SOF_DELEGATED)
741	peerpid = peerso->e_pid;
742	else
743	peerpid = peerso->last_pid;
744	socket_unlock(peerso, `1`);
745	error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
746	break;
747	case LOCAL_PEERUUID:
748	case LOCAL_PEEREUUID:
749	if (unp->unp_conn == NULL) {
750	error = ENOTCONN;
751	break;
752	}
753	peerso = unp->unp_conn->unp_socket;
754	if (peerso == NULL)
755	panic("peer is connected but has no socket?");
756	unp_get_locks_in_order(so, peerso);
757	if (sopt->sopt_name == LOCAL_PEEREUUID &&
758	peerso->so_flags & SOF_DELEGATED)
759	error = sooptcopyout(sopt, &peerso->e_uuid,
760	sizeof (peerso->e_uuid));
761	else
762	error = sooptcopyout(sopt, &peerso->last_uuid,
763	sizeof (peerso->last_uuid));
764	socket_unlock(peerso, `1`);
765	break;
766	default:
767	error = EOPNOTSUPP;
768	break;
769	}
770	break;
771	case SOPT_SET:
772	default:
773	error = EOPNOTSUPP;
774	break;
775	}
776
777	return (error);
778	}
779
780	/*
781	* Both send and receive buffers are allocated PIPSIZ bytes of buffering
782	* for stream sockets, although the total for sender and receiver is
783	* actually only PIPSIZ.
784	* Datagram sockets really use the sendspace as the maximum datagram size,
785	* and don't really want to reserve the sendspace. Their recvspace should
786	* be large enough for at least one max-size datagram plus address.
787	*/
788	#ifndef PIPSIZ
789	#define PIPSIZ 8192
790	#endif
791	static u_int32_t unpst_sendspace = PIPSIZ;
792	static u_int32_t unpst_recvspace = PIPSIZ;
793	static u_int32_t unpdg_sendspace = `2``1024`; /* really max datagram size /
794	static u_int32_t unpdg_recvspace = `4`*`1024`;
795
796	static int unp_rights; / file descriptors in flight /
797	static int unp_disposed; / discarded file descriptors /
798
799	SYSCTL_DECL(_net_local_stream);
800	SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW \| CTLFLAG_LOCKED,
801	&unpst_sendspace, `0`, "");
802	SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW \| CTLFLAG_LOCKED,
803	&unpst_recvspace, `0`, "");
804	SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW \| CTLFLAG_LOCKED,
805	&unpst_tracemdns, `0`, "");
806	SYSCTL_DECL(_net_local_dgram);
807	SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW \| CTLFLAG_LOCKED,
808	&unpdg_sendspace, `0`, "");
809	SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW \| CTLFLAG_LOCKED,
810	&unpdg_recvspace, `0`, "");
811	SYSCTL_DECL(_net_local);
812	SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD \| CTLFLAG_LOCKED, &unp_rights, `0`, "");
813
814	/*
815	* Returns: 0 Success
816	* ENOBUFS
817	* soreserve:ENOBUFS
818	*/
819	static int
820	unp_attach(struct socket *so)
821	{
822	struct unpcb *unp;
823	int error = `0`;
824
825	if (so->so_snd.sb_hiwat == `0` \|\| so->so_rcv.sb_hiwat == `0`) {
826	switch (so->so_type) {
827
828	case SOCK_STREAM:
829	error = soreserve(so, unpst_sendspace, unpst_recvspace);
830	break;
831
832	case SOCK_DGRAM:
833	error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
834	break;
835
836	default:
837	panic("unp_attach");
838	}
839	if (error)
840	return (error);
841	}
842	unp = (struct unpcb *)zalloc(unp_zone);
843	if (unp == NULL)
844	return (ENOBUFS);
845	bzero(unp, sizeof (*unp));
846
847	lck_mtx_init(&unp->unp_mtx,
848	unp_mtx_grp, unp_mtx_attr);
849
850	lck_rw_lock_exclusive(unp_list_mtx);
851	LIST_INIT(&unp->unp_refs);
852	unp->unp_socket = so;
853	unp->unp_gencnt = ++unp_gencnt;
854	unp_count++;
855	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
856	&unp_dhead : &unp_shead, unp, unp_link);
857	lck_rw_done(unp_list_mtx);
858	so->so_pcb = (caddr_t)unp;
859	/*
860	* Mark AF_UNIX socket buffers accordingly so that:
861	*
862	* a. In the SOCK_STREAM case, socket buffer append won't fail due to
863	* the lack of space; this essentially loosens the sbspace() check,
864	* since there is disconnect between sosend() and uipc_send() with
865	* respect to flow control that might result in our dropping the
866	* data in uipc_send(). By setting this, we allow for slightly
867	* more records to be appended to the receiving socket to avoid
868	* losing data (which we can't afford in the SOCK_STREAM case).
869	* Flow control still takes place since we adjust the sender's
870	* hiwat during each send. This doesn't affect the SOCK_DGRAM
871	* case and append would still fail when the queue overflows.
872	*
873	* b. In the presence of control messages containing internalized
874	* file descriptors, the append routines will not free them since
875	* we'd need to undo the work first via unp_dispose().
876	*/
877	so->so_rcv.sb_flags \|= SB_UNIX;
878	so->so_snd.sb_flags \|= SB_UNIX;
879	return (`0`);
880	}
881
882	static void
883	unp_detach(struct unpcb *unp)
884	{
885	int so_locked = `1`;
886
887	lck_rw_lock_exclusive(unp_list_mtx);
888	LIST_REMOVE(unp, unp_link);
889	--unp_count;
890	++unp_gencnt;
891	lck_rw_done(unp_list_mtx);
892	if (unp->unp_vnode) {
893	struct vnode *tvp = NULL;
894	socket_unlock(unp->unp_socket, `0`);
895
896	/ Holding unp_connect_lock will avoid a race between*
897	* a thread closing the listening socket and a thread
898	* connecting to it.
899	*/
900	lck_mtx_lock(unp_connect_lock);
901	socket_lock(unp->unp_socket, `0`);
902	if (unp->unp_vnode) {
903	tvp = unp->unp_vnode;
904	unp->unp_vnode->v_socket = NULL;
905	unp->unp_vnode = NULL;
906	}
907	lck_mtx_unlock(unp_connect_lock);
908	if (tvp != NULL)
909	vnode_rele(tvp); / drop the usecount /
910	}
911	if (unp->unp_conn)
912	unp_disconnect(unp);
913	while (unp->unp_refs.lh_first) {
914	struct unpcb *unp2 = NULL;
915
916	/ This datagram socket is connected to one or more*
917	* sockets. In order to avoid a race condition between removing
918	* this reference and closing the connected socket, we need
919	* to check disconnect_in_progress
920	*/
921	if (so_locked == `1`) {
922	socket_unlock(unp->unp_socket, `0`);
923	so_locked = `0`;
924	}
925	lck_mtx_lock(unp_disconnect_lock);
926	while (disconnect_in_progress != `0`) {
927	(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
928	PSOCK, "disconnect", NULL);
929	}
930	disconnect_in_progress = `1`;
931	lck_mtx_unlock(unp_disconnect_lock);
932
933	/ Now we are sure that any unpcb socket disconnect is not happening /
934	if (unp->unp_refs.lh_first != NULL) {
935	unp2 = unp->unp_refs.lh_first;
936	socket_lock(unp2->unp_socket, `1`);
937	}
938
939	lck_mtx_lock(unp_disconnect_lock);
940	disconnect_in_progress = `0`;
941	wakeup(&disconnect_in_progress);
942	lck_mtx_unlock(unp_disconnect_lock);
943
944	if (unp2 != NULL) {
945	/ We already locked this socket and have a reference on it /
946	unp_drop(unp2, ECONNRESET);
947	socket_unlock(unp2->unp_socket, `1`);
948	}
949	}
950
951	if (so_locked == `0`) {
952	socket_lock(unp->unp_socket, `0`);
953	so_locked = `1`;
954	}
955	soisdisconnected(unp->unp_socket);
956	/ makes sure we're getting dealloced /
957	unp->unp_socket->so_flags \|= SOF_PCBCLEARING;
958	}
959
960	/*
961	* Returns: 0 Success
962	* EAFNOSUPPORT
963	* EINVAL
964	* EADDRINUSE
965	* namei:??? [anything namei can return]
966	* vnode_authorize:??? [anything vnode_authorize can return]
967	*
968	* Notes: p at this point is the current process, as this function is
969	* only called by sobind().
970	*/
971	static int
972	unp_bind(
973	struct unpcb *unp,
974	struct sockaddr *nam,
975	proc_t p)
976	{
977	struct sockaddr_un soun = (struct* sockaddr_un *)nam;
978	struct vnode vp, dvp;
979	struct vnode_attr va;
980	vfs_context_t ctx = vfs_context_current();
981	int error, namelen;
982	struct nameidata nd;
983	struct socket *so = unp->unp_socket;
984	char buf[SOCK_MAXADDRLEN];
985
986	if (nam->sa_family != `0` && nam->sa_family != AF_UNIX) {
987	return (EAFNOSUPPORT);
988	}
989
990	/*
991	* Check if the socket is already bound to an address
992	*/
993	if (unp->unp_vnode != NULL)
994	return (EINVAL);
995	/*
996	* Check if the socket may have been shut down
997	*/
998	if ((so->so_state & (SS_CANTRCVMORE \| SS_CANTSENDMORE)) ==
999	(SS_CANTRCVMORE \| SS_CANTSENDMORE))
1000	return (EINVAL);
1001
1002	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
1003	if (namelen <= `0`)
1004	return (EINVAL);
1005	/*
1006	* Note: sun_path is not a zero terminated "C" string
1007	*/
1008	if (namelen >= SOCK_MAXADDRLEN)
1009	return (EINVAL);
1010	bcopy(soun->sun_path, buf, namelen);
1011	buf[namelen] = `0`;
1012
1013	socket_unlock(so, `0`);
1014
1015	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW \| LOCKPARENT, UIO_SYSSPACE,
1016	CAST_USER_ADDR_T(buf), ctx);
1017	/ SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's /
1018	error = namei(&nd);
1019	if (error) {
1020	socket_lock(so, `0`);
1021	return (error);
1022	}
1023	dvp = nd.ni_dvp;
1024	vp = nd.ni_vp;
1025
1026	if (vp != NULL) {
1027	/*
1028	* need to do this before the vnode_put of dvp
1029	* since we may have to release an fs_nodelock
1030	*/
1031	nameidone(&nd);
1032
1033	vnode_put(dvp);
1034	vnode_put(vp);
1035
1036	socket_lock(so, `0`);
1037	return (EADDRINUSE);
1038	}
1039
1040	VATTR_INIT(&va);
1041	VATTR_SET(&va, va_type, VSOCK);
1042	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1043
1044	#if CONFIG_MACF
1045	error = mac_vnode_check_create(ctx,
1046	nd.ni_dvp, &nd.ni_cnd, &va);
1047
1048	if (error == `0`)
1049	#endif /* CONFIG_MACF */
1050	#if CONFIG_MACF_SOCKET_SUBSET
1051	error = mac_vnode_check_uipc_bind(ctx,
1052	nd.ni_dvp, &nd.ni_cnd, &va);
1053
1054	if (error == `0`)
1055	#endif /* MAC_SOCKET_SUBSET */
1056	/ authorize before creating /
1057	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1058
1059	if (!error) {
1060	/ create the socket /
1061	error = vn_create(dvp, &vp, &nd, &va, `0`, `0`, NULL, ctx);
1062	}
1063
1064	nameidone(&nd);
1065	vnode_put(dvp);
1066
1067	if (error) {
1068	socket_lock(so, `0`);
1069	return (error);
1070	}
1071	vnode_ref(vp); / gain a longterm reference /
1072	socket_lock(so, `0`);
1073	vp->v_socket = unp->unp_socket;
1074	unp->unp_vnode = vp;
1075	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, `1`);
1076	vnode_put(vp); / drop the iocount /
1077
1078	return (`0`);
1079	}
1080
1081
1082	/*
1083	* Returns: 0 Success
1084	* EAFNOSUPPORT Address family not supported
1085	* EINVAL Invalid argument
1086	* ENOTSOCK Not a socket
1087	* ECONNREFUSED Connection refused
1088	* EPROTOTYPE Protocol wrong type for socket
1089	* EISCONN Socket is connected
1090	* unp_connect2:EPROTOTYPE Protocol wrong type for socket
1091	* unp_connect2:EINVAL Invalid argument
1092	* namei:??? [anything namei can return]
1093	* vnode_authorize:???? [anything vnode_authorize can return]
1094	*
1095	* Notes: p at this point is the current process, as this function is
1096	* only called by sosend(), sendfile(), and soconnectlock().
1097	*/
1098	static int
1099	unp_connect(struct socket so, struct* sockaddr *nam, __unused proc_t p)
1100	{
1101	struct sockaddr_un soun = (struct* sockaddr_un *)nam;
1102	struct vnode *vp;
1103	struct socket so2, so3, *list_so=NULL;
1104	struct unpcb unp, unp2, *unp3;
1105	vfs_context_t ctx = vfs_context_current();
1106	int error, len;
1107	struct nameidata nd;
1108	char buf[SOCK_MAXADDRLEN];
1109
1110	if (nam->sa_family != `0` && nam->sa_family != AF_UNIX) {
1111	return (EAFNOSUPPORT);
1112	}
1113
1114	unp = sotounpcb(so);
1115	so2 = so3 = NULL;
1116
1117	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1118	if (len <= `0`)
1119	return (EINVAL);
1120	/*
1121	* Note: sun_path is not a zero terminated "C" string
1122	*/
1123	if (len >= SOCK_MAXADDRLEN)
1124	return (EINVAL);
1125	bcopy(soun->sun_path, buf, len);
1126	buf[len] = `0`;
1127
1128	socket_unlock(so, `0`);
1129
1130	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW \| LOCKLEAF, UIO_SYSSPACE,
1131	CAST_USER_ADDR_T(buf), ctx);
1132	error = namei(&nd);
1133	if (error) {
1134	socket_lock(so, `0`);
1135	return (error);
1136	}
1137	nameidone(&nd);
1138	vp = nd.ni_vp;
1139	if (vp->v_type != VSOCK) {
1140	error = ENOTSOCK;
1141	socket_lock(so, `0`);
1142	goto out;
1143	}
1144
1145	#if CONFIG_MACF_SOCKET_SUBSET
1146	error = mac_vnode_check_uipc_connect(ctx, vp, so);
1147	if (error) {
1148	socket_lock(so, `0`);
1149	goto out;
1150	}
1151	#endif /* MAC_SOCKET_SUBSET */
1152
1153	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1154	if (error) {
1155	socket_lock(so, `0`);
1156	goto out;
1157	}
1158
1159	lck_mtx_lock(unp_connect_lock);
1160
1161	if (vp->v_socket == `0`) {
1162	lck_mtx_unlock(unp_connect_lock);
1163	error = ECONNREFUSED;
1164	socket_lock(so, `0`);
1165	goto out;
1166	}
1167
1168	socket_lock(vp->v_socket, `1`); / Get a reference on the listening socket /
1169	so2 = vp->v_socket;
1170	lck_mtx_unlock(unp_connect_lock);
1171
1172
1173	if (so2->so_pcb == NULL) {
1174	error = ECONNREFUSED;
1175	if (so != so2) {
1176	socket_unlock(so2, `1`);
1177	socket_lock(so, `0`);
1178	} else {
1179	/ Release the reference held for the listen socket /
1180	VERIFY(so2->so_usecount > `0`);
1181	so2->so_usecount--;
1182	}
1183	goto out;
1184	}
1185
1186	if (so < so2) {
1187	socket_unlock(so2, `0`);
1188	socket_lock(so, `0`);
1189	socket_lock(so2, `0`);
1190	} else if (so > so2) {
1191	socket_lock(so, `0`);
1192	}
1193	/*
1194	* Check if socket was connected while we were trying to
1195	* get the socket locks in order.
1196	* XXX - probably shouldn't return an error for SOCK_DGRAM
1197	*/
1198	if ((so->so_state & SS_ISCONNECTED) != `0`) {
1199	error = EISCONN;
1200	goto decref_out;
1201	}
1202
1203	if (so->so_type != so2->so_type) {
1204	error = EPROTOTYPE;
1205	goto decref_out;
1206	}
1207
1208	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1209	/ Release the incoming socket but keep a reference /
1210	socket_unlock(so, `0`);
1211
1212	if ((so2->so_options & SO_ACCEPTCONN) == `0` \|\|
1213	(so3 = sonewconn(so2, `0`, nam)) == `0`) {
1214	error = ECONNREFUSED;
1215	if (so != so2) {
1216	socket_unlock(so2, `1`);
1217	socket_lock(so, `0`);
1218	} else {
1219	socket_lock(so, `0`);
1220	/ Release the reference held for*
1221	* listen socket.
1222	*/
1223	VERIFY(so2->so_usecount > `0`);
1224	so2->so_usecount--;
1225	}
1226	goto out;
1227	}
1228	unp2 = sotounpcb(so2);
1229	unp3 = sotounpcb(so3);
1230	if (unp2->unp_addr)
1231	unp3->unp_addr = (struct sockaddr_un *)
1232	dup_sockaddr((struct sockaddr *)unp2->unp_addr, `1`);
1233
1234	/*
1235	* unp_peercred management:
1236	*
1237	* The connecter's (client's) credentials are copied
1238	* from its process structure at the time of connect()
1239	* (which is now).
1240	*/
1241	cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1242	unp3->unp_flags \|= UNP_HAVEPC;
1243	/*
1244	* The receiver's (server's) credentials are copied
1245	* from the unp_peercred member of socket on which the
1246	* former called listen(); unp_listen() cached that
1247	* process's credentials at that time so we can use
1248	* them now.
1249	*/
1250	KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1251	("unp_connect: listener without cached peercred"));
1252
1253	/ Here we need to have both so and so2 locks and so2*
1254	* is already locked. Lock ordering is required.
1255	*/
1256	if (so < so2) {
1257	socket_unlock(so2, `0`);
1258	socket_lock(so, `0`);
1259	socket_lock(so2, `0`);
1260	} else {
1261	socket_lock(so, `0`);
1262	}
1263
1264	/ Check again if the socket state changed when its lock was released /
1265	if ((so->so_state & SS_ISCONNECTED) != `0`) {
1266	error = EISCONN;
1267	socket_unlock(so2, `1`);
1268	socket_lock(so3, `0`);
1269	sofreelastref(so3, `1`);
1270	goto out;
1271	}
1272	memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1273	sizeof (unp->unp_peercred));
1274	unp->unp_flags \|= UNP_HAVEPC;
1275
1276	#if CONFIG_MACF_SOCKET
1277	/ XXXMAC: recursive lock: SOCK_LOCK(so); /
1278	mac_socketpeer_label_associate_socket(so, so3);
1279	mac_socketpeer_label_associate_socket(so3, so);
1280	/ XXXMAC: SOCK_UNLOCK(so); /
1281	#endif /* MAC_SOCKET */
1282
1283	/ Hold the reference on listening socket until the end /
1284	socket_unlock(so2, `0`);
1285	list_so = so2;
1286
1287	/ Lock ordering doesn't matter because so3 was just created /
1288	socket_lock(so3, `1`);
1289	so2 = so3;
1290
1291	/*
1292	* Enable tracing for mDNSResponder endpoints. (The use
1293	* of sizeof instead of strlen below takes the null
1294	* terminating character into account.)
1295	*/
1296	if (unpst_tracemdns &&
1297	!strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1298	sizeof (MDNSRESPONDER_PATH))) {
1299	unp->unp_flags \|= UNP_TRACE_MDNS;
1300	unp2->unp_flags \|= UNP_TRACE_MDNS;
1301	}
1302	}
1303
1304	error = unp_connect2(so, so2);
1305
1306	decref_out:
1307	if (so2 != NULL) {
1308	if (so != so2) {
1309	socket_unlock(so2, `1`);
1310	} else {
1311	/ Release the extra reference held for the listen socket.*
1312	* This is possible only for SOCK_DGRAM sockets. We refuse
1313	* connecting to the same socket for SOCK_STREAM sockets.
1314	*/
1315	VERIFY(so2->so_usecount > `0`);
1316	so2->so_usecount--;
1317	}
1318	}
1319
1320	if (list_so != NULL) {
1321	socket_lock(list_so, `0`);
1322	socket_unlock(list_so, `1`);
1323	}
1324
1325	out:
1326	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1327	vnode_put(vp);
1328	return (error);
1329	}
1330
1331	/*
1332	* Returns: 0 Success
1333	* EPROTOTYPE Protocol wrong type for socket
1334	* EINVAL Invalid argument
1335	*/
1336	int
1337	unp_connect2(struct socket so, struct* socket *so2)
1338	{
1339	struct unpcb *unp = sotounpcb(so);
1340	struct unpcb *unp2;
1341
1342	if (so2->so_type != so->so_type)
1343	return (EPROTOTYPE);
1344
1345	unp2 = sotounpcb(so2);
1346
1347	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1348	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1349
1350	/ Verify both sockets are still opened /
1351	if (unp == `0` \|\| unp2 == `0`)
1352	return (EINVAL);
1353
1354	unp->unp_conn = unp2;
1355	so2->so_usecount++;
1356
1357	switch (so->so_type) {
1358
1359	case SOCK_DGRAM:
1360	LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1361
1362	if (so != so2) {
1363	/ Avoid lock order reversals due to drop/acquire in soisconnected. /
1364	/ Keep an extra reference on so2 that will be dropped*
1365	* soon after getting the locks in order
1366	*/
1367	socket_unlock(so2, `0`);
1368	soisconnected(so);
1369	unp_get_locks_in_order(so, so2);
1370	VERIFY(so2->so_usecount > `0`);
1371	so2->so_usecount--;
1372	} else {
1373	soisconnected(so);
1374	}
1375
1376	break;
1377
1378	case SOCK_STREAM:
1379	/ This takes care of socketpair /
1380	if (!(unp->unp_flags & UNP_HAVEPC) &&
1381	!(unp2->unp_flags & UNP_HAVEPC)) {
1382	cru2x(kauth_cred_get(), &unp->unp_peercred);
1383	unp->unp_flags \|= UNP_HAVEPC;
1384
1385	cru2x(kauth_cred_get(), &unp2->unp_peercred);
1386	unp2->unp_flags \|= UNP_HAVEPC;
1387	}
1388	unp2->unp_conn = unp;
1389	so->so_usecount++;
1390
1391	/ Avoid lock order reversals due to drop/acquire in soisconnected. /
1392	socket_unlock(so, `0`);
1393	soisconnected(so2);
1394
1395	/ Keep an extra reference on so2, that will be dropped soon after*
1396	* getting the locks in order again.
1397	*/
1398	socket_unlock(so2, `0`);
1399
1400	socket_lock(so, `0`);
1401	soisconnected(so);
1402
1403	unp_get_locks_in_order(so, so2);
1404	/ Decrement the extra reference left before /
1405	VERIFY(so2->so_usecount > `0`);
1406	so2->so_usecount--;
1407	break;
1408
1409	default:
1410	panic("unknown socket type %d in unp_connect2", so->so_type);
1411	}
1412	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1413	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1414	return (`0`);
1415	}
1416
1417	static void
1418	unp_disconnect(struct unpcb *unp)
1419	{
1420	struct unpcb *unp2 = NULL;
1421	struct socket so2 = NULL, so;
1422	struct socket *waitso;
1423	int so_locked = `1`, strdisconn = `0`;
1424
1425	so = unp->unp_socket;
1426	if (unp->unp_conn == NULL) {
1427	return;
1428	}
1429	lck_mtx_lock(unp_disconnect_lock);
1430	while (disconnect_in_progress != `0`) {
1431	if (so_locked == `1`) {
1432	socket_unlock(so, `0`);
1433	so_locked = `0`;
1434	}
1435	(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1436	PSOCK, "disconnect", NULL);
1437	}
1438	disconnect_in_progress = `1`;
1439	lck_mtx_unlock(unp_disconnect_lock);
1440
1441	if (so_locked == `0`) {
1442	socket_lock(so, `0`);
1443	so_locked = `1`;
1444	}
1445
1446	unp2 = unp->unp_conn;
1447
1448	if (unp2 == `0` \|\| unp2->unp_socket == NULL) {
1449	goto out;
1450	}
1451	so2 = unp2->unp_socket;
1452
1453	try_again:
1454	if (so == so2) {
1455	if (so_locked == `0`) {
1456	socket_lock(so, `0`);
1457	}
1458	waitso = so;
1459	} else if (so < so2) {
1460	if (so_locked == `0`) {
1461	socket_lock(so, `0`);
1462	}
1463	socket_lock(so2, `1`);
1464	waitso = so2;
1465	} else {
1466	if (so_locked == `1`) {
1467	socket_unlock(so, `0`);
1468	}
1469	socket_lock(so2, `1`);
1470	socket_lock(so, `0`);
1471	waitso = so;
1472	}
1473	so_locked = `1`;
1474
1475	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1476	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1477
1478	/ Check for the UNP_DONTDISCONNECT flag, if it*
1479	* is set, release both sockets and go to sleep
1480	*/
1481
1482	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != `0`) {
1483	if (so != so2) {
1484	socket_unlock(so2, `1`);
1485	}
1486	so_locked = `0`;
1487
1488	(void)msleep(waitso->so_pcb, &unp->unp_mtx,
1489	PSOCK \| PDROP, "unpdisconnect", NULL);
1490	goto try_again;
1491	}
1492
1493	if (unp->unp_conn == NULL) {
1494	panic("unp_conn became NULL after sleep");
1495	}
1496
1497	unp->unp_conn = NULL;
1498	VERIFY(so2->so_usecount > `0`);
1499	so2->so_usecount--;
1500
1501	if (unp->unp_flags & UNP_TRACE_MDNS)
1502	unp->unp_flags &= ~UNP_TRACE_MDNS;
1503
1504	switch (unp->unp_socket->so_type) {
1505
1506	case SOCK_DGRAM:
1507	LIST_REMOVE(unp, unp_reflink);
1508	unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1509	if (so != so2)
1510	socket_unlock(so2, `1`);
1511	break;
1512
1513	case SOCK_STREAM:
1514	unp2->unp_conn = NULL;
1515	VERIFY(so->so_usecount > `0`);
1516	so->so_usecount--;
1517
1518	/ Set the socket state correctly but do a wakeup later when*
1519	* we release all locks except the socket lock, this will avoid
1520	* a deadlock.
1521	*/
1522	unp->unp_socket->so_state &= ~(SS_ISCONNECTING\|SS_ISCONNECTED\|SS_ISDISCONNECTING);
1523	unp->unp_socket->so_state \|= (SS_CANTRCVMORE\|SS_CANTSENDMORE\|SS_ISDISCONNECTED);
1524
1525	unp2->unp_socket->so_state &= ~(SS_ISCONNECTING\|SS_ISCONNECTED\|SS_ISDISCONNECTING);
1526	unp->unp_socket->so_state \|= (SS_CANTRCVMORE\|SS_CANTSENDMORE\|SS_ISDISCONNECTED);
1527
1528	if (unp2->unp_flags & UNP_TRACE_MDNS)
1529	unp2->unp_flags &= ~UNP_TRACE_MDNS;
1530
1531	strdisconn = `1`;
1532	break;
1533	default:
1534	panic("unknown socket type %d", so->so_type);
1535	}
1536	out:
1537	lck_mtx_lock(unp_disconnect_lock);
1538	disconnect_in_progress = `0`;
1539	wakeup(&disconnect_in_progress);
1540	lck_mtx_unlock(unp_disconnect_lock);
1541
1542	if (strdisconn) {
1543	socket_unlock(so, `0`);
1544	soisdisconnected(so2);
1545	socket_unlock(so2, `1`);
1546
1547	socket_lock(so,`0`);
1548	soisdisconnected(so);
1549	}
1550	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1551	return;
1552	}
1553
1554	/*
1555	* unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1556	* The unpcb_compat data structure is passed to user space and must not change.
1557	*/
1558	static void
1559	unpcb_to_compat(struct unpcb up, struct* unpcb_compat *cp)
1560	{
1561	#if defined(__LP64__)
1562	cp->unp_link.le_next = (u_int32_t)
1563	VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1564	cp->unp_link.le_prev = (u_int32_t)
1565	VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1566	#else
1567	cp->unp_link.le_next = (struct unpcb_compat *)
1568	VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1569	cp->unp_link.le_prev = (struct unpcb_compat **)
1570	VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1571	#endif
1572	cp->unp_socket = (_UNPCB_PTR(struct socket *))
1573	VM_KERNEL_ADDRPERM(up->unp_socket);
1574	cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1575	VM_KERNEL_ADDRPERM(up->unp_vnode);
1576	cp->unp_ino = up->unp_ino;
1577	cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1578	VM_KERNEL_ADDRPERM(up->unp_conn);
1579	cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1580	#if defined(__LP64__)
1581	cp->unp_reflink.le_next =
1582	(u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1583	cp->unp_reflink.le_prev =
1584	(u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1585	#else
1586	cp->unp_reflink.le_next =
1587	(struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1588	cp->unp_reflink.le_prev =
1589	(struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1590	#endif
1591	cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1592	VM_KERNEL_ADDRPERM(up->unp_addr);
1593	cp->unp_cc = up->unp_cc;
1594	cp->unp_mbcnt = up->unp_mbcnt;
1595	cp->unp_gencnt = up->unp_gencnt;
1596	}
1597
1598	static int
1599	unp_pcblist SYSCTL_HANDLER_ARGS
1600	{
1601	#pragma unused(oidp,arg2)
1602	int error, i, n;
1603	struct unpcb unp, *unp_list;
1604	unp_gen_t gencnt;
1605	struct xunpgen xug;
1606	struct unp_head *head;
1607
1608	lck_rw_lock_shared(unp_list_mtx);
1609	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1610
1611	/*
1612	* The process of preparing the PCB list is too time-consuming and
1613	* resource-intensive to repeat twice on every request.
1614	*/
1615	if (req->oldptr == USER_ADDR_NULL) {
1616	n = unp_count;
1617	req->oldidx = `2` * sizeof (xug) + (n + n / `8`) *
1618	sizeof (struct xunpcb);
1619	lck_rw_done(unp_list_mtx);
1620	return (`0`);
1621	}
1622
1623	if (req->newptr != USER_ADDR_NULL) {
1624	lck_rw_done(unp_list_mtx);
1625	return (EPERM);
1626	}
1627
1628	/*
1629	* OK, now we're committed to doing something.
1630	*/
1631	gencnt = unp_gencnt;
1632	n = unp_count;
1633
1634	bzero(&xug, sizeof (xug));
1635	xug.xug_len = sizeof (xug);
1636	xug.xug_count = n;
1637	xug.xug_gen = gencnt;
1638	xug.xug_sogen = so_gencnt;
1639	error = SYSCTL_OUT(req, &xug, sizeof (xug));
1640	if (error) {
1641	lck_rw_done(unp_list_mtx);
1642	return (error);
1643	}
1644
1645	/*
1646	* We are done if there is no pcb
1647	*/
1648	if (n == `0`) {
1649	lck_rw_done(unp_list_mtx);
1650	return (`0`);
1651	}
1652
1653	MALLOC(unp_list, struct unpcb *, n sizeof (*unp_list),
1654	M_TEMP, M_WAITOK);
1655	if (unp_list == `0`) {
1656	lck_rw_done(unp_list_mtx);
1657	return (ENOMEM);
1658	}
1659
1660	for (unp = head->lh_first, i = `0`; unp && i < n;
1661	unp = unp->unp_link.le_next) {
1662	if (unp->unp_gencnt <= gencnt)
1663	unp_list[i++] = unp;
1664	}
1665	n = i; / in case we lost some during malloc /
1666
1667	error = `0`;
1668	for (i = `0`; i < n; i++) {
1669	unp = unp_list[i];
1670	if (unp->unp_gencnt <= gencnt) {
1671	struct xunpcb xu;
1672
1673	bzero(&xu, sizeof (xu));
1674	xu.xu_len = sizeof (xu);
1675	xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1676	VM_KERNEL_ADDRPERM(unp);
1677	/*
1678	* XXX - need more locking here to protect against
1679	* connect/disconnect races for SMP.
1680	*/
1681	if (unp->unp_addr)
1682	bcopy(unp->unp_addr, &xu.xu_addr,
1683	unp->unp_addr->sun_len);
1684	if (unp->unp_conn && unp->unp_conn->unp_addr)
1685	bcopy(unp->unp_conn->unp_addr,
1686	&xu.xu_caddr,
1687	unp->unp_conn->unp_addr->sun_len);
1688	unpcb_to_compat(unp, &xu.xu_unp);
1689	sotoxsocket(unp->unp_socket, &xu.xu_socket);
1690	error = SYSCTL_OUT(req, &xu, sizeof (xu));
1691	}
1692	}
1693	if (!error) {
1694	/*
1695	* Give the user an updated idea of our state.
1696	* If the generation differs from what we told
1697	* her before, she knows that something happened
1698	* while we were processing this request, and it
1699	* might be necessary to retry.
1700	*/
1701	bzero(&xug, sizeof (xug));
1702	xug.xug_len = sizeof (xug);
1703	xug.xug_gen = unp_gencnt;
1704	xug.xug_sogen = so_gencnt;
1705	xug.xug_count = unp_count;
1706	error = SYSCTL_OUT(req, &xug, sizeof (xug));
1707	}
1708	FREE(unp_list, M_TEMP);
1709	lck_rw_done(unp_list_mtx);
1710	return (error);
1711	}
1712
1713	SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
1714	CTLTYPE_STRUCT \| CTLFLAG_RD \| CTLFLAG_LOCKED,
1715	(caddr_t)(long)SOCK_DGRAM, `0`, unp_pcblist, "S,xunpcb",
1716	"List of active local datagram sockets");
1717	SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
1718	CTLTYPE_STRUCT \| CTLFLAG_RD \| CTLFLAG_LOCKED,
1719	(caddr_t)(long)SOCK_STREAM, `0`, unp_pcblist, "S,xunpcb",
1720	"List of active local stream sockets");
1721
1722	#if !CONFIG_EMBEDDED
1723
1724	static int
1725	unp_pcblist64 SYSCTL_HANDLER_ARGS
1726	{
1727	#pragma unused(oidp,arg2)
1728	int error, i, n;
1729	struct unpcb unp, *unp_list;
1730	unp_gen_t gencnt;
1731	struct xunpgen xug;
1732	struct unp_head *head;
1733
1734	lck_rw_lock_shared(unp_list_mtx);
1735	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1736
1737	/*
1738	* The process of preparing the PCB list is too time-consuming and
1739	* resource-intensive to repeat twice on every request.
1740	*/
1741	if (req->oldptr == USER_ADDR_NULL) {
1742	n = unp_count;
1743	req->oldidx = `2` * sizeof (xug) + (n + n / `8`) *
1744	(sizeof (struct xunpcb64));
1745	lck_rw_done(unp_list_mtx);
1746	return (`0`);
1747	}
1748
1749	if (req->newptr != USER_ADDR_NULL) {
1750	lck_rw_done(unp_list_mtx);
1751	return (EPERM);
1752	}
1753
1754	/*
1755	* OK, now we're committed to doing something.
1756	*/
1757	gencnt = unp_gencnt;
1758	n = unp_count;
1759
1760	bzero(&xug, sizeof (xug));
1761	xug.xug_len = sizeof (xug);
1762	xug.xug_count = n;
1763	xug.xug_gen = gencnt;
1764	xug.xug_sogen = so_gencnt;
1765	error = SYSCTL_OUT(req, &xug, sizeof (xug));
1766	if (error) {
1767	lck_rw_done(unp_list_mtx);
1768	return (error);
1769	}
1770
1771	/*
1772	* We are done if there is no pcb
1773	*/
1774	if (n == `0`) {
1775	lck_rw_done(unp_list_mtx);
1776	return (`0`);
1777	}
1778
1779	MALLOC(unp_list, struct unpcb *, n sizeof (*unp_list),
1780	M_TEMP, M_WAITOK);
1781	if (unp_list == `0`) {
1782	lck_rw_done(unp_list_mtx);
1783	return (ENOMEM);
1784	}
1785
1786	for (unp = head->lh_first, i = `0`; unp && i < n;
1787	unp = unp->unp_link.le_next) {
1788	if (unp->unp_gencnt <= gencnt)
1789	unp_list[i++] = unp;
1790	}
1791	n = i; / in case we lost some during malloc /
1792
1793	error = `0`;
1794	for (i = `0`; i < n; i++) {
1795	unp = unp_list[i];
1796	if (unp->unp_gencnt <= gencnt) {
1797	struct xunpcb64 xu;
1798	size_t xu_len = sizeof(struct xunpcb64);
1799
1800	bzero(&xu, xu_len);
1801	xu.xu_len = xu_len;
1802	xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1803	xu.xunp_link.le_next = (u_int64_t)
1804	VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1805	xu.xunp_link.le_prev = (u_int64_t)
1806	VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1807	xu.xunp_socket = (u_int64_t)
1808	VM_KERNEL_ADDRPERM(unp->unp_socket);
1809	xu.xunp_vnode = (u_int64_t)
1810	VM_KERNEL_ADDRPERM(unp->unp_vnode);
1811	xu.xunp_ino = unp->unp_ino;
1812	xu.xunp_conn = (u_int64_t)
1813	VM_KERNEL_ADDRPERM(unp->unp_conn);
1814	xu.xunp_refs = (u_int64_t)
1815	VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1816	xu.xunp_reflink.le_next = (u_int64_t)
1817	VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1818	xu.xunp_reflink.le_prev = (u_int64_t)
1819	VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1820	xu.xunp_cc = unp->unp_cc;
1821	xu.xunp_mbcnt = unp->unp_mbcnt;
1822	xu.xunp_gencnt = unp->unp_gencnt;
1823
1824	if (unp->unp_socket)
1825	sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1826
1827	/*
1828	* XXX - need more locking here to protect against
1829	* connect/disconnect races for SMP.
1830	*/
1831	if (unp->unp_addr)
1832	bcopy(unp->unp_addr, &xu.xunp_addr,
1833	unp->unp_addr->sun_len);
1834	if (unp->unp_conn && unp->unp_conn->unp_addr)
1835	bcopy(unp->unp_conn->unp_addr,
1836	&xu.xunp_caddr,
1837	unp->unp_conn->unp_addr->sun_len);
1838
1839	error = SYSCTL_OUT(req, &xu, xu_len);
1840	}
1841	}
1842	if (!error) {
1843	/*
1844	* Give the user an updated idea of our state.
1845	* If the generation differs from what we told
1846	* her before, she knows that something happened
1847	* while we were processing this request, and it
1848	* might be necessary to retry.
1849	*/
1850	bzero(&xug, sizeof (xug));
1851	xug.xug_len = sizeof (xug);
1852	xug.xug_gen = unp_gencnt;
1853	xug.xug_sogen = so_gencnt;
1854	xug.xug_count = unp_count;
1855	error = SYSCTL_OUT(req, &xug, sizeof (xug));
1856	}
1857	FREE(unp_list, M_TEMP);
1858	lck_rw_done(unp_list_mtx);
1859	return (error);
1860	}
1861
1862	SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
1863	CTLTYPE_STRUCT \| CTLFLAG_RD \| CTLFLAG_LOCKED,
1864	(caddr_t)(long)SOCK_DGRAM, `0`, unp_pcblist64, "S,xunpcb64",
1865	"List of active local datagram sockets 64 bit");
1866	SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
1867	CTLTYPE_STRUCT \| CTLFLAG_RD \| CTLFLAG_LOCKED,
1868	(caddr_t)(long)SOCK_STREAM, `0`, unp_pcblist64, "S,xunpcb64",
1869	"List of active local stream sockets 64 bit");
1870
1871	#endif /* !CONFIG_EMBEDDED */
1872
1873	static void
1874	unp_shutdown(struct unpcb *unp)
1875	{
1876	struct socket *so = unp->unp_socket;
1877	struct socket *so2;
1878	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1879	so2 = unp->unp_conn->unp_socket;
1880	unp_get_locks_in_order(so, so2);
1881	socantrcvmore(so2);
1882	socket_unlock(so2, `1`);
1883	}
1884	}
1885
1886	static void
1887	unp_drop(struct unpcb unp, int* errno)
1888	{
1889	struct socket *so = unp->unp_socket;
1890
1891	so->so_error = errno;
1892	unp_disconnect(unp);
1893	}
1894
1895	/*
1896	* Returns: 0 Success
1897	* EMSGSIZE The new fd's will not fit
1898	* ENOBUFS Cannot alloc struct fileproc
1899	*/
1900	int
1901	unp_externalize(struct mbuf *rights)
1902	{
1903	proc_t p = current_proc(); / XXX /
1904	int i;
1905	struct cmsghdr cm = mtod(rights, struct* cmsghdr *);
1906	struct fileglob rp = (struct fileglob )(cm + `1`);
1907	int fds = (int* *)(cm + `1`);
1908	struct fileproc *fp;
1909	struct fileproc **fileproc_l;
1910	int newfds = (cm->cmsg_len - sizeof (cm)) / sizeof* (int);
1911	int f, error = `0`;
1912
1913	MALLOC(fileproc_l, struct fileproc **,
1914	newfds * sizeof (struct fileproc *), M_TEMP, M_WAITOK);
1915	if (fileproc_l == NULL) {
1916	error = ENOMEM;
1917	goto discard;
1918	}
1919
1920	proc_fdlock(p);
1921
1922	/*
1923	* if the new FD's will not fit, then we free them all
1924	*/
1925	if (!fdavail(p, newfds)) {
1926	proc_fdunlock(p);
1927	error = EMSGSIZE;
1928	goto discard;
1929	}
1930	/*
1931	* now change each pointer to an fd in the global table to
1932	* an integer that is the index to the local fd table entry
1933	* that we set up to point to the global one we are transferring.
1934	* XXX (1) this assumes a pointer and int are the same size,
1935	* XXX or the mbuf can hold the expansion
1936	* XXX (2) allocation failures should be non-fatal
1937	*/
1938	for (i = `0`; i < newfds; i++) {
1939	#if CONFIG_MACF_SOCKET
1940	/*
1941	* If receive access is denied, don't pass along
1942	* and error message, just discard the descriptor.
1943	*/
1944	if (mac_file_check_receive(kauth_cred_get(), rp[i])) {
1945	proc_fdunlock(p);
1946	unp_discard(rp[i], p);
1947	fds[i] = `0`;
1948	proc_fdlock(p);
1949	continue;
1950	}
1951	#endif
1952	if (fdalloc(p, `0`, &f))
1953	panic("unp_externalize:fdalloc");
1954	fp = fileproc_alloc_init(NULL);
1955	if (fp == NULL)
1956	panic("unp_externalize: MALLOC_ZONE");
1957	fp->f_iocount = `0`;
1958	fp->f_fglob = rp[i];
1959	if (fg_removeuipc_mark(rp[i])) {
1960
1961	/*
1962	* Take an iocount on the fp for completing the
1963	* removal from the global msg queue
1964	*/
1965	fp->f_iocount++;
1966	fileproc_l[i] = fp;
1967	} else {
1968	fileproc_l[i] = NULL;
1969	}
1970	procfdtbl_releasefd(p, f, fp);
1971	fds[i] = f;
1972	}
1973	proc_fdunlock(p);
1974
1975	for (i = `0`; i < newfds; i++) {
1976	if (fileproc_l[i] != NULL) {
1977	VERIFY(fileproc_l[i]->f_fglob != NULL &&
1978	(fileproc_l[i]->f_fglob->fg_lflags & FG_RMMSGQ));
1979	VERIFY(fds[i] >= `0`);
1980	fg_removeuipc(fileproc_l[i]->f_fglob);
1981
1982	/ Drop the iocount /
1983	fp_drop(p, fds[i], fileproc_l[i], `0`);
1984	fileproc_l[i] = NULL;
1985	}
1986	if (fds[i] != `0`)
1987	(void) OSAddAtomic(-`1`, &unp_rights);
1988	}
1989
1990	discard:
1991	if (fileproc_l != NULL)
1992	FREE(fileproc_l, M_TEMP);
1993	if (error) {
1994	for (i = `0`; i < newfds; i++) {
1995	unp_discard(*rp, p);
1996	*rp++ = NULL;
1997	}
1998	}
1999	return (error);
2000	}
2001
2002	void
2003	unp_init(void)
2004	{
2005	_CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2006	unp_zone = zinit(sizeof (struct unpcb),
2007	(nmbclusters * sizeof (struct unpcb)), `4096`, "unpzone");
2008
2009	if (unp_zone == `0`)
2010	panic("unp_init");
2011	LIST_INIT(&unp_dhead);
2012	LIST_INIT(&unp_shead);
2013
2014	/*
2015	* allocate lock group attribute and group for udp pcb mutexes
2016	*/
2017	unp_mtx_grp_attr = lck_grp_attr_alloc_init();
2018
2019	unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
2020
2021	unp_mtx_attr = lck_attr_alloc_init();
2022
2023	if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
2024	unp_mtx_attr)) == NULL)
2025	return; / pretty much dead if this fails... /
2026
2027	if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
2028	unp_mtx_attr)) == NULL)
2029	return;
2030
2031	if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
2032	unp_mtx_attr)) == NULL)
2033	return;
2034	}
2035
/*
 * Smaller of two values; defined only if MIN is not already provided.
 * Each argument may be evaluated twice, so avoid side effects in a and b.
 */
#ifndef MIN
#define	MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2039
2040	/*
2041	* Returns: 0 Success
2042	* EINVAL
2043	* fdgetf_noref:EBADF
2044	*/
2045	static int
2046	unp_internalize(struct mbuf *control, proc_t p)
2047	{
2048	struct cmsghdr cm = mtod(control, struct* cmsghdr *);
2049	int *fds;
2050	struct fileglob **rp;
2051	struct fileproc *fp;
2052	int i, error;
2053	int oldfds;
2054	uint8_t fg_ins[UIPC_MAX_CMSG_FD / `8`];
2055
2056	/ 64bit: cmsg_len is 'uint32_t', m_len is 'long' /
2057	if (cm->cmsg_type != SCM_RIGHTS \|\| cm->cmsg_level != SOL_SOCKET \|\|
2058	(socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
2059	return (EINVAL);
2060	}
2061	oldfds = (cm->cmsg_len - sizeof (cm)) / sizeof* (int);
2062	bzero(fg_ins, sizeof(fg_ins));
2063
2064	proc_fdlock(p);
2065	fds = (int *)(cm + `1`);
2066
2067	for (i = `0`; i < oldfds; i++) {
2068	struct fileproc *tmpfp;
2069	if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != `0`)) {
2070	proc_fdunlock(p);
2071	return (error);
2072	} else if (!file_issendable(p, tmpfp)) {
2073	proc_fdunlock(p);
2074	return (EINVAL);
2075	} else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
2076	error = fp_guard_exception(p,
2077	fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
2078	proc_fdunlock(p);
2079	return (error);
2080	}
2081	}
2082	rp = (struct fileglob **)(cm + `1`);
2083
2084	/ On K64 we need to walk backwards because a fileglob * is twice the size of an fd*
2085	* and doing them in-order would result in stomping over unprocessed fd's
2086	*/
2087	for (i = (oldfds - `1`); i >= `0`; i--) {
2088	(void) fdgetf_noref(p, fds[i], &fp);
2089	if (fg_insertuipc_mark(fp->f_fglob))
2090	fg_ins[i / `8`] \|= `0x80` >> (i % `8`);
2091	rp[i] = fp->f_fglob;
2092	}
2093	proc_fdunlock(p);
2094
2095	for (i = `0`; i < oldfds; i++) {
2096	if (fg_ins[i / `8`] & (`0x80` >> (i % `8`))) {
2097	VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
2098	fg_insertuipc(rp[i]);
2099	}
2100	(void) OSAddAtomic(`1`, &unp_rights);
2101	}
2102
2103	return (`0`);
2104	}
2105
/*
 * State of the in-flight-descriptor garbage collector:
 *   unp_defer    - reset to 0 when unp_gc() starts a pass and decremented
 *                  as FDEFER-flagged files are processed; the marking loop
 *                  repeats while it is non-zero
 *   unp_gcing    - non-zero while unp_gc() is running
 *   unp_gcwait   - set by unp_gc_wait() before it sleeps on unp_gcing
 *                  (presumably consumed by unp_gc() to decide whether a
 *                  wakeup is needed -- TODO confirm)
 *   unp_gcthread - set by unp_gc() to the thread running the collection
 */
static int	unp_defer, unp_gcing, unp_gcwait;
static thread_t unp_gcthread = NULL;
2108
2109	/ always called under uipc_lock /
2110	void
2111	unp_gc_wait(void)
2112	{
2113	if (unp_gcthread == current_thread())
2114	return;
2115
2116	while (unp_gcing != `0`) {
2117	unp_gcwait = `1`;
2118	msleep(&unp_gcing, uipc_lock, `0` , "unp_gc_wait", NULL);
2119	}
2120	}
2121
2122
2123	__private_extern__ void
2124	unp_gc(void)
2125	{
2126	struct fileglob fg, nextfg;
2127	struct socket *so;
2128	static struct fileglob **extra_ref;
2129	struct fileglob **fpp;
2130	int nunref, i;
2131	int need_gcwakeup = `0`;
2132
2133	lck_mtx_lock(uipc_lock);
2134	if (unp_gcing) {
2135	lck_mtx_unlock(uipc_lock);
2136	return;
2137	}
2138	unp_gcing = `1`;
2139	unp_defer = `0`;
2140	unp_gcthread = current_thread();
2141	lck_mtx_unlock(uipc_lock);
2142	/*
2143	* before going through all this, set all FDs to
2144	* be NOT defered and NOT externally accessible
2145	*/
2146	for (fg = fmsghead.lh_first; fg != `0`; fg = fg->f_msglist.le_next) {
2147	lck_mtx_lock(&fg->fg_lock);
2148	fg->fg_flag &= ~(FMARK\|FDEFER);
2149	lck_mtx_unlock(&fg->fg_lock);
2150	}
2151	do {
2152	for (fg = fmsghead.lh_first; fg != `0`;
2153	fg = fg->f_msglist.le_next) {
2154	lck_mtx_lock(&fg->fg_lock);
2155	/*
2156	* If the file is not open, skip it
2157	*/
2158	if (fg->fg_count == `0`) {
2159	lck_mtx_unlock(&fg->fg_lock);
2160	continue;
2161	}
2162	/*
2163	* If we already marked it as 'defer' in a
2164	* previous pass, then try process it this time
2165	* and un-mark it
2166	*/
2167	if (fg->fg_flag & FDEFER) {
2168	fg->fg_flag &= ~FDEFER;
2169	unp_defer--;
2170	} else {
2171	/*
2172	* if it's not defered, then check if it's
2173	* already marked.. if so skip it
2174	*/
2175	if (fg->fg_flag & FMARK) {
2176	lck_mtx_unlock(&fg->fg_lock);
2177	continue;
2178	}
2179	/*
2180	* If all references are from messages
2181	* in transit, then skip it. it's not
2182	* externally accessible.
2183	*/
2184	if (fg->fg_count == fg->fg_msgcount) {
2185	lck_mtx_unlock(&fg->fg_lock);
2186	continue;
2187	}
2188	/*
2189	* If it got this far then it must be
2190	* externally accessible.
2191	*/
2192	fg->fg_flag \|= FMARK;
2193	}
2194	/*
2195	* either it was defered, or it is externally
2196	* accessible and not already marked so.
2197	* Now check if it is possibly one of OUR sockets.
2198	*/
2199	if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET \|\|
2200	(so = (struct socket *)fg->fg_data) == `0`) {
2201	lck_mtx_unlock(&fg->fg_lock);
2202	continue;
2203	}
2204	if (so->so_proto->pr_domain != localdomain \|\|
2205	(so->so_proto->pr_flags&PR_RIGHTS) == `0`) {
2206	lck_mtx_unlock(&fg->fg_lock);
2207	continue;
2208	}
2209	#ifdef notdef
2210	if (so->so_rcv.sb_flags & SB_LOCK) {
2211	/*
2212	* This is problematical; it's not clear
2213	* we need to wait for the sockbuf to be
2214	* unlocked (on a uniprocessor, at least),
2215	* and it's also not clear what to do
2216	* if sbwait returns an error due to receipt
2217	* of a signal. If sbwait does return
2218	* an error, we'll go into an infinite
2219	* loop. Delete all of this for now.
2220	*/
2221	(void) sbwait(&so->so_rcv);
2222	goto restart;
2223	}
2224	#endif
2225	/*
2226	* So, Ok, it's one of our sockets and it IS externally
2227	* accessible (or was defered). Now we look
2228	* to see if we hold any file descriptors in its
2229	* message buffers. Follow those links and mark them
2230	* as accessible too.
2231	*
2232	* In case a file is passed onto itself we need to
2233	* release the file lock.
2234	*/
2235	lck_mtx_unlock(&fg->fg_lock);
2236
2237	unp_scan(so->so_rcv.sb_mb, unp_mark, `0`);
2238	}
2239	} while (unp_defer);
2240	/*
2241	* We grab an extra reference to each of the file table entries
2242	* that are not otherwise accessible and then free the rights
2243	* that are stored in messages on them.
2244	*
2245	* The bug in the orginal code is a little tricky, so I'll describe
2246	* what's wrong with it here.
2247	*
2248	* It is incorrect to simply unp_discard each entry for f_msgcount
2249	* times -- consider the case of sockets A and B that contain
2250	* references to each other. On a last close of some other socket,
2251	* we trigger a gc since the number of outstanding rights (unp_rights)
2252	* is non-zero. If during the sweep phase the gc code un_discards,
2253	* we end up doing a (full) closef on the descriptor. A closef on A
2254	* results in the following chain. Closef calls soo_close, which
2255	* calls soclose. Soclose calls first (through the switch
2256	* uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
2257	* returns because the previous instance had set unp_gcing, and
2258	* we return all the way back to soclose, which marks the socket
2259	* with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
2260	* to free up the rights that are queued in messages on the socket A,
2261	* i.e., the reference on B. The sorflush calls via the dom_dispose
2262	* switch unp_dispose, which unp_scans with unp_discard. This second
2263	* instance of unp_discard just calls closef on B.
2264	*
2265	* Well, a similar chain occurs on B, resulting in a sorflush on B,
2266	* which results in another closef on A. Unfortunately, A is already
2267	* being closed, and the descriptor has already been marked with
2268	* SS_NOFDREF, and soclose panics at this point.
2269	*
2270	* Here, we first take an extra reference to each inaccessible
2271	* descriptor. Then, we call sorflush ourself, since we know
2272	* it is a Unix domain socket anyhow. After we destroy all the
2273	* rights carried in messages, we do a last closef to get rid
2274	* of our extra reference. This is the last close, and the
2275	* unp_detach etc will shut down the socket.
2276	*
2277	* 91/09/19, bsy@cs.cmu.edu
2278	*/
2279	extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
2280	M_FILEGLOB, M_WAITOK);
2281	if (extra_ref == NULL)
2282	goto bail;
2283	for (nunref = `0`, fg = fmsghead.lh_first, fpp = extra_ref; fg != `0`;
2284	fg = nextfg) {
2285	lck_mtx_lock(&fg->fg_lock);
2286
2287	nextfg = fg->f_msglist.le_next;
2288	/*
2289	* If it's not open, skip it
2290	*/
2291	if (fg->fg_count == `0`) {
2292	lck_mtx_unlock(&fg->fg_lock);
2293	continue;
2294	}
2295	/*
2296	* If all refs are from msgs, and it's not marked accessible
2297	* then it must be referenced from some unreachable cycle
2298	* of (shut-down) FDs, so include it in our
2299	* list of FDs to remove
2300	*/
2301	if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
2302	fg->fg_count++;
2303	*fpp++ = fg;
2304	nunref++;
2305	}
2306	lck_mtx_unlock(&fg->fg_lock);
2307	}
2308	/*
2309	* for each FD on our hit list, do the following two things
2310	*/
2311	for (i = nunref, fpp = extra_ref; --i >= `0`; ++fpp) {
2312	struct fileglob *tfg;
2313
2314	tfg = *fpp;
2315
2316	if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2317	tfg->fg_data != NULL) {
2318	so = (struct socket *)(tfg->fg_data);
2319
2320	socket_lock(so, `0`);
2321
2322	sorflush(so);
2323
2324	socket_unlock(so, `0`);
2325	}
2326	}
2327	for (i = nunref, fpp = extra_ref; --i >= `0`; ++fpp)
2328	closef_locked((struct fileproc )`0`, fpp, (proc_t)NULL);
2329
2330	FREE((caddr_t)extra_ref, M_FILEGLOB);
2331	bail:
2332	lck_mtx_lock(uipc_lock);
2333	unp_gcing = `0`;
2334	unp_gcthread = NULL;
2335
2336	if (unp_gcwait != `0`) {
2337	unp_gcwait = `0`;
2338	need_gcwakeup = `1`;
2339	}
2340	lck_mtx_unlock(uipc_lock);
2341
2342	if (need_gcwakeup != `0`)
2343	wakeup(&unp_gcing);
2344	}
2345
2346	void
2347	unp_dispose(struct mbuf *m)
2348	{
2349	if (m) {
2350	unp_scan(m, unp_discard, NULL);
2351	}
2352	}
2353
2354	/*
2355	* Returns: 0 Success
2356	*/
2357	static int
2358	unp_listen(struct unpcb *unp, proc_t p)
2359	{
2360	kauth_cred_t safecred = kauth_cred_proc_ref(p);
2361	cru2x(safecred, &unp->unp_peercred);
2362	kauth_cred_unref(&safecred);
2363	unp->unp_flags \|= UNP_HAVEPCCACHED;
2364	return (`0`);
2365	}
2366
2367	static void
2368	unp_scan(struct mbuf m0, void* (op)(struct* fileglob , void* arg), void* *arg)
2369	{
2370	struct mbuf *m;
2371	struct fileglob **rp;
2372	struct cmsghdr *cm;
2373	int i;
2374	int qfds;
2375
2376	while (m0) {
2377	for (m = m0; m; m = m->m_next)
2378	if (m->m_type == MT_CONTROL &&
2379	(size_t)m->m_len >= sizeof (*cm)) {
2380	cm = mtod(m, struct cmsghdr *);
2381	if (cm->cmsg_level != SOL_SOCKET \|\|
2382	cm->cmsg_type != SCM_RIGHTS)
2383	continue;
2384	qfds = (cm->cmsg_len - sizeof (*cm)) /
2385	sizeof (int);
2386	rp = (struct fileglob **)(cm + `1`);
2387	for (i = `0`; i < qfds; i++)
2388	(op)(rp++, arg);
2389	break; / XXX, but saves time /
2390	}
2391	m0 = m0->m_act;
2392	}
2393	}
2394
2395	static void
2396	unp_mark(struct fileglob fg, __unused void* *arg)
2397	{
2398	lck_mtx_lock(&fg->fg_lock);
2399
2400	if (fg->fg_flag & FMARK) {
2401	lck_mtx_unlock(&fg->fg_lock);
2402	return;
2403	}
2404	fg->fg_flag \|= (FMARK\|FDEFER);
2405
2406	lck_mtx_unlock(&fg->fg_lock);
2407
2408	unp_defer++;
2409	}
2410
2411	static void
2412	unp_discard(struct fileglob fg, void* *p)
2413	{
2414	if (p == NULL)
2415	p = current_proc(); / XXX /
2416
2417	(void) OSAddAtomic(`1`, &unp_disposed);
2418	if (fg_removeuipc_mark(fg)) {
2419	VERIFY(fg->fg_lflags & FG_RMMSGQ);
2420	fg_removeuipc(fg);
2421	}
2422	(void) OSAddAtomic(-`1`, &unp_rights);
2423
2424	proc_fdlock(p);
2425	(void) closef_locked((struct fileproc *)`0`, fg, p);
2426	proc_fdunlock(p);
2427	}
2428
2429	int
2430	unp_lock(struct socket so, int* refcount, void * lr)
2431	{
2432	void * lr_saved;
2433	if (lr == `0`)
2434	lr_saved = (void *) __builtin_return_address(`0`);
2435	else lr_saved = lr;
2436
2437	if (so->so_pcb) {
2438	lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2439	} else {
2440	panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2441	so, lr_saved, so->so_usecount);
2442	}
2443
2444	if (so->so_usecount < `0`)
2445	panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2446	so, so->so_pcb, lr_saved, so->so_usecount);
2447
2448	if (refcount) {
2449	VERIFY(so->so_usecount > `0`);
2450	so->so_usecount++;
2451	}
2452	so->lock_lr[so->next_lock_lr] = lr_saved;
2453	so->next_lock_lr = (so->next_lock_lr+`1`) % SO_LCKDBG_MAX;
2454	return (`0`);
2455	}
2456
2457	int
2458	unp_unlock(struct socket so, int* refcount, void * lr)
2459	{
2460	void * lr_saved;
2461	lck_mtx_t * mutex_held = NULL;
2462	struct unpcb *unp = sotounpcb(so);
2463
2464	if (lr == `0`)
2465	lr_saved = (void *) __builtin_return_address(`0`);
2466	else lr_saved = lr;
2467
2468	if (refcount)
2469	so->so_usecount--;
2470
2471	if (so->so_usecount < `0`)
2472	panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2473	if (so->so_pcb == NULL) {
2474	panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2475	} else {
2476	mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2477	}
2478	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2479	so->unlock_lr[so->next_unlock_lr] = lr_saved;
2480	so->next_unlock_lr = (so->next_unlock_lr+`1`) % SO_LCKDBG_MAX;
2481
2482	if (so->so_usecount == `0` && (so->so_flags & SOF_PCBCLEARING)) {
2483	sofreelastref(so, `1`);
2484
2485	if (unp->unp_addr)
2486	FREE(unp->unp_addr, M_SONAME);
2487
2488	lck_mtx_unlock(mutex_held);
2489
2490	lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2491	zfree(unp_zone, unp);
2492
2493	unp_gc();
2494	} else {
2495	lck_mtx_unlock(mutex_held);
2496	}
2497
2498	return (`0`);
2499	}
2500
2501	lck_mtx_t *
2502	unp_getlock(struct socket so, __unused int* flags)
2503	{
2504	struct unpcb unp = (struct* unpcb *)so->so_pcb;
2505
2506
2507	if (so->so_pcb) {
2508	if (so->so_usecount < `0`)
2509	panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2510	return(&unp->unp_mtx);
2511	} else {
2512	panic("unp_getlock: so=%p NULL so_pcb\n", so);
2513	return (so->so_proto->pr_domain->dom_mtx);
2514	}
2515	}
2516

Browse the source code of xnu/bsd/kern/uipc_usrreq.c