1/*
2 * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72#include <sys/cdefs.h>
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/filedesc.h>
76#include <sys/proc_internal.h>
77#include <sys/file_internal.h>
78#include <sys/vnode_internal.h>
79#include <sys/malloc.h>
80#include <sys/mcache.h>
81#include <sys/mbuf.h>
82#include <kern/locks.h>
83#include <sys/domain.h>
84#include <sys/protosw.h>
85#include <sys/signalvar.h>
86#include <sys/socket.h>
87#include <sys/socketvar.h>
88#include <sys/kernel.h>
89#include <sys/uio_internal.h>
90#include <sys/kauth.h>
91#include <kern/task.h>
92#include <sys/priv.h>
93#include <sys/sysctl.h>
94#include <sys/sys_domain.h>
95#include <sys/types.h>
96
97#include <security/audit/audit.h>
98
99#include <sys/kdebug.h>
100#include <sys/sysproto.h>
101#include <netinet/in.h>
102#include <net/route.h>
103#include <netinet/in_pcb.h>
104
105#include <os/log.h>
106#include <os/ptrtools.h>
107
108#include <os/log.h>
109
110#if CONFIG_MACF_SOCKET_SUBSET
111#include <security/mac_framework.h>
112#endif /* MAC_SOCKET_SUBSET */
113
114#include <net/sockaddr_utils.h>
115
/*
 * Shorthands for fileproc fields that live in the object fp_glob points
 * to, so that code in this file can write fp->f_flag and fp->f_ops.
 */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops

/*
 * Trace codes built with NETDBG_CODE() in the DBG_NETSOCK class.
 * The DBG_LAYER_* codes use the small values 0..3; each DBG_FNC_* code
 * places a per-function number in bits 8 and up, optionally OR-ed with a
 * small sub-code in the low bits (see the SENDFILE variants).
 */
#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
135
/*
 * Forward declarations for referenced types.
 *
 * Each invocation names a base type, a short identifier and a pointer
 * kind (__CCT_PTR or __CCT_REF).  The prototypes below rely on the
 * resulting names, e.g. int32_ref_t, void_ptr_t, socklen_ref_t.
 * NOTE(review): the exact typedefs produced are defined by the macro in a
 * header not visible here -- presumably <short name>_ptr_t / _ref_t.
 */
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(void, void, __CCT_PTR);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(uint8_t, uint8_t, __CCT_PTR);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(int32_t, int32, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(int, int, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(user_ssize_t, user_ssize, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(unsigned int, uint, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(sae_connid_t, sae_connid, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(socklen_t, socklen, __CCT_REF);
/* Syscall argument structures passed by reference to the helpers below. */
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct setsockopt_args, setsockopt_args, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct connectx_args, connectx_args, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct disconnectx_args, disconnectx_args, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct cmsghdr, cmsghdr, __CCT_REF);
/* Native, 64-bit user and 32-bit user timeval layouts. */
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct timeval, timeval, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user64_timeval, user64_timeval, __CCT_REF);
__CCT_DECLARE_CONSTRAINED_PTR_TYPE(struct user32_timeval, user32_timeval, __CCT_REF);
152
/*
 * Prototypes for the file-local helpers.  All have internal linkage; the
 * syscall entry points themselves are declared elsewhere.
 */

/* Common send/receive back ends shared by the send*()/recv*() syscalls. */
static int sendit(proc_ref_t, socket_ref_t, user_msghdr_ref_t, uio_t,
    int, int32_ref_t );
static int recvit(proc_ref_t, int, user_msghdr_ref_t, uio_t, user_addr_t,
    int32_ref_t);
/* Blocking connect on an already-resolved address (used by connect()). */
static int connectit(socket_ref_t, sockaddr_ref_t);
/*
 * Copy a socket address in from user space: getsockaddr() hands back an
 * address that callers release with free_sockaddr(); getsockaddr_s() fills
 * a caller-provided sockaddr_storage instead.
 */
static int getsockaddr(socket_ref_t, sockaddr_ref_ref_t, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(socket_ref_t, sockaddr_storage_ref_t,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, uint_ref_t, mbuf_ref_ref_t,
    boolean_t);
#endif /* SENDFILE */
/* connectx()/disconnectx() back ends. */
static int connectx_nocancel(proc_ref_t, connectx_args_ref_t, int_ref_t);
static int connectitx(socket_ref_t, sockaddr_ref_t,
    sockaddr_ref_t, proc_ref_t, uint32_t, sae_associd_t,
    sae_connid_ref_t, uio_t, unsigned int, user_ssize_ref_t);
static int disconnectx_nocancel(proc_ref_t, disconnectx_args_ref_t,
    int_ref_t);
/* Shared implementation of socket() and socket_delegate(). */
static int socket_common(proc_ref_t, int, int, int, pid_t, int32_ref_t, int);

/* Conversion of message-header arrays between user and kernel form. */
static int internalize_recv_msghdr_array(const void_ptr_t, int, int,
    u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t);
static u_int externalize_recv_msghdr_array(proc_ref_t, socket_ref_t, void_ptr_t,
    u_int count, user_msghdr_x_ptr_t, recv_msg_elem_ptr_t, int_ref_t);

/* Lifetime management for the per-call recv_msg_elem array. */
static recv_msg_elem_ptr_t alloc_recv_msg_array(u_int count);
static int recv_msg_array_is_valid(recv_msg_elem_ptr_t, u_int count);
static void free_recv_msg_array(recv_msg_elem_ptr_t, u_int count);
static int copyout_control(proc_ref_t, mbuf_ref_t, user_addr_t control,
    socklen_ref_t, int_ref_t, socket_ref_t);
184
/* The knobs below are registered under the kern.ipc sysctl node. */
SYSCTL_DECL(_kern_ipc);

/* Initial value of the maxsendmsgx / maxrecvmsgx tunables. */
#define SO_MAX_MSG_X_DEFAULT 256

/*
 * kern.ipc.maxsendmsgx (read-write).
 * NOTE(review): presumably the per-call message limit for sendmsg_x();
 * the consumer is not in this part of the file.
 */
static u_int somaxsendmsgx = SO_MAX_MSG_X_DEFAULT;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");

/*
 * kern.ipc.maxrecvmsgx (read-write).
 * NOTE(review): presumably the per-call message limit for recvmsg_x().
 */
static u_int somaxrecvmsgx = SO_MAX_MSG_X_DEFAULT;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");

/*
 * kern.ipc.missingpktinfo (read-only from user space), starts at 0.
 * NOTE(review): looks like an event counter; confirm where it is bumped.
 */
static u_int missingpktinfo = 0;
SYSCTL_UINT(_kern_ipc, OID_AUTO, missingpktinfo,
    CTLFLAG_RD | CTLFLAG_LOCKED, &missingpktinfo, 0, "");

/*
 * kern.ipc.do_recvmsg_x_donttrunc (read-write), off by default.
 * NOTE(review): semantics are defined by the recvmsg_x() code, not shown.
 */
static int do_recvmsg_x_donttrunc = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, do_recvmsg_x_donttrunc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &do_recvmsg_x_donttrunc, 0, "");
204
/*
 * Debug logging support.
 *
 * On DEBUG/DEVELOPMENT kernels, kern.ipc.debug (uipc_debug) gates
 * DBG_PRINTF() at run time and DEBUG_KERNEL_ADDRPERM() passes its argument
 * through unchanged.  On other kernels DBG_PRINTF() expands to nothing and
 * DEBUG_KERNEL_ADDRPERM() maps to VM_KERNEL_ADDRPERM().
 *
 * Both DBG_PRINTF() variants are single statements wrapped in
 * do { } while (0), so the macro is safe inside an unbraced if/else and
 * behaves the same way syntactically in every build configuration.
 */
#if DEBUG || DEVELOPMENT
static int uipc_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &uipc_debug, 0, "");

#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) do {                                    \
	if (uipc_debug != 0) {                                  \
	        os_log(OS_LOG_DEFAULT, __VA_ARGS__);            \
	}                                                       \
} while (0)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
218
219
/*
 * Values for sendmsg_x_mode
 * 0: default
 * 1: sendit loop one at a time
 * 2: old implementation
 *
 * Exposed read-write as kern.ipc.sendmsg_x_mode; starts at 0.
 */
static u_int sendmsg_x_mode = 0;
SYSCTL_UINT(_kern_ipc, OID_AUTO, sendmsg_x_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sendmsg_x_mode, 0, "");

/*
 * System call interface to the socket abstraction.
 */

/*
 * File operations vector installed on every fileproc created in this file
 * (see the f_ops assignments in socket_common() and accept_nocancel()).
 * Defined in another translation unit.
 */
extern const struct fileops socketops;
235
236/*
237 * Returns: 0 Success
238 * EACCES Mandatory Access Control failure
239 * falloc:ENFILE
240 * falloc:EMFILE
241 * falloc:ENOMEM
242 * socreate:EAFNOSUPPORT
243 * socreate:EPROTOTYPE
244 * socreate:EPROTONOSUPPORT
245 * socreate:ENOBUFS
246 * socreate:ENOMEM
247 * socreate:??? [other protocol families, IPSEC]
248 */
249int
250socket(proc_ref_t p,
251 struct socket_args *uap,
252 int32_ref_t retval)
253{
254 return socket_common(p, uap->domain, uap->type, uap->protocol,
255 proc_selfpid(), retval, 0);
256}
257
258int
259socket_delegate(proc_ref_t p,
260 struct socket_delegate_args *uap,
261 int32_ref_t retval)
262{
263 return socket_common(p, uap->domain, uap->type, uap->protocol,
264 uap->epid, retval, 1);
265}
266
267static int
268socket_common(proc_ref_t p,
269 int domain,
270 int type,
271 int protocol,
272 pid_t epid,
273 int32_ref_t retval,
274 int delegate)
275{
276 socket_ref_t so;
277 fileproc_ref_t fp;
278 int fd, error;
279
280 AUDIT_ARG(socket, domain, type, protocol);
281#if CONFIG_MACF_SOCKET_SUBSET
282 if ((error = mac_socket_check_create(cred: kauth_cred_get(), domain,
283 type, protocol)) != 0) {
284 return error;
285 }
286#endif /* MAC_SOCKET_SUBSET */
287
288 if (delegate) {
289 error = priv_check_cred(cred: kauth_cred_get(),
290 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, flags: 0);
291 if (error) {
292 return EACCES;
293 }
294 }
295
296 error = falloc(p, &fp, &fd);
297 if (error) {
298 return error;
299 }
300 fp->f_flag = FREAD | FWRITE;
301 fp->f_ops = &socketops;
302
303 if (delegate) {
304 error = socreate_delegate(dom: domain, aso: &so, type, proto: protocol, epid);
305 } else {
306 error = socreate(dom: domain, aso: &so, type, proto: protocol);
307 }
308
309 if (error) {
310 fp_free(p, fd, fp);
311 } else {
312 fp_set_data(fp, fg_data: so);
313
314 proc_fdlock(p);
315 procfdtbl_releasefd(p, fd, NULL);
316
317 if (ENTR_SHOULDTRACE) {
318 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
319 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
320 }
321 fp_drop(p, fd, fp, locked: 1);
322 proc_fdunlock(p);
323
324 *retval = fd;
325 }
326 return error;
327}
328
329/*
330 * Returns: 0 Success
331 * EDESTADDRREQ Destination address required
332 * EBADF Bad file descriptor
333 * EACCES Mandatory Access Control failure
334 * file_socket:ENOTSOCK
335 * file_socket:EBADF
336 * getsockaddr:ENAMETOOLONG Filename too long
337 * getsockaddr:EINVAL Invalid argument
338 * getsockaddr:ENOMEM Not enough space
339 * getsockaddr:EFAULT Bad address
340 * sobindlock:???
341 */
342/* ARGSUSED */
343int
344bind(__unused proc_t p, struct bind_args *uap, __unused int32_ref_t retval)
345{
346 struct sockaddr_storage ss;
347 sockaddr_ref_t sa = NULL;
348 socket_ref_t so;
349 boolean_t want_free = TRUE;
350 int error;
351
352 AUDIT_ARG(fd, uap->s);
353 error = file_socket(uap->s, &so);
354 if (error != 0) {
355 return error;
356 }
357 if (so == NULL) {
358 error = EBADF;
359 goto out;
360 }
361 if (uap->name == USER_ADDR_NULL) {
362 error = EDESTADDRREQ;
363 goto out;
364 }
365 if (uap->namelen > sizeof(ss)) {
366 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
367 } else {
368 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
369 if (error == 0) {
370 sa = SA(&ss);
371 want_free = FALSE;
372 }
373 }
374 if (error != 0) {
375 goto out;
376 }
377 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
378#if CONFIG_MACF_SOCKET_SUBSET
379 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
380 (error = mac_socket_check_bind(cred: kauth_cred_get(), so, addr: sa)) == 0) {
381 error = sobindlock(so, nam: sa, dolock: 1); /* will lock socket */
382 }
383#else
384 error = sobindlock(so, sa, 1); /* will lock socket */
385#endif /* MAC_SOCKET_SUBSET */
386 if (want_free) {
387 free_sockaddr(sa);
388 }
389out:
390 file_drop(uap->s);
391 return error;
392}
393
394/*
395 * Returns: 0 Success
396 * EBADF
397 * EACCES Mandatory Access Control failure
398 * file_socket:ENOTSOCK
399 * file_socket:EBADF
400 * solisten:EINVAL
401 * solisten:EOPNOTSUPP
402 * solisten:???
403 */
404int
405listen(__unused proc_ref_t p, struct listen_args *uap,
406 __unused int32_ref_t retval)
407{
408 int error;
409 socket_ref_t so;
410
411 AUDIT_ARG(fd, uap->s);
412 error = file_socket(uap->s, &so);
413 if (error) {
414 return error;
415 }
416 if (so != NULL)
417#if CONFIG_MACF_SOCKET_SUBSET
418 {
419 error = mac_socket_check_listen(cred: kauth_cred_get(), so);
420 if (error == 0) {
421 error = solisten(so, backlog: uap->backlog);
422 }
423 }
424#else
425 { error = solisten(so, uap->backlog);}
426#endif /* MAC_SOCKET_SUBSET */
427 else {
428 error = EBADF;
429 }
430
431 file_drop(uap->s);
432 return error;
433}
434
435/*
436 * Returns: fp_get_ftype:EBADF Bad file descriptor
437 * fp_get_ftype:ENOTSOCK Socket operation on non-socket
438 * :EFAULT Bad address on copyin/copyout
439 * :EBADF Bad file descriptor
440 * :EOPNOTSUPP Operation not supported on socket
441 * :EINVAL Invalid argument
442 * :EWOULDBLOCK Operation would block
443 * :ECONNABORTED Connection aborted
444 * :EINTR Interrupted function
445 * :EACCES Mandatory Access Control failure
446 * falloc:ENFILE Too many files open in system
447 * falloc:EMFILE Too many open files
448 * falloc:ENOMEM Not enough space
449 * 0 Success
450 */
451int
452accept_nocancel(proc_ref_t p, struct accept_nocancel_args *uap,
453 int32_ref_t retval)
454{
455 fileproc_ref_t fp;
456 sockaddr_ref_t sa = NULL;
457 socklen_t namelen;
458 int error;
459 socket_ref_t head;
460 socket_ref_t so = NULL;
461 lck_mtx_t *mutex_held;
462 int fd = uap->s;
463 int newfd;
464 unsigned int fflag;
465 int dosocklock = 0;
466
467 *retval = -1;
468
469 AUDIT_ARG(fd, uap->s);
470
471 if (uap->name) {
472 error = copyin(uap->anamelen, (caddr_t)&namelen,
473 sizeof(socklen_t));
474 if (error) {
475 return error;
476 }
477 }
478 error = fp_get_ftype(p, fd, ftype: DTYPE_SOCKET, ENOTSOCK, fpp: &fp);
479 if (error) {
480 return error;
481 }
482 head = (struct socket *)fp_get_data(fp);
483
484#if CONFIG_MACF_SOCKET_SUBSET
485 if ((error = mac_socket_check_accept(cred: kauth_cred_get(), so: head)) != 0) {
486 goto out;
487 }
488#endif /* MAC_SOCKET_SUBSET */
489
490 socket_lock(so: head, refcount: 1);
491
492 if (head->so_proto->pr_getlock != NULL) {
493 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
494 dosocklock = 1;
495 } else {
496 mutex_held = head->so_proto->pr_domain->dom_mtx;
497 dosocklock = 0;
498 }
499
500 if ((head->so_options & SO_ACCEPTCONN) == 0) {
501 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
502 error = EOPNOTSUPP;
503 } else {
504 /* POSIX: The socket is not accepting connections */
505 error = EINVAL;
506 }
507 socket_unlock(so: head, refcount: 1);
508 goto out;
509 }
510check_again:
511 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
512 socket_unlock(so: head, refcount: 1);
513 error = EWOULDBLOCK;
514 goto out;
515 }
516 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
517 if (head->so_state & SS_CANTRCVMORE) {
518 head->so_error = ECONNABORTED;
519 break;
520 }
521 if (head->so_usecount < 1) {
522 panic("accept: head=%p refcount=%d", head,
523 head->so_usecount);
524 }
525 error = msleep(chan: (caddr_t)&head->so_timeo, mtx: mutex_held,
526 PSOCK | PCATCH, wmesg: "accept", ts: 0);
527 if (head->so_usecount < 1) {
528 panic("accept: 2 head=%p refcount=%d", head,
529 head->so_usecount);
530 }
531 if ((head->so_state & SS_DRAINING)) {
532 error = ECONNABORTED;
533 }
534 if (error) {
535 socket_unlock(so: head, refcount: 1);
536 goto out;
537 }
538 }
539 if (head->so_error) {
540 error = head->so_error;
541 head->so_error = 0;
542 socket_unlock(so: head, refcount: 1);
543 goto out;
544 }
545
546 /*
547 * At this point we know that there is at least one connection
548 * ready to be accepted. Remove it from the queue prior to
549 * allocating the file descriptor for it since falloc() may
550 * block allowing another process to accept the connection
551 * instead.
552 */
553 lck_mtx_assert(lck: mutex_held, LCK_MTX_ASSERT_OWNED);
554
555 so_acquire_accept_list(head, NULL);
556 if (TAILQ_EMPTY(&head->so_comp)) {
557 so_release_accept_list(head);
558 goto check_again;
559 }
560
561 so = TAILQ_FIRST(&head->so_comp);
562 TAILQ_REMOVE(&head->so_comp, so, so_list);
563 /*
564 * Acquire the lock of the new connection
565 * as we may be in the process of receiving
566 * a packet that may change its so_state
567 * (e.g.: a TCP FIN).
568 */
569 if (dosocklock) {
570 socket_lock(so, refcount: 0);
571 }
572 so->so_head = NULL;
573 so->so_state &= ~SS_COMP;
574 if (dosocklock) {
575 socket_unlock(so, refcount: 0);
576 }
577 head->so_qlen--;
578 so_release_accept_list(head);
579
580 /* unlock head to avoid deadlock with select, keep a ref on head */
581 socket_unlock(so: head, refcount: 0);
582
583#if CONFIG_MACF_SOCKET_SUBSET
584 /*
585 * Pass the pre-accepted socket to the MAC framework. This is
586 * cheaper than allocating a file descriptor for the socket,
587 * calling the protocol accept callback, and possibly freeing
588 * the file descriptor should the MAC check fails.
589 */
590 if ((error = mac_socket_check_accepted(cred: kauth_cred_get(), so)) != 0) {
591 socket_lock(so, refcount: 1);
592 so->so_state &= ~SS_NOFDREF;
593 socket_unlock(so, refcount: 1);
594 soclose(so);
595 /* Drop reference on listening socket */
596 sodereference(so: head);
597 goto out;
598 }
599#endif /* MAC_SOCKET_SUBSET */
600
601 /*
602 * Pass the pre-accepted socket to any interested socket filter(s).
603 * Upon failure, the socket would have been closed by the callee.
604 */
605 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
606 /* Drop reference on listening socket */
607 sodereference(so: head);
608 /* Propagate socket filter's error code to the caller */
609 goto out;
610 }
611
612 fflag = fp->f_flag;
613 error = falloc(p, &fp, &newfd);
614 if (error) {
615 /*
616 * Probably ran out of file descriptors.
617 *
618 * <rdar://problem/8554930>
619 * Don't put this back on the socket like we used to, that
620 * just causes the client to spin. Drop the socket.
621 */
622 socket_lock(so, refcount: 1);
623 so->so_state &= ~SS_NOFDREF;
624 socket_unlock(so, refcount: 1);
625 soclose(so);
626 sodereference(so: head);
627 goto out;
628 }
629 *retval = newfd;
630 fp->f_flag = fflag;
631 fp->f_ops = &socketops;
632 fp_set_data(fp, fg_data: so);
633
634 socket_lock(so: head, refcount: 0);
635 if (dosocklock) {
636 socket_lock(so, refcount: 1);
637 }
638
639 /* Sync socket non-blocking/async state with file flags */
640 if (fp->f_flag & FNONBLOCK) {
641 so->so_state |= SS_NBIO;
642 } else {
643 so->so_state &= ~SS_NBIO;
644 }
645
646 if (fp->f_flag & FASYNC) {
647 so->so_state |= SS_ASYNC;
648 so->so_rcv.sb_flags |= SB_ASYNC;
649 so->so_snd.sb_flags |= SB_ASYNC;
650 } else {
651 so->so_state &= ~SS_ASYNC;
652 so->so_rcv.sb_flags &= ~SB_ASYNC;
653 so->so_snd.sb_flags &= ~SB_ASYNC;
654 }
655
656 (void) soacceptlock(so, nam: &sa, dolock: 0);
657 socket_unlock(so: head, refcount: 1);
658 if (sa == NULL) {
659 namelen = 0;
660 if (uap->name) {
661 goto gotnoname;
662 }
663 error = 0;
664 goto releasefd;
665 }
666 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
667
668 if (uap->name) {
669 socklen_t sa_len;
670
671 /* save sa_len before it is destroyed */
672 sa_len = sa->sa_len;
673 namelen = MIN(namelen, sa_len);
674 error = copyout(sa, uap->name, namelen);
675 if (!error) {
676 /* return the actual, untruncated address length */
677 namelen = sa_len;
678 }
679gotnoname:
680 error = copyout((caddr_t)&namelen, uap->anamelen,
681 sizeof(socklen_t));
682 }
683 free_sockaddr(sa);
684
685releasefd:
686 /*
687 * If the socket has been marked as inactive by sosetdefunct(),
688 * disallow further operations on it.
689 */
690 if (so->so_flags & SOF_DEFUNCT) {
691 sodefunct(current_proc(), so,
692 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
693 }
694
695 if (dosocklock) {
696 socket_unlock(so, refcount: 1);
697 }
698
699 proc_fdlock(p);
700 procfdtbl_releasefd(p, fd: newfd, NULL);
701 fp_drop(p, fd: newfd, fp, locked: 1);
702 proc_fdunlock(p);
703
704out:
705 if (error == 0 && ENTR_SHOULDTRACE) {
706 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
707 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
708 }
709
710 file_drop(fd);
711 return error;
712}
713
714int
715accept(proc_ref_t p, struct accept_args *uap, int32_ref_t retval)
716{
717 __pthread_testcancel(presyscall: 1);
718 return accept_nocancel(p, uap: (struct accept_nocancel_args *)uap,
719 retval);
720}
721
722/*
723 * Returns: 0 Success
724 * EBADF Bad file descriptor
725 * EALREADY Connection already in progress
726 * EINPROGRESS Operation in progress
727 * ECONNABORTED Connection aborted
728 * EINTR Interrupted function
729 * EACCES Mandatory Access Control failure
730 * file_socket:ENOTSOCK
731 * file_socket:EBADF
732 * getsockaddr:ENAMETOOLONG Filename too long
733 * getsockaddr:EINVAL Invalid argument
734 * getsockaddr:ENOMEM Not enough space
735 * getsockaddr:EFAULT Bad address
736 * soconnectlock:EOPNOTSUPP
737 * soconnectlock:EISCONN
738 * soconnectlock:??? [depends on protocol, filters]
739 * msleep:EINTR
740 *
741 * Imputed: so_error error may be set from so_error, which
742 * may have been set by soconnectlock.
743 */
744/* ARGSUSED */
745int
746connect(proc_ref_t p, struct connect_args *uap, int32_ref_t retval)
747{
748 __pthread_testcancel(presyscall: 1);
749 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
750 retval);
751}
752
753int
754connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_ref_t retval)
755{
756#pragma unused(p, retval)
757 socket_ref_t so;
758 struct sockaddr_storage ss;
759 sockaddr_ref_t sa = NULL;
760 int error;
761 int fd = uap->s;
762 boolean_t dgram;
763
764 AUDIT_ARG(fd, uap->s);
765 error = file_socket(fd, &so);
766 if (error != 0) {
767 return error;
768 }
769 if (so == NULL) {
770 error = EBADF;
771 goto out;
772 }
773
774 /*
775 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
776 * if this is a datagram socket; translate for other types.
777 */
778 dgram = (so->so_type == SOCK_DGRAM);
779
780 /* Get socket address now before we obtain socket lock */
781 if (uap->namelen > sizeof(ss)) {
782 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
783 } else {
784 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
785 if (error == 0) {
786 sa = SA(&ss);
787 }
788 }
789 if (error != 0) {
790 goto out;
791 }
792
793 error = connectit(so, sa);
794
795 if (sa != NULL && sa != SA(&ss)) {
796 free_sockaddr(sa);
797 }
798 if (error == ERESTART) {
799 error = EINTR;
800 }
801out:
802 file_drop(fd);
803 return error;
804}
805
806static int
807connectx_nocancel(proc_ref_t p, connectx_args_ref_t uap, int_ref_t retval)
808{
809#pragma unused(p, retval)
810 struct sockaddr_storage ss, sd;
811 sockaddr_ref_t src = NULL, dst = NULL;
812 socket_ref_t so;
813 int error, error1, fd = uap->socket;
814 boolean_t dgram;
815 sae_connid_t cid = SAE_CONNID_ANY;
816 struct user32_sa_endpoints ep32;
817 struct user64_sa_endpoints ep64;
818 struct user_sa_endpoints ep;
819 user_ssize_t bytes_written = 0;
820 struct user_iovec *iovp;
821 uio_t auio = NULL;
822
823 AUDIT_ARG(fd, uap->socket);
824 error = file_socket(fd, &so);
825 if (error != 0) {
826 return error;
827 }
828 if (so == NULL) {
829 error = EBADF;
830 goto out;
831 }
832
833 if (uap->endpoints == USER_ADDR_NULL) {
834 error = EINVAL;
835 goto out;
836 }
837
838 if (IS_64BIT_PROCESS(p)) {
839 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
840 if (error != 0) {
841 goto out;
842 }
843
844 ep.sae_srcif = ep64.sae_srcif;
845 ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
846 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
847 ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
848 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
849 } else {
850 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
851 if (error != 0) {
852 goto out;
853 }
854
855 ep.sae_srcif = ep32.sae_srcif;
856 ep.sae_srcaddr = ep32.sae_srcaddr;
857 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
858 ep.sae_dstaddr = ep32.sae_dstaddr;
859 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
860 }
861
862 /*
863 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
864 * if this is a datagram socket; translate for other types.
865 */
866 dgram = (so->so_type == SOCK_DGRAM);
867
868 /* Get socket address now before we obtain socket lock */
869 if (ep.sae_srcaddr != USER_ADDR_NULL) {
870 if (ep.sae_srcaddrlen > sizeof(ss)) {
871 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
872 } else {
873 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
874 if (error == 0) {
875 src = SA(&ss);
876 }
877 }
878
879 if (error) {
880 goto out;
881 }
882 }
883
884 if (ep.sae_dstaddr == USER_ADDR_NULL) {
885 error = EINVAL;
886 goto out;
887 }
888
889 /* Get socket address now before we obtain socket lock */
890 if (ep.sae_dstaddrlen > sizeof(sd)) {
891 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
892 } else {
893 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
894 if (error == 0) {
895 dst = SA(&sd);
896 }
897 }
898
899 if (error) {
900 goto out;
901 }
902
903 VERIFY(dst != NULL);
904
905 if (uap->iov != USER_ADDR_NULL) {
906 /* Verify range before calling uio_create() */
907 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
908 error = EINVAL;
909 goto out;
910 }
911
912 if (uap->len == USER_ADDR_NULL) {
913 error = EINVAL;
914 goto out;
915 }
916
917 /* allocate a uio to hold the number of iovecs passed */
918 auio = uio_create(a_iovcount: uap->iovcnt, a_offset: 0,
919 a_spacetype: (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
920 a_iodirection: UIO_WRITE);
921
922 if (auio == NULL) {
923 error = ENOMEM;
924 goto out;
925 }
926
927 /*
928 * get location of iovecs within the uio.
929 * then copyin the iovecs from user space.
930 */
931 iovp = uio_iovsaddr_user(a_uio: auio);
932 if (iovp == NULL) {
933 error = ENOMEM;
934 goto out;
935 }
936 error = copyin_user_iovec_array(uaddr: uap->iov,
937 spacetype: IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
938 count: uap->iovcnt, dst: iovp);
939 if (error != 0) {
940 goto out;
941 }
942
943 /* finish setup of uio_t */
944 error = uio_calculateresid_user(a_uio: auio);
945 if (error != 0) {
946 goto out;
947 }
948 }
949
950 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
951 &cid, auio, uap->flags, &bytes_written);
952 if (error == ERESTART) {
953 error = EINTR;
954 }
955
956 if (uap->len != USER_ADDR_NULL) {
957 if (IS_64BIT_PROCESS(p)) {
958 error1 = copyout(&bytes_written, uap->len, sizeof(user64_size_t));
959 } else {
960 error1 = copyout(&bytes_written, uap->len, sizeof(user32_size_t));
961 }
962 /* give precedence to connectitx errors */
963 if ((error1 != 0) && (error == 0)) {
964 error = error1;
965 }
966 }
967
968 if (uap->connid != USER_ADDR_NULL) {
969 error1 = copyout(&cid, uap->connid, sizeof(cid));
970 /* give precedence to connectitx errors */
971 if ((error1 != 0) && (error == 0)) {
972 error = error1;
973 }
974 }
975out:
976 file_drop(fd);
977 if (auio != NULL) {
978 uio_free(a_uio: auio);
979 }
980 if (src != NULL && src != SA(&ss)) {
981 free_sockaddr(src);
982 }
983 if (dst != NULL && dst != SA(&sd)) {
984 free_sockaddr(dst);
985 }
986 return error;
987}
988
989int
990connectx(proc_ref_t p, struct connectx_args *uap, int *retval)
991{
992 /*
993 * Due to similiarity with a POSIX interface, define as
994 * an unofficial cancellation point.
995 */
996 __pthread_testcancel(presyscall: 1);
997 return connectx_nocancel(p, uap, retval);
998}
999
1000static int
1001connectit(struct socket *so, sockaddr_ref_t sa)
1002{
1003 int error;
1004
1005 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
1006#if CONFIG_MACF_SOCKET_SUBSET
1007 if ((error = mac_socket_check_connect(cred: kauth_cred_get(), so, addr: sa)) != 0) {
1008 return error;
1009 }
1010#endif /* MAC_SOCKET_SUBSET */
1011
1012 socket_lock(so, refcount: 1);
1013 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1014 error = EALREADY;
1015 goto out;
1016 }
1017 error = soconnectlock(so, nam: sa, dolock: 0);
1018 if (error != 0) {
1019 goto out;
1020 }
1021 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1022 error = EINPROGRESS;
1023 goto out;
1024 }
1025 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1026 lck_mtx_t *mutex_held;
1027
1028 if (so->so_proto->pr_getlock != NULL) {
1029 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1030 } else {
1031 mutex_held = so->so_proto->pr_domain->dom_mtx;
1032 }
1033 error = msleep(chan: (caddr_t)&so->so_timeo, mtx: mutex_held,
1034 PSOCK | PCATCH, wmesg: __func__, ts: 0);
1035 if (so->so_state & SS_DRAINING) {
1036 error = ECONNABORTED;
1037 }
1038 if (error != 0) {
1039 break;
1040 }
1041 }
1042 if (error == 0) {
1043 error = so->so_error;
1044 so->so_error = 0;
1045 }
1046out:
1047 socket_unlock(so, refcount: 1);
1048 return error;
1049}
1050
1051static int
1052connectitx(struct socket *so, sockaddr_ref_t src,
1053 sockaddr_ref_t dst, proc_ref_t p, uint32_t ifscope,
1054 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1055 user_ssize_t *bytes_written)
1056{
1057 int error;
1058
1059 VERIFY(dst != NULL);
1060
1061 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1062#if CONFIG_MACF_SOCKET_SUBSET
1063 if ((error = mac_socket_check_connect(cred: kauth_cred_get(), so, addr: dst)) != 0) {
1064 return error;
1065 }
1066
1067 if (auio != NULL) {
1068 if ((error = mac_socket_check_send(cred: kauth_cred_get(), so, addr: dst)) != 0) {
1069 return error;
1070 }
1071 }
1072#endif /* MAC_SOCKET_SUBSET */
1073
1074 socket_lock(so, refcount: 1);
1075 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1076 error = EALREADY;
1077 goto out;
1078 }
1079
1080 error = soconnectxlocked(so, src, dst, p, ifscope,
1081 aid, pcid, flags, NULL, 0, auio, bytes_written);
1082 if (error != 0) {
1083 goto out;
1084 }
1085 /*
1086 * If, after the call to soconnectxlocked the flag is still set (in case
1087 * data has been queued and the connect() has actually been triggered,
1088 * it will have been unset by the transport), we exit immediately. There
1089 * is no reason to wait on any event.
1090 */
1091 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1092 error = 0;
1093 goto out;
1094 }
1095 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1096 error = EINPROGRESS;
1097 goto out;
1098 }
1099 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1100 lck_mtx_t *mutex_held;
1101
1102 if (so->so_proto->pr_getlock != NULL) {
1103 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1104 } else {
1105 mutex_held = so->so_proto->pr_domain->dom_mtx;
1106 }
1107 error = msleep(chan: (caddr_t)&so->so_timeo, mtx: mutex_held,
1108 PSOCK | PCATCH, wmesg: __func__, ts: 0);
1109 if (so->so_state & SS_DRAINING) {
1110 error = ECONNABORTED;
1111 }
1112 if (error != 0) {
1113 break;
1114 }
1115 }
1116 if (error == 0) {
1117 error = so->so_error;
1118 so->so_error = 0;
1119 }
1120out:
1121 socket_unlock(so, refcount: 1);
1122 return error;
1123}
1124
1125int
1126peeloff(proc_ref_t p, struct peeloff_args *uap, int *retval)
1127{
1128#pragma unused(p, uap, retval)
1129 /*
1130 * Due to similiarity with a POSIX interface, define as
1131 * an unofficial cancellation point.
1132 */
1133 __pthread_testcancel(presyscall: 1);
1134 return 0;
1135}
1136
1137int
1138disconnectx(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1139{
1140 /*
1141 * Due to similiarity with a POSIX interface, define as
1142 * an unofficial cancellation point.
1143 */
1144 __pthread_testcancel(presyscall: 1);
1145 return disconnectx_nocancel(p, uap, retval);
1146}
1147
1148static int
1149disconnectx_nocancel(proc_ref_t p, struct disconnectx_args *uap, int *retval)
1150{
1151#pragma unused(p, retval)
1152 socket_ref_t so;
1153 int fd = uap->s;
1154 int error;
1155
1156 error = file_socket(fd, &so);
1157 if (error != 0) {
1158 return error;
1159 }
1160 if (so == NULL) {
1161 error = EBADF;
1162 goto out;
1163 }
1164
1165 error = sodisconnectx(so, uap->aid, uap->cid);
1166out:
1167 file_drop(fd);
1168 return error;
1169}
1170
1171/*
1172 * Returns: 0 Success
1173 * socreate:EAFNOSUPPORT
1174 * socreate:EPROTOTYPE
1175 * socreate:EPROTONOSUPPORT
1176 * socreate:ENOBUFS
1177 * socreate:ENOMEM
1178 * socreate:EISCONN
1179 * socreate:??? [other protocol families, IPSEC]
1180 * falloc:ENFILE
1181 * falloc:EMFILE
1182 * falloc:ENOMEM
1183 * copyout:EFAULT
1184 * soconnect2:EINVAL
1185 * soconnect2:EPROTOTYPE
1186 * soconnect2:??? [other protocol families[
1187 */
1188int
1189socketpair(proc_ref_t p, struct socketpair_args *uap,
1190 __unused int32_ref_t retval)
1191{
1192 fileproc_ref_t fp1, fp2;
1193 socket_ref_t so1, so2;
1194 int fd, error, sv[2];
1195
1196 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1197 error = socreate(dom: uap->domain, aso: &so1, type: uap->type, proto: uap->protocol);
1198 if (error) {
1199 return error;
1200 }
1201 error = socreate(dom: uap->domain, aso: &so2, type: uap->type, proto: uap->protocol);
1202 if (error) {
1203 goto free1;
1204 }
1205
1206 error = falloc(p, &fp1, &fd);
1207 if (error) {
1208 goto free2;
1209 }
1210 fp1->f_flag = FREAD | FWRITE;
1211 fp1->f_ops = &socketops;
1212 fp_set_data(fp: fp1, fg_data: so1);
1213 sv[0] = fd;
1214
1215 error = falloc(p, &fp2, &fd);
1216 if (error) {
1217 goto free3;
1218 }
1219 fp2->f_flag = FREAD | FWRITE;
1220 fp2->f_ops = &socketops;
1221 fp_set_data(fp: fp2, fg_data: so2);
1222 sv[1] = fd;
1223
1224 error = soconnect2(so1, so2);
1225 if (error) {
1226 goto free4;
1227 }
1228 if (uap->type == SOCK_DGRAM) {
1229 /*
1230 * Datagram socket connection is asymmetric.
1231 */
1232 error = soconnect2(so1: so2, so2: so1);
1233 if (error) {
1234 goto free4;
1235 }
1236 }
1237
1238 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1239 goto free4;
1240 }
1241
1242 proc_fdlock(p);
1243 procfdtbl_releasefd(p, fd: sv[0], NULL);
1244 procfdtbl_releasefd(p, fd: sv[1], NULL);
1245 fp_drop(p, fd: sv[0], fp: fp1, locked: 1);
1246 fp_drop(p, fd: sv[1], fp: fp2, locked: 1);
1247 proc_fdunlock(p);
1248
1249 return 0;
1250free4:
1251 fp_free(p, fd: sv[1], fp: fp2);
1252free3:
1253 fp_free(p, fd: sv[0], fp: fp1);
1254free2:
1255 (void) soclose(so: so2);
1256free1:
1257 (void) soclose(so: so1);
1258 return error;
1259}
1260
1261/*
1262 * Returns: 0 Success
1263 * EINVAL
1264 * ENOBUFS
1265 * EBADF
1266 * EPIPE
1267 * EACCES Mandatory Access Control failure
1268 * file_socket:ENOTSOCK
1269 * file_socket:EBADF
1270 * getsockaddr:ENAMETOOLONG Filename too long
1271 * getsockaddr:EINVAL Invalid argument
1272 * getsockaddr:ENOMEM Not enough space
1273 * getsockaddr:EFAULT Bad address
1274 * <pru_sosend>:EACCES[TCP]
1275 * <pru_sosend>:EADDRINUSE[TCP]
1276 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1277 * <pru_sosend>:EAFNOSUPPORT[TCP]
1278 * <pru_sosend>:EAGAIN[TCP]
1279 * <pru_sosend>:EBADF
1280 * <pru_sosend>:ECONNRESET[TCP]
1281 * <pru_sosend>:EFAULT
1282 * <pru_sosend>:EHOSTUNREACH[TCP]
1283 * <pru_sosend>:EINTR
1284 * <pru_sosend>:EINVAL
1285 * <pru_sosend>:EISCONN[AF_INET]
1286 * <pru_sosend>:EMSGSIZE[TCP]
1287 * <pru_sosend>:ENETDOWN[TCP]
1288 * <pru_sosend>:ENETUNREACH[TCP]
1289 * <pru_sosend>:ENOBUFS
1290 * <pru_sosend>:ENOMEM[TCP]
1291 * <pru_sosend>:ENOTCONN[AF_INET]
1292 * <pru_sosend>:EOPNOTSUPP
1293 * <pru_sosend>:EPERM[TCP]
1294 * <pru_sosend>:EPIPE
1295 * <pru_sosend>:EWOULDBLOCK
1296 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1297 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1298 * <pru_sosend>:??? [value from so_error]
1299 * sockargs:???
1300 */
1301static int
1302sendit(proc_ref_t p, struct socket *so, user_msghdr_ref_t mp, uio_t uiop,
1303 int flags, int32_ref_t retval)
1304{
1305 mbuf_ref_t control = NULL;
1306 struct sockaddr_storage ss;
1307 sockaddr_ref_t to = NULL;
1308 boolean_t want_free = TRUE;
1309 int error;
1310 user_ssize_t len;
1311
1312 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1313
1314 if (mp->msg_name != USER_ADDR_NULL) {
1315 if (mp->msg_namelen > sizeof(ss)) {
1316 error = getsockaddr(so, &to, mp->msg_name,
1317 mp->msg_namelen, TRUE);
1318 } else {
1319 error = getsockaddr_s(so, &ss, mp->msg_name,
1320 mp->msg_namelen, TRUE);
1321 if (error == 0) {
1322 to = SA(&ss);
1323 want_free = FALSE;
1324 }
1325 }
1326 if (error != 0) {
1327 goto out;
1328 }
1329 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1330 }
1331 if (mp->msg_control != USER_ADDR_NULL) {
1332 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1333 error = EINVAL;
1334 goto bad;
1335 }
1336 error = sockargs(mp: &control, data: mp->msg_control,
1337 buflen: mp->msg_controllen, MT_CONTROL);
1338 if (error != 0) {
1339 goto bad;
1340 }
1341 }
1342
1343#if CONFIG_MACF_SOCKET_SUBSET
1344 /*
1345 * We check the state without holding the socket lock;
1346 * if a race condition occurs, it would simply result
1347 * in an extra call to the MAC check function.
1348 */
1349 if (to != NULL &&
1350 !(so->so_state & SS_DEFUNCT) &&
1351 (error = mac_socket_check_send(cred: kauth_cred_get(), so, addr: to)) != 0) {
1352 if (control != NULL) {
1353 m_freem(control);
1354 }
1355
1356 goto bad;
1357 }
1358#endif /* MAC_SOCKET_SUBSET */
1359
1360 len = uio_resid(a_uio: uiop);
1361 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1362 control, flags);
1363 if (error != 0) {
1364 if (uio_resid(a_uio: uiop) != len && (error == ERESTART ||
1365 error == EINTR || error == EWOULDBLOCK)) {
1366 error = 0;
1367 }
1368 /* Generation of SIGPIPE can be controlled per socket */
1369 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1370 !(flags & MSG_NOSIGNAL)) {
1371 psignal(p, SIGPIPE);
1372 }
1373 }
1374 if (error == 0) {
1375 *retval = (int)(len - uio_resid(a_uio: uiop));
1376 }
1377bad:
1378 if (want_free) {
1379 free_sockaddr(to);
1380 }
1381out:
1382 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1383
1384 return error;
1385}
1386
1387/*
1388 * Returns: 0 Success
1389 * ENOMEM
1390 * sendit:??? [see sendit definition in this file]
1391 * write:??? [4056224: applicable for pipes]
1392 */
1393int
1394sendto(proc_ref_t p, struct sendto_args *uap, int32_ref_t retval)
1395{
1396 __pthread_testcancel(presyscall: 1);
1397 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1398}
1399
1400int
1401sendto_nocancel(proc_ref_t p,
1402 struct sendto_nocancel_args *uap,
1403 int32_ref_t retval)
1404{
1405 struct user_msghdr msg;
1406 int error;
1407 uio_t auio = NULL;
1408 socket_ref_t so;
1409
1410 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1411 AUDIT_ARG(fd, uap->s);
1412
1413 if (uap->flags & MSG_SKIPCFIL) {
1414 error = EPERM;
1415 goto done;
1416 }
1417
1418 if (uap->len > LONG_MAX) {
1419 error = EINVAL;
1420 goto done;
1421 }
1422
1423 auio = uio_create(a_iovcount: 1, a_offset: 0,
1424 a_spacetype: (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1425 a_iodirection: UIO_WRITE);
1426 if (auio == NULL) {
1427 error = ENOMEM;
1428 goto done;
1429 }
1430 uio_addiov(a_uio: auio, a_baseaddr: uap->buf, a_length: uap->len);
1431
1432 msg.msg_name = uap->to;
1433 msg.msg_namelen = uap->tolen;
1434 /* no need to set up msg_iov. sendit uses uio_t we send it */
1435 msg.msg_iov = 0;
1436 msg.msg_iovlen = 0;
1437 msg.msg_control = 0;
1438 msg.msg_flags = 0;
1439
1440 error = file_socket(uap->s, &so);
1441 if (error) {
1442 goto done;
1443 }
1444
1445 if (so == NULL) {
1446 error = EBADF;
1447 } else {
1448 error = sendit(p, so, mp: &msg, uiop: auio, flags: uap->flags, retval);
1449 }
1450
1451 file_drop(uap->s);
1452done:
1453 if (auio != NULL) {
1454 uio_free(a_uio: auio);
1455 }
1456
1457 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1458
1459 return error;
1460}
1461
1462/*
1463 * Returns: 0 Success
1464 * ENOBUFS
1465 * copyin:EFAULT
1466 * sendit:??? [see sendit definition in this file]
1467 */
1468int
1469sendmsg(proc_ref_t p, struct sendmsg_args *uap, int32_ref_t retval)
1470{
1471 __pthread_testcancel(presyscall: 1);
1472 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1473 retval);
1474}
1475
1476int
1477sendmsg_nocancel(proc_ref_t p, struct sendmsg_nocancel_args *uap,
1478 int32_ref_t retval)
1479{
1480 struct user32_msghdr msg32;
1481 struct user64_msghdr msg64;
1482 struct user_msghdr user_msg;
1483 caddr_t msghdrp;
1484 int size_of_msghdr;
1485 int error;
1486 uio_t auio = NULL;
1487 struct user_iovec *iovp;
1488 socket_ref_t so;
1489
1490 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1491
1492 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1493 AUDIT_ARG(fd, uap->s);
1494
1495 if (uap->flags & MSG_SKIPCFIL) {
1496 error = EPERM;
1497 goto done;
1498 }
1499
1500 if (is_p_64bit_process) {
1501 msghdrp = (caddr_t)&msg64;
1502 size_of_msghdr = sizeof(msg64);
1503 } else {
1504 msghdrp = (caddr_t)&msg32;
1505 size_of_msghdr = sizeof(msg32);
1506 }
1507 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1508 if (error) {
1509 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1510 return error;
1511 }
1512
1513 if (is_p_64bit_process) {
1514 user_msg.msg_flags = msg64.msg_flags;
1515 user_msg.msg_controllen = msg64.msg_controllen;
1516 user_msg.msg_control = (user_addr_t)msg64.msg_control;
1517 user_msg.msg_iovlen = msg64.msg_iovlen;
1518 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1519 user_msg.msg_namelen = msg64.msg_namelen;
1520 user_msg.msg_name = (user_addr_t)msg64.msg_name;
1521 } else {
1522 user_msg.msg_flags = msg32.msg_flags;
1523 user_msg.msg_controllen = msg32.msg_controllen;
1524 user_msg.msg_control = msg32.msg_control;
1525 user_msg.msg_iovlen = msg32.msg_iovlen;
1526 user_msg.msg_iov = msg32.msg_iov;
1527 user_msg.msg_namelen = msg32.msg_namelen;
1528 user_msg.msg_name = msg32.msg_name;
1529 }
1530
1531 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1532 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1533 0, 0, 0, 0);
1534 return EMSGSIZE;
1535 }
1536
1537 /* allocate a uio large enough to hold the number of iovecs passed */
1538 auio = uio_create(a_iovcount: user_msg.msg_iovlen, a_offset: 0,
1539 a_spacetype: (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1540 a_iodirection: UIO_WRITE);
1541 if (auio == NULL) {
1542 error = ENOBUFS;
1543 goto done;
1544 }
1545
1546 if (user_msg.msg_iovlen) {
1547 /*
1548 * get location of iovecs within the uio.
1549 * then copyin the iovecs from user space.
1550 */
1551 iovp = uio_iovsaddr_user(a_uio: auio);
1552 if (iovp == NULL) {
1553 error = ENOBUFS;
1554 goto done;
1555 }
1556 error = copyin_user_iovec_array(uaddr: user_msg.msg_iov,
1557 spacetype: is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1558 count: user_msg.msg_iovlen, dst: iovp);
1559 if (error) {
1560 goto done;
1561 }
1562 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1563
1564 /* finish setup of uio_t */
1565 error = uio_calculateresid_user(a_uio: auio);
1566 if (error) {
1567 goto done;
1568 }
1569 } else {
1570 user_msg.msg_iov = 0;
1571 }
1572
1573 /* msg_flags is ignored for send */
1574 user_msg.msg_flags = 0;
1575
1576 error = file_socket(uap->s, &so);
1577 if (error) {
1578 goto done;
1579 }
1580 if (so == NULL) {
1581 error = EBADF;
1582 } else {
1583 error = sendit(p, so, mp: &user_msg, uiop: auio, flags: uap->flags, retval);
1584 }
1585 file_drop(uap->s);
1586done:
1587 if (auio != NULL) {
1588 uio_free(a_uio: auio);
1589 }
1590 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1591
1592 return error;
1593}
1594
1595static int
1596internalize_user_msg_x(struct user_msghdr *user_msg, uio_t *auiop, proc_ref_t p, void_ptr_t user_msghdr_x_src)
1597{
1598 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1599 uio_t auio = *auiop;
1600 int error;
1601
1602 if (is_p_64bit_process) {
1603 struct user64_msghdr_x msghdrx64;
1604
1605 error = copyin((user_addr_t)user_msghdr_x_src,
1606 &msghdrx64, sizeof(msghdrx64));
1607 if (error != 0) {
1608 DBG_PRINTF("%s copyin() msghdrx64 failed %d",
1609 __func__, error);
1610 goto done;
1611 }
1612 user_msg->msg_name = msghdrx64.msg_name;
1613 user_msg->msg_namelen = msghdrx64.msg_namelen;
1614 user_msg->msg_iov = msghdrx64.msg_iov;
1615 user_msg->msg_iovlen = msghdrx64.msg_iovlen;
1616 user_msg->msg_control = msghdrx64.msg_control;
1617 user_msg->msg_controllen = msghdrx64.msg_controllen;
1618 } else {
1619 struct user32_msghdr_x msghdrx32;
1620
1621 error = copyin((user_addr_t)user_msghdr_x_src,
1622 &msghdrx32, sizeof(msghdrx32));
1623 if (error != 0) {
1624 DBG_PRINTF("%s copyin() msghdrx32 failed %d",
1625 __func__, error);
1626 goto done;
1627 }
1628 user_msg->msg_name = msghdrx32.msg_name;
1629 user_msg->msg_namelen = msghdrx32.msg_namelen;
1630 user_msg->msg_iov = msghdrx32.msg_iov;
1631 user_msg->msg_iovlen = msghdrx32.msg_iovlen;
1632 user_msg->msg_control = msghdrx32.msg_control;
1633 user_msg->msg_controllen = msghdrx32.msg_controllen;
1634 }
1635 /* msg_flags is ignored for send */
1636 user_msg->msg_flags = 0;
1637
1638 if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
1639 error = EMSGSIZE;
1640 DBG_PRINTF("%s bad msg_iovlen, error %d",
1641 __func__, error);
1642 goto done;
1643 }
1644 /*
1645 * Attempt to reuse the uio if large enough, otherwise we need
1646 * a new one
1647 */
1648 if (auio != NULL) {
1649 if (auio->uio_max_iovs >= user_msg->msg_iovlen) {
1650 uio_reset_fast(a_uio: auio, a_offset: 0,
1651 a_spacetype: is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1652 a_iodirection: UIO_WRITE);
1653 } else {
1654 uio_free(a_uio: auio);
1655 auio = NULL;
1656 }
1657 }
1658 if (auio == NULL) {
1659 auio = uio_create(a_iovcount: user_msg->msg_iovlen, a_offset: 0,
1660 a_spacetype: is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1661 a_iodirection: UIO_WRITE);
1662 if (auio == NULL) {
1663 error = ENOBUFS;
1664 DBG_PRINTF("%s uio_create() failed %d",
1665 __func__, error);
1666 goto done;
1667 }
1668 }
1669
1670 if (user_msg->msg_iovlen) {
1671 /*
1672 * get location of iovecs within the uio.
1673 * then copyin the iovecs from user space.
1674 */
1675 struct user_iovec *iovp = uio_iovsaddr_user(a_uio: auio);
1676 if (iovp == NULL) {
1677 error = ENOBUFS;
1678 goto done;
1679 }
1680 error = copyin_user_iovec_array(uaddr: user_msg->msg_iov,
1681 spacetype: is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
1682 count: user_msg->msg_iovlen, dst: iovp);
1683 if (error != 0) {
1684 goto done;
1685 }
1686 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
1687
1688 /* finish setup of uio_t */
1689 error = uio_calculateresid_user(a_uio: auio);
1690 if (error) {
1691 goto done;
1692 }
1693 } else {
1694 user_msg->msg_iov = 0;
1695 }
1696
1697done:
1698 *auiop = auio;
1699 return error;
1700}
1701
1702static int
1703mbuf_packet_from_uio(socket_ref_t so, mbuf_ref_ref_t mp, uio_t auio)
1704{
1705 int error = 0;
1706 uint16_t headroom = 0;
1707 size_t bytes_to_alloc;
1708 mbuf_ref_t top = NULL, m;
1709
1710 if (soreserveheadroom != 0) {
1711 headroom = so->so_pktheadroom;
1712 }
1713 bytes_to_alloc = headroom + uio_resid(a_uio: auio);
1714
1715 error = mbuf_allocpacket(how: MBUF_WAITOK, packetlen: bytes_to_alloc, NULL, mbuf: &top);
1716 if (error != 0) {
1717 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: mbuf_allocpacket %zu error %d",
1718 bytes_to_alloc, error);
1719 goto done;
1720 }
1721
1722 if (headroom > 0 && headroom < mbuf_maxlen(mbuf: top)) {
1723 top->m_data += headroom;
1724 }
1725
1726 for (m = top; m != NULL; m = m->m_next) {
1727 int bytes_to_copy = (int)uio_resid(a_uio: auio);
1728 ssize_t mlen;
1729
1730 if ((m->m_flags & M_EXT)) {
1731 mlen = m->m_ext.ext_size -
1732 M_LEADINGSPACE(m);
1733 } else if ((m->m_flags & M_PKTHDR)) {
1734 mlen = MHLEN - M_LEADINGSPACE(m);
1735 m_add_crumb(m, PKT_CRUMB_SOSEND);
1736 } else {
1737 mlen = MLEN - M_LEADINGSPACE(m);
1738 }
1739 int len = imin(a: (int)mlen, b: bytes_to_copy);
1740
1741 error = uio_copyin_user(mtod(m, caddr_t), n: (int)len, uio: auio);
1742 if (error != 0) {
1743 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
1744 len, error);
1745 goto done;
1746 }
1747 m->m_len = len;
1748 top->m_pkthdr.len += len;
1749 }
1750
1751done:
1752 if (error != 0) {
1753 m_freem(top);
1754 } else {
1755 *mp = top;
1756 }
1757 return error;
1758}
1759
1760static int
1761sendit_x(proc_ref_t p, socket_ref_t so, struct sendmsg_x_args *uap, u_int *retval)
1762{
1763 int error = 0;
1764 uio_t auio = NULL;
1765 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1766 void_ptr_t src;
1767 MBUFQ_HEAD() pktlist = {};
1768 size_t total_pkt_len = 0;
1769 u_int pkt_cnt = 0;
1770 int flags = uap->flags;
1771 mbuf_ref_t top;
1772
1773 MBUFQ_INIT(&pktlist);
1774
1775 *retval = 0;
1776
1777 /* We re-use the uio when possible */
1778 auio = uio_create(a_iovcount: 1, a_offset: 0,
1779 a_spacetype: (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1780 a_iodirection: UIO_WRITE);
1781 if (auio == NULL) {
1782 error = ENOBUFS;
1783 DBG_PRINTF("%s uio_create() failed %d",
1784 __func__, error);
1785 goto done;
1786 }
1787
1788 src = (void_ptr_t)uap->msgp;
1789
1790 /*
1791 * Create a list of packets
1792 */
1793 for (u_int i = 0; i < uap->cnt; i++) {
1794 struct user_msghdr user_msg = {};
1795 mbuf_ref_t m = NULL;
1796
1797 if (is_p_64bit_process) {
1798 error = internalize_user_msg_x(user_msg: &user_msg, auiop: &auio, p, user_msghdr_x_src: ((struct user64_msghdr_x *)src) + i);
1799 if (error != 0) {
1800 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1801 goto done;
1802 }
1803 } else {
1804 error = internalize_user_msg_x(user_msg: &user_msg, auiop: &auio, p, user_msghdr_x_src: ((struct user32_msghdr_x *)src) + i);
1805 if (error != 0) {
1806 os_log(OS_LOG_DEFAULT, "sendit_x: internalize_user_msg_x error %d\n", error);
1807 goto done;
1808 }
1809 }
1810 /*
1811 * Stop on the first datagram that is too large
1812 */
1813 if (uio_resid(a_uio: auio) > so->so_snd.sb_hiwat) {
1814 if (i == 0) {
1815 error = EMSGSIZE;
1816 goto done;
1817 }
1818 break;
1819 }
1820 /*
1821 * An mbuf packet has the control mbuf(s) followed by data
1822 * We allocate the mbufs in reverse order
1823 */
1824 error = mbuf_packet_from_uio(so, mp: &m, auio);
1825 if (error != 0) {
1826 os_log(OS_LOG_DEFAULT, "sendit_x: mbuf_packet_from_uio error %d\n", error);
1827 goto done;
1828 }
1829 total_pkt_len += m->m_pkthdr.len;
1830
1831 if (user_msg.msg_control != USER_ADDR_NULL && user_msg.msg_controllen != 0) {
1832 mbuf_ref_t control = NULL;
1833
1834 error = sockargs(mp: &control, data: user_msg.msg_control, buflen: user_msg.msg_controllen, MT_CONTROL);
1835 if (error != 0) {
1836 os_log(OS_LOG_DEFAULT, "sendit_x: sockargs error %d\n", error);
1837 goto done;
1838 }
1839 control->m_next = m;
1840 m = control;
1841 }
1842 MBUFQ_ENQUEUE(&pktlist, m);
1843
1844 pkt_cnt += 1;
1845 }
1846
1847 top = MBUFQ_FIRST(&pktlist);
1848 MBUFQ_INIT(&pktlist);
1849 error = sosend_list(so, pktlist: top, total_pkt_len, pktcnt: &pkt_cnt, flags);
1850 if (error != 0) {
1851 os_log(OS_LOG_DEFAULT, "sendit_x: sosend_list error %d\n", error);
1852 goto done;
1853 }
1854done:
1855 *retval = pkt_cnt;
1856
1857 if (auio != NULL) {
1858 uio_free(a_uio: auio);
1859 }
1860 MBUFQ_DRAIN(&pktlist);
1861 return error;
1862}
1863
1864int
1865sendmsg_x(proc_ref_t p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1866{
1867 void_ptr_t src;
1868 int error;
1869 uio_t auio = NULL;
1870 socket_ref_t so;
1871 u_int uiocnt = 0;
1872 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
1873
1874 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1875 AUDIT_ARG(fd, uap->s);
1876
1877 if (uap->flags & MSG_SKIPCFIL) {
1878 error = EPERM;
1879 goto done_no_filedrop;
1880 }
1881
1882 error = file_socket(uap->s, &so);
1883 if (error) {
1884 goto done_no_filedrop;
1885 }
1886 if (so == NULL) {
1887 error = EBADF;
1888 goto done;
1889 }
1890
1891 /*
1892 * For an atomic datagram connected socket we can build the list of
1893 * mbuf packets with sosend_list()
1894 */
1895 if (so->so_type == SOCK_DGRAM && sosendallatonce(so) &&
1896 (so->so_state & SS_ISCONNECTED) && sendmsg_x_mode != 1) {
1897 error = sendit_x(p, so, uap, retval: &uiocnt);
1898 if (error != 0) {
1899 DBG_PRINTF("%s sendit_x() failed %d",
1900 __func__, error);
1901 }
1902 goto done;
1903 }
1904
1905 src = (void_ptr_t)uap->msgp;
1906
1907 /* We re-use the uio when possible */
1908 auio = uio_create(a_iovcount: 1, a_offset: 0,
1909 a_spacetype: (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
1910 a_iodirection: UIO_WRITE);
1911 if (auio == NULL) {
1912 error = ENOBUFS;
1913 DBG_PRINTF("%s uio_create() failed %d",
1914 __func__, error);
1915 goto done;
1916 }
1917
1918 for (u_int i = 0; i < uap->cnt; i++) {
1919 struct user_msghdr user_msg = {};
1920
1921 if (is_p_64bit_process) {
1922 error = internalize_user_msg_x(user_msg: &user_msg, auiop: &auio, p, user_msghdr_x_src: ((struct user64_msghdr_x *)src) + i);
1923 if (error != 0) {
1924 goto done;
1925 }
1926 } else {
1927 error = internalize_user_msg_x(user_msg: &user_msg, auiop: &auio, p, user_msghdr_x_src: ((struct user32_msghdr_x *)src) + i);
1928 if (error != 0) {
1929 goto done;
1930 }
1931 }
1932
1933 int32_t len = 0;
1934 error = sendit(p, so, mp: &user_msg, uiop: auio, flags: uap->flags, retval: &len);
1935 if (error != 0) {
1936 break;
1937 }
1938 uiocnt += 1;
1939 }
1940done:
1941 if (error != 0) {
1942 if (uiocnt != 0 && (error == ERESTART ||
1943 error == EINTR || error == EWOULDBLOCK ||
1944 error == ENOBUFS || error == EMSGSIZE)) {
1945 error = 0;
1946 }
1947 /* Generation of SIGPIPE can be controlled per socket */
1948 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1949 !(uap->flags & MSG_NOSIGNAL)) {
1950 psignal(p, SIGPIPE);
1951 }
1952 }
1953 if (error == 0) {
1954 *retval = (int)(uiocnt);
1955 }
1956 file_drop(uap->s);
1957
1958done_no_filedrop:
1959 if (auio != NULL) {
1960 uio_free(a_uio: auio);
1961 }
1962 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1963
1964 return error;
1965}
1966
1967
1968static int
1969copyout_sa(sockaddr_ref_t fromsa, user_addr_t name, socklen_t *namelen)
1970{
1971 int error = 0;
1972 socklen_t sa_len = 0;
1973 ssize_t len;
1974
1975 len = *namelen;
1976 if (len <= 0 || fromsa == 0) {
1977 len = 0;
1978 } else {
1979#ifndef MIN
1980#define MIN(a, b) ((a) > (b) ? (b) : (a))
1981#endif
1982 sa_len = fromsa->sa_len;
1983 len = MIN((unsigned int)len, sa_len);
1984 error = copyout(fromsa, name, (unsigned)len);
1985 if (error) {
1986 goto out;
1987 }
1988 }
1989 *namelen = sa_len;
1990out:
1991 return 0;
1992}
1993
1994static int
1995copyout_maddr(struct mbuf *m, user_addr_t name, socklen_t *namelen)
1996{
1997 int error = 0;
1998 socklen_t sa_len = 0;
1999 ssize_t len;
2000
2001 len = *namelen;
2002 if (len <= 0 || m == NULL) {
2003 len = 0;
2004 } else {
2005#ifndef MIN
2006#define MIN(a, b) ((a) > (b) ? (b) : (a))
2007#endif
2008 struct sockaddr *fromsa = mtod(m, struct sockaddr *);
2009
2010 sa_len = fromsa->sa_len;
2011 len = MIN((unsigned int)len, sa_len);
2012 error = copyout(fromsa, name, (unsigned)len);
2013 if (error != 0) {
2014 goto out;
2015 }
2016 }
2017 *namelen = sa_len;
2018out:
2019 return 0;
2020}
2021
2022static int
2023copyout_control(proc_ref_t p, mbuf_ref_t m, user_addr_t control,
2024 socklen_ref_t controllen, int_ref_t flags, socket_ref_t so)
2025{
2026 int error = 0;
2027 socklen_t len;
2028 user_addr_t ctlbuf;
2029 struct inpcb *inp = NULL;
2030 bool want_pktinfo = false;
2031 bool seen_pktinfo = false;
2032
2033 if (so != NULL && (SOCK_DOM(so) == PF_INET6 || SOCK_DOM(so) == PF_INET)) {
2034 inp = sotoinpcb(so);
2035 want_pktinfo = (inp->inp_flags & IN6P_PKTINFO) != 0;
2036 }
2037
2038 len = *controllen;
2039 *controllen = 0;
2040 ctlbuf = control;
2041
2042 while (m && len > 0) {
2043 socklen_t tocopy;
2044 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
2045 socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
2046 socklen_t buflen = m->m_len;
2047
2048 while (buflen > 0 && len > 0) {
2049 /*
2050 * SCM_TIMESTAMP hack because struct timeval has a
2051 * different size for 32 bits and 64 bits processes
2052 */
2053 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
2054 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
2055 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
2056 socklen_t tmp_space;
2057 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
2058
2059 tmp_cp->cmsg_level = SOL_SOCKET;
2060 tmp_cp->cmsg_type = SCM_TIMESTAMP;
2061
2062 if (proc_is64bit(p)) {
2063 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
2064
2065 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
2066 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
2067
2068 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
2069 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
2070 } else {
2071 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
2072
2073 tv32->tv_sec = (user32_time_t)tv->tv_sec;
2074 tv32->tv_usec = tv->tv_usec;
2075
2076 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
2077 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
2078 }
2079 if (len >= tmp_space) {
2080 tocopy = tmp_space;
2081 } else {
2082 *flags |= MSG_CTRUNC;
2083 tocopy = len;
2084 }
2085 error = copyout(tmp_buffer, ctlbuf, tocopy);
2086 if (error) {
2087 goto out;
2088 }
2089 } else {
2090 /* If socket has flow tracking and socket did not request address, ignore it */
2091 if (SOFLOW_ENABLED(so) &&
2092 ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp != NULL &&
2093 !(inp->inp_flags & INP_RECVDSTADDR)) ||
2094 (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
2095 !(inp->inp_flags & IN6P_PKTINFO)))) {
2096 tocopy = 0;
2097 } else {
2098 if (cp_size > buflen) {
2099 panic("cp_size > buflen, something wrong with alignment!");
2100 }
2101 if (len >= cp_size) {
2102 tocopy = cp_size;
2103 } else {
2104 *flags |= MSG_CTRUNC;
2105 tocopy = len;
2106 }
2107 error = copyout((caddr_t) cp, ctlbuf, tocopy);
2108 if (error) {
2109 goto out;
2110 }
2111 if (want_pktinfo && cp->cmsg_level == IPPROTO_IPV6 &&
2112 (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO)) {
2113 seen_pktinfo = true;
2114 }
2115 }
2116 }
2117
2118
2119 ctlbuf += tocopy;
2120 len -= tocopy;
2121
2122 buflen -= cp_size;
2123 cp = (struct cmsghdr *)(void *)
2124 ((unsigned char *) cp + cp_size);
2125 cp_size = CMSG_ALIGN(cp->cmsg_len);
2126 }
2127
2128 m = m->m_next;
2129 }
2130 *controllen = (socklen_t)(ctlbuf - control);
2131out:
2132 if (want_pktinfo && !seen_pktinfo) {
2133 missingpktinfo += 1;
2134#if (DEBUG || DEVELOPMENT)
2135 char pname[MAXCOMLEN];
2136 char local[MAX_IPv6_STR_LEN + 6];
2137 char remote[MAX_IPv6_STR_LEN + 6];
2138
2139 proc_name(so->last_pid, pname, sizeof(MAXCOMLEN));
2140 if (inp->inp_vflag & INP_IPV6) {
2141 inet_ntop(AF_INET6, &inp->in6p_laddr.s6_addr, local, sizeof(local));
2142 inet_ntop(AF_INET6, &inp->in6p_faddr.s6_addr, remote, sizeof(local));
2143 } else {
2144 inet_ntop(AF_INET, &inp->inp_laddr.s_addr, local, sizeof(local));
2145 inet_ntop(AF_INET, &inp->inp_faddr.s_addr, remote, sizeof(local));
2146 }
2147
2148 os_log(OS_LOG_DEFAULT,
2149 "cmsg IPV6_PKTINFO missing for %s:%u > %s:%u proc %s.%u error %d\n",
2150 local, ntohs(inp->inp_lport), remote, ntohs(inp->inp_fport),
2151 pname, so->last_pid, error);
2152#endif /* (DEBUG || DEVELOPMENT) */
2153 }
2154 return error;
2155}
2156
2157/*
2158 * Returns: 0 Success
2159 * ENOTSOCK
2160 * EINVAL
2161 * EBADF
2162 * EACCES Mandatory Access Control failure
2163 * copyout:EFAULT
2164 * fp_lookup:EBADF
2165 * <pru_soreceive>:ENOBUFS
2166 * <pru_soreceive>:ENOTCONN
2167 * <pru_soreceive>:EWOULDBLOCK
2168 * <pru_soreceive>:EFAULT
2169 * <pru_soreceive>:EINTR
2170 * <pru_soreceive>:EBADF
2171 * <pru_soreceive>:EINVAL
2172 * <pru_soreceive>:EMSGSIZE
2173 * <pru_soreceive>:???
2174 *
2175 * Notes: Additional return values from calls through <pru_soreceive>
2176 * depend on protocols other than TCP or AF_UNIX, which are
2177 * documented above.
2178 */
2179static int
2180recvit(proc_ref_t p, int s, user_msghdr_ref_t mp, uio_t uiop,
2181 user_addr_t namelenp, int32_ref_t retval)
2182{
2183 ssize_t len;
2184 int error;
2185 mbuf_ref_t control = 0;
2186 socket_ref_t so;
2187 sockaddr_ref_t fromsa = 0;
2188 fileproc_ref_t fp;
2189
2190 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
2191 if ((error = fp_get_ftype(p, fd: s, ftype: DTYPE_SOCKET, ENOTSOCK, fpp: &fp))) {
2192 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2193 return error;
2194 }
2195 so = (struct socket *)fp_get_data(fp);
2196
2197#if CONFIG_MACF_SOCKET_SUBSET
2198 /*
2199 * We check the state without holding the socket lock;
2200 * if a race condition occurs, it would simply result
2201 * in an extra call to the MAC check function.
2202 */
2203 if (!(so->so_state & SS_DEFUNCT) &&
2204 !(so->so_state & SS_ISCONNECTED) &&
2205 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2206 (error = mac_socket_check_receive(cred: kauth_cred_get(), so)) != 0) {
2207 goto out1;
2208 }
2209#endif /* MAC_SOCKET_SUBSET */
2210 if (uio_resid(a_uio: uiop) < 0 || uio_resid(a_uio: uiop) > INT_MAX) {
2211 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
2212 error = EINVAL;
2213 goto out1;
2214 }
2215
2216 len = uio_resid(a_uio: uiop);
2217 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
2218 NULL, mp->msg_control ? &control : NULL,
2219 &mp->msg_flags);
2220 if (fromsa) {
2221 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
2222 fromsa);
2223 }
2224 if (error) {
2225 if (uio_resid(a_uio: uiop) != len && (error == ERESTART ||
2226 error == EINTR || error == EWOULDBLOCK)) {
2227 error = 0;
2228 }
2229 }
2230 if (error) {
2231 goto out;
2232 }
2233
2234 *retval = (int32_t)(len - uio_resid(a_uio: uiop));
2235
2236 if (mp->msg_name) {
2237 error = copyout_sa(fromsa, name: mp->msg_name, namelen: &mp->msg_namelen);
2238 if (error) {
2239 goto out;
2240 }
2241 /* return the actual, untruncated address length */
2242 if (namelenp &&
2243 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2244 sizeof(int)))) {
2245 goto out;
2246 }
2247 }
2248
2249 if (mp->msg_control) {
2250 error = copyout_control(p, m: control, control: mp->msg_control,
2251 controllen: &mp->msg_controllen, flags: &mp->msg_flags, so);
2252 }
2253out:
2254 free_sockaddr(fromsa);
2255 if (control) {
2256 m_freem(control);
2257 }
2258 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2259out1:
2260 fp_drop(p, fd: s, fp, locked: 0);
2261 return error;
2262}
2263
2264/*
2265 * Returns: 0 Success
2266 * ENOMEM
2267 * copyin:EFAULT
2268 * recvit:???
2269 * read:??? [4056224: applicable for pipes]
2270 *
2271 * Notes: The read entry point is only called as part of support for
2272 * binary backward compatability; new code should use read
2273 * instead of recv or recvfrom when attempting to read data
2274 * from pipes.
2275 *
2276 * For full documentation of the return codes from recvit, see
2277 * the block header for the recvit function.
2278 */
2279int
2280recvfrom(proc_ref_t p, struct recvfrom_args *uap, int32_ref_t retval)
2281{
2282 __pthread_testcancel(presyscall: 1);
2283 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2284 retval);
2285}
2286
2287int
2288recvfrom_nocancel(proc_ref_t p, struct recvfrom_nocancel_args *uap,
2289 int32_ref_t retval)
2290{
2291 struct user_msghdr msg;
2292 int error;
2293 uio_t auio = NULL;
2294
2295 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2296 AUDIT_ARG(fd, uap->s);
2297
2298 if (uap->fromlenaddr) {
2299 error = copyin(uap->fromlenaddr,
2300 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2301 if (error) {
2302 return error;
2303 }
2304 } else {
2305 msg.msg_namelen = 0;
2306 }
2307 msg.msg_name = uap->from;
2308 auio = uio_create(a_iovcount: 1, a_offset: 0,
2309 a_spacetype: (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2310 a_iodirection: UIO_READ);
2311 if (auio == NULL) {
2312 return ENOMEM;
2313 }
2314
2315 uio_addiov(a_uio: auio, a_baseaddr: uap->buf, a_length: uap->len);
2316 /* no need to set up msg_iov. recvit uses uio_t we send it */
2317 msg.msg_iov = 0;
2318 msg.msg_iovlen = 0;
2319 msg.msg_control = 0;
2320 msg.msg_controllen = 0;
2321 msg.msg_flags = uap->flags;
2322 error = recvit(p, s: uap->s, mp: &msg, uiop: auio, namelenp: uap->fromlenaddr, retval);
2323 if (auio != NULL) {
2324 uio_free(a_uio: auio);
2325 }
2326
2327 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2328
2329 return error;
2330}
2331
2332/*
2333 * Returns: 0 Success
2334 * EMSGSIZE
2335 * ENOMEM
2336 * copyin:EFAULT
2337 * copyout:EFAULT
2338 * recvit:???
2339 *
2340 * Notes: For full documentation of the return codes from recvit, see
2341 * the block header for the recvit function.
2342 */
2343int
2344recvmsg(proc_ref_t p, struct recvmsg_args *uap, int32_ref_t retval)
2345{
2346 __pthread_testcancel(presyscall: 1);
2347 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2348 retval);
2349}
2350
2351int
2352recvmsg_nocancel(proc_ref_t p, struct recvmsg_nocancel_args *uap,
2353 int32_ref_t retval)
2354{
2355 struct user32_msghdr msg32;
2356 struct user64_msghdr msg64;
2357 struct user_msghdr user_msg;
2358 caddr_t msghdrp;
2359 int size_of_msghdr;
2360 user_addr_t uiov;
2361 int error;
2362 uio_t auio = NULL;
2363 struct user_iovec *iovp;
2364
2365 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2366
2367 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2368 AUDIT_ARG(fd, uap->s);
2369 if (is_p_64bit_process) {
2370 msghdrp = (caddr_t)&msg64;
2371 size_of_msghdr = sizeof(msg64);
2372 } else {
2373 msghdrp = (caddr_t)&msg32;
2374 size_of_msghdr = sizeof(msg32);
2375 }
2376 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2377 if (error) {
2378 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2379 return error;
2380 }
2381
2382 /* only need to copy if user process is not 64-bit */
2383 if (is_p_64bit_process) {
2384 user_msg.msg_flags = msg64.msg_flags;
2385 user_msg.msg_controllen = msg64.msg_controllen;
2386 user_msg.msg_control = (user_addr_t)msg64.msg_control;
2387 user_msg.msg_iovlen = msg64.msg_iovlen;
2388 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2389 user_msg.msg_namelen = msg64.msg_namelen;
2390 user_msg.msg_name = (user_addr_t)msg64.msg_name;
2391 } else {
2392 user_msg.msg_flags = msg32.msg_flags;
2393 user_msg.msg_controllen = msg32.msg_controllen;
2394 user_msg.msg_control = msg32.msg_control;
2395 user_msg.msg_iovlen = msg32.msg_iovlen;
2396 user_msg.msg_iov = msg32.msg_iov;
2397 user_msg.msg_namelen = msg32.msg_namelen;
2398 user_msg.msg_name = msg32.msg_name;
2399 }
2400
2401 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2402 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2403 0, 0, 0, 0);
2404 return EMSGSIZE;
2405 }
2406
2407 user_msg.msg_flags = uap->flags;
2408
2409 /* allocate a uio large enough to hold the number of iovecs passed */
2410 auio = uio_create(a_iovcount: user_msg.msg_iovlen, a_offset: 0,
2411 a_spacetype: (is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32),
2412 a_iodirection: UIO_READ);
2413 if (auio == NULL) {
2414 error = ENOMEM;
2415 goto done;
2416 }
2417
2418 /*
2419 * get location of iovecs within the uio. then copyin the iovecs from
2420 * user space.
2421 */
2422 iovp = uio_iovsaddr_user(a_uio: auio);
2423 if (iovp == NULL) {
2424 error = ENOMEM;
2425 goto done;
2426 }
2427 uiov = user_msg.msg_iov;
2428 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2429 error = copyin_user_iovec_array(uaddr: uiov,
2430 spacetype: is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2431 count: user_msg.msg_iovlen, dst: iovp);
2432 if (error) {
2433 goto done;
2434 }
2435
2436 /* finish setup of uio_t */
2437 error = uio_calculateresid_user(a_uio: auio);
2438 if (error) {
2439 goto done;
2440 }
2441
2442 error = recvit(p, s: uap->s, mp: &user_msg, uiop: auio, namelenp: 0, retval);
2443 if (!error) {
2444 user_msg.msg_iov = uiov;
2445 if (is_p_64bit_process) {
2446 msg64.msg_flags = user_msg.msg_flags;
2447 msg64.msg_controllen = user_msg.msg_controllen;
2448 msg64.msg_control = user_msg.msg_control;
2449 msg64.msg_iovlen = user_msg.msg_iovlen;
2450 msg64.msg_iov = user_msg.msg_iov;
2451 msg64.msg_namelen = user_msg.msg_namelen;
2452 msg64.msg_name = user_msg.msg_name;
2453 } else {
2454 msg32.msg_flags = user_msg.msg_flags;
2455 msg32.msg_controllen = user_msg.msg_controllen;
2456 msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2457 msg32.msg_iovlen = user_msg.msg_iovlen;
2458 msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2459 msg32.msg_namelen = user_msg.msg_namelen;
2460 msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2461 }
2462 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2463 }
2464done:
2465 if (auio != NULL) {
2466 uio_free(a_uio: auio);
2467 }
2468 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2469 return error;
2470}
2471
2472__attribute__((noinline))
2473static int
2474recvmsg_x_array(proc_ref_t p, socket_ref_t so, struct recvmsg_x_args *uap, user_ssize_t *retval)
2475{
2476 int error = EOPNOTSUPP;
2477 user_msghdr_x_ptr_t user_msg_x = NULL;
2478 recv_msg_elem_ptr_t recv_msg_array = NULL;
2479 user_ssize_t len_before = 0, len_after;
2480 size_t size_of_msghdr;
2481 void_ptr_t umsgp = NULL;
2482 u_int i;
2483 u_int uiocnt;
2484 int flags = uap->flags;
2485
2486 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
2487
2488 size_of_msghdr = is_p_64bit_process ?
2489 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2490
2491 /*
2492 * Support only a subset of message flags
2493 */
2494 if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | MSG_NBIO)) {
2495 return EOPNOTSUPP;
2496 }
2497 /*
2498 * Input parameter range check
2499 */
2500 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2501 error = EINVAL;
2502 goto out;
2503 }
2504 if (uap->cnt > somaxrecvmsgx) {
2505 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2506 }
2507
2508 user_msg_x = kalloc_type(struct user_msghdr_x, uap->cnt,
2509 Z_WAITOK | Z_ZERO);
2510 if (user_msg_x == NULL) {
2511 DBG_PRINTF("%s user_msg_x alloc failed", __func__);
2512 error = ENOMEM;
2513 goto out;
2514 }
2515 recv_msg_array = alloc_recv_msg_array(count: uap->cnt);
2516 if (recv_msg_array == NULL) {
2517 DBG_PRINTF("%s alloc_recv_msg_array() failed", __func__);
2518 error = ENOMEM;
2519 goto out;
2520 }
2521
2522 umsgp = kalloc_data(uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2523 if (umsgp == NULL) {
2524 DBG_PRINTF("%s umsgp alloc failed", __func__);
2525 error = ENOMEM;
2526 goto out;
2527 }
2528 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2529 if (error) {
2530 DBG_PRINTF("%s copyin() failed", __func__);
2531 goto out;
2532 }
2533 error = internalize_recv_msghdr_array(umsgp,
2534 is_p_64bit_process ? UIO_USERSPACE64 : UIO_USERSPACE32,
2535 UIO_READ, count: uap->cnt, user_msg_x, recv_msg_array);
2536 if (error) {
2537 DBG_PRINTF("%s copyin_user_msghdr_array() failed", __func__);
2538 goto out;
2539 }
2540 /*
2541 * Make sure the size of each message iovec and
2542 * the aggregate size of all the iovec is valid
2543 */
2544 if (recv_msg_array_is_valid(recv_msg_array, count: uap->cnt) == 0) {
2545 error = EINVAL;
2546 goto out;
2547 }
2548 /*
2549 * Sanity check on passed arguments
2550 */
2551 for (i = 0; i < uap->cnt; i++) {
2552 struct user_msghdr_x *mp = user_msg_x + i;
2553
2554 if (mp->msg_flags != 0) {
2555 error = EINVAL;
2556 goto out;
2557 }
2558 }
2559#if CONFIG_MACF_SOCKET_SUBSET
2560 /*
2561 * We check the state without holding the socket lock;
2562 * if a race condition occurs, it would simply result
2563 * in an extra call to the MAC check function.
2564 */
2565 if (!(so->so_state & SS_DEFUNCT) &&
2566 !(so->so_state & SS_ISCONNECTED) &&
2567 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2568 (error = mac_socket_check_receive(cred: kauth_cred_get(), so)) != 0) {
2569 goto out;
2570 }
2571#endif /* MAC_SOCKET_SUBSET */
2572
2573 len_before = recv_msg_array_resid(recv_msg_array, count: uap->cnt);
2574
2575 for (i = 0; i < uap->cnt; i++) {
2576 struct recv_msg_elem *recv_msg_elem;
2577 uio_t auio;
2578 sockaddr_ref_ref_t psa;
2579 struct mbuf **controlp;
2580
2581 recv_msg_elem = recv_msg_array + i;
2582 auio = recv_msg_elem->uio;
2583
2584 /*
2585 * Do not block if we got at least one packet
2586 */
2587 if (i > 0) {
2588 flags |= MSG_DONTWAIT;
2589 }
2590
2591 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2592 &recv_msg_elem->psa : NULL;
2593 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2594 &recv_msg_elem->controlp : NULL;
2595
2596 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2597 auio, NULL, controlp, &flags);
2598 if (error) {
2599 break;
2600 }
2601 /*
2602 * We have some data
2603 */
2604 recv_msg_elem->which |= SOCK_MSG_DATA;
2605 /*
2606 * Set the messages flags for this packet
2607 */
2608 flags &= ~MSG_DONTWAIT;
2609 recv_msg_elem->flags = flags;
2610 /*
2611 * Stop on partial copy
2612 */
2613 if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2614 break;
2615 }
2616 }
2617
2618 len_after = recv_msg_array_resid(recv_msg_array, count: uap->cnt);
2619
2620 if (error) {
2621 if (len_after != len_before && (error == ERESTART ||
2622 error == EINTR || error == EWOULDBLOCK)) {
2623 error = 0;
2624 } else {
2625 goto out;
2626 }
2627 }
2628
2629 uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2630 count: uap->cnt, user_msg_x, recv_msg_array, &error);
2631 if (error != 0) {
2632 goto out;
2633 }
2634
2635 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2636 if (error) {
2637 DBG_PRINTF("%s copyout() failed", __func__);
2638 goto out;
2639 }
2640 *retval = (int)(uiocnt);
2641
2642out:
2643 kfree_data(umsgp, uap->cnt * size_of_msghdr);
2644 free_recv_msg_array(recv_msg_array, count: uap->cnt);
2645 kfree_type(struct user_msghdr_x, uap->cnt, user_msg_x);
2646
2647 return error;
2648}
2649
2650int
2651recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2652{
2653 int error = EOPNOTSUPP;
2654 socket_ref_t so;
2655 size_t size_of_msghdrx;
2656 caddr_t msghdrxp;
2657 struct user32_msghdr_x msghdrx32 = {};
2658 struct user64_msghdr_x msghdrx64 = {};
2659 int spacetype;
2660 u_int i;
2661 uio_t auio = NULL;
2662 caddr_t src;
2663 int flags;
2664 struct mbuf *pkt_list = NULL, *m;
2665 struct mbuf *addr_list = NULL, *m_addr;
2666 struct mbuf *ctl_list = NULL, *control;
2667 u_int pktcnt;
2668
2669 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2670
2671 error = file_socket(uap->s, &so);
2672 if (error) {
2673 goto done_no_filedrop;
2674 }
2675 if (so == NULL) {
2676 error = EBADF;
2677 goto done;
2678 }
2679
2680#if CONFIG_MACF_SOCKET_SUBSET
2681 /*
2682 * We check the state without holding the socket lock;
2683 * if a race condition occurs, it would simply result
2684 * in an extra call to the MAC check function.
2685 */
2686 if (!(so->so_state & SS_DEFUNCT) &&
2687 !(so->so_state & SS_ISCONNECTED) &&
2688 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2689 (error = mac_socket_check_receive(cred: kauth_cred_get(), so)) != 0) {
2690 goto done;
2691 }
2692#endif /* MAC_SOCKET_SUBSET */
2693
2694 /*
2695 * With soreceive_m_list, all packets must be uniform, with address and
2696 * control as they are returned in parallel lists and it's only guaranteed
2697 * when pru_send_list is supported
2698 */
2699 if (do_recvmsg_x_donttrunc != 0 || (so->so_options & SO_DONTTRUNC)) {
2700 error = recvmsg_x_array(p, so, uap, retval);
2701 goto done;
2702 }
2703
2704 /*
2705 * Input parameter range check
2706 */
2707 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2708 error = EINVAL;
2709 goto done;
2710 }
2711 if (uap->cnt > somaxrecvmsgx) {
2712 uap->cnt = somaxrecvmsgx > 0 ? somaxrecvmsgx : 1;
2713 }
2714
2715 if (IS_64BIT_PROCESS(p)) {
2716 msghdrxp = (caddr_t)&msghdrx64;
2717 size_of_msghdrx = sizeof(struct user64_msghdr_x);
2718 spacetype = UIO_USERSPACE64;
2719 } else {
2720 msghdrxp = (caddr_t)&msghdrx32;
2721 size_of_msghdrx = sizeof(struct user32_msghdr_x);
2722 spacetype = UIO_USERSPACE32;
2723 }
2724 src = (caddr_t)uap->msgp;
2725
2726 flags = uap->flags;
2727
2728 /*
2729 * Only allow MSG_DONTWAIT
2730 */
2731 if ((flags & ~(MSG_DONTWAIT | MSG_NBIO)) != 0) {
2732 error = EINVAL;
2733 goto done;
2734 }
2735
2736 /*
2737 * Receive list of packet in a single call
2738 */
2739 pktcnt = uap->cnt;
2740 error = soreceive_m_list(so, &pktcnt, madrp: &addr_list, &pkt_list, &ctl_list,
2741 &flags);
2742 if (error != 0) {
2743 if (pktcnt != 0 && (error == ERESTART ||
2744 error == EINTR || error == EWOULDBLOCK)) {
2745 error = 0;
2746 } else {
2747 goto done;
2748 }
2749 }
2750
2751 m_addr = addr_list;
2752 m = pkt_list;
2753 control = ctl_list;
2754
2755 for (i = 0; i < pktcnt; i++) {
2756 struct user_msghdr user_msg;
2757 ssize_t len;
2758 struct user_iovec *iovp;
2759 struct mbuf *n;
2760
2761 if (!m_has_mtype(m, mtype_flags: MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
2762 panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
2763 }
2764
2765 error = copyin((user_addr_t)(src + i * size_of_msghdrx),
2766 msghdrxp, size_of_msghdrx);
2767 if (error) {
2768 DBG_PRINTF("%s copyin() msghdrx failed %d\n",
2769 __func__, error);
2770 goto done;
2771 }
2772 if (spacetype == UIO_USERSPACE64) {
2773 user_msg.msg_name = msghdrx64.msg_name;
2774 user_msg.msg_namelen = msghdrx64.msg_namelen;
2775 user_msg.msg_iov = msghdrx64.msg_iov;
2776 user_msg.msg_iovlen = msghdrx64.msg_iovlen;
2777 user_msg.msg_control = msghdrx64.msg_control;
2778 user_msg.msg_controllen = msghdrx64.msg_controllen;
2779 } else {
2780 user_msg.msg_name = msghdrx32.msg_name;
2781 user_msg.msg_namelen = msghdrx32.msg_namelen;
2782 user_msg.msg_iov = msghdrx32.msg_iov;
2783 user_msg.msg_iovlen = msghdrx32.msg_iovlen;
2784 user_msg.msg_control = msghdrx32.msg_control;
2785 user_msg.msg_controllen = msghdrx32.msg_controllen;
2786 }
2787 user_msg.msg_flags = 0;
2788 if (user_msg.msg_iovlen <= 0 ||
2789 user_msg.msg_iovlen > UIO_MAXIOV) {
2790 error = EMSGSIZE;
2791 DBG_PRINTF("%s bad msg_iovlen, error %d\n",
2792 __func__, error);
2793 goto done;
2794 }
2795 /*
2796 * Attempt to reuse the uio if large enough, otherwise we need
2797 * a new one
2798 */
2799 if (auio != NULL) {
2800 if (auio->uio_max_iovs <= user_msg.msg_iovlen) {
2801 uio_reset_fast(a_uio: auio, a_offset: 0, a_spacetype: spacetype, a_iodirection: UIO_READ);
2802 } else {
2803 uio_free(a_uio: auio);
2804 auio = NULL;
2805 }
2806 }
2807 if (auio == NULL) {
2808 auio = uio_create(a_iovcount: user_msg.msg_iovlen, a_offset: 0, a_spacetype: spacetype,
2809 a_iodirection: UIO_READ);
2810 if (auio == NULL) {
2811 error = ENOBUFS;
2812 DBG_PRINTF("%s uio_create() failed %d\n",
2813 __func__, error);
2814 goto done;
2815 }
2816 }
2817 /*
2818 * get location of iovecs within the uio then copy the iovecs
2819 * from user space.
2820 */
2821 iovp = uio_iovsaddr_user(a_uio: auio);
2822 if (iovp == NULL) {
2823 error = ENOMEM;
2824 DBG_PRINTF("%s uio_iovsaddr() failed %d\n",
2825 __func__, error);
2826 goto done;
2827 }
2828 error = copyin_user_iovec_array(uaddr: user_msg.msg_iov,
2829 spacetype, count: user_msg.msg_iovlen, dst: iovp);
2830 if (error != 0) {
2831 DBG_PRINTF("%s copyin_user_iovec_array() failed %d\n",
2832 __func__, error);
2833 goto done;
2834 }
2835 error = uio_calculateresid_user(a_uio: auio);
2836 if (error != 0) {
2837 DBG_PRINTF("%s uio_calculateresid() failed %d\n",
2838 __func__, error);
2839 goto done;
2840 }
2841 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2842
2843 len = uio_resid(a_uio: auio);
2844 for (n = m; n != NULL; n = n->m_next) {
2845 user_ssize_t resid = uio_resid(a_uio: auio);
2846 if (resid < n->m_len) {
2847 error = uio_copyout_user(mtod(n, caddr_t), n: (int)n->m_len, uio: auio);
2848 if (error != 0) {
2849 DBG_PRINTF("%s uiomove() failed\n",
2850 __func__);
2851 goto done;
2852 }
2853 flags |= MSG_TRUNC;
2854 break;
2855 }
2856
2857 error = uio_copyout_user(mtod(n, caddr_t), n: (int)n->m_len, uio: auio);
2858 if (error != 0) {
2859 DBG_PRINTF("%s uiomove() failed\n",
2860 __func__);
2861 goto done;
2862 }
2863 }
2864 len -= uio_resid(a_uio: auio);
2865
2866 if (user_msg.msg_name != 0 && user_msg.msg_namelen != 0) {
2867 error = copyout_maddr(m: m_addr, name: user_msg.msg_name,
2868 namelen: &user_msg.msg_namelen);
2869 if (error) {
2870 DBG_PRINTF("%s copyout_maddr() failed\n",
2871 __func__);
2872 goto done;
2873 }
2874 }
2875 if (user_msg.msg_control != 0 && user_msg.msg_controllen != 0) {
2876 error = copyout_control(p, m: control,
2877 control: user_msg.msg_control, controllen: &user_msg.msg_controllen,
2878 flags: &user_msg.msg_flags, so);
2879 if (error) {
2880 DBG_PRINTF("%s copyout_control() failed\n",
2881 __func__);
2882 goto done;
2883 }
2884 }
2885 /*
2886 * Note: the original msg_iovlen and msg_iov do not change
2887 */
2888 if (spacetype == UIO_USERSPACE64) {
2889 msghdrx64.msg_flags = user_msg.msg_flags;
2890 msghdrx64.msg_controllen = user_msg.msg_controllen;
2891 msghdrx64.msg_control = user_msg.msg_control;
2892 msghdrx64.msg_namelen = user_msg.msg_namelen;
2893 msghdrx64.msg_name = user_msg.msg_name;
2894 msghdrx64.msg_datalen = len;
2895 } else {
2896 msghdrx32.msg_flags = user_msg.msg_flags;
2897 msghdrx32.msg_controllen = user_msg.msg_controllen;
2898 msghdrx32.msg_control = (user32_addr_t) user_msg.msg_control;
2899 msghdrx32.msg_name = user_msg.msg_namelen;
2900 msghdrx32.msg_name = (user32_addr_t) user_msg.msg_name;
2901 msghdrx32.msg_datalen = (user32_size_t) len;
2902 }
2903 error = copyout(msghdrxp,
2904 (user_addr_t)(src + i * size_of_msghdrx),
2905 size_of_msghdrx);
2906 if (error) {
2907 DBG_PRINTF("%s copyout() msghdrx failed\n", __func__);
2908 goto done;
2909 }
2910
2911 m = m->m_nextpkt;
2912 if (control != NULL) {
2913 control = control->m_nextpkt;
2914 }
2915 if (m_addr != NULL) {
2916 m_addr = m_addr->m_nextpkt;
2917 }
2918 }
2919
2920 uap->flags = flags;
2921
2922 *retval = (int)i;
2923done:
2924 file_drop(uap->s);
2925
2926done_no_filedrop:
2927 if (pkt_list != NULL) {
2928 m_freem_list(pkt_list);
2929 }
2930 if (addr_list != NULL) {
2931 m_freem_list(addr_list);
2932 }
2933 if (ctl_list != NULL) {
2934 m_freem_list(ctl_list);
2935 }
2936 if (auio != NULL) {
2937 uio_free(a_uio: auio);
2938 }
2939
2940 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2941
2942 return error;
2943}
2944
2945/*
2946 * Returns: 0 Success
2947 * EBADF
2948 * file_socket:ENOTSOCK
2949 * file_socket:EBADF
2950 * soshutdown:EINVAL
2951 * soshutdown:ENOTCONN
2952 * soshutdown:EADDRNOTAVAIL[TCP]
2953 * soshutdown:ENOBUFS[TCP]
2954 * soshutdown:EMSGSIZE[TCP]
2955 * soshutdown:EHOSTUNREACH[TCP]
2956 * soshutdown:ENETUNREACH[TCP]
2957 * soshutdown:ENETDOWN[TCP]
2958 * soshutdown:ENOMEM[TCP]
2959 * soshutdown:EACCES[TCP]
2960 * soshutdown:EMSGSIZE[TCP]
2961 * soshutdown:ENOBUFS[TCP]
2962 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2963 * soshutdown:??? [other protocol families]
2964 */
2965/* ARGSUSED */
2966int
2967shutdown(__unused proc_ref_t p, struct shutdown_args *uap,
2968 __unused int32_ref_t retval)
2969{
2970 socket_ref_t so;
2971 int error;
2972
2973 AUDIT_ARG(fd, uap->s);
2974 error = file_socket(uap->s, &so);
2975 if (error) {
2976 return error;
2977 }
2978 if (so == NULL) {
2979 error = EBADF;
2980 goto out;
2981 }
2982 error = soshutdown(so: (struct socket *)so, how: uap->how);
2983out:
2984 file_drop(uap->s);
2985 return error;
2986}
2987
2988/*
2989 * Returns: 0 Success
2990 * EFAULT
2991 * EINVAL
2992 * EACCES Mandatory Access Control failure
2993 * file_socket:ENOTSOCK
2994 * file_socket:EBADF
2995 * sosetopt:EINVAL
2996 * sosetopt:ENOPROTOOPT
2997 * sosetopt:ENOBUFS
2998 * sosetopt:EDOM
2999 * sosetopt:EFAULT
3000 * sosetopt:EOPNOTSUPP[AF_UNIX]
3001 * sosetopt:???
3002 */
3003/* ARGSUSED */
3004int
3005setsockopt(proc_ref_t p, setsockopt_args_ref_t uap,
3006 __unused int32_ref_t retval)
3007{
3008 socket_ref_t so;
3009 struct sockopt sopt;
3010 int error;
3011
3012 AUDIT_ARG(fd, uap->s);
3013 if (uap->val == 0 && uap->valsize != 0) {
3014 return EFAULT;
3015 }
3016 /* No bounds checking on size (it's unsigned) */
3017
3018 error = file_socket(uap->s, &so);
3019 if (error) {
3020 return error;
3021 }
3022
3023 sopt.sopt_dir = SOPT_SET;
3024 sopt.sopt_level = uap->level;
3025 sopt.sopt_name = uap->name;
3026 sopt.sopt_val = uap->val;
3027 sopt.sopt_valsize = uap->valsize;
3028 sopt.sopt_p = p;
3029
3030 if (so == NULL) {
3031 error = EINVAL;
3032 goto out;
3033 }
3034#if CONFIG_MACF_SOCKET_SUBSET
3035 if ((error = mac_socket_check_setsockopt(cred: kauth_cred_get(), so,
3036 sopt: &sopt)) != 0) {
3037 goto out;
3038 }
3039#endif /* MAC_SOCKET_SUBSET */
3040 error = sosetoptlock(so, sopt: &sopt, 1); /* will lock socket */
3041out:
3042 file_drop(uap->s);
3043 return error;
3044}
3045
3046/*
3047 * Returns: 0 Success
3048 * EINVAL
3049 * EBADF
3050 * EACCES Mandatory Access Control failure
3051 * copyin:EFAULT
3052 * copyout:EFAULT
3053 * file_socket:ENOTSOCK
3054 * file_socket:EBADF
3055 * sogetopt:???
3056 */
3057int
3058getsockopt(proc_ref_t p, struct getsockopt_args *uap,
3059 __unused int32_ref_t retval)
3060{
3061 int error;
3062 socklen_t valsize;
3063 struct sockopt sopt;
3064 socket_ref_t so;
3065
3066 error = file_socket(uap->s, &so);
3067 if (error) {
3068 return error;
3069 }
3070 if (uap->val) {
3071 error = copyin(uap->avalsize, (caddr_t)&valsize,
3072 sizeof(valsize));
3073 if (error) {
3074 goto out;
3075 }
3076 /* No bounds checking on size (it's unsigned) */
3077 } else {
3078 valsize = 0;
3079 }
3080 sopt.sopt_dir = SOPT_GET;
3081 sopt.sopt_level = uap->level;
3082 sopt.sopt_name = uap->name;
3083 sopt.sopt_val = uap->val;
3084 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
3085 sopt.sopt_p = p;
3086
3087 if (so == NULL) {
3088 error = EBADF;
3089 goto out;
3090 }
3091#if CONFIG_MACF_SOCKET_SUBSET
3092 if ((error = mac_socket_check_getsockopt(cred: kauth_cred_get(), so,
3093 sopt: &sopt)) != 0) {
3094 goto out;
3095 }
3096#endif /* MAC_SOCKET_SUBSET */
3097 error = sogetoptlock(so: (struct socket *)so, sopt: &sopt, 1); /* will lock */
3098 if (error == 0) {
3099 valsize = (socklen_t)sopt.sopt_valsize;
3100 error = copyout((caddr_t)&valsize, uap->avalsize,
3101 sizeof(valsize));
3102 }
3103out:
3104 file_drop(uap->s);
3105 return error;
3106}
3107
3108
3109/*
3110 * Get socket name.
3111 *
3112 * Returns: 0 Success
3113 * EBADF
3114 * file_socket:ENOTSOCK
3115 * file_socket:EBADF
3116 * copyin:EFAULT
3117 * copyout:EFAULT
3118 * <pru_sockaddr>:ENOBUFS[TCP]
3119 * <pru_sockaddr>:ECONNRESET[TCP]
3120 * <pru_sockaddr>:EINVAL[AF_UNIX]
3121 * <sf_getsockname>:???
3122 */
3123/* ARGSUSED */
3124int
3125getsockname(__unused proc_ref_t p, struct getsockname_args *uap,
3126 __unused int32_ref_t retval)
3127{
3128 socket_ref_t so;
3129 sockaddr_ref_t sa;
3130 socklen_t len;
3131 socklen_t sa_len;
3132 int error;
3133
3134 error = file_socket(uap->fdes, &so);
3135 if (error) {
3136 return error;
3137 }
3138 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3139 if (error) {
3140 goto out;
3141 }
3142 if (so == NULL) {
3143 error = EBADF;
3144 goto out;
3145 }
3146 sa = 0;
3147 socket_lock(so, refcount: 1);
3148 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
3149 if (error == 0) {
3150 error = sflt_getsockname(so, local: &sa);
3151 if (error == EJUSTRETURN) {
3152 error = 0;
3153 }
3154 }
3155 socket_unlock(so, refcount: 1);
3156 if (error) {
3157 goto bad;
3158 }
3159 if (sa == 0) {
3160 len = 0;
3161 goto gotnothing;
3162 }
3163
3164 sa_len = sa->sa_len;
3165 len = MIN(len, sa_len);
3166 error = copyout((caddr_t)sa, uap->asa, len);
3167 if (error) {
3168 goto bad;
3169 }
3170 /* return the actual, untruncated address length */
3171 len = sa_len;
3172gotnothing:
3173 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3174bad:
3175 free_sockaddr(sa);
3176out:
3177 file_drop(uap->fdes);
3178 return error;
3179}
3180
3181/*
3182 * Get name of peer for connected socket.
3183 *
3184 * Returns: 0 Success
3185 * EBADF
3186 * EINVAL
3187 * ENOTCONN
3188 * file_socket:ENOTSOCK
3189 * file_socket:EBADF
3190 * copyin:EFAULT
3191 * copyout:EFAULT
3192 * <pru_peeraddr>:???
3193 * <sf_getpeername>:???
3194 */
3195/* ARGSUSED */
3196int
3197getpeername(__unused proc_ref_t p, struct getpeername_args *uap,
3198 __unused int32_ref_t retval)
3199{
3200 socket_ref_t so;
3201 sockaddr_ref_t sa;
3202 socklen_t len;
3203 socklen_t sa_len;
3204 int error;
3205
3206 error = file_socket(uap->fdes, &so);
3207 if (error) {
3208 return error;
3209 }
3210 if (so == NULL) {
3211 error = EBADF;
3212 goto out;
3213 }
3214
3215 socket_lock(so, refcount: 1);
3216
3217 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
3218 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
3219 /* the socket has been shutdown, no more getpeername's */
3220 socket_unlock(so, refcount: 1);
3221 error = EINVAL;
3222 goto out;
3223 }
3224
3225 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
3226 socket_unlock(so, refcount: 1);
3227 error = ENOTCONN;
3228 goto out;
3229 }
3230 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
3231 if (error) {
3232 socket_unlock(so, refcount: 1);
3233 goto out;
3234 }
3235 sa = 0;
3236 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
3237 if (error == 0) {
3238 error = sflt_getpeername(so, remote: &sa);
3239 if (error == EJUSTRETURN) {
3240 error = 0;
3241 }
3242 }
3243 socket_unlock(so, refcount: 1);
3244 if (error) {
3245 goto bad;
3246 }
3247 if (sa == 0) {
3248 len = 0;
3249 goto gotnothing;
3250 }
3251 sa_len = sa->sa_len;
3252 len = MIN(len, sa_len);
3253 error = copyout(sa, uap->asa, len);
3254 if (error) {
3255 goto bad;
3256 }
3257 /* return the actual, untruncated address length */
3258 len = sa_len;
3259gotnothing:
3260 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
3261bad:
3262 free_sockaddr(sa);
3263out:
3264 file_drop(uap->fdes);
3265 return error;
3266}
3267
3268int
3269sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
3270{
3271 sockaddr_ref_t sa;
3272 struct mbuf *m;
3273 int error;
3274 socklen_t alloc_buflen = buflen;
3275
3276 if (buflen > INT_MAX / 2) {
3277 return EINVAL;
3278 }
3279 if (type == MT_SONAME && (buflen > SOCK_MAXADDRLEN ||
3280 buflen < offsetof(struct sockaddr, sa_data[0]))) {
3281 return EINVAL;
3282 }
3283 if (type == MT_CONTROL && buflen < sizeof(struct cmsghdr)) {
3284 return EINVAL;
3285 }
3286
3287#ifdef __LP64__
3288 /*
3289 * The fd's in the buffer must expand to be pointers, thus we need twice
3290 * as much space
3291 */
3292 if (type == MT_CONTROL) {
3293 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3294 sizeof(struct cmsghdr);
3295 }
3296#endif
3297 if (alloc_buflen > MLEN) {
3298 if (type == MT_SONAME && alloc_buflen <= 112) {
3299 alloc_buflen = MLEN; /* unix domain compat. hack */
3300 } else if (alloc_buflen > MCLBYTES) {
3301 return EINVAL;
3302 }
3303 }
3304 m = m_get(M_WAIT, type);
3305 if (m == NULL) {
3306 return ENOBUFS;
3307 }
3308 if (alloc_buflen > MLEN) {
3309 MCLGET(m, M_WAIT);
3310 if ((m->m_flags & M_EXT) == 0) {
3311 m_free(m);
3312 return ENOBUFS;
3313 }
3314 }
3315 /*
3316 * K64: We still copyin the original buflen because it gets expanded
3317 * later and we lie about the size of the mbuf because it only affects
3318 * unp_* functions
3319 */
3320 m->m_len = buflen;
3321 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
3322 if (error) {
3323 (void) m_free(m);
3324 } else {
3325 *mp = m;
3326 if (type == MT_SONAME) {
3327 VERIFY(buflen <= SOCK_MAXADDRLEN);
3328 sa = mtod(m, sockaddr_ref_t);
3329 sa->sa_len = (__uint8_t)buflen;
3330 }
3331 }
3332 return error;
3333}
3334
3335/*
3336 * Given a user_addr_t of length len, allocate and fill out a *sa.
3337 *
3338 * Returns: 0 Success
3339 * ENAMETOOLONG Filename too long
3340 * EINVAL Invalid argument
3341 * ENOMEM Not enough space
3342 * copyin:EFAULT Bad address
3343 */
3344static int
3345getsockaddr(struct socket *so, sockaddr_ref_ref_t namp, user_addr_t uaddr,
3346 size_t len, boolean_t translate_unspec)
3347{
3348 sockaddr_ref_t sa;
3349 int error;
3350
3351 if (len > SOCK_MAXADDRLEN) {
3352 return ENAMETOOLONG;
3353 }
3354
3355 if (len < offsetof(struct sockaddr, sa_data[0])) {
3356 return EINVAL;
3357 }
3358
3359 sa = SA(alloc_sockaddr(len, Z_WAITOK | Z_NOFAIL));
3360
3361 error = copyin(uaddr, (caddr_t)sa, len);
3362 if (error) {
3363 free_sockaddr(sa);
3364 } else {
3365 /*
3366 * Force sa_family to AF_INET on AF_INET sockets to handle
3367 * legacy applications that use AF_UNSPEC (0). On all other
3368 * sockets we leave it unchanged and let the lower layer
3369 * handle it.
3370 */
3371 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
3372 SOCK_CHECK_DOM(so, PF_INET) &&
3373 len == sizeof(struct sockaddr_in)) {
3374 sa->sa_family = AF_INET;
3375 }
3376 VERIFY(len <= SOCK_MAXADDRLEN);
3377 sa = *&sa;
3378 sa->sa_len = (__uint8_t)len;
3379 *namp = sa;
3380 }
3381 return error;
3382}
3383
3384static int
3385getsockaddr_s(struct socket *so, sockaddr_storage_ref_t ss,
3386 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
3387{
3388 int error;
3389
3390 if (ss == NULL || uaddr == USER_ADDR_NULL ||
3391 len < offsetof(struct sockaddr, sa_data[0])) {
3392 return EINVAL;
3393 }
3394
3395 /*
3396 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
3397 * so the check here is inclusive.
3398 */
3399 if (len > sizeof(*ss)) {
3400 return ENAMETOOLONG;
3401 }
3402
3403 bzero(s: ss, n: sizeof(*ss));
3404 error = copyin(uaddr, (caddr_t)ss, len);
3405 if (error == 0) {
3406 /*
3407 * Force sa_family to AF_INET on AF_INET sockets to handle
3408 * legacy applications that use AF_UNSPEC (0). On all other
3409 * sockets we leave it unchanged and let the lower layer
3410 * handle it.
3411 */
3412 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
3413 SOCK_CHECK_DOM(so, PF_INET) &&
3414 len == sizeof(struct sockaddr_in)) {
3415 ss->ss_family = AF_INET;
3416 }
3417
3418 ss->ss_len = (__uint8_t)len;
3419 }
3420 return error;
3421}
3422
3423int
3424internalize_recv_msghdr_array(const void_ptr_t src, int spacetype, int direction,
3425 u_int count, user_msghdr_x_ptr_t dst,
3426 recv_msg_elem_ptr_t recv_msg_array)
3427{
3428 int error = 0;
3429 u_int i;
3430
3431 for (i = 0; i < count; i++) {
3432 struct user_iovec *iovp;
3433 struct user_msghdr_x *user_msg = dst + i;
3434 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3435
3436 if (spacetype == UIO_USERSPACE64) {
3437 const struct user64_msghdr_x *msghdr64;
3438
3439 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3440
3441 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3442 user_msg->msg_namelen = msghdr64->msg_namelen;
3443 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3444 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3445 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3446 user_msg->msg_controllen = msghdr64->msg_controllen;
3447 user_msg->msg_flags = msghdr64->msg_flags;
3448 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
3449 } else {
3450 const struct user32_msghdr_x *msghdr32;
3451
3452 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3453
3454 user_msg->msg_name = msghdr32->msg_name;
3455 user_msg->msg_namelen = msghdr32->msg_namelen;
3456 user_msg->msg_iov = msghdr32->msg_iov;
3457 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3458 user_msg->msg_control = msghdr32->msg_control;
3459 user_msg->msg_controllen = msghdr32->msg_controllen;
3460 user_msg->msg_flags = msghdr32->msg_flags;
3461 user_msg->msg_datalen = msghdr32->msg_datalen;
3462 }
3463
3464 if (user_msg->msg_iovlen <= 0 ||
3465 user_msg->msg_iovlen > UIO_MAXIOV) {
3466 error = EMSGSIZE;
3467 goto done;
3468 }
3469 recv_msg_elem->uio = uio_create(a_iovcount: user_msg->msg_iovlen, a_offset: 0,
3470 a_spacetype: spacetype, a_iodirection: direction);
3471 if (recv_msg_elem->uio == NULL) {
3472 error = ENOMEM;
3473 goto done;
3474 }
3475
3476 iovp = uio_iovsaddr_user(a_uio: recv_msg_elem->uio);
3477 if (iovp == NULL) {
3478 error = ENOMEM;
3479 goto done;
3480 }
3481 error = copyin_user_iovec_array(uaddr: user_msg->msg_iov,
3482 spacetype, count: user_msg->msg_iovlen, dst: iovp);
3483 if (error) {
3484 goto done;
3485 }
3486 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3487
3488 error = uio_calculateresid_user(a_uio: recv_msg_elem->uio);
3489 if (error) {
3490 goto done;
3491 }
3492 user_msg->msg_datalen = uio_resid(a_uio: recv_msg_elem->uio);
3493
3494 if (user_msg->msg_name && user_msg->msg_namelen) {
3495 recv_msg_elem->which |= SOCK_MSG_SA;
3496 }
3497 if (user_msg->msg_control && user_msg->msg_controllen) {
3498 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3499 }
3500 }
3501done:
3502
3503 return error;
3504}
3505
3506u_int
3507externalize_recv_msghdr_array(proc_ref_t p, socket_ref_t so, void_ptr_t dst,
3508 u_int count, user_msghdr_x_ptr_t src,
3509 recv_msg_elem_ptr_t recv_msg_array, int_ref_t ret_error)
3510{
3511 u_int i;
3512 u_int retcnt = 0;
3513 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3514
3515 *ret_error = 0;
3516
3517 for (i = 0; i < count; i++) {
3518 struct user_msghdr_x *user_msg = src + i;
3519 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3520 user_ssize_t len = 0;
3521 int error;
3522
3523 len = user_msg->msg_datalen - uio_resid(a_uio: recv_msg_elem->uio);
3524
3525 if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3526 retcnt++;
3527
3528 if (recv_msg_elem->which & SOCK_MSG_SA) {
3529 error = copyout_sa(fromsa: recv_msg_elem->psa, name: user_msg->msg_name,
3530 namelen: &user_msg->msg_namelen);
3531 if (error != 0) {
3532 *ret_error = error;
3533 return 0;
3534 }
3535 }
3536 if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3537 error = copyout_control(p, m: recv_msg_elem->controlp,
3538 control: user_msg->msg_control, controllen: &user_msg->msg_controllen,
3539 flags: &recv_msg_elem->flags, so);
3540 if (error != 0) {
3541 *ret_error = error;
3542 return 0;
3543 }
3544 }
3545 }
3546
3547 if (spacetype == UIO_USERSPACE64) {
3548 struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3549
3550 msghdr64->msg_namelen = user_msg->msg_namelen;
3551 msghdr64->msg_controllen = user_msg->msg_controllen;
3552 msghdr64->msg_flags = recv_msg_elem->flags;
3553 msghdr64->msg_datalen = len;
3554 } else {
3555 struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3556
3557 msghdr32->msg_namelen = user_msg->msg_namelen;
3558 msghdr32->msg_controllen = user_msg->msg_controllen;
3559 msghdr32->msg_flags = recv_msg_elem->flags;
3560 msghdr32->msg_datalen = (user32_size_t)len;
3561 }
3562 }
3563 return retcnt;
3564}
3565
3566recv_msg_elem_ptr_t
3567alloc_recv_msg_array(u_int count)
3568{
3569 return kalloc_type(struct recv_msg_elem, count, Z_WAITOK | Z_ZERO);
3570}
3571
3572void
3573free_recv_msg_array(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3574{
3575 if (recv_msg_array == NULL) {
3576 return;
3577 }
3578 for (uint32_t i = 0; i < count; i++) {
3579 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3580
3581 if (recv_msg_elem->uio != NULL) {
3582 uio_free(a_uio: recv_msg_elem->uio);
3583 }
3584 free_sockaddr(recv_msg_elem->psa);
3585 if (recv_msg_elem->controlp != NULL) {
3586 m_freem(recv_msg_elem->controlp);
3587 }
3588 }
3589 kfree_type(struct recv_msg_elem, count, recv_msg_array);
3590}
3591
3592
3593/* Extern linkage requires using __counted_by instead of bptr */
3594__private_extern__ user_ssize_t
3595recv_msg_array_resid(struct recv_msg_elem * __counted_by(count)recv_msg_array, u_int count)
3596{
3597 user_ssize_t len = 0;
3598 u_int i;
3599
3600 for (i = 0; i < count; i++) {
3601 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3602
3603 if (recv_msg_elem->uio != NULL) {
3604 len += uio_resid(a_uio: recv_msg_elem->uio);
3605 }
3606 }
3607 return len;
3608}
3609
3610int
3611recv_msg_array_is_valid(recv_msg_elem_ptr_t recv_msg_array, u_int count)
3612{
3613 user_ssize_t len = 0;
3614 u_int i;
3615
3616 for (i = 0; i < count; i++) {
3617 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3618
3619 if (recv_msg_elem->uio != NULL) {
3620 user_ssize_t resid = uio_resid(a_uio: recv_msg_elem->uio);
3621
3622 /*
3623 * Sanity check on the validity of the iovec:
3624 * no point of going over sb_max
3625 */
3626 if (resid < 0 || (u_int32_t)resid > sb_max) {
3627 return 0;
3628 }
3629
3630 len += resid;
3631 if (len < 0 || (u_int32_t)len > sb_max) {
3632 return 0;
3633 }
3634 }
3635 }
3636 return 1;
3637}
3638
3639#if SENDFILE
3640
/*
 * Number of iovec slots in the on-stack uio that sendfile() reads file
 * data through; also the default cap on mbufs per allocated packet.
 */
#define SFUIOBUFS 64

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * n must be at least 1: (n - 1) is evaluated in unsigned arithmetic and
 * wraps around for n == 0.
 */
#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes per pass (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters, per cluster size */
#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3653
3654static void
3655alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3656 mbuf_ref_ref_t m, boolean_t jumbocl)
3657{
3658 unsigned int needed;
3659
3660 if (pktlen == 0) {
3661 panic("%s: pktlen (%ld) must be non-zero", __func__, pktlen);
3662 }
3663
3664 /*
3665 * Try to allocate for the whole thing. Since we want full control
3666 * over the buffer size and be able to accept partial result, we can't
3667 * use mbuf_allocpacket(). The logic below is similar to sosend().
3668 */
3669 *m = NULL;
3670 if (pktlen > MBIGCLBYTES && jumbocl) {
3671 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3672 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3673 }
3674 if (*m == NULL) {
3675 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3676 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3677 }
3678
3679 /*
3680 * Our previous attempt(s) at allocation had failed; the system
3681 * may be short on mbufs, and we want to block until they are
3682 * available. This time, ask just for 1 mbuf and don't return
3683 * until we get it.
3684 */
3685 if (*m == NULL) {
3686 needed = 1;
3687 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3688 }
3689 if (*m == NULL) {
3690 panic("%s: blocking allocation returned NULL", __func__);
3691 }
3692
3693 *maxchunks = needed;
3694}
3695
3696/*
3697 * sendfile(2).
3698 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3699 * struct sf_hdtr *hdtr, int flags)
3700 *
3701 * Send a file specified by 'fd' and starting at 'offset' to a socket
3702 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3703 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3704 * output. If specified, write the total number of bytes sent into *nbytes.
3705 */
3706int
3707sendfile(proc_ref_t p, struct sendfile_args *uap, __unused int *retval)
3708{
3709 fileproc_ref_t fp;
3710 vnode_ref_t vp;
3711 socket_ref_t so;
3712 struct writev_nocancel_args nuap;
3713 user_ssize_t writev_retval;
3714 struct user_sf_hdtr user_hdtr;
3715 struct user32_sf_hdtr user32_hdtr;
3716 struct user64_sf_hdtr user64_hdtr;
3717 off_t off, xfsize;
3718 off_t nbytes = 0, sbytes = 0;
3719 int error = 0;
3720 size_t sizeof_hdtr;
3721 off_t file_size;
3722 struct vfs_context context = *vfs_context_current();
3723
3724 const bool is_p_64bit_process = IS_64BIT_PROCESS(p);
3725
3726 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3727 0, 0, 0, 0);
3728
3729 AUDIT_ARG(fd, uap->fd);
3730 AUDIT_ARG(value32, uap->s);
3731
3732 /*
3733 * Do argument checking. Must be a regular file in, stream
3734 * type and connected socket out, positive offset.
3735 */
3736 if ((error = fp_getfvp(p, fd: uap->fd, resultfp: &fp, resultvp: &vp))) {
3737 goto done;
3738 }
3739 if ((fp->f_flag & FREAD) == 0) {
3740 error = EBADF;
3741 goto done1;
3742 }
3743 if (vnode_isreg(vp) == 0) {
3744 error = ENOTSUP;
3745 goto done1;
3746 }
3747 error = file_socket(uap->s, &so);
3748 if (error) {
3749 goto done1;
3750 }
3751 if (so == NULL) {
3752 error = EBADF;
3753 goto done2;
3754 }
3755 if (so->so_type != SOCK_STREAM) {
3756 error = EINVAL;
3757 goto done2;
3758 }
3759 if ((so->so_state & SS_ISCONNECTED) == 0) {
3760 error = ENOTCONN;
3761 goto done2;
3762 }
3763 if (uap->offset < 0) {
3764 error = EINVAL;
3765 goto done2;
3766 }
3767 if (uap->nbytes == USER_ADDR_NULL) {
3768 error = EINVAL;
3769 goto done2;
3770 }
3771 if (uap->flags != 0) {
3772 error = EINVAL;
3773 goto done2;
3774 }
3775
3776 context.vc_ucred = fp->fp_glob->fg_cred;
3777
3778#if CONFIG_MACF_SOCKET_SUBSET
3779 /* JMM - fetch connected sockaddr? */
3780 error = mac_socket_check_send(cred: context.vc_ucred, so, NULL);
3781 if (error) {
3782 goto done2;
3783 }
3784#endif
3785
3786 /*
3787 * Get number of bytes to send
3788 * Should it applies to size of header and trailer?
3789 */
3790 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3791 if (error) {
3792 goto done2;
3793 }
3794
3795 /*
3796 * If specified, get the pointer to the sf_hdtr struct for
3797 * any headers/trailers.
3798 */
3799 if (uap->hdtr != USER_ADDR_NULL) {
3800 caddr_t hdtrp;
3801
3802 bzero(s: &user_hdtr, n: sizeof(user_hdtr));
3803 if (is_p_64bit_process) {
3804 hdtrp = (caddr_t)&user64_hdtr;
3805 sizeof_hdtr = sizeof(user64_hdtr);
3806 } else {
3807 hdtrp = (caddr_t)&user32_hdtr;
3808 sizeof_hdtr = sizeof(user32_hdtr);
3809 }
3810 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3811 if (error) {
3812 goto done2;
3813 }
3814 if (is_p_64bit_process) {
3815 user_hdtr.headers = user64_hdtr.headers;
3816 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3817 user_hdtr.trailers = user64_hdtr.trailers;
3818 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3819 } else {
3820 user_hdtr.headers = user32_hdtr.headers;
3821 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3822 user_hdtr.trailers = user32_hdtr.trailers;
3823 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3824 }
3825
3826 /*
3827 * Send any headers. Wimp out and use writev(2).
3828 */
3829 if (user_hdtr.headers != USER_ADDR_NULL) {
3830 bzero(s: &nuap, n: sizeof(struct writev_args));
3831 nuap.fd = uap->s;
3832 nuap.iovp = user_hdtr.headers;
3833 nuap.iovcnt = user_hdtr.hdr_cnt;
3834 error = writev_nocancel(p, &nuap, &writev_retval);
3835 if (error) {
3836 goto done2;
3837 }
3838 sbytes += writev_retval;
3839 }
3840 }
3841
3842 /*
3843 * Get the file size for 2 reasons:
3844 * 1. We don't want to allocate more mbufs than necessary
3845 * 2. We don't want to read past the end of file
3846 */
3847 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3848 goto done2;
3849 }
3850
3851 /*
3852 * Simply read file data into a chain of mbufs that used with scatter
3853 * gather reads. We're not (yet?) setup to use zero copy external
3854 * mbufs that point to the file pages.
3855 */
3856 socket_lock(so, refcount: 1);
3857 error = sblock(sb: &so->so_snd, SBL_WAIT);
3858 if (error) {
3859 socket_unlock(so, refcount: 1);
3860 goto done2;
3861 }
3862 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3863 mbuf_ref_t m0 = NULL;
3864 mbuf_t m;
3865 unsigned int nbufs = SFUIOBUFS, i;
3866 uio_t auio;
3867 UIO_STACKBUF(uio_buf, SFUIOBUFS); /* 1KB !!! */
3868 size_t uiolen;
3869 user_ssize_t rlen;
3870 off_t pgoff;
3871 size_t pktlen;
3872 boolean_t jumbocl;
3873
3874 /*
3875 * Calculate the amount to transfer.
3876 * Align to round number of pages.
3877 * Not to exceed send socket buffer,
3878 * the EOF, or the passed in nbytes.
3879 */
3880 xfsize = sbspace(sb: &so->so_snd);
3881
3882 if (xfsize <= 0) {
3883 if (so->so_state & SS_CANTSENDMORE) {
3884 error = EPIPE;
3885 goto done3;
3886 } else if ((so->so_state & SS_NBIO)) {
3887 error = EAGAIN;
3888 goto done3;
3889 } else {
3890 xfsize = PAGE_SIZE;
3891 }
3892 }
3893
3894 if (xfsize > SENDFILE_MAX_BYTES) {
3895 xfsize = SENDFILE_MAX_BYTES;
3896 } else if (xfsize > PAGE_SIZE) {
3897 xfsize = trunc_page(xfsize);
3898 }
3899 pgoff = off & PAGE_MASK_64;
3900 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3901 xfsize = PAGE_SIZE_64 - pgoff;
3902 }
3903 if (nbytes && xfsize > (nbytes - sbytes)) {
3904 xfsize = nbytes - sbytes;
3905 }
3906 if (xfsize <= 0) {
3907 break;
3908 }
3909 if (off + xfsize > file_size) {
3910 xfsize = file_size - off;
3911 }
3912 if (xfsize <= 0) {
3913 break;
3914 }
3915
3916 /*
3917 * Attempt to use larger than system page-size clusters for
3918 * large writes only if there is a jumbo cluster pool and
3919 * if the socket is marked accordingly.
3920 */
3921 jumbocl = sosendjcl && njcl > 0 &&
3922 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3923
3924 socket_unlock(so, refcount: 0);
3925 alloc_sendpkt(M_WAIT, pktlen: xfsize, maxchunks: &nbufs, m: &m0, jumbocl);
3926 pktlen = mbuf_pkthdr_maxlen(mbuf: m0);
3927 if (pktlen < (size_t)xfsize) {
3928 xfsize = pktlen;
3929 }
3930
3931 auio = uio_createwithbuffer(a_iovcount: nbufs, a_offset: off, a_spacetype: UIO_SYSSPACE,
3932 a_iodirection: UIO_READ, a_buf_p: &uio_buf[0], a_buffer_size: sizeof(uio_buf));
3933 if (auio == NULL) {
3934 DBG_PRINTF("sendfile failed. nbufs = %d. %s", nbufs,
3935 "File a radar related to rdar://10146739.\n");
3936 mbuf_freem(mbuf: m0);
3937 error = ENXIO;
3938 socket_lock(so, refcount: 0);
3939 goto done3;
3940 }
3941
3942 for (i = 0, m = m0, uiolen = 0;
3943 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3944 i++, m = mbuf_next(mbuf: m)) {
3945 size_t mlen = mbuf_maxlen(mbuf: m);
3946
3947 if (mlen + uiolen > (size_t)xfsize) {
3948 mlen = xfsize - uiolen;
3949 }
3950 mbuf_setlen(mbuf: m, len: mlen);
3951 uio_addiov(a_uio: auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3952 a_length: mlen);
3953 uiolen += mlen;
3954 }
3955
3956 if (xfsize != uio_resid(a_uio: auio)) {
3957 DBG_PRINTF("sendfile: xfsize: %lld != uio_resid(auio): "
3958 "%lld\n", xfsize, (long long)uio_resid(auio));
3959 }
3960
3961 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3962 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3963 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3964 error = fo_read(fp, uio: auio, FOF_OFFSET, ctx: &context);
3965 socket_lock(so, refcount: 0);
3966 if (error != 0) {
3967 if (uio_resid(a_uio: auio) != xfsize && (error == ERESTART ||
3968 error == EINTR || error == EWOULDBLOCK)) {
3969 error = 0;
3970 } else {
3971 mbuf_freem(mbuf: m0);
3972 goto done3;
3973 }
3974 }
3975 xfsize -= uio_resid(a_uio: auio);
3976 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3977 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3978 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3979
3980 if (xfsize == 0) {
3981 break;
3982 }
3983 if (xfsize + off > file_size) {
3984 DBG_PRINTF("sendfile: xfsize: %lld + off: %lld > file_size:"
3985 "%lld\n", xfsize, off, file_size);
3986 }
3987 for (i = 0, m = m0, rlen = 0;
3988 i < nbufs && m != NULL && rlen < xfsize;
3989 i++, m = mbuf_next(mbuf: m)) {
3990 size_t mlen = mbuf_maxlen(mbuf: m);
3991
3992 if (rlen + mlen > (size_t)xfsize) {
3993 mlen = xfsize - rlen;
3994 }
3995 mbuf_setlen(mbuf: m, len: mlen);
3996
3997 rlen += mlen;
3998 }
3999 mbuf_pkthdr_setlen(mbuf: m0, len: xfsize);
4000
4001retry_space:
4002 /*
4003 * Make sure that the socket is still able to take more data.
4004 * CANTSENDMORE being true usually means that the connection
4005 * was closed. so_error is true when an error was sensed after
4006 * a previous send.
4007 * The state is checked after the page mapping and buffer
4008 * allocation above since those operations may block and make
4009 * any socket checks stale. From this point forward, nothing
4010 * blocks before the pru_send (or more accurately, any blocking
4011 * results in a loop back to here to re-check).
4012 */
4013 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
4014 if (so->so_state & SS_CANTSENDMORE) {
4015 error = EPIPE;
4016 } else {
4017 error = so->so_error;
4018 so->so_error = 0;
4019 }
4020 m_freem(m0);
4021 goto done3;
4022 }
4023 /*
4024 * Wait for socket space to become available. We do this just
4025 * after checking the connection state above in order to avoid
4026 * a race condition with sbwait().
4027 */
4028 if (sbspace(sb: &so->so_snd) < (long)so->so_snd.sb_lowat) {
4029 if (so->so_state & SS_NBIO) {
4030 m_freem(m0);
4031 error = EAGAIN;
4032 goto done3;
4033 }
4034 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4035 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
4036 error = sbwait(sb: &so->so_snd);
4037 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
4038 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
4039 /*
4040 * An error from sbwait usually indicates that we've
4041 * been interrupted by a signal. If we've sent anything
4042 * then return bytes sent, otherwise return the error.
4043 */
4044 if (error) {
4045 m_freem(m0);
4046 goto done3;
4047 }
4048 goto retry_space;
4049 }
4050
4051 mbuf_ref_t control = NULL;
4052 {
4053 /*
4054 * Socket filter processing
4055 */
4056
4057 error = sflt_data_out(so, NULL, data: &m0, control: &control, flags: 0);
4058 if (error) {
4059 if (error == EJUSTRETURN) {
4060 error = 0;
4061 continue;
4062 }
4063 goto done3;
4064 }
4065 /*
4066 * End Socket filter processing
4067 */
4068 }
4069 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4070 uap->s, 0, 0, 0, 0);
4071 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
4072 NULL, control, p);
4073 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
4074 uap->s, 0, 0, 0, 0);
4075 if (error) {
4076 goto done3;
4077 }
4078 }
4079 sbunlock(sb: &so->so_snd, FALSE); /* will unlock socket */
4080 /*
4081 * Send trailers. Wimp out and use writev(2).
4082 */
4083 if (uap->hdtr != USER_ADDR_NULL &&
4084 user_hdtr.trailers != USER_ADDR_NULL) {
4085 bzero(s: &nuap, n: sizeof(struct writev_args));
4086 nuap.fd = uap->s;
4087 nuap.iovp = user_hdtr.trailers;
4088 nuap.iovcnt = user_hdtr.trl_cnt;
4089 error = writev_nocancel(p, &nuap, &writev_retval);
4090 if (error) {
4091 goto done2;
4092 }
4093 sbytes += writev_retval;
4094 }
4095done2:
4096 file_drop(uap->s);
4097done1:
4098 file_drop(uap->fd);
4099done:
4100 if (uap->nbytes != USER_ADDR_NULL) {
4101 /* XXX this appears bogus for some early failure conditions */
4102 copyout(&sbytes, uap->nbytes, sizeof(off_t));
4103 }
4104 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
4105 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
4106 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
4107 return error;
4108done3:
4109 sbunlock(sb: &so->so_snd, FALSE); /* will unlock socket */
4110 goto done2;
4111}
4112
4113
4114#endif /* SENDFILE */
4115