1/*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
77#include <sys/vnode_internal.h>
78#include <sys/malloc.h>
79#include <sys/mcache.h>
80#include <sys/mbuf.h>
81#include <kern/locks.h>
82#include <sys/domain.h>
83#include <sys/protosw.h>
84#include <sys/signalvar.h>
85#include <sys/socket.h>
86#include <sys/socketvar.h>
87#include <sys/kernel.h>
88#include <sys/uio_internal.h>
89#include <sys/kauth.h>
90#include <kern/task.h>
91#include <sys/priv.h>
92#include <sys/sysctl.h>
93#include <sys/sys_domain.h>
94
95#include <security/audit/audit.h>
96
97#include <sys/kdebug.h>
98#include <sys/sysproto.h>
99#include <netinet/in.h>
100#include <net/route.h>
101#include <netinet/in_pcb.h>
102
103#if CONFIG_MACF_SOCKET_SUBSET
104#include <security/mac_framework.h>
105#endif /* MAC_SOCKET_SUBSET */
106
/*
 * Shorthand for the members reached through a fileproc's f_fglob pointer,
 * so that code in this file can write fp->f_flag, fp->f_data, and so on.
 */
#define	f_flag		f_fglob->fg_flag
#define	f_type		f_fglob->fg_ops->fo_type
#define	f_msgcount	f_fglob->fg_msgcount
#define	f_cred		f_fglob->fg_cred
#define	f_ops		f_fglob->fg_ops
#define	f_offset	f_fglob->fg_offset
#define	f_data		f_fglob->fg_data

/*
 * Trace codes in the DBG_NETSOCK class, built with NETDBG_CODE.
 * (Presumably consumed by kdebug tracepoints in the send/receive paths;
 * those call sites are outside this part of the file.)
 */
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define	DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define	DBG_FNC_SENDMSG_X	NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define	DBG_FNC_RECVMSG_X	NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG/DEVELOPMENT builds kernel addresses are passed through unchanged
 * and DBG_PRINTF maps to printf.  On other builds addresses go through
 * VM_KERNEL_ADDRPERM and DBG_PRINTF expands to an empty statement.
 */
#if DEBUG || DEVELOPMENT
#define	DEBUG_KERNEL_ADDRPERM(_v)	(_v)
#define	DBG_PRINTF(...)			printf(__VA_ARGS__)
#else
#define	DEBUG_KERNEL_ADDRPERM(_v)	VM_KERNEL_ADDRPERM(_v)
#define	DBG_PRINTF(...)			do { } while (0)
#endif
139
140/* TODO: should be in header file */
141int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
142
143static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
144 int, int32_t *);
145static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
146 int32_t *);
147static int connectit(struct socket *, struct sockaddr *);
148static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
149 size_t, boolean_t);
150static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
151 user_addr_t, size_t, boolean_t);
152#if SENDFILE
153static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
154 boolean_t);
155#endif /* SENDFILE */
156static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
157static int connectitx(struct socket *, struct sockaddr *,
158 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
159 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
160static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
161 int *);
162static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
163
164static int internalize_user_msghdr_array(const void *, int, int, u_int,
165 struct user_msghdr_x *, struct uio **);
166static u_int externalize_user_msghdr_array(void *, int, int, u_int,
167 const struct user_msghdr_x *, struct uio **);
168
169static void free_uio_array(struct uio **, u_int);
170static int uio_array_is_valid(struct uio **, u_int);
171static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
172static int internalize_recv_msghdr_array(const void *, int, int,
173 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
174static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
175 const struct user_msghdr_x *, struct recv_msg_elem *);
176static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
177static void free_recv_msg_array(struct recv_msg_elem *, u_int);
178
179SYSCTL_DECL(_kern_ipc);
180
181static u_int somaxsendmsgx = 100;
182SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
183 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
184static u_int somaxrecvmsgx = 100;
185SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
186 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
187
188/*
189 * System call interface to the socket abstraction.
190 */
191
192extern const struct fileops socketops;
193
194/*
195 * Returns: 0 Success
196 * EACCES Mandatory Access Control failure
197 * falloc:ENFILE
198 * falloc:EMFILE
199 * falloc:ENOMEM
200 * socreate:EAFNOSUPPORT
201 * socreate:EPROTOTYPE
202 * socreate:EPROTONOSUPPORT
203 * socreate:ENOBUFS
204 * socreate:ENOMEM
205 * socreate:??? [other protocol families, IPSEC]
206 */
207int
208socket(struct proc *p,
209 struct socket_args *uap,
210 int32_t *retval)
211{
212 return (socket_common(p, uap->domain, uap->type, uap->protocol,
213 proc_selfpid(), retval, 0));
214}
215
216int
217socket_delegate(struct proc *p,
218 struct socket_delegate_args *uap,
219 int32_t *retval)
220{
221 return socket_common(p, uap->domain, uap->type, uap->protocol,
222 uap->epid, retval, 1);
223}
224
225static int
226socket_common(struct proc *p,
227 int domain,
228 int type,
229 int protocol,
230 pid_t epid,
231 int32_t *retval,
232 int delegate)
233{
234 struct socket *so;
235 struct fileproc *fp;
236 int fd, error;
237
238 AUDIT_ARG(socket, domain, type, protocol);
239#if CONFIG_MACF_SOCKET_SUBSET
240 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
241 type, protocol)) != 0)
242 return (error);
243#endif /* MAC_SOCKET_SUBSET */
244
245 if (delegate) {
246 error = priv_check_cred(kauth_cred_get(),
247 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
248 if (error)
249 return (EACCES);
250 }
251
252 error = falloc(p, &fp, &fd, vfs_context_current());
253 if (error) {
254 return (error);
255 }
256 fp->f_flag = FREAD|FWRITE;
257 fp->f_ops = &socketops;
258
259 if (delegate)
260 error = socreate_delegate(domain, &so, type, protocol, epid);
261 else
262 error = socreate(domain, &so, type, protocol);
263
264 if (error) {
265 fp_free(p, fd, fp);
266 } else {
267 fp->f_data = (caddr_t)so;
268
269 proc_fdlock(p);
270 procfdtbl_releasefd(p, fd, NULL);
271
272 fp_drop(p, fd, fp, 1);
273 proc_fdunlock(p);
274
275 *retval = fd;
276 if (ENTR_SHOULDTRACE) {
277 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
278 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
279 }
280 }
281 return (error);
282}
283
284/*
285 * Returns: 0 Success
286 * EDESTADDRREQ Destination address required
287 * EBADF Bad file descriptor
288 * EACCES Mandatory Access Control failure
289 * file_socket:ENOTSOCK
290 * file_socket:EBADF
291 * getsockaddr:ENAMETOOLONG Filename too long
292 * getsockaddr:EINVAL Invalid argument
293 * getsockaddr:ENOMEM Not enough space
294 * getsockaddr:EFAULT Bad address
295 * sobindlock:???
296 */
297/* ARGSUSED */
298int
299bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
300{
301 struct sockaddr_storage ss;
302 struct sockaddr *sa = NULL;
303 struct socket *so;
304 boolean_t want_free = TRUE;
305 int error;
306
307 AUDIT_ARG(fd, uap->s);
308 error = file_socket(uap->s, &so);
309 if (error != 0)
310 return (error);
311 if (so == NULL) {
312 error = EBADF;
313 goto out;
314 }
315 if (uap->name == USER_ADDR_NULL) {
316 error = EDESTADDRREQ;
317 goto out;
318 }
319 if (uap->namelen > sizeof (ss)) {
320 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
321 } else {
322 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
323 if (error == 0) {
324 sa = (struct sockaddr *)&ss;
325 want_free = FALSE;
326 }
327 }
328 if (error != 0)
329 goto out;
330 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
331#if CONFIG_MACF_SOCKET_SUBSET
332 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
333 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
334 error = sobindlock(so, sa, 1); /* will lock socket */
335#else
336 error = sobindlock(so, sa, 1); /* will lock socket */
337#endif /* MAC_SOCKET_SUBSET */
338 if (want_free)
339 FREE(sa, M_SONAME);
340out:
341 file_drop(uap->s);
342 return (error);
343}
344
345/*
346 * Returns: 0 Success
347 * EBADF
348 * EACCES Mandatory Access Control failure
349 * file_socket:ENOTSOCK
350 * file_socket:EBADF
351 * solisten:EINVAL
352 * solisten:EOPNOTSUPP
353 * solisten:???
354 */
355int
356listen(__unused struct proc *p, struct listen_args *uap,
357 __unused int32_t *retval)
358{
359 int error;
360 struct socket *so;
361
362 AUDIT_ARG(fd, uap->s);
363 error = file_socket(uap->s, &so);
364 if (error)
365 return (error);
366 if (so != NULL)
367#if CONFIG_MACF_SOCKET_SUBSET
368 {
369 error = mac_socket_check_listen(kauth_cred_get(), so);
370 if (error == 0)
371 error = solisten(so, uap->backlog);
372 }
373#else
374 error = solisten(so, uap->backlog);
375#endif /* MAC_SOCKET_SUBSET */
376 else
377 error = EBADF;
378
379 file_drop(uap->s);
380 return (error);
381}
382
383/*
384 * Returns: fp_getfsock:EBADF Bad file descriptor
385 * fp_getfsock:EOPNOTSUPP ...
386 * xlate => :ENOTSOCK Socket operation on non-socket
387 * :EFAULT Bad address on copyin/copyout
388 * :EBADF Bad file descriptor
389 * :EOPNOTSUPP Operation not supported on socket
390 * :EINVAL Invalid argument
391 * :EWOULDBLOCK Operation would block
392 * :ECONNABORTED Connection aborted
393 * :EINTR Interrupted function
394 * :EACCES Mandatory Access Control failure
395 * falloc_locked:ENFILE Too many files open in system
396 * falloc_locked::EMFILE Too many open files
397 * falloc_locked::ENOMEM Not enough space
398 * 0 Success
399 */
400int
401accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
402 int32_t *retval)
403{
404 struct fileproc *fp;
405 struct sockaddr *sa = NULL;
406 socklen_t namelen;
407 int error;
408 struct socket *head, *so = NULL;
409 lck_mtx_t *mutex_held;
410 int fd = uap->s;
411 int newfd;
412 short fflag; /* type must match fp->f_flag */
413 int dosocklock = 0;
414
415 *retval = -1;
416
417 AUDIT_ARG(fd, uap->s);
418
419 if (uap->name) {
420 error = copyin(uap->anamelen, (caddr_t)&namelen,
421 sizeof (socklen_t));
422 if (error)
423 return (error);
424 }
425 error = fp_getfsock(p, fd, &fp, &head);
426 if (error) {
427 if (error == EOPNOTSUPP)
428 error = ENOTSOCK;
429 return (error);
430 }
431 if (head == NULL) {
432 error = EBADF;
433 goto out;
434 }
435#if CONFIG_MACF_SOCKET_SUBSET
436 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
437 goto out;
438#endif /* MAC_SOCKET_SUBSET */
439
440 socket_lock(head, 1);
441
442 if (head->so_proto->pr_getlock != NULL) {
443 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
444 dosocklock = 1;
445 } else {
446 mutex_held = head->so_proto->pr_domain->dom_mtx;
447 dosocklock = 0;
448 }
449
450 if ((head->so_options & SO_ACCEPTCONN) == 0) {
451 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
452 error = EOPNOTSUPP;
453 } else {
454 /* POSIX: The socket is not accepting connections */
455 error = EINVAL;
456 }
457 socket_unlock(head, 1);
458 goto out;
459 }
460check_again:
461 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
462 socket_unlock(head, 1);
463 error = EWOULDBLOCK;
464 goto out;
465 }
466 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
467 if (head->so_state & SS_CANTRCVMORE) {
468 head->so_error = ECONNABORTED;
469 break;
470 }
471 if (head->so_usecount < 1)
472 panic("accept: head=%p refcount=%d\n", head,
473 head->so_usecount);
474 error = msleep((caddr_t)&head->so_timeo, mutex_held,
475 PSOCK | PCATCH, "accept", 0);
476 if (head->so_usecount < 1)
477 panic("accept: 2 head=%p refcount=%d\n", head,
478 head->so_usecount);
479 if ((head->so_state & SS_DRAINING)) {
480 error = ECONNABORTED;
481 }
482 if (error) {
483 socket_unlock(head, 1);
484 goto out;
485 }
486 }
487 if (head->so_error) {
488 error = head->so_error;
489 head->so_error = 0;
490 socket_unlock(head, 1);
491 goto out;
492 }
493
494 /*
495 * At this point we know that there is at least one connection
496 * ready to be accepted. Remove it from the queue prior to
497 * allocating the file descriptor for it since falloc() may
498 * block allowing another process to accept the connection
499 * instead.
500 */
501 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
502
503 so_acquire_accept_list(head, NULL);
504 if (TAILQ_EMPTY(&head->so_comp)) {
505 so_release_accept_list(head);
506 goto check_again;
507 }
508
509 so = TAILQ_FIRST(&head->so_comp);
510 TAILQ_REMOVE(&head->so_comp, so, so_list);
511 so->so_head = NULL;
512 so->so_state &= ~SS_COMP;
513 head->so_qlen--;
514 so_release_accept_list(head);
515
516 /* unlock head to avoid deadlock with select, keep a ref on head */
517 socket_unlock(head, 0);
518
519#if CONFIG_MACF_SOCKET_SUBSET
520 /*
521 * Pass the pre-accepted socket to the MAC framework. This is
522 * cheaper than allocating a file descriptor for the socket,
523 * calling the protocol accept callback, and possibly freeing
524 * the file descriptor should the MAC check fails.
525 */
526 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
527 socket_lock(so, 1);
528 so->so_state &= ~SS_NOFDREF;
529 socket_unlock(so, 1);
530 soclose(so);
531 /* Drop reference on listening socket */
532 sodereference(head);
533 goto out;
534 }
535#endif /* MAC_SOCKET_SUBSET */
536
537 /*
538 * Pass the pre-accepted socket to any interested socket filter(s).
539 * Upon failure, the socket would have been closed by the callee.
540 */
541 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
542 /* Drop reference on listening socket */
543 sodereference(head);
544 /* Propagate socket filter's error code to the caller */
545 goto out;
546 }
547
548 fflag = fp->f_flag;
549 error = falloc(p, &fp, &newfd, vfs_context_current());
550 if (error) {
551 /*
552 * Probably ran out of file descriptors.
553 *
554 * <rdar://problem/8554930>
555 * Don't put this back on the socket like we used to, that
556 * just causes the client to spin. Drop the socket.
557 */
558 socket_lock(so, 1);
559 so->so_state &= ~SS_NOFDREF;
560 socket_unlock(so, 1);
561 soclose(so);
562 sodereference(head);
563 goto out;
564 }
565 *retval = newfd;
566 fp->f_flag = fflag;
567 fp->f_ops = &socketops;
568 fp->f_data = (caddr_t)so;
569
570 socket_lock(head, 0);
571 if (dosocklock)
572 socket_lock(so, 1);
573
574 /* Sync socket non-blocking/async state with file flags */
575 if (fp->f_flag & FNONBLOCK) {
576 so->so_state |= SS_NBIO;
577 } else {
578 so->so_state &= ~SS_NBIO;
579 }
580
581 if (fp->f_flag & FASYNC) {
582 so->so_state |= SS_ASYNC;
583 so->so_rcv.sb_flags |= SB_ASYNC;
584 so->so_snd.sb_flags |= SB_ASYNC;
585 } else {
586 so->so_state &= ~SS_ASYNC;
587 so->so_rcv.sb_flags &= ~SB_ASYNC;
588 so->so_snd.sb_flags &= ~SB_ASYNC;
589 }
590
591 (void) soacceptlock(so, &sa, 0);
592 socket_unlock(head, 1);
593 if (sa == NULL) {
594 namelen = 0;
595 if (uap->name)
596 goto gotnoname;
597 error = 0;
598 goto releasefd;
599 }
600 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
601
602 if (uap->name) {
603 socklen_t sa_len;
604
605 /* save sa_len before it is destroyed */
606 sa_len = sa->sa_len;
607 namelen = MIN(namelen, sa_len);
608 error = copyout(sa, uap->name, namelen);
609 if (!error)
610 /* return the actual, untruncated address length */
611 namelen = sa_len;
612gotnoname:
613 error = copyout((caddr_t)&namelen, uap->anamelen,
614 sizeof (socklen_t));
615 }
616 FREE(sa, M_SONAME);
617
618releasefd:
619 /*
620 * If the socket has been marked as inactive by sosetdefunct(),
621 * disallow further operations on it.
622 */
623 if (so->so_flags & SOF_DEFUNCT) {
624 sodefunct(current_proc(), so,
625 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
626 }
627
628 if (dosocklock)
629 socket_unlock(so, 1);
630
631 proc_fdlock(p);
632 procfdtbl_releasefd(p, newfd, NULL);
633 fp_drop(p, newfd, fp, 1);
634 proc_fdunlock(p);
635
636out:
637 file_drop(fd);
638
639 if (error == 0 && ENTR_SHOULDTRACE) {
640 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
641 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
642 }
643 return (error);
644}
645
646int
647accept(struct proc *p, struct accept_args *uap, int32_t *retval)
648{
649 __pthread_testcancel(1);
650 return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
651 retval));
652}
653
654/*
655 * Returns: 0 Success
656 * EBADF Bad file descriptor
657 * EALREADY Connection already in progress
658 * EINPROGRESS Operation in progress
659 * ECONNABORTED Connection aborted
660 * EINTR Interrupted function
661 * EACCES Mandatory Access Control failure
662 * file_socket:ENOTSOCK
663 * file_socket:EBADF
664 * getsockaddr:ENAMETOOLONG Filename too long
665 * getsockaddr:EINVAL Invalid argument
666 * getsockaddr:ENOMEM Not enough space
667 * getsockaddr:EFAULT Bad address
668 * soconnectlock:EOPNOTSUPP
669 * soconnectlock:EISCONN
670 * soconnectlock:??? [depends on protocol, filters]
671 * msleep:EINTR
672 *
673 * Imputed: so_error error may be set from so_error, which
674 * may have been set by soconnectlock.
675 */
676/* ARGSUSED */
677int
678connect(struct proc *p, struct connect_args *uap, int32_t *retval)
679{
680 __pthread_testcancel(1);
681 return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
682 retval));
683}
684
685int
686connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
687{
688#pragma unused(p, retval)
689 struct socket *so;
690 struct sockaddr_storage ss;
691 struct sockaddr *sa = NULL;
692 int error;
693 int fd = uap->s;
694 boolean_t dgram;
695
696 AUDIT_ARG(fd, uap->s);
697 error = file_socket(fd, &so);
698 if (error != 0)
699 return (error);
700 if (so == NULL) {
701 error = EBADF;
702 goto out;
703 }
704
705 /*
706 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
707 * if this is a datagram socket; translate for other types.
708 */
709 dgram = (so->so_type == SOCK_DGRAM);
710
711 /* Get socket address now before we obtain socket lock */
712 if (uap->namelen > sizeof (ss)) {
713 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
714 } else {
715 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
716 if (error == 0)
717 sa = (struct sockaddr *)&ss;
718 }
719 if (error != 0)
720 goto out;
721
722 error = connectit(so, sa);
723
724 if (sa != NULL && sa != SA(&ss))
725 FREE(sa, M_SONAME);
726 if (error == ERESTART)
727 error = EINTR;
728out:
729 file_drop(fd);
730 return (error);
731}
732
733static int
734connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
735{
736#pragma unused(p, retval)
737 struct sockaddr_storage ss, sd;
738 struct sockaddr *src = NULL, *dst = NULL;
739 struct socket *so;
740 int error, error1, fd = uap->socket;
741 boolean_t dgram;
742 sae_connid_t cid = SAE_CONNID_ANY;
743 struct user32_sa_endpoints ep32;
744 struct user64_sa_endpoints ep64;
745 struct user_sa_endpoints ep;
746 user_ssize_t bytes_written = 0;
747 struct user_iovec *iovp;
748 uio_t auio = NULL;
749
750 AUDIT_ARG(fd, uap->socket);
751 error = file_socket(fd, &so);
752 if (error != 0)
753 return (error);
754 if (so == NULL) {
755 error = EBADF;
756 goto out;
757 }
758
759 if (uap->endpoints == USER_ADDR_NULL) {
760 error = EINVAL;
761 goto out;
762 }
763
764 if (IS_64BIT_PROCESS(p)) {
765 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
766 if (error != 0)
767 goto out;
768
769 ep.sae_srcif = ep64.sae_srcif;
770 ep.sae_srcaddr = ep64.sae_srcaddr;
771 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
772 ep.sae_dstaddr = ep64.sae_dstaddr;
773 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
774 } else {
775 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
776 if (error != 0)
777 goto out;
778
779 ep.sae_srcif = ep32.sae_srcif;
780 ep.sae_srcaddr = ep32.sae_srcaddr;
781 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
782 ep.sae_dstaddr = ep32.sae_dstaddr;
783 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
784 }
785
786 /*
787 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
788 * if this is a datagram socket; translate for other types.
789 */
790 dgram = (so->so_type == SOCK_DGRAM);
791
792 /* Get socket address now before we obtain socket lock */
793 if (ep.sae_srcaddr != USER_ADDR_NULL) {
794 if (ep.sae_srcaddrlen > sizeof (ss)) {
795 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
796 } else {
797 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
798 if (error == 0)
799 src = (struct sockaddr *)&ss;
800 }
801
802 if (error)
803 goto out;
804 }
805
806 if (ep.sae_dstaddr == USER_ADDR_NULL) {
807 error = EINVAL;
808 goto out;
809 }
810
811 /* Get socket address now before we obtain socket lock */
812 if (ep.sae_dstaddrlen > sizeof (sd)) {
813 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
814 } else {
815 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
816 if (error == 0)
817 dst = (struct sockaddr *)&sd;
818 }
819
820 if (error)
821 goto out;
822
823 VERIFY(dst != NULL);
824
825 if (uap->iov != USER_ADDR_NULL) {
826 /* Verify range before calling uio_create() */
827 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV){
828 error = EINVAL;
829 goto out;
830 }
831
832 if (uap->len == USER_ADDR_NULL){
833 error = EINVAL;
834 goto out;
835 }
836
837 /* allocate a uio to hold the number of iovecs passed */
838 auio = uio_create(uap->iovcnt, 0,
839 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
840 UIO_WRITE);
841
842 if (auio == NULL) {
843 error = ENOMEM;
844 goto out;
845 }
846
847 /*
848 * get location of iovecs within the uio.
849 * then copyin the iovecs from user space.
850 */
851 iovp = uio_iovsaddr(auio);
852 if (iovp == NULL) {
853 error = ENOMEM;
854 goto out;
855 }
856 error = copyin_user_iovec_array(uap->iov,
857 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
858 uap->iovcnt, iovp);
859 if (error != 0)
860 goto out;
861
862 /* finish setup of uio_t */
863 error = uio_calculateresid(auio);
864 if (error != 0) {
865 goto out;
866 }
867 }
868
869 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
870 &cid, auio, uap->flags, &bytes_written);
871 if (error == ERESTART)
872 error = EINTR;
873
874 if (uap->len != USER_ADDR_NULL) {
875 error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
876 /* give precedence to connectitx errors */
877 if ((error1 != 0) && (error == 0))
878 error = error1;
879 }
880
881 if (uap->connid != USER_ADDR_NULL) {
882 error1 = copyout(&cid, uap->connid, sizeof (cid));
883 /* give precedence to connectitx errors */
884 if ((error1 != 0) && (error == 0))
885 error = error1;
886 }
887out:
888 file_drop(fd);
889 if (auio != NULL) {
890 uio_free(auio);
891 }
892 if (src != NULL && src != SA(&ss))
893 FREE(src, M_SONAME);
894 if (dst != NULL && dst != SA(&sd))
895 FREE(dst, M_SONAME);
896 return (error);
897}
898
/*
 * connectx
 *
 * Cancellation-point wrapper around connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return (connectx_nocancel(p, uap, retval));
}
909
910static int
911connectit(struct socket *so, struct sockaddr *sa)
912{
913 int error;
914
915 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
916#if CONFIG_MACF_SOCKET_SUBSET
917 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
918 return (error);
919#endif /* MAC_SOCKET_SUBSET */
920
921 socket_lock(so, 1);
922 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
923 error = EALREADY;
924 goto out;
925 }
926 error = soconnectlock(so, sa, 0);
927 if (error != 0) {
928 so->so_state &= ~SS_ISCONNECTING;
929 goto out;
930 }
931 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
932 error = EINPROGRESS;
933 goto out;
934 }
935 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
936 lck_mtx_t *mutex_held;
937
938 if (so->so_proto->pr_getlock != NULL)
939 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
940 else
941 mutex_held = so->so_proto->pr_domain->dom_mtx;
942 error = msleep((caddr_t)&so->so_timeo, mutex_held,
943 PSOCK | PCATCH, __func__, 0);
944 if (so->so_state & SS_DRAINING) {
945 error = ECONNABORTED;
946 }
947 if (error != 0)
948 break;
949 }
950 if (error == 0) {
951 error = so->so_error;
952 so->so_error = 0;
953 }
954out:
955 socket_unlock(so, 1);
956 return (error);
957}
958
959static int
960connectitx(struct socket *so, struct sockaddr *src,
961 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
962 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
963 user_ssize_t *bytes_written)
964{
965 int error;
966#pragma unused (flags)
967
968 VERIFY(dst != NULL);
969
970 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
971#if CONFIG_MACF_SOCKET_SUBSET
972 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0)
973 return (error);
974#endif /* MAC_SOCKET_SUBSET */
975
976 socket_lock(so, 1);
977 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
978 error = EALREADY;
979 goto out;
980 }
981
982 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
983 (flags & CONNECT_DATA_IDEMPOTENT)) {
984 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
985
986 if (flags & CONNECT_DATA_AUTHENTICATED)
987 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
988 }
989
990 /*
991 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
992 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
993 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
994 * Case 3 allows user to combine write with connect even if they have
995 * no use for TFO (such as regular TCP, and UDP).
996 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
997 */
998 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
999 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
1000 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1001
1002 /*
1003 * If a user sets data idempotent and does not pass an uio, or
1004 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1005 * SOF1_DATA_IDEMPOTENT.
1006 */
1007 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1008 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1009 /* We should return EINVAL instead perhaps. */
1010 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1011 }
1012
1013 error = soconnectxlocked(so, src, dst, p, ifscope,
1014 aid, pcid, 0, NULL, 0, auio, bytes_written);
1015 if (error != 0) {
1016 so->so_state &= ~SS_ISCONNECTING;
1017 goto out;
1018 }
1019 /*
1020 * If, after the call to soconnectxlocked the flag is still set (in case
1021 * data has been queued and the connect() has actually been triggered,
1022 * it will have been unset by the transport), we exit immediately. There
1023 * is no reason to wait on any event.
1024 */
1025 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1026 error = 0;
1027 goto out;
1028 }
1029 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1030 error = EINPROGRESS;
1031 goto out;
1032 }
1033 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1034 lck_mtx_t *mutex_held;
1035
1036 if (so->so_proto->pr_getlock != NULL)
1037 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1038 else
1039 mutex_held = so->so_proto->pr_domain->dom_mtx;
1040 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1041 PSOCK | PCATCH, __func__, 0);
1042 if (so->so_state & SS_DRAINING) {
1043 error = ECONNABORTED;
1044 }
1045 if (error != 0)
1046 break;
1047 }
1048 if (error == 0) {
1049 error = so->so_error;
1050 so->so_error = 0;
1051 }
1052out:
1053 socket_unlock(so, 1);
1054 return (error);
1055}
1056
/*
 * peeloff
 *
 * Performs no socket operation: it only tests for pthread cancellation
 * and reports success.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return (0);
}
1068
/*
 * disconnectx() system call entry point (cancellable variant).
 *
 * Tests for pending thread cancellation and then forwards all arguments
 * to disconnectx_nocancel(), whose result is returned unchanged.
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * The call resembles a POSIX interface, so it is treated as an
	 * unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1079
1080static int
1081disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1082{
1083#pragma unused(p, retval)
1084 struct socket *so;
1085 int fd = uap->s;
1086 int error;
1087
1088 error = file_socket(fd, &so);
1089 if (error != 0)
1090 return (error);
1091 if (so == NULL) {
1092 error = EBADF;
1093 goto out;
1094 }
1095
1096 error = sodisconnectx(so, uap->aid, uap->cid);
1097out:
1098 file_drop(fd);
1099 return (error);
1100}
1101
1102/*
1103 * Returns: 0 Success
1104 * socreate:EAFNOSUPPORT
1105 * socreate:EPROTOTYPE
1106 * socreate:EPROTONOSUPPORT
1107 * socreate:ENOBUFS
1108 * socreate:ENOMEM
1109 * socreate:EISCONN
1110 * socreate:??? [other protocol families, IPSEC]
1111 * falloc:ENFILE
1112 * falloc:EMFILE
1113 * falloc:ENOMEM
1114 * copyout:EFAULT
1115 * soconnect2:EINVAL
1116 * soconnect2:EPROTOTYPE
1117 * soconnect2:??? [other protocol families[
1118 */
1119int
1120socketpair(struct proc *p, struct socketpair_args *uap,
1121 __unused int32_t *retval)
1122{
1123 struct fileproc *fp1, *fp2;
1124 struct socket *so1, *so2;
1125 int fd, error, sv[2];
1126
1127 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1128 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1129 if (error)
1130 return (error);
1131 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1132 if (error)
1133 goto free1;
1134
1135 error = falloc(p, &fp1, &fd, vfs_context_current());
1136 if (error) {
1137 goto free2;
1138 }
1139 fp1->f_flag = FREAD|FWRITE;
1140 fp1->f_ops = &socketops;
1141 fp1->f_data = (caddr_t)so1;
1142 sv[0] = fd;
1143
1144 error = falloc(p, &fp2, &fd, vfs_context_current());
1145 if (error) {
1146 goto free3;
1147 }
1148 fp2->f_flag = FREAD|FWRITE;
1149 fp2->f_ops = &socketops;
1150 fp2->f_data = (caddr_t)so2;
1151 sv[1] = fd;
1152
1153 error = soconnect2(so1, so2);
1154 if (error) {
1155 goto free4;
1156 }
1157 if (uap->type == SOCK_DGRAM) {
1158 /*
1159 * Datagram socket connection is asymmetric.
1160 */
1161 error = soconnect2(so2, so1);
1162 if (error) {
1163 goto free4;
1164 }
1165 }
1166
1167 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1168 goto free4;
1169
1170 proc_fdlock(p);
1171 procfdtbl_releasefd(p, sv[0], NULL);
1172 procfdtbl_releasefd(p, sv[1], NULL);
1173 fp_drop(p, sv[0], fp1, 1);
1174 fp_drop(p, sv[1], fp2, 1);
1175 proc_fdunlock(p);
1176
1177 return (0);
1178free4:
1179 fp_free(p, sv[1], fp2);
1180free3:
1181 fp_free(p, sv[0], fp1);
1182free2:
1183 (void) soclose(so2);
1184free1:
1185 (void) soclose(so1);
1186 return (error);
1187}
1188
1189/*
1190 * Returns: 0 Success
1191 * EINVAL
1192 * ENOBUFS
1193 * EBADF
1194 * EPIPE
1195 * EACCES Mandatory Access Control failure
1196 * file_socket:ENOTSOCK
1197 * file_socket:EBADF
1198 * getsockaddr:ENAMETOOLONG Filename too long
1199 * getsockaddr:EINVAL Invalid argument
1200 * getsockaddr:ENOMEM Not enough space
1201 * getsockaddr:EFAULT Bad address
1202 * <pru_sosend>:EACCES[TCP]
1203 * <pru_sosend>:EADDRINUSE[TCP]
1204 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1205 * <pru_sosend>:EAFNOSUPPORT[TCP]
1206 * <pru_sosend>:EAGAIN[TCP]
1207 * <pru_sosend>:EBADF
1208 * <pru_sosend>:ECONNRESET[TCP]
1209 * <pru_sosend>:EFAULT
1210 * <pru_sosend>:EHOSTUNREACH[TCP]
1211 * <pru_sosend>:EINTR
1212 * <pru_sosend>:EINVAL
1213 * <pru_sosend>:EISCONN[AF_INET]
1214 * <pru_sosend>:EMSGSIZE[TCP]
1215 * <pru_sosend>:ENETDOWN[TCP]
1216 * <pru_sosend>:ENETUNREACH[TCP]
1217 * <pru_sosend>:ENOBUFS
1218 * <pru_sosend>:ENOMEM[TCP]
1219 * <pru_sosend>:ENOTCONN[AF_INET]
1220 * <pru_sosend>:EOPNOTSUPP
1221 * <pru_sosend>:EPERM[TCP]
1222 * <pru_sosend>:EPIPE
1223 * <pru_sosend>:EWOULDBLOCK
1224 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1225 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1226 * <pru_sosend>:??? [value from so_error]
1227 * sockargs:???
1228 */
1229static int
1230sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1231 int flags, int32_t *retval)
1232{
1233 struct mbuf *control = NULL;
1234 struct sockaddr_storage ss;
1235 struct sockaddr *to = NULL;
1236 boolean_t want_free = TRUE;
1237 int error;
1238 user_ssize_t len;
1239
1240 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1241
1242 if (mp->msg_name != USER_ADDR_NULL) {
1243 if (mp->msg_namelen > sizeof (ss)) {
1244 error = getsockaddr(so, &to, mp->msg_name,
1245 mp->msg_namelen, TRUE);
1246 } else {
1247 error = getsockaddr_s(so, &ss, mp->msg_name,
1248 mp->msg_namelen, TRUE);
1249 if (error == 0) {
1250 to = (struct sockaddr *)&ss;
1251 want_free = FALSE;
1252 }
1253 }
1254 if (error != 0)
1255 goto out;
1256 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1257 }
1258 if (mp->msg_control != USER_ADDR_NULL) {
1259 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1260 error = EINVAL;
1261 goto bad;
1262 }
1263 error = sockargs(&control, mp->msg_control,
1264 mp->msg_controllen, MT_CONTROL);
1265 if (error != 0)
1266 goto bad;
1267 }
1268
1269#if CONFIG_MACF_SOCKET_SUBSET
1270 /*
1271 * We check the state without holding the socket lock;
1272 * if a race condition occurs, it would simply result
1273 * in an extra call to the MAC check function.
1274 */
1275 if (to != NULL &&
1276 !(so->so_state & SS_DEFUNCT) &&
1277 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1278 goto bad;
1279#endif /* MAC_SOCKET_SUBSET */
1280
1281 len = uio_resid(uiop);
1282 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1283 control, flags);
1284 if (error != 0) {
1285 if (uio_resid(uiop) != len && (error == ERESTART ||
1286 error == EINTR || error == EWOULDBLOCK))
1287 error = 0;
1288 /* Generation of SIGPIPE can be controlled per socket */
1289 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1290 psignal(p, SIGPIPE);
1291 }
1292 if (error == 0)
1293 *retval = (int)(len - uio_resid(uiop));
1294bad:
1295 if (to != NULL && want_free)
1296 FREE(to, M_SONAME);
1297out:
1298 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1299
1300 return (error);
1301}
1302
1303/*
1304 * Returns: 0 Success
1305 * ENOMEM
1306 * sendit:??? [see sendit definition in this file]
1307 * write:??? [4056224: applicable for pipes]
1308 */
1309int
1310sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1311{
1312 __pthread_testcancel(1);
1313 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
1314}
1315
1316int
1317sendto_nocancel(struct proc *p,
1318 struct sendto_nocancel_args *uap,
1319 int32_t *retval)
1320{
1321 struct user_msghdr msg;
1322 int error;
1323 uio_t auio = NULL;
1324 struct socket *so;
1325
1326 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1327 AUDIT_ARG(fd, uap->s);
1328
1329 auio = uio_create(1, 0,
1330 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1331 UIO_WRITE);
1332 if (auio == NULL) {
1333 error = ENOMEM;
1334 goto done;
1335 }
1336 uio_addiov(auio, uap->buf, uap->len);
1337
1338 msg.msg_name = uap->to;
1339 msg.msg_namelen = uap->tolen;
1340 /* no need to set up msg_iov. sendit uses uio_t we send it */
1341 msg.msg_iov = 0;
1342 msg.msg_iovlen = 0;
1343 msg.msg_control = 0;
1344 msg.msg_flags = 0;
1345
1346 error = file_socket(uap->s, &so);
1347 if (error)
1348 goto done;
1349
1350 if (so == NULL) {
1351 error = EBADF;
1352 } else {
1353 error = sendit(p, so, &msg, auio, uap->flags, retval);
1354 }
1355
1356 file_drop(uap->s);
1357done:
1358 if (auio != NULL)
1359 uio_free(auio);
1360
1361 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1362
1363 return (error);
1364}
1365
1366/*
1367 * Returns: 0 Success
1368 * ENOBUFS
1369 * copyin:EFAULT
1370 * sendit:??? [see sendit definition in this file]
1371 */
1372int
1373sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1374{
1375 __pthread_testcancel(1);
1376 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1377 retval));
1378}
1379
1380int
1381sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1382 int32_t *retval)
1383{
1384 struct user32_msghdr msg32;
1385 struct user64_msghdr msg64;
1386 struct user_msghdr user_msg;
1387 caddr_t msghdrp;
1388 int size_of_msghdr;
1389 int error;
1390 uio_t auio = NULL;
1391 struct user_iovec *iovp;
1392 struct socket *so;
1393
1394 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1395 AUDIT_ARG(fd, uap->s);
1396 if (IS_64BIT_PROCESS(p)) {
1397 msghdrp = (caddr_t)&msg64;
1398 size_of_msghdr = sizeof (msg64);
1399 } else {
1400 msghdrp = (caddr_t)&msg32;
1401 size_of_msghdr = sizeof (msg32);
1402 }
1403 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1404 if (error) {
1405 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1406 return (error);
1407 }
1408
1409 if (IS_64BIT_PROCESS(p)) {
1410 user_msg.msg_flags = msg64.msg_flags;
1411 user_msg.msg_controllen = msg64.msg_controllen;
1412 user_msg.msg_control = msg64.msg_control;
1413 user_msg.msg_iovlen = msg64.msg_iovlen;
1414 user_msg.msg_iov = msg64.msg_iov;
1415 user_msg.msg_namelen = msg64.msg_namelen;
1416 user_msg.msg_name = msg64.msg_name;
1417 } else {
1418 user_msg.msg_flags = msg32.msg_flags;
1419 user_msg.msg_controllen = msg32.msg_controllen;
1420 user_msg.msg_control = msg32.msg_control;
1421 user_msg.msg_iovlen = msg32.msg_iovlen;
1422 user_msg.msg_iov = msg32.msg_iov;
1423 user_msg.msg_namelen = msg32.msg_namelen;
1424 user_msg.msg_name = msg32.msg_name;
1425 }
1426
1427 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1428 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1429 0, 0, 0, 0);
1430 return (EMSGSIZE);
1431 }
1432
1433 /* allocate a uio large enough to hold the number of iovecs passed */
1434 auio = uio_create(user_msg.msg_iovlen, 0,
1435 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1436 UIO_WRITE);
1437 if (auio == NULL) {
1438 error = ENOBUFS;
1439 goto done;
1440 }
1441
1442 if (user_msg.msg_iovlen) {
1443 /*
1444 * get location of iovecs within the uio.
1445 * then copyin the iovecs from user space.
1446 */
1447 iovp = uio_iovsaddr(auio);
1448 if (iovp == NULL) {
1449 error = ENOBUFS;
1450 goto done;
1451 }
1452 error = copyin_user_iovec_array(user_msg.msg_iov,
1453 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1454 user_msg.msg_iovlen, iovp);
1455 if (error)
1456 goto done;
1457 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1458
1459 /* finish setup of uio_t */
1460 error = uio_calculateresid(auio);
1461 if (error) {
1462 goto done;
1463 }
1464 } else {
1465 user_msg.msg_iov = 0;
1466 }
1467
1468 /* msg_flags is ignored for send */
1469 user_msg.msg_flags = 0;
1470
1471 error = file_socket(uap->s, &so);
1472 if (error) {
1473 goto done;
1474 }
1475 if (so == NULL) {
1476 error = EBADF;
1477 } else {
1478 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1479 }
1480 file_drop(uap->s);
1481done:
1482 if (auio != NULL) {
1483 uio_free(auio);
1484 }
1485 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1486
1487 return (error);
1488}
1489
1490int
1491sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1492{
1493 int error = 0;
1494 struct user_msghdr_x *user_msg_x = NULL;
1495 struct uio **uiop = NULL;
1496 struct socket *so;
1497 u_int i;
1498 struct sockaddr *to = NULL;
1499 user_ssize_t len_before = 0, len_after;
1500 int need_drop = 0;
1501 size_t size_of_msghdr;
1502 void *umsgp = NULL;
1503 u_int uiocnt;
1504 int has_addr_or_ctl = 0;
1505
1506 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1507
1508 error = file_socket(uap->s, &so);
1509 if (error) {
1510 goto out;
1511 }
1512 need_drop = 1;
1513 if (so == NULL) {
1514 error = EBADF;
1515 goto out;
1516 }
1517
1518 /*
1519 * Input parameter range check
1520 */
1521 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1522 error = EINVAL;
1523 goto out;
1524 }
1525 /*
1526 * Clip to max currently allowed
1527 */
1528 if (uap->cnt > somaxsendmsgx)
1529 uap->cnt = somaxsendmsgx;
1530
1531 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
1532 M_TEMP, M_WAITOK | M_ZERO);
1533 if (user_msg_x == NULL) {
1534 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
1535 error = ENOMEM;
1536 goto out;
1537 }
1538 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1539 M_TEMP, M_WAITOK | M_ZERO);
1540 if (uiop == NULL) {
1541 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
1542 error = ENOMEM;
1543 goto out;
1544 }
1545
1546 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1547 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1548
1549 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
1550 M_TEMP, M_WAITOK | M_ZERO);
1551 if (umsgp == NULL) {
1552 printf("%s _MALLOC() user_msg_x failed\n", __func__);
1553 error = ENOMEM;
1554 goto out;
1555 }
1556 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1557 if (error) {
1558 DBG_PRINTF("%s copyin() failed\n", __func__);
1559 goto out;
1560 }
1561 error = internalize_user_msghdr_array(umsgp,
1562 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1563 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1564 if (error) {
1565 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1566 goto out;
1567 }
1568 /*
1569 * Make sure the size of each message iovec and
1570 * the aggregate size of all the iovec is valid
1571 */
1572 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1573 error = EINVAL;
1574 goto out;
1575 }
1576
1577 /*
1578 * Sanity check on passed arguments
1579 */
1580 for (i = 0; i < uap->cnt; i++) {
1581 struct user_msghdr_x *mp = user_msg_x + i;
1582
1583 /*
1584 * No flags on send message
1585 */
1586 if (mp->msg_flags != 0) {
1587 error = EINVAL;
1588 goto out;
1589 }
1590 /*
1591 * No support for address or ancillary data (yet)
1592 */
1593 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
1594 has_addr_or_ctl = 1;
1595
1596 if (mp->msg_control != USER_ADDR_NULL ||
1597 mp->msg_controllen != 0)
1598 has_addr_or_ctl = 1;
1599
1600#if CONFIG_MACF_SOCKET_SUBSET
1601 /*
1602 * We check the state without holding the socket lock;
1603 * if a race condition occurs, it would simply result
1604 * in an extra call to the MAC check function.
1605 *
1606 * Note: The following check is never true taken with the
1607 * current limitation that we do not accept to pass an address,
1608 * this is effectively placeholder code. If we add support for
1609 * addresses, we will have to check every address.
1610 */
1611 if (to != NULL &&
1612 !(so->so_state & SS_DEFUNCT) &&
1613 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1614 != 0)
1615 goto out;
1616#endif /* MAC_SOCKET_SUBSET */
1617 }
1618
1619 len_before = uio_array_resid(uiop, uap->cnt);
1620
1621 /*
1622 * Feed list of packets at once only for connected socket without
1623 * control message
1624 */
1625 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1626 pru_sosend_list_notsupp &&
1627 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1628 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1629 uap->cnt, uap->flags);
1630 } else {
1631 for (i = 0; i < uap->cnt; i++) {
1632 struct user_msghdr_x *mp = user_msg_x + i;
1633 struct user_msghdr user_msg;
1634 uio_t auio = uiop[i];
1635 int32_t tmpval;
1636
1637 user_msg.msg_flags = mp->msg_flags;
1638 user_msg.msg_controllen = mp->msg_controllen;
1639 user_msg.msg_control = mp->msg_control;
1640 user_msg.msg_iovlen = mp->msg_iovlen;
1641 user_msg.msg_iov = mp->msg_iov;
1642 user_msg.msg_namelen = mp->msg_namelen;
1643 user_msg.msg_name = mp->msg_name;
1644
1645 error = sendit(p, so, &user_msg, auio, uap->flags,
1646 &tmpval);
1647 if (error != 0)
1648 break;
1649 }
1650 }
1651 len_after = uio_array_resid(uiop, uap->cnt);
1652
1653 VERIFY(len_after <= len_before);
1654
1655 if (error != 0) {
1656 if (len_after != len_before && (error == ERESTART ||
1657 error == EINTR || error == EWOULDBLOCK ||
1658 error == ENOBUFS))
1659 error = 0;
1660 /* Generation of SIGPIPE can be controlled per socket */
1661 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1662 psignal(p, SIGPIPE);
1663 }
1664 if (error == 0) {
1665 uiocnt = externalize_user_msghdr_array(umsgp,
1666 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1667 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1668
1669 *retval = (int)(uiocnt);
1670 }
1671out:
1672 if (need_drop)
1673 file_drop(uap->s);
1674 if (umsgp != NULL)
1675 _FREE(umsgp, M_TEMP);
1676 if (uiop != NULL) {
1677 free_uio_array(uiop, uap->cnt);
1678 _FREE(uiop, M_TEMP);
1679 }
1680 if (user_msg_x != NULL)
1681 _FREE(user_msg_x, M_TEMP);
1682
1683 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1684
1685 return (error);
1686}
1687
1688
1689static int
1690copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1691{
1692 int error = 0;
1693 socklen_t sa_len = 0;
1694 ssize_t len;
1695
1696 len = *namelen;
1697 if (len <= 0 || fromsa == 0) {
1698 len = 0;
1699 } else {
1700#ifndef MIN
1701#define MIN(a, b) ((a) > (b) ? (b) : (a))
1702#endif
1703 sa_len = fromsa->sa_len;
1704 len = MIN((unsigned int)len, sa_len);
1705 error = copyout(fromsa, name, (unsigned)len);
1706 if (error)
1707 goto out;
1708 }
1709 *namelen = sa_len;
1710out:
1711 return (0);
1712}
1713
1714static int
1715copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1716 socklen_t *controllen, int *flags)
1717{
1718 int error = 0;
1719 ssize_t len;
1720 user_addr_t ctlbuf;
1721
1722 len = *controllen;
1723 *controllen = 0;
1724 ctlbuf = control;
1725
1726 while (m && len > 0) {
1727 unsigned int tocopy;
1728 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1729 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1730 int buflen = m->m_len;
1731
1732 while (buflen > 0 && len > 0) {
1733 /*
1734 * SCM_TIMESTAMP hack because struct timeval has a
1735 * different size for 32 bits and 64 bits processes
1736 */
1737 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1738 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1739 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1740 int tmp_space;
1741 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1742
1743 tmp_cp->cmsg_level = SOL_SOCKET;
1744 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1745
1746 if (proc_is64bit(p)) {
1747 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1748
1749 tv64->tv_sec = tv->tv_sec;
1750 tv64->tv_usec = tv->tv_usec;
1751
1752 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1753 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1754 } else {
1755 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1756
1757 tv32->tv_sec = tv->tv_sec;
1758 tv32->tv_usec = tv->tv_usec;
1759
1760 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1761 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1762 }
1763 if (len >= tmp_space) {
1764 tocopy = tmp_space;
1765 } else {
1766 *flags |= MSG_CTRUNC;
1767 tocopy = len;
1768 }
1769 error = copyout(tmp_buffer, ctlbuf, tocopy);
1770 if (error)
1771 goto out;
1772 } else {
1773 if (cp_size > buflen) {
1774 panic("cp_size > buflen, something"
1775 "wrong with alignment!");
1776 }
1777 if (len >= cp_size) {
1778 tocopy = cp_size;
1779 } else {
1780 *flags |= MSG_CTRUNC;
1781 tocopy = len;
1782 }
1783 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1784 if (error)
1785 goto out;
1786 }
1787
1788 ctlbuf += tocopy;
1789 len -= tocopy;
1790
1791 buflen -= cp_size;
1792 cp = (struct cmsghdr *)(void *)
1793 ((unsigned char *) cp + cp_size);
1794 cp_size = CMSG_ALIGN(cp->cmsg_len);
1795 }
1796
1797 m = m->m_next;
1798 }
1799 *controllen = ctlbuf - control;
1800out:
1801 return (error);
1802}
1803
1804/*
1805 * Returns: 0 Success
1806 * ENOTSOCK
1807 * EINVAL
1808 * EBADF
1809 * EACCES Mandatory Access Control failure
1810 * copyout:EFAULT
1811 * fp_lookup:EBADF
1812 * <pru_soreceive>:ENOBUFS
1813 * <pru_soreceive>:ENOTCONN
1814 * <pru_soreceive>:EWOULDBLOCK
1815 * <pru_soreceive>:EFAULT
1816 * <pru_soreceive>:EINTR
1817 * <pru_soreceive>:EBADF
1818 * <pru_soreceive>:EINVAL
1819 * <pru_soreceive>:EMSGSIZE
1820 * <pru_soreceive>:???
1821 *
1822 * Notes: Additional return values from calls through <pru_soreceive>
1823 * depend on protocols other than TCP or AF_UNIX, which are
1824 * documented above.
1825 */
1826static int
1827recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1828 user_addr_t namelenp, int32_t *retval)
1829{
1830 ssize_t len;
1831 int error;
1832 struct mbuf *control = 0;
1833 struct socket *so;
1834 struct sockaddr *fromsa = 0;
1835 struct fileproc *fp;
1836
1837 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1838 proc_fdlock(p);
1839 if ((error = fp_lookup(p, s, &fp, 1))) {
1840 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1841 proc_fdunlock(p);
1842 return (error);
1843 }
1844 if (fp->f_type != DTYPE_SOCKET) {
1845 fp_drop(p, s, fp, 1);
1846 proc_fdunlock(p);
1847 return (ENOTSOCK);
1848 }
1849
1850 so = (struct socket *)fp->f_data;
1851 if (so == NULL) {
1852 fp_drop(p, s, fp, 1);
1853 proc_fdunlock(p);
1854 return (EBADF);
1855 }
1856
1857 proc_fdunlock(p);
1858
1859#if CONFIG_MACF_SOCKET_SUBSET
1860 /*
1861 * We check the state without holding the socket lock;
1862 * if a race condition occurs, it would simply result
1863 * in an extra call to the MAC check function.
1864 */
1865 if (!(so->so_state & SS_DEFUNCT) &&
1866 !(so->so_state & SS_ISCONNECTED) &&
1867 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1868 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1869 goto out1;
1870#endif /* MAC_SOCKET_SUBSET */
1871 if (uio_resid(uiop) < 0) {
1872 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1873 error = EINVAL;
1874 goto out1;
1875 }
1876
1877 len = uio_resid(uiop);
1878 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1879 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1880 &mp->msg_flags);
1881 if (fromsa)
1882 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1883 fromsa);
1884 if (error) {
1885 if (uio_resid(uiop) != len && (error == ERESTART ||
1886 error == EINTR || error == EWOULDBLOCK))
1887 error = 0;
1888 }
1889 if (error)
1890 goto out;
1891
1892 *retval = len - uio_resid(uiop);
1893
1894 if (mp->msg_name) {
1895 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1896 if (error)
1897 goto out;
1898 /* return the actual, untruncated address length */
1899 if (namelenp &&
1900 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1901 sizeof (int)))) {
1902 goto out;
1903 }
1904 }
1905
1906 if (mp->msg_control) {
1907 error = copyout_control(p, control, mp->msg_control,
1908 &mp->msg_controllen, &mp->msg_flags);
1909 }
1910out:
1911 if (fromsa)
1912 FREE(fromsa, M_SONAME);
1913 if (control)
1914 m_freem(control);
1915 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1916out1:
1917 fp_drop(p, s, fp, 0);
1918 return (error);
1919}
1920
1921/*
1922 * Returns: 0 Success
1923 * ENOMEM
1924 * copyin:EFAULT
1925 * recvit:???
1926 * read:??? [4056224: applicable for pipes]
1927 *
1928 * Notes: The read entry point is only called as part of support for
1929 * binary backward compatability; new code should use read
1930 * instead of recv or recvfrom when attempting to read data
1931 * from pipes.
1932 *
1933 * For full documentation of the return codes from recvit, see
1934 * the block header for the recvit function.
1935 */
1936int
1937recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
1938{
1939 __pthread_testcancel(1);
1940 return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
1941 retval));
1942}
1943
1944int
1945recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
1946 int32_t *retval)
1947{
1948 struct user_msghdr msg;
1949 int error;
1950 uio_t auio = NULL;
1951
1952 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
1953 AUDIT_ARG(fd, uap->s);
1954
1955 if (uap->fromlenaddr) {
1956 error = copyin(uap->fromlenaddr,
1957 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1958 if (error)
1959 return (error);
1960 } else {
1961 msg.msg_namelen = 0;
1962 }
1963 msg.msg_name = uap->from;
1964 auio = uio_create(1, 0,
1965 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1966 UIO_READ);
1967 if (auio == NULL) {
1968 return (ENOMEM);
1969 }
1970
1971 uio_addiov(auio, uap->buf, uap->len);
1972 /* no need to set up msg_iov. recvit uses uio_t we send it */
1973 msg.msg_iov = 0;
1974 msg.msg_iovlen = 0;
1975 msg.msg_control = 0;
1976 msg.msg_controllen = 0;
1977 msg.msg_flags = uap->flags;
1978 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1979 if (auio != NULL) {
1980 uio_free(auio);
1981 }
1982
1983 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1984
1985 return (error);
1986}
1987
1988/*
1989 * Returns: 0 Success
1990 * EMSGSIZE
1991 * ENOMEM
1992 * copyin:EFAULT
1993 * copyout:EFAULT
1994 * recvit:???
1995 *
1996 * Notes: For full documentation of the return codes from recvit, see
1997 * the block header for the recvit function.
1998 */
1999int
2000recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2001{
2002 __pthread_testcancel(1);
2003 return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2004 retval));
2005}
2006
2007int
2008recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2009 int32_t *retval)
2010{
2011 struct user32_msghdr msg32;
2012 struct user64_msghdr msg64;
2013 struct user_msghdr user_msg;
2014 caddr_t msghdrp;
2015 int size_of_msghdr;
2016 user_addr_t uiov;
2017 int error;
2018 uio_t auio = NULL;
2019 struct user_iovec *iovp;
2020
2021 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2022 AUDIT_ARG(fd, uap->s);
2023 if (IS_64BIT_PROCESS(p)) {
2024 msghdrp = (caddr_t)&msg64;
2025 size_of_msghdr = sizeof (msg64);
2026 } else {
2027 msghdrp = (caddr_t)&msg32;
2028 size_of_msghdr = sizeof (msg32);
2029 }
2030 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2031 if (error) {
2032 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2033 return (error);
2034 }
2035
2036 /* only need to copy if user process is not 64-bit */
2037 if (IS_64BIT_PROCESS(p)) {
2038 user_msg.msg_flags = msg64.msg_flags;
2039 user_msg.msg_controllen = msg64.msg_controllen;
2040 user_msg.msg_control = msg64.msg_control;
2041 user_msg.msg_iovlen = msg64.msg_iovlen;
2042 user_msg.msg_iov = msg64.msg_iov;
2043 user_msg.msg_namelen = msg64.msg_namelen;
2044 user_msg.msg_name = msg64.msg_name;
2045 } else {
2046 user_msg.msg_flags = msg32.msg_flags;
2047 user_msg.msg_controllen = msg32.msg_controllen;
2048 user_msg.msg_control = msg32.msg_control;
2049 user_msg.msg_iovlen = msg32.msg_iovlen;
2050 user_msg.msg_iov = msg32.msg_iov;
2051 user_msg.msg_namelen = msg32.msg_namelen;
2052 user_msg.msg_name = msg32.msg_name;
2053 }
2054
2055 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2056 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2057 0, 0, 0, 0);
2058 return (EMSGSIZE);
2059 }
2060
2061 user_msg.msg_flags = uap->flags;
2062
2063 /* allocate a uio large enough to hold the number of iovecs passed */
2064 auio = uio_create(user_msg.msg_iovlen, 0,
2065 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2066 UIO_READ);
2067 if (auio == NULL) {
2068 error = ENOMEM;
2069 goto done;
2070 }
2071
2072 /*
2073 * get location of iovecs within the uio. then copyin the iovecs from
2074 * user space.
2075 */
2076 iovp = uio_iovsaddr(auio);
2077 if (iovp == NULL) {
2078 error = ENOMEM;
2079 goto done;
2080 }
2081 uiov = user_msg.msg_iov;
2082 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2083 error = copyin_user_iovec_array(uiov,
2084 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2085 user_msg.msg_iovlen, iovp);
2086 if (error)
2087 goto done;
2088
2089 /* finish setup of uio_t */
2090 error = uio_calculateresid(auio);
2091 if (error) {
2092 goto done;
2093 }
2094
2095 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2096 if (!error) {
2097 user_msg.msg_iov = uiov;
2098 if (IS_64BIT_PROCESS(p)) {
2099 msg64.msg_flags = user_msg.msg_flags;
2100 msg64.msg_controllen = user_msg.msg_controllen;
2101 msg64.msg_control = user_msg.msg_control;
2102 msg64.msg_iovlen = user_msg.msg_iovlen;
2103 msg64.msg_iov = user_msg.msg_iov;
2104 msg64.msg_namelen = user_msg.msg_namelen;
2105 msg64.msg_name = user_msg.msg_name;
2106 } else {
2107 msg32.msg_flags = user_msg.msg_flags;
2108 msg32.msg_controllen = user_msg.msg_controllen;
2109 msg32.msg_control = user_msg.msg_control;
2110 msg32.msg_iovlen = user_msg.msg_iovlen;
2111 msg32.msg_iov = user_msg.msg_iov;
2112 msg32.msg_namelen = user_msg.msg_namelen;
2113 msg32.msg_name = user_msg.msg_name;
2114 }
2115 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2116 }
2117done:
2118 if (auio != NULL) {
2119 uio_free(auio);
2120 }
2121 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2122 return (error);
2123}
2124
2125int
2126recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2127{
2128 int error = EOPNOTSUPP;
2129 struct user_msghdr_x *user_msg_x = NULL;
2130 struct recv_msg_elem *recv_msg_array = NULL;
2131 struct socket *so;
2132 user_ssize_t len_before = 0, len_after;
2133 int need_drop = 0;
2134 size_t size_of_msghdr;
2135 void *umsgp = NULL;
2136 u_int i;
2137 u_int uiocnt;
2138
2139 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2140
2141 error = file_socket(uap->s, &so);
2142 if (error) {
2143 goto out;
2144 }
2145 need_drop = 1;
2146 if (so == NULL) {
2147 error = EBADF;
2148 goto out;
2149 }
2150 /*
2151 * Input parameter range check
2152 */
2153 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2154 error = EINVAL;
2155 goto out;
2156 }
2157 if (uap->cnt > somaxrecvmsgx)
2158 uap->cnt = somaxrecvmsgx;
2159
2160 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
2161 M_TEMP, M_WAITOK | M_ZERO);
2162 if (user_msg_x == NULL) {
2163 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
2164 error = ENOMEM;
2165 goto out;
2166 }
2167 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2168 if (recv_msg_array == NULL) {
2169 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2170 error = ENOMEM;
2171 goto out;
2172 }
2173 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2174 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2175
2176 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2177 if (umsgp == NULL) {
2178 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
2179 error = ENOMEM;
2180 goto out;
2181 }
2182 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2183 if (error) {
2184 DBG_PRINTF("%s copyin() failed\n", __func__);
2185 goto out;
2186 }
2187 error = internalize_recv_msghdr_array(umsgp,
2188 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2189 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2190 if (error) {
2191 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2192 goto out;
2193 }
2194 /*
2195 * Make sure the size of each message iovec and
2196 * the aggregate size of all the iovec is valid
2197 */
2198 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2199 error = EINVAL;
2200 goto out;
2201 }
2202 /*
2203 * Sanity check on passed arguments
2204 */
2205 for (i = 0; i < uap->cnt; i++) {
2206 struct user_msghdr_x *mp = user_msg_x + i;
2207
2208 if (mp->msg_flags != 0) {
2209 error = EINVAL;
2210 goto out;
2211 }
2212 }
2213#if CONFIG_MACF_SOCKET_SUBSET
2214 /*
2215 * We check the state without holding the socket lock;
2216 * if a race condition occurs, it would simply result
2217 * in an extra call to the MAC check function.
2218 */
2219 if (!(so->so_state & SS_DEFUNCT) &&
2220 !(so->so_state & SS_ISCONNECTED) &&
2221 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2222 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
2223 goto out;
2224#endif /* MAC_SOCKET_SUBSET */
2225
2226 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2227
2228 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2229 pru_soreceive_list_notsupp &&
2230 somaxrecvmsgx == 0) {
2231 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2232 recv_msg_array, uap->cnt, &uap->flags);
2233 } else {
2234 int flags = uap->flags;
2235
2236 for (i = 0; i < uap->cnt; i++) {
2237 struct recv_msg_elem *recv_msg_elem;
2238 uio_t auio;
2239 struct sockaddr **psa;
2240 struct mbuf **controlp;
2241
2242 recv_msg_elem = recv_msg_array + i;
2243 auio = recv_msg_elem->uio;
2244
2245 /*
2246 * Do not block if we got at least one packet
2247 */
2248 if (i > 0)
2249 flags |= MSG_DONTWAIT;
2250
2251 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2252 &recv_msg_elem->psa : NULL;
2253 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2254 &recv_msg_elem->controlp : NULL;
2255
2256 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2257 auio, (struct mbuf **)0, controlp, &flags);
2258 if (error)
2259 break;
2260 /*
2261 * We have some data
2262 */
2263 recv_msg_elem->which |= SOCK_MSG_DATA;
2264 /*
2265 * Stop on partial copy
2266 */
2267 if (flags & (MSG_RCVMORE | MSG_TRUNC))
2268 break;
2269 }
2270 if ((uap->flags & MSG_DONTWAIT) == 0)
2271 flags &= ~MSG_DONTWAIT;
2272 uap->flags = flags;
2273 }
2274
2275 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2276
2277 if (error) {
2278 if (len_after != len_before && (error == ERESTART ||
2279 error == EINTR || error == EWOULDBLOCK))
2280 error = 0;
2281 else
2282 goto out;
2283 }
2284
2285 uiocnt = externalize_recv_msghdr_array(umsgp,
2286 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2287 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2288
2289 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2290 if (error) {
2291 DBG_PRINTF("%s copyout() failed\n", __func__);
2292 goto out;
2293 }
2294 *retval = (int)(uiocnt);
2295
2296 for (i = 0; i < uap->cnt; i++) {
2297 struct user_msghdr_x *mp = user_msg_x + i;
2298 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2299 struct sockaddr *fromsa = recv_msg_elem->psa;
2300
2301 if (mp->msg_name) {
2302 error = copyout_sa(fromsa, mp->msg_name,
2303 &mp->msg_namelen);
2304 if (error)
2305 goto out;
2306 }
2307 if (mp->msg_control) {
2308 error = copyout_control(p, recv_msg_elem->controlp,
2309 mp->msg_control, &mp->msg_controllen,
2310 &mp->msg_flags);
2311 if (error)
2312 goto out;
2313 }
2314 }
2315out:
2316 if (need_drop)
2317 file_drop(uap->s);
2318 if (umsgp != NULL)
2319 _FREE(umsgp, M_TEMP);
2320 if (recv_msg_array != NULL)
2321 free_recv_msg_array(recv_msg_array, uap->cnt);
2322 if (user_msg_x != NULL)
2323 _FREE(user_msg_x, M_TEMP);
2324
2325 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2326
2327 return (error);
2328}
2329
2330/*
2331 * Returns: 0 Success
2332 * EBADF
2333 * file_socket:ENOTSOCK
2334 * file_socket:EBADF
2335 * soshutdown:EINVAL
2336 * soshutdown:ENOTCONN
2337 * soshutdown:EADDRNOTAVAIL[TCP]
2338 * soshutdown:ENOBUFS[TCP]
2339 * soshutdown:EMSGSIZE[TCP]
2340 * soshutdown:EHOSTUNREACH[TCP]
2341 * soshutdown:ENETUNREACH[TCP]
2342 * soshutdown:ENETDOWN[TCP]
2343 * soshutdown:ENOMEM[TCP]
2344 * soshutdown:EACCES[TCP]
2345 * soshutdown:EMSGSIZE[TCP]
2346 * soshutdown:ENOBUFS[TCP]
2347 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2348 * soshutdown:??? [other protocol families]
2349 */
2350/* ARGSUSED */
2351int
2352shutdown(__unused struct proc *p, struct shutdown_args *uap,
2353 __unused int32_t *retval)
2354{
2355 struct socket *so;
2356 int error;
2357
2358 AUDIT_ARG(fd, uap->s);
2359 error = file_socket(uap->s, &so);
2360 if (error)
2361 return (error);
2362 if (so == NULL) {
2363 error = EBADF;
2364 goto out;
2365 }
2366 error = soshutdown((struct socket *)so, uap->how);
2367out:
2368 file_drop(uap->s);
2369 return (error);
2370}
2371
2372/*
2373 * Returns: 0 Success
2374 * EFAULT
2375 * EINVAL
2376 * EACCES Mandatory Access Control failure
2377 * file_socket:ENOTSOCK
2378 * file_socket:EBADF
2379 * sosetopt:EINVAL
2380 * sosetopt:ENOPROTOOPT
2381 * sosetopt:ENOBUFS
2382 * sosetopt:EDOM
2383 * sosetopt:EFAULT
2384 * sosetopt:EOPNOTSUPP[AF_UNIX]
2385 * sosetopt:???
2386 */
2387/* ARGSUSED */
2388int
2389setsockopt(struct proc *p, struct setsockopt_args *uap,
2390 __unused int32_t *retval)
2391{
2392 struct socket *so;
2393 struct sockopt sopt;
2394 int error;
2395
2396 AUDIT_ARG(fd, uap->s);
2397 if (uap->val == 0 && uap->valsize != 0)
2398 return (EFAULT);
2399 /* No bounds checking on size (it's unsigned) */
2400
2401 error = file_socket(uap->s, &so);
2402 if (error)
2403 return (error);
2404
2405 sopt.sopt_dir = SOPT_SET;
2406 sopt.sopt_level = uap->level;
2407 sopt.sopt_name = uap->name;
2408 sopt.sopt_val = uap->val;
2409 sopt.sopt_valsize = uap->valsize;
2410 sopt.sopt_p = p;
2411
2412 if (so == NULL) {
2413 error = EINVAL;
2414 goto out;
2415 }
2416#if CONFIG_MACF_SOCKET_SUBSET
2417 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2418 &sopt)) != 0)
2419 goto out;
2420#endif /* MAC_SOCKET_SUBSET */
2421 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2422out:
2423 file_drop(uap->s);
2424 return (error);
2425}
2426
2427
2428
2429/*
2430 * Returns: 0 Success
2431 * EINVAL
2432 * EBADF
2433 * EACCES Mandatory Access Control failure
2434 * copyin:EFAULT
2435 * copyout:EFAULT
2436 * file_socket:ENOTSOCK
2437 * file_socket:EBADF
2438 * sogetopt:???
2439 */
2440int
2441getsockopt(struct proc *p, struct getsockopt_args *uap,
2442 __unused int32_t *retval)
2443{
2444 int error;
2445 socklen_t valsize;
2446 struct sockopt sopt;
2447 struct socket *so;
2448
2449 error = file_socket(uap->s, &so);
2450 if (error)
2451 return (error);
2452 if (uap->val) {
2453 error = copyin(uap->avalsize, (caddr_t)&valsize,
2454 sizeof (valsize));
2455 if (error)
2456 goto out;
2457 /* No bounds checking on size (it's unsigned) */
2458 } else {
2459 valsize = 0;
2460 }
2461 sopt.sopt_dir = SOPT_GET;
2462 sopt.sopt_level = uap->level;
2463 sopt.sopt_name = uap->name;
2464 sopt.sopt_val = uap->val;
2465 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2466 sopt.sopt_p = p;
2467
2468 if (so == NULL) {
2469 error = EBADF;
2470 goto out;
2471 }
2472#if CONFIG_MACF_SOCKET_SUBSET
2473 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2474 &sopt)) != 0)
2475 goto out;
2476#endif /* MAC_SOCKET_SUBSET */
2477 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2478 if (error == 0) {
2479 valsize = sopt.sopt_valsize;
2480 error = copyout((caddr_t)&valsize, uap->avalsize,
2481 sizeof (valsize));
2482 }
2483out:
2484 file_drop(uap->s);
2485 return (error);
2486}
2487
2488
2489/*
2490 * Get socket name.
2491 *
2492 * Returns: 0 Success
2493 * EBADF
2494 * file_socket:ENOTSOCK
2495 * file_socket:EBADF
2496 * copyin:EFAULT
2497 * copyout:EFAULT
2498 * <pru_sockaddr>:ENOBUFS[TCP]
2499 * <pru_sockaddr>:ECONNRESET[TCP]
2500 * <pru_sockaddr>:EINVAL[AF_UNIX]
2501 * <sf_getsockname>:???
2502 */
2503/* ARGSUSED */
2504int
2505getsockname(__unused struct proc *p, struct getsockname_args *uap,
2506 __unused int32_t *retval)
2507{
2508 struct socket *so;
2509 struct sockaddr *sa;
2510 socklen_t len;
2511 socklen_t sa_len;
2512 int error;
2513
2514 error = file_socket(uap->fdes, &so);
2515 if (error)
2516 return (error);
2517 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
2518 if (error)
2519 goto out;
2520 if (so == NULL) {
2521 error = EBADF;
2522 goto out;
2523 }
2524 sa = 0;
2525 socket_lock(so, 1);
2526 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2527 if (error == 0) {
2528 error = sflt_getsockname(so, &sa);
2529 if (error == EJUSTRETURN)
2530 error = 0;
2531 }
2532 socket_unlock(so, 1);
2533 if (error)
2534 goto bad;
2535 if (sa == 0) {
2536 len = 0;
2537 goto gotnothing;
2538 }
2539
2540 sa_len = sa->sa_len;
2541 len = MIN(len, sa_len);
2542 error = copyout((caddr_t)sa, uap->asa, len);
2543 if (error)
2544 goto bad;
2545 /* return the actual, untruncated address length */
2546 len = sa_len;
2547gotnothing:
2548 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
2549bad:
2550 if (sa)
2551 FREE(sa, M_SONAME);
2552out:
2553 file_drop(uap->fdes);
2554 return (error);
2555}
2556
2557/*
2558 * Get name of peer for connected socket.
2559 *
2560 * Returns: 0 Success
2561 * EBADF
2562 * EINVAL
2563 * ENOTCONN
2564 * file_socket:ENOTSOCK
2565 * file_socket:EBADF
2566 * copyin:EFAULT
2567 * copyout:EFAULT
2568 * <pru_peeraddr>:???
2569 * <sf_getpeername>:???
2570 */
2571/* ARGSUSED */
2572int
2573getpeername(__unused struct proc *p, struct getpeername_args *uap,
2574 __unused int32_t *retval)
2575{
2576 struct socket *so;
2577 struct sockaddr *sa;
2578 socklen_t len;
2579 socklen_t sa_len;
2580 int error;
2581
2582 error = file_socket(uap->fdes, &so);
2583 if (error)
2584 return (error);
2585 if (so == NULL) {
2586 error = EBADF;
2587 goto out;
2588 }
2589
2590 socket_lock(so, 1);
2591
2592 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2593 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2594 /* the socket has been shutdown, no more getpeername's */
2595 socket_unlock(so, 1);
2596 error = EINVAL;
2597 goto out;
2598 }
2599
2600 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2601 socket_unlock(so, 1);
2602 error = ENOTCONN;
2603 goto out;
2604 }
2605 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
2606 if (error) {
2607 socket_unlock(so, 1);
2608 goto out;
2609 }
2610 sa = 0;
2611 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2612 if (error == 0) {
2613 error = sflt_getpeername(so, &sa);
2614 if (error == EJUSTRETURN)
2615 error = 0;
2616 }
2617 socket_unlock(so, 1);
2618 if (error)
2619 goto bad;
2620 if (sa == 0) {
2621 len = 0;
2622 goto gotnothing;
2623 }
2624 sa_len = sa->sa_len;
2625 len = MIN(len, sa_len);
2626 error = copyout(sa, uap->asa, len);
2627 if (error)
2628 goto bad;
2629 /* return the actual, untruncated address length */
2630 len = sa_len;
2631gotnothing:
2632 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
2633bad:
2634 if (sa) FREE(sa, M_SONAME);
2635out:
2636 file_drop(uap->fdes);
2637 return (error);
2638}
2639
2640int
2641sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
2642{
2643 struct sockaddr *sa;
2644 struct mbuf *m;
2645 int error;
2646
2647 size_t alloc_buflen = (size_t)buflen;
2648
2649 if (alloc_buflen > INT_MAX/2)
2650 return (EINVAL);
2651#ifdef __LP64__
2652 /*
2653 * The fd's in the buffer must expand to be pointers, thus we need twice
2654 * as much space
2655 */
2656 if (type == MT_CONTROL)
2657 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
2658 sizeof(struct cmsghdr);
2659#endif
2660 if (alloc_buflen > MLEN) {
2661 if (type == MT_SONAME && alloc_buflen <= 112)
2662 alloc_buflen = MLEN; /* unix domain compat. hack */
2663 else if (alloc_buflen > MCLBYTES)
2664 return (EINVAL);
2665 }
2666 m = m_get(M_WAIT, type);
2667 if (m == NULL)
2668 return (ENOBUFS);
2669 if (alloc_buflen > MLEN) {
2670 MCLGET(m, M_WAIT);
2671 if ((m->m_flags & M_EXT) == 0) {
2672 m_free(m);
2673 return (ENOBUFS);
2674 }
2675 }
2676 /*
2677 * K64: We still copyin the original buflen because it gets expanded
2678 * later and we lie about the size of the mbuf because it only affects
2679 * unp_* functions
2680 */
2681 m->m_len = buflen;
2682 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2683 if (error) {
2684 (void) m_free(m);
2685 } else {
2686 *mp = m;
2687 if (type == MT_SONAME) {
2688 sa = mtod(m, struct sockaddr *);
2689 sa->sa_len = buflen;
2690 }
2691 }
2692 return (error);
2693}
2694
2695/*
2696 * Given a user_addr_t of length len, allocate and fill out a *sa.
2697 *
2698 * Returns: 0 Success
2699 * ENAMETOOLONG Filename too long
2700 * EINVAL Invalid argument
2701 * ENOMEM Not enough space
2702 * copyin:EFAULT Bad address
2703 */
2704static int
2705getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2706 size_t len, boolean_t translate_unspec)
2707{
2708 struct sockaddr *sa;
2709 int error;
2710
2711 if (len > SOCK_MAXADDRLEN)
2712 return (ENAMETOOLONG);
2713
2714 if (len < offsetof(struct sockaddr, sa_data[0]))
2715 return (EINVAL);
2716
2717 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2718 if (sa == NULL) {
2719 return (ENOMEM);
2720 }
2721 error = copyin(uaddr, (caddr_t)sa, len);
2722 if (error) {
2723 FREE(sa, M_SONAME);
2724 } else {
2725 /*
2726 * Force sa_family to AF_INET on AF_INET sockets to handle
2727 * legacy applications that use AF_UNSPEC (0). On all other
2728 * sockets we leave it unchanged and let the lower layer
2729 * handle it.
2730 */
2731 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2732 SOCK_CHECK_DOM(so, PF_INET) &&
2733 len == sizeof (struct sockaddr_in))
2734 sa->sa_family = AF_INET;
2735
2736 sa->sa_len = len;
2737 *namp = sa;
2738 }
2739 return (error);
2740}
2741
2742static int
2743getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2744 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2745{
2746 int error;
2747
2748 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2749 len < offsetof(struct sockaddr, sa_data[0]))
2750 return (EINVAL);
2751
2752 /*
2753 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2754 * so the check here is inclusive.
2755 */
2756 if (len > sizeof (*ss))
2757 return (ENAMETOOLONG);
2758
2759 bzero(ss, sizeof (*ss));
2760 error = copyin(uaddr, (caddr_t)ss, len);
2761 if (error == 0) {
2762 /*
2763 * Force sa_family to AF_INET on AF_INET sockets to handle
2764 * legacy applications that use AF_UNSPEC (0). On all other
2765 * sockets we leave it unchanged and let the lower layer
2766 * handle it.
2767 */
2768 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2769 SOCK_CHECK_DOM(so, PF_INET) &&
2770 len == sizeof (struct sockaddr_in))
2771 ss->ss_family = AF_INET;
2772
2773 ss->ss_len = len;
2774 }
2775 return (error);
2776}
2777
2778int
2779internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2780 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2781{
2782 int error = 0;
2783 u_int i;
2784 u_int namecnt = 0;
2785 u_int ctlcnt = 0;
2786
2787 for (i = 0; i < count; i++) {
2788 uio_t auio;
2789 struct user_iovec *iovp;
2790 struct user_msghdr_x *user_msg = dst + i;
2791
2792 if (spacetype == UIO_USERSPACE64) {
2793 const struct user64_msghdr_x *msghdr64;
2794
2795 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2796
2797 user_msg->msg_name = msghdr64->msg_name;
2798 user_msg->msg_namelen = msghdr64->msg_namelen;
2799 user_msg->msg_iov = msghdr64->msg_iov;
2800 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2801 user_msg->msg_control = msghdr64->msg_control;
2802 user_msg->msg_controllen = msghdr64->msg_controllen;
2803 user_msg->msg_flags = msghdr64->msg_flags;
2804 user_msg->msg_datalen = msghdr64->msg_datalen;
2805 } else {
2806 const struct user32_msghdr_x *msghdr32;
2807
2808 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2809
2810 user_msg->msg_name = msghdr32->msg_name;
2811 user_msg->msg_namelen = msghdr32->msg_namelen;
2812 user_msg->msg_iov = msghdr32->msg_iov;
2813 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2814 user_msg->msg_control = msghdr32->msg_control;
2815 user_msg->msg_controllen = msghdr32->msg_controllen;
2816 user_msg->msg_flags = msghdr32->msg_flags;
2817 user_msg->msg_datalen = msghdr32->msg_datalen;
2818 }
2819
2820 if (user_msg->msg_iovlen <= 0 ||
2821 user_msg->msg_iovlen > UIO_MAXIOV) {
2822 error = EMSGSIZE;
2823 goto done;
2824 }
2825 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2826 direction);
2827 if (auio == NULL) {
2828 error = ENOMEM;
2829 goto done;
2830 }
2831 uiop[i] = auio;
2832
2833 iovp = uio_iovsaddr(auio);
2834 if (iovp == NULL) {
2835 error = ENOMEM;
2836 goto done;
2837 }
2838 error = copyin_user_iovec_array(user_msg->msg_iov,
2839 spacetype, user_msg->msg_iovlen, iovp);
2840 if (error)
2841 goto done;
2842 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2843
2844 error = uio_calculateresid(auio);
2845 if (error)
2846 goto done;
2847 user_msg->msg_datalen = uio_resid(auio);
2848
2849 if (user_msg->msg_name && user_msg->msg_namelen)
2850 namecnt++;
2851 if (user_msg->msg_control && user_msg->msg_controllen)
2852 ctlcnt++;
2853 }
2854done:
2855
2856 return (error);
2857}
2858
2859int
2860internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2861 u_int count, struct user_msghdr_x *dst,
2862 struct recv_msg_elem *recv_msg_array)
2863{
2864 int error = 0;
2865 u_int i;
2866
2867 for (i = 0; i < count; i++) {
2868 struct user_iovec *iovp;
2869 struct user_msghdr_x *user_msg = dst + i;
2870 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2871
2872 if (spacetype == UIO_USERSPACE64) {
2873 const struct user64_msghdr_x *msghdr64;
2874
2875 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2876
2877 user_msg->msg_name = msghdr64->msg_name;
2878 user_msg->msg_namelen = msghdr64->msg_namelen;
2879 user_msg->msg_iov = msghdr64->msg_iov;
2880 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2881 user_msg->msg_control = msghdr64->msg_control;
2882 user_msg->msg_controllen = msghdr64->msg_controllen;
2883 user_msg->msg_flags = msghdr64->msg_flags;
2884 user_msg->msg_datalen = msghdr64->msg_datalen;
2885 } else {
2886 const struct user32_msghdr_x *msghdr32;
2887
2888 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2889
2890 user_msg->msg_name = msghdr32->msg_name;
2891 user_msg->msg_namelen = msghdr32->msg_namelen;
2892 user_msg->msg_iov = msghdr32->msg_iov;
2893 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2894 user_msg->msg_control = msghdr32->msg_control;
2895 user_msg->msg_controllen = msghdr32->msg_controllen;
2896 user_msg->msg_flags = msghdr32->msg_flags;
2897 user_msg->msg_datalen = msghdr32->msg_datalen;
2898 }
2899
2900 if (user_msg->msg_iovlen <= 0 ||
2901 user_msg->msg_iovlen > UIO_MAXIOV) {
2902 error = EMSGSIZE;
2903 goto done;
2904 }
2905 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
2906 spacetype, direction);
2907 if (recv_msg_elem->uio == NULL) {
2908 error = ENOMEM;
2909 goto done;
2910 }
2911
2912 iovp = uio_iovsaddr(recv_msg_elem->uio);
2913 if (iovp == NULL) {
2914 error = ENOMEM;
2915 goto done;
2916 }
2917 error = copyin_user_iovec_array(user_msg->msg_iov,
2918 spacetype, user_msg->msg_iovlen, iovp);
2919 if (error)
2920 goto done;
2921 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2922
2923 error = uio_calculateresid(recv_msg_elem->uio);
2924 if (error)
2925 goto done;
2926 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
2927
2928 if (user_msg->msg_name && user_msg->msg_namelen)
2929 recv_msg_elem->which |= SOCK_MSG_SA;
2930 if (user_msg->msg_control && user_msg->msg_controllen)
2931 recv_msg_elem->which |= SOCK_MSG_CONTROL;
2932 }
2933done:
2934
2935 return (error);
2936}
2937
2938u_int
2939externalize_user_msghdr_array(void *dst, int spacetype, int direction,
2940 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
2941{
2942#pragma unused(direction)
2943 u_int i;
2944 int seenlast = 0;
2945 u_int retcnt = 0;
2946
2947 for (i = 0; i < count; i++) {
2948 const struct user_msghdr_x *user_msg = src + i;
2949 uio_t auio = uiop[i];
2950 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
2951
2952 if (user_msg->msg_datalen != 0 && len == 0)
2953 seenlast = 1;
2954
2955 if (seenlast == 0)
2956 retcnt ++;
2957
2958 if (spacetype == UIO_USERSPACE64) {
2959 struct user64_msghdr_x *msghdr64;
2960
2961 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
2962
2963 msghdr64->msg_flags = user_msg->msg_flags;
2964 msghdr64->msg_datalen = len;
2965
2966 } else {
2967 struct user32_msghdr_x *msghdr32;
2968
2969 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
2970
2971 msghdr32->msg_flags = user_msg->msg_flags;
2972 msghdr32->msg_datalen = len;
2973 }
2974 }
2975 return (retcnt);
2976}
2977
2978u_int
2979externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
2980 u_int count, const struct user_msghdr_x *src,
2981 struct recv_msg_elem *recv_msg_array)
2982{
2983 u_int i;
2984 int seenlast = 0;
2985 u_int retcnt = 0;
2986
2987 for (i = 0; i < count; i++) {
2988 const struct user_msghdr_x *user_msg = src + i;
2989 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2990 user_ssize_t len;
2991
2992 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
2993
2994 if (direction == UIO_READ) {
2995 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
2996 seenlast = 1;
2997 } else {
2998 if (user_msg->msg_datalen != 0 && len == 0)
2999 seenlast = 1;
3000 }
3001
3002 if (seenlast == 0)
3003 retcnt ++;
3004
3005 if (spacetype == UIO_USERSPACE64) {
3006 struct user64_msghdr_x *msghdr64;
3007
3008 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3009
3010 msghdr64->msg_flags = user_msg->msg_flags;
3011 msghdr64->msg_datalen = len;
3012
3013 } else {
3014 struct user32_msghdr_x *msghdr32;
3015
3016 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3017
3018 msghdr32->msg_flags = user_msg->msg_flags;
3019 msghdr32->msg_datalen = len;
3020 }
3021 }
3022 return (retcnt);
3023}
3024
3025void
3026free_uio_array(struct uio **uiop, u_int count)
3027{
3028 u_int i;
3029
3030 for (i = 0; i < count; i++) {
3031 if (uiop[i] != NULL)
3032 uio_free(uiop[i]);
3033 }
3034}
3035
3036__private_extern__ user_ssize_t
3037uio_array_resid(struct uio **uiop, u_int count)
3038{
3039 user_ssize_t len = 0;
3040 u_int i;
3041
3042 for (i = 0; i < count; i++) {
3043 struct uio *auio = uiop[i];
3044
3045 if (auio != NULL)
3046 len += uio_resid(auio);
3047 }
3048 return (len);
3049}
3050
3051int
3052uio_array_is_valid(struct uio **uiop, u_int count)
3053{
3054 user_ssize_t len = 0;
3055 u_int i;
3056
3057 for (i = 0; i < count; i++) {
3058 struct uio *auio = uiop[i];
3059
3060 if (auio != NULL) {
3061 user_ssize_t resid = uio_resid(auio);
3062
3063 /*
3064 * Sanity check on the validity of the iovec:
3065 * no point of going over sb_max
3066 */
3067 if (resid < 0 || (u_int32_t)resid > sb_max)
3068 return (0);
3069
3070 len += resid;
3071 if (len < 0 || (u_int32_t)len > sb_max)
3072 return (0);
3073 }
3074 }
3075 return (1);
3076}
3077
3078
3079struct recv_msg_elem *
3080alloc_recv_msg_array(u_int count)
3081{
3082 struct recv_msg_elem *recv_msg_array;
3083
3084 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3085 M_TEMP, M_WAITOK | M_ZERO);
3086
3087 return (recv_msg_array);
3088}
3089
3090void
3091free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3092{
3093 u_int i;
3094
3095 for (i = 0; i < count; i++) {
3096 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3097
3098 if (recv_msg_elem->uio != NULL)
3099 uio_free(recv_msg_elem->uio);
3100 if (recv_msg_elem->psa != NULL)
3101 _FREE(recv_msg_elem->psa, M_TEMP);
3102 if (recv_msg_elem->controlp != NULL)
3103 m_freem(recv_msg_elem->controlp);
3104 }
3105 _FREE(recv_msg_array, M_TEMP);
3106}
3107
3108
3109__private_extern__ user_ssize_t
3110recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3111{
3112 user_ssize_t len = 0;
3113 u_int i;
3114
3115 for (i = 0; i < count; i++) {
3116 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3117
3118 if (recv_msg_elem->uio != NULL)
3119 len += uio_resid(recv_msg_elem->uio);
3120 }
3121 return (len);
3122}
3123
3124int
3125recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3126{
3127 user_ssize_t len = 0;
3128 u_int i;
3129
3130 for (i = 0; i < count; i++) {
3131 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3132
3133 if (recv_msg_elem->uio != NULL) {
3134 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3135
3136 /*
3137 * Sanity check on the validity of the iovec:
3138 * no point of going over sb_max
3139 */
3140 if (resid < 0 || (u_int32_t)resid > sb_max)
3141 return (0);
3142
3143 len += resid;
3144 if (len < 0 || (u_int32_t)len > sb_max)
3145 return (0);
3146 }
3147 }
3148 return (1);
3149}
3150
3151#if SENDFILE
3152
/*
 * Number of buffer slots per pass of the sendfile() copy loop: it is the
 * initial value of 'nbufs' there and sizes the on-stack uio buffer
 * (UIO_SIZEOF(SFUIOBUFS)).
 */
#define SFUIOBUFS 64

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Each evaluates to ((n - 1) >> shift) + 1, i.e. n divided by 2^shift and
 * rounded up.  'n' is converted to unsigned int first and must be
 * non-zero: for n == 0 the subtraction wraps around.
 */
#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)

/*
 * Upper send limit in the number of mbuf clusters, i.e. how many
 * clusters of each size are needed to hold SENDFILE_MAX_BYTES.
 */
#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3165
3166static void
3167alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3168 struct mbuf **m, boolean_t jumbocl)
3169{
3170 unsigned int needed;
3171
3172 if (pktlen == 0)
3173 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3174
3175 /*
3176 * Try to allocate for the whole thing. Since we want full control
3177 * over the buffer size and be able to accept partial result, we can't
3178 * use mbuf_allocpacket(). The logic below is similar to sosend().
3179 */
3180 *m = NULL;
3181 if (pktlen > MBIGCLBYTES && jumbocl) {
3182 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3183 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3184 }
3185 if (*m == NULL) {
3186 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3187 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3188 }
3189
3190 /*
3191 * Our previous attempt(s) at allocation had failed; the system
3192 * may be short on mbufs, and we want to block until they are
3193 * available. This time, ask just for 1 mbuf and don't return
3194 * until we get it.
3195 */
3196 if (*m == NULL) {
3197 needed = 1;
3198 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3199 }
3200 if (*m == NULL)
3201 panic("%s: blocking allocation returned NULL\n", __func__);
3202
3203 *maxchunks = needed;
3204}
3205
3206/*
3207 * sendfile(2).
3208 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3209 * struct sf_hdtr *hdtr, int flags)
3210 *
3211 * Send a file specified by 'fd' and starting at 'offset' to a socket
3212 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3213 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3214 * output. If specified, write the total number of bytes sent into *nbytes.
3215 */
3216int
3217sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3218{
3219 struct fileproc *fp;
3220 struct vnode *vp;
3221 struct socket *so;
3222 struct writev_nocancel_args nuap;
3223 user_ssize_t writev_retval;
3224 struct user_sf_hdtr user_hdtr;
3225 struct user32_sf_hdtr user32_hdtr;
3226 struct user64_sf_hdtr user64_hdtr;
3227 off_t off, xfsize;
3228 off_t nbytes = 0, sbytes = 0;
3229 int error = 0;
3230 size_t sizeof_hdtr;
3231 off_t file_size;
3232 struct vfs_context context = *vfs_context_current();
3233
3234 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3235 0, 0, 0, 0);
3236
3237 AUDIT_ARG(fd, uap->fd);
3238 AUDIT_ARG(value32, uap->s);
3239
3240 /*
3241 * Do argument checking. Must be a regular file in, stream
3242 * type and connected socket out, positive offset.
3243 */
3244 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3245 goto done;
3246 }
3247 if ((fp->f_flag & FREAD) == 0) {
3248 error = EBADF;
3249 goto done1;
3250 }
3251 if (vnode_isreg(vp) == 0) {
3252 error = ENOTSUP;
3253 goto done1;
3254 }
3255 error = file_socket(uap->s, &so);
3256 if (error) {
3257 goto done1;
3258 }
3259 if (so == NULL) {
3260 error = EBADF;
3261 goto done2;
3262 }
3263 if (so->so_type != SOCK_STREAM) {
3264 error = EINVAL;
3265 goto done2;
3266 }
3267 if ((so->so_state & SS_ISCONNECTED) == 0) {
3268 error = ENOTCONN;
3269 goto done2;
3270 }
3271 if (uap->offset < 0) {
3272 error = EINVAL;
3273 goto done2;
3274 }
3275 if (uap->nbytes == USER_ADDR_NULL) {
3276 error = EINVAL;
3277 goto done2;
3278 }
3279 if (uap->flags != 0) {
3280 error = EINVAL;
3281 goto done2;
3282 }
3283
3284 context.vc_ucred = fp->f_fglob->fg_cred;
3285
3286#if CONFIG_MACF_SOCKET_SUBSET
3287 /* JMM - fetch connected sockaddr? */
3288 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3289 if (error)
3290 goto done2;
3291#endif
3292
3293 /*
3294 * Get number of bytes to send
3295 * Should it applies to size of header and trailer?
3296 * JMM - error handling?
3297 */
3298 copyin(uap->nbytes, &nbytes, sizeof (off_t));
3299
3300 /*
3301 * If specified, get the pointer to the sf_hdtr struct for
3302 * any headers/trailers.
3303 */
3304 if (uap->hdtr != USER_ADDR_NULL) {
3305 caddr_t hdtrp;
3306
3307 bzero(&user_hdtr, sizeof (user_hdtr));
3308 if (IS_64BIT_PROCESS(p)) {
3309 hdtrp = (caddr_t)&user64_hdtr;
3310 sizeof_hdtr = sizeof (user64_hdtr);
3311 } else {
3312 hdtrp = (caddr_t)&user32_hdtr;
3313 sizeof_hdtr = sizeof (user32_hdtr);
3314 }
3315 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3316 if (error)
3317 goto done2;
3318 if (IS_64BIT_PROCESS(p)) {
3319 user_hdtr.headers = user64_hdtr.headers;
3320 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3321 user_hdtr.trailers = user64_hdtr.trailers;
3322 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3323 } else {
3324 user_hdtr.headers = user32_hdtr.headers;
3325 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3326 user_hdtr.trailers = user32_hdtr.trailers;
3327 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3328 }
3329
3330 /*
3331 * Send any headers. Wimp out and use writev(2).
3332 */
3333 if (user_hdtr.headers != USER_ADDR_NULL) {
3334 bzero(&nuap, sizeof (struct writev_args));
3335 nuap.fd = uap->s;
3336 nuap.iovp = user_hdtr.headers;
3337 nuap.iovcnt = user_hdtr.hdr_cnt;
3338 error = writev_nocancel(p, &nuap, &writev_retval);
3339 if (error) {
3340 goto done2;
3341 }
3342 sbytes += writev_retval;
3343 }
3344 }
3345
3346 /*
3347 * Get the file size for 2 reasons:
3348 * 1. We don't want to allocate more mbufs than necessary
3349 * 2. We don't want to read past the end of file
3350 */
3351 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3352 goto done2;
3353 }
3354
3355 /*
3356 * Simply read file data into a chain of mbufs that used with scatter
3357 * gather reads. We're not (yet?) setup to use zero copy external
3358 * mbufs that point to the file pages.
3359 */
3360 socket_lock(so, 1);
3361 error = sblock(&so->so_snd, SBL_WAIT);
3362 if (error) {
3363 socket_unlock(so, 1);
3364 goto done2;
3365 }
3366 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
3367 mbuf_t m0 = NULL, m;
3368 unsigned int nbufs = SFUIOBUFS, i;
3369 uio_t auio;
3370 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3371 size_t uiolen;
3372 user_ssize_t rlen;
3373 off_t pgoff;
3374 size_t pktlen;
3375 boolean_t jumbocl;
3376
3377 /*
3378 * Calculate the amount to transfer.
3379 * Align to round number of pages.
3380 * Not to exceed send socket buffer,
3381 * the EOF, or the passed in nbytes.
3382 */
3383 xfsize = sbspace(&so->so_snd);
3384
3385 if (xfsize <= 0) {
3386 if (so->so_state & SS_CANTSENDMORE) {
3387 error = EPIPE;
3388 goto done3;
3389 } else if ((so->so_state & SS_NBIO)) {
3390 error = EAGAIN;
3391 goto done3;
3392 } else {
3393 xfsize = PAGE_SIZE;
3394 }
3395 }
3396
3397 if (xfsize > SENDFILE_MAX_BYTES)
3398 xfsize = SENDFILE_MAX_BYTES;
3399 else if (xfsize > PAGE_SIZE)
3400 xfsize = trunc_page(xfsize);
3401 pgoff = off & PAGE_MASK_64;
3402 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
3403 xfsize = PAGE_SIZE_64 - pgoff;
3404 if (nbytes && xfsize > (nbytes - sbytes))
3405 xfsize = nbytes - sbytes;
3406 if (xfsize <= 0)
3407 break;
3408 if (off + xfsize > file_size)
3409 xfsize = file_size - off;
3410 if (xfsize <= 0)
3411 break;
3412
3413 /*
3414 * Attempt to use larger than system page-size clusters for
3415 * large writes only if there is a jumbo cluster pool and
3416 * if the socket is marked accordingly.
3417 */
3418 jumbocl = sosendjcl && njcl > 0 &&
3419 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3420
3421 socket_unlock(so, 0);
3422 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3423 pktlen = mbuf_pkthdr_maxlen(m0);
3424 if (pktlen < (size_t)xfsize)
3425 xfsize = pktlen;
3426
3427 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3428 UIO_READ, &uio_buf[0], sizeof (uio_buf));
3429 if (auio == NULL) {
3430 printf("sendfile failed. nbufs = %d. %s", nbufs,
3431 "File a radar related to rdar://10146739.\n");
3432 mbuf_freem(m0);
3433 error = ENXIO;
3434 socket_lock(so, 0);
3435 goto done3;
3436 }
3437
3438 for (i = 0, m = m0, uiolen = 0;
3439 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3440 i++, m = mbuf_next(m)) {
3441 size_t mlen = mbuf_maxlen(m);
3442
3443 if (mlen + uiolen > (size_t)xfsize)
3444 mlen = xfsize - uiolen;
3445 mbuf_setlen(m, mlen);
3446 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3447 mlen);
3448 uiolen += mlen;
3449 }
3450
3451 if (xfsize != uio_resid(auio))
3452 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3453 "%lld\n", xfsize, (long long)uio_resid(auio));
3454
3455 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3456 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3457 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3458 error = fo_read(fp, auio, FOF_OFFSET, &context);
3459 socket_lock(so, 0);
3460 if (error != 0) {
3461 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3462 error == EINTR || error == EWOULDBLOCK)) {
3463 error = 0;
3464 } else {
3465 mbuf_freem(m0);
3466 goto done3;
3467 }
3468 }
3469 xfsize -= uio_resid(auio);
3470 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3471 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3472 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3473
3474 if (xfsize == 0) {
3475 // printf("sendfile: fo_read 0 bytes, EOF\n");
3476 break;
3477 }
3478 if (xfsize + off > file_size)
3479 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3480 "%lld\n", xfsize, off, file_size);
3481 for (i = 0, m = m0, rlen = 0;
3482 i < nbufs && m != NULL && rlen < xfsize;
3483 i++, m = mbuf_next(m)) {
3484 size_t mlen = mbuf_maxlen(m);
3485
3486 if (rlen + mlen > (size_t)xfsize)
3487 mlen = xfsize - rlen;
3488 mbuf_setlen(m, mlen);
3489
3490 rlen += mlen;
3491 }
3492 mbuf_pkthdr_setlen(m0, xfsize);
3493
3494retry_space:
3495 /*
3496 * Make sure that the socket is still able to take more data.
3497 * CANTSENDMORE being true usually means that the connection
3498 * was closed. so_error is true when an error was sensed after
3499 * a previous send.
3500 * The state is checked after the page mapping and buffer
3501 * allocation above since those operations may block and make
3502 * any socket checks stale. From this point forward, nothing
3503 * blocks before the pru_send (or more accurately, any blocking
3504 * results in a loop back to here to re-check).
3505 */
3506 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3507 if (so->so_state & SS_CANTSENDMORE) {
3508 error = EPIPE;
3509 } else {
3510 error = so->so_error;
3511 so->so_error = 0;
3512 }
3513 m_freem(m0);
3514 goto done3;
3515 }
3516 /*
3517 * Wait for socket space to become available. We do this just
3518 * after checking the connection state above in order to avoid
3519 * a race condition with sbwait().
3520 */
3521 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3522 if (so->so_state & SS_NBIO) {
3523 m_freem(m0);
3524 error = EAGAIN;
3525 goto done3;
3526 }
3527 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3528 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3529 error = sbwait(&so->so_snd);
3530 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
3531 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3532 /*
3533 * An error from sbwait usually indicates that we've
3534 * been interrupted by a signal. If we've sent anything
3535 * then return bytes sent, otherwise return the error.
3536 */
3537 if (error) {
3538 m_freem(m0);
3539 goto done3;
3540 }
3541 goto retry_space;
3542 }
3543
3544 struct mbuf *control = NULL;
3545 {
3546 /*
3547 * Socket filter processing
3548 */
3549
3550 error = sflt_data_out(so, NULL, &m0, &control, 0);
3551 if (error) {
3552 if (error == EJUSTRETURN) {
3553 error = 0;
3554 continue;
3555 }
3556 goto done3;
3557 }
3558 /*
3559 * End Socket filter processing
3560 */
3561 }
3562 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3563 uap->s, 0, 0, 0, 0);
3564 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3565 0, control, p);
3566 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3567 uap->s, 0, 0, 0, 0);
3568 if (error) {
3569 goto done3;
3570 }
3571 }
3572 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3573 /*
3574 * Send trailers. Wimp out and use writev(2).
3575 */
3576 if (uap->hdtr != USER_ADDR_NULL &&
3577 user_hdtr.trailers != USER_ADDR_NULL) {
3578 bzero(&nuap, sizeof (struct writev_args));
3579 nuap.fd = uap->s;
3580 nuap.iovp = user_hdtr.trailers;
3581 nuap.iovcnt = user_hdtr.trl_cnt;
3582 error = writev_nocancel(p, &nuap, &writev_retval);
3583 if (error) {
3584 goto done2;
3585 }
3586 sbytes += writev_retval;
3587 }
3588done2:
3589 file_drop(uap->s);
3590done1:
3591 file_drop(uap->fd);
3592done:
3593 if (uap->nbytes != USER_ADDR_NULL) {
3594 /* XXX this appears bogus for some early failure conditions */
3595 copyout(&sbytes, uap->nbytes, sizeof (off_t));
3596 }
3597 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3598 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3599 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3600 return (error);
3601done3:
3602 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3603 goto done2;
3604}
3605
3606
3607#endif /* SENDFILE */
3608