1/*
2 * Copyright (c) 1996 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. Modifications may be freely made to this file if the above conditions
17 * are met.
18 */
19/*
20 * Copyright (c) 2003-2014 Apple Inc. All rights reserved.
21 *
22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
23 *
24 * This file contains Original Code and/or Modifications of Original Code
25 * as defined in and that are subject to the Apple Public Source License
26 * Version 2.0 (the 'License'). You may not use this file except in
27 * compliance with the License. The rights granted to you under the License
28 * may not be used to create, or enable the creation or redistribution of,
29 * unlawful or unlicensed copies of an Apple operating system, or to
30 * circumvent, violate, or enable the circumvention or violation of, any
31 * terms of an Apple operating system software license agreement.
32 *
33 * Please obtain a copy of the License at
34 * http://www.opensource.apple.com/apsl/ and read it before using this file.
35 *
36 * The Original Code and all software distributed under the License are
37 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
38 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
39 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
40 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
41 * Please see the License for the specific language governing rights and
42 * limitations under the License.
43 *
44 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
45 */
46/*
47 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
48 * support for mandatory and extensible security protections. This notice
49 * is included in support of clause 2.2 (b) of the Apple Public License,
50 * Version 2.0.
51 */
52
53/*
54 * This file contains a high-performance replacement for the socket-based
55 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
56 * all features of sockets, but does do everything that pipes normally
57 * do.
58 *
59 * Pipes are implemented as circular buffers. Following are the valid states in pipes operations
60 *
61 * _________________________________
62 * 1. |_________________________________| r=w, c=0
63 *
64 * _________________________________
65 * 2. |__r:::::wc_______________________| r <= w , c > 0
66 *
67 * _________________________________
68 * 3. |::::wc_____r:::::::::::::::::::::| r>w , c > 0
69 *
70 * _________________________________
71 * 4. |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
72 *
73 *
74 * Nomenclature:-
75 * a-z define the steps in a program flow
 * 1-4 are the states as defined above
77 * Action: is what file operation is done on the pipe
78 *
79 * Current:None Action: initialize with size M=200
80 * a. State 1 ( r=0, w=0, c=0)
81 *
82 * Current: a Action: write(100) (w < M)
83 * b. State 2 (r=0, w=100, c=100)
84 *
85 * Current: b Action: write(100) (w = M-w)
86 * c. State 4 (r=0,w=0,c=200)
87 *
88 * Current: b Action: read(70) ( r < c )
89 * d. State 2(r=70,w=100,c=30)
90 *
91 * Current: d Action: write(75) ( w < (m-w))
92 * e. State 2 (r=70,w=175,c=105)
93 *
94 * Current: d Action: write(110) ( w > (m-w))
95 * f. State 3 (r=70,w=10,c=140)
96 *
97 * Current: d Action: read(30) (r >= c )
98 * g. State 1 (r=100,w=100,c=0)
99 *
100 */
101
102/*
 * This code creates half-duplex pipe buffers for facilitating file-like
 * operations on pipes. The initial buffer is very small, but this can
 * dynamically change to larger sizes based on usage. The buffer size is never
 * reduced. The total amount of kernel memory used is governed by maxpipekva.
 * If the dynamic expansion limit is reached, the output thread is blocked
 * until the pipe buffer empties enough to continue.
109 *
110 * In order to limit the resource use of pipes, two sysctls exist:
111 *
112 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
113 * address space available to us in pipe_map.
114 *
115 * Memory usage may be monitored through the sysctls
116 * kern.ipc.pipes, kern.ipc.pipekva.
117 *
118 */
119
120#include <sys/param.h>
121#include <sys/systm.h>
122#include <sys/filedesc.h>
123#include <sys/kernel.h>
124#include <sys/vnode.h>
125#include <sys/proc_internal.h>
126#include <sys/kauth.h>
127#include <sys/file_internal.h>
128#include <sys/stat.h>
129#include <sys/ioctl.h>
130#include <sys/fcntl.h>
131#include <sys/malloc.h>
132#include <sys/syslog.h>
133#include <sys/unistd.h>
134#include <sys/resourcevar.h>
135#include <sys/aio_kern.h>
136#include <sys/signalvar.h>
137#include <sys/pipe.h>
138#include <sys/sysproto.h>
139#include <sys/proc_info.h>
140
141#include <security/audit/audit.h>
142
143#include <sys/kdebug.h>
144
145#include <kern/zalloc.h>
146#include <kern/kalloc.h>
147#include <vm/vm_kern.h>
148#include <libkern/OSAtomic.h>
149#include <libkern/section_keywords.h>
150
151#if CONFIG_MACF
152#include <security/mac_framework.h>
153#endif
154
/*
 * Shorthand accessors: each name below expands to the matching fg_* member
 * reached through the f_fglob pointer, so code in this file can write
 * fp->f_flag, fp->f_data, fp->f_ops, ... on a struct fileproc pointer.
 */
#define f_flag f_fglob->fg_flag
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
161
/*
 * interfaces to the outside world exported through file operations
 *
 * The prototypes below are the handlers that get plugged into the
 * pipeops table; pipe() stores a pointer to that table in the f_ops
 * of both descriptors it creates.
 */
static int pipe_read(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_write(struct fileproc *fp, struct uio *uio,
    int flags, vfs_context_t ctx);
static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
static int pipe_select(struct fileproc *fp, int which, void * wql,
    vfs_context_t ctx);
static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
    struct kevent_internal_s *kev, vfs_context_t ctx);
static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
    vfs_context_t ctx);
static int pipe_drain(struct fileproc *fp,vfs_context_t ctx);

/* File-operations vector for descriptors of type DTYPE_PIPE. */
static const struct fileops pipeops = {
	.fo_type = DTYPE_PIPE,
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_select = pipe_select,
	.fo_close = pipe_close,
	.fo_kqfilter = pipe_kqfilter,
	.fo_drain = pipe_drain,
};
188
/*
 * kqueue filter callbacks. The read and write filters have their own
 * event/touch/process handlers but share a single detach routine.
 */
static void filt_pipedetach(struct knote *kn);

static int filt_piperead(struct knote *kn, long hint);
static int filt_pipereadtouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_pipereadprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);

static int filt_pipewrite(struct knote *kn, long hint);
static int filt_pipewritetouch(struct knote *kn, struct kevent_internal_s *kev);
static int filt_pipewriteprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev);

/* Filter operations for the read side of a pipe (fd-based: f_isfd = 1). */
SECURITY_READ_ONLY_EARLY(struct filterops) pipe_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_piperead,
	.f_touch = filt_pipereadtouch,
	.f_process = filt_pipereadprocess,
};

/* Filter operations for the write side of a pipe (fd-based: f_isfd = 1). */
SECURITY_READ_ONLY_EARLY(struct filterops) pipe_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipewrite,
	.f_touch = filt_pipewritetouch,
	.f_process = filt_pipewriteprocess,
};
214
/*
 * Global accounting. amountpipes and amountpipekva are bumped atomically
 * by pipespace() every time a buffer is attached to a pipe; pipe_write()
 * compares amountpipekva against maxpipekva before trying to grow a buffer.
 */
static int nbigpipe; /* for compatibility sake. no longer used */
static int amountpipes; /* total number of pipes in system */
static int amountpipekva; /* total memory used by pipes */

int maxpipekva __attribute__((used)) = PIPE_KVAMAX; /* allowing 16MB max. */

/*
 * Optional sysctl exports, only compiled when PIPE_SYSCTLS is set.
 *
 * NOTE(review): maxpipekvawired and amountpipekvawired are referenced below
 * but are not declared anywhere in the visible part of this file; this block
 * presumably does not build if PIPE_SYSCTLS is enabled -- confirm.
 */
#if PIPE_SYSCTLS
SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
    &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
    &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
    &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
    &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
    &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
    &amountpipekvawired, 0, "Pipe wired KVA usage");
#endif
237
/* Internal helpers defined later in this file. */
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static int pipespace(struct pipe *cpipe, int size);
static int choose_pipespace(unsigned long current, unsigned long expected);
static int expand_pipespace(struct pipe *p, int target_size);
static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
static __inline int pipeio_lock(struct pipe *cpipe, int catch);
static __inline void pipeio_unlock(struct pipe *cpipe);

/* Event hooks implemented outside this file. */
extern int postpipeevent(struct pipe *, int);
extern void evpipefree(struct pipe *cpipe);

/* Lock group/attributes used for every pipe mutex; set up in pipeinit(). */
static lck_grp_t *pipe_mtx_grp;
static lck_attr_t *pipe_mtx_attr;
static lck_grp_attr_t *pipe_mtx_grp_attr;

/* Zone from which struct pipe is allocated (see pipe_create()). */
static zone_t pipe_zone;

/* Larger of PIPE_SIZE and the pipe's current buffer size. */
#define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )

#define PIPE_GARBAGE_AGE_LIMIT 5000 /* In milliseconds */
#define PIPE_GARBAGE_QUEUE_LIMIT 32000

/* One entry of the singly linked dead-pipe queue (head/tail kept below). */
struct pipe_garbage {
	struct pipe *pg_pipe;		/* the pipe this entry refers to */
	struct pipe_garbage *pg_next;	/* next entry in the queue */
	uint64_t pg_timestamp;		/* NOTE(review): presumably enqueue time; confirm in pipe_garbage_collect() */
};

static zone_t pipe_garbage_zone;
static struct pipe_garbage *pipe_garbage_head = NULL;
static struct pipe_garbage *pipe_garbage_tail = NULL;
static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
static int pipe_garbage_count = 0;
static lck_mtx_t *pipe_garbage_lock;
static void pipe_garbage_collect(struct pipe *cpipe);

/* Register pipeinit() to run during system initialization. */
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
277
278/* initial setup done at time of sysinit */
279void
280pipeinit(void)
281{
282 nbigpipe=0;
283 vm_size_t zone_size;
284
285 zone_size = 8192 * sizeof(struct pipe);
286 pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");
287
288
289 /* allocate lock group attribute and group for pipe mutexes */
290 pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
291 pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);
292
293 /* allocate the lock attribute for pipe mutexes */
294 pipe_mtx_attr = lck_attr_alloc_init();
295
296 /*
297 * Set up garbage collection for dead pipes
298 */
299 zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
300 sizeof(struct pipe_garbage);
301 pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
302 zone_size, 4096, "pipe garbage zone");
303 pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);
304
305}
306
307#ifndef CONFIG_EMBEDDED
308/* Bitmap for things to touch in pipe_touch() */
309#define PIPE_ATIME 0x00000001 /* time of last access */
310#define PIPE_MTIME 0x00000002 /* time of last modification */
311#define PIPE_CTIME 0x00000004 /* time of last status change */
312
313static void
314pipe_touch(struct pipe *tpipe, int touch)
315{
316 struct timespec now;
317
318 nanotime(&now);
319
320 if (touch & PIPE_ATIME) {
321 tpipe->st_atimespec.tv_sec = now.tv_sec;
322 tpipe->st_atimespec.tv_nsec = now.tv_nsec;
323 }
324
325 if (touch & PIPE_MTIME) {
326 tpipe->st_mtimespec.tv_sec = now.tv_sec;
327 tpipe->st_mtimespec.tv_nsec = now.tv_nsec;
328 }
329
330 if (touch & PIPE_CTIME) {
331 tpipe->st_ctimespec.tv_sec = now.tv_sec;
332 tpipe->st_ctimespec.tv_nsec = now.tv_nsec;
333 }
334}
335#endif
336
337static const unsigned int pipesize_blocks[] = {512,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 };
338
339/*
340 * finds the right size from possible sizes in pipesize_blocks
341 * returns the size which matches max(current,expected)
342 */
343static int
344choose_pipespace(unsigned long current, unsigned long expected)
345{
346 int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1;
347 unsigned long target;
348
349 /*
350 * assert that we always get an atomic transaction sized pipe buffer,
351 * even if the system pipe buffer high-water mark has been crossed.
352 */
353 assert(PIPE_BUF == pipesize_blocks[0]);
354
355 if (expected > current)
356 target = expected;
357 else
358 target = current;
359
360 while ( i >0 && pipesize_blocks[i-1] > target) {
361 i=i-1;
362
363 }
364
365 return pipesize_blocks[i];
366}
367
368
369/*
370 * expand the size of pipe while there is data to be read,
371 * and then free the old buffer once the current buffered
372 * data has been transferred to new storage.
373 * Required: PIPE_LOCK and io lock to be held by caller.
374 * returns 0 on success or no expansion possible
375 */
376static int
377expand_pipespace(struct pipe *p, int target_size)
378{
379 struct pipe tmp, oldpipe;
380 int error;
381 tmp.pipe_buffer.buffer = 0;
382
383 if (p->pipe_buffer.size >= (unsigned) target_size) {
384 return 0; /* the existing buffer is max size possible */
385 }
386
387 /* create enough space in the target */
388 error = pipespace(&tmp, target_size);
389 if (error != 0)
390 return (error);
391
392 oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
393 oldpipe.pipe_buffer.size = p->pipe_buffer.size;
394
395 memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size);
396 if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out ){
397 /* we are in State 3 and need extra copying for read to be consistent */
398 memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size);
399 p->pipe_buffer.in += p->pipe_buffer.size;
400 }
401
402 p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
403 p->pipe_buffer.size = tmp.pipe_buffer.size;
404
405
406 pipe_free_kmem(&oldpipe);
407 return 0;
408}
409
410/*
411 * The pipe system call for the DTYPE_PIPE type of pipes
412 *
413 * returns:
414 * FREAD | fd0 | -->[struct rpipe] --> |~~buffer~~| \
415 * (pipe_mutex)
416 * FWRITE | fd1 | -->[struct wpipe] --X /
417 */
418
419/* ARGSUSED */
420int
421pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
422{
423 struct fileproc *rf, *wf;
424 struct pipe *rpipe, *wpipe;
425 lck_mtx_t *pmtx;
426 int fd, error;
427
428 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
429 return (ENOMEM);
430
431 rpipe = wpipe = NULL;
432 if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
433 error = ENFILE;
434 goto freepipes;
435 }
436 /*
437 * allocate the space for the normal I/O direction up
438 * front... we'll delay the allocation for the other
439 * direction until a write actually occurs (most likely it won't)...
440 */
441 error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
442 if (error)
443 goto freepipes;
444
445 TAILQ_INIT(&rpipe->pipe_evlist);
446 TAILQ_INIT(&wpipe->pipe_evlist);
447
448 error = falloc(p, &rf, &fd, vfs_context_current());
449 if (error) {
450 goto freepipes;
451 }
452 retval[0] = fd;
453
454 /*
455 * for now we'll create half-duplex pipes(refer returns section above).
456 * this is what we've always supported..
457 */
458 rf->f_flag = FREAD;
459 rf->f_data = (caddr_t)rpipe;
460 rf->f_ops = &pipeops;
461
462 error = falloc(p, &wf, &fd, vfs_context_current());
463 if (error) {
464 fp_free(p, retval[0], rf);
465 goto freepipes;
466 }
467 wf->f_flag = FWRITE;
468 wf->f_data = (caddr_t)wpipe;
469 wf->f_ops = &pipeops;
470
471 rpipe->pipe_peer = wpipe;
472 wpipe->pipe_peer = rpipe;
473 /* both structures share the same mutex */
474 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
475
476 retval[1] = fd;
477#if CONFIG_MACF
478 /*
479 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
480 *
481 * struct pipe represents a pipe endpoint. The MAC label is shared
482 * between the connected endpoints. As a result mac_pipe_label_init() and
483 * mac_pipe_label_associate() should only be called on one of the endpoints
484 * after they have been connected.
485 */
486 mac_pipe_label_init(rpipe);
487 mac_pipe_label_associate(kauth_cred_get(), rpipe);
488 wpipe->pipe_label = rpipe->pipe_label;
489#endif
490 proc_fdlock_spin(p);
491 procfdtbl_releasefd(p, retval[0], NULL);
492 procfdtbl_releasefd(p, retval[1], NULL);
493 fp_drop(p, retval[0], rf, 1);
494 fp_drop(p, retval[1], wf, 1);
495 proc_fdunlock(p);
496
497
498 return (0);
499
500freepipes:
501 pipeclose(rpipe);
502 pipeclose(wpipe);
503 lck_mtx_free(pmtx, pipe_mtx_grp);
504
505 return (error);
506}
507
508int
509pipe_stat(struct pipe *cpipe, void *ub, int isstat64)
510{
511#if CONFIG_MACF
512 int error;
513#endif
514 int pipe_size = 0;
515 int pipe_count;
516 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
517 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
518
519 if (cpipe == NULL)
520 return (EBADF);
521 PIPE_LOCK(cpipe);
522
523#if CONFIG_MACF
524 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
525 if (error) {
526 PIPE_UNLOCK(cpipe);
527 return (error);
528 }
529#endif
530 if (cpipe->pipe_buffer.buffer == 0) {
531 /* must be stat'ing the write fd */
532 if (cpipe->pipe_peer) {
533 /* the peer still exists, use it's info */
534 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
535 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
536 } else {
537 pipe_count = 0;
538 }
539 } else {
540 pipe_size = MAX_PIPESIZE(cpipe);
541 pipe_count = cpipe->pipe_buffer.cnt;
542 }
543 /*
544 * since peer's buffer is setup ouside of lock
545 * we might catch it in transient state
546 */
547 if (pipe_size == 0)
548 pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]);
549
550 if (isstat64 != 0) {
551 sb64 = (struct stat64 *)ub;
552
553 bzero(sb64, sizeof(*sb64));
554 sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
555 sb64->st_blksize = pipe_size;
556 sb64->st_size = pipe_count;
557 sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize;
558
559 sb64->st_uid = kauth_getuid();
560 sb64->st_gid = kauth_getgid();
561
562 sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
563 sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
564
565 sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
566 sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
567
568 sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
569 sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
570
571 /*
572 * Return a relatively unique inode number based on the current
573 * address of this pipe's struct pipe. This number may be recycled
574 * relatively quickly.
575 */
576 sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
577 } else {
578 sb = (struct stat *)ub;
579
580 bzero(sb, sizeof(*sb));
581 sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
582 sb->st_blksize = pipe_size;
583 sb->st_size = pipe_count;
584 sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
585
586 sb->st_uid = kauth_getuid();
587 sb->st_gid = kauth_getgid();
588
589 sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
590 sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
591
592 sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
593 sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
594
595 sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
596 sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
597
598 /*
599 * Return a relatively unique inode number based on the current
600 * address of this pipe's struct pipe. This number may be recycled
601 * relatively quickly.
602 */
603 sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
604 }
605 PIPE_UNLOCK(cpipe);
606
607 /*
608 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
609 * st_uid, st_gid.
610 *
611 * XXX (st_dev) should be unique, but there is no device driver that
612 * XXX is associated with pipes, since they are implemented via a
613 * XXX struct fileops indirection rather than as FS objects.
614 */
615 return (0);
616}
617
618
619/*
620 * Allocate kva for pipe circular buffer, the space is pageable
621 * This routine will 'realloc' the size of a pipe safely, if it fails
622 * it will retain the old buffer.
623 * If it fails it will return ENOMEM.
624 */
625static int
626pipespace(struct pipe *cpipe, int size)
627{
628 vm_offset_t buffer;
629
630 if (size <= 0)
631 return(EINVAL);
632
633 if ((buffer = (vm_offset_t)kalloc(size)) == 0 )
634 return(ENOMEM);
635
636 /* free old resources if we're resizing */
637 pipe_free_kmem(cpipe);
638 cpipe->pipe_buffer.buffer = (caddr_t)buffer;
639 cpipe->pipe_buffer.size = size;
640 cpipe->pipe_buffer.in = 0;
641 cpipe->pipe_buffer.out = 0;
642 cpipe->pipe_buffer.cnt = 0;
643
644 OSAddAtomic(1, &amountpipes);
645 OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);
646
647 return (0);
648}
649
650/*
651 * initialize and allocate VM and memory for pipe
652 */
653static int
654pipe_create(struct pipe **cpipep)
655{
656 struct pipe *cpipe;
657 cpipe = (struct pipe *)zalloc(pipe_zone);
658
659 if ((*cpipep = cpipe) == NULL)
660 return (ENOMEM);
661
662 /*
663 * protect so pipespace or pipeclose don't follow a junk pointer
664 * if pipespace() fails.
665 */
666 bzero(cpipe, sizeof *cpipe);
667
668#ifndef CONFIG_EMBEDDED
669 /* Initial times are all the time of creation of the pipe */
670 pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME);
671#endif
672 return (0);
673}
674
675
676/*
677 * lock a pipe for I/O, blocking other access
678 */
679static inline int
680pipeio_lock(struct pipe *cpipe, int catch)
681{
682 int error;
683 while (cpipe->pipe_state & PIPE_LOCKFL) {
684 cpipe->pipe_state |= PIPE_LWANT;
685 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO,
686 "pipelk", 0);
687 if (error != 0)
688 return (error);
689 }
690 cpipe->pipe_state |= PIPE_LOCKFL;
691 return (0);
692}
693
694/*
695 * unlock a pipe I/O lock
696 */
697static inline void
698pipeio_unlock(struct pipe *cpipe)
699{
700 cpipe->pipe_state &= ~PIPE_LOCKFL;
701 if (cpipe->pipe_state & PIPE_LWANT) {
702 cpipe->pipe_state &= ~PIPE_LWANT;
703 wakeup(cpipe);
704 }
705}
706
707/*
708 * wakeup anyone whos blocked in select
709 */
710static void
711pipeselwakeup(struct pipe *cpipe, struct pipe *spipe)
712{
713 if (cpipe->pipe_state & PIPE_SEL) {
714 cpipe->pipe_state &= ~PIPE_SEL;
715 selwakeup(&cpipe->pipe_sel);
716 }
717 if (cpipe->pipe_state & PIPE_KNOTE)
718 KNOTE(&cpipe->pipe_sel.si_note, 1);
719
720 postpipeevent(cpipe, EV_RWBYTES);
721
722 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
723 if (spipe->pipe_pgid < 0)
724 gsignal(-spipe->pipe_pgid, SIGIO);
725 else
726 proc_signal(spipe->pipe_pgid, SIGIO);
727 }
728}
729
/*
 * Read from the pipe behind 'fp' into 'uio'. Semantics are similar to
 * file read.
 *
 * Returns 0 or an errno value; the number of bytes transferred is not
 * returned directly but is reflected in the residual count of 'uio'
 * (nread is only used internally to decide when to stop).
 *
 * Locking: the pipe mutex is held throughout except around uiomove();
 * the io lock (pipeio_lock) is held while data is copied out and is
 * dropped before sleeping for more data.
 */
/* ARGSUSED */
static int
pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
    __unused vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	/* mark the pipe busy for the duration of the read */
	++rpipe->pipe_busy;

	error = pipeio_lock(rpipe, 1);
	if (error)
		goto unlocked_error;

#if CONFIG_MACF
	error = mac_pipe_check_read(kauth_cred_get(), rpipe);
	if (error)
		goto locked_error;
#endif


	while (uio_resid(uio)) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * # bytes to read is min( bytes from read pointer until end of buffer,
			 * total unread bytes,
			 * user requested byte count)
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			// LP64todo - fix this!
			/*
			 * NOTE(review): the residual is narrowed to u_int before
			 * the comparison; presumably callers bound the request
			 * size so this cannot truncate -- confirm.
			 */
			if (size > (u_int) uio_resid(uio))
				size = (u_int) uio_resid(uio);

			PIPE_UNLOCK(rpipe); /* we still hold io lock.*/
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			/* advance the read pointer, wrapping at the end of the buffer */
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning. This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				break;
			}

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read in previous iteration.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeio_unlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
				/* re-take the io lock before looping again */
				if (error == 0)
					error = pipeio_lock(rpipe, 1);
			}
			/* the io lock is not held on this path, so skip the unlock */
			if (error)
				goto unlocked_error;
		}
	}
#if CONFIG_MACF
locked_error:
#endif
	pipeio_unlock(rpipe);

unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/* there is free space in the buffer: notify select/kqueue/SIGIO waiters */
	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
		pipeselwakeup(rpipe, rpipe->pipe_peer);

#ifndef CONFIG_EMBEDDED
	/* update last read time */
	pipe_touch(rpipe, PIPE_ATIME);
#endif

	PIPE_UNLOCK(rpipe);

	return (error);
}
881
/*
 * perform a write of n bytes into the read side of buffer. Since
 * pipes are unidirectional a write is meant to be read by the otherside only.
 *
 * Naming inside this function: 'rpipe' is the endpoint attached to the
 * descriptor being written (fp->f_data) and 'wpipe' is its peer, whose
 * buffer receives the data. The mutex is always taken and released via
 * 'rpipe'. Returns 0 or an errno value (EPIPE when the peer is gone or
 * draining/EOF, EAGAIN for a non-blocking write that cannot proceed).
 */
static int
pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
    __unused vfs_context_t ctx)
{
	int error = 0;
	int orig_resid;
	int pipe_size;
	struct pipe *wpipe, *rpipe;
	// LP64todo - fix this!
	orig_resid = uio_resid(uio);
	int space;

	rpipe = (struct pipe *)fp->f_data;

	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side; EPIPE is returned here
	 * (NOTE(review): SIGPIPE is presumably raised by the caller -- confirm).
	 */
	if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#if CONFIG_MACF
	error = mac_pipe_check_write(kauth_cred_get(), wpipe);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	pipe_size = 0;

	/*
	 * need to allocate some storage... we delay the allocation
	 * until the first write on fd[0] to avoid allocating storage for both
	 * 'pipe ends'... most pipes are half-duplex with the writes targeting
	 * fd[1], so allocating space for both ends is a waste...
	 *
	 * A new size is also chosen when this write does not fit in the free
	 * space and the global KVA budget has not been exhausted.
	 */

	if ( wpipe->pipe_buffer.buffer == 0 || (
	    (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
	    amountpipekva < maxpipekva ) ) {

		pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
	}
	if (pipe_size) {
		/*
		 * need to do initial allocation or resizing of pipe
		 * holding both structure and io locks.
		 */
		if ((error = pipeio_lock(wpipe, 1)) == 0) {
			/* empty pipe: plain (re)allocation; otherwise grow and keep the data */
			if (wpipe->pipe_buffer.cnt == 0)
				error = pipespace(wpipe, pipe_size);
			else
				error = expand_pipespace(wpipe, pipe_size);

			pipeio_unlock(wpipe);

			/* allocation failed */
			if (wpipe->pipe_buffer.buffer == 0)
				error = ENOMEM;
		}
		if (error) {
			/*
			 * If an error occurred unbusy and return, waking up any pending
			 * readers.
			 */
			--wpipe->pipe_busy;
			if ((wpipe->pipe_busy == 0) &&
			    (wpipe->pipe_state & PIPE_WANT)) {
				wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
				wakeup(wpipe);
			}
			PIPE_UNLOCK(rpipe);
			return(error);
		}
	}

	while (uio_resid(uio)) {

	retrywrite:
		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {

			if ((error = pipeio_lock(wpipe,1)) == 0) {
				int size; /* Transfer size */
				int segsize; /* first segment to transfer */

				/* the reader may have gone away while we waited for the io lock */
				if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
					pipeio_unlock(wpipe);
					error = EPIPE;
					break;
				}
				/*
				 * If a process blocked in pipeio_lock, our
				 * value for space might be bad... the mutex
				 * is dropped while we're blocked
				 */
				if (space > (int)(wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt)) {
					pipeio_unlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				// LP64todo - fix this!
				if (space > uio_resid(uio))
					size = uio_resid(uio);
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer. If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				/* the mutex is dropped around the copy; the io lock stays held */
				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
				    segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes. Wraparound
					 * happened. (State 3)
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer "
						    "wraparound disappeared");

					PIPE_UNLOCK(rpipe);
					error = uiomove(
					    &wpipe->pipe_buffer.buffer[0],
					    size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				/*
				 * readers never know to read until count is updated.
				 */
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					/* past the end: the write wrapped, so 'in' is the length of the second segment */
					if (wpipe->pipe_buffer.in >
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size)
							panic("Expected "
							    "wraparound bad");
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeio_unlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			/*
			 * don't block on non-blocking I/O
			 * we'll do the pipeselwakeup on the way out
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * If the read side is draining or at EOF, stop with
			 * EPIPE instead of sleeping.
			 */
			if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				error = EPIPE;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe, wpipe);

			/* ask the reader to wake us once it has consumed data */
			wpipe->pipe_state |= PIPE_WANTW;

			error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);

			if (error != 0)
				break;
		}
	}
	--wpipe->pipe_busy;

	/* wake whoever is waiting (PIPE_WANT) for the pipe to become idle */
	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	}
	if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If there are any characters in the buffer, we wake up
		 * the reader if it was blocked waiting for data.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		/*
		 * wake up thread blocked in select/poll or post the notification
		 */
		pipeselwakeup(wpipe, wpipe);
	}

#ifndef CONFIG_EMBEDDED
	/* Update modification, status change (# of bytes in pipe) times */
	pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
	pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
#endif
	PIPE_UNLOCK(rpipe);

	return (error);
}
1140
1141/*
1142 * we implement a very minimal set of ioctls for compatibility with sockets.
1143 */
1144/* ARGSUSED 3 */
1145static int
1146pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
1147 __unused vfs_context_t ctx)
1148{
1149 struct pipe *mpipe = (struct pipe *)fp->f_data;
1150#if CONFIG_MACF
1151 int error;
1152#endif
1153
1154 PIPE_LOCK(mpipe);
1155
1156#if CONFIG_MACF
1157 error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd);
1158 if (error) {
1159 PIPE_UNLOCK(mpipe);
1160
1161 return (error);
1162 }
1163#endif
1164
1165 switch (cmd) {
1166
1167 case FIONBIO:
1168 PIPE_UNLOCK(mpipe);
1169 return (0);
1170
1171 case FIOASYNC:
1172 if (*(int *)data) {
1173 mpipe->pipe_state |= PIPE_ASYNC;
1174 } else {
1175 mpipe->pipe_state &= ~PIPE_ASYNC;
1176 }
1177 PIPE_UNLOCK(mpipe);
1178 return (0);
1179
1180 case FIONREAD:
1181 *(int *)data = mpipe->pipe_buffer.cnt;
1182 PIPE_UNLOCK(mpipe);
1183 return (0);
1184
1185 case TIOCSPGRP:
1186 mpipe->pipe_pgid = *(int *)data;
1187
1188 PIPE_UNLOCK(mpipe);
1189 return (0);
1190
1191 case TIOCGPGRP:
1192 *(int *)data = mpipe->pipe_pgid;
1193
1194 PIPE_UNLOCK(mpipe);
1195 return (0);
1196
1197 }
1198 PIPE_UNLOCK(mpipe);
1199 return (ENOTTY);
1200}
1201
1202
1203static int
1204pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
1205{
1206 struct pipe *rpipe = (struct pipe *)fp->f_data;
1207 struct pipe *wpipe;
1208 int retnum = 0;
1209
1210 if (rpipe == NULL || rpipe == (struct pipe *)-1)
1211 return (retnum);
1212
1213 PIPE_LOCK(rpipe);
1214
1215 wpipe = rpipe->pipe_peer;
1216
1217
1218#if CONFIG_MACF
1219 /*
1220 * XXX We should use a per thread credential here; minimally, the
1221 * XXX process credential should have a persistent reference on it
1222 * XXX before being passed in here.
1223 */
1224 if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) {
1225 PIPE_UNLOCK(rpipe);
1226 return (0);
1227 }
1228#endif
1229 switch (which) {
1230
1231 case FREAD:
1232 if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1233 (rpipe->pipe_buffer.cnt > 0) ||
1234 (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
1235
1236 retnum = 1;
1237 } else {
1238 rpipe->pipe_state |= PIPE_SEL;
1239 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1240 }
1241 break;
1242
1243 case FWRITE:
1244 if (wpipe)
1245 wpipe->pipe_state |= PIPE_WSELECT;
1246 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1247 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1248 (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
1249
1250 retnum = 1;
1251 } else {
1252 wpipe->pipe_state |= PIPE_SEL;
1253 selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
1254 }
1255 break;
1256 case 0:
1257 rpipe->pipe_state |= PIPE_SEL;
1258 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1259 break;
1260 }
1261 PIPE_UNLOCK(rpipe);
1262
1263 return (retnum);
1264}
1265
1266
1267/* ARGSUSED 1 */
1268static int
1269pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
1270{
1271 struct pipe *cpipe;
1272
1273 proc_fdlock_spin(vfs_context_proc(ctx));
1274 cpipe = (struct pipe *)fg->fg_data;
1275 fg->fg_data = NULL;
1276 proc_fdunlock(vfs_context_proc(ctx));
1277 if (cpipe)
1278 pipeclose(cpipe);
1279
1280 return (0);
1281}
1282
1283static void
1284pipe_free_kmem(struct pipe *cpipe)
1285{
1286 if (cpipe->pipe_buffer.buffer != NULL) {
1287 OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
1288 OSAddAtomic(-1, &amountpipes);
1289 kfree((void *)cpipe->pipe_buffer.buffer,
1290 cpipe->pipe_buffer.size);
1291 cpipe->pipe_buffer.buffer = NULL;
1292 cpipe->pipe_buffer.size = 0;
1293 }
1294}
1295
1296/*
1297 * shutdown the pipe
1298 */
1299static void
1300pipeclose(struct pipe *cpipe)
1301{
1302 struct pipe *ppipe;
1303
1304 if (cpipe == NULL)
1305 return;
1306 /* partially created pipes won't have a valid mutex. */
1307 if (PIPE_MTX(cpipe) != NULL)
1308 PIPE_LOCK(cpipe);
1309
1310
1311 /*
1312 * If the other side is blocked, wake it up saying that
1313 * we want to close it down.
1314 */
1315 cpipe->pipe_state &= ~PIPE_DRAIN;
1316 cpipe->pipe_state |= PIPE_EOF;
1317 pipeselwakeup(cpipe, cpipe);
1318
1319 while (cpipe->pipe_busy) {
1320 cpipe->pipe_state |= PIPE_WANT;
1321
1322 wakeup(cpipe);
1323 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1324 }
1325
1326#if CONFIG_MACF
1327 /*
1328 * Free the shared pipe label only after the two ends are disconnected.
1329 */
1330 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
1331 mac_pipe_label_destroy(cpipe);
1332#endif
1333
1334 /*
1335 * Disconnect from peer
1336 */
1337 if ((ppipe = cpipe->pipe_peer) != NULL) {
1338
1339 ppipe->pipe_state &= ~(PIPE_DRAIN);
1340 ppipe->pipe_state |= PIPE_EOF;
1341
1342 pipeselwakeup(ppipe, ppipe);
1343 wakeup(ppipe);
1344
1345 if (cpipe->pipe_state & PIPE_KNOTE)
1346 KNOTE(&ppipe->pipe_sel.si_note, 1);
1347
1348 postpipeevent(ppipe, EV_RCLOSED);
1349
1350 ppipe->pipe_peer = NULL;
1351 }
1352 evpipefree(cpipe);
1353
1354 /*
1355 * free resources
1356 */
1357 if (PIPE_MTX(cpipe) != NULL) {
1358 if (ppipe != NULL) {
1359 /*
1360 * since the mutex is shared and the peer is still
1361 * alive, we need to release the mutex, not free it
1362 */
1363 PIPE_UNLOCK(cpipe);
1364 } else {
1365 /*
1366 * peer is gone, so we're the sole party left with
1367 * interest in this mutex... unlock and free it
1368 */
1369 PIPE_UNLOCK(cpipe);
1370 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp);
1371 }
1372 }
1373 pipe_free_kmem(cpipe);
1374 if (cpipe->pipe_state & PIPE_WSELECT) {
1375 pipe_garbage_collect(cpipe);
1376 } else {
1377 zfree(pipe_zone, cpipe);
1378 pipe_garbage_collect(NULL);
1379 }
1380
1381}
1382
1383/*ARGSUSED*/
1384static int
1385filt_piperead_common(struct knote *kn, struct pipe *rpipe)
1386{
1387 struct pipe *wpipe;
1388 int retval;
1389
1390 /*
1391 * we're being called back via the KNOTE post
1392 * we made in pipeselwakeup, and we already hold the mutex...
1393 */
1394
1395 wpipe = rpipe->pipe_peer;
1396 kn->kn_data = rpipe->pipe_buffer.cnt;
1397 if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) ||
1398 (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
1399 kn->kn_flags |= EV_EOF;
1400 retval = 1;
1401 } else {
1402 int64_t lowwat = 1;
1403 if (kn->kn_sfflags & NOTE_LOWAT) {
1404 if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe))
1405 lowwat = MAX_PIPESIZE(rpipe);
1406 else if (kn->kn_sdata > lowwat)
1407 lowwat = kn->kn_sdata;
1408 }
1409 retval = kn->kn_data >= lowwat;
1410 }
1411 return (retval);
1412}
1413
1414static int
1415filt_piperead(struct knote *kn, long hint)
1416{
1417#pragma unused(hint)
1418 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1419
1420 return filt_piperead_common(kn, rpipe);
1421}
1422
1423static int
1424filt_pipereadtouch(struct knote *kn, struct kevent_internal_s *kev)
1425{
1426 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1427 int retval;
1428
1429 PIPE_LOCK(rpipe);
1430
1431 /* accept new inputs (and save the low water threshold and flag) */
1432 kn->kn_sdata = kev->data;
1433 kn->kn_sfflags = kev->fflags;
1434
1435 /* identify if any events are now fired */
1436 retval = filt_piperead_common(kn, rpipe);
1437
1438 PIPE_UNLOCK(rpipe);
1439
1440 return retval;
1441}
1442
1443static int
1444filt_pipereadprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
1445{
1446#pragma unused(data)
1447 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1448 int retval;
1449
1450 PIPE_LOCK(rpipe);
1451 retval = filt_piperead_common(kn, rpipe);
1452 if (retval) {
1453 *kev = kn->kn_kevent;
1454 if (kn->kn_flags & EV_CLEAR) {
1455 kn->kn_fflags = 0;
1456 kn->kn_data = 0;
1457 }
1458 }
1459 PIPE_UNLOCK(rpipe);
1460
1461 return (retval);
1462}
1463
1464/*ARGSUSED*/
1465static int
1466filt_pipewrite_common(struct knote *kn, struct pipe *rpipe)
1467{
1468 struct pipe *wpipe;
1469
1470 /*
1471 * we're being called back via the KNOTE post
1472 * we made in pipeselwakeup, and we already hold the mutex...
1473 */
1474 wpipe = rpipe->pipe_peer;
1475
1476 if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
1477 kn->kn_data = 0;
1478 kn->kn_flags |= EV_EOF;
1479 return (1);
1480 }
1481 kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt;
1482
1483 int64_t lowwat = PIPE_BUF;
1484 if (kn->kn_sfflags & NOTE_LOWAT) {
1485 if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe))
1486 lowwat = MAX_PIPESIZE(wpipe);
1487 else if (kn->kn_sdata > lowwat)
1488 lowwat = kn->kn_sdata;
1489 }
1490
1491 return (kn->kn_data >= lowwat);
1492}
1493
1494/*ARGSUSED*/
1495static int
1496filt_pipewrite(struct knote *kn, long hint)
1497{
1498#pragma unused(hint)
1499 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1500
1501 return filt_pipewrite_common(kn, rpipe);
1502}
1503
1504
1505static int
1506filt_pipewritetouch(struct knote *kn, struct kevent_internal_s *kev)
1507{
1508 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1509 int res;
1510
1511 PIPE_LOCK(rpipe);
1512
1513 /* accept new kevent data (and save off lowat threshold and flag) */
1514 kn->kn_sfflags = kev->fflags;
1515 kn->kn_sdata = kev->data;
1516
1517 /* determine if any event is now deemed fired */
1518 res = filt_pipewrite_common(kn, rpipe);
1519
1520 PIPE_UNLOCK(rpipe);
1521
1522 return res;
1523}
1524
1525static int
1526filt_pipewriteprocess(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev)
1527{
1528#pragma unused(data)
1529 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1530 int res;
1531
1532 PIPE_LOCK(rpipe);
1533 res = filt_pipewrite_common(kn, rpipe);
1534 if (res) {
1535 *kev = kn->kn_kevent;
1536 if (kn->kn_flags & EV_CLEAR) {
1537 kn->kn_fflags = 0;
1538 kn->kn_data = 0;
1539 }
1540 }
1541 PIPE_UNLOCK(rpipe);
1542
1543 return res;
1544}
1545
1546/*ARGSUSED*/
1547static int
1548pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn,
1549 __unused struct kevent_internal_s *kev, __unused vfs_context_t ctx)
1550{
1551 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1552 int res;
1553
1554 PIPE_LOCK(cpipe);
1555#if CONFIG_MACF
1556 /*
1557 * XXX We should use a per thread credential here; minimally, the
1558 * XXX process credential should have a persistent reference on it
1559 * XXX before being passed in here.
1560 */
1561 if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) {
1562 PIPE_UNLOCK(cpipe);
1563 kn->kn_flags = EV_ERROR;
1564 kn->kn_data = EPERM;
1565 return 0;
1566 }
1567#endif
1568
1569 switch (kn->kn_filter) {
1570 case EVFILT_READ:
1571 kn->kn_filtid = EVFILTID_PIPE_R;
1572
1573 /* determine initial state */
1574 res = filt_piperead_common(kn, cpipe);
1575 break;
1576
1577 case EVFILT_WRITE:
1578 kn->kn_filtid = EVFILTID_PIPE_W;
1579
1580 if (cpipe->pipe_peer == NULL) {
1581 /*
1582 * other end of pipe has been closed
1583 */
1584 PIPE_UNLOCK(cpipe);
1585 kn->kn_flags = EV_ERROR;
1586 kn->kn_data = EPIPE;
1587 return 0;
1588 }
1589 if (cpipe->pipe_peer)
1590 cpipe = cpipe->pipe_peer;
1591
1592 /* determine inital state */
1593 res = filt_pipewrite_common(kn, cpipe);
1594 break;
1595 default:
1596 PIPE_UNLOCK(cpipe);
1597 kn->kn_flags = EV_ERROR;
1598 kn->kn_data = EINVAL;
1599 return 0;
1600 }
1601
1602 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn))
1603 cpipe->pipe_state |= PIPE_KNOTE;
1604
1605 PIPE_UNLOCK(cpipe);
1606 return res;
1607}
1608
1609static void
1610filt_pipedetach(struct knote *kn)
1611{
1612 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1613
1614 PIPE_LOCK(cpipe);
1615
1616 if (kn->kn_filter == EVFILT_WRITE) {
1617 if (cpipe->pipe_peer == NULL) {
1618 PIPE_UNLOCK(cpipe);
1619 return;
1620 }
1621 cpipe = cpipe->pipe_peer;
1622 }
1623 if (cpipe->pipe_state & PIPE_KNOTE) {
1624 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn))
1625 cpipe->pipe_state &= ~PIPE_KNOTE;
1626 }
1627 PIPE_UNLOCK(cpipe);
1628}
1629
1630int
1631fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
1632{
1633#if CONFIG_MACF
1634 int error;
1635#endif
1636 struct timespec now;
1637 struct vinfo_stat * ub;
1638 int pipe_size = 0;
1639 int pipe_count;
1640
1641 if (cpipe == NULL)
1642 return (EBADF);
1643 PIPE_LOCK(cpipe);
1644
1645#if CONFIG_MACF
1646 error = mac_pipe_check_stat(kauth_cred_get(), cpipe);
1647 if (error) {
1648 PIPE_UNLOCK(cpipe);
1649 return (error);
1650 }
1651#endif
1652 if (cpipe->pipe_buffer.buffer == 0) {
1653 /*
1654 * must be stat'ing the write fd
1655 */
1656 if (cpipe->pipe_peer) {
1657 /*
1658 * the peer still exists, use it's info
1659 */
1660 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
1661 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
1662 } else {
1663 pipe_count = 0;
1664 }
1665 } else {
1666 pipe_size = MAX_PIPESIZE(cpipe);
1667 pipe_count = cpipe->pipe_buffer.cnt;
1668 }
1669 /*
1670 * since peer's buffer is setup ouside of lock
1671 * we might catch it in transient state
1672 */
1673 if (pipe_size == 0)
1674 pipe_size = PIPE_SIZE;
1675
1676 ub = &pinfo->pipe_stat;
1677
1678 bzero(ub, sizeof(*ub));
1679 ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
1680 ub->vst_blksize = pipe_size;
1681 ub->vst_size = pipe_count;
1682 if (ub->vst_blksize != 0)
1683 ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize;
1684 ub->vst_nlink = 1;
1685
1686 ub->vst_uid = kauth_getuid();
1687 ub->vst_gid = kauth_getgid();
1688
1689 nanotime(&now);
1690 ub->vst_atime = now.tv_sec;
1691 ub->vst_atimensec = now.tv_nsec;
1692
1693 ub->vst_mtime = now.tv_sec;
1694 ub->vst_mtimensec = now.tv_nsec;
1695
1696 ub->vst_ctime = now.tv_sec;
1697 ub->vst_ctimensec = now.tv_nsec;
1698
1699 /*
1700 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
1701 * XXX (st_dev, st_ino) should be unique.
1702 */
1703
1704 pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe);
1705 pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)(cpipe->pipe_peer));
1706 pinfo->pipe_status = cpipe->pipe_state;
1707
1708 PIPE_UNLOCK(cpipe);
1709
1710 return (0);
1711}
1712
1713
1714static int
1715pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
1716{
1717
1718 /* Note: fdlock already held */
1719 struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data);
1720
1721 if (cpipe) {
1722 PIPE_LOCK(cpipe);
1723 cpipe->pipe_state |= PIPE_DRAIN;
1724 cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1725 wakeup(cpipe);
1726
1727 /* Must wake up peer: a writer sleeps on the read side */
1728 if ((ppipe = cpipe->pipe_peer)) {
1729 ppipe->pipe_state |= PIPE_DRAIN;
1730 ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW);
1731 wakeup(ppipe);
1732 }
1733
1734 PIPE_UNLOCK(cpipe);
1735 return 0;
1736 }
1737
1738 return 1;
1739}
1740
1741
1742 /*
1743 * When a thread sets a write-select on a pipe, it creates an implicit,
1744 * untracked dependency between that thread and the peer of the pipe
1745 * on which the select is set. If the peer pipe is closed and freed
1746 * before the select()ing thread wakes up, the system will panic as
1747 * it attempts to unwind the dangling select(). To avoid that panic,
1748 * we notice whenever a dangerous select() is set on a pipe, and
1749 * defer the final deletion of the pipe until that select()s are all
1750 * resolved. Since we can't currently detect exactly when that
1751 * resolution happens, we use a simple garbage collection queue to
1752 * reap the at-risk pipes 'later'.
1753 */
1754static void
1755pipe_garbage_collect(struct pipe *cpipe)
1756{
1757 uint64_t old, now;
1758 struct pipe_garbage *pgp;
1759
1760 /* Convert msecs to nsecs and then to abstime */
1761 old = pipe_garbage_age_limit * 1000000;
1762 nanoseconds_to_absolutetime(old, &old);
1763
1764 lck_mtx_lock(pipe_garbage_lock);
1765
1766 /* Free anything that's been on the queue for <mumble> seconds */
1767 now = mach_absolute_time();
1768 old = now - old;
1769 while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) {
1770 pipe_garbage_head = pgp->pg_next;
1771 if (pipe_garbage_head == NULL)
1772 pipe_garbage_tail = NULL;
1773 pipe_garbage_count--;
1774 zfree(pipe_zone, pgp->pg_pipe);
1775 zfree(pipe_garbage_zone, pgp);
1776 }
1777
1778 /* Add the new pipe (if any) to the tail of the garbage queue */
1779 if (cpipe) {
1780 cpipe->pipe_state = PIPE_DEAD;
1781 pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone);
1782 if (pgp == NULL) {
1783 /*
1784 * We're too low on memory to garbage collect the
1785 * pipe. Freeing it runs the risk of panicing the
1786 * system. All we can do is leak it and leave
1787 * a breadcrumb behind. The good news, such as it
1788 * is, is that this will probably never happen.
1789 * We will probably hit the panic below first.
1790 */
1791 printf("Leaking pipe %p - no room left in the queue",
1792 cpipe);
1793 lck_mtx_unlock(pipe_garbage_lock);
1794 return;
1795 }
1796
1797 pgp->pg_pipe = cpipe;
1798 pgp->pg_timestamp = now;
1799 pgp->pg_next = NULL;
1800
1801 if (pipe_garbage_tail)
1802 pipe_garbage_tail->pg_next = pgp;
1803 pipe_garbage_tail = pgp;
1804 if (pipe_garbage_head == NULL)
1805 pipe_garbage_head = pipe_garbage_tail;
1806
1807 if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT)
1808 panic("Length of pipe garbage queue exceeded %d",
1809 PIPE_GARBAGE_QUEUE_LIMIT);
1810 }
1811 lck_mtx_unlock(pipe_garbage_lock);
1812}
1813
1814