sys_pipe.c source code [xnu/bsd/kern/sys_pipe.c]

1	/*
2	* Copyright (c) 1996 John S. Dyson
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice immediately at the beginning of the file, without modification,
10	* this list of conditions, and the following disclaimer.
11	* 2. Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* 3. Absolutely no warranty of function or purpose is made by the author
15	* John S. Dyson.
16	* 4. Modifications may be freely made to this file if the above conditions
17	* are met.
18	*/
19	/*
20	* Copyright (c) 2003-2020 Apple Inc. All rights reserved.
21	*
22	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
23	*
24	* This file contains Original Code and/or Modifications of Original Code
25	* as defined in and that are subject to the Apple Public Source License
26	* Version 2.0 (the 'License'). You may not use this file except in
27	* compliance with the License. The rights granted to you under the License
28	* may not be used to create, or enable the creation or redistribution of,
29	* unlawful or unlicensed copies of an Apple operating system, or to
30	* circumvent, violate, or enable the circumvention or violation of, any
31	* terms of an Apple operating system software license agreement.
32	*
33	* Please obtain a copy of the License at
34	* http://www.opensource.apple.com/apsl/ and read it before using this file.
35	*
36	* The Original Code and all software distributed under the License are
37	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
38	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
39	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
40	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
41	* Please see the License for the specific language governing rights and
42	* limitations under the License.
43	*
44	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
45	*/
46	/*
47	* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
48	* support for mandatory and extensible security protections. This notice
49	* is included in support of clause 2.2 (b) of the Apple Public License,
50	* Version 2.0.
51	*/
52
/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Pipes are implemented as circular buffers. The following are the valid
 * states in pipe operations (r = read offset, w = write offset, c = count):
 *
 *      _________________________________
 * 1.  |_________________________________| r=w, c=0
 *
 *      _________________________________
 * 2.  |__r:::::wc_______________________| r <= w , c > 0
 *
 *      _________________________________
 * 3.  |::::wc_____r:::::::::::::::::::::| r>w , c > 0
 *
 *      _________________________________
 * 4.  |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
 *
 *
 * Nomenclature:-
 *  a-z define the steps in a program flow
 *  1-4 are the states as defined above
 *  Action: is what file operation is done on the pipe
 *
 *  Current:None  Action: initialize with size M=200
 *  a. State 1 ( r=0, w=0, c=0)
 *
 *  Current: a    Action: write(100) (w < M)
 *  b. State 2 (r=0, w=100, c=100)
 *
 *  Current: b    Action: write(100) (w = M-w)
 *  c. State 4 (r=0,w=0,c=200)
 *
 *  Current: b    Action: read(70)  ( r < c )
 *  d. State 2(r=70,w=100,c=30)
 *
 *  Current: d    Action: write(75) ( w < (m-w))
 *  e. State 2 (r=70,w=175,c=105)
 *
 *  Current: d    Action: write(110) ( w > (m-w))
 *  f. State 3 (r=70,w=10,c=140)
 *
 *  Current: d    Action: read(30) (r >= c )
 *  g. State 1 (r=100,w=100,c=0)
 *
 */
101
/*
 * This code creates half-duplex pipe buffers for facilitating file-like
 * operations on pipes. The initial buffer is very small, but it can
 * dynamically grow to larger sizes based on usage. The buffer size is never
 * reduced. The total amount of kernel memory used is governed by maxpipekva.
 * If the dynamic expansion limit is reached, the output thread is blocked
 * until the pipe buffer empties enough to continue.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.
 *
 * kern.ipc.maxpipekvawired - Declared below (only when PIPE_SYSCTLS is set)
 * with the description "Pipe KVA wired limit".
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva.
 *
 */
119
120	#include <sys/param.h>
121	#include <sys/systm.h>
122	#include <sys/filedesc.h>
123	#include <sys/kernel.h>
124	#include <sys/vnode.h>
125	#include <sys/proc_internal.h>
126	#include <sys/kauth.h>
127	#include <sys/file_internal.h>
128	#include <sys/stat.h>
129	#include <sys/ioctl.h>
130	#include <sys/fcntl.h>
131	#include <sys/malloc.h>
132	#include <sys/syslog.h>
133	#include <sys/unistd.h>
134	#include <sys/resourcevar.h>
135	#include <sys/aio_kern.h>
136	#include <sys/signalvar.h>
137	#include <sys/pipe.h>
138	#include <sys/sysproto.h>
139	#include <sys/proc_info.h>
140
141	#include <security/audit/audit.h>
142
143	#include <sys/kdebug.h>
144
145	#include <kern/zalloc.h>
146	#include <kern/kalloc.h>
147	#include <vm/vm_kern.h>
148	#include <libkern/OSAtomic.h>
149	#include <libkern/section_keywords.h>
150
151	#if CONFIG_MACF
152	#include <security/mac_framework.h>
153	#endif
154
/* Shorthand for the fileglob fields reached through a struct fileproc. */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops
157
158	struct pipepair {
159	lck_mtx_t pp_mtx;
160	struct pipe pp_rpipe;
161	struct pipe pp_wpipe;
162	uint64_t pp_pipe_id; / unique ID shared by both pipe ends /
163	};
164
/* Map an endpoint back to its enclosing pipepair through PIPE_MTX(pipe). */
#define PIPE_PAIR(pipe) \
	__container_of(PIPE_MTX(pipe), struct pipepair, pp_mtx)
167
168	/*
169	* interfaces to the outside world exported through file operations
170	*/
171	static int pipe_read(struct fileproc fp, struct* uio *uio,
172	int flags, vfs_context_t ctx);
173	static int pipe_write(struct fileproc fp, struct* uio *uio,
174	int flags, vfs_context_t ctx);
175	static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
176	static int pipe_select(struct fileproc fp, int* which, void * wql,
177	vfs_context_t ctx);
178	static int pipe_kqfilter(struct fileproc fp, struct* knote *kn,
179	struct kevent_qos_s *kev);
180	static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
181	vfs_context_t ctx);
182	static int pipe_drain(struct fileproc *fp, vfs_context_t ctx);
183
184	static const struct fileops pipeops = {
185	.fo_type = DTYPE_PIPE,
186	.fo_read = pipe_read,
187	.fo_write = pipe_write,
188	.fo_ioctl = pipe_ioctl,
189	.fo_select = pipe_select,
190	.fo_close = pipe_close,
191	.fo_drain = pipe_drain,
192	.fo_kqfilter = pipe_kqfilter,
193	};
194
/* kqueue filter callbacks; one detach routine is shared by all three filters. */
static void filt_pipedetach(struct knote *kn);

static int filt_pipenotsup(struct knote *kn, long hint);
static int filt_pipenotsuptouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipenotsupprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_piperead(struct knote *kn, long hint);
static int filt_pipereadtouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipereadprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_pipewrite(struct knote *kn, long hint);
static int filt_pipewritetouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_pipewriteprocess(struct knote *kn, struct kevent_qos_s *kev);
208
209	SECURITY_READ_ONLY_EARLY(struct filterops) pipe_nfiltops = {
210	.f_isfd = `1`,
211	.f_detach = filt_pipedetach,
212	.f_event = filt_pipenotsup,
213	.f_touch = filt_pipenotsuptouch,
214	.f_process = filt_pipenotsupprocess,
215	};
216
217	SECURITY_READ_ONLY_EARLY(struct filterops) pipe_rfiltops = {
218	.f_isfd = `1`,
219	.f_detach = filt_pipedetach,
220	.f_event = filt_piperead,
221	.f_touch = filt_pipereadtouch,
222	.f_process = filt_pipereadprocess,
223	};
224
225	SECURITY_READ_ONLY_EARLY(struct filterops) pipe_wfiltops = {
226	.f_isfd = `1`,
227	.f_detach = filt_pipedetach,
228	.f_event = filt_pipewrite,
229	.f_touch = filt_pipewritetouch,
230	.f_process = filt_pipewriteprocess,
231	};
232
233	#if PIPE_SYSCTLS
234	static int nbigpipe; / for compatibility sake. no longer used /
235	#endif
236	static int amountpipes; / total number of pipes in system /
237	static int amountpipekva; / total memory used by pipes /
238
239	static _Atomic uint64_t pipe_unique_id = `1`;
240
241	int maxpipekva __attribute__((used)) = PIPE_KVAMAX; / allowing 16MB max. /
242
243	#if PIPE_SYSCTLS
244	SYSCTL_DECL(_kern_ipc);
245
246	SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD \| CTLFLAG_LOCKED,
247	&maxpipekva, `0`, "Pipe KVA limit");
248	SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW \| CTLFLAG_LOCKED,
249	&maxpipekvawired, `0`, "Pipe KVA wired limit");
250	SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD \| CTLFLAG_LOCKED,
251	&amountpipes, `0`, "Current # of pipes");
252	SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD \| CTLFLAG_LOCKED,
253	&nbigpipe, `0`, "Current # of big pipes");
254	SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD \| CTLFLAG_LOCKED,
255	&amountpipekva, `0`, "Pipe KVA usage");
256	SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD \| CTLFLAG_LOCKED,
257	&amountpipekvawired, `0`, "Pipe wired KVA usage");
258	#endif
259
260	static int pipepair_alloc(struct pipe rpipe, struct pipe wpipe);
261	static void pipeclose(struct pipe *cpipe);
262	static void pipe_free_kmem(struct pipe *cpipe);
263	static int pipespace(struct pipe cpipe, int* size);
264	static int choose_pipespace(unsigned long current, unsigned long expected);
265	static int expand_pipespace(struct pipe p, int* target_size);
266	static void pipeselwakeup(struct pipe cpipe, struct* pipe *spipe);
267	static __inline int pipeio_lock(struct pipe cpipe, int* catch);
268	static __inline void pipeio_unlock(struct pipe *cpipe);
269
270	static LCK_GRP_DECLARE(pipe_mtx_grp, "pipe");
271	static KALLOC_TYPE_DEFINE(pipe_zone, struct pipepair, KT_DEFAULT);
272
273	#define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )
274
275	SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
276
#if defined(XNU_TARGET_OS_OSX)
/* Bitmap for things to touch in pipe_touch() */
#define PIPE_ATIME      0x00000001      /* time of last access */
#define PIPE_MTIME      0x00000002      /* time of last modification */
#define PIPE_CTIME      0x00000004      /* time of last status change */

/*
 * Stamp the selected timestamps of tpipe with the current time.
 *
 * tpipe: endpoint whose st_*timespec fields are updated.
 * touch: bitwise OR of PIPE_ATIME / PIPE_MTIME / PIPE_CTIME.
 */
static void
pipe_touch(struct pipe *tpipe, int touch)
{
	struct timespec now;

	/* One clock read so every selected field gets the same value. */
	nanotime(&now);

	if (touch & PIPE_ATIME) {
		tpipe->st_atimespec.tv_sec  = now.tv_sec;
		tpipe->st_atimespec.tv_nsec = now.tv_nsec;
	}

	if (touch & PIPE_MTIME) {
		tpipe->st_mtimespec.tv_sec  = now.tv_sec;
		tpipe->st_mtimespec.tv_nsec = now.tv_nsec;
	}

	if (touch & PIPE_CTIME) {
		tpipe->st_ctimespec.tv_sec  = now.tv_sec;
		tpipe->st_ctimespec.tv_nsec = now.tv_nsec;
	}
}
#endif
306
307	static const unsigned int pipesize_blocks[] = {`512`, `1024`, `2048`, `4096`, `4096` * `2`, PIPE_SIZE, PIPE_SIZE * `4` };
308
309	/*
310	* finds the right size from possible sizes in pipesize_blocks
311	* returns the size which matches max(current,expected)
312	*/
313	static int
314	choose_pipespace(unsigned long current, unsigned long expected)
315	{
316	int i = sizeof(pipesize_blocks) / sizeof(unsigned int) - `1`;
317	unsigned long target;
318
319	/*
320	* assert that we always get an atomic transaction sized pipe buffer,
321	* even if the system pipe buffer high-water mark has been crossed.
322	*/
323	assert(PIPE_BUF == pipesize_blocks[`0`]);
324
325	if (expected > current) {
326	target = expected;
327	} else {
328	target = current;
329	}
330
331	while (i > `0` && pipesize_blocks[i - `1`] > target) {
332	i = i - `1`;
333	}
334
335	return pipesize_blocks[i];
336	}
337
338
339	/*
340	* expand the size of pipe while there is data to be read,
341	* and then free the old buffer once the current buffered
342	* data has been transferred to new storage.
343	* Required: PIPE_LOCK and io lock to be held by caller.
344	* returns 0 on success or no expansion possible
345	*/
346	static int
347	expand_pipespace(struct pipe p, int* target_size)
348	{
349	struct pipe tmp, oldpipe;
350	int error;
351	tmp.pipe_buffer.buffer = `0`;
352
353	if (p->pipe_buffer.size >= (unsigned) target_size) {
354	return `0`; / the existing buffer is max size possible /
355	}
356
357	/ create enough space in the target /
358	error = pipespace(cpipe: &tmp, size: target_size);
359	if (error != `0`) {
360	return error;
361	}
362
363	oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer;
364	oldpipe.pipe_buffer.size = p->pipe_buffer.size;
365
366	memcpy(dst: tmp.pipe_buffer.buffer, src: p->pipe_buffer.buffer, n: p->pipe_buffer.size);
367	if (p->pipe_buffer.cnt > `0` && p->pipe_buffer.in <= p->pipe_buffer.out) {
368	/ we are in State 3 and need extra copying for read to be consistent /
369	memcpy(dst: &tmp.pipe_buffer.buffer[p->pipe_buffer.size], src: p->pipe_buffer.buffer, n: p->pipe_buffer.size);
370	p->pipe_buffer.in += p->pipe_buffer.size;
371	}
372
373	p->pipe_buffer.buffer = tmp.pipe_buffer.buffer;
374	p->pipe_buffer.size = tmp.pipe_buffer.size;
375
376
377	pipe_free_kmem(cpipe: &oldpipe);
378	return `0`;
379	}
380
381	/*
382	* The pipe system call for the DTYPE_PIPE type of pipes
383	*
384	* returns:
385	* FREAD \| fd0 \| -->[struct rpipe] --> \|~~buffer~~\| \
386	* (pipe_mutex)
387	* FWRITE \| fd1 \| -->[struct wpipe] --X /
388	*/
389
390	/ ARGSUSED /
391	int
392	pipe(proc_t p, __unused struct pipe_args uap, int32_t retval)
393	{
394	struct fileproc rf, wf;
395	struct pipe rpipe, wpipe;
396	int error;
397
398	error = pipepair_alloc(rpipe: &rpipe, wpipe: &wpipe);
399	if (error) {
400	return error;
401	}
402
403	/*
404	* for now we'll create half-duplex pipes(refer returns section above).
405	* this is what we've always supported..
406	*/
407
408	error = falloc(p, &rf, &retval[`0`]);
409	if (error) {
410	goto freepipes;
411	}
412	rf->f_flag = FREAD;
413	rf->f_ops = &pipeops;
414	fp_set_data(fp: rf, fg_data: rpipe);
415
416	error = falloc(p, &wf, &retval[`1`]);
417	if (error) {
418	fp_free(p, fd: retval[`0`], fp: rf);
419	goto freepipes;
420	}
421	wf->f_flag = FWRITE;
422	wf->f_ops = &pipeops;
423	fp_set_data(fp: wf, fg_data: wpipe);
424
425	rpipe->pipe_peer = wpipe;
426	wpipe->pipe_peer = rpipe;
427
428	#if CONFIG_MACF
429	/*
430	* XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
431	*
432	* struct pipe represents a pipe endpoint. The MAC label is shared
433	* between the connected endpoints. As a result mac_pipe_label_init() and
434	* mac_pipe_label_associate() should only be called on one of the endpoints
435	* after they have been connected.
436	*/
437	mac_pipe_label_init(cpipe: rpipe);
438	mac_pipe_label_associate(cred: kauth_cred_get(), cpipe: rpipe);
439	mac_pipe_set_label(cpipe: wpipe, label: mac_pipe_label(cpipe: rpipe));
440	#endif
441	proc_fdlock_spin(p);
442	procfdtbl_releasefd(p, fd: retval[`0`], NULL);
443	procfdtbl_releasefd(p, fd: retval[`1`], NULL);
444	fp_drop(p, fd: retval[`0`], fp: rf, locked: `1`);
445	fp_drop(p, fd: retval[`1`], fp: wf, locked: `1`);
446	proc_fdunlock(p);
447	return `0`;
448
449	freepipes:
450	pipeclose(cpipe: rpipe);
451	pipeclose(cpipe: wpipe);
452	return error;
453	}
454
455	int
456	pipe_stat(struct pipe cpipe, void* ub, int* isstat64)
457	{
458	#if CONFIG_MACF
459	int error;
460	#endif
461	int pipe_size = `0`;
462	int pipe_count;
463	struct stat sb = (struct* stat )`0`; /* warning avoidance ; protected by isstat64 /
464	struct stat64 * sb64 = (struct stat64 )`0`; /* warning avoidance ; protected by isstat64 /
465
466	if (cpipe == NULL) {
467	return EBADF;
468	}
469	PIPE_LOCK(cpipe);
470
471	#if CONFIG_MACF
472	error = mac_pipe_check_stat(cred: kauth_cred_get(), cpipe);
473	if (error) {
474	PIPE_UNLOCK(cpipe);
475	return error;
476	}
477	#endif
478	if (cpipe->pipe_buffer.buffer == `0`) {
479	/ must be stat'ing the write fd /
480	if (cpipe->pipe_peer) {
481	/ the peer still exists, use it's info /
482	pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
483	pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
484	} else {
485	pipe_count = `0`;
486	}
487	} else {
488	pipe_size = MAX_PIPESIZE(cpipe);
489	pipe_count = cpipe->pipe_buffer.cnt;
490	}
491	/*
492	* since peer's buffer is setup ouside of lock
493	* we might catch it in transient state
494	*/
495	if (pipe_size == `0`) {
496	pipe_size = MAX(PIPE_SIZE, pipesize_blocks[`0`]);
497	}
498
499	if (isstat64 != `0`) {
500	sb64 = (struct stat64 *)ub;
501
502	bzero(s: sb64, n: sizeof(*sb64));
503	sb64->st_mode = S_IFIFO \| S_IRUSR \| S_IWUSR \| S_IRGRP \| S_IWGRP;
504	sb64->st_blksize = pipe_size;
505	sb64->st_size = pipe_count;
506	sb64->st_blocks = (sb64->st_size + sb64->st_blksize - `1`) / sb64->st_blksize;
507
508	sb64->st_uid = kauth_getuid();
509	sb64->st_gid = kauth_getgid();
510
511	sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
512	sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
513
514	sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
515	sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
516
517	sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
518	sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
519
520	/*
521	* Return a relatively unique inode number based on the current
522	* address of this pipe's struct pipe. This number may be recycled
523	* relatively quickly.
524	*/
525	sb64->st_ino = (ino64_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
526	} else {
527	sb = (struct stat *)ub;
528
529	bzero(s: sb, n: sizeof(*sb));
530	sb->st_mode = S_IFIFO \| S_IRUSR \| S_IWUSR \| S_IRGRP \| S_IWGRP;
531	sb->st_blksize = pipe_size;
532	sb->st_size = pipe_count;
533	sb->st_blocks = (sb->st_size + sb->st_blksize - `1`) / sb->st_blksize;
534
535	sb->st_uid = kauth_getuid();
536	sb->st_gid = kauth_getgid();
537
538	sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec;
539	sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec;
540
541	sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec;
542	sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec;
543
544	sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec;
545	sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec;
546
547	/*
548	* Return a relatively unique inode number based on the current
549	* address of this pipe's struct pipe. This number may be recycled
550	* relatively quickly.
551	*/
552	sb->st_ino = (ino_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
553	}
554	PIPE_UNLOCK(cpipe);
555
556	/*
557	* POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen,
558	* st_uid, st_gid.
559	*
560	* XXX (st_dev) should be unique, but there is no device driver that
561	* XXX is associated with pipes, since they are implemented via a
562	* XXX struct fileops indirection rather than as FS objects.
563	*/
564	return `0`;
565	}
566
567	uint64_t
568	pipe_id(struct pipe *p)
569	{
570	return PIPE_PAIR(p)->pp_pipe_id;
571	}
572
573	/*
574	* Allocate kva for pipe circular buffer, the space is pageable
575	* This routine will 'realloc' the size of a pipe safely, if it fails
576	* it will retain the old buffer.
577	* If it fails it will return ENOMEM.
578	*/
579	static int
580	pipespace(struct pipe cpipe, int* size)
581	{
582	vm_offset_t buffer;
583
584	if (size <= `0`) {
585	return EINVAL;
586	}
587
588	buffer = (vm_offset_t)kalloc_data(size, Z_WAITOK);
589	if (!buffer) {
590	return ENOMEM;
591	}
592
593	/ free old resources if we're resizing /
594	pipe_free_kmem(cpipe);
595	cpipe->pipe_buffer.buffer = (caddr_t)buffer;
596	cpipe->pipe_buffer.size = size;
597	cpipe->pipe_buffer.in = `0`;
598	cpipe->pipe_buffer.out = `0`;
599	cpipe->pipe_buffer.cnt = `0`;
600
601	OSAddAtomic(`1`, &amountpipes);
602	OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva);
603
604	return `0`;
605	}
606
607	/*
608	* initialize and allocate VM and memory for pipe
609	*/
610	static int
611	pipepair_alloc(struct pipe rp_out, struct pipe wp_out)
612	{
613	struct pipepair *pp = zalloc_flags(pipe_zone, Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
614	struct pipe *rpipe = &pp->pp_rpipe;
615	struct pipe *wpipe = &pp->pp_wpipe;
616
617	/*
618	* protect so pipespace or pipeclose don't follow a junk pointer
619	* if pipespace() fails.
620	*/
621	pp->pp_pipe_id = os_atomic_inc_orig(&pipe_unique_id, relaxed);
622	lck_mtx_init(lck: &pp->pp_mtx, grp: &pipe_mtx_grp, LCK_ATTR_NULL);
623
624	rpipe->pipe_mtxp = &pp->pp_mtx;
625	wpipe->pipe_mtxp = &pp->pp_mtx;
626
627	#if defined(XNU_TARGET_OS_OSX)
628	/ Initial times are all the time of creation of the pipe /
629	pipe_touch(tpipe: rpipe, PIPE_ATIME \| PIPE_MTIME \| PIPE_CTIME);
630	pipe_touch(tpipe: wpipe, PIPE_ATIME \| PIPE_MTIME \| PIPE_CTIME);
631	#endif
632
633	/*
634	* allocate the space for the normal I/O direction up
635	* front... we'll delay the allocation for the other
636	* direction until a write actually occurs (most likely it won't)...
637	*/
638	int error = pipespace(cpipe: rpipe, size: choose_pipespace(current: rpipe->pipe_buffer.size, expected: `0`));
639	if (__improbable(error)) {
640	lck_mtx_destroy(lck: &pp->pp_mtx, grp: &pipe_mtx_grp);
641	zfree(pipe_zone, pp);
642	return error;
643	}
644
645	*rp_out = rpipe;
646	*wp_out = wpipe;
647	return `0`;
648	}
649
650	static void
651	pipepair_destroy_pipe(struct pipepair pp, struct* pipe *cpipe)
652	{
653	bool can_free;
654
655	pipe_free_kmem(cpipe);
656
657	lck_mtx_lock(lck: &pp->pp_mtx);
658	if (__improbable(cpipe->pipe_state & PIPE_DEAD)) {
659	panic("double free of pipe %p in pair %p", cpipe, pp);
660	}
661
662	cpipe->pipe_state \|= PIPE_DEAD;
663
664	can_free = (pp->pp_rpipe.pipe_state & PIPE_DEAD) &&
665	(pp->pp_wpipe.pipe_state & PIPE_DEAD);
666	lck_mtx_unlock(lck: &pp->pp_mtx);
667
668	if (can_free) {
669	lck_mtx_destroy(lck: &pp->pp_mtx, grp: &pipe_mtx_grp);
670	zfree(pipe_zone, pp);
671	}
672	}
673
674	/*
675	* lock a pipe for I/O, blocking other access
676	*/
677	static inline int
678	pipeio_lock(struct pipe cpipe, int* catch)
679	{
680	int error;
681	while (cpipe->pipe_state & PIPE_LOCKFL) {
682	cpipe->pipe_state \|= PIPE_LWANT;
683	error = msleep(chan: cpipe, PIPE_MTX(cpipe), pri: catch ? (PRIBIO \| PCATCH) : PRIBIO,
684	wmesg: "pipelk", ts: `0`);
685	if (error != `0`) {
686	return error;
687	}
688	}
689	cpipe->pipe_state \|= PIPE_LOCKFL;
690	return `0`;
691	}
692
693	/*
694	* unlock a pipe I/O lock
695	*/
696	static inline void
697	pipeio_unlock(struct pipe *cpipe)
698	{
699	cpipe->pipe_state &= ~PIPE_LOCKFL;
700	if (cpipe->pipe_state & PIPE_LWANT) {
701	cpipe->pipe_state &= ~PIPE_LWANT;
702	wakeup(chan: cpipe);
703	}
704	}
705
706	/*
707	* wakeup anyone whos blocked in select
708	*/
709	static void
710	pipeselwakeup(struct pipe cpipe, struct* pipe *spipe)
711	{
712	if (cpipe->pipe_state & PIPE_EOF) {
713	selthreadclear(&cpipe->pipe_sel);
714	} else {
715	selwakeup(&cpipe->pipe_sel);
716	}
717
718	KNOTE(&cpipe->pipe_sel.si_note, `1`);
719
720	if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) {
721	if (spipe->pipe_pgid < `0`) {
722	gsignal(pgid: -spipe->pipe_pgid, SIGIO);
723	} else {
724	proc_signal(pid: spipe->pipe_pgid, SIGIO);
725	}
726	}
727	}
728
729	static void
730	pipe_check_bounds_panic(struct pipe *cpipe)
731	{
732	caddr_t start = cpipe->pipe_buffer.buffer;
733	u_int size = cpipe->pipe_buffer.size;
734	u_int in = cpipe->pipe_buffer.in;
735	u_int out = cpipe->pipe_buffer.out;
736
737	kalloc_data_require(data: start, size);
738
739	if (__improbable(in > size \|\| out > size)) {
740	panic("%s: corrupted pipe read/write pointer or size.", __func__);
741	}
742	}
743	/*
744	* Read n bytes from the buffer. Semantics are similar to file read.
745	* returns: number of bytes read from the buffer
746	*/
747	/ ARGSUSED /
748	static int
749	pipe_read(struct fileproc fp, struct* uio uio, __unused int* flags,
750	__unused vfs_context_t ctx)
751	{
752	struct pipe rpipe = (struct* pipe *)fp_get_data(fp);
753	int error;
754	int nread = `0`;
755	u_int size;
756
757	PIPE_LOCK(rpipe);
758	++rpipe->pipe_busy;
759
760	error = pipeio_lock(cpipe: rpipe, catch: `1`);
761	if (error) {
762	goto unlocked_error;
763	}
764
765	#if CONFIG_MACF
766	error = mac_pipe_check_read(cred: kauth_cred_get(), cpipe: rpipe);
767	if (error) {
768	goto locked_error;
769	}
770	#endif
771
772
773	while (uio_resid(a_uio: uio)) {
774	/*
775	* normal pipe buffer receive
776	*/
777	if (rpipe->pipe_buffer.cnt > `0`) {
778	/*
779	* # bytes to read is min( bytes from read pointer until end of buffer,
780	* total unread bytes,
781	* user requested byte count)
782	*/
783	size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
784	if (size > rpipe->pipe_buffer.cnt) {
785	size = rpipe->pipe_buffer.cnt;
786	}
787
788	size = (u_int) MIN(INT_MAX, MIN((user_size_t)size,
789	(user_size_t)uio_resid(uio)));
790
791	PIPE_UNLOCK(rpipe); / we still hold io lock./
792	pipe_check_bounds_panic(cpipe: rpipe);
793	error = uiomove(
794	cp: &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
795	n: size, uio);
796	PIPE_LOCK(rpipe);
797	if (error) {
798	break;
799	}
800
801	rpipe->pipe_buffer.out += size;
802	if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) {
803	rpipe->pipe_buffer.out = `0`;
804	}
805
806	rpipe->pipe_buffer.cnt -= size;
807
808	/*
809	* If there is no more to read in the pipe, reset
810	* its pointers to the beginning. This improves
811	* cache hit stats.
812	*/
813	if (rpipe->pipe_buffer.cnt == `0`) {
814	rpipe->pipe_buffer.in = `0`;
815	rpipe->pipe_buffer.out = `0`;
816	}
817	nread += size;
818	} else {
819	/*
820	* detect EOF condition
821	* read returns 0 on EOF, no need to set error
822	*/
823	if ((rpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF)) \|\|
824	(fileproc_get_vflags(fp) & FPV_DRAIN)) {
825	break;
826	}
827
828	/*
829	* If the "write-side" has been blocked, wake it up now.
830	*/
831	if (rpipe->pipe_state & PIPE_WANTW) {
832	rpipe->pipe_state &= ~PIPE_WANTW;
833	wakeup(chan: rpipe);
834	}
835
836	/*
837	* Break if some data was read in previous iteration.
838	*/
839	if (nread > `0`) {
840	break;
841	}
842
843	/*
844	* Unlock the pipe buffer for our remaining processing.
845	* We will either break out with an error or we will
846	* sleep and relock to loop.
847	*/
848	pipeio_unlock(cpipe: rpipe);
849
850	/*
851	* Handle non-blocking mode operation or
852	* wait for more data.
853	*/
854	if (fp->f_flag & FNONBLOCK) {
855	error = EAGAIN;
856	} else {
857	rpipe->pipe_state \|= PIPE_WANTR;
858	error = msleep(chan: rpipe, PIPE_MTX(rpipe), PRIBIO \| PCATCH, wmesg: "piperd", ts: `0`);
859	if (error == `0`) {
860	error = pipeio_lock(cpipe: rpipe, catch: `1`);
861	}
862	}
863	if (error) {
864	goto unlocked_error;
865	}
866	}
867	}
868	#if CONFIG_MACF
869	locked_error:
870	#endif
871	pipeio_unlock(cpipe: rpipe);
872
873	unlocked_error:
874	--rpipe->pipe_busy;
875
876	/*
877	* PIPE_WANT processing only makes sense if pipe_busy is 0.
878	*/
879	if ((rpipe->pipe_busy == `0`) && (rpipe->pipe_state & PIPE_WANT)) {
880	rpipe->pipe_state &= ~(PIPE_WANT \| PIPE_WANTW);
881	wakeup(chan: rpipe);
882	} else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
883	/*
884	* Handle write blocking hysteresis.
885	*/
886	if (rpipe->pipe_state & PIPE_WANTW) {
887	rpipe->pipe_state &= ~PIPE_WANTW;
888	wakeup(chan: rpipe);
889	}
890	}
891
892	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > `0`) {
893	pipeselwakeup(cpipe: rpipe, spipe: rpipe->pipe_peer);
894	}
895
896	#if defined(XNU_TARGET_OS_OSX)
897	/ update last read time /
898	pipe_touch(tpipe: rpipe, PIPE_ATIME);
899	#endif
900
901	PIPE_UNLOCK(rpipe);
902
903	return error;
904	}
905
906	/*
907	* perform a write of n bytes into the read side of buffer. Since
908	* pipes are unidirectional a write is meant to be read by the otherside only.
909	*/
910	static int
911	pipe_write(struct fileproc fp, struct* uio uio, __unused int* flags,
912	__unused vfs_context_t ctx)
913	{
914	int error = `0`;
915	size_t orig_resid;
916	int pipe_size;
917	struct pipe wpipe, rpipe;
918	// LP64todo - fix this!
919	orig_resid = (size_t)uio_resid(a_uio: uio);
920	if (orig_resid > LONG_MAX) {
921	return EINVAL;
922	}
923	int space;
924
925	rpipe = (struct pipe *)fp_get_data(fp);
926
927	PIPE_LOCK(rpipe);
928	wpipe = rpipe->pipe_peer;
929
930	/*
931	* detect loss of pipe read side, issue SIGPIPE if lost.
932	*/
933	if (wpipe == NULL \|\| (wpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF)) \|\|
934	(fileproc_get_vflags(fp) & FPV_DRAIN)) {
935	PIPE_UNLOCK(rpipe);
936	return EPIPE;
937	}
938	#if CONFIG_MACF
939	error = mac_pipe_check_write(cred: kauth_cred_get(), cpipe: wpipe);
940	if (error) {
941	PIPE_UNLOCK(rpipe);
942	return error;
943	}
944	#endif
945	++wpipe->pipe_busy;
946
947	pipe_size = `0`;
948
949	/*
950	* need to allocate some storage... we delay the allocation
951	* until the first write on fd[0] to avoid allocating storage for both
952	* 'pipe ends'... most pipes are half-duplex with the writes targeting
953	* fd[1], so allocating space for both ends is a waste...
954	*/
955
956	if (wpipe->pipe_buffer.buffer == `0` \|\| (
957	(unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
958	amountpipekva < maxpipekva)) {
959	pipe_size = choose_pipespace(current: wpipe->pipe_buffer.size, expected: wpipe->pipe_buffer.cnt + orig_resid);
960	}
961	if (pipe_size) {
962	/*
963	* need to do initial allocation or resizing of pipe
964	* holding both structure and io locks.
965	*/
966	if ((error = pipeio_lock(cpipe: wpipe, catch: `1`)) == `0`) {
967	if (wpipe->pipe_buffer.cnt == `0`) {
968	error = pipespace(cpipe: wpipe, size: pipe_size);
969	} else {
970	error = expand_pipespace(p: wpipe, target_size: pipe_size);
971	}
972
973	pipeio_unlock(cpipe: wpipe);
974
975	/ allocation failed /
976	if (wpipe->pipe_buffer.buffer == `0`) {
977	error = ENOMEM;
978	}
979	}
980	if (error) {
981	/*
982	* If an error occurred unbusy and return, waking up any pending
983	* readers.
984	*/
985	--wpipe->pipe_busy;
986	if ((wpipe->pipe_busy == `0`) &&
987	(wpipe->pipe_state & PIPE_WANT)) {
988	wpipe->pipe_state &= ~(PIPE_WANT \| PIPE_WANTR);
989	wakeup(chan: wpipe);
990	}
991	PIPE_UNLOCK(rpipe);
992	return error;
993	}
994	}
995
996	while (uio_resid(a_uio: uio)) {
997	retrywrite:
998	space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
999
1000	/ Writes of size <= PIPE_BUF must be atomic. /
1001	if ((space < uio_resid(a_uio: uio)) && (orig_resid <= PIPE_BUF)) {
1002	space = `0`;
1003	}
1004
1005	if (space > `0`) {
1006	if ((error = pipeio_lock(cpipe: wpipe, catch: `1`)) == `0`) {
1007	size_t size; / Transfer size /
1008	size_t segsize; / first segment to transfer /
1009
1010	if ((wpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF)) \|\|
1011	(fileproc_get_vflags(fp) & FPV_DRAIN)) {
1012	pipeio_unlock(cpipe: wpipe);
1013	error = EPIPE;
1014	break;
1015	}
1016	/*
1017	* If a process blocked in pipeio_lock, our
1018	* value for space might be bad... the mutex
1019	* is dropped while we're blocked
1020	*/
1021	if (space > (int)(wpipe->pipe_buffer.size -
1022	wpipe->pipe_buffer.cnt)) {
1023	pipeio_unlock(cpipe: wpipe);
1024	goto retrywrite;
1025	}
1026
1027	/*
1028	* Transfer size is minimum of uio transfer
1029	* and free space in pipe buffer.
1030	*/
1031	// LP64todo - fix this!
1032	if (space > uio_resid(a_uio: uio)) {
1033	size = (size_t)uio_resid(a_uio: uio);
1034	if (size > LONG_MAX) {
1035	panic("size greater than LONG_MAX");
1036	}
1037	} else {
1038	size = space;
1039	}
1040	/*
1041	* First segment to transfer is minimum of
1042	* transfer size and contiguous space in
1043	* pipe buffer. If first segment to transfer
1044	* is less than the transfer size, we've got
1045	* a wraparound in the buffer.
1046	*/
1047	segsize = wpipe->pipe_buffer.size -
1048	wpipe->pipe_buffer.in;
1049	if (segsize > size) {
1050	segsize = size;
1051	}
1052
1053	/ Transfer first segment /
1054
1055	PIPE_UNLOCK(rpipe);
1056	pipe_check_bounds_panic(cpipe: wpipe);
1057	error = uiomove(cp: &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1058	n: (int)segsize, uio);
1059	PIPE_LOCK(rpipe);
1060
1061	if (error == `0` && segsize < size) {
1062	/*
1063	* Transfer remaining part now, to
1064	* support atomic writes. Wraparound
1065	* happened. (State 3)
1066	*/
1067	if (wpipe->pipe_buffer.in + segsize !=
1068	wpipe->pipe_buffer.size) {
1069	panic("Expected pipe buffer "
1070	"wraparound disappeared");
1071	}
1072
1073	PIPE_UNLOCK(rpipe);
1074	pipe_check_bounds_panic(cpipe: wpipe);
1075	error = uiomove(
1076	cp: &wpipe->pipe_buffer.buffer[`0`],
1077	n: (int)(size - segsize), uio);
1078	PIPE_LOCK(rpipe);
1079	}
1080	/*
1081	* readers never know to read until count is updated.
1082	*/
1083	if (error == `0`) {
1084	wpipe->pipe_buffer.in += size;
1085	if (wpipe->pipe_buffer.in >
1086	wpipe->pipe_buffer.size) {
1087	if (wpipe->pipe_buffer.in !=
1088	size - segsize +
1089	wpipe->pipe_buffer.size) {
1090	panic("Expected "
1091	"wraparound bad");
1092	}
1093	wpipe->pipe_buffer.in = (unsigned int)(size -
1094	segsize);
1095	}
1096
1097	wpipe->pipe_buffer.cnt += size;
1098	if (wpipe->pipe_buffer.cnt >
1099	wpipe->pipe_buffer.size) {
1100	panic("Pipe buffer overflow");
1101	}
1102	}
1103	pipeio_unlock(cpipe: wpipe);
1104	}
1105	if (error) {
1106	break;
1107	}
1108	} else {
1109	/*
1110	* If the "read-side" has been blocked, wake it up now.
1111	*/
1112	if (wpipe->pipe_state & PIPE_WANTR) {
1113	wpipe->pipe_state &= ~PIPE_WANTR;
1114	wakeup(chan: wpipe);
1115	}
1116
1117	/*
1118	* If read side wants to go away, we just issue a signal
1119	* to ourselves.
1120	*/
1121	if ((wpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF)) \|\|
1122	(fileproc_get_vflags(fp) & FPV_DRAIN)) {
1123	error = EPIPE;
1124	break;
1125	}
1126
1127	/*
1128	* don't block on non-blocking I/O
1129	* we'll do the pipeselwakeup on the way out
1130	*/
1131	if (fp->f_flag & FNONBLOCK) {
1132	error = EAGAIN;
1133	break;
1134	}
1135
1136	/*
1137	* We have no more space and have something to offer,
1138	* wake up select/poll.
1139	*/
1140	pipeselwakeup(cpipe: wpipe, spipe: wpipe);
1141
1142	wpipe->pipe_state \|= PIPE_WANTW;
1143
1144	error = msleep(chan: wpipe, PIPE_MTX(wpipe), PRIBIO \| PCATCH, wmesg: "pipewr", ts: `0`);
1145
1146	if (error != `0`) {
1147	break;
1148	}
1149	}
1150	}
1151	--wpipe->pipe_busy;
1152
1153	if ((wpipe->pipe_busy == `0`) && (wpipe->pipe_state & PIPE_WANT)) {
1154	wpipe->pipe_state &= ~(PIPE_WANT \| PIPE_WANTR);
1155	wakeup(chan: wpipe);
1156	}
1157	if (wpipe->pipe_buffer.cnt > `0`) {
1158	/*
1159	* If there are any characters in the buffer, we wake up
1160	* the reader if it was blocked waiting for data.
1161	*/
1162	if (wpipe->pipe_state & PIPE_WANTR) {
1163	wpipe->pipe_state &= ~PIPE_WANTR;
1164	wakeup(chan: wpipe);
1165	}
1166	/*
1167	* wake up thread blocked in select/poll or post the notification
1168	*/
1169	pipeselwakeup(cpipe: wpipe, spipe: wpipe);
1170	}
1171
1172	#if defined(XNU_TARGET_OS_OSX)
1173	/ Update modification, status change (# of bytes in pipe) times /
1174	pipe_touch(tpipe: rpipe, PIPE_MTIME \| PIPE_CTIME);
1175	pipe_touch(tpipe: wpipe, PIPE_MTIME \| PIPE_CTIME);
1176	#endif
1177	PIPE_UNLOCK(rpipe);
1178
1179	return error;
1180	}
1181
1182	/*
1183	* we implement a very minimal set of ioctls for compatibility with sockets.
1184	*/
1185	/ ARGSUSED 3 /
1186	static int
1187	pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
1188	__unused vfs_context_t ctx)
1189	{
1190	struct pipe mpipe = (struct* pipe *)fp_get_data(fp);
1191	#if CONFIG_MACF
1192	int error;
1193	#endif
1194
1195	PIPE_LOCK(mpipe);
1196
1197	#if CONFIG_MACF
1198	error = mac_pipe_check_ioctl(cred: kauth_cred_get(), cpipe: mpipe, cmd);
1199	if (error) {
1200	PIPE_UNLOCK(mpipe);
1201
1202	return error;
1203	}
1204	#endif
1205
1206	switch (cmd) {
1207	case FIONBIO:
1208	PIPE_UNLOCK(mpipe);
1209	return `0`;
1210
1211	case FIOASYNC:
1212	if ((int* *)data) {
1213	mpipe->pipe_state \|= PIPE_ASYNC;
1214	} else {
1215	mpipe->pipe_state &= ~PIPE_ASYNC;
1216	}
1217	PIPE_UNLOCK(mpipe);
1218	return `0`;
1219
1220	case FIONREAD:
1221	(int* *)data = mpipe->pipe_buffer.cnt;
1222	PIPE_UNLOCK(mpipe);
1223	return `0`;
1224
1225	case TIOCSPGRP:
1226	mpipe->pipe_pgid = (int* *)data;
1227
1228	PIPE_UNLOCK(mpipe);
1229	return `0`;
1230
1231	case TIOCGPGRP:
1232	(int* *)data = mpipe->pipe_pgid;
1233
1234	PIPE_UNLOCK(mpipe);
1235	return `0`;
1236	}
1237	PIPE_UNLOCK(mpipe);
1238	return ENOTTY;
1239	}
1240
1241
1242	static int
1243	pipe_select(struct fileproc fp, int* which, void *wql, vfs_context_t ctx)
1244	{
1245	struct pipe rpipe = (struct* pipe *)fp_get_data(fp);
1246	struct pipe *wpipe;
1247	int retnum = `0`;
1248
1249	if (rpipe == NULL \|\| rpipe == (struct pipe *)-`1`) {
1250	return retnum;
1251	}
1252
1253	PIPE_LOCK(rpipe);
1254
1255	wpipe = rpipe->pipe_peer;
1256
1257
1258	#if CONFIG_MACF
1259	/*
1260	* XXX We should use a per thread credential here; minimally, the
1261	* XXX process credential should have a persistent reference on it
1262	* XXX before being passed in here.
1263	*/
1264	if (mac_pipe_check_select(cred: vfs_context_ucred(ctx), cpipe: rpipe, which)) {
1265	PIPE_UNLOCK(rpipe);
1266	return `0`;
1267	}
1268	#endif
1269	switch (which) {
1270	case FREAD:
1271	if ((rpipe->pipe_state & PIPE_DIRECTW) \|\|
1272	(rpipe->pipe_buffer.cnt > `0`) \|\|
1273	(rpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF)) \|\|
1274	(fileproc_get_vflags(fp) & FPV_DRAIN)) {
1275	retnum = `1`;
1276	} else {
1277	selrecord(selector: vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1278	}
1279	break;
1280
1281	case FWRITE:
1282	if (wpipe) {
1283	wpipe->pipe_state \|= PIPE_WSELECT;
1284	}
1285	if (wpipe == NULL \|\| (wpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF)) \|\|
1286	(fileproc_get_vflags(fp) & FPV_DRAIN) \|\|
1287	(((wpipe->pipe_state & PIPE_DIRECTW) == `0`) &&
1288	(MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
1289	retnum = `1`;
1290	} else {
1291	selrecord(selector: vfs_context_proc(ctx), &wpipe->pipe_sel, wql);
1292	}
1293	break;
1294	case `0`:
1295	selrecord(selector: vfs_context_proc(ctx), &rpipe->pipe_sel, wql);
1296	break;
1297	}
1298	PIPE_UNLOCK(rpipe);
1299
1300	return retnum;
1301	}
1302
1303
1304	/ ARGSUSED 1 /
1305	static int
1306	pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
1307	{
1308	struct pipe *cpipe;
1309
1310	proc_fdlock_spin(vfs_context_proc(ctx));
1311	cpipe = (struct pipe *)fg_get_data(fg);
1312	fg_set_data(fg, NULL);
1313	proc_fdunlock(vfs_context_proc(ctx));
1314	if (cpipe) {
1315	pipeclose(cpipe);
1316	}
1317
1318	return `0`;
1319	}
1320
1321	static void
1322	pipe_free_kmem(struct pipe *cpipe)
1323	{
1324	if (cpipe->pipe_buffer.buffer != NULL) {
1325	OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
1326	OSAddAtomic(-`1`, &amountpipes);
1327	kfree_data(cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size);
1328	cpipe->pipe_buffer.buffer = NULL;
1329	cpipe->pipe_buffer.size = `0`;
1330	}
1331	}
1332
1333	/*
1334	* shutdown the pipe
1335	*/
1336	static void
1337	pipeclose(struct pipe *cpipe)
1338	{
1339	struct pipe *ppipe;
1340
1341	PIPE_LOCK(cpipe);
1342
1343	/*
1344	* If the other side is blocked, wake it up saying that
1345	* we want to close it down.
1346	*/
1347	cpipe->pipe_state &= ~PIPE_DRAIN;
1348	cpipe->pipe_state \|= PIPE_EOF;
1349	pipeselwakeup(cpipe, spipe: cpipe);
1350
1351	while (cpipe->pipe_busy) {
1352	cpipe->pipe_state \|= PIPE_WANT;
1353
1354	wakeup(chan: cpipe);
1355	msleep(chan: cpipe, PIPE_MTX(cpipe), PRIBIO, wmesg: "pipecl", ts: `0`);
1356	}
1357
1358	#if CONFIG_MACF
1359	/*
1360	* Free the shared pipe label only after the two ends are disconnected.
1361	*/
1362	if (mac_pipe_label(cpipe) != NULL && cpipe->pipe_peer == NULL) {
1363	mac_pipe_label_destroy(cpipe);
1364	}
1365	#endif
1366
1367	/*
1368	* Disconnect from peer
1369	*/
1370	if ((ppipe = cpipe->pipe_peer) != NULL) {
1371	ppipe->pipe_state &= ~(PIPE_DRAIN);
1372	ppipe->pipe_state \|= PIPE_EOF;
1373
1374	pipeselwakeup(cpipe: ppipe, spipe: ppipe);
1375	wakeup(chan: ppipe);
1376
1377	KNOTE(&ppipe->pipe_sel.si_note, `1`);
1378
1379	ppipe->pipe_peer = NULL;
1380	}
1381
1382	/*
1383	* free resources
1384	*/
1385
1386	PIPE_UNLOCK(cpipe);
1387
1388	pipepair_destroy_pipe(PIPE_PAIR(cpipe), cpipe);
1389	}
1390
1391	static int64_t
1392	filt_pipelowwat(struct knote kn, struct* pipe *rpipe, int64_t def_lowwat)
1393	{
1394	if ((kn->kn_sfflags & NOTE_LOWAT) == `0`) {
1395	return def_lowwat;
1396	}
1397	if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe)) {
1398	return MAX_PIPESIZE(rpipe);
1399	}
1400	return MAX(kn->kn_sdata, def_lowwat);
1401	}
1402
1403	static int
1404	filt_pipe_draincommon(struct knote kn, struct* pipe *rpipe)
1405	{
1406	struct pipe *wpipe = rpipe->pipe_peer;
1407
1408	if ((rpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF)) \|\|
1409	(wpipe == NULL) \|\| (wpipe->pipe_state & (PIPE_DRAIN \| PIPE_EOF))) {
1410	kn->kn_flags \|= EV_EOF;
1411	return `1`;
1412	}
1413
1414	return `0`;
1415	}
1416
/*
 * Event callback of the "neutered" filter: fires only once the pipe is
 * draining/EOF (see filt_pipe_draincommon).  The hint is ignored.
 *
 * NOTE(review): no lock is taken here, unlike the touch/process variants;
 * presumably the caller already holds the pipe lock - confirm.
 */
static int
filt_pipenotsup(struct knote *kn, long hint)
{
#pragma unused(hint)
	struct pipe *rpipe = knote_kn_hook_get_raw(kn);

	return filt_pipe_draincommon(kn, rpipe);
}
1425
1426	static int
1427	filt_pipenotsuptouch(struct knote kn, struct* kevent_qos_s *kev)
1428	{
1429	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
1430	int res;
1431
1432	PIPE_LOCK(rpipe);
1433
1434	/ accept new kevent data (and save off lowat threshold and flag) /
1435	kn->kn_sfflags = kev->fflags;
1436	kn->kn_sdata = kev->data;
1437
1438	/ determine if any event is now deemed fired /
1439	res = filt_pipe_draincommon(kn, rpipe);
1440
1441	PIPE_UNLOCK(rpipe);
1442
1443	return res;
1444	}
1445
/*
 * Process callback of the "neutered" filter: under the pipe lock, checks
 * the drain/EOF condition and, when it holds, fills kev from the knote
 * with a data value of 0.  Returns non-zero when an event was delivered.
 */
static int
filt_pipenotsupprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
	int res;

	PIPE_LOCK(rpipe);
	res = filt_pipe_draincommon(kn, rpipe);
	if (res) {
		knote_fill_kevent(kn, kev, 0);
	}
	PIPE_UNLOCK(rpipe);

	return res;
}
1461
1462	/ARGSUSED/
1463	static int
1464	filt_piperead_common(struct knote kn, struct* kevent_qos_s kev, struct* pipe *rpipe)
1465	{
1466	int64_t data = rpipe->pipe_buffer.cnt;
1467	int res = `0`;
1468
1469	if (filt_pipe_draincommon(kn, rpipe)) {
1470	res = `1`;
1471	} else {
1472	res = data >= filt_pipelowwat(kn, rpipe, def_lowwat: `1`);
1473	}
1474	if (res && kev) {
1475	knote_fill_kevent(kn, kev, data);
1476	}
1477	return res;
1478	}
1479
1480	static int
1481	filt_piperead(struct knote kn, long* hint)
1482	{
1483	#pragma unused(hint)
1484	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
1485
1486	return filt_piperead_common(kn, NULL, rpipe);
1487	}
1488
1489	static int
1490	filt_pipereadtouch(struct knote kn, struct* kevent_qos_s *kev)
1491	{
1492	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
1493	int retval;
1494
1495	PIPE_LOCK(rpipe);
1496
1497	/ accept new inputs (and save the low water threshold and flag) /
1498	kn->kn_sdata = kev->data;
1499	kn->kn_sfflags = kev->fflags;
1500
1501	/ identify if any events are now fired /
1502	retval = filt_piperead_common(kn, NULL, rpipe);
1503
1504	PIPE_UNLOCK(rpipe);
1505
1506	return retval;
1507	}
1508
/*
 * Process callback of the read filter: under the pipe lock, evaluates
 * readability and lets the common routine fill kev when the event fires.
 */
static int
filt_pipereadprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
	int retval;

	PIPE_LOCK(rpipe);
	retval = filt_piperead_common(kn, kev, rpipe);
	PIPE_UNLOCK(rpipe);

	return retval;
}
1521
1522	/ARGSUSED/
1523	static int
1524	filt_pipewrite_common(struct knote kn, struct* kevent_qos_s kev, struct* pipe *rpipe)
1525	{
1526	int64_t data = `0`;
1527	int res = `0`;
1528
1529	if (filt_pipe_draincommon(kn, rpipe)) {
1530	res = `1`;
1531	} else {
1532	data = MAX_PIPESIZE(rpipe) - rpipe->pipe_buffer.cnt;
1533	res = data >= filt_pipelowwat(kn, rpipe, PIPE_BUF);
1534	}
1535	if (res && kev) {
1536	knote_fill_kevent(kn, kev, data);
1537	}
1538	return res;
1539	}
1540
1541	/ARGSUSED/
1542	static int
1543	filt_pipewrite(struct knote kn, long* hint)
1544	{
1545	#pragma unused(hint)
1546	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
1547
1548	return filt_pipewrite_common(kn, NULL, rpipe);
1549	}
1550
1551
1552	static int
1553	filt_pipewritetouch(struct knote kn, struct* kevent_qos_s *kev)
1554	{
1555	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
1556	int res;
1557
1558	PIPE_LOCK(rpipe);
1559
1560	/ accept new kevent data (and save off lowat threshold and flag) /
1561	kn->kn_sfflags = kev->fflags;
1562	kn->kn_sdata = kev->data;
1563
1564	/ determine if any event is now deemed fired /
1565	res = filt_pipewrite_common(kn, NULL, rpipe);
1566
1567	PIPE_UNLOCK(rpipe);
1568
1569	return res;
1570	}
1571
/*
 * Process callback of the write filter: under the pipe lock, evaluates
 * writability and lets the common routine fill kev when the event fires.
 */
static int
filt_pipewriteprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	struct pipe *rpipe = knote_kn_hook_get_raw(kn);
	int res;

	PIPE_LOCK(rpipe);
	res = filt_pipewrite_common(kn, kev, rpipe);
	PIPE_UNLOCK(rpipe);

	return res;
}
1584
1585	/ARGSUSED/
1586	static int
1587	pipe_kqfilter(struct fileproc fp, struct* knote *kn,
1588	__unused struct kevent_qos_s *kev)
1589	{
1590	struct pipe cpipe = (struct* pipe *)fp_get_data(fp);
1591	struct pipe *rpipe = &PIPE_PAIR(cpipe)->pp_rpipe;
1592	int res;
1593
1594	PIPE_LOCK(cpipe);
1595	#if CONFIG_MACF
1596	/*
1597	* XXX We should use a per thread credential here; minimally, the
1598	* XXX process credential should have a persistent reference on it
1599	* XXX before being passed in here.
1600	*/
1601	kauth_cred_t cred = vfs_context_ucred(ctx: vfs_context_current());
1602	if (mac_pipe_check_kqfilter(cred, kn, cpipe) != `0`) {
1603	PIPE_UNLOCK(cpipe);
1604	knote_set_error(kn, EPERM);
1605	return `0`;
1606	}
1607	#endif
1608
1609	/*
1610	* FreeBSD will fail the attach with EPIPE if the peer pipe is detached,
1611	* however, this isn't a programming error as the other side closing
1612	* could race with the kevent registration.
1613	*
1614	* Attach should only fail for programming mistakes else it will break
1615	* libdispatch.
1616	*
1617	* Like FreeBSD, have a "Neutered" filter that will not fire until
1618	* the pipe dies if the wrong filter is attached to the wrong end.
1619	*
1620	* Knotes are always attached to the "rpipe".
1621	*/
1622	switch (kn->kn_filter) {
1623	case EVFILT_READ:
1624	if (fp->f_flag & FREAD) {
1625	kn->kn_filtid = EVFILTID_PIPE_R;
1626	res = filt_piperead_common(kn, NULL, rpipe);
1627	} else {
1628	kn->kn_filtid = EVFILTID_PIPE_N;
1629	res = filt_pipe_draincommon(kn, rpipe);
1630	}
1631	break;
1632
1633	case EVFILT_WRITE:
1634	if (fp->f_flag & FWRITE) {
1635	kn->kn_filtid = EVFILTID_PIPE_W;
1636	res = filt_pipewrite_common(kn, NULL, rpipe);
1637	} else {
1638	kn->kn_filtid = EVFILTID_PIPE_N;
1639	res = filt_pipe_draincommon(kn, rpipe);
1640	}
1641	break;
1642
1643	default:
1644	PIPE_UNLOCK(cpipe);
1645	knote_set_error(kn, EINVAL);
1646	return `0`;
1647	}
1648
1649	knote_kn_hook_set_raw(kn, kn_hook: rpipe);
1650	KNOTE_ATTACH(&rpipe->pipe_sel.si_note, kn);
1651
1652	PIPE_UNLOCK(cpipe);
1653	return res;
1654	}
1655
1656	static void
1657	filt_pipedetach(struct knote *kn)
1658	{
1659	struct pipe cpipe = (struct* pipe *)fp_get_data(fp: kn->kn_fp);
1660	struct pipe *rpipe = &PIPE_PAIR(cpipe)->pp_rpipe;
1661
1662	PIPE_LOCK(cpipe);
1663	KNOTE_DETACH(&rpipe->pipe_sel.si_note, kn);
1664	PIPE_UNLOCK(cpipe);
1665	}
1666
1667	int
1668	fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo)
1669	{
1670	#if CONFIG_MACF
1671	int error;
1672	#endif
1673	struct timespec now;
1674	struct vinfo_stat * ub;
1675	int pipe_size = `0`;
1676	int pipe_count;
1677
1678	if (cpipe == NULL) {
1679	return EBADF;
1680	}
1681	PIPE_LOCK(cpipe);
1682
1683	#if CONFIG_MACF
1684	error = mac_pipe_check_stat(cred: kauth_cred_get(), cpipe);
1685	if (error) {
1686	PIPE_UNLOCK(cpipe);
1687	return error;
1688	}
1689	#endif
1690	if (cpipe->pipe_buffer.buffer == `0`) {
1691	/*
1692	* must be stat'ing the write fd
1693	*/
1694	if (cpipe->pipe_peer) {
1695	/*
1696	* the peer still exists, use it's info
1697	*/
1698	pipe_size = MAX_PIPESIZE(cpipe->pipe_peer);
1699	pipe_count = cpipe->pipe_peer->pipe_buffer.cnt;
1700	} else {
1701	pipe_count = `0`;
1702	}
1703	} else {
1704	pipe_size = MAX_PIPESIZE(cpipe);
1705	pipe_count = cpipe->pipe_buffer.cnt;
1706	}
1707	/*
1708	* since peer's buffer is setup ouside of lock
1709	* we might catch it in transient state
1710	*/
1711	if (pipe_size == `0`) {
1712	pipe_size = PIPE_SIZE;
1713	}
1714
1715	ub = &pinfo->pipe_stat;
1716
1717	bzero(s: ub, n: sizeof(*ub));
1718	ub->vst_mode = S_IFIFO \| S_IRUSR \| S_IWUSR \| S_IRGRP \| S_IWGRP;
1719	ub->vst_blksize = pipe_size;
1720	ub->vst_size = pipe_count;
1721	if (ub->vst_blksize != `0`) {
1722	ub->vst_blocks = (ub->vst_size + ub->vst_blksize - `1`) / ub->vst_blksize;
1723	}
1724	ub->vst_nlink = `1`;
1725
1726	ub->vst_uid = kauth_getuid();
1727	ub->vst_gid = kauth_getgid();
1728
1729	nanotime(ts: &now);
1730	ub->vst_atime = now.tv_sec;
1731	ub->vst_atimensec = now.tv_nsec;
1732
1733	ub->vst_mtime = now.tv_sec;
1734	ub->vst_mtimensec = now.tv_nsec;
1735
1736	ub->vst_ctime = now.tv_sec;
1737	ub->vst_ctimensec = now.tv_nsec;
1738
1739	/*
1740	* Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid.
1741	* XXX (st_dev, st_ino) should be unique.
1742	*/
1743
1744	pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRHASH((uintptr_t)cpipe);
1745	pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRHASH((uintptr_t)(cpipe->pipe_peer));
1746	pinfo->pipe_status = cpipe->pipe_state;
1747
1748	PIPE_UNLOCK(cpipe);
1749
1750	return `0`;
1751	}
1752
1753
1754	static int
1755	pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx)
1756	{
1757	/ Note: fdlock already held /
1758	struct pipe ppipe, cpipe = fp_get_data(fp);
1759	boolean_t drain_pipe = FALSE;
1760
1761	/ Check if the pipe is going away /
1762	lck_mtx_lock_spin(lck: &fp->fp_glob->fg_lock);
1763	if (os_ref_get_count_raw(rc: &fp->fp_glob->fg_count) == `1`) {
1764	drain_pipe = TRUE;
1765	}
1766	lck_mtx_unlock(lck: &fp->fp_glob->fg_lock);
1767
1768	if (cpipe) {
1769	PIPE_LOCK(cpipe);
1770
1771	if (drain_pipe) {
1772	cpipe->pipe_state \|= PIPE_DRAIN;
1773	cpipe->pipe_state &= ~(PIPE_WANTR \| PIPE_WANTW);
1774	}
1775	wakeup(chan: cpipe);
1776
1777	/ Must wake up peer: a writer sleeps on the read side /
1778	if ((ppipe = cpipe->pipe_peer)) {
1779	if (drain_pipe) {
1780	ppipe->pipe_state \|= PIPE_DRAIN;
1781	ppipe->pipe_state &= ~(PIPE_WANTR \| PIPE_WANTW);
1782	}
1783	wakeup(chan: ppipe);
1784	}
1785
1786	PIPE_UNLOCK(cpipe);
1787	return `0`;
1788	}
1789
1790	return `1`;
1791	}
1792

Browse the source code of xnu/bsd/kern/sys_pipe.c