1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
66 */
67
68#include <nfs/nfs_conf.h>
69
70/*
71 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
72 * support for mandatory and extensible security protections. This notice
73 * is included in support of clause 2.2 (b) of the Apple Public License,
74 * Version 2.0.
75 */
76
77#include <sys/file_internal.h>
78#include <sys/vnode_internal.h>
79#include <sys/uio_internal.h>
80#include <sys/sysctl.h>
81#include <sys/socketvar.h>
82#include <sys/sysproto.h>
83#include <sys/fsevents.h>
84#include <kern/task.h>
85
86#include <security/audit/audit.h>
87
88#include <netinet/in.h>
89#include <netinet/tcp.h>
90#include <nfs/xdr_subs.h>
91#include <nfs/rpcv2.h>
92#include <nfs/nfsproto.h>
93#include <nfs/nfs.h>
94#include <nfs/nfsm_subs.h>
95#include <nfs/nfsrvcache.h>
96#include <nfs/nfs_gss.h>
97#if CONFIG_MACF
98#include <security/mac_framework.h>
99#endif
100
#if CONFIG_NFS_SERVER

/* NFS server procedure dispatch table (defined elsewhere in the server code) */
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];

/* write-gather delay knobs, exported via the sysctls below */
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* when non-zero, require clients to come from a reserved port (sysctl-tunable) */
static int nfsrv_require_resv_port = 0;
/* non-zero while the idle-socket timer is scheduled; holds its planned fire time (uptime secs) */
static time_t nfsrv_idlesock_timer_on = 0;
/* count of TCP sockets currently on nfsrv_socklist */
static int nfsrv_sock_tcp_cnt = 0;
#define NFSD_MIN_IDLE_TIMEOUT 30
/* idle timeout (seconds) applied to server TCP sockets; 0 disables, minimum enforced above */
static int nfsrv_sock_idle_timeout = 3600; /* One hour */

/* forward declarations for nfssvc() sub-operations and socket management helpers */
int nfssvc_export(user_addr_t argp);
int nfssvc_exportstats(proc_t p, user_addr_t argp);
int nfssvc_userstats(proc_t p, user_addr_t argp);
int nfssvc_usercount(proc_t p, user_addr_t argp);
int nfssvc_zerostats(void);
int nfssvc_srvstats(proc_t p, user_addr_t argp);
int nfssvc_nfsd(void);
int nfssvc_addsock(socket_t, mbuf_t);
void nfsrv_zapsock(struct nfsrv_sock *);
void nfsrv_slpderef(struct nfsrv_sock *);
void nfsrv_slpfree(struct nfsrv_sock *);

#endif /* CONFIG_NFS_SERVER */
127
128/*
129 * sysctl stuff
130 */
SYSCTL_DECL(_vfs_generic);
/* vfs.generic.nfs: parent node for all NFS sysctls */
SYSCTL_EXTENSIBLE_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");

#if CONFIG_NFS_SERVER
/* vfs.generic.nfs.server: server tuning and statistics knobs */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
/* write-gather delays */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_debug_ctl, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, unprocessed_rpc_current, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_unprocessed_rpc_current, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, unprocessed_rpc_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_unprocessed_rpc_max, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
/* nfsd thread pool and TCP socket management */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
#ifdef NFS_UC_Q_DEBUG
/* socket up-call queue debugging knobs */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif
#endif /* CONFIG_NFS_SERVER */
162
163/* NFS hooks */
164
165/* NFS hooks variables */
/*
 * Registered NFS hook functions.  Both entries are NULL until
 * nfs_register_hooks() installs implementations; the wrappers below
 * treat a NULL entry as a successful no-op.
 */
struct nfs_hooks_in nfsh = {
	.f_vinvalbuf = NULL,
	.f_buf_page_inval = NULL
};
170
171/* NFS hooks registration functions */
172void
173nfs_register_hooks(struct nfs_hooks_in *inh, struct nfs_hooks_out *outh)
174{
175 if (inh) {
176 nfsh.f_vinvalbuf = inh->f_vinvalbuf;
177 nfsh.f_buf_page_inval = inh->f_buf_page_inval;
178 }
179
180 if (outh) {
181 outh->f_get_bsdthreadtask_info = get_bsdthreadtask_info;
182 }
183}
184
185void
186nfs_unregister_hooks(void)
187{
188 memset(s: &nfsh, c: 0, n: sizeof(nfsh));
189}
190
191/* NFS hooks wrappers */
192int
193nfs_vinvalbuf(vnode_t vp, int flags, vfs_context_t ctx, int intrflg)
194{
195 if (nfsh.f_vinvalbuf == NULL) {
196 return 0;
197 }
198
199 return nfsh.f_vinvalbuf(vp, flags, ctx, intrflg);
200}
201
202int
203nfs_buf_page_inval(vnode_t vp, off_t offset)
204{
205 if (nfsh.f_buf_page_inval == NULL) {
206 return 0;
207 }
208
209 return nfsh.f_buf_page_inval(vp, offset);
210}
211
/*
 * When the NFS server is compiled out, syscall parameters marked with
 * this attribute are unused; annotate them so the build stays clean.
 */
#if !CONFIG_NFS_SERVER
#define __no_nfs_server_unused __unused
#else
#define __no_nfs_server_unused /* nothing */
#endif
217
218/*
219 * NFS server system calls
220 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
221 */
222
223#if CONFIG_NFS_SERVER
224static struct nfs_exportfs *
225nfsrv_find_exportfs(const char *ptr)
226{
227 struct nfs_exportfs *nxfs;
228
229 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
230 if (!strncmp(s1: nxfs->nxfs_path, s2: ptr, MAXPATHLEN)) {
231 break;
232 }
233 }
234 if (nxfs && strncmp(s1: nxfs->nxfs_path, s2: ptr, n: strlen(s: nxfs->nxfs_path))) {
235 nxfs = NULL;
236 }
237
238 return nxfs;
239}
240
241static char *
242nfsrv_export_remainder(char *path, char *nxfs_path)
243{
244 int error;
245 vnode_t vp, rvp;
246 struct nameidata nd;
247 size_t pathbuflen = MAXPATHLEN;
248 char real_mntonname[MAXPATHLEN];
249
250 if (!strncmp(s1: path, s2: nxfs_path, n: strlen(s: nxfs_path))) {
251 return path + strlen(s: nxfs_path);
252 }
253
254 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
255 UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs_path), vfs_context_current());
256 error = namei(ndp: &nd);
257 if (error) {
258 return NULL;
259 }
260
261 nameidone(&nd);
262 vp = nd.ni_vp;
263
264 error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
265 vnode_put(vp);
266 if (error) {
267 return NULL;
268 }
269
270 error = vn_getpath_ext(vp: rvp, NULLVP, pathbuf: real_mntonname, len: &pathbuflen, VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
271 vnode_put(vp: rvp);
272
273 if (error || strncmp(s1: path, s2: real_mntonname, n: strlen(s: real_mntonname))) {
274 return NULL;
275 }
276
277 return path + strlen(s: real_mntonname);
278}
279/*
280 * Get file handle system call
281 */
282int
283getfh(
284 proc_t p __no_nfs_server_unused,
285 struct getfh_args *uap __no_nfs_server_unused,
286 __unused int *retval)
287{
288 vnode_t vp;
289 struct nfs_filehandle nfh;
290 int error, fhlen = 0, fidlen;
291 struct nameidata nd;
292 char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
293 size_t pathlen;
294 struct nfs_exportfs *nxfs;
295 struct nfs_export *nx;
296
297 /*
298 * Must be super user
299 */
300 error = proc_suser(p);
301 if (error) {
302 return error;
303 }
304
305 error = copyinstr(uaddr: uap->fname, kaddr: path, MAXPATHLEN, done: &pathlen);
306 if (!error) {
307 error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
308 }
309 if (error) {
310 return error;
311 }
312 /* limit fh size to length specified (or v3 size by default) */
313 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) {
314 fhlen = NFSV3_MAX_FH_SIZE;
315 }
316 fidlen = fhlen - sizeof(struct nfs_exphandle);
317
318 if (!nfsrv_is_initialized()) {
319 return EINVAL;
320 }
321
322 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
323 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
324 error = namei(ndp: &nd);
325 if (error) {
326 return error;
327 }
328 nameidone(&nd);
329
330 vp = nd.ni_vp;
331
332 // find exportfs that matches f_mntonname
333 lck_rw_lock_shared(lck: &nfsrv_export_rwlock);
334 ptr = vfs_statfs(mp: vnode_mount(vp))->f_mntonname;
335 if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
336 /*
337 * The f_mntonname might be a firmlink path. Resolve
338 * it into a physical path and try again.
339 */
340 size_t pathbuflen = MAXPATHLEN;
341 vnode_t rvp;
342
343 error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
344 if (error) {
345 goto out;
346 }
347 error = vn_getpath_ext(vp: rvp, NULLVP, pathbuf: real_mntonname, len: &pathbuflen,
348 VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
349 vnode_put(vp: rvp);
350 if (error) {
351 goto out;
352 }
353 ptr = real_mntonname;
354 nxfs = nfsrv_find_exportfs(ptr);
355 }
356 if (nxfs == NULL) {
357 error = EINVAL;
358 goto out;
359 }
360 // find export that best matches remainder of path
361 if ((ptr = nfsrv_export_remainder(path, nxfs_path: nxfs->nxfs_path)) == NULL) {
362 error = EINVAL;
363 goto out;
364 }
365
366 while (*ptr && (*ptr == '/')) {
367 ptr++;
368 }
369 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
370 size_t len = strlen(s: nx->nx_path);
371 if (len == 0) { // we've hit the export entry for the root directory
372 break;
373 }
374 if (!strncmp(s1: nx->nx_path, s2: ptr, n: len)) {
375 break;
376 }
377 }
378 if (!nx) {
379 error = EINVAL;
380 goto out;
381 }
382
383 bzero(s: &nfh, n: sizeof(nfh));
384 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
385 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
386 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
387 nfh.nfh_xh.nxh_flags = 0;
388 nfh.nfh_xh.nxh_reserved = 0;
389 nfh.nfh_len = fidlen;
390 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
391 if (nfh.nfh_len > (uint32_t)fidlen) {
392 error = EOVERFLOW;
393 }
394 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
395 nfh.nfh_len += sizeof(nfh.nfh_xh);
396 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
397
398out:
399 lck_rw_done(lck: &nfsrv_export_rwlock);
400 vnode_put(vp);
401 if (error) {
402 return error;
403 }
404 /*
405 * At first blush, this may appear to leak a kernel stack
406 * address, but the copyout() never reaches &nfh.nfh_fhp
407 * (sizeof(fhandle_t) < sizeof(nfh)).
408 */
409 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
410 return error;
411}
412
413extern const struct fileops vnops;
414
415/*
416 * syscall for the rpc.lockd to use to translate a NFS file handle into
417 * an open descriptor.
418 *
419 * warning: do not remove the suser() call or this becomes one giant
420 * security hole.
421 */
422int
423fhopen(proc_t p __no_nfs_server_unused,
424 struct fhopen_args *uap __no_nfs_server_unused,
425 int32_t *retval __no_nfs_server_unused)
426{
427 vnode_t vp;
428 struct nfs_filehandle nfh;
429 struct nfs_export *nx;
430 struct nfs_export_options *nxo;
431 struct flock lf;
432 struct fileproc *fp, *nfp;
433 int fmode, error, type;
434 int indx;
435 vfs_context_t ctx = vfs_context_current();
436 kauth_action_t action;
437
438 /*
439 * Must be super user
440 */
441 error = suser(cred: vfs_context_ucred(ctx), acflag: 0);
442 if (error) {
443 return error;
444 }
445
446 if (!nfsrv_is_initialized()) {
447 return EINVAL;
448 }
449
450 fmode = FFLAGS(uap->flags);
451 /* why not allow a non-read/write open for our lockd? */
452 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) {
453 return EINVAL;
454 }
455
456 error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
457 if (error) {
458 return error;
459 }
460 if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
461 (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) {
462 return EINVAL;
463 }
464 error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
465 if (error) {
466 return error;
467 }
468 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
469
470 lck_rw_lock_shared(lck: &nfsrv_export_rwlock);
471 /* now give me my vnode, it gets returned to me with a reference */
472 error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
473 lck_rw_done(lck: &nfsrv_export_rwlock);
474 if (error) {
475 if (error == NFSERR_TRYLATER) {
476 error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
477 }
478 return error;
479 }
480
481 /*
482 * From now on we have to make sure not
483 * to forget about the vnode.
484 * Any error that causes an abort must vnode_put(vp).
485 * Just set error = err and 'goto bad;'.
486 */
487
488 /*
489 * from vn_open
490 */
491 if (vnode_vtype(vp) == VSOCK) {
492 error = EOPNOTSUPP;
493 goto bad;
494 }
495
496 /* disallow write operations on directories */
497 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
498 error = EISDIR;
499 goto bad;
500 }
501
502#if CONFIG_MACF
503 if ((error = mac_vnode_check_open(ctx, vp, acc_mode: fmode))) {
504 goto bad;
505 }
506#endif
507
508 /* compute action to be authorized */
509 action = 0;
510 if (fmode & FREAD) {
511 action |= KAUTH_VNODE_READ_DATA;
512 }
513 if (fmode & (FWRITE | O_TRUNC)) {
514 action |= KAUTH_VNODE_WRITE_DATA;
515 }
516 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) {
517 goto bad;
518 }
519
520 if ((error = VNOP_OPEN(vp, fmode, ctx))) {
521 goto bad;
522 }
523 if ((error = vnode_ref_ext(vp, fmode, 0))) {
524 goto bad;
525 }
526
527 /*
528 * end of vn_open code
529 */
530
531 // starting here... error paths should call vn_close/vnode_put
532 if ((error = falloc(p, &nfp, &indx)) != 0) {
533 vn_close(vp, flags: fmode & FMASK, ctx);
534 goto bad;
535 }
536 fp = nfp;
537
538 fp->fp_glob->fg_flag = fmode & FMASK;
539 fp->fp_glob->fg_ops = &vnops;
540 fp_set_data(fp, fg_data: vp);
541
542 // XXX do we really need to support this with fhopen()?
543 if (fmode & (O_EXLOCK | O_SHLOCK)) {
544 lf.l_whence = SEEK_SET;
545 lf.l_start = 0;
546 lf.l_len = 0;
547 if (fmode & O_EXLOCK) {
548 lf.l_type = F_WRLCK;
549 } else {
550 lf.l_type = F_RDLCK;
551 }
552 type = F_FLOCK;
553 if ((fmode & FNONBLOCK) == 0) {
554 type |= F_WAIT;
555 }
556 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf, type, ctx, NULL))) {
557 struct vfs_context context = *vfs_context_current();
558 /* Modify local copy (to not damage thread copy) */
559 context.vc_ucred = fp->fp_glob->fg_cred;
560
561 vn_close(vp, flags: fp->fp_glob->fg_flag, ctx: &context);
562 fp_free(p, fd: indx, fp);
563 goto bad;
564 }
565 fp->fp_glob->fg_flag |= FWASLOCKED;
566 }
567
568 vnode_put(vp);
569
570 proc_fdlock(p);
571 procfdtbl_releasefd(p, fd: indx, NULL);
572 fp_drop(p, fd: indx, fp, locked: 1);
573 proc_fdunlock(p);
574
575 *retval = indx;
576 return 0;
577
578bad:
579 vnode_put(vp);
580 return error;
581}
582
583/*
584 * NFS server pseudo system call
585 */
586int
587nfssvc(proc_t p __no_nfs_server_unused,
588 struct nfssvc_args *uap __no_nfs_server_unused,
589 __unused int *retval)
590{
591 mbuf_t nam;
592 struct user_nfsd_args user_nfsdarg;
593 socket_t so;
594 int error;
595
596 AUDIT_ARG(cmd, uap->flag);
597
598 /*
599 * Must be super user for NFSSVC_NFSD and NFSSVC_ADDSOCK operations.
600 */
601 if ((uap->flag & (NFSSVC_NFSD | NFSSVC_ADDSOCK)) && ((error = proc_suser(p)))) {
602 return error;
603 }
604#if CONFIG_MACF
605 error = mac_system_check_nfsd(cred: kauth_cred_get());
606 if (error) {
607 return error;
608 }
609#endif
610
611 /* make sure NFS server data structures have been initialized */
612 nfsrv_init();
613
614 if (uap->flag & NFSSVC_ADDSOCK) {
615 if (IS_64BIT_PROCESS(p)) {
616 error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
617 } else {
618 struct nfsd_args tmp_args;
619 error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
620 if (error == 0) {
621 user_nfsdarg.sock = tmp_args.sock;
622 user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
623 user_nfsdarg.namelen = tmp_args.namelen;
624 }
625 }
626 if (error) {
627 return error;
628 }
629 /* get the socket */
630 error = file_socket(user_nfsdarg.sock, &so);
631 if (error) {
632 return error;
633 }
634 /* Get the client address for connected sockets. */
635 if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
636 nam = NULL;
637 } else {
638 error = sockargs(mp: &nam, data: user_nfsdarg.name, buflen: user_nfsdarg.namelen, type: MBUF_TYPE_SONAME);
639 if (error) {
640 /* drop the iocount file_socket() grabbed on the file descriptor */
641 file_drop(user_nfsdarg.sock);
642 return error;
643 }
644 }
645 /*
646 * nfssvc_addsock() will grab a retain count on the socket
647 * to keep the socket from being closed when nfsd closes its
648 * file descriptor for it.
649 */
650 error = nfssvc_addsock(so, nam);
651 /* drop the iocount file_socket() grabbed on the file descriptor */
652 file_drop(user_nfsdarg.sock);
653 } else if (uap->flag & NFSSVC_NFSD) {
654 error = nfssvc_nfsd();
655 } else if (uap->flag & NFSSVC_EXPORT) {
656 error = nfssvc_export(argp: uap->argp);
657 } else if (uap->flag & NFSSVC_EXPORTSTATS) {
658 error = nfssvc_exportstats(p, argp: uap->argp);
659 } else if (uap->flag & NFSSVC_USERSTATS) {
660 error = nfssvc_userstats(p, argp: uap->argp);
661 } else if (uap->flag & NFSSVC_USERCOUNT) {
662 error = nfssvc_usercount(p, argp: uap->argp);
663 } else if (uap->flag & NFSSVC_ZEROSTATS) {
664 error = nfssvc_zerostats();
665 } else if (uap->flag & NFSSVC_SRVSTATS) {
666 error = nfssvc_srvstats(p, argp: uap->argp);
667 } else {
668 error = EINVAL;
669 }
670 if (error == EINTR || error == ERESTART) {
671 error = 0;
672 }
673 return error;
674}
675
676/*
677 * Adds a socket to the list for servicing by nfsds.
678 */
679int
680nfssvc_addsock(socket_t so, mbuf_t mynam)
681{
682 struct nfsrv_sock *slp;
683 int error = 0, sodomain, sotype, soprotocol, on = 1;
684 int first;
685 struct timeval timeo;
686 uint64_t sobufsize;
687
688 /* make sure mbuf constants are set up */
689 if (!nfs_mbuf_mhlen) {
690 nfs_mbuf_init();
691 }
692
693 sock_gettype(so, domain: &sodomain, type: &sotype, protocol: &soprotocol);
694
695 /* There should be only one UDP socket for each of IPv4 and IPv6 */
696 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
697 mbuf_freem(mbuf: mynam);
698 return EEXIST;
699 }
700 if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
701 mbuf_freem(mbuf: mynam);
702 return EEXIST;
703 }
704
705 /* Set protocol options and reserve some space (for UDP). */
706 if (sotype == SOCK_STREAM) {
707 error = nfsrv_check_exports_allow_address(mynam);
708 if (error) {
709 log(LOG_INFO, "nfsvc_addsock:: nfsrv_check_exports_allow_address(myname) returned %d\n", error);
710 mbuf_freem(mbuf: mynam);
711 return error;
712 }
713 sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, optval: &on, optlen: sizeof(on));
714 }
715 if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) {
716 sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, optval: &on, optlen: sizeof(on));
717 }
718
719 /* Set socket buffer sizes for UDP/TCP */
720 sobufsize = (sotype == SOCK_DGRAM) ? NFS_UDPSOCKBUF : NFSRV_TCPSOCKBUF;
721 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, optval: &sobufsize, optlen: sizeof(sobufsize));
722 if (error) {
723 log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_SNDBUF to %llu error(s) %d\n", sobufsize, error);
724 }
725
726 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, optval: &sobufsize, optlen: sizeof(sobufsize));
727 if (error) {
728 log(LOG_INFO, "nfssvc_addsock: socket buffer setting SO_RCVBUF to %llu error(s) %d\n", sobufsize, error);
729 }
730 sock_nointerrupt(so, on: 0);
731
732 /*
733 * Set socket send/receive timeouts.
734 * Receive timeout shouldn't matter, but setting the send timeout
735 * will make sure that an unresponsive client can't hang the server.
736 */
737 timeo.tv_usec = 0;
738 timeo.tv_sec = 1;
739 error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, optval: &timeo, optlen: sizeof(timeo));
740 if (error) {
741 log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_RCVTIMEO error(s) %d\n", error);
742 }
743
744 timeo.tv_sec = 30;
745 error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, optval: &timeo, optlen: sizeof(timeo));
746 if (error) {
747 log(LOG_INFO, "nfssvc_addsock: socket timeout setting SO_SNDTIMEO error(s) %d\n", error);
748 }
749
750 slp = kalloc_type(struct nfsrv_sock, Z_WAITOK | Z_ZERO | Z_NOFAIL);
751 lck_rw_init(lck: &slp->ns_rwlock, grp: &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
752 lck_mtx_init(lck: &slp->ns_wgmutex, grp: &nfsrv_slp_mutex_group, LCK_ATTR_NULL);
753
754 lck_mtx_lock(lck: &nfsd_mutex);
755
756 if (soprotocol == IPPROTO_UDP) {
757 if (sodomain == AF_INET) {
758 /* There should be only one UDP/IPv4 socket */
759 if (nfsrv_udpsock) {
760 lck_mtx_unlock(lck: &nfsd_mutex);
761 nfsrv_slpfree(slp);
762 mbuf_freem(mbuf: mynam);
763 return EEXIST;
764 }
765 nfsrv_udpsock = slp;
766 }
767 if (sodomain == AF_INET6) {
768 /* There should be only one UDP/IPv6 socket */
769 if (nfsrv_udp6sock) {
770 lck_mtx_unlock(lck: &nfsd_mutex);
771 nfsrv_slpfree(slp);
772 mbuf_freem(mbuf: mynam);
773 return EEXIST;
774 }
775 nfsrv_udp6sock = slp;
776 }
777 }
778
779 /* add the socket to the list */
780 first = TAILQ_EMPTY(&nfsrv_socklist);
781 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
782 if (sotype == SOCK_STREAM) {
783 nfsrv_sock_tcp_cnt++;
784 if (nfsrv_sock_idle_timeout < 0) {
785 nfsrv_sock_idle_timeout = 0;
786 }
787 if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) {
788 nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
789 }
790 /*
791 * Possibly start or stop the idle timer. We only start the idle timer when
792 * we have more than 2 * nfsd_thread_max connections. If the idle timer is
793 * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
794 * the number of connections.
795 */
796 if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
797 if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
798 if (nfsrv_idlesock_timer_on) {
799 thread_call_cancel(call: nfsrv_idlesock_timer_call);
800 nfsrv_idlesock_timer_on = 0;
801 }
802 } else {
803 struct nfsrv_sock *old_slp;
804 struct timeval now;
805 microuptime(tv: &now);
806 time_t time_to_wait = nfsrv_sock_idle_timeout;
807 /*
808 * Get the oldest tcp socket and calculate the
809 * earliest time for the next idle timer to fire
810 * based on the possibly updated nfsrv_sock_idle_timeout
811 */
812 TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
813 if (old_slp->ns_sotype == SOCK_STREAM) {
814 time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
815 if (time_to_wait < 1) {
816 time_to_wait = 1;
817 }
818 break;
819 }
820 }
821 /*
822 * If we have a timer scheduled, but if its going to fire too late,
823 * turn it off.
824 */
825 if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
826 thread_call_cancel(call: nfsrv_idlesock_timer_call);
827 nfsrv_idlesock_timer_on = 0;
828 }
829 /* Schedule the idle thread if it isn't already */
830 if (!nfsrv_idlesock_timer_on) {
831 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
832 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
833 }
834 }
835 }
836 }
837
838 sock_retain(so); /* grab a retain count on the socket */
839 slp->ns_so = so;
840 slp->ns_sotype = sotype;
841 slp->ns_nam = mynam;
842
843 /* set up the socket up-call */
844 nfsrv_uc_addsock(slp, first);
845
846 /* mark that the socket is not in the nfsrv_sockwg list */
847 slp->ns_wgq.tqe_next = SLPNOLIST;
848
849 slp->ns_flag = SLP_VALID | SLP_NEEDQ;
850
851 nfsrv_wakenfsd(slp);
852 lck_mtx_unlock(lck: &nfsd_mutex);
853
854 return 0;
855}
856
857/*
858 * nfssvc_nfsd()
859 *
860 * nfsd theory of operation:
861 *
862 * The first nfsd thread stays in user mode accepting new TCP connections
863 * which are then added via the "addsock" call. The rest of the nfsd threads
864 * simply call into the kernel and remain there in a loop handling NFS
865 * requests until killed by a signal.
866 *
867 * There's a list of nfsd threads (nfsd_head).
868 * There's an nfsd queue that contains only those nfsds that are
869 * waiting for work to do (nfsd_queue).
870 *
871 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
872 * managing the work on the sockets:
873 * nfsrv_sockwait - sockets w/new data waiting to be worked on
874 * nfsrv_sockwork - sockets being worked on which may have more work to do
875 * nfsrv_sockwg -- sockets which have pending write gather data
876 * When a socket receives data, if it is not currently queued, it
877 * will be placed at the end of the "wait" queue.
878 * Whenever a socket needs servicing we make sure it is queued and
879 * wake up a waiting nfsd (if there is one).
880 *
881 * nfsds will service at most 8 requests from the same socket before
882 * defecting to work on another socket.
883 * nfsds will defect immediately if there are any sockets in the "wait" queue
884 * nfsds looking for a socket to work on check the "wait" queue first and
885 * then check the "work" queue.
886 * When an nfsd starts working on a socket, it removes it from the head of
887 * the queue it's currently on and moves it to the end of the "work" queue.
888 * When nfsds are checking the queues for work, any sockets found not to
889 * have any work are simply dropped from the queue.
890 *
891 */
892int
893nfssvc_nfsd(void)
894{
895 mbuf_t m, mrep = NULL;
896 struct nfsrv_sock *slp;
897 struct nfsd *nfsd;
898 struct nfsrv_descript *nd = NULL;
899 int error = 0, cacherep, writes_todo;
900 int siz, procrastinate, opcnt = 0;
901 time_t cur_usec;
902 struct timeval now;
903 struct vfs_context context;
904 struct timespec to;
905
906#ifndef nolint
907 cacherep = RC_DOIT;
908 writes_todo = 0;
909#endif
910
911 nfsd = kalloc_type(struct nfsd, Z_WAITOK | Z_ZERO | Z_NOFAIL);
912 lck_mtx_lock(lck: &nfsd_mutex);
913 if (nfsd_thread_count++ == 0) {
914 nfsrv_initcache(); /* Init the server request cache */
915 }
916 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
917 lck_mtx_unlock(lck: &nfsd_mutex);
918
919 context.vc_thread = current_thread();
920
921 /* Set time out so that nfsd threads can wake up a see if they are still needed. */
922 to.tv_sec = 5;
923 to.tv_nsec = 0;
924
925 /*
926 * Loop getting rpc requests until SIGKILL.
927 */
928 for (;;) {
929 if (nfsd_thread_max <= 0) {
930 /* NFS server shutting down, get out ASAP */
931 error = EINTR;
932 slp = nfsd->nfsd_slp;
933 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
934 /* already have some work to do */
935 error = 0;
936 slp = nfsd->nfsd_slp;
937 } else {
938 /* need to find work to do */
939 error = 0;
940 lck_mtx_lock(lck: &nfsd_mutex);
941 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
942 if (nfsd_thread_count > nfsd_thread_max) {
943 /*
944 * If we have no socket and there are more
945 * nfsd threads than configured, let's exit.
946 */
947 error = 0;
948 goto done;
949 }
950 nfsd->nfsd_flag |= NFSD_WAITING;
951 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
952 error = msleep(chan: nfsd, mtx: &nfsd_mutex, PSOCK | PCATCH, wmesg: "nfsd", ts: &to);
953 if (error) {
954 if (nfsd->nfsd_flag & NFSD_WAITING) {
955 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
956 nfsd->nfsd_flag &= ~NFSD_WAITING;
957 }
958 if (error == EWOULDBLOCK) {
959 continue;
960 }
961 goto done;
962 }
963 }
964 slp = nfsd->nfsd_slp;
965 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
966 /* look for a socket to work on in the wait queue */
967 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
968 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
969 /* remove from the head of the queue */
970 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
971 slp->ns_flag &= ~SLP_WAITQ;
972 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
973 break;
974 }
975 /* nothing to do, so skip this socket */
976 lck_rw_done(lck: &slp->ns_rwlock);
977 }
978 }
979 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
980 /* look for a socket to work on in the work queue */
981 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
982 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
983 /* remove from the head of the queue */
984 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
985 slp->ns_flag &= ~SLP_WORKQ;
986 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) {
987 break;
988 }
989 /* nothing to do, so skip this socket */
990 lck_rw_done(lck: &slp->ns_rwlock);
991 }
992 }
993 if (!nfsd->nfsd_slp && slp) {
994 /* we found a socket to work on, grab a reference */
995 slp->ns_sref++;
996 microuptime(tv: &now);
997 slp->ns_timestamp = now.tv_sec;
998 /* We keep the socket list in least recently used order for reaping idle sockets */
999 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1000 TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
1001 nfsd->nfsd_slp = slp;
1002 opcnt = 0;
1003 /* and put it at the back of the work queue */
1004 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
1005 slp->ns_flag |= SLP_WORKQ;
1006 lck_rw_done(lck: &slp->ns_rwlock);
1007 }
1008 lck_mtx_unlock(lck: &nfsd_mutex);
1009 if (!slp) {
1010 continue;
1011 }
1012 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
1013 if (slp->ns_flag & SLP_VALID) {
1014 if ((slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)) == SLP_NEEDQ) {
1015 slp->ns_flag &= ~SLP_NEEDQ;
1016 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
1017 }
1018 if (slp->ns_flag & SLP_DISCONN) {
1019 nfsrv_zapsock(slp);
1020 }
1021 error = nfsrv_dorec(slp, nfsd, &nd);
1022 if (error == EINVAL) { // RPCSEC_GSS drop
1023 if (slp->ns_sotype == SOCK_STREAM) {
1024 nfsrv_zapsock(slp); // drop connection
1025 }
1026 }
1027 writes_todo = 0;
1028 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
1029 microuptime(tv: &now);
1030 cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
1031 if (slp->ns_wgtime <= cur_usec) {
1032 error = 0;
1033 cacherep = RC_DOIT;
1034 writes_todo = 1;
1035 }
1036 slp->ns_flag &= ~SLP_DOWRITES;
1037 }
1038 nfsd->nfsd_flag |= NFSD_REQINPROG;
1039 }
1040 lck_rw_done(lck: &slp->ns_rwlock);
1041 }
1042 if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
1043 if (nd) {
1044 nfsm_chain_cleanup(&nd->nd_nmreq);
1045 if (nd->nd_nam2) {
1046 mbuf_freem(mbuf: nd->nd_nam2);
1047 }
1048 if (IS_VALID_CRED(nd->nd_cr)) {
1049 kauth_cred_unref(&nd->nd_cr);
1050 }
1051 if (nd->nd_gss_context) {
1052 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1053 }
1054 NFS_ZFREE(nfsrv_descript_zone, nd);
1055 }
1056 nfsd->nfsd_slp = NULL;
1057 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1058 if (slp) {
1059 nfsrv_slpderef(slp);
1060 }
1061 if (nfsd_thread_max <= 0) {
1062 break;
1063 }
1064 continue;
1065 }
1066 if (nd) {
1067 microuptime(tv: &nd->nd_starttime);
1068 if (nd->nd_nam2) {
1069 nd->nd_nam = nd->nd_nam2;
1070 } else {
1071 nd->nd_nam = slp->ns_nam;
1072 }
1073
1074 cacherep = nfsrv_getcache(nd, slp, &mrep);
1075
1076 if (nfsrv_require_resv_port) {
1077 /* Check if source port is a reserved port */
1078 in_port_t port = 0;
1079 struct sockaddr *saddr = mbuf_data(mbuf: nd->nd_nam);
1080
1081 if (saddr->sa_family == AF_INET) {
1082 port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
1083 } else if (saddr->sa_family == AF_INET6) {
1084 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
1085 }
1086 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
1087 nd->nd_procnum = NFSPROC_NOOP;
1088 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
1089 cacherep = RC_DOIT;
1090 }
1091 }
1092 }
1093
1094 /*
1095 * Loop to get all the write RPC replies that have been
1096 * gathered together.
1097 */
1098 do {
1099 switch (cacherep) {
1100 case RC_DOIT:
1101 if (nd && (nd->nd_vers == NFS_VER3)) {
1102 procrastinate = nfsrv_wg_delay_v3;
1103 } else {
1104 procrastinate = nfsrv_wg_delay;
1105 }
1106 lck_rw_lock_shared(lck: &nfsrv_export_rwlock);
1107 context.vc_ucred = NULL;
1108 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) {
1109 error = nfsrv_writegather(&nd, slp, &context, &mrep);
1110 } else {
1111 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
1112 }
1113 lck_rw_done(lck: &nfsrv_export_rwlock);
1114 if (mrep == NULL) {
1115 /*
1116 * If this is a stream socket and we are not going
1117 * to send a reply we better close the connection
1118 * so the client doesn't hang.
1119 */
1120 if (error && slp->ns_sotype == SOCK_STREAM) {
1121 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
1122 nfsrv_zapsock(slp);
1123 lck_rw_done(lck: &slp->ns_rwlock);
1124 printf("NFS server: NULL reply from proc = %d error = %d\n",
1125 nd->nd_procnum, error);
1126 }
1127 break;
1128 }
1129 if (error) {
1130 OSAddAtomic64(1, &nfsrvstats.srv_errs);
1131 nfsrv_updatecache(nd, FALSE, mrep);
1132 if (nd->nd_nam2) {
1133 mbuf_freem(mbuf: nd->nd_nam2);
1134 nd->nd_nam2 = NULL;
1135 }
1136 break;
1137 }
1138 OSAddAtomic64(1, &nfsrvstats.srvrpccntv3[nd->nd_procnum]);
1139 nfsrv_updatecache(nd, TRUE, mrep);
1140 OS_FALLTHROUGH;
1141
1142 case RC_REPLY:
1143 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
1144 /*
1145 * Need to checksum or encrypt the reply
1146 */
1147 error = nfs_gss_svc_protect_reply(nd, mrep);
1148 if (error) {
1149 mbuf_freem(mbuf: mrep);
1150 break;
1151 }
1152 }
1153
1154 /*
1155 * Get the total size of the reply
1156 */
1157 m = mrep;
1158 siz = 0;
1159 while (m) {
1160 siz += mbuf_len(mbuf: m);
1161 m = mbuf_next(mbuf: m);
1162 }
1163 if (siz <= 0 || siz > NFS_MAXPACKET) {
1164 printf("mbuf siz=%d\n", siz);
1165 panic("Bad nfs svc reply");
1166 }
1167 m = mrep;
1168 mbuf_pkthdr_setlen(mbuf: m, len: siz);
1169 error = mbuf_pkthdr_setrcvif(mbuf: m, NULL);
1170 if (error) {
1171 panic("nfsd setrcvif failed: %d", error);
1172 }
1173 /*
1174 * For stream protocols, prepend a Sun RPC
1175 * Record Mark.
1176 */
1177 if (slp->ns_sotype == SOCK_STREAM) {
1178 error = mbuf_prepend(mbuf: &m, NFSX_UNSIGNED, how: MBUF_WAITOK);
1179 if (!error) {
1180 *(u_int32_t*)mbuf_data(mbuf: m) = htonl(0x80000000 | siz);
1181 }
1182 }
1183 if (!error) {
1184 if (slp->ns_flag & SLP_VALID) {
1185 error = nfsrv_send(slp, nd->nd_nam2, m);
1186 } else {
1187 error = EPIPE;
1188 mbuf_freem(mbuf: m);
1189 }
1190 } else {
1191 mbuf_freem(mbuf: m);
1192 }
1193 mrep = NULL;
1194 if (nd->nd_nam2) {
1195 mbuf_freem(mbuf: nd->nd_nam2);
1196 nd->nd_nam2 = NULL;
1197 }
1198 if (error == EPIPE) {
1199 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
1200 nfsrv_zapsock(slp);
1201 lck_rw_done(lck: &slp->ns_rwlock);
1202 }
1203 if (error == EINTR || error == ERESTART) {
1204 nfsm_chain_cleanup(&nd->nd_nmreq);
1205 if (IS_VALID_CRED(nd->nd_cr)) {
1206 kauth_cred_unref(&nd->nd_cr);
1207 }
1208 if (nd->nd_gss_context) {
1209 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1210 }
1211 NFS_ZFREE(nfsrv_descript_zone, nd);
1212 nfsrv_slpderef(slp);
1213 lck_mtx_lock(lck: &nfsd_mutex);
1214 goto done;
1215 }
1216 break;
1217 case RC_DROPIT:
1218 mbuf_freem(mbuf: nd->nd_nam2);
1219 nd->nd_nam2 = NULL;
1220 break;
1221 }
1222 ;
1223 opcnt++;
1224 if (nd) {
1225 nfsm_chain_cleanup(&nd->nd_nmreq);
1226 if (nd->nd_nam2) {
1227 mbuf_freem(mbuf: nd->nd_nam2);
1228 }
1229 if (IS_VALID_CRED(nd->nd_cr)) {
1230 kauth_cred_unref(&nd->nd_cr);
1231 }
1232 if (nd->nd_gss_context) {
1233 nfs_gss_svc_ctx_deref(nd->nd_gss_context);
1234 }
1235 NFS_ZFREE(nfsrv_descript_zone, nd);
1236 }
1237
1238 /*
1239 * Check to see if there are outstanding writes that
1240 * need to be serviced.
1241 */
1242 writes_todo = 0;
1243 if (slp->ns_wgtime) {
1244 microuptime(tv: &now);
1245 cur_usec = (now.tv_sec * 1000000) + now.tv_usec;
1246 if (slp->ns_wgtime <= cur_usec) {
1247 cacherep = RC_DOIT;
1248 writes_todo = 1;
1249 }
1250 }
1251 } while (writes_todo);
1252
1253 nd = NULL;
1254 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
1255 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
1256 error = nfsrv_dorec(slp, nfsd, &nd);
1257 if (error == EINVAL) { // RPCSEC_GSS drop
1258 if (slp->ns_sotype == SOCK_STREAM) {
1259 nfsrv_zapsock(slp); // drop connection
1260 }
1261 }
1262 lck_rw_done(lck: &slp->ns_rwlock);
1263 }
1264 if (!nd) {
1265 /* drop our reference on the socket */
1266 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
1267 nfsd->nfsd_slp = NULL;
1268 nfsrv_slpderef(slp);
1269 }
1270 }
1271 lck_mtx_lock(lck: &nfsd_mutex);
1272done:
1273 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
1274 kfree_type(struct nfsd, nfsd);
1275 if (--nfsd_thread_count == 0) {
1276 nfsrv_cleanup();
1277 }
1278 lck_mtx_unlock(lck: &nfsd_mutex);
1279 return error;
1280}
1281
1282int
1283nfssvc_export(user_addr_t argp)
1284{
1285 int error = 0, is_64bit;
1286 struct user_nfs_export_args unxa;
1287 vfs_context_t ctx = vfs_context_current();
1288
1289 is_64bit = vfs_context_is64bit(ctx);
1290
1291 /* copy in pointers to path and export args */
1292 if (is_64bit) {
1293 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
1294 } else {
1295 struct nfs_export_args tnxa;
1296 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
1297 if (error == 0) {
1298 /* munge into LP64 version of nfs_export_args structure */
1299 unxa.nxa_fsid = tnxa.nxa_fsid;
1300 unxa.nxa_expid = tnxa.nxa_expid;
1301 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
1302 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
1303 unxa.nxa_flags = tnxa.nxa_flags;
1304 unxa.nxa_netcount = tnxa.nxa_netcount;
1305 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
1306 }
1307 }
1308 if (error) {
1309 return error;
1310 }
1311
1312 error = nfsrv_export(&unxa, ctx);
1313
1314 return error;
1315}
1316
1317int
1318nfssvc_exportstats(proc_t p, user_addr_t argp)
1319{
1320 int error = 0;
1321 uint pos;
1322 struct nfs_exportfs *nxfs;
1323 struct nfs_export *nx;
1324 struct nfs_export_stat_desc stat_desc = {};
1325 struct nfs_export_stat_rec statrec;
1326 uint numExports, totlen, count;
1327 size_t numRecs;
1328 user_addr_t oldp, newlenp;
1329 user_size_t oldlen, newlen;
1330 struct user_iovec iov[2];
1331
1332 error = copyin_user_iovec_array(uaddr: argp, spacetype: IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, count: 2, dst: iov);
1333 if (error) {
1334 return error;
1335 }
1336
1337 oldp = iov[0].iov_base;
1338 oldlen = iov[0].iov_len;
1339 newlenp = iov[1].iov_base;
1340 newlen = iov[1].iov_len;
1341
1342 /* setup export stat descriptor */
1343 stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
1344
1345 if (!nfsrv_is_initialized()) {
1346 stat_desc.rec_count = 0;
1347 if (oldp && (oldlen >= sizeof(struct nfs_export_stat_desc))) {
1348 error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1349 }
1350 size_t stat_desc_size = sizeof(struct nfs_export_stat_desc);
1351 if (!error && newlenp && newlen >= sizeof(stat_desc_size)) {
1352 error = copyout(&stat_desc_size, newlenp, sizeof(stat_desc_size));
1353 }
1354 return error;
1355 }
1356
1357 /* Count the number of exported directories */
1358 lck_rw_lock_shared(lck: &nfsrv_export_rwlock);
1359 numExports = 0;
1360 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next)
1361 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next)
1362 numExports += 1;
1363
1364 /* update stat descriptor's export record count */
1365 stat_desc.rec_count = numExports;
1366
1367 /* calculate total size of required buffer */
1368 totlen = sizeof(struct nfs_export_stat_desc) + (numExports * sizeof(struct nfs_export_stat_rec));
1369
1370 /* Check caller's buffer */
1371 if (oldp == 0 || newlenp == 0) {
1372 lck_rw_done(lck: &nfsrv_export_rwlock);
1373 /* indicate required buffer len */
1374 if (newlenp && newlen >= sizeof(totlen)) {
1375 error = copyout(&totlen, newlenp, sizeof(totlen));
1376 }
1377 return error;
1378 }
1379
1380 /* We require the caller's buffer to be at least large enough to hold the descriptor */
1381 if (oldlen < sizeof(struct nfs_export_stat_desc) || newlen < sizeof(totlen)) {
1382 lck_rw_done(lck: &nfsrv_export_rwlock);
1383 /* indicate required buffer len */
1384 if (newlenp && newlen >= sizeof(totlen)) {
1385 (void)copyout(&totlen, newlenp, sizeof(totlen));
1386 }
1387 return ENOMEM;
1388 }
1389
1390 /* indicate required buffer len */
1391 error = copyout(&totlen, newlenp, sizeof(totlen));
1392 if (error) {
1393 lck_rw_done(lck: &nfsrv_export_rwlock);
1394 return error;
1395 }
1396
1397 /* check if export table is empty */
1398 if (!numExports) {
1399 lck_rw_done(lck: &nfsrv_export_rwlock);
1400 error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1401 return error;
1402 }
1403
1404 /* calculate how many actual export stat records fit into caller's buffer */
1405 numRecs = (totlen - sizeof(struct nfs_export_stat_desc)) / sizeof(struct nfs_export_stat_rec);
1406
1407 if (!numRecs) {
1408 /* caller's buffer can only accomodate descriptor */
1409 lck_rw_done(lck: &nfsrv_export_rwlock);
1410 stat_desc.rec_count = 0;
1411 error = copyout(&stat_desc, oldp, sizeof(struct nfs_export_stat_desc));
1412 return error;
1413 }
1414
1415 /* adjust to actual number of records to copyout to caller's buffer */
1416 if (numRecs > numExports) {
1417 numRecs = numExports;
1418 }
1419
1420 /* set actual number of records we are returning */
1421 stat_desc.rec_count = numRecs;
1422
1423 /* first copy out the stat descriptor */
1424 pos = 0;
1425 error = copyout(&stat_desc, oldp + pos, sizeof(struct nfs_export_stat_desc));
1426 if (error) {
1427 lck_rw_done(lck: &nfsrv_export_rwlock);
1428 return error;
1429 }
1430 pos += sizeof(struct nfs_export_stat_desc);
1431
1432 /* Loop through exported directories */
1433 count = 0;
1434 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
1435 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
1436 if (count >= numRecs) {
1437 break;
1438 }
1439
1440 /* build exported filesystem path */
1441 memset(s: statrec.path, c: 0, n: sizeof(statrec.path));
1442 snprintf(statrec.path, count: sizeof(statrec.path), "%s%s%s",
1443 nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
1444 nx->nx_path);
1445
1446 /* build the 64-bit export stat counters */
1447 statrec.ops = ((uint64_t)nx->nx_stats.ops.hi << 32) |
1448 nx->nx_stats.ops.lo;
1449 statrec.bytes_read = ((uint64_t)nx->nx_stats.bytes_read.hi << 32) |
1450 nx->nx_stats.bytes_read.lo;
1451 statrec.bytes_written = ((uint64_t)nx->nx_stats.bytes_written.hi << 32) |
1452 nx->nx_stats.bytes_written.lo;
1453 error = copyout(&statrec, oldp + pos, sizeof(statrec));
1454 if (error) {
1455 lck_rw_done(lck: &nfsrv_export_rwlock);
1456 return error;
1457 }
1458 /* advance buffer position */
1459 pos += sizeof(statrec);
1460 }
1461 }
1462 lck_rw_done(lck: &nfsrv_export_rwlock);
1463
1464 return error;
1465}
1466
/*
 * Handle NFSSVC_USERSTATS: copy out active-user statistics for each export.
 *
 * argp points to a two-entry iovec: iov[0] is the caller's record buffer
 * (oldp/oldlen) and iov[1] (newlenp/newlen) receives the total buffer size
 * required to hold all records, sysctl-style.  The output stream is a
 * struct nfs_user_stat_desc header followed by, for each export, one path
 * record and then one user record per active user node.  If the caller's
 * buffer runs out we stop copying but keep accumulating bytes_total so the
 * required size can still be reported.
 */
int
nfssvc_userstats(proc_t p, user_addr_t argp)
{
	int error = 0;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;
	struct nfs_active_user_list *ulist;
	struct nfs_user_stat_desc ustat_desc = {};
	struct nfs_user_stat_node *unode, *unode_next;
	struct nfs_user_stat_user_rec ustat_rec;
	struct nfs_user_stat_path_rec upath_rec;
	uint bytes_total, recs_copied, pos;
	size_t bytes_avail;
	user_addr_t oldp, newlenp;
	user_size_t oldlen, newlen;
	struct user_iovec iov[2];

	error = copyin_user_iovec_array(uaddr: argp, spacetype: IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, count: 2, dst: iov);
	if (error) {
		return error;
	}

	oldp = iov[0].iov_base;
	oldlen = iov[0].iov_len;
	newlenp = iov[1].iov_base;
	newlen = iov[1].iov_len;

	/* init structures used for copying out of kernel */
	ustat_desc.rec_vers = NFS_USER_STAT_REC_VERSION;
	ustat_rec.rec_type = NFS_USER_STAT_USER_REC;
	upath_rec.rec_type = NFS_USER_STAT_PATH_REC;

	/* initialize counters */
	bytes_total = sizeof(struct nfs_user_stat_desc);
	bytes_avail = oldlen;
	recs_copied = 0;

	if (!nfsrv_is_initialized()) { /* NFS server not initialized, so no stats */
		goto ustat_skip;
	}

	/* reclaim old expired user nodes */
	nfsrv_active_user_list_reclaim();

	/* reserve space for the buffer descriptor */
	if (bytes_avail >= sizeof(struct nfs_user_stat_desc)) {
		bytes_avail -= sizeof(struct nfs_user_stat_desc);
	} else {
		bytes_avail = 0;
	}

	/* put buffer position past the buffer descriptor */
	pos = sizeof(struct nfs_user_stat_desc);

	/* Loop through exported directories */
	lck_rw_lock_shared(lck: &nfsrv_export_rwlock);
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
			/* copy out path */
			if (bytes_avail >= sizeof(struct nfs_user_stat_path_rec)) {
				memset(s: upath_rec.path, c: 0, n: sizeof(upath_rec.path));
				snprintf(upath_rec.path, count: sizeof(upath_rec.path), "%s%s%s",
				    nxfs->nxfs_path, ((nxfs->nxfs_path[1] && nx->nx_path[0]) ? "/" : ""),
				    nx->nx_path);

				error = copyout(&upath_rec, oldp + pos, sizeof(struct nfs_user_stat_path_rec));
				if (error) {
					/* punt */
					goto ustat_done;
				}

				pos += sizeof(struct nfs_user_stat_path_rec);
				bytes_avail -= sizeof(struct nfs_user_stat_path_rec);
				recs_copied++;
			} else {
				/* Caller's buffer is exhausted */
				bytes_avail = 0;
			}

			/* total required size still counts skipped records */
			bytes_total += sizeof(struct nfs_user_stat_path_rec);

			/* Scan through all user nodes of this export */
			ulist = &nx->nx_user_list;
			lck_mtx_lock(lck: &ulist->user_mutex);
			for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
				unode_next = TAILQ_NEXT(unode, lru_link);

				/* copy out node if there is space */
				if (bytes_avail >= sizeof(struct nfs_user_stat_user_rec)) {
					/* prepare a user stat rec for copying out */
					ustat_rec.uid = unode->uid;
					memset(s: &ustat_rec.sock, c: 0, n: sizeof(ustat_rec.sock));
					bcopy(src: &unode->sock, dst: &ustat_rec.sock, n: unode->sock.ss_len);
					ustat_rec.ops = unode->ops;
					ustat_rec.bytes_read = unode->bytes_read;
					ustat_rec.bytes_written = unode->bytes_written;
					ustat_rec.tm_start = unode->tm_start;
					ustat_rec.tm_last = unode->tm_last;

					error = copyout(&ustat_rec, oldp + pos, sizeof(struct nfs_user_stat_user_rec));

					if (error) {
						/* punt */
						lck_mtx_unlock(lck: &ulist->user_mutex);
						goto ustat_done;
					}

					pos += sizeof(struct nfs_user_stat_user_rec);
					bytes_avail -= sizeof(struct nfs_user_stat_user_rec);
					recs_copied++;
				} else {
					/* Caller's buffer is exhausted */
					bytes_avail = 0;
				}
				bytes_total += sizeof(struct nfs_user_stat_user_rec);
			}
			/* can unlock this export's list now */
			lck_mtx_unlock(lck: &ulist->user_mutex);
		}
	}

ustat_done:
	/* unlock the export table */
	lck_rw_done(lck: &nfsrv_export_rwlock);

ustat_skip:
	/* indicate number of actual records copied */
	ustat_desc.rec_count = recs_copied;

	if (!error) {
		/* check if there was enough room for the buffer descriptor */
		if (oldlen >= sizeof(struct nfs_user_stat_desc)) {
			error = copyout(&ustat_desc, oldp, sizeof(struct nfs_user_stat_desc));
		} else {
			error = ENOMEM;
		}

		/* always indicate required buffer size */
		if (!error && newlenp && newlen >= sizeof(bytes_total)) {
			error = copyout(&bytes_total, newlenp, sizeof(bytes_total));
		}
	}
	return error;
}
1611
1612int
1613nfssvc_usercount(proc_t p, user_addr_t argp)
1614{
1615 int error;
1616 user_addr_t oldp, newlenp;
1617 user_size_t oldlen, newlen;
1618 struct user_iovec iov[2];
1619 size_t stat_size = sizeof(nfsrv_user_stat_node_count);
1620
1621 error = copyin_user_iovec_array(uaddr: argp, spacetype: IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, count: 2, dst: iov);
1622 if (error) {
1623 return error;
1624 }
1625
1626 oldp = iov[0].iov_base;
1627 oldlen = iov[0].iov_len;
1628 newlenp = iov[1].iov_base;
1629 newlen = iov[1].iov_len;
1630
1631 if (!oldp) {
1632 if (newlenp && newlen >= sizeof(stat_size)) {
1633 error = copyout(&stat_size, newlenp, sizeof(stat_size));
1634 }
1635 return error;
1636 }
1637
1638 if (oldlen < stat_size) {
1639 if (newlenp && newlen >= sizeof(stat_size)) {
1640 (void)copyout(&stat_size, newlenp, sizeof(stat_size));
1641 }
1642 return ENOMEM;
1643 }
1644
1645 if (nfsrv_is_initialized()) {
1646 /* reclaim old expired user nodes */
1647 nfsrv_active_user_list_reclaim();
1648 }
1649
1650 error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
1651
1652 return error;
1653}
1654
1655int
1656nfssvc_zerostats(void)
1657{
1658 bzero(s: &nfsrvstats, n: sizeof nfsrvstats);
1659 return 0;
1660}
1661
1662int
1663nfssvc_srvstats(proc_t p, user_addr_t argp)
1664{
1665 int error;
1666 user_addr_t oldp, newlenp;
1667 user_size_t oldlen, newlen;
1668 struct user_iovec iov[2];
1669 size_t stat_size = sizeof(nfsrvstats);
1670
1671 error = copyin_user_iovec_array(uaddr: argp, spacetype: IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, count: 2, dst: iov);
1672 if (error) {
1673 return error;
1674 }
1675
1676 oldp = iov[0].iov_base;
1677 oldlen = iov[0].iov_len;
1678 newlenp = iov[1].iov_base;
1679 newlen = iov[1].iov_len;
1680
1681 if (!oldp) {
1682 if (newlenp && newlen >= sizeof(stat_size)) {
1683 error = copyout(&stat_size, newlenp, sizeof(stat_size));
1684 }
1685 return error;
1686 }
1687
1688 if (oldlen < stat_size) {
1689 if (newlenp && newlen >= sizeof(stat_size)) {
1690 (void)copyout(&stat_size, newlenp, sizeof(stat_size));
1691 }
1692 return ENOMEM;
1693 }
1694
1695 error = copyout(&nfsrvstats, oldp, stat_size);
1696 if (error) {
1697 return error;
1698 }
1699
1700 return 0;
1701}
1702
1703/*
1704 * Shut down a socket associated with an nfsrv_sock structure.
1705 * Should be called with the send lock set, if required.
1706 * The trick here is to increment the sref at the start, so that the nfsds
1707 * will stop using it and clear ns_flag at the end so that it will not be
1708 * reassigned during cleanup.
1709 */
1710void
1711nfsrv_zapsock(struct nfsrv_sock *slp)
1712{
1713 socket_t so;
1714
1715 if ((slp->ns_flag & SLP_VALID) == 0) {
1716 return;
1717 }
1718 slp->ns_flag &= ~SLP_ALLFLAGS;
1719
1720 so = slp->ns_so;
1721 if (so == NULL) {
1722 return;
1723 }
1724
1725 sock_setupcall(sock: so, NULL, NULL);
1726 sock_shutdown(so, SHUT_RDWR);
1727
1728 /*
1729 * Remove from the up-call queue
1730 */
1731 nfsrv_uc_dequeue(slp);
1732}
1733
1734/*
1735 * cleanup and release a server socket structure.
1736 */
1737void
1738nfsrv_slpfree(struct nfsrv_sock *slp)
1739{
1740 struct nfsrv_descript *nwp, *nnwp;
1741
1742 if (slp->ns_so) {
1743 sock_release(so: slp->ns_so);
1744 slp->ns_so = NULL;
1745 }
1746 if (slp->ns_recslen) {
1747 OSAddAtomic(-slp->ns_recslen, &nfsrv_unprocessed_rpc_current);
1748 }
1749 if (slp->ns_nam) {
1750 mbuf_free(mbuf: slp->ns_nam);
1751 }
1752 if (slp->ns_raw) {
1753 mbuf_freem(mbuf: slp->ns_raw);
1754 }
1755 if (slp->ns_rec) {
1756 mbuf_freem(mbuf: slp->ns_rec);
1757 }
1758 if (slp->ns_frag) {
1759 mbuf_freem(mbuf: slp->ns_frag);
1760 }
1761 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
1762 slp->ns_reccnt = 0;
1763
1764 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
1765 nnwp = nwp->nd_tq.le_next;
1766 LIST_REMOVE(nwp, nd_tq);
1767 nfsm_chain_cleanup(&nwp->nd_nmreq);
1768 if (nwp->nd_mrep) {
1769 mbuf_freem(mbuf: nwp->nd_mrep);
1770 }
1771 if (nwp->nd_nam2) {
1772 mbuf_freem(mbuf: nwp->nd_nam2);
1773 }
1774 if (IS_VALID_CRED(nwp->nd_cr)) {
1775 kauth_cred_unref(&nwp->nd_cr);
1776 }
1777 if (nwp->nd_gss_context) {
1778 nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
1779 }
1780 NFS_ZFREE(nfsrv_descript_zone, nwp);
1781 }
1782 LIST_INIT(&slp->ns_tq);
1783
1784 lck_rw_destroy(lck: &slp->ns_rwlock, grp: &nfsrv_slp_rwlock_group);
1785 lck_mtx_destroy(lck: &slp->ns_wgmutex, grp: &nfsrv_slp_mutex_group);
1786 kfree_type(struct nfsrv_sock, slp);
1787}
1788
1789/*
1790 * Derefence a server socket structure. If it has no more references and
1791 * is no longer valid, you can throw it away.
1792 */
1793static void
1794nfsrv_slpderef_locked(struct nfsrv_sock *slp)
1795{
1796 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
1797 slp->ns_sref--;
1798
1799 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
1800 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
1801 /* remove socket from queue since there's no work */
1802 if (slp->ns_flag & SLP_WAITQ) {
1803 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1804 } else {
1805 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1806 }
1807 slp->ns_flag &= ~SLP_QUEUED;
1808 }
1809 lck_rw_done(lck: &slp->ns_rwlock);
1810 return;
1811 }
1812
1813 /* This socket is no longer valid, so we'll get rid of it */
1814
1815 if (slp->ns_flag & SLP_QUEUED) {
1816 if (slp->ns_flag & SLP_WAITQ) {
1817 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
1818 } else {
1819 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
1820 }
1821 slp->ns_flag &= ~SLP_QUEUED;
1822 }
1823 lck_rw_done(lck: &slp->ns_rwlock);
1824
1825 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
1826 if (slp->ns_sotype == SOCK_STREAM) {
1827 nfsrv_sock_tcp_cnt--;
1828 }
1829
1830 /* now remove from the write gather socket list */
1831 if (slp->ns_wgq.tqe_next != SLPNOLIST) {
1832 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
1833 slp->ns_wgq.tqe_next = SLPNOLIST;
1834 }
1835 nfsrv_slpfree(slp);
1836}
1837
/*
 * Dereference a server socket structure, taking nfsd_mutex around the
 * locked variant for callers that don't already hold it.
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	lck_mtx_lock(lck: &nfsd_mutex);
	nfsrv_slpderef_locked(slp);
	lck_mtx_unlock(lck: &nfsd_mutex);
}
1845
1846/*
1847 * Check periodically for idle sockest if needed and
1848 * zap them.
1849 */
1850void
1851nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
1852{
1853 struct nfsrv_sock *slp, *tslp;
1854 struct timeval now;
1855 time_t time_to_wait = nfsrv_sock_idle_timeout;
1856
1857 microuptime(tv: &now);
1858 lck_mtx_lock(lck: &nfsd_mutex);
1859
1860 /* Turn off the timer if we're suppose to and get out */
1861 if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
1862 nfsrv_sock_idle_timeout = 0;
1863 }
1864 if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
1865 nfsrv_idlesock_timer_on = 0;
1866 lck_mtx_unlock(lck: &nfsd_mutex);
1867 return;
1868 }
1869
1870 TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
1871 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
1872 /* Skip udp and referenced sockets */
1873 if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
1874 lck_rw_done(lck: &slp->ns_rwlock);
1875 continue;
1876 }
1877 /*
1878 * If this is the first non-referenced socket that hasn't idle out,
1879 * use its time stamp to calculate the earlist time in the future
1880 * to start the next invocation of the timer. Since the nfsrv_socklist
1881 * is sorted oldest access to newest. Once we find the first one,
1882 * we're done and break out of the loop.
1883 */
1884 if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
1885 nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
1886 time_to_wait -= now.tv_sec - slp->ns_timestamp;
1887 if (time_to_wait < 1) {
1888 time_to_wait = 1;
1889 }
1890 lck_rw_done(lck: &slp->ns_rwlock);
1891 break;
1892 }
1893 /*
1894 * Bump the ref count. nfsrv_slpderef below will destroy
1895 * the socket, since nfsrv_zapsock has closed it.
1896 */
1897 slp->ns_sref++;
1898 nfsrv_zapsock(slp);
1899 lck_rw_done(lck: &slp->ns_rwlock);
1900 nfsrv_slpderef_locked(slp);
1901 }
1902
1903 /* Start ourself back up */
1904 nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
1905 /* Remember when the next timer will fire for nfssvc_addsock. */
1906 nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
1907 lck_mtx_unlock(lck: &nfsd_mutex);
1908}
1909
1910/*
1911 * Clean up the data structures for the server.
1912 */
1913void
1914nfsrv_cleanup(void)
1915{
1916 struct nfsrv_sock *slp, *nslp;
1917 struct timeval now;
1918#if CONFIG_FSE
1919 struct nfsrv_fmod *fp, *nfp;
1920 int i;
1921#endif
1922
1923 microuptime(tv: &now);
1924 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
1925 nslp = TAILQ_NEXT(slp, ns_chain);
1926 lck_rw_lock_exclusive(lck: &slp->ns_rwlock);
1927 slp->ns_sref++;
1928 if (slp->ns_flag & SLP_VALID) {
1929 nfsrv_zapsock(slp);
1930 }
1931 lck_rw_done(lck: &slp->ns_rwlock);
1932 nfsrv_slpderef_locked(slp);
1933 }
1934#
1935#if CONFIG_FSE
1936 /*
1937 * Flush pending file write fsevents
1938 */
1939 lck_mtx_lock(lck: &nfsrv_fmod_mutex);
1940 for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1941 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
1942 /*
1943 * Fire off the content modified fsevent for each
1944 * entry, remove it from the list, and free it.
1945 */
1946 if (nfsrv_fsevents_enabled) {
1947 fp->fm_context.vc_thread = current_thread();
1948 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1949 FSE_ARG_VNODE, fp->fm_vp,
1950 FSE_ARG_DONE);
1951 }
1952 vnode_put(vp: fp->fm_vp);
1953 kauth_cred_unref(&fp->fm_context.vc_ucred);
1954 nfp = LIST_NEXT(fp, fm_link);
1955 LIST_REMOVE(fp, fm_link);
1956 kfree_type(struct nfsrv_fmod, fp);
1957 }
1958 }
1959 nfsrv_fmod_pending = 0;
1960 lck_mtx_unlock(lck: &nfsrv_fmod_mutex);
1961#endif
1962
1963 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
1964
1965 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
1966
1967 nfsrv_cleancache(); /* And clear out server cache */
1968
1969 nfsrv_udpsock = NULL;
1970 nfsrv_udp6sock = NULL;
1971}
1972
1973#endif /* CONFIG_NFS_SERVER */
1974