/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *    The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the University of
 *    California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>

/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h. Basically, we attempt to draw the line of: "was this code
 * compiled with an SDK from fall of 2020 or later?"
 */
static bool
proc_2020_fall_os_sdk_or_later(void)
{
    const uint32_t proc_sdk_ver = proc_sdk(current_proc());

    switch (proc_platform(current_proc())) {
    case PLATFORM_MACOS:
        return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
    case PLATFORM_IOS:
    case PLATFORM_IOSSIMULATOR:
    case PLATFORM_MACCATALYST:
        return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
    case PLATFORM_BRIDGEOS:
        return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
    case PLATFORM_TVOS:
    case PLATFORM_TVOSSIMULATOR:
        return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
    case PLATFORM_WATCHOS:
    case PLATFORM_WATCHOSSIMULATOR:
        return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
    default:
        /*
         * Tough call, but let's give new platforms the benefit of the doubt
         * to avoid a recurrence of rdar://89843927
         */
        return true;
    }
}
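
/*
 * For reference (added note, not in the original source): the packed SDK
 * version compared above follows dyld's encoding of major << 16 | minor << 8
 * | patch, so 0x000a1000 decodes as 10.16 and 0x000e0000 as 14.0. A small
 * sketch of that decoding, assuming this encoding, would be:
 *
 *     uint32_t ver = 0x000a1000;
 *     unsigned major = (ver >> 16) & 0xffff;   // 10
 *     unsigned minor = (ver >> 8) & 0xff;      // 16
 *     unsigned patch = ver & 0xff;             // 0
 */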

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective. Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
    /*
     * Map in special device (must be SHARED) or file
     */
    struct fileproc *fp;
    struct vnode *vp;
    int flags;
    int prot;
    int err = 0;
    vm_map_t user_map;
    kern_return_t result;
    vm_map_offset_t user_addr;
    vm_map_offset_t sum;
    vm_map_size_t user_size;
    vm_object_offset_t pageoff;
    vm_object_offset_t file_pos;
    vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    boolean_t docow;
    vm_prot_t maxprot;
    void *handle;
    memory_object_t pager = MEMORY_OBJECT_NULL;
    memory_object_control_t control;
    int mapanon = 0;
    int fpref = 0;
    int error = 0;
    int fd = uap->fd;
    int num_retries = 0;

    /*
     * Note that for UNIX03 conformance, there is additional parameter checking
     * for the mmap() system call in libsyscall prior to entering the kernel.
     * The sanity checks and argument validation done in this function are not
     * the only places where errnos can be returned.
     */

    user_map = current_map();
    user_addr = (vm_map_offset_t)uap->addr;
    user_size = (vm_map_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);
    AUDIT_ARG(fd, uap->fd);

    if (vm_map_range_overflows(user_map, user_addr, user_size)) {
        return EINVAL;
    }
    prot = (uap->prot & VM_PROT_ALL);
#if 3777787
    /*
     * Since the hardware currently does not support writing without
     * read-before-write, or execution-without-read, if the request is
     * for write or execute access, we must imply read access as well;
     * otherwise programs expecting this to work will fail to operate.
     */
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
    }
#endif /* radar 3777787 */

    flags = uap->flags;
    vp = NULLVP;

    /*
     * Verify that no unknown flags are passed in, and if any are, fail out
     * early to make sure the logic below never has to deal with invalid
     * flag values. Only do so for processes compiled with a Fall 2020 or
     * later SDK, which is where we drew this line and documented it as such.
     */
    if (flags & ~(MAP_SHARED |
        MAP_PRIVATE |
        MAP_COPY |
        MAP_FIXED |
        MAP_RENAME |
        MAP_NORESERVE |
        MAP_RESERVED0080 | /* grandfathered in as accepted and ignored */
        MAP_NOEXTEND |
        MAP_HASSEMAPHORE |
        MAP_NOCACHE |
        MAP_JIT |
        MAP_TPRO |
        MAP_FILE |
        MAP_ANON |
        MAP_RESILIENT_CODESIGN |
        MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
        MAP_32BIT |
#endif
        MAP_TRANSLATED_ALLOW_EXECUTE |
        MAP_UNIX03)) {
        if (proc_2020_fall_os_sdk_or_later()) {
            return EINVAL;
        }
    }
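
    /*
     * Illustrative note (added, not in the original source): a process built
     * against a Fall 2020 or newer SDK that passes a flag bit outside the
     * accepted set above, e.g. mmap(NULL, len, PROT_READ,
     * MAP_ANON | MAP_PRIVATE | <undefined bit>, -1, 0), now gets EINVAL here,
     * while binaries built against older SDKs keep the historical behavior of
     * having unknown bits tolerated.
     */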


    /*
     * The VM code does not have prototypes, and the compiler doesn't do the
     * right thing when you cast a 64-bit value and pass it in a function
     * call. So here it is.
     */
    file_pos = (vm_object_offset_t)uap->pos;


    /* make sure mapping fits into numeric range etc */
    if (os_add3_overflow(file_pos, user_size, vm_map_page_size(user_map) - 1, &sum)) {
        return EINVAL;
    }

    if (flags & MAP_UNIX03) {
        vm_map_offset_t offset_alignment_mask;

        /*
         * Enforce UNIX03 compliance.
         */

        if (vm_map_is_exotic(current_map())) {
            offset_alignment_mask = 0xFFF;
        } else {
            offset_alignment_mask = vm_map_page_mask(current_map());
        }
        if (file_pos & offset_alignment_mask) {
            /* file offset should be page-aligned */
            return EINVAL;
        }
        if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
            /* need either MAP_PRIVATE or MAP_SHARED */
            return EINVAL;
        }
        if (user_size == 0) {
            /* mapping length should not be 0 */
            return EINVAL;
        }
    }

    /*
     * Align the file position to a page boundary,
     * and save its page offset component.
     */
    pageoff = (file_pos & vm_map_page_mask(user_map));
    file_pos -= (vm_object_offset_t)pageoff;


    /* Adjust size for rounding (on both ends). */
    user_size += pageoff;               /* low end... */
    user_size = vm_map_round_page(user_size,
        vm_map_page_mask(user_map));    /* hi end */


    if (flags & MAP_JIT) {
        if ((flags & MAP_FIXED) ||
            (flags & MAP_SHARED) ||
            !(flags & MAP_ANON) ||
            (flags & MAP_RESILIENT_CODESIGN) ||
            (flags & MAP_RESILIENT_MEDIA) ||
            (flags & MAP_TPRO)) {
            return EINVAL;
        }
    }

    if ((flags & MAP_RESILIENT_CODESIGN) ||
        (flags & MAP_RESILIENT_MEDIA)) {
        if ((flags & MAP_ANON) ||
            (flags & MAP_JIT) ||
            (flags & MAP_TPRO)) {
            return EINVAL;
        }
    }
    if (flags & MAP_RESILIENT_CODESIGN) {
        int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
        if (prot & reject_prot) {
            /*
             * Quick sanity check. maxprot is calculated below and
             * we will test it again.
             */
            return EPERM;
        }
    }
    if (flags & MAP_SHARED) {
        /*
         * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
         * there is no place to inject zero-filled pages without
         * actually adding them to the file.
         * Since we didn't reject that combination before, there might
         * already be callers using it and getting a valid MAP_SHARED
         * mapping but without the resilience.
         * For backwards compatibility's sake, let's keep ignoring
         * MAP_RESILIENT_MEDIA in that case.
         */
        flags &= ~MAP_RESILIENT_MEDIA;
    }
    if (flags & MAP_RESILIENT_MEDIA) {
        if ((flags & MAP_ANON) ||
            (flags & MAP_SHARED)) {
            return EINVAL;
        }
    }
    if (flags & MAP_TPRO) {
        /*
         * MAP_TPRO without VM_PROT_WRITE is not valid here because
         * the TPRO mapping is handled at the PMAP layer with implicit RW
         * protections.
         *
         * This would enable bypassing of file-based protections, i.e.
         * a file open/mapped as read-only could be written to.
         */
        if ((prot & VM_PROT_EXECUTE) ||
            !(prot & VM_PROT_WRITE)) {
            return EPERM;
        }
    }

    /*
     * Check for illegal addresses. Watch out for address wrap... Note
     * that VM_*_ADDRESS are not constants due to casts (argh).
     */
    if (flags & MAP_FIXED) {
        /*
         * The specified address must have the same remainder
         * as the file offset taken modulo PAGE_SIZE, so it
         * should be aligned after adjustment by pageoff.
         */
        user_addr -= pageoff;
        if (user_addr & vm_map_page_mask(user_map)) {
            return EINVAL;
        }
    }
#ifdef notyet
    /* Do not have APIs to get this info; need to wait till then */
    /*
     * XXX for non-fixed mappings where no hint is provided or
     * the hint would fall in the potential heap space,
     * place it after the end of the largest possible heap.
     *
     * There should really be a pmap call to determine a reasonable
     * location.
     */
    else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
        vm_map_page_mask(user_map))) {
        addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
            vm_map_page_mask(user_map));
    }

#endif

    /* Entitlement check against code signing monitor */
    if ((flags & MAP_JIT) && (vm_map_csm_allow_jit(user_map) != KERN_SUCCESS)) {
        printf("[%d] code signing monitor denies JIT mapping\n", proc_pid(p));
        return EPERM;
    }

    if (flags & MAP_ANON) {
        maxprot = VM_PROT_ALL;
#if CONFIG_MACF
        /*
         * Entitlement check.
         */
        error = mac_proc_check_map_anon(p, current_cached_proc_cred(p),
            user_addr, user_size, prot, flags, &maxprot);
        if (error) {
            return EINVAL;
        }
#endif /* MAC */

        /*
         * Mapping blank space is trivial. Use positive fds as the alias
         * value for memory tracking.
         */
        if (fd != -1) {
            /*
             * Use "fd" to pass (some) Mach VM allocation flags
             * (see the VM_FLAGS_* definitions).
             */
            int vm_flags = fd & (VM_FLAGS_ALIAS_MASK |
                VM_FLAGS_SUPERPAGE_MASK |
                VM_FLAGS_PURGABLE |
                VM_FLAGS_4GB_CHUNK);

            if (vm_flags != fd) {
                /* reject if there are any extra flags */
                return EINVAL;
            }

            /*
             * vm_map_kernel_flags_set_vmflags() will assume that
             * the full set of VM flags are passed, which is
             * problematic for FIXED/ANYWHERE.
             *
             * The block handling MAP_FIXED below will do the same
             * thing again, which is fine because it's idempotent.
             */
            if (flags & MAP_FIXED) {
                vm_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
            } else {
                vm_flags |= VM_FLAGS_ANYWHERE;
            }
            vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags);
        }
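
        /*
         * Illustrative example (added, not in the original source): with
         * MAP_ANON, userspace can tag an anonymous mapping for vmmap and
         * footprint reporting by passing a VM tag in place of a descriptor:
         *
         *     void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
         *         MAP_ANON | MAP_PRIVATE,
         *         VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
         *
         * VM_MAKE_TAG() packs the tag into the VM_FLAGS_ALIAS_MASK bits
         * accepted by the check above; any other bits cause EINVAL.
         */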

#if CONFIG_MAP_RANGES
        /*
         * If the client specified a tag, let the system policy apply.
         *
         * Otherwise, force the heap range.
         */
        if (vmk_flags.vm_tag) {
            vm_map_kernel_flags_update_range_id(&vmk_flags, user_map);
        } else {
            vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
        }
#endif /* CONFIG_MAP_RANGES */

        handle = NULL;
        file_pos = 0;
        pageoff = 0;
        mapanon = 1;
    } else {
        struct vnode_attr va;
        vfs_context_t ctx = vfs_context_current();

        if (flags & MAP_JIT) {
            return EINVAL;
        }

        /*
         * Mapping file, get fp for validation. Obtain vnode and make
         * sure it is of appropriate type.
         */
        err = fp_lookup(p, fd, &fp, 0);
        if (err) {
            return err;
        }
        fpref = 1;
        switch (FILEGLOB_DTYPE(fp->fp_glob)) {
        case DTYPE_PSXSHM:
            uap->addr = (user_addr_t)user_addr;
            uap->len = (user_size_t)user_size;
            uap->prot = prot;
            uap->flags = flags;
            uap->pos = file_pos;
            error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
            goto bad;
        case DTYPE_VNODE:
            break;
        default:
            error = EINVAL;
            goto bad;
        }
        vp = (struct vnode *)fp_get_data(fp);
        error = vnode_getwithref(vp);
        if (error != 0) {
            goto bad;
        }

        if (vp->v_type != VREG && vp->v_type != VCHR) {
            (void)vnode_put(vp);
            error = EINVAL;
            goto bad;
        }

        AUDIT_ARG(vnpath, vp, ARG_VNODE1);

        /*
         * POSIX: mmap needs to update access time for mapped files
         */
        if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
            VATTR_INIT(&va);
            nanotime(&va.va_access_time);
            VATTR_SET_ACTIVE(&va, va_access_time);
            vnode_setattr(vp, &va, ctx);
        }

        /*
         * XXX hack to handle use of /dev/zero to map anon memory (ala
         * SunOS).
         */
        if (vp->v_type == VCHR || vp->v_type == VSTR) {
            (void)vnode_put(vp);
            error = ENODEV;
            goto bad;
        } else {
            /*
             * Ensure that file and memory protections are
             * compatible. Note that we only worry about
             * writability if mapping is shared; in this case,
             * current and max prot are dictated by the open file.
             * XXX use the vnode instead? Problem is: what
             * credentials do we use for determination? What if
             * proc does a setuid?
             */
            maxprot = VM_PROT_EXECUTE; /* TODO: Remove this and restrict maxprot? */
            if (fp->fp_glob->fg_flag & FREAD) {
                maxprot |= VM_PROT_READ;
            } else if (prot & PROT_READ) {
                (void)vnode_put(vp);
                error = EACCES;
                goto bad;
            }
            /*
             * If we are sharing potential changes (either via
             * MAP_SHARED or via the implicit sharing of character
             * device mappings), and we are trying to get write
             * permission although we opened it without asking
             * for it, bail out.
             */

            if ((flags & MAP_SHARED) != 0) {
                if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
                    /*
                     * Do not allow writable mappings of
                     * swap files (see vm_swapfile_pager.c).
                     */
                    !vnode_isswap(vp)) {
                    /*
                     * check for write access
                     *
                     * Note that we already made this check when granting FWRITE
                     * against the file, so it seems redundant here.
                     */
                    error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

                    /* if not granted for any reason, but we wanted it, bad */
                    if ((prot & PROT_WRITE) && (error != 0)) {
                        vnode_put(vp);
                        goto bad;
                    }

                    /* if writable, remember */
                    if (error == 0) {
                        maxprot |= VM_PROT_WRITE;
                    }
                } else if ((prot & PROT_WRITE) != 0) {
                    (void)vnode_put(vp);
                    error = EACCES;
                    goto bad;
                }
            } else {
                maxprot |= VM_PROT_WRITE;
            }

            handle = (void *)vp;
#if CONFIG_MACF
            error = mac_file_check_mmap(vfs_context_ucred(ctx),
                fp->fp_glob, prot, flags, file_pos + pageoff,
                &maxprot);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
#endif /* MAC */
            /*
             * Consult the file system to determine if this
             * particular file object can be mapped.
             *
             * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
             * then we don't check for writeability on the file
             * object, because it will only ever see reads.
             */
            error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
                (prot & ~PROT_WRITE) : prot, ctx);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
        }

        /*
         * No copy-on-read for mmap() mappings themselves.
         */
        vmk_flags.vmkf_no_copy_on_read = 1;
#if CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX
        /* force file ranges on !macOS */
        vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#endif /* CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX */
    }

    if (user_size == 0) {
        if (!mapanon) {
            (void)vnode_put(vp);
        }
        error = 0;
        goto bad;
    }

    /*
     * We bend a little - round the start and end addresses
     * to the nearest page boundary.
     */
    user_size = vm_map_round_page(user_size,
        vm_map_page_mask(user_map));

    if (file_pos & vm_map_page_mask(user_map)) {
        if (!mapanon) {
            (void)vnode_put(vp);
        }
        error = EINVAL;
        goto bad;
    }

    if ((flags & MAP_FIXED) == 0) {
        user_addr = vm_map_round_page(user_addr,
            vm_map_page_mask(user_map));
    } else {
        if (user_addr != vm_map_trunc_page(user_addr,
            vm_map_page_mask(user_map))) {
            if (!mapanon) {
                (void)vnode_put(vp);
            }
            error = EINVAL;
            goto bad;
        }
        /*
         * mmap(MAP_FIXED) will replace any existing mappings in the
         * specified range, if the new mapping is successful.
         * If we just deallocate the specified address range here,
         * another thread might jump in and allocate memory in that
         * range before we get a chance to establish the new mapping,
         * and we won't have a chance to restore the old mappings.
         * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
         * has to deallocate the existing mappings and establish the
         * new ones atomically.
         */
        vmk_flags.vmf_fixed = true;
        vmk_flags.vmf_overwrite = true;
    }

    if (flags & MAP_NOCACHE) {
        vmk_flags.vmf_no_cache = true;
    }

    if (flags & MAP_JIT) {
        vmk_flags.vmkf_map_jit = TRUE;
    }

    if (flags & MAP_TPRO) {
        vmk_flags.vmf_tpro = true;
    }

#if CONFIG_ROSETTA
    if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
        if (!proc_is_translated(p)) {
            if (!mapanon) {
                (void)vnode_put(vp);
            }
            error = EINVAL;
            goto bad;
        }
        vmk_flags.vmkf_translated_allow_execute = TRUE;
    }
#endif

    if (flags & MAP_RESILIENT_CODESIGN) {
        vmk_flags.vmf_resilient_codesign = true;
    }
    if (flags & MAP_RESILIENT_MEDIA) {
        vmk_flags.vmf_resilient_media = true;
    }

#if XNU_TARGET_OS_OSX
    /* macOS-specific MAP_32BIT flag handling */
    if (flags & MAP_32BIT) {
        vmk_flags.vmkf_32bit_map_va = TRUE;
    }
#endif

    /*
     * Lookup/allocate object.
     */
    if (handle == NULL) {
        control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        }
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;
        }
#endif
#endif

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        }
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
        }
#endif /* radar 3777787 */
map_anon_retry:

        result = vm_map_enter_mem_object(user_map,
            &user_addr, user_size,
            0, vmk_flags,
            IPC_PORT_NULL, 0, FALSE,
            prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        /*
         * If a non-binding address was specified for this anonymous
         * mapping, retry the mapping with a zero base
         * in the event the mapping operation failed due to
         * lack of space between the address and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);
            goto map_anon_retry;
        }
    } else {
        if (vnode_isswap(vp)) {
            /*
             * Map swap files with a special pager
             * that returns obfuscated contents.
             */
            control = NULL;
            pager = swapfile_pager_setup(vp);
            if (pager != MEMORY_OBJECT_NULL) {
                control = swapfile_pager_control(pager);
            }
        } else {
            control = ubc_getobject(vp, UBC_FLAGS_NONE);
        }

        if (control == NULL) {
            (void)vnode_put(vp);
            error = ENOMEM;
            goto bad;
        }

#if FBDP_DEBUG_OBJECT_NO_PAGER
//#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/202"
#define FBDP_FILE_NAME1 "icutz44l.dat"
#define FBDP_PATH_NAME2 "/private/var/mobile/Containers/Data/InternalDaemon/"
#define FBDP_FILE_NAME_START2 "com.apple.LaunchServices-"
#define FBDP_FILE_NAME_END2 "-v2.csstore"
        if (!strncmp(vp->v_name, FBDP_FILE_NAME1, strlen(FBDP_FILE_NAME1))) {
            char *path;
            int len;
            bool already_tracked;
            len = MAXPATHLEN;
            path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
            vn_getpath(vp, path, &len);
            if (!strncmp(path, FBDP_PATH_NAME1, strlen(FBDP_PATH_NAME1))) {
                if (memory_object_mark_as_tracked(control,
                    true,
                    &already_tracked) == KERN_SUCCESS &&
                    !already_tracked) {
                    printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
                }
            }
            zfree(ZV_NAMEI, path);
        } else if (!strncmp(vp->v_name, FBDP_FILE_NAME_START2, strlen(FBDP_FILE_NAME_START2)) &&
            strlen(vp->v_name) > strlen(FBDP_FILE_NAME_START2) + strlen(FBDP_FILE_NAME_END2) &&
            !strncmp(vp->v_name + strlen(vp->v_name) - strlen(FBDP_FILE_NAME_END2),
            FBDP_FILE_NAME_END2,
            strlen(FBDP_FILE_NAME_END2))) {
            char *path;
            int len;
            bool already_tracked;
            len = MAXPATHLEN;
            path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
            vn_getpath(vp, path, &len);
            if (!strncmp(path, FBDP_PATH_NAME2, strlen(FBDP_PATH_NAME2))) {
                if (memory_object_mark_as_tracked(control,
                    true,
                    &already_tracked) == KERN_SUCCESS &&
                    !already_tracked) {
                    printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
                }
            }
            zfree(ZV_NAMEI, path);
        }
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

        /*
         * Set credentials:
         * FIXME: if we're writing the file we need a way to
         * ensure that someone doesn't replace our R/W creds
         * with ones that only work for read.
         */

        ubc_setthreadcred(vp, p, current_thread());
        docow = FALSE;
        if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
            docow = TRUE;
        }
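        /*
         * Added comment (not in the original source): "docow" is passed to
         * the VM layer as the needs_copy argument below, so a MAP_PRIVATE
         * file mapping gets copy-on-write semantics and never pushes its
         * modifications back to the underlying file.
         */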

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        }
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;
        }
#endif
#endif /* notyet */

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        }
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
        }
#endif /* radar 3777787 */

map_file_retry:
        if (flags & MAP_RESILIENT_CODESIGN) {
            int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
            if (prot & reject_prot) {
                /*
                 * Would like to use (prot | maxprot) here
                 * but the assignment of VM_PROT_EXECUTE
                 * to maxprot above would always fail the test.
                 *
                 * Skipping the check is ok, however, because we
                 * restrict maxprot to prot just below in this
                 * block.
                 */
                assert(!mapanon);
                vnode_put(vp);
                error = EPERM;
                goto bad;
            }
            /* strictly limit access to "prot" */
            maxprot &= prot;
        }

        vm_object_offset_t end_pos = 0;
        if (os_add_overflow(user_size, file_pos, &end_pos)) {
            vnode_put(vp);
            error = EINVAL;
            goto bad;
        }

        result = vm_map_enter_mem_object_control(user_map,
            &user_addr, user_size,
            0, vmk_flags,
            control, file_pos,
            docow, prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        /*
         * If a non-binding address was specified for this file backed
         * mapping, retry the mapping with a zero base
         * in the event the mapping operation failed due to
         * lack of space between the address and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);
            goto map_file_retry;
        }
    }

    if (!mapanon) {
        (void)vnode_put(vp);
    }

    switch (result) {
    case KERN_SUCCESS:
        *retval = user_addr + pageoff;
        error = 0;
        break;
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        error = ENOMEM;
        break;
    case KERN_PROTECTION_FAILURE:
        error = EACCES;
        break;
    default:
        error = EINVAL;
        break;
    }
bad:
    if (pager != MEMORY_OBJECT_NULL) {
        /*
         * Release the reference on the pager.
         * If the mapping was successful, it now holds
         * an extra reference.
         */
        memory_object_deallocate(pager);
    }
    if (fpref) {
        fp_drop(p, fd, fp, 0);
    }

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
        (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
    return error;
}

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    int flags;
    vm_map_t user_map;
    int rv;
    vm_sync_t sync_flags = 0;

    user_map = current_map();
    addr = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
    if (vm_map_range_overflows(user_map, addr, size)) {
        return EINVAL;
    }
    if (addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }
    if (size == 0) {
        /*
         * We cannot support this properly without maintaining a list of
         * all mmaps done. We cannot use vm_map_entry as entries could be
         * split or coalesced by independent actions. So instead of
         * returning inaccurate results, let's just return an error for an
         * invalid size.
         */
        return EINVAL; /* XXX breaks posix apps */
    }

    flags = uap->flags;
    /* disallow contradictory flags */
    if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
        return EINVAL;
    }

    if (flags & MS_KILLPAGES) {
        sync_flags |= VM_SYNC_KILLPAGES;
    }
    if (flags & MS_DEACTIVATE) {
        sync_flags |= VM_SYNC_DEACTIVATE;
    }
    if (flags & MS_INVALIDATE) {
        sync_flags |= VM_SYNC_INVALIDATE;
    }

    if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
        if (flags & MS_ASYNC) {
            sync_flags |= VM_SYNC_ASYNCHRONOUS;
        } else {
            sync_flags |= VM_SYNC_SYNCHRONOUS;
        }
    }

    sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */

    rv = mach_vm_msync(user_map, addr, size, sync_flags);

    switch (rv) {
    case KERN_SUCCESS:
        break;
    case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
        return ENOMEM;
    case KERN_FAILURE:
        return EIO;
    default:
        return EINVAL;
    }
    return 0;
}


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);

    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

    if (vm_map_range_overflows(user_map, user_addr, user_size)) {
        return EINVAL;
    }

    if (user_size == 0) {
        /* UNIX SPEC: size is 0, return EINVAL */
        return EINVAL;
    }

    result = mach_vm_deallocate(user_map, user_addr, user_size);
    if (result != KERN_SUCCESS) {
        return EINVAL;
    }
    return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
    vm_prot_t prot;
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;
#if CONFIG_MACF
    int error;
#endif

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->prot);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;
    prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

    if (vm_map_range_overflows(user_map, user_addr, user_size)) {
        return EINVAL;
    }
    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
    if (prot & VM_PROT_READ) {
        prot |= VM_PROT_EXECUTE;
    }
#endif
#endif /* notyet */

#if 3936456
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
    }
#endif /* 3936456 */

#if CONFIG_MACF
    /*
     * The MAC check for mprotect is of limited use for 2 reasons:
     * Without mmap revocation, the caller could have asked for the max
     * protections initially instead of a reduced set, so a mprotect
     * check would offer no new security.
     * It is not possible to extract the vnode from the pager object(s)
     * of the target memory range.
     * However, the MAC check may be used to prevent a process from,
     * e.g., making the stack executable.
     */
    error = mac_proc_check_mprotect(p, user_addr,
        user_size, prot);
    if (error) {
        return error;
    }
#endif

    if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
        /* CODE SIGNING ENFORCEMENT - JIT support */
        /*
         * The special protection value VM_PROT_TRUSTED requests that we treat
         * this page as if it had a valid code signature.
         * If this is enabled, there MUST be a MAC policy implementing the
         * mac_proc_check_mprotect() hook above. Otherwise, code signing will
         * be compromised because the check would always succeed and thus any
         * process could sign dynamically.
         */
        result = vm_map_sign(
            user_map,
            vm_map_trunc_page(user_addr,
            vm_map_page_mask(user_map)),
            vm_map_round_page(user_addr + user_size,
            vm_map_page_mask(user_map)));
        switch (result) {
        case KERN_SUCCESS:
            break;
        case KERN_INVALID_ADDRESS:
            /* UNIX SPEC: for an invalid address range, return ENOMEM */
            return ENOMEM;
        default:
            return EINVAL;
        }
#else
        return ENOTSUP;
#endif
    }
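    /*
     * Added comment (not in the original source): VM_PROT_TRUSTED is a
     * request to vm_map_sign() handled above, not a real protection bit,
     * so strip it before handing the remaining protection bits to the
     * VM layer below.
     */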
    prot &= ~VM_PROT_TRUSTED;

    result = mach_vm_protect(user_map, user_addr, user_size,
        FALSE, prot);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_PROTECTION_FAILURE:
        return EACCES;
    case KERN_INVALID_ADDRESS:
        /* UNIX SPEC: for an invalid address range, return ENOMEM */
        return ENOMEM;
    }
    return EINVAL;
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    vm_inherit_t inherit;
    vm_map_t user_map;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->inherit);

    user_map = current_map();
    addr = (mach_vm_offset_t)uap->addr;
    size = (mach_vm_size_t)uap->len;
    inherit = uap->inherit;
    if (vm_map_range_overflows(user_map, addr, size)) {
        return EINVAL;
    }
    result = mach_vm_inherit(user_map, addr, size,
        inherit);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_PROTECTION_FAILURE:
        return EACCES;
    }
    return EINVAL;
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
    vm_map_t user_map;
    mach_vm_offset_t start;
    mach_vm_size_t size;
    vm_behavior_t new_behavior;
    kern_return_t result;

    /*
     * Since this routine is only advisory, we default to conservative
     * behavior.
     */
    switch (uap->behav) {
    case MADV_RANDOM:
        new_behavior = VM_BEHAVIOR_RANDOM;
        break;
    case MADV_SEQUENTIAL:
        new_behavior = VM_BEHAVIOR_SEQUENTIAL;
        break;
    case MADV_NORMAL:
        new_behavior = VM_BEHAVIOR_DEFAULT;
        break;
    case MADV_WILLNEED:
        new_behavior = VM_BEHAVIOR_WILLNEED;
        break;
    case MADV_DONTNEED:
        new_behavior = VM_BEHAVIOR_DONTNEED;
        break;
    case MADV_FREE:
        new_behavior = VM_BEHAVIOR_FREE;
        break;
    case MADV_ZERO_WIRED_PAGES:
        new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
        break;
    case MADV_FREE_REUSABLE:
        new_behavior = VM_BEHAVIOR_REUSABLE;
        break;
    case MADV_FREE_REUSE:
        new_behavior = VM_BEHAVIOR_REUSE;
        break;
    case MADV_CAN_REUSE:
        new_behavior = VM_BEHAVIOR_CAN_REUSE;
        break;
    case MADV_PAGEOUT:
#if MACH_ASSERT
        new_behavior = VM_BEHAVIOR_PAGEOUT;
        break;
#else /* MACH_ASSERT */
        return ENOTSUP;
#endif /* MACH_ASSERT */
    case MADV_ZERO:
        new_behavior = VM_BEHAVIOR_ZERO;
        break;
    default:
        return EINVAL;
    }

    user_map = current_map();
    start = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
    if (vm_map_range_overflows(user_map, start, size)) {
        return EINVAL;
    }
#if __arm64__
    if (start == 0 &&
        size != 0 &&
        (uap->behav == MADV_FREE ||
        uap->behav == MADV_FREE_REUSABLE)) {
        printf("** FOURK_COMPAT: %d[%s] "
            "failing madvise(0x%llx,0x%llx,%s)\n",
            proc_getpid(p), p->p_comm, start, size,
            ((uap->behav == MADV_FREE_REUSABLE)
            ? "MADV_FREE_REUSABLE"
            : "MADV_FREE"));
        DTRACE_VM3(fourk_compat_madvise,
            uint64_t, start,
            uint64_t, size,
            int, uap->behav);
        return EINVAL;
    }
#endif /* __arm64__ */

    result = mach_vm_behavior_set(user_map, start, size, new_behavior);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_INVALID_ADDRESS:
        return EINVAL;
    case KERN_NO_SPACE:
        return ENOMEM;
    case KERN_PROTECTION_FAILURE:
        return EPERM;
    case KERN_NO_ACCESS:
        return ENOTSUP;
    }

    return EINVAL;
}

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
    vm_map_t map = VM_MAP_NULL;
    user_addr_t vec = 0;
    int error = 0;
    int64_t lastvecindex = 0;
    int mincoreinfo = 0;
    int pqueryinfo = 0;
    uint64_t pqueryinfo_vec_size = 0;
    vm_page_info_basic_t info = NULL;
    mach_msg_type_number_t count = 0;
    char *kernel_vec = NULL;
    uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
    kern_return_t kr = KERN_SUCCESS;
    int effective_page_shift, effective_page_size;

    map = current_map();

    /*
     * On systems with 4k kernel space and 16k user space, we will
     * use the kernel page size to report back the residency information.
     * This is for backwards compatibility since we already have
     * processes that depend on this behavior.
     */
    if (vm_map_page_shift(map) < PAGE_SHIFT) {
        effective_page_shift = vm_map_page_shift(map);
        effective_page_size = vm_map_page_size(map);
    } else {
        effective_page_shift = PAGE_SHIFT;
        effective_page_size = PAGE_SIZE;
    }
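    /*
     * Added note (not in the original source): the effective page size chosen
     * above is the smaller of the user map's page size and the kernel's
     * PAGE_SIZE; each byte of the output vector copied out below describes
     * one page of that effective size.
     */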

    /*
     * Make sure that the addresses presented are valid for user
     * mode.
     */
    first_addr = addr = vm_map_trunc_page(uap->addr,
        vm_map_page_mask(map));
    end = vm_map_round_page(uap->addr + uap->len,
        vm_map_page_mask(map));

    if (end < addr) {
        return EINVAL;
    }

    if (end == addr) {
        return 0;
    }

    /*
     * We are going to loop through the whole 'req_vec_size' pages
     * range in chunks of 'cur_vec_size'.
     */

    req_vec_size_pages = (end - addr) >> effective_page_shift;
    cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
    size_t kernel_vec_size = cur_vec_size_pages;

    kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

    if (kernel_vec == NULL) {
        return ENOMEM;
    }

    /*
     * Address of byte vector
     */
    vec = uap->vec;

    pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

    info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

    if (info == NULL) {
        kfree_data(kernel_vec, kernel_vec_size);
        return ENOMEM;
    }

    while (addr < end) {
        cur_end = addr + (cur_vec_size_pages * effective_page_size);

        count = VM_PAGE_INFO_BASIC_COUNT;
        kr = vm_map_page_range_info_internal(map,
            addr,
            cur_end,
            effective_page_shift,
            VM_PAGE_INFO_BASIC,
            (vm_page_info_t) info,
            &count);

        assert(kr == KERN_SUCCESS);

        /*
         * Do this on a map entry basis so that if the pages are not
         * in the current process's address space, we can easily look
         * up the pages elsewhere.
         */
        lastvecindex = -1;

        for (; addr < cur_end; addr += effective_page_size) {
            pqueryinfo = info[lastvecindex + 1].disposition;

            mincoreinfo = 0;

            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
                mincoreinfo |= MINCORE_INCORE;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
                mincoreinfo |= MINCORE_REFERENCED;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
                mincoreinfo |= MINCORE_MODIFIED;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
                mincoreinfo |= MINCORE_PAGED_OUT;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
                mincoreinfo |= MINCORE_COPIED;
            }
            if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
                mincoreinfo |= MINCORE_ANONYMOUS;
            }
            /*
             * calculate index into user supplied byte vector
             */
            vecindex = (addr - first_addr) >> effective_page_shift;
            kernel_vec[vecindex] = (char)mincoreinfo;
            lastvecindex = vecindex;
        }


        assert(vecindex == (cur_vec_size_pages - 1));

        error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

        if (error) {
            break;
        }

        /*
         * For the next chunk, we'll need:
         * - bump the location in the user buffer for our next disposition.
         * - new length
         * - starting address
         */
        vec += cur_vec_size_pages * sizeof(char);
        req_vec_size_pages = (end - addr) >> effective_page_shift;
        cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

        first_addr = addr;
    }

    kfree_data(info, pqueryinfo_vec_size);
    kfree_data(kernel_vec, kernel_vec_size);

    if (error) {
        return EFAULT;
    }

    return 0;
}

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
    vm_map_t user_map;
    vm_map_offset_t addr;
    vm_map_size_t size, pageoff;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    user_map = current_map();
    addr = (vm_map_offset_t) uap->addr;
    size = (vm_map_size_t)uap->len;

    if (vm_map_range_overflows(user_map, addr, size)) {
        return EINVAL;
    }

    if (size == 0) {
        return 0;
    }

    pageoff = (addr & vm_map_page_mask(user_map));
    addr -= pageoff;
    size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

    /* have to call vm_map_wire directly to pass "I don't know" protections */
    result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);

    if (result == KERN_RESOURCE_SHORTAGE) {
        return EAGAIN;
    } else if (result == KERN_PROTECTION_FAILURE) {
        return EACCES;
    } else if (result != KERN_SUCCESS) {
        return ENOMEM;
    }

    return 0; /* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    vm_map_t user_map;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    addr = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t)uap->len;
    user_map = current_map();
    if (vm_map_range_overflows(user_map, addr, size)) {
        return EINVAL;
    }
    /* JMM - need to remove all wirings by spec - this just removes one */
    result = mach_vm_wire_kernel(user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
    return result == KERN_SUCCESS ? 0 : ENOMEM;
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
    return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
    return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;
    uint32_t cryptid;
    cpu_type_t cputype;
    cpu_subtype_t cpusubtype;
    pager_crypt_info_t crypt_info;
    const char *cryptname = 0;
    char *vpath;
    int len, ret;
    struct proc_regioninfo_internal pinfo;
    vnode_t vp;
    uintptr_t vnodeaddr;
    uint32_t vid;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    cryptid = uap->cryptid;
    cputype = uap->cputype;
    cpusubtype = uap->cpusubtype;

    if (vm_map_range_overflows(user_map, user_addr, user_size)) {
        return EINVAL;
    }
    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

    switch (cryptid) {
    case CRYPTID_NO_ENCRYPTION:
        /* not encrypted, just an empty load command */
        return 0;
    case CRYPTID_APP_ENCRYPTION:
    case CRYPTID_MODEL_ENCRYPTION:
        cryptname = "com.apple.unfree";
        break;
    case 0x10:
        /*
         * some random cryptid that you could manually put into
         * your binary if you want NULL
         */
        cryptname = "com.apple.null";
        break;
    default:
        return EINVAL;
    }

    if (NULL == text_crypter_create) {
        return ENOTSUP;
    }

    ret = fill_procregioninfo_onlymappedvnodes(proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
    if (ret == 0 || !vnodeaddr) {
        /* No really, this returns 0 if the memory address is not backed by a file */
        return EINVAL;
    }

    vp = (vnode_t)vnodeaddr;
    if ((vnode_getwithvid(vp, vid)) == 0) {
        vpath = zalloc(ZV_NAMEI);

        len = MAXPATHLEN;
        ret = vn_getpath(vp, vpath, &len);
        if (ret) {
            zfree(ZV_NAMEI, vpath);
            vnode_put(vp);
            return ret;
        }

        vnode_put(vp);
    } else {
        return EINVAL;
    }

#if 0
    kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
        __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

    if (user_size == 0) {
        printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
        zfree(ZV_NAMEI, vpath);
        return 0;
    }

    /* set up decrypter first */
    crypt_file_data_t crypt_data = {
        .filename = vpath,
        .cputype = cputype,
        .cpusubtype = cpusubtype,
        .origin = CRYPT_ORIGIN_LIBRARY_LOAD,
    };
    result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
    if (vm_map_debug_apple_protect) {
        printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
            proc_getpid(p), p->p_comm,
            user_map,
            (uint64_t) user_addr,
            (uint64_t) (user_addr + user_size),
            __FUNCTION__, vpath, result);
    }
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
    zfree(ZV_NAMEI, vpath);

    if (result) {
        printf("%s: unable to create decrypter %s, kr=%d\n",
            __FUNCTION__, cryptname, result);
        if (result == kIOReturnNotPrivileged) {
            /* text encryption returned decryption failure */
            return EPERM;
        } else {
            return ENOMEM;
        }
    }

    /* now remap using the decrypter */
    vm_object_offset_t crypto_backing_offset;
    crypto_backing_offset = -1; /* i.e. use map entry's offset */
    result = vm_map_apple_protected(user_map,
        user_addr,
        user_addr + user_size,
        crypto_backing_offset,
        &crypt_info,
        cryptid);
    if (result) {
        printf("%s: mapping failed with %d\n", __FUNCTION__, result);
    }

    if (result) {
        return EPERM;
    }
    return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */
