1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66#include <task_swapper.h>
67#include <mach_assert.h>
68
69#include <vm/vm_options.h>
70
71#include <libkern/OSAtomic.h>
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
78#include <mach/vm_statistics.h>
79#include <mach/memory_object.h>
80#include <mach/mach_vm.h>
81#include <machine/cpu_capabilities.h>
82#include <mach/sdt.h>
83
84#include <kern/assert.h>
85#include <kern/backtrace.h>
86#include <kern/counters.h>
87#include <kern/exc_guard.h>
88#include <kern/kalloc.h>
89#include <kern/zalloc.h>
90
91#include <vm/cpm.h>
92#include <vm/vm_compressor.h>
93#include <vm/vm_compressor_pager.h>
94#include <vm/vm_init.h>
95#include <vm/vm_fault.h>
96#include <vm/vm_map.h>
97#include <vm/vm_object.h>
98#include <vm/vm_page.h>
99#include <vm/vm_pageout.h>
100#include <vm/pmap.h>
101#include <vm/vm_kern.h>
102#include <ipc/ipc_port.h>
103#include <kern/sched_prim.h>
104#include <kern/misc_protos.h>
105#include <kern/xpr.h>
106
107#include <mach/vm_map_server.h>
108#include <mach/mach_host_server.h>
109#include <vm/vm_protos.h>
110#include <vm/vm_purgeable_internal.h>
111
113#include <vm/vm_shared_region.h>
114#include <vm/vm_map_store.h>
115
116#include <san/kasan.h>
117
118#include <sys/codesign.h>
119#include <libkern/section_keywords.h>
120#if DEVELOPMENT || DEBUG
121extern int proc_selfcsflags(void);
122#if CONFIG_EMBEDDED
123extern int panic_on_unsigned_execute;
124#endif /* CONFIG_EMBEDDED */
125#endif /* DEVELOPMENT || DEBUG */
126
127#if __arm64__
128extern const int fourk_binary_compatibility_unsafe;
129extern const int fourk_binary_compatibility_allow_wx;
130#endif /* __arm64__ */
131extern int proc_selfpid(void);
132extern char *proc_name_address(void *p);
133
134#if VM_MAP_DEBUG_APPLE_PROTECT
135int vm_map_debug_apple_protect = 0;
136#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
137#if VM_MAP_DEBUG_FOURK
138int vm_map_debug_fourk = 0;
139#endif /* VM_MAP_DEBUG_FOURK */
140
141SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
142int vm_map_executable_immutable_verbose = 0;
143
144extern u_int32_t random(void); /* from <libkern/libkern.h> */
145/* Internal prototypes
146 */
147
148static void vm_map_simplify_range(
149 vm_map_t map,
150 vm_map_offset_t start,
151 vm_map_offset_t end); /* forward */
152
153static boolean_t vm_map_range_check(
154 vm_map_t map,
155 vm_map_offset_t start,
156 vm_map_offset_t end,
157 vm_map_entry_t *entry);
158
159static vm_map_entry_t _vm_map_entry_create(
160 struct vm_map_header *map_header, boolean_t map_locked);
161
162static void _vm_map_entry_dispose(
163 struct vm_map_header *map_header,
164 vm_map_entry_t entry);
165
166static void vm_map_pmap_enter(
167 vm_map_t map,
168 vm_map_offset_t addr,
169 vm_map_offset_t end_addr,
170 vm_object_t object,
171 vm_object_offset_t offset,
172 vm_prot_t protection);
173
174static void _vm_map_clip_end(
175 struct vm_map_header *map_header,
176 vm_map_entry_t entry,
177 vm_map_offset_t end);
178
179static void _vm_map_clip_start(
180 struct vm_map_header *map_header,
181 vm_map_entry_t entry,
182 vm_map_offset_t start);
183
184static void vm_map_entry_delete(
185 vm_map_t map,
186 vm_map_entry_t entry);
187
188static kern_return_t vm_map_delete(
189 vm_map_t map,
190 vm_map_offset_t start,
191 vm_map_offset_t end,
192 int flags,
193 vm_map_t zap_map);
194
195static void vm_map_copy_insert(
196 vm_map_t map,
197 vm_map_entry_t after_where,
198 vm_map_copy_t copy);
199
200static kern_return_t vm_map_copy_overwrite_unaligned(
201 vm_map_t dst_map,
202 vm_map_entry_t entry,
203 vm_map_copy_t copy,
204 vm_map_address_t start,
205 boolean_t discard_on_success);
206
207static kern_return_t vm_map_copy_overwrite_aligned(
208 vm_map_t dst_map,
209 vm_map_entry_t tmp_entry,
210 vm_map_copy_t copy,
211 vm_map_offset_t start,
212 pmap_t pmap);
213
214static kern_return_t vm_map_copyin_kernel_buffer(
215 vm_map_t src_map,
216 vm_map_address_t src_addr,
217 vm_map_size_t len,
218 boolean_t src_destroy,
219 vm_map_copy_t *copy_result); /* OUT */
220
221static kern_return_t vm_map_copyout_kernel_buffer(
222 vm_map_t map,
223 vm_map_address_t *addr, /* IN/OUT */
224 vm_map_copy_t copy,
225 vm_map_size_t copy_size,
226 boolean_t overwrite,
227 boolean_t consume_on_success);
228
229static void vm_map_fork_share(
230 vm_map_t old_map,
231 vm_map_entry_t old_entry,
232 vm_map_t new_map);
233
234static boolean_t vm_map_fork_copy(
235 vm_map_t old_map,
236 vm_map_entry_t *old_entry_p,
237 vm_map_t new_map,
238 int vm_map_copyin_flags);
239
240static kern_return_t vm_map_wire_nested(
241 vm_map_t map,
242 vm_map_offset_t start,
243 vm_map_offset_t end,
244 vm_prot_t caller_prot,
245 vm_tag_t tag,
246 boolean_t user_wire,
247 pmap_t map_pmap,
248 vm_map_offset_t pmap_addr,
249 ppnum_t *physpage_p);
250
251static kern_return_t vm_map_unwire_nested(
252 vm_map_t map,
253 vm_map_offset_t start,
254 vm_map_offset_t end,
255 boolean_t user_wire,
256 pmap_t map_pmap,
257 vm_map_offset_t pmap_addr);
258
259static kern_return_t vm_map_overwrite_submap_recurse(
260 vm_map_t dst_map,
261 vm_map_offset_t dst_addr,
262 vm_map_size_t dst_size);
263
264static kern_return_t vm_map_copy_overwrite_nested(
265 vm_map_t dst_map,
266 vm_map_offset_t dst_addr,
267 vm_map_copy_t copy,
268 boolean_t interruptible,
269 pmap_t pmap,
270 boolean_t discard_on_success);
271
272static kern_return_t vm_map_remap_extract(
273 vm_map_t map,
274 vm_map_offset_t addr,
275 vm_map_size_t size,
276 boolean_t copy,
277 struct vm_map_header *map_header,
278 vm_prot_t *cur_protection,
279 vm_prot_t *max_protection,
280 vm_inherit_t inheritance,
281 boolean_t pageable,
282 boolean_t same_map,
283 vm_map_kernel_flags_t vmk_flags);
284
285static kern_return_t vm_map_remap_range_allocate(
286 vm_map_t map,
287 vm_map_address_t *address,
288 vm_map_size_t size,
289 vm_map_offset_t mask,
290 int flags,
291 vm_map_kernel_flags_t vmk_flags,
292 vm_tag_t tag,
293 vm_map_entry_t *map_entry);
294
295static void vm_map_region_look_for_page(
296 vm_map_t map,
297 vm_map_offset_t va,
298 vm_object_t object,
299 vm_object_offset_t offset,
300 int max_refcnt,
301 int depth,
302 vm_region_extended_info_t extended,
303 mach_msg_type_number_t count);
304
305static int vm_map_region_count_obj_refs(
306 vm_map_entry_t entry,
307 vm_object_t object);
308
309
310static kern_return_t vm_map_willneed(
311 vm_map_t map,
312 vm_map_offset_t start,
313 vm_map_offset_t end);
314
315static kern_return_t vm_map_reuse_pages(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end);
319
320static kern_return_t vm_map_reusable_pages(
321 vm_map_t map,
322 vm_map_offset_t start,
323 vm_map_offset_t end);
324
325static kern_return_t vm_map_can_reuse(
326 vm_map_t map,
327 vm_map_offset_t start,
328 vm_map_offset_t end);
329
330#if MACH_ASSERT
331static kern_return_t vm_map_pageout(
332 vm_map_t map,
333 vm_map_offset_t start,
334 vm_map_offset_t end);
335#endif /* MACH_ASSERT */
336
337static void vm_map_corpse_footprint_destroy(
338 vm_map_t map);
339
340pid_t find_largest_process_vm_map_entries(void);
341
342/*
343 * Macros to copy a vm_map_entry. We must be careful to correctly
344 * manage the wired page count. vm_map_entry_copy() creates a new
345 * map entry to the same memory - the wired count in the new entry
346 * must be set to zero. vm_map_entry_copy_full() creates a new
347 * entry that is identical to the old entry. This preserves the
348 * wire count; it's used for map splitting and zone changing in
349 * vm_map_copyout.
350 */
351
352#if CONFIG_EMBEDDED
353
354/*
355 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
356 * But for security reasons on embedded platforms, we don't want the
357 * new mapping to be "used for jit", so we always reset the flag here.
358 * Same for "pmap_cs_associated".
359 */
360#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW,OLD) \
361MACRO_BEGIN \
362 (NEW)->used_for_jit = FALSE; \
363 (NEW)->pmap_cs_associated = FALSE; \
364MACRO_END
365
366#else /* CONFIG_EMBEDDED */
367
368/*
369 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
370 * On macOS, the new mapping can be "used for jit".
371 */
372#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW,OLD) \
373MACRO_BEGIN \
374 assert((NEW)->used_for_jit == (OLD)->used_for_jit); \
375 assert((NEW)->pmap_cs_associated == FALSE); \
376MACRO_END
377
378#endif /* CONFIG_EMBEDDED */
379
380#define vm_map_entry_copy(NEW,OLD) \
381MACRO_BEGIN \
382boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
383 *(NEW) = *(OLD); \
384 (NEW)->is_shared = FALSE; \
385 (NEW)->needs_wakeup = FALSE; \
386 (NEW)->in_transition = FALSE; \
387 (NEW)->wired_count = 0; \
388 (NEW)->user_wired_count = 0; \
389 (NEW)->permanent = FALSE; \
390 VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD)); \
391 (NEW)->from_reserved_zone = _vmec_reserved; \
392 if ((NEW)->iokit_acct) { \
393 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
394 (NEW)->iokit_acct = FALSE; \
395 (NEW)->use_pmap = TRUE; \
396 } \
397 (NEW)->vme_resilient_codesign = FALSE; \
398 (NEW)->vme_resilient_media = FALSE; \
399 (NEW)->vme_atomic = FALSE; \
400MACRO_END
401
402#define vm_map_entry_copy_full(NEW,OLD) \
403MACRO_BEGIN \
404boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
405(*(NEW) = *(OLD)); \
406(NEW)->from_reserved_zone = _vmecf_reserved; \
407MACRO_END
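
/*
 * Illustrative sketch only, kept out of the build: how the two macros above
 * differ.  "old_entry" and "new_entry" are hypothetical entries named just
 * for this example.
 */
#if 0
static void
vm_map_entry_copy_example(vm_map_entry_t new_entry, vm_map_entry_t old_entry)
{
        /* a second mapping of the same memory: wiring starts over at zero */
        vm_map_entry_copy(new_entry, old_entry);
        assert(new_entry->wired_count == 0);
        assert(new_entry->user_wired_count == 0);

        /* an exact duplicate (clipping, vm_map_copyout): wiring is preserved */
        vm_map_entry_copy_full(new_entry, old_entry);
        assert(new_entry->wired_count == old_entry->wired_count);
}
#endif /* 0: illustrative sketch */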
408
409/*
410 * Decide if we want to allow processes to execute from their data or stack areas.
411 * override_nx() returns true if we do. Data/stack execution can be enabled independently
412 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
413 * or allow_stack_exec to enable data execution for that type of data area for that particular
414 * ABI (or both by or'ing the flags together). These are initialized in the architecture
415 * specific pmap files since the default behavior varies according to architecture. The
416 * main reason it varies is because of the need to provide binary compatibility with old
417 * applications that were written before these restrictions came into being. In the old
418 * days, an app could execute anything it could read, but this has slowly been tightened
419 * up over time. The default behavior is:
420 *
421 * 32-bit PPC apps may execute from both stack and data areas
422 * 32-bit Intel apps may execute from data areas but not stack
423 * 64-bit PPC/Intel apps may not execute from either data or stack
424 *
425 * An application on any architecture may override these defaults by explicitly
426 * adding PROT_EXEC permission to the page in question with the mprotect(2)
427 * system call. This code here just determines what happens when an app tries to
428 * execute from a page that lacks execute permission.
429 *
430 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
431 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
432 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
433 * execution from data areas for a particular binary even if the arch normally permits it. As
434 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
435 * to support some complicated use cases, notably browsers with out-of-process plugins that
436 * are not all NX-safe.
437 */
438
439extern int allow_data_exec, allow_stack_exec;
440
441int
442override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
443{
444 int current_abi;
445
446 if (map->pmap == kernel_pmap) return FALSE;
447
448 /*
449 * Determine if the app is running in 32 or 64 bit mode.
450 */
451
452 if (vm_map_is_64bit(map))
453 current_abi = VM_ABI_64;
454 else
455 current_abi = VM_ABI_32;
456
457 /*
458 * Determine if we should allow the execution based on whether it's a
459 * stack or data area and the current architecture.
460 */
461
462 if (user_tag == VM_MEMORY_STACK)
463 return allow_stack_exec & current_abi;
464
465 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
466}
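
/*
 * Illustrative sketch only, kept out of the build: how a fault path might
 * consult override_nx() when an execute fault hits a page that lacks
 * VM_PROT_EXECUTE.  The helper name and its parameters are hypothetical,
 * not an existing interface.
 */
#if 0
static boolean_t
example_allow_exec_fault(vm_map_t map, vm_map_entry_t entry, vm_prot_t fault_prot)
{
        if ((fault_prot & VM_PROT_EXECUTE) &&
            !(entry->protection & VM_PROT_EXECUTE)) {
                /* no execute permission: defer to the NX override policy */
                return override_nx(map, VME_ALIAS(entry)) ? TRUE : FALSE;
        }
        return TRUE;
}
#endif /* 0: illustrative sketch */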
467
468
469/*
470 * Virtual memory maps provide for the mapping, protection,
471 * and sharing of virtual memory objects. In addition,
472 * this module provides for an efficient virtual copy of
473 * memory from one map to another.
474 *
475 * Synchronization is required prior to most operations.
476 *
477 * Maps consist of an ordered doubly-linked list of simple
478 * entries; a single hint is used to speed up lookups.
479 *
480 * Sharing maps have been deleted from this version of Mach.
481 * All shared objects are now mapped directly into the respective
482 * maps. This requires a change in the copy on write strategy;
483 * the asymmetric (delayed) strategy is used for shared temporary
484 * objects instead of the symmetric (shadow) strategy. All maps
485 * are now "top level" maps (either task map, kernel map or submap
486 * of the kernel map).
487 *
488 * Since portions of maps are specified by start/end addresses,
489 * which may not align with existing map entries, all
490 * routines merely "clip" entries to these start/end values.
491 * [That is, an entry is split into two, bordering at a
492 * start or end value.] Note that these clippings may not
493 * always be necessary (as the two resulting entries are then
494 * not changed); however, the clipping is done for convenience.
495 * No attempt is currently made to "glue back together" two
496 * abutting entries.
497 *
498 * The symmetric (shadow) copy strategy implements virtual copy
499 * by copying VM object references from one map to
500 * another, and then marking both regions as copy-on-write.
501 * It is important to note that only one writeable reference
502 * to a VM object region exists in any map when this strategy
503 * is used -- this means that shadow object creation can be
504 * delayed until a write operation occurs. The asymmetric (delayed)
505 * strategy allows multiple maps to have writeable references to
506 * the same region of a vm object, and hence cannot delay creating
507 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
508 * Copying of permanent objects is completely different; see
509 * vm_object_copy_strategically() in vm_object.c.
510 */
511
512static zone_t vm_map_zone; /* zone for vm_map structures */
513zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
514static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
515static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
516zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
517
518
519/*
520 * Placeholder object for submap operations. This object is dropped
521 * into the range by a call to vm_map_find, and removed when
522 * vm_map_submap creates the submap.
523 */
524
525vm_object_t vm_submap_object;
526
527static void *map_data;
528static vm_size_t map_data_size;
529static void *kentry_data;
530static vm_size_t kentry_data_size;
531static void *map_holes_data;
532static vm_size_t map_holes_data_size;
533
534#if CONFIG_EMBEDDED
535#define NO_COALESCE_LIMIT 0
536#else
537#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
538#endif
539
540/* Skip acquiring locks if we're in the midst of a kernel core dump */
541unsigned int not_in_kdp = 1;
542
543unsigned int vm_map_set_cache_attr_count = 0;
544
545kern_return_t
546vm_map_set_cache_attr(
547 vm_map_t map,
548 vm_map_offset_t va)
549{
550 vm_map_entry_t map_entry;
551 vm_object_t object;
552 kern_return_t kr = KERN_SUCCESS;
553
554 vm_map_lock_read(map);
555
556 if (!vm_map_lookup_entry(map, va, &map_entry) ||
557 map_entry->is_sub_map) {
558 /*
559 * that memory is not properly mapped
560 */
561 kr = KERN_INVALID_ARGUMENT;
562 goto done;
563 }
564 object = VME_OBJECT(map_entry);
565
566 if (object == VM_OBJECT_NULL) {
567 /*
568 * there should be a VM object here at this point
569 */
570 kr = KERN_INVALID_ARGUMENT;
571 goto done;
572 }
573 vm_object_lock(object);
574 object->set_cache_attr = TRUE;
575 vm_object_unlock(object);
576
577 vm_map_set_cache_attr_count++;
578done:
579 vm_map_unlock_read(map);
580
581 return kr;
582}
583
584
585#if CONFIG_CODE_DECRYPTION
586/*
587 * vm_map_apple_protected:
588 * This remaps the requested part of the object with an object backed by
589 * the decrypting pager.
590 * crypt_info contains entry points and session data for the crypt module.
591 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
592 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
593 */
594kern_return_t
595vm_map_apple_protected(
596 vm_map_t map,
597 vm_map_offset_t start,
598 vm_map_offset_t end,
599 vm_object_offset_t crypto_backing_offset,
600 struct pager_crypt_info *crypt_info)
601{
602 boolean_t map_locked;
603 kern_return_t kr;
604 vm_map_entry_t map_entry;
605 struct vm_map_entry tmp_entry;
606 memory_object_t unprotected_mem_obj;
607 vm_object_t protected_object;
608 vm_map_offset_t map_addr;
609 vm_map_offset_t start_aligned, end_aligned;
610 vm_object_offset_t crypto_start, crypto_end;
611 int vm_flags;
612 vm_map_kernel_flags_t vmk_flags;
613
614 vm_flags = 0;
615 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
616
617 map_locked = FALSE;
618 unprotected_mem_obj = MEMORY_OBJECT_NULL;
619
620 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
621 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
622 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
623 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
624
625#if __arm64__
626 /*
627 * "start" and "end" might be 4K-aligned but not 16K-aligned,
628 * so we might have to loop and establish up to 3 mappings:
629 *
630 * + the first 16K-page, which might overlap with the previous
631 * 4K-aligned mapping,
632 * + the center,
633 * + the last 16K-page, which might overlap with the next
634 * 4K-aligned mapping.
635 * Each of these mappings might be backed by a vnode pager (if
636 * properly page-aligned) or a "fourk_pager", itself backed by a
637 * vnode pager (if 4K-aligned but not page-aligned).
638 */
639#else /* __arm64__ */
640 assert(start_aligned == start);
641 assert(end_aligned == end);
642#endif /* __arm64__ */
643
644 map_addr = start_aligned;
645 for (map_addr = start_aligned;
646 map_addr < end;
647 map_addr = tmp_entry.vme_end) {
648 vm_map_lock(map);
649 map_locked = TRUE;
650
651 /* lookup the protected VM object */
652 if (!vm_map_lookup_entry(map,
653 map_addr,
654 &map_entry) ||
655 map_entry->is_sub_map ||
656 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
657 !(map_entry->protection & VM_PROT_EXECUTE)) {
658 /* that memory is not properly mapped */
659 kr = KERN_INVALID_ARGUMENT;
660 goto done;
661 }
662
663 /* get the protected object to be decrypted */
664 protected_object = VME_OBJECT(map_entry);
665 if (protected_object == VM_OBJECT_NULL) {
666 /* there should be a VM object here at this point */
667 kr = KERN_INVALID_ARGUMENT;
668 goto done;
669 }
670 /* ensure protected object stays alive while map is unlocked */
671 vm_object_reference(protected_object);
672
673 /* limit the map entry to the area we want to cover */
674 vm_map_clip_start(map, map_entry, start_aligned);
675 vm_map_clip_end(map, map_entry, end_aligned);
676
677 tmp_entry = *map_entry;
678 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
679 vm_map_unlock(map);
680 map_locked = FALSE;
681
682 /*
683 * This map entry might be only partially encrypted
684 * (if not fully "page-aligned").
685 */
686 crypto_start = 0;
687 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
688 if (tmp_entry.vme_start < start) {
689 if (tmp_entry.vme_start != start_aligned) {
690 kr = KERN_INVALID_ADDRESS;
691 }
692 crypto_start += (start - tmp_entry.vme_start);
693 }
694 if (tmp_entry.vme_end > end) {
695 if (tmp_entry.vme_end != end_aligned) {
696 kr = KERN_INVALID_ADDRESS;
697 }
698 crypto_end -= (tmp_entry.vme_end - end);
699 }
700
701 /*
702 * This "extra backing offset" is needed to get the decryption
703 * routine to use the right key. It adjusts for the possibly
704 * relative offset of an interposed "4K" pager...
705 */
706 if (crypto_backing_offset == (vm_object_offset_t) -1) {
707 crypto_backing_offset = VME_OFFSET(&tmp_entry);
708 }
709
710 /*
711 * Lookup (and create if necessary) the protected memory object
712 * matching that VM object.
713 * If successful, this also grabs a reference on the memory object,
714 * to guarantee that it doesn't go away before we get a chance to map
715 * it.
716 */
717 unprotected_mem_obj = apple_protect_pager_setup(
718 protected_object,
719 VME_OFFSET(&tmp_entry),
720 crypto_backing_offset,
721 crypt_info,
722 crypto_start,
723 crypto_end);
724
725 /* release extra ref on protected object */
726 vm_object_deallocate(protected_object);
727
728 if (unprotected_mem_obj == NULL) {
729 kr = KERN_FAILURE;
730 goto done;
731 }
732
733 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
734 /* can overwrite an immutable mapping */
735 vmk_flags.vmkf_overwrite_immutable = TRUE;
736#if __arm64__
737 if (tmp_entry.used_for_jit &&
738 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
739 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
740 fourk_binary_compatibility_unsafe &&
741 fourk_binary_compatibility_allow_wx) {
742 printf("** FOURK_COMPAT [%d]: "
743 "allowing write+execute at 0x%llx\n",
744 proc_selfpid(), tmp_entry.vme_start);
745 vmk_flags.vmkf_map_jit = TRUE;
746 }
747#endif /* __arm64__ */
748
749 /* map this memory object in place of the current one */
750 map_addr = tmp_entry.vme_start;
751 kr = vm_map_enter_mem_object(map,
752 &map_addr,
753 (tmp_entry.vme_end -
754 tmp_entry.vme_start),
755 (mach_vm_offset_t) 0,
756 vm_flags,
757 vmk_flags,
758 VM_KERN_MEMORY_NONE,
759 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
760 0,
761 TRUE,
762 tmp_entry.protection,
763 tmp_entry.max_protection,
764 tmp_entry.inheritance);
765 assertf(kr == KERN_SUCCESS,
766 "kr = 0x%x\n", kr);
767 assertf(map_addr == tmp_entry.vme_start,
768 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
769 (uint64_t)map_addr,
770 (uint64_t) tmp_entry.vme_start,
771 &tmp_entry);
772
773#if VM_MAP_DEBUG_APPLE_PROTECT
774 if (vm_map_debug_apple_protect) {
775 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
776 " backing:[object:%p,offset:0x%llx,"
777 "crypto_backing_offset:0x%llx,"
778 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
779 map,
780 (uint64_t) map_addr,
781 (uint64_t) (map_addr + (tmp_entry.vme_end -
782 tmp_entry.vme_start)),
783 unprotected_mem_obj,
784 protected_object,
785 VME_OFFSET(&tmp_entry),
786 crypto_backing_offset,
787 crypto_start,
788 crypto_end);
789 }
790#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
791
792 /*
793 * Release the reference obtained by
794 * apple_protect_pager_setup().
795 * The mapping (if it succeeded) is now holding a reference on
796 * the memory object.
797 */
798 memory_object_deallocate(unprotected_mem_obj);
799 unprotected_mem_obj = MEMORY_OBJECT_NULL;
800
801 /* continue with next map entry */
802 crypto_backing_offset += (tmp_entry.vme_end -
803 tmp_entry.vme_start);
804 crypto_backing_offset -= crypto_start;
805 }
806 kr = KERN_SUCCESS;
807
808done:
809 if (map_locked) {
810 vm_map_unlock(map);
811 }
812 return kr;
813}
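
/*
 * Illustrative sketch only, kept out of the build: remapping one encrypted
 * executable segment through the decrypting pager.  The helper, its range
 * arguments and the crypt_info setup are hypothetical; a real caller
 * (e.g. the Mach-O loader) fills crypt_info in from its decryption module.
 */
#if 0
static kern_return_t
example_protect_text_segment(
        vm_map_t                map,
        vm_map_offset_t         seg_start,
        vm_map_size_t           seg_size,
        struct pager_crypt_info *crypt_info)
{
        /*
         * Passing -1 asks vm_map_apple_protected() to derive the crypto
         * backing offset from each map entry's own offset.
         */
        return vm_map_apple_protected(map,
                                      seg_start,
                                      seg_start + seg_size,
                                      (vm_object_offset_t) -1,
                                      crypt_info);
}
#endif /* 0: illustrative sketch */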
814#endif /* CONFIG_CODE_DECRYPTION */
815
816
817lck_grp_t vm_map_lck_grp;
818lck_grp_attr_t vm_map_lck_grp_attr;
819lck_attr_t vm_map_lck_attr;
820lck_attr_t vm_map_lck_rw_attr;
821
822#if CONFIG_EMBEDDED
823int malloc_no_cow = 1;
824#define VM_PROTECT_WX_FAIL 0
825#else /* CONFIG_EMBEDDED */
826int malloc_no_cow = 0;
827#define VM_PROTECT_WX_FAIL 1
828#endif /* CONFIG_EMBEDDED */
829uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
830
831/*
832 * vm_map_init:
833 *
834 * Initialize the vm_map module. Must be called before
835 * any other vm_map routines.
836 *
837 * Map and entry structures are allocated from zones -- we must
838 * initialize those zones.
839 *
840 * There are three zones of interest:
841 *
842 * vm_map_zone: used to allocate maps.
843 * vm_map_entry_zone: used to allocate map entries.
844 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
845 *
846 * The kernel allocates map entries from a special zone that is initially
847 * "crammed" with memory. It would be difficult (perhaps impossible) for
848 * the kernel to allocate more memory to an entry zone when it became
849 * empty since the very act of allocating memory implies the creation
850 * of a new entry.
851 */
852void
853vm_map_init(
854 void)
855{
856 vm_size_t entry_zone_alloc_size;
857 const char *mez_name = "VM map entries";
858
859 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
860 PAGE_SIZE, "maps");
861 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
862#if defined(__LP64__)
863 entry_zone_alloc_size = PAGE_SIZE * 5;
864#else
865 entry_zone_alloc_size = PAGE_SIZE * 6;
866#endif
867 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
868 1024*1024, entry_zone_alloc_size,
869 mez_name);
870 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
871 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
872 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
873
874 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
875 kentry_data_size * 64, kentry_data_size,
876 "Reserved VM map entries");
877 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
878 /* Don't quarantine because we always need elements available */
879 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
880
881 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
882 16*1024, PAGE_SIZE, "VM map copies");
883 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
884
885 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
886 16*1024, PAGE_SIZE, "VM map holes");
887 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
888
889 /*
890 * Cram the map and kentry zones with initial data.
891 * Set reserved_zone non-collectible to aid zone_gc().
892 */
893 zone_change(vm_map_zone, Z_COLLECT, FALSE);
894 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
895 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
896
897 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
898 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
899 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
900 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
901 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
902 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
903 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
904
905 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
906 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
907 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
908 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
909 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
910 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
911
912 /*
913 * Add the stolen memory to zones, adjust zone size and stolen counts.
914 * zcram only up to the maximum number of pages for each zone chunk.
915 */
916 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
917
918 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
919 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
920 zcram(vm_map_entry_reserved_zone,
921 (vm_offset_t)kentry_data + off,
922 MIN(kentry_data_size - off, stride));
923 }
924 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
925 zcram(vm_map_holes_zone,
926 (vm_offset_t)map_holes_data + off,
927 MIN(map_holes_data_size - off, stride));
928 }
929
930 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
931
932 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
933 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
934 lck_attr_setdefault(&vm_map_lck_attr);
935
936 lck_attr_setdefault(&vm_map_lck_rw_attr);
937 lck_attr_cleardebug(&vm_map_lck_rw_attr);
938
939#if VM_MAP_DEBUG_APPLE_PROTECT
940 PE_parse_boot_argn("vm_map_debug_apple_protect",
941 &vm_map_debug_apple_protect,
942 sizeof(vm_map_debug_apple_protect));
943#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
944#if VM_MAP_DEBUG_FOURK
945 PE_parse_boot_argn("vm_map_debug_fourk",
946 &vm_map_debug_fourk,
947 sizeof(vm_map_debug_fourk));
948#endif /* VM_MAP_DEBUG_FOURK */
949 PE_parse_boot_argn("vm_map_executable_immutable",
950 &vm_map_executable_immutable,
951 sizeof(vm_map_executable_immutable));
952 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
953 &vm_map_executable_immutable_verbose,
954 sizeof(vm_map_executable_immutable_verbose));
955
956 PE_parse_boot_argn("malloc_no_cow",
957 &malloc_no_cow,
958 sizeof(malloc_no_cow));
959 if (malloc_no_cow) {
960 vm_memory_malloc_no_cow_mask = 0ULL;
961 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
962 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
963 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
964// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
965// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
966 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
967 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
968 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
969 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
970// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
971 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
972 &vm_memory_malloc_no_cow_mask,
973 sizeof(vm_memory_malloc_no_cow_mask));
974 }
975}
976
977void
978vm_map_steal_memory(
979 void)
980{
981 uint32_t kentry_initial_pages;
982
983 map_data_size = round_page(10 * sizeof(struct _vm_map));
984 map_data = pmap_steal_memory(map_data_size);
985
986 /*
987 * kentry_initial_pages corresponds to the number of kernel map entries
988 * required during bootstrap until the asynchronous replenishment
989 * scheme is activated and/or entries are available from the general
990 * map entry pool.
991 */
992#if defined(__LP64__)
993 kentry_initial_pages = 10;
994#else
995 kentry_initial_pages = 6;
996#endif
997
998#if CONFIG_GZALLOC
999 /* If using the guard allocator, reserve more memory for the kernel
1000 * reserved map entry pool.
1001 */
1002 if (gzalloc_enabled())
1003 kentry_initial_pages *= 1024;
1004#endif
1005
1006 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1007 kentry_data = pmap_steal_memory(kentry_data_size);
1008
1009 map_holes_data_size = kentry_data_size;
1010 map_holes_data = pmap_steal_memory(map_holes_data_size);
1011}
1012
1013boolean_t vm_map_supports_hole_optimization = FALSE;
1014
1015void
1016vm_kernel_reserved_entry_init(void) {
1017 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
1018
1019 /*
1020 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1021 */
1022 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
1023 vm_map_supports_hole_optimization = TRUE;
1024}
1025
1026void
1027vm_map_disable_hole_optimization(vm_map_t map)
1028{
1029 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1030
1031 if (map->holelistenabled) {
1032
1033 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1034
1035 while (hole_entry != NULL) {
1036
1037 next_hole_entry = hole_entry->vme_next;
1038
1039 hole_entry->vme_next = NULL;
1040 hole_entry->vme_prev = NULL;
1041 zfree(vm_map_holes_zone, hole_entry);
1042
1043 if (next_hole_entry == head_entry) {
1044 hole_entry = NULL;
1045 } else {
1046 hole_entry = next_hole_entry;
1047 }
1048 }
1049
1050 map->holes_list = NULL;
1051 map->holelistenabled = FALSE;
1052
1053 map->first_free = vm_map_first_entry(map);
1054 SAVE_HINT_HOLE_WRITE(map, NULL);
1055 }
1056}
1057
1058boolean_t
1059vm_kernel_map_is_kernel(vm_map_t map) {
1060 return (map->pmap == kernel_pmap);
1061}
1062
1063/*
1064 * vm_map_create:
1065 *
1066 * Creates and returns a new empty VM map with
1067 * the given physical map structure, and having
1068 * the given lower and upper address bounds.
1069 */
1070
1071vm_map_t
1072vm_map_create(
1073 pmap_t pmap,
1074 vm_map_offset_t min,
1075 vm_map_offset_t max,
1076 boolean_t pageable)
1077{
1078 int options;
1079
1080 options = 0;
1081 if (pageable) {
1082 options |= VM_MAP_CREATE_PAGEABLE;
1083 }
1084 return vm_map_create_options(pmap, min, max, options);
1085}
1086
1087vm_map_t
1088vm_map_create_options(
1089 pmap_t pmap,
1090 vm_map_offset_t min,
1091 vm_map_offset_t max,
1092 int options)
1093{
1094 vm_map_t result;
1095 struct vm_map_links *hole_entry = NULL;
1096
1097 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1098 /* unknown option */
1099 return VM_MAP_NULL;
1100 }
1101
1102 result = (vm_map_t) zalloc(vm_map_zone);
1103 if (result == VM_MAP_NULL)
1104 panic("vm_map_create");
1105
1106 vm_map_first_entry(result) = vm_map_to_entry(result);
1107 vm_map_last_entry(result) = vm_map_to_entry(result);
1108 result->hdr.nentries = 0;
1109 if (options & VM_MAP_CREATE_PAGEABLE) {
1110 result->hdr.entries_pageable = TRUE;
1111 } else {
1112 result->hdr.entries_pageable = FALSE;
1113 }
1114
1115 vm_map_store_init( &(result->hdr) );
1116
1117 result->hdr.page_shift = PAGE_SHIFT;
1118
1119 result->size = 0;
1120 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1121 result->user_wire_size = 0;
1122#if __x86_64__
1123 result->vmmap_high_start = 0;
1124#endif /* __x86_64__ */
1125 result->map_refcnt = 1;
1126#if TASK_SWAPPER
1127 result->res_count = 1;
1128 result->sw_state = MAP_SW_IN;
1129#endif /* TASK_SWAPPER */
1130 result->pmap = pmap;
1131 result->min_offset = min;
1132 result->max_offset = max;
1133 result->wiring_required = FALSE;
1134 result->no_zero_fill = FALSE;
1135 result->mapped_in_other_pmaps = FALSE;
1136 result->wait_for_space = FALSE;
1137 result->switch_protect = FALSE;
1138 result->disable_vmentry_reuse = FALSE;
1139 result->map_disallow_data_exec = FALSE;
1140 result->is_nested_map = FALSE;
1141 result->map_disallow_new_exec = FALSE;
1142 result->highest_entry_end = 0;
1143 result->first_free = vm_map_to_entry(result);
1144 result->hint = vm_map_to_entry(result);
1145 result->jit_entry_exists = FALSE;
1146
1147 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1148 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1149 result->has_corpse_footprint = TRUE;
1150 result->holelistenabled = FALSE;
1151 result->vmmap_corpse_footprint = NULL;
1152 } else {
1153 result->has_corpse_footprint = FALSE;
1154 if (vm_map_supports_hole_optimization) {
1155 hole_entry = zalloc(vm_map_holes_zone);
1156
1157 hole_entry->start = min;
1158#if defined(__arm__) || defined(__arm64__)
1159 hole_entry->end = result->max_offset;
1160#else
1161 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1162#endif
1163 result->holes_list = result->hole_hint = hole_entry;
1164 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1165 result->holelistenabled = TRUE;
1166 } else {
1167 result->holelistenabled = FALSE;
1168 }
1169 }
1170
1171 vm_map_lock_init(result);
1172 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1173
1174 return(result);
1175}
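
/*
 * Illustrative sketch only, kept out of the build: creating a pageable map
 * for a new task.  "new_pmap" is a hypothetical pmap obtained elsewhere.
 */
#if 0
static vm_map_t
example_create_task_map(
        pmap_t          new_pmap,
        vm_map_offset_t min_off,
        vm_map_offset_t max_off)
{
        /* equivalent to vm_map_create(new_pmap, min_off, max_off, TRUE) */
        return vm_map_create_options(new_pmap, min_off, max_off,
                                     VM_MAP_CREATE_PAGEABLE);
}
#endif /* 0: illustrative sketch */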
1176
1177/*
1178 * vm_map_entry_create: [ internal use only ]
1179 *
1180 * Allocates a VM map entry for insertion in the
1181 * given map (or map copy). No fields are filled.
1182 */
1183#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1184
1185#define vm_map_copy_entry_create(copy, map_locked) \
1186 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1187unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1188
1189static vm_map_entry_t
1190_vm_map_entry_create(
1191 struct vm_map_header *map_header, boolean_t __unused map_locked)
1192{
1193 zone_t zone;
1194 vm_map_entry_t entry;
1195
1196 zone = vm_map_entry_zone;
1197
1198 assert(map_header->entries_pageable ? !map_locked : TRUE);
1199
1200 if (map_header->entries_pageable) {
1201 entry = (vm_map_entry_t) zalloc(zone);
1202 }
1203 else {
1204 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1205
1206 if (entry == VM_MAP_ENTRY_NULL) {
1207 zone = vm_map_entry_reserved_zone;
1208 entry = (vm_map_entry_t) zalloc(zone);
1209 OSAddAtomic(1, &reserved_zalloc_count);
1210 } else
1211 OSAddAtomic(1, &nonreserved_zalloc_count);
1212 }
1213
1214 if (entry == VM_MAP_ENTRY_NULL)
1215 panic("vm_map_entry_create");
1216 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1217
1218 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1219#if MAP_ENTRY_CREATION_DEBUG
1220 entry->vme_creation_maphdr = map_header;
1221 backtrace(&entry->vme_creation_bt[0],
1222 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1223#endif
1224 return(entry);
1225}
1226
1227/*
1228 * vm_map_entry_dispose: [ internal use only ]
1229 *
1230 * Inverse of vm_map_entry_create.
1231 *
1232 * write map lock held so no need to
1233 * do anything special to ensure correctness
1234 * of the stores
1235 */
1236#define vm_map_entry_dispose(map, entry) \
1237 _vm_map_entry_dispose(&(map)->hdr, (entry))
1238
1239#define vm_map_copy_entry_dispose(copy, entry) \
1240 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1241
1242static void
1243_vm_map_entry_dispose(
1244 struct vm_map_header *map_header,
1245 vm_map_entry_t entry)
1246{
1247 zone_t zone;
1248
1249 if (map_header->entries_pageable || !(entry->from_reserved_zone))
1250 zone = vm_map_entry_zone;
1251 else
1252 zone = vm_map_entry_reserved_zone;
1253
1254 if (!map_header->entries_pageable) {
1255 if (zone == vm_map_entry_zone)
1256 OSAddAtomic(-1, &nonreserved_zalloc_count);
1257 else
1258 OSAddAtomic(-1, &reserved_zalloc_count);
1259 }
1260
1261 zfree(zone, entry);
1262}
1263
1264#if MACH_ASSERT
1265static boolean_t first_free_check = FALSE;
1266boolean_t
1267first_free_is_valid(
1268 vm_map_t map)
1269{
1270 if (!first_free_check)
1271 return TRUE;
1272
1273 return( first_free_is_valid_store( map ));
1274}
1275#endif /* MACH_ASSERT */
1276
1277
1278#define vm_map_copy_entry_link(copy, after_where, entry) \
1279 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1280
1281#define vm_map_copy_entry_unlink(copy, entry) \
1282 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1283
1284#if MACH_ASSERT && TASK_SWAPPER
1285/*
1286 * vm_map_res_reference:
1287 *
1288 * Adds another valid residence count to the given map.
1289 *
1290 * Map is locked so this function can be called from
1291 * vm_map_swapin.
1292 *
1293 */
1294void vm_map_res_reference(vm_map_t map)
1295{
1296 /* assert map is locked */
1297 assert(map->res_count >= 0);
1298 assert(map->map_refcnt >= map->res_count);
1299 if (map->res_count == 0) {
1300 lck_mtx_unlock(&map->s_lock);
1301 vm_map_lock(map);
1302 vm_map_swapin(map);
1303 lck_mtx_lock(&map->s_lock);
1304 ++map->res_count;
1305 vm_map_unlock(map);
1306 } else
1307 ++map->res_count;
1308}
1309
1310/*
1311 * vm_map_reference_swap:
1312 *
1313 * Adds valid reference and residence counts to the given map.
1314 *
1315 * The map may not be in memory (i.e. zero residence count).
1316 *
1317 */
1318void vm_map_reference_swap(vm_map_t map)
1319{
1320 assert(map != VM_MAP_NULL);
1321 lck_mtx_lock(&map->s_lock);
1322 assert(map->res_count >= 0);
1323 assert(map->map_refcnt >= map->res_count);
1324 map->map_refcnt++;
1325 vm_map_res_reference(map);
1326 lck_mtx_unlock(&map->s_lock);
1327}
1328
1329/*
1330 * vm_map_res_deallocate:
1331 *
1332 * Decrement residence count on a map; possibly causing swapout.
1333 *
1334 * The map must be in memory (i.e. non-zero residence count).
1335 *
1336 * The map is locked, so this function is callable from vm_map_deallocate.
1337 *
1338 */
1339void vm_map_res_deallocate(vm_map_t map)
1340{
1341 assert(map->res_count > 0);
1342 if (--map->res_count == 0) {
1343 lck_mtx_unlock(&map->s_lock);
1344 vm_map_lock(map);
1345 vm_map_swapout(map);
1346 vm_map_unlock(map);
1347 lck_mtx_lock(&map->s_lock);
1348 }
1349 assert(map->map_refcnt >= map->res_count);
1350}
1351#endif /* MACH_ASSERT && TASK_SWAPPER */
1352
1353/*
1354 * vm_map_destroy:
1355 *
1356 * Actually destroy a map.
1357 */
1358void
1359vm_map_destroy(
1360 vm_map_t map,
1361 int flags)
1362{
1363 vm_map_lock(map);
1364
1365 /* final cleanup: no need to unnest shared region */
1366 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1367 /* final cleanup: ok to remove immutable mappings */
1368 flags |= VM_MAP_REMOVE_IMMUTABLE;
1369 /* final cleanup: allow gaps in range */
1370 flags |= VM_MAP_REMOVE_GAPS_OK;
1371
1372 /* clean up regular map entries */
1373 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1374 flags, VM_MAP_NULL);
1375 /* clean up leftover special mappings (commpage, etc...) */
1376#if !defined(__arm__) && !defined(__arm64__)
1377 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1378 flags, VM_MAP_NULL);
1379#endif /* !__arm__ && !__arm64__ */
1380
1381 vm_map_disable_hole_optimization(map);
1382 vm_map_corpse_footprint_destroy(map);
1383
1384 vm_map_unlock(map);
1385
1386 assert(map->hdr.nentries == 0);
1387
1388 if(map->pmap)
1389 pmap_destroy(map->pmap);
1390
1391 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1392 /*
1393 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1394 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1395 * structure or kalloc'ed via lck_mtx_init.
1396 * An example is s_lock_ext within struct _vm_map.
1397 *
1398 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1399 * can add another tag to detect embedded vs alloc'ed indirect external
1400 * mutexes but that'll be additional checks in the lock path and require
1401 * updating dependencies for the old vs new tag.
1402 *
1403 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1404 * just when lock debugging is ON, we choose to forego explicitly destroying
1405 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1406 * count on vm_map_lck_grp, which has no serious side-effect.
1407 */
1408 } else {
1409 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1410 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1411 }
1412
1413 zfree(vm_map_zone, map);
1414}
1415
1416/*
1417 * Returns pid of the task with the largest number of VM map entries.
1418 * Used in the zone-map-exhaustion jetsam path.
1419 */
1420pid_t
1421find_largest_process_vm_map_entries(void)
1422{
1423 pid_t victim_pid = -1;
1424 int max_vm_map_entries = 0;
1425 task_t task = TASK_NULL;
1426 queue_head_t *task_list = &tasks;
1427
1428 lck_mtx_lock(&tasks_threads_lock);
1429 queue_iterate(task_list, task, task_t, tasks) {
1430 if (task == kernel_task || !task->active)
1431 continue;
1432
1433 vm_map_t task_map = task->map;
1434 if (task_map != VM_MAP_NULL) {
1435 int task_vm_map_entries = task_map->hdr.nentries;
1436 if (task_vm_map_entries > max_vm_map_entries) {
1437 max_vm_map_entries = task_vm_map_entries;
1438 victim_pid = pid_from_task(task);
1439 }
1440 }
1441 }
1442 lck_mtx_unlock(&tasks_threads_lock);
1443
1444 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1445 return victim_pid;
1446}
1447
1448#if TASK_SWAPPER
1449/*
1450 * vm_map_swapin/vm_map_swapout
1451 *
1452 * Swap a map in and out, either referencing or releasing its resources.
1453 * These functions are internal use only; however, they must be exported
1454 * because they may be called from macros, which are exported.
1455 *
1456 * In the case of swapout, there could be races on the residence count,
1457 * so if the residence count is up, we return, assuming that a
1458 * vm_map_deallocate() call in the near future will bring us back.
1459 *
1460 * Locking:
1461 * -- We use the map write lock for synchronization among races.
1462 * -- The map write lock, and not the simple s_lock, protects the
1463 * swap state of the map.
1464 * -- If a map entry is a share map, then we hold both locks, in
1465 * hierarchical order.
1466 *
1467 * Synchronization Notes:
1468 * 1) If a vm_map_swapin() call happens while a swapout is in progress, it
1469 * will block on the map lock and proceed when swapout is through.
1470 * 2) A vm_map_reference() call at this time is illegal, and will
1471 * cause a panic. vm_map_reference() is only allowed on resident
1472 * maps, since it refuses to block.
1473 * 3) A vm_map_swapin() call during a swapin will block, and
1474 * proceed when the first swapin is done, turning into a nop.
1475 * This is the reason the res_count is not incremented until
1476 * after the swapin is complete.
1477 * 4) There is a timing hole after the checks of the res_count, before
1478 * the map lock is taken, during which a swapin may get the lock
1479 * before a swapout about to happen. If this happens, the swapin
1480 * will detect the state and increment the reference count, causing
1481 * the swapout to be a nop, thereby delaying it until a later
1482 * vm_map_deallocate. If the swapout gets the lock first, then
1483 * the swapin will simply block until the swapout is done, and
1484 * then proceed.
1485 *
1486 * Because vm_map_swapin() is potentially an expensive operation, it
1487 * should be used with caution.
1488 *
1489 * Invariants:
1490 * 1) A map with a residence count of zero is either swapped, or
1491 * being swapped.
1492 * 2) A map with a non-zero residence count is either resident,
1493 * or being swapped in.
1494 */
1495
1496int vm_map_swap_enable = 1;
1497
1498void vm_map_swapin (vm_map_t map)
1499{
1500 vm_map_entry_t entry;
1501
1502 if (!vm_map_swap_enable) /* debug */
1503 return;
1504
1505 /*
1506 * Map is locked
1507 * First deal with various races.
1508 */
1509 if (map->sw_state == MAP_SW_IN)
1510 /*
1511 * we raced with swapout and won. Returning will increment
1512 * the res_count, turning the swapout into a nop.
1513 */
1514 return;
1515
1516 /*
1517 * The residence count must be zero. If we raced with another
1518 * swapin, the state would have been IN; if we raced with a
1519 * swapout (after another competing swapin), we must have lost
1520 * the race to get here (see above comment), in which case
1521 * res_count is still 0.
1522 */
1523 assert(map->res_count == 0);
1524
1525 /*
1526 * There are no intermediate states of a map going out or
1527 * coming in, since the map is locked during the transition.
1528 */
1529 assert(map->sw_state == MAP_SW_OUT);
1530
1531 /*
1532 * We now operate upon each map entry. If the entry is a sub-
1533 * or share-map, we call vm_map_res_reference upon it.
1534 * If the entry is an object, we call vm_object_res_reference
1535 * (this may iterate through the shadow chain).
1536 * Note that we hold the map locked the entire time,
1537 * even if we get back here via a recursive call in
1538 * vm_map_res_reference.
1539 */
1540 entry = vm_map_first_entry(map);
1541
1542 while (entry != vm_map_to_entry(map)) {
1543 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1544 if (entry->is_sub_map) {
1545 vm_map_t lmap = VME_SUBMAP(entry);
1546 lck_mtx_lock(&lmap->s_lock);
1547 vm_map_res_reference(lmap);
1548 lck_mtx_unlock(&lmap->s_lock);
1549 } else {
1550 vm_object_t object = VME_OBJECT(entry);
1551 vm_object_lock(object);
1552 /*
1553 * This call may iterate through the
1554 * shadow chain.
1555 */
1556 vm_object_res_reference(object);
1557 vm_object_unlock(object);
1558 }
1559 }
1560 entry = entry->vme_next;
1561 }
1562 assert(map->sw_state == MAP_SW_OUT);
1563 map->sw_state = MAP_SW_IN;
1564}
1565
1566void vm_map_swapout(vm_map_t map)
1567{
1568 vm_map_entry_t entry;
1569
1570 /*
1571 * Map is locked
1572 * First deal with various races.
1573 * If we raced with a swapin and lost, the residence count
1574 * will have been incremented to 1, and we simply return.
1575 */
1576 lck_mtx_lock(&map->s_lock);
1577 if (map->res_count != 0) {
1578 lck_mtx_unlock(&map->s_lock);
1579 return;
1580 }
1581 lck_mtx_unlock(&map->s_lock);
1582
1583 /*
1584 * There are no intermediate states of a map going out or
1585 * coming in, since the map is locked during the transition.
1586 */
1587 assert(map->sw_state == MAP_SW_IN);
1588
1589 if (!vm_map_swap_enable)
1590 return;
1591
1592 /*
1593 * We now operate upon each map entry. If the entry is a sub-
1594 * or share-map, we call vm_map_res_deallocate upon it.
1595 * If the entry is an object, we call vm_object_res_deallocate
1596 * (this may iterate through the shadow chain).
1597 * Note that we hold the map locked the entire time,
1598 * even if we get back here via a recursive call in
1599 * vm_map_res_deallocate.
1600 */
1601 entry = vm_map_first_entry(map);
1602
1603 while (entry != vm_map_to_entry(map)) {
1604 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1605 if (entry->is_sub_map) {
1606 vm_map_t lmap = VME_SUBMAP(entry);
1607 lck_mtx_lock(&lmap->s_lock);
1608 vm_map_res_deallocate(lmap);
1609 lck_mtx_unlock(&lmap->s_lock);
1610 } else {
1611 vm_object_t object = VME_OBJECT(entry);
1612 vm_object_lock(object);
1613 /*
1614 * This call may take a long time,
1615 * since it could actively push
1616 * out pages (if we implement it
1617 * that way).
1618 */
1619 vm_object_res_deallocate(object);
1620 vm_object_unlock(object);
1621 }
1622 }
1623 entry = entry->vme_next;
1624 }
1625 assert(map->sw_state == MAP_SW_IN);
1626 map->sw_state = MAP_SW_OUT;
1627}
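
/*
 * Illustrative sketch only, kept out of the build: the caller-side pattern
 * described in the synchronization notes above.  A hypothetical user of a
 * possibly-swapped map brackets its use with a swap-aware reference.
 */
#if 0
static void
example_use_possibly_swapped_map(vm_map_t map)
{
        /*
         * Add a reference and a residence count, swapping the map in
         * if it was swapped out.
         */
        vm_map_reference_swap(map);

        /* ... operate on the now-resident map ... */

        /*
         * Drop both counts; a residence count reaching zero may lead to
         * a later swapout.
         */
        vm_map_deallocate(map);
}
#endif /* 0: illustrative sketch */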
1628
1629#endif /* TASK_SWAPPER */
1630
1631/*
1632 * vm_map_lookup_entry: [ internal use only ]
1633 *
1634 * Calls into the vm map store layer to find the map
1635 * entry containing (or immediately preceding) the
1636 * specified address in the given map; the entry is returned
1637 * in the "entry" parameter. The boolean
1638 * result indicates whether the address is
1639 * actually contained in the map.
1640 */
1641boolean_t
1642vm_map_lookup_entry(
1643 vm_map_t map,
1644 vm_map_offset_t address,
1645 vm_map_entry_t *entry) /* OUT */
1646{
1647 return ( vm_map_store_lookup_entry( map, address, entry ));
1648}
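
/*
 * Illustrative sketch only, kept out of the build: the usual pattern around
 * vm_map_lookup_entry() -- hold the map lock across the lookup and treat a
 * FALSE return as "address not mapped".  The helper name is hypothetical.
 */
#if 0
static boolean_t
example_address_is_mapped(vm_map_t map, vm_map_offset_t addr)
{
        vm_map_entry_t  entry;
        boolean_t       mapped;

        vm_map_lock_read(map);
        mapped = vm_map_lookup_entry(map, addr, &entry);
        vm_map_unlock_read(map);

        return mapped;
}
#endif /* 0: illustrative sketch */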
1649
1650/*
1651 * Routine: vm_map_find_space
1652 * Purpose:
1653 * Allocate a range in the specified virtual address map,
1654 * returning the entry allocated for that range.
1655 * Used by kmem_alloc, etc.
1656 *
1657 * The map must NOT be locked. It will be returned locked
1658 * on KERN_SUCCESS, unlocked on failure.
1659 *
1660 * If an entry is allocated, the object/offset fields
1661 * are initialized to zero.
1662 */
1663kern_return_t
1664vm_map_find_space(
1665 vm_map_t map,
1666 vm_map_offset_t *address, /* OUT */
1667 vm_map_size_t size,
1668 vm_map_offset_t mask,
1669 int flags __unused,
1670 vm_map_kernel_flags_t vmk_flags,
1671 vm_tag_t tag,
1672 vm_map_entry_t *o_entry) /* OUT */
1673{
1674 vm_map_entry_t entry, new_entry;
1675 vm_map_offset_t start;
1676 vm_map_offset_t end;
1677 vm_map_entry_t hole_entry;
1678
1679 if (size == 0) {
1680 *address = 0;
1681 return KERN_INVALID_ARGUMENT;
1682 }
1683
1684 if (vmk_flags.vmkf_guard_after) {
1685 /* account for the back guard page in the size */
1686 size += VM_MAP_PAGE_SIZE(map);
1687 }
1688
1689 new_entry = vm_map_entry_create(map, FALSE);
1690
1691 /*
1692 * Look for the first possible address; if there's already
1693 * something at this address, we have to start after it.
1694 */
1695
1696 vm_map_lock(map);
1697
1698 if( map->disable_vmentry_reuse == TRUE) {
1699 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1700 } else {
1701 if (map->holelistenabled) {
1702 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1703
1704 if (hole_entry == NULL) {
1705 /*
1706 * No more space in the map?
1707 */
1708 vm_map_entry_dispose(map, new_entry);
1709 vm_map_unlock(map);
1710 return(KERN_NO_SPACE);
1711 }
1712
1713 entry = hole_entry;
1714 start = entry->vme_start;
1715 } else {
1716 assert(first_free_is_valid(map));
1717 if ((entry = map->first_free) == vm_map_to_entry(map))
1718 start = map->min_offset;
1719 else
1720 start = entry->vme_end;
1721 }
1722 }
1723
1724 /*
1725 * In any case, the "entry" always precedes
1726 * the proposed new region throughout the loop:
1727 */
1728
1729 while (TRUE) {
1730 vm_map_entry_t next;
1731
1732 /*
1733 * Find the end of the proposed new region.
1734 * Be sure we didn't go beyond the end, or
1735 * wrap around the address.
1736 */
1737
1738 if (vmk_flags.vmkf_guard_before) {
1739 /* reserve space for the front guard page */
1740 start += VM_MAP_PAGE_SIZE(map);
1741 }
1742 end = ((start + mask) & ~mask);
1743
1744 if (end < start) {
1745 vm_map_entry_dispose(map, new_entry);
1746 vm_map_unlock(map);
1747 return(KERN_NO_SPACE);
1748 }
1749 start = end;
1750 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1751 end += size;
1752 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1753
1754 if ((end > map->max_offset) || (end < start)) {
1755 vm_map_entry_dispose(map, new_entry);
1756 vm_map_unlock(map);
1757 return(KERN_NO_SPACE);
1758 }
1759
1760 next = entry->vme_next;
1761
1762 if (map->holelistenabled) {
1763 if (entry->vme_end >= end)
1764 break;
1765 } else {
1766 /*
1767 * If there are no more entries, we must win.
1768 *
1769 * OR
1770 *
1771 * If there is another entry, it must be
1772 * after the end of the potential new region.
1773 */
1774
1775 if (next == vm_map_to_entry(map))
1776 break;
1777
1778 if (next->vme_start >= end)
1779 break;
1780 }
1781
1782 /*
1783 * Didn't fit -- move to the next entry.
1784 */
1785
1786 entry = next;
1787
1788 if (map->holelistenabled) {
1789 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1790 /*
1791 * Wrapped around
1792 */
1793 vm_map_entry_dispose(map, new_entry);
1794 vm_map_unlock(map);
1795 return(KERN_NO_SPACE);
1796 }
1797 start = entry->vme_start;
1798 } else {
1799 start = entry->vme_end;
1800 }
1801 }
1802
1803 if (map->holelistenabled) {
1804 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1805 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1806 }
1807 }
1808
1809 /*
1810 * At this point,
1811 * "start" and "end" should define the endpoints of the
1812 * available new range, and
1813 * "entry" should refer to the region before the new
1814 * range, and
1815 *
1816 * the map should be locked.
1817 */
1818
1819 if (vmk_flags.vmkf_guard_before) {
1820 /* go back for the front guard page */
1821 start -= VM_MAP_PAGE_SIZE(map);
1822 }
1823 *address = start;
1824
1825 assert(start < end);
1826 new_entry->vme_start = start;
1827 new_entry->vme_end = end;
1828 assert(page_aligned(new_entry->vme_start));
1829 assert(page_aligned(new_entry->vme_end));
1830 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1831 VM_MAP_PAGE_MASK(map)));
1832 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1833 VM_MAP_PAGE_MASK(map)));
1834
1835 new_entry->is_shared = FALSE;
1836 new_entry->is_sub_map = FALSE;
1837 new_entry->use_pmap = TRUE;
1838 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1839 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1840
1841 new_entry->needs_copy = FALSE;
1842
1843 new_entry->inheritance = VM_INHERIT_DEFAULT;
1844 new_entry->protection = VM_PROT_DEFAULT;
1845 new_entry->max_protection = VM_PROT_ALL;
1846 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1847 new_entry->wired_count = 0;
1848 new_entry->user_wired_count = 0;
1849
1850 new_entry->in_transition = FALSE;
1851 new_entry->needs_wakeup = FALSE;
1852 new_entry->no_cache = FALSE;
1853 new_entry->permanent = FALSE;
1854 new_entry->superpage_size = FALSE;
1855 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1856 new_entry->map_aligned = TRUE;
1857 } else {
1858 new_entry->map_aligned = FALSE;
1859 }
1860
1861 new_entry->used_for_jit = FALSE;
1862 new_entry->pmap_cs_associated = FALSE;
1863 new_entry->zero_wired_pages = FALSE;
1864 new_entry->iokit_acct = FALSE;
1865 new_entry->vme_resilient_codesign = FALSE;
1866 new_entry->vme_resilient_media = FALSE;
1867 if (vmk_flags.vmkf_atomic_entry)
1868 new_entry->vme_atomic = TRUE;
1869 else
1870 new_entry->vme_atomic = FALSE;
1871
1872 VME_ALIAS_SET(new_entry, tag);
1873
1874 /*
1875 * Insert the new entry into the list
1876 */
1877
1878 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1879
1880 map->size += size;
1881
1882 /*
1883 * Update the lookup hint
1884 */
1885 SAVE_HINT_MAP_WRITE(map, new_entry);
1886
1887 *o_entry = new_entry;
1888 return(KERN_SUCCESS);
1889}
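
/*
 * Informal usage sketch for vm_map_find_space() (hypothetical caller,
 * illustrative only; "size", "object", "offset" and the tag are
 * placeholders, not taken from an actual call site).  The routine
 * returns with the map still locked and "*o_entry" linked but not yet
 * backed by an object, so a caller typically fills in the entry and
 * then unlocks the map:
 *
 *	vm_map_entry_t	entry;
 *	vm_map_offset_t	addr;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, (vm_map_offset_t)0,
 *	    0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_KALLOC, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		VME_OBJECT_SET(entry, object);
 *		VME_OFFSET_SET(entry, offset);
 *		vm_map_unlock(kernel_map);
 *	}
 */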
1890
1891int vm_map_pmap_enter_print = FALSE;
1892int vm_map_pmap_enter_enable = FALSE;
1893
1894/*
1895 * Routine: vm_map_pmap_enter [internal only]
1896 *
1897 * Description:
1898 * Force pages from the specified object to be entered into
1899 * the pmap at the specified address if they are present.
 *	As soon as a page is not found in the object, the scan ends.
1901 *
1902 * Returns:
1903 * Nothing.
1904 *
1905 * In/out conditions:
1906 * The source map should not be locked on entry.
1907 */
1908__unused static void
1909vm_map_pmap_enter(
1910 vm_map_t map,
1911 vm_map_offset_t addr,
1912 vm_map_offset_t end_addr,
1913 vm_object_t object,
1914 vm_object_offset_t offset,
1915 vm_prot_t protection)
1916{
1917 int type_of_fault;
1918 kern_return_t kr;
1919 struct vm_object_fault_info fault_info = {};
1920
	if (map->pmap == 0)
1922 return;
1923
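	/*
	 * Walk the range one page at a time; the loop below bails out at
	 * the first page that is not resident (or not usable) in the object.
	 */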
1924 while (addr < end_addr) {
1925 vm_page_t m;
1926
1927
1928 /*
1929 * TODO:
1930 * From vm_map_enter(), we come into this function without the map
1931 * lock held or the object lock held.
1932 * We haven't taken a reference on the object either.
1933 * We should do a proper lookup on the map to make sure
1934 * that things are sane before we go locking objects that
1935 * could have been deallocated from under us.
1936 */
1937
1938 vm_object_lock(object);
1939
1940 m = vm_page_lookup(object, offset);
1941
1942 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
1944 vm_object_unlock(object);
1945 return;
1946 }
1947
1948 if (vm_map_pmap_enter_print) {
1949 printf("vm_map_pmap_enter:");
1950 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1951 map, (unsigned long long)addr, object, (unsigned long long)offset);
1952 }
1953 type_of_fault = DBG_CACHE_HIT_FAULT;
1954 kr = vm_fault_enter(m, map->pmap,
1955 addr, protection, protection,
1956 VM_PAGE_WIRED(m),
1957 FALSE, /* change_wiring */
1958 VM_KERN_MEMORY_NONE, /* tag - not wiring */
1959 &fault_info,
1960 NULL, /* need_retry */
1961 &type_of_fault);
1962
1963 vm_object_unlock(object);
1964
1965 offset += PAGE_SIZE_64;
1966 addr += PAGE_SIZE;
1967 }
1968}
1969
1970boolean_t vm_map_pmap_is_empty(
1971 vm_map_t map,
1972 vm_map_offset_t start,
1973 vm_map_offset_t end);
1974boolean_t vm_map_pmap_is_empty(
1975 vm_map_t map,
1976 vm_map_offset_t start,
1977 vm_map_offset_t end)
1978{
1979#ifdef MACHINE_PMAP_IS_EMPTY
1980 return pmap_is_empty(map->pmap, start, end);
1981#else /* MACHINE_PMAP_IS_EMPTY */
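	/*
	 * No machine-specific fast path: probe the pmap one page at a
	 * time and report the first translation found.
	 */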
1982 vm_map_offset_t offset;
1983 ppnum_t phys_page;
1984
1985 if (map->pmap == NULL) {
1986 return TRUE;
1987 }
1988
1989 for (offset = start;
1990 offset < end;
1991 offset += PAGE_SIZE) {
1992 phys_page = pmap_find_phys(map->pmap, offset);
1993 if (phys_page) {
1994 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1995 "page %d at 0x%llx\n",
1996 map, (long long)start, (long long)end,
1997 phys_page, (long long)offset);
1998 return FALSE;
1999 }
2000 }
2001 return TRUE;
2002#endif /* MACHINE_PMAP_IS_EMPTY */
2003}
2004
2005#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2006kern_return_t
2007vm_map_random_address_for_size(
2008 vm_map_t map,
2009 vm_map_offset_t *address,
2010 vm_map_size_t size)
2011{
2012 kern_return_t kr = KERN_SUCCESS;
2013 int tries = 0;
2014 vm_map_offset_t random_addr = 0;
2015 vm_map_offset_t hole_end;
2016
2017 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2018 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2019 vm_map_size_t vm_hole_size = 0;
2020 vm_map_size_t addr_space_size;
2021
2022 addr_space_size = vm_map_max(map) - vm_map_min(map);
2023
2024 assert(page_aligned(size));
2025
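	/*
	 * Pick page-aligned candidate addresses at random within
	 * [vm_map_min, vm_map_max) and accept the first one that falls in
	 * a hole at least "size" bytes long, giving up after
	 * MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.
	 */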
2026 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2027 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2028 random_addr = vm_map_trunc_page(
			vm_map_min(map) + (random_addr % addr_space_size),
2030 VM_MAP_PAGE_MASK(map));
2031
2032 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2033 if (prev_entry == vm_map_to_entry(map)) {
2034 next_entry = vm_map_first_entry(map);
2035 } else {
2036 next_entry = prev_entry->vme_next;
2037 }
2038 if (next_entry == vm_map_to_entry(map)) {
2039 hole_end = vm_map_max(map);
2040 } else {
2041 hole_end = next_entry->vme_start;
2042 }
2043 vm_hole_size = hole_end - random_addr;
2044 if (vm_hole_size >= size) {
2045 *address = random_addr;
2046 break;
2047 }
2048 }
2049 tries++;
2050 }
2051
2052 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2053 kr = KERN_NO_SPACE;
2054 }
2055 return kr;
2056}
2057
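/*
 * vm_memory_malloc_no_cow:
 *	Returns TRUE if the given VM alias (VM_MEMORY_* tag) has its bit
 *	set in "vm_memory_malloc_no_cow_mask".  vm_map_enter() uses this
 *	to give such mappings their own VM object up front (with
 *	MEMORY_OBJECT_COPY_NONE) instead of relying on copy-on-write
 *	optimizations or entry coalescing.
 */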
2058static boolean_t
2059vm_memory_malloc_no_cow(
2060 int alias)
2061{
2062 uint64_t alias_mask;
2063
2064 alias_mask = 1ULL << alias;
2065 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2066 return TRUE;
2067 }
2068 return FALSE;
2069}
2070
2071/*
2072 * Routine: vm_map_enter
2073 *
2074 * Description:
2075 * Allocate a range in the specified virtual address map.
2076 * The resulting range will refer to memory defined by
2077 * the given memory object and offset into that object.
2078 *
2079 * Arguments are as defined in the vm_map call.
2080 */
2081int _map_enter_debug = 0;
2082static unsigned int vm_map_enter_restore_successes = 0;
2083static unsigned int vm_map_enter_restore_failures = 0;
2084kern_return_t
2085vm_map_enter(
2086 vm_map_t map,
2087 vm_map_offset_t *address, /* IN/OUT */
2088 vm_map_size_t size,
2089 vm_map_offset_t mask,
2090 int flags,
2091 vm_map_kernel_flags_t vmk_flags,
2092 vm_tag_t alias,
2093 vm_object_t object,
2094 vm_object_offset_t offset,
2095 boolean_t needs_copy,
2096 vm_prot_t cur_protection,
2097 vm_prot_t max_protection,
2098 vm_inherit_t inheritance)
2099{
2100 vm_map_entry_t entry, new_entry;
2101 vm_map_offset_t start, tmp_start, tmp_offset;
2102 vm_map_offset_t end, tmp_end;
2103 vm_map_offset_t tmp2_start, tmp2_end;
2104 vm_map_offset_t desired_empty_end;
2105 vm_map_offset_t step;
2106 kern_return_t result = KERN_SUCCESS;
2107 vm_map_t zap_old_map = VM_MAP_NULL;
2108 vm_map_t zap_new_map = VM_MAP_NULL;
2109 boolean_t map_locked = FALSE;
2110 boolean_t pmap_empty = TRUE;
2111 boolean_t new_mapping_established = FALSE;
2112 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2113 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2114 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2115 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2116 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2117 boolean_t is_submap = vmk_flags.vmkf_submap;
2118 boolean_t permanent = vmk_flags.vmkf_permanent;
2119 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2120 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2121 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2122 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2123 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2124 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2125 vm_tag_t user_alias;
2126 vm_map_offset_t effective_min_offset, effective_max_offset;
2127 kern_return_t kr;
2128 boolean_t clear_map_aligned = FALSE;
2129 vm_map_entry_t hole_entry;
2130 vm_map_size_t chunk_size = 0;
2131
2132 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2133
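	/*
	 * "chunk_size" caps how much of a large anonymous allocation is
	 * covered by a single map entry / VM object; the allocation is
	 * carved into pieces of at most this size further below (unless
	 * max_protection is VM_PROT_NONE).
	 */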
2134 if (flags & VM_FLAGS_4GB_CHUNK) {
2135#if defined(__LP64__)
2136 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2137#else /* __LP64__ */
2138 chunk_size = ANON_CHUNK_SIZE;
2139#endif /* __LP64__ */
2140 } else {
2141 chunk_size = ANON_CHUNK_SIZE;
2142 }
2143
2144 if (superpage_size) {
2145 switch (superpage_size) {
2146 /*
2147 * Note that the current implementation only supports
2148 * a single size for superpages, SUPERPAGE_SIZE, per
			 * architecture. If more sizes are to be supported,
			 * SUPERPAGE_SIZE will have to be replaced with a lookup
			 * of the size based on superpage_size.
2152 */
2153#ifdef __x86_64__
2154 case SUPERPAGE_SIZE_ANY:
2155 /* handle it like 2 MB and round up to page size */
2156 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
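			/* FALLTHROUGH: treated as a 2 MB superpage */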
2157 case SUPERPAGE_SIZE_2MB:
2158 break;
2159#endif
2160 default:
2161 return KERN_INVALID_ARGUMENT;
2162 }
2163 mask = SUPERPAGE_SIZE-1;
2164 if (size & (SUPERPAGE_SIZE-1))
2165 return KERN_INVALID_ARGUMENT;
2166 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2167 }
2168
2169
2170 if ((cur_protection & VM_PROT_WRITE) &&
2171 (cur_protection & VM_PROT_EXECUTE) &&
2172#if !CONFIG_EMBEDDED
2173 map != kernel_map &&
2174 (cs_process_global_enforcement() ||
2175 (vmk_flags.vmkf_cs_enforcement_override
2176 ? vmk_flags.vmkf_cs_enforcement
2177 : cs_process_enforcement(NULL))) &&
2178#endif /* !CONFIG_EMBEDDED */
2179 !entry_for_jit) {
2180 DTRACE_VM3(cs_wx,
2181 uint64_t, 0,
2182 uint64_t, 0,
2183 vm_prot_t, cur_protection);
2184 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2185#if VM_PROTECT_WX_FAIL
2186 "failing\n",
2187#else /* VM_PROTECT_WX_FAIL */
2188 "turning off execute\n",
2189#endif /* VM_PROTECT_WX_FAIL */
2190 proc_selfpid(),
2191 (current_task()->bsd_info
2192 ? proc_name_address(current_task()->bsd_info)
2193 : "?"),
2194 __FUNCTION__);
2195 cur_protection &= ~VM_PROT_EXECUTE;
2196#if VM_PROTECT_WX_FAIL
2197 return KERN_PROTECTION_FAILURE;
2198#endif /* VM_PROTECT_WX_FAIL */
2199 }
2200
2201 /*
2202 * If the task has requested executable lockdown,
2203 * deny any new executable mapping.
2204 */
2205 if (map->map_disallow_new_exec == TRUE) {
2206 if (cur_protection & VM_PROT_EXECUTE) {
2207 return KERN_PROTECTION_FAILURE;
2208 }
2209 }
2210
2211 if (resilient_codesign || resilient_media) {
2212 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2213 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2214 return KERN_PROTECTION_FAILURE;
2215 }
2216 }
2217
2218 if (is_submap) {
2219 if (purgable) {
2220 /* submaps can not be purgeable */
2221 return KERN_INVALID_ARGUMENT;
2222 }
2223 if (object == VM_OBJECT_NULL) {
2224 /* submaps can not be created lazily */
2225 return KERN_INVALID_ARGUMENT;
2226 }
2227 }
2228 if (vmk_flags.vmkf_already) {
2229 /*
2230 * VM_FLAGS_ALREADY says that it's OK if the same mapping
	 * is already present. For it to be meaningful, the requested
	 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
	 * we shouldn't try to remove what was mapped there first
	 * (!VM_FLAGS_OVERWRITE).
2235 */
2236 if ((flags & VM_FLAGS_ANYWHERE) ||
2237 (flags & VM_FLAGS_OVERWRITE)) {
2238 return KERN_INVALID_ARGUMENT;
2239 }
2240 }
2241
2242 effective_min_offset = map->min_offset;
2243
2244 if (vmk_flags.vmkf_beyond_max) {
2245 /*
2246 * Allow an insertion beyond the map's max offset.
2247 */
2248#if !defined(__arm__) && !defined(__arm64__)
2249 if (vm_map_is_64bit(map))
2250 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2251 else
#endif	/* !__arm__ && !__arm64__ */
2253 effective_max_offset = 0x00000000FFFFF000ULL;
2254 } else {
2255 effective_max_offset = map->max_offset;
2256 }
2257
2258 if (size == 0 ||
2259 (offset & PAGE_MASK_64) != 0) {
2260 *address = 0;
2261 return KERN_INVALID_ARGUMENT;
2262 }
2263
2264 if (map->pmap == kernel_pmap) {
2265 user_alias = VM_KERN_MEMORY_NONE;
2266 } else {
2267 user_alias = alias;
2268 }
2269
2270#define RETURN(value) { result = value; goto BailOut; }
2271
2272 assert(page_aligned(*address));
2273 assert(page_aligned(size));
2274
2275 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2276 /*
2277 * In most cases, the caller rounds the size up to the
2278 * map's page size.
2279 * If we get a size that is explicitly not map-aligned here,
2280 * we'll have to respect the caller's wish and mark the
2281 * mapping as "not map-aligned" to avoid tripping the
2282 * map alignment checks later.
2283 */
2284 clear_map_aligned = TRUE;
2285 }
2286 if (!anywhere &&
2287 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2288 /*
2289 * We've been asked to map at a fixed address and that
2290 * address is not aligned to the map's specific alignment.
2291 * The caller should know what it's doing (i.e. most likely
2292 * mapping some fragmented copy map, transferring memory from
2293 * a VM map with a different alignment), so clear map_aligned
2294 * for this new VM map entry and proceed.
2295 */
2296 clear_map_aligned = TRUE;
2297 }
2298
2299 /*
2300 * Only zero-fill objects are allowed to be purgable.
2301 * LP64todo - limit purgable objects to 32-bits for now
2302 */
2303 if (purgable &&
2304 (offset != 0 ||
2305 (object != VM_OBJECT_NULL &&
2306 (object->vo_size != size ||
2307 object->purgable == VM_PURGABLE_DENY))
2308 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
2309 return KERN_INVALID_ARGUMENT;
2310
2311 if (!anywhere && overwrite) {
2312 /*
2313 * Create a temporary VM map to hold the old mappings in the
2314 * affected area while we create the new one.
2315 * This avoids releasing the VM map lock in
2316 * vm_map_entry_delete() and allows atomicity
2317 * when we want to replace some mappings with a new one.
2318 * It also allows us to restore the old VM mappings if the
2319 * new mapping fails.
2320 */
2321 zap_old_map = vm_map_create(PMAP_NULL,
2322 *address,
2323 *address + size,
2324 map->hdr.entries_pageable);
2325 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2326 vm_map_disable_hole_optimization(zap_old_map);
2327 }
2328
2329StartAgain: ;
2330
2331 start = *address;
2332
2333 if (anywhere) {
2334 vm_map_lock(map);
2335 map_locked = TRUE;
2336
2337 if (entry_for_jit) {
2338#if CONFIG_EMBEDDED
2339 if (map->jit_entry_exists) {
2340 result = KERN_INVALID_ARGUMENT;
2341 goto BailOut;
2342 }
2343 random_address = TRUE;
2344#endif /* CONFIG_EMBEDDED */
2345 }
2346
2347 if (random_address) {
2348 /*
2349 * Get a random start address.
2350 */
2351 result = vm_map_random_address_for_size(map, address, size);
2352 if (result != KERN_SUCCESS) {
2353 goto BailOut;
2354 }
2355 start = *address;
2356 }
2357#if __x86_64__
2358 else if ((start == 0 || start == vm_map_min(map)) &&
2359 !map->disable_vmentry_reuse &&
2360 map->vmmap_high_start != 0) {
2361 start = map->vmmap_high_start;
2362 }
2363#endif /* __x86_64__ */
2364
2365
2366 /*
2367 * Calculate the first possible address.
2368 */
2369
2370 if (start < effective_min_offset)
2371 start = effective_min_offset;
2372 if (start > effective_max_offset)
2373 RETURN(KERN_NO_SPACE);
2374
2375 /*
2376 * Look for the first possible address;
2377 * if there's already something at this
2378 * address, we have to start after it.
2379 */
2380
		if (map->disable_vmentry_reuse == TRUE) {
2382 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2383 } else {
2384
2385 if (map->holelistenabled) {
2386 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2387
2388 if (hole_entry == NULL) {
2389 /*
2390 * No more space in the map?
2391 */
2392 result = KERN_NO_SPACE;
2393 goto BailOut;
2394 } else {
2395
2396 boolean_t found_hole = FALSE;
2397
2398 do {
2399 if (hole_entry->vme_start >= start) {
2400 start = hole_entry->vme_start;
2401 found_hole = TRUE;
2402 break;
2403 }
2404
2405 if (hole_entry->vme_end > start) {
2406 found_hole = TRUE;
2407 break;
2408 }
2409 hole_entry = hole_entry->vme_next;
2410
2411 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2412
2413 if (found_hole == FALSE) {
2414 result = KERN_NO_SPACE;
2415 goto BailOut;
2416 }
2417
2418 entry = hole_entry;
2419
2420 if (start == 0)
2421 start += PAGE_SIZE_64;
2422 }
2423 } else {
2424 assert(first_free_is_valid(map));
2425
2426 entry = map->first_free;
2427
2428 if (entry == vm_map_to_entry(map)) {
2429 entry = NULL;
2430 } else {
2431 if (entry->vme_next == vm_map_to_entry(map)){
2432 /*
2433 * Hole at the end of the map.
2434 */
2435 entry = NULL;
2436 } else {
2437 if (start < (entry->vme_next)->vme_start ) {
2438 start = entry->vme_end;
2439 start = vm_map_round_page(start,
2440 VM_MAP_PAGE_MASK(map));
2441 } else {
2442 /*
2443 * Need to do a lookup.
2444 */
2445 entry = NULL;
2446 }
2447 }
2448 }
2449
2450 if (entry == NULL) {
2451 vm_map_entry_t tmp_entry;
2452 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2453 assert(!entry_for_jit);
2454 start = tmp_entry->vme_end;
2455 start = vm_map_round_page(start,
2456 VM_MAP_PAGE_MASK(map));
2457 }
2458 entry = tmp_entry;
2459 }
2460 }
2461 }
2462
2463 /*
2464 * In any case, the "entry" always precedes
2465 * the proposed new region throughout the
2466 * loop:
2467 */
2468
2469 while (TRUE) {
2470 vm_map_entry_t next;
2471
2472 /*
2473 * Find the end of the proposed new region.
2474 * Be sure we didn't go beyond the end, or
2475 * wrap around the address.
2476 */
2477
2478 end = ((start + mask) & ~mask);
2479 end = vm_map_round_page(end,
2480 VM_MAP_PAGE_MASK(map));
2481 if (end < start)
2482 RETURN(KERN_NO_SPACE);
2483 start = end;
2484 assert(VM_MAP_PAGE_ALIGNED(start,
2485 VM_MAP_PAGE_MASK(map)));
2486 end += size;
2487
2488 /* We want an entire page of empty space, but don't increase the allocation size. */
2489 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2490
2491 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2492 if (map->wait_for_space) {
2493 assert(!keep_map_locked);
2494 if (size <= (effective_max_offset -
2495 effective_min_offset)) {
2496 assert_wait((event_t)map,
2497 THREAD_ABORTSAFE);
2498 vm_map_unlock(map);
2499 map_locked = FALSE;
2500 thread_block(THREAD_CONTINUE_NULL);
2501 goto StartAgain;
2502 }
2503 }
2504 RETURN(KERN_NO_SPACE);
2505 }
2506
2507 next = entry->vme_next;
2508
2509 if (map->holelistenabled) {
2510 if (entry->vme_end >= desired_empty_end)
2511 break;
2512 } else {
2513 /*
2514 * If there are no more entries, we must win.
2515 *
2516 * OR
2517 *
2518 * If there is another entry, it must be
2519 * after the end of the potential new region.
2520 */
2521
2522 if (next == vm_map_to_entry(map))
2523 break;
2524
2525 if (next->vme_start >= desired_empty_end)
2526 break;
2527 }
2528
2529 /*
2530 * Didn't fit -- move to the next entry.
2531 */
2532
2533 entry = next;
2534
2535 if (map->holelistenabled) {
2536 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2537 /*
2538 * Wrapped around
2539 */
2540 result = KERN_NO_SPACE;
2541 goto BailOut;
2542 }
2543 start = entry->vme_start;
2544 } else {
2545 start = entry->vme_end;
2546 }
2547
2548 start = vm_map_round_page(start,
2549 VM_MAP_PAGE_MASK(map));
2550 }
2551
2552 if (map->holelistenabled) {
2553 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2554 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2555 }
2556 }
2557
2558 *address = start;
2559 assert(VM_MAP_PAGE_ALIGNED(*address,
2560 VM_MAP_PAGE_MASK(map)));
2561 } else {
2562 /*
2563 * Verify that:
2564 * the address doesn't itself violate
2565 * the mask requirement.
2566 */
2567
2568 vm_map_lock(map);
2569 map_locked = TRUE;
2570 if ((start & mask) != 0)
2571 RETURN(KERN_NO_SPACE);
2572
2573 /*
2574 * ... the address is within bounds
2575 */
2576
2577 end = start + size;
2578
2579 if ((start < effective_min_offset) ||
2580 (end > effective_max_offset) ||
2581 (start >= end)) {
2582 RETURN(KERN_INVALID_ADDRESS);
2583 }
2584
2585 if (overwrite && zap_old_map != VM_MAP_NULL) {
2586 int remove_flags;
2587 /*
2588 * Fixed mapping and "overwrite" flag: attempt to
2589 * remove all existing mappings in the specified
2590 * address range, saving them in our "zap_old_map".
2591 */
2592 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2593 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2594 if (vmk_flags.vmkf_overwrite_immutable) {
2595 /* we can overwrite immutable mappings */
2596 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2597 }
2598 (void) vm_map_delete(map, start, end,
2599 remove_flags,
2600 zap_old_map);
2601 }
2602
2603 /*
2604 * ... the starting address isn't allocated
2605 */
2606
2607 if (vm_map_lookup_entry(map, start, &entry)) {
2608 if (! (vmk_flags.vmkf_already)) {
2609 RETURN(KERN_NO_SPACE);
2610 }
2611 /*
2612 * Check if what's already there is what we want.
2613 */
2614 tmp_start = start;
2615 tmp_offset = offset;
2616 if (entry->vme_start < start) {
2617 tmp_start -= start - entry->vme_start;
2618 tmp_offset -= start - entry->vme_start;
2619
2620 }
2621 for (; entry->vme_start < end;
2622 entry = entry->vme_next) {
2623 /*
2624 * Check if the mapping's attributes
2625 * match the existing map entry.
2626 */
2627 if (entry == vm_map_to_entry(map) ||
2628 entry->vme_start != tmp_start ||
2629 entry->is_sub_map != is_submap ||
2630 VME_OFFSET(entry) != tmp_offset ||
2631 entry->needs_copy != needs_copy ||
2632 entry->protection != cur_protection ||
2633 entry->max_protection != max_protection ||
2634 entry->inheritance != inheritance ||
2635 entry->iokit_acct != iokit_acct ||
2636 VME_ALIAS(entry) != alias) {
2637 /* not the same mapping ! */
2638 RETURN(KERN_NO_SPACE);
2639 }
2640 /*
2641 * Check if the same object is being mapped.
2642 */
2643 if (is_submap) {
2644 if (VME_SUBMAP(entry) !=
2645 (vm_map_t) object) {
2646 /* not the same submap */
2647 RETURN(KERN_NO_SPACE);
2648 }
2649 } else {
2650 if (VME_OBJECT(entry) != object) {
2651 /* not the same VM object... */
2652 vm_object_t obj2;
2653
2654 obj2 = VME_OBJECT(entry);
2655 if ((obj2 == VM_OBJECT_NULL ||
2656 obj2->internal) &&
2657 (object == VM_OBJECT_NULL ||
2658 object->internal)) {
2659 /*
2660 * ... but both are
2661 * anonymous memory,
2662 * so equivalent.
2663 */
2664 } else {
2665 RETURN(KERN_NO_SPACE);
2666 }
2667 }
2668 }
2669
2670 tmp_offset += entry->vme_end - entry->vme_start;
2671 tmp_start += entry->vme_end - entry->vme_start;
2672 if (entry->vme_end >= end) {
2673 /* reached the end of our mapping */
2674 break;
2675 }
2676 }
2677 /* it all matches: let's use what's already there ! */
2678 RETURN(KERN_MEMORY_PRESENT);
2679 }
2680
2681 /*
2682 * ... the next region doesn't overlap the
2683 * end point.
2684 */
2685
2686 if ((entry->vme_next != vm_map_to_entry(map)) &&
2687 (entry->vme_next->vme_start < end))
2688 RETURN(KERN_NO_SPACE);
2689 }
2690
2691 /*
2692 * At this point,
2693 * "start" and "end" should define the endpoints of the
2694 * available new range, and
2695 * "entry" should refer to the region before the new
2696 * range, and
2697 *
2698 * the map should be locked.
2699 */
2700
2701 /*
2702 * See whether we can avoid creating a new entry (and object) by
2703 * extending one of our neighbors. [So far, we only attempt to
2704 * extend from below.] Note that we can never extend/join
2705 * purgable objects because they need to remain distinct
2706 * entities in order to implement their "volatile object"
2707 * semantics.
2708 */
2709
2710 if (purgable ||
2711 entry_for_jit ||
2712 vm_memory_malloc_no_cow(user_alias)) {
2713 if (object == VM_OBJECT_NULL) {
2714
2715 object = vm_object_allocate(size);
2716 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2717 object->true_share = FALSE;
2718 if (purgable) {
2719 task_t owner;
2720 object->purgable = VM_PURGABLE_NONVOLATILE;
2721 if (map->pmap == kernel_pmap) {
2722 /*
2723 * Purgeable mappings made in a kernel
2724 * map are "owned" by the kernel itself
2725 * rather than the current user task
2726 * because they're likely to be used by
2727 * more than this user task (see
2728 * execargs_purgeable_allocate(), for
2729 * example).
2730 */
2731 owner = kernel_task;
2732 } else {
2733 owner = current_task();
2734 }
2735 assert(object->vo_owner == NULL);
2736 assert(object->resident_page_count == 0);
2737 assert(object->wired_page_count == 0);
2738 vm_object_lock(object);
2739 vm_purgeable_nonvolatile_enqueue(object, owner);
2740 vm_object_unlock(object);
2741 }
2742 offset = (vm_object_offset_t)0;
2743 }
2744 } else if ((is_submap == FALSE) &&
2745 (object == VM_OBJECT_NULL) &&
2746 (entry != vm_map_to_entry(map)) &&
2747 (entry->vme_end == start) &&
2748 (!entry->is_shared) &&
2749 (!entry->is_sub_map) &&
2750 (!entry->in_transition) &&
2751 (!entry->needs_wakeup) &&
2752 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2753 (entry->protection == cur_protection) &&
2754 (entry->max_protection == max_protection) &&
2755 (entry->inheritance == inheritance) &&
2756 ((user_alias == VM_MEMORY_REALLOC) ||
2757 (VME_ALIAS(entry) == alias)) &&
2758 (entry->no_cache == no_cache) &&
2759 (entry->permanent == permanent) &&
2760 /* no coalescing for immutable executable mappings */
2761 !((entry->protection & VM_PROT_EXECUTE) &&
2762 entry->permanent) &&
2763 (!entry->superpage_size && !superpage_size) &&
2764 /*
2765 * No coalescing if not map-aligned, to avoid propagating
2766 * that condition any further than needed:
2767 */
2768 (!entry->map_aligned || !clear_map_aligned) &&
2769 (!entry->zero_wired_pages) &&
2770 (!entry->used_for_jit && !entry_for_jit) &&
2771 (!entry->pmap_cs_associated) &&
2772 (entry->iokit_acct == iokit_acct) &&
2773 (!entry->vme_resilient_codesign) &&
2774 (!entry->vme_resilient_media) &&
2775 (!entry->vme_atomic) &&
2776
2777 ((entry->vme_end - entry->vme_start) + size <=
2778 (user_alias == VM_MEMORY_REALLOC ?
2779 ANON_CHUNK_SIZE :
2780 NO_COALESCE_LIMIT)) &&
2781
2782 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2783 if (vm_object_coalesce(VME_OBJECT(entry),
2784 VM_OBJECT_NULL,
2785 VME_OFFSET(entry),
2786 (vm_object_offset_t) 0,
2787 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2788 (vm_map_size_t)(end - entry->vme_end))) {
2789
2790 /*
2791 * Coalesced the two objects - can extend
2792 * the previous map entry to include the
2793 * new range.
2794 */
2795 map->size += (end - entry->vme_end);
2796 assert(entry->vme_start < end);
2797 assert(VM_MAP_PAGE_ALIGNED(end,
2798 VM_MAP_PAGE_MASK(map)));
2799 if (__improbable(vm_debug_events))
2800 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2801 entry->vme_end = end;
2802 if (map->holelistenabled) {
2803 vm_map_store_update_first_free(map, entry, TRUE);
2804 } else {
2805 vm_map_store_update_first_free(map, map->first_free, TRUE);
2806 }
2807 new_mapping_established = TRUE;
2808 RETURN(KERN_SUCCESS);
2809 }
2810 }
2811
2812 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2813 new_entry = NULL;
2814
	for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2816 tmp2_end = tmp2_start + step;
2817 /*
2818 * Create a new entry
2819 *
2820 * XXX FBDP
2821 * The reserved "page zero" in each process's address space can
2822 * be arbitrarily large. Splitting it into separate objects and
2823 * therefore different VM map entries serves no purpose and just
2824 * slows down operations on the VM map, so let's not split the
2825 * allocation into chunks if the max protection is NONE. That
2826 * memory should never be accessible, so it will never get to the
2827 * default pager.
2828 */
2829 tmp_start = tmp2_start;
2830 if (object == VM_OBJECT_NULL &&
2831 size > chunk_size &&
2832 max_protection != VM_PROT_NONE &&
2833 superpage_size == 0)
2834 tmp_end = tmp_start + chunk_size;
2835 else
2836 tmp_end = tmp2_end;
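		/*
		 * The do/while below inserts one map entry per piece until
		 * the whole [tmp2_start, tmp2_end) range is covered; large
		 * anonymous allocations are carved into pieces of at most
		 * "chunk_size" bytes (see the tmp_end computation above).
		 */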
2837 do {
2838 new_entry = vm_map_entry_insert(
2839 map, entry, tmp_start, tmp_end,
2840 object, offset, needs_copy,
2841 FALSE, FALSE,
2842 cur_protection, max_protection,
2843 VM_BEHAVIOR_DEFAULT,
2844 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2845 0,
2846 no_cache,
2847 permanent,
2848 superpage_size,
2849 clear_map_aligned,
2850 is_submap,
2851 entry_for_jit,
2852 alias);
2853
2854 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2855
2856 if (resilient_codesign &&
2857 ! ((cur_protection | max_protection) &
2858 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2859 new_entry->vme_resilient_codesign = TRUE;
2860 }
2861
2862 if (resilient_media &&
2863 ! ((cur_protection | max_protection) &
2864 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2865 new_entry->vme_resilient_media = TRUE;
2866 }
2867
2868 assert(!new_entry->iokit_acct);
2869 if (!is_submap &&
2870 object != VM_OBJECT_NULL &&
2871 (object->purgable != VM_PURGABLE_DENY ||
2872 object->vo_ledger_tag)) {
2873 assert(new_entry->use_pmap);
2874 assert(!new_entry->iokit_acct);
2875 /*
2876 * Turn off pmap accounting since
2877 * purgeable (or tagged) objects have their
2878 * own ledgers.
2879 */
2880 new_entry->use_pmap = FALSE;
2881 } else if (!is_submap &&
2882 iokit_acct &&
2883 object != VM_OBJECT_NULL &&
2884 object->internal) {
2885 /* alternate accounting */
2886 assert(!new_entry->iokit_acct);
2887 assert(new_entry->use_pmap);
2888 new_entry->iokit_acct = TRUE;
2889 new_entry->use_pmap = FALSE;
2890 DTRACE_VM4(
2891 vm_map_iokit_mapped_region,
2892 vm_map_t, map,
2893 vm_map_offset_t, new_entry->vme_start,
2894 vm_map_offset_t, new_entry->vme_end,
2895 int, VME_ALIAS(new_entry));
2896 vm_map_iokit_mapped_region(
2897 map,
2898 (new_entry->vme_end -
2899 new_entry->vme_start));
2900 } else if (!is_submap) {
2901 assert(!new_entry->iokit_acct);
2902 assert(new_entry->use_pmap);
2903 }
2904
2905 if (is_submap) {
2906 vm_map_t submap;
2907 boolean_t submap_is_64bit;
2908 boolean_t use_pmap;
2909
2910 assert(new_entry->is_sub_map);
2911 assert(!new_entry->use_pmap);
2912 assert(!new_entry->iokit_acct);
2913 submap = (vm_map_t) object;
2914 submap_is_64bit = vm_map_is_64bit(submap);
2915 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2916#ifndef NO_NESTED_PMAP
2917 if (use_pmap && submap->pmap == NULL) {
2918 ledger_t ledger = map->pmap->ledger;
2919 /* we need a sub pmap to nest... */
2920 submap->pmap = pmap_create(ledger, 0,
2921 submap_is_64bit);
2922 if (submap->pmap == NULL) {
2923 /* let's proceed without nesting... */
2924 }
2925#if defined(__arm__) || defined(__arm64__)
2926 else {
2927 pmap_set_nested(submap->pmap);
2928 }
2929#endif
2930 }
2931 if (use_pmap && submap->pmap != NULL) {
2932 kr = pmap_nest(map->pmap,
2933 submap->pmap,
2934 tmp_start,
2935 tmp_start,
2936 tmp_end - tmp_start);
2937 if (kr != KERN_SUCCESS) {
2938 printf("vm_map_enter: "
2939 "pmap_nest(0x%llx,0x%llx) "
2940 "error 0x%x\n",
2941 (long long)tmp_start,
2942 (long long)tmp_end,
2943 kr);
2944 } else {
2945 /* we're now nested ! */
2946 new_entry->use_pmap = TRUE;
2947 pmap_empty = FALSE;
2948 }
2949 }
2950#endif /* NO_NESTED_PMAP */
2951 }
2952 entry = new_entry;
2953
2954 if (superpage_size) {
2955 vm_page_t pages, m;
2956 vm_object_t sp_object;
2957 vm_object_offset_t sp_offset;
2958
2959 VME_OFFSET_SET(entry, 0);
2960
2961 /* allocate one superpage */
2962 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2963 if (kr != KERN_SUCCESS) {
2964 /* deallocate whole range... */
2965 new_mapping_established = TRUE;
2966 /* ... but only up to "tmp_end" */
2967 size -= end - tmp_end;
2968 RETURN(kr);
2969 }
2970
2971 /* create one vm_object per superpage */
2972 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2973 sp_object->phys_contiguous = TRUE;
2974 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
2975 VME_OBJECT_SET(entry, sp_object);
2976 assert(entry->use_pmap);
2977
2978 /* enter the base pages into the object */
2979 vm_object_lock(sp_object);
2980 for (sp_offset = 0;
2981 sp_offset < SUPERPAGE_SIZE;
2982 sp_offset += PAGE_SIZE) {
2983 m = pages;
2984 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
2985 pages = NEXT_PAGE(m);
2986 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2987 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2988 }
2989 vm_object_unlock(sp_object);
2990 }
2991 } while (tmp_end != tmp2_end &&
2992 (tmp_start = tmp_end) &&
2993 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
2994 tmp_end + chunk_size : tmp2_end));
2995 }
2996
2997 new_mapping_established = TRUE;
2998
2999BailOut:
3000 assert(map_locked == TRUE);
3001
3002 if (result == KERN_SUCCESS) {
3003 vm_prot_t pager_prot;
3004 memory_object_t pager;
3005
3006#if DEBUG
3007 if (pmap_empty &&
3008 !(vmk_flags.vmkf_no_pmap_check)) {
3009 assert(vm_map_pmap_is_empty(map,
3010 *address,
3011 *address+size));
3012 }
3013#endif /* DEBUG */
3014
3015 /*
3016 * For "named" VM objects, let the pager know that the
3017 * memory object is being mapped. Some pagers need to keep
3018 * track of this, to know when they can reclaim the memory
3019 * object, for example.
3020 * VM calls memory_object_map() for each mapping (specifying
3021 * the protection of each mapping) and calls
3022 * memory_object_last_unmap() when all the mappings are gone.
3023 */
3024 pager_prot = max_protection;
3025 if (needs_copy) {
3026 /*
3027 * Copy-On-Write mapping: won't modify
3028 * the memory object.
3029 */
3030 pager_prot &= ~VM_PROT_WRITE;
3031 }
3032 if (!is_submap &&
3033 object != VM_OBJECT_NULL &&
3034 object->named &&
3035 object->pager != MEMORY_OBJECT_NULL) {
3036 vm_object_lock(object);
3037 pager = object->pager;
3038 if (object->named &&
3039 pager != MEMORY_OBJECT_NULL) {
3040 assert(object->pager_ready);
3041 vm_object_mapping_wait(object, THREAD_UNINT);
3042 vm_object_mapping_begin(object);
3043 vm_object_unlock(object);
3044
3045 kr = memory_object_map(pager, pager_prot);
3046 assert(kr == KERN_SUCCESS);
3047
3048 vm_object_lock(object);
3049 vm_object_mapping_end(object);
3050 }
3051 vm_object_unlock(object);
3052 }
3053 }
3054
3055 assert(map_locked == TRUE);
3056
3057 if (!keep_map_locked) {
3058 vm_map_unlock(map);
3059 map_locked = FALSE;
3060 }
3061
3062 /*
3063 * We can't hold the map lock if we enter this block.
3064 */
3065
3066 if (result == KERN_SUCCESS) {
3067
3068 /* Wire down the new entry if the user
3069 * requested all new map entries be wired.
3070 */
3071 if ((map->wiring_required)||(superpage_size)) {
3072 assert(!keep_map_locked);
3073 pmap_empty = FALSE; /* pmap won't be empty */
3074 kr = vm_map_wire_kernel(map, start, end,
3075 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3076 TRUE);
3077 result = kr;
3078 }
3079
3080 }
3081
3082 if (result != KERN_SUCCESS) {
3083 if (new_mapping_established) {
3084 /*
3085 * We have to get rid of the new mappings since we
3086 * won't make them available to the user.
			 * Try to do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
3089 */
3090 zap_new_map = vm_map_create(PMAP_NULL,
3091 *address,
3092 *address + size,
3093 map->hdr.entries_pageable);
3094 vm_map_set_page_shift(zap_new_map,
3095 VM_MAP_PAGE_SHIFT(map));
3096 vm_map_disable_hole_optimization(zap_new_map);
3097
3098 if (!map_locked) {
3099 vm_map_lock(map);
3100 map_locked = TRUE;
3101 }
3102 (void) vm_map_delete(map, *address, *address+size,
3103 (VM_MAP_REMOVE_SAVE_ENTRIES |
3104 VM_MAP_REMOVE_NO_MAP_ALIGN),
3105 zap_new_map);
3106 }
3107 if (zap_old_map != VM_MAP_NULL &&
3108 zap_old_map->hdr.nentries != 0) {
3109 vm_map_entry_t entry1, entry2;
3110
3111 /*
3112 * The new mapping failed. Attempt to restore
3113 * the old mappings, saved in the "zap_old_map".
3114 */
3115 if (!map_locked) {
3116 vm_map_lock(map);
3117 map_locked = TRUE;
3118 }
3119
3120 /* first check if the coast is still clear */
3121 start = vm_map_first_entry(zap_old_map)->vme_start;
3122 end = vm_map_last_entry(zap_old_map)->vme_end;
3123 if (vm_map_lookup_entry(map, start, &entry1) ||
3124 vm_map_lookup_entry(map, end, &entry2) ||
3125 entry1 != entry2) {
3126 /*
3127 * Part of that range has already been
3128 * re-mapped: we can't restore the old
3129 * mappings...
3130 */
3131 vm_map_enter_restore_failures++;
3132 } else {
3133 /*
3134 * Transfer the saved map entries from
3135 * "zap_old_map" to the original "map",
3136 * inserting them all after "entry1".
3137 */
3138 for (entry2 = vm_map_first_entry(zap_old_map);
3139 entry2 != vm_map_to_entry(zap_old_map);
3140 entry2 = vm_map_first_entry(zap_old_map)) {
3141 vm_map_size_t entry_size;
3142
3143 entry_size = (entry2->vme_end -
3144 entry2->vme_start);
3145 vm_map_store_entry_unlink(zap_old_map,
3146 entry2);
3147 zap_old_map->size -= entry_size;
3148 vm_map_store_entry_link(map, entry1, entry2,
3149 VM_MAP_KERNEL_FLAGS_NONE);
3150 map->size += entry_size;
3151 entry1 = entry2;
3152 }
3153 if (map->wiring_required) {
3154 /*
3155 * XXX TODO: we should rewire the
3156 * old pages here...
3157 */
3158 }
3159 vm_map_enter_restore_successes++;
3160 }
3161 }
3162 }
3163
3164 /*
3165 * The caller is responsible for releasing the lock if it requested to
3166 * keep the map locked.
3167 */
3168 if (map_locked && !keep_map_locked) {
3169 vm_map_unlock(map);
3170 }
3171
3172 /*
3173 * Get rid of the "zap_maps" and all the map entries that
3174 * they may still contain.
3175 */
3176 if (zap_old_map != VM_MAP_NULL) {
3177 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3178 zap_old_map = VM_MAP_NULL;
3179 }
3180 if (zap_new_map != VM_MAP_NULL) {
3181 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3182 zap_new_map = VM_MAP_NULL;
3183 }
3184
3185 return result;
3186
3187#undef RETURN
3188}
3189
3190#if __arm64__
3191extern const struct memory_object_pager_ops fourk_pager_ops;
3192kern_return_t
3193vm_map_enter_fourk(
3194 vm_map_t map,
3195 vm_map_offset_t *address, /* IN/OUT */
3196 vm_map_size_t size,
3197 vm_map_offset_t mask,
3198 int flags,
3199 vm_map_kernel_flags_t vmk_flags,
3200 vm_tag_t alias,
3201 vm_object_t object,
3202 vm_object_offset_t offset,
3203 boolean_t needs_copy,
3204 vm_prot_t cur_protection,
3205 vm_prot_t max_protection,
3206 vm_inherit_t inheritance)
3207{
3208 vm_map_entry_t entry, new_entry;
3209 vm_map_offset_t start, fourk_start;
3210 vm_map_offset_t end, fourk_end;
3211 vm_map_size_t fourk_size;
3212 kern_return_t result = KERN_SUCCESS;
3213 vm_map_t zap_old_map = VM_MAP_NULL;
3214 vm_map_t zap_new_map = VM_MAP_NULL;
3215 boolean_t map_locked = FALSE;
3216 boolean_t pmap_empty = TRUE;
3217 boolean_t new_mapping_established = FALSE;
3218 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3219 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3220 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3221 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3222 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3223 boolean_t is_submap = vmk_flags.vmkf_submap;
3224 boolean_t permanent = vmk_flags.vmkf_permanent;
3225 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3226// boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3227 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3228 vm_map_offset_t effective_min_offset, effective_max_offset;
3229 kern_return_t kr;
3230 boolean_t clear_map_aligned = FALSE;
3231 memory_object_t fourk_mem_obj;
3232 vm_object_t fourk_object;
3233 vm_map_offset_t fourk_pager_offset;
3234 int fourk_pager_index_start, fourk_pager_index_num;
3235 int cur_idx;
3236 boolean_t fourk_copy;
3237 vm_object_t copy_object;
3238 vm_object_offset_t copy_offset;
3239
3240 fourk_mem_obj = MEMORY_OBJECT_NULL;
3241 fourk_object = VM_OBJECT_NULL;
3242
3243 if (superpage_size) {
3244 return KERN_NOT_SUPPORTED;
3245 }
3246
3247 if ((cur_protection & VM_PROT_WRITE) &&
3248 (cur_protection & VM_PROT_EXECUTE) &&
3249#if !CONFIG_EMBEDDED
3250 map != kernel_map &&
3251 cs_process_enforcement(NULL) &&
3252#endif /* !CONFIG_EMBEDDED */
3253 !entry_for_jit) {
3254 DTRACE_VM3(cs_wx,
3255 uint64_t, 0,
3256 uint64_t, 0,
3257 vm_prot_t, cur_protection);
3258 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3259 "turning off execute\n",
3260 proc_selfpid(),
3261 (current_task()->bsd_info
3262 ? proc_name_address(current_task()->bsd_info)
3263 : "?"),
3264 __FUNCTION__);
3265 cur_protection &= ~VM_PROT_EXECUTE;
3266 }
3267
3268 /*
3269 * If the task has requested executable lockdown,
3270 * deny any new executable mapping.
3271 */
3272 if (map->map_disallow_new_exec == TRUE) {
3273 if (cur_protection & VM_PROT_EXECUTE) {
3274 return KERN_PROTECTION_FAILURE;
3275 }
3276 }
3277
3278 if (is_submap) {
3279 return KERN_NOT_SUPPORTED;
3280 }
3281 if (vmk_flags.vmkf_already) {
3282 return KERN_NOT_SUPPORTED;
3283 }
3284 if (purgable || entry_for_jit) {
3285 return KERN_NOT_SUPPORTED;
3286 }
3287
3288 effective_min_offset = map->min_offset;
3289
3290 if (vmk_flags.vmkf_beyond_max) {
3291 return KERN_NOT_SUPPORTED;
3292 } else {
3293 effective_max_offset = map->max_offset;
3294 }
3295
3296 if (size == 0 ||
3297 (offset & FOURK_PAGE_MASK) != 0) {
3298 *address = 0;
3299 return KERN_INVALID_ARGUMENT;
3300 }
3301
3302#define RETURN(value) { result = value; goto BailOut; }
3303
3304 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3305 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3306
3307 if (!anywhere && overwrite) {
3308 return KERN_NOT_SUPPORTED;
3309 }
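	/*
	 * Note: given the check just above, the "overwrite at a fixed
	 * address" case below is not reachable.
	 */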
3310 if (!anywhere && overwrite) {
3311 /*
3312 * Create a temporary VM map to hold the old mappings in the
3313 * affected area while we create the new one.
3314 * This avoids releasing the VM map lock in
3315 * vm_map_entry_delete() and allows atomicity
3316 * when we want to replace some mappings with a new one.
3317 * It also allows us to restore the old VM mappings if the
3318 * new mapping fails.
3319 */
3320 zap_old_map = vm_map_create(PMAP_NULL,
3321 *address,
3322 *address + size,
3323 map->hdr.entries_pageable);
3324 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3325 vm_map_disable_hole_optimization(zap_old_map);
3326 }
3327
3328 fourk_start = *address;
3329 fourk_size = size;
3330 fourk_end = fourk_start + fourk_size;
3331
3332 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3333 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3334 size = end - start;
3335
3336 if (anywhere) {
3337 return KERN_NOT_SUPPORTED;
3338 } else {
3339 /*
3340 * Verify that:
3341 * the address doesn't itself violate
3342 * the mask requirement.
3343 */
3344
3345 vm_map_lock(map);
3346 map_locked = TRUE;
3347 if ((start & mask) != 0) {
3348 RETURN(KERN_NO_SPACE);
3349 }
3350
3351 /*
3352 * ... the address is within bounds
3353 */
3354
3355 end = start + size;
3356
3357 if ((start < effective_min_offset) ||
3358 (end > effective_max_offset) ||
3359 (start >= end)) {
3360 RETURN(KERN_INVALID_ADDRESS);
3361 }
3362
3363 if (overwrite && zap_old_map != VM_MAP_NULL) {
3364 /*
3365 * Fixed mapping and "overwrite" flag: attempt to
3366 * remove all existing mappings in the specified
3367 * address range, saving them in our "zap_old_map".
3368 */
3369 (void) vm_map_delete(map, start, end,
3370 (VM_MAP_REMOVE_SAVE_ENTRIES |
3371 VM_MAP_REMOVE_NO_MAP_ALIGN),
3372 zap_old_map);
3373 }
3374
3375 /*
3376 * ... the starting address isn't allocated
3377 */
3378 if (vm_map_lookup_entry(map, start, &entry)) {
3379 vm_object_t cur_object, shadow_object;
3380
3381 /*
			 * We might already have some 4K mappings
			 * in a 16K page here.
3384 */
3385
3386 if (entry->vme_end - entry->vme_start
3387 != SIXTEENK_PAGE_SIZE) {
3388 RETURN(KERN_NO_SPACE);
3389 }
3390 if (entry->is_sub_map) {
3391 RETURN(KERN_NO_SPACE);
3392 }
3393 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3394 RETURN(KERN_NO_SPACE);
3395 }
3396
3397 /* go all the way down the shadow chain */
3398 cur_object = VME_OBJECT(entry);
3399 vm_object_lock(cur_object);
3400 while (cur_object->shadow != VM_OBJECT_NULL) {
3401 shadow_object = cur_object->shadow;
3402 vm_object_lock(shadow_object);
3403 vm_object_unlock(cur_object);
3404 cur_object = shadow_object;
3405 shadow_object = VM_OBJECT_NULL;
3406 }
3407 if (cur_object->internal ||
3408 cur_object->pager == NULL) {
3409 vm_object_unlock(cur_object);
3410 RETURN(KERN_NO_SPACE);
3411 }
3412 if (cur_object->pager->mo_pager_ops
3413 != &fourk_pager_ops) {
3414 vm_object_unlock(cur_object);
3415 RETURN(KERN_NO_SPACE);
3416 }
3417 fourk_object = cur_object;
3418 fourk_mem_obj = fourk_object->pager;
3419
3420 /* keep the "4K" object alive */
3421 vm_object_reference_locked(fourk_object);
3422 vm_object_unlock(fourk_object);
3423
3424 /* merge permissions */
3425 entry->protection |= cur_protection;
3426 entry->max_protection |= max_protection;
3427 if ((entry->protection & (VM_PROT_WRITE |
3428 VM_PROT_EXECUTE)) ==
3429 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3430 fourk_binary_compatibility_unsafe &&
3431 fourk_binary_compatibility_allow_wx) {
3432 /* write+execute: need to be "jit" */
3433 entry->used_for_jit = TRUE;
3434 }
3435
3436 goto map_in_fourk_pager;
3437 }
3438
3439 /*
3440 * ... the next region doesn't overlap the
3441 * end point.
3442 */
3443
3444 if ((entry->vme_next != vm_map_to_entry(map)) &&
3445 (entry->vme_next->vme_start < end)) {
3446 RETURN(KERN_NO_SPACE);
3447 }
3448 }
3449
3450 /*
3451 * At this point,
3452 * "start" and "end" should define the endpoints of the
3453 * available new range, and
3454 * "entry" should refer to the region before the new
3455 * range, and
3456 *
3457 * the map should be locked.
3458 */
3459
3460 /* create a new "4K" pager */
3461 fourk_mem_obj = fourk_pager_create();
3462 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3463 assert(fourk_object);
3464
		/* keep the "4K" object alive */
3466 vm_object_reference(fourk_object);
3467
3468 /* create a "copy" object, to map the "4K" object copy-on-write */
3469 fourk_copy = TRUE;
3470 result = vm_object_copy_strategically(fourk_object,
3471 0,
3472 end - start,
3473 &copy_object,
3474 &copy_offset,
3475 &fourk_copy);
3476 assert(result == KERN_SUCCESS);
3477 assert(copy_object != VM_OBJECT_NULL);
3478 assert(copy_offset == 0);
3479
3480 /* take a reference on the copy object, for this mapping */
3481 vm_object_reference(copy_object);
3482
3483 /* map the "4K" pager's copy object */
3484 new_entry =
3485 vm_map_entry_insert(map, entry,
3486 vm_map_trunc_page(start,
3487 VM_MAP_PAGE_MASK(map)),
3488 vm_map_round_page(end,
3489 VM_MAP_PAGE_MASK(map)),
3490 copy_object,
3491 0, /* offset */
3492 FALSE, /* needs_copy */
3493 FALSE, FALSE,
3494 cur_protection, max_protection,
3495 VM_BEHAVIOR_DEFAULT,
3496 ((entry_for_jit)
3497 ? VM_INHERIT_NONE
3498 : inheritance),
3499 0,
3500 no_cache,
3501 permanent,
3502 superpage_size,
3503 clear_map_aligned,
3504 is_submap,
3505 FALSE, /* jit */
3506 alias);
3507 entry = new_entry;
3508
3509#if VM_MAP_DEBUG_FOURK
3510 if (vm_map_debug_fourk) {
3511 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3512 map,
3513 (uint64_t) entry->vme_start,
3514 (uint64_t) entry->vme_end,
3515 fourk_mem_obj);
3516 }
3517#endif /* VM_MAP_DEBUG_FOURK */
3518
3519 new_mapping_established = TRUE;
3520
3521map_in_fourk_pager:
3522 /* "map" the original "object" where it belongs in the "4K" pager */
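	/*
	 * Each 16K-aligned VM page is backed by up to 4 "4K" slots in the
	 * pager.  The slot index comes from the offset of "fourk_start"
	 * within its 16K page; e.g. (assuming 4K/16K page sizes) a
	 * fourk_start offset of 0x2000 maps to slot 2, and an 8K request
	 * there populates slots 2 and 3.
	 */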
3523 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3524 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3525 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3526 fourk_pager_index_num = 4;
3527 } else {
3528 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3529 }
3530 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3531 fourk_pager_index_num = 4 - fourk_pager_index_start;
3532 }
3533 for (cur_idx = 0;
3534 cur_idx < fourk_pager_index_num;
3535 cur_idx++) {
3536 vm_object_t old_object;
3537 vm_object_offset_t old_offset;
3538
3539 kr = fourk_pager_populate(fourk_mem_obj,
3540 TRUE, /* overwrite */
3541 fourk_pager_index_start + cur_idx,
3542 object,
3543 (object
3544 ? (offset +
3545 (cur_idx * FOURK_PAGE_SIZE))
3546 : 0),
3547 &old_object,
3548 &old_offset);
3549#if VM_MAP_DEBUG_FOURK
3550 if (vm_map_debug_fourk) {
3551 if (old_object == (vm_object_t) -1 &&
3552 old_offset == (vm_object_offset_t) -1) {
3553 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3554 "pager [%p:0x%llx] "
3555 "populate[%d] "
3556 "[object:%p,offset:0x%llx]\n",
3557 map,
3558 (uint64_t) entry->vme_start,
3559 (uint64_t) entry->vme_end,
3560 fourk_mem_obj,
3561 VME_OFFSET(entry),
3562 fourk_pager_index_start + cur_idx,
3563 object,
3564 (object
3565 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3566 : 0));
3567 } else {
3568 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3569 "pager [%p:0x%llx] "
3570 "populate[%d] [object:%p,offset:0x%llx] "
3571 "old [%p:0x%llx]\n",
3572 map,
3573 (uint64_t) entry->vme_start,
3574 (uint64_t) entry->vme_end,
3575 fourk_mem_obj,
3576 VME_OFFSET(entry),
3577 fourk_pager_index_start + cur_idx,
3578 object,
3579 (object
3580 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3581 : 0),
3582 old_object,
3583 old_offset);
3584 }
3585 }
3586#endif /* VM_MAP_DEBUG_FOURK */
3587
3588 assert(kr == KERN_SUCCESS);
3589 if (object != old_object &&
3590 object != VM_OBJECT_NULL &&
3591 object != (vm_object_t) -1) {
3592 vm_object_reference(object);
3593 }
3594 if (object != old_object &&
3595 old_object != VM_OBJECT_NULL &&
3596 old_object != (vm_object_t) -1) {
3597 vm_object_deallocate(old_object);
3598 }
3599 }
3600
3601BailOut:
3602 assert(map_locked == TRUE);
3603
3604 if (fourk_object != VM_OBJECT_NULL) {
3605 vm_object_deallocate(fourk_object);
3606 fourk_object = VM_OBJECT_NULL;
3607 fourk_mem_obj = MEMORY_OBJECT_NULL;
3608 }
3609
3610 if (result == KERN_SUCCESS) {
3611 vm_prot_t pager_prot;
3612 memory_object_t pager;
3613
3614#if DEBUG
3615 if (pmap_empty &&
3616 !(vmk_flags.vmkf_no_pmap_check)) {
3617 assert(vm_map_pmap_is_empty(map,
3618 *address,
3619 *address+size));
3620 }
3621#endif /* DEBUG */
3622
3623 /*
3624 * For "named" VM objects, let the pager know that the
3625 * memory object is being mapped. Some pagers need to keep
3626 * track of this, to know when they can reclaim the memory
3627 * object, for example.
3628 * VM calls memory_object_map() for each mapping (specifying
3629 * the protection of each mapping) and calls
3630 * memory_object_last_unmap() when all the mappings are gone.
3631 */
3632 pager_prot = max_protection;
3633 if (needs_copy) {
3634 /*
3635 * Copy-On-Write mapping: won't modify
3636 * the memory object.
3637 */
3638 pager_prot &= ~VM_PROT_WRITE;
3639 }
3640 if (!is_submap &&
3641 object != VM_OBJECT_NULL &&
3642 object->named &&
3643 object->pager != MEMORY_OBJECT_NULL) {
3644 vm_object_lock(object);
3645 pager = object->pager;
3646 if (object->named &&
3647 pager != MEMORY_OBJECT_NULL) {
3648 assert(object->pager_ready);
3649 vm_object_mapping_wait(object, THREAD_UNINT);
3650 vm_object_mapping_begin(object);
3651 vm_object_unlock(object);
3652
3653 kr = memory_object_map(pager, pager_prot);
3654 assert(kr == KERN_SUCCESS);
3655
3656 vm_object_lock(object);
3657 vm_object_mapping_end(object);
3658 }
3659 vm_object_unlock(object);
3660 }
3661 if (!is_submap &&
3662 fourk_object != VM_OBJECT_NULL &&
3663 fourk_object->named &&
3664 fourk_object->pager != MEMORY_OBJECT_NULL) {
3665 vm_object_lock(fourk_object);
3666 pager = fourk_object->pager;
3667 if (fourk_object->named &&
3668 pager != MEMORY_OBJECT_NULL) {
3669 assert(fourk_object->pager_ready);
3670 vm_object_mapping_wait(fourk_object,
3671 THREAD_UNINT);
3672 vm_object_mapping_begin(fourk_object);
3673 vm_object_unlock(fourk_object);
3674
3675 kr = memory_object_map(pager, VM_PROT_READ);
3676 assert(kr == KERN_SUCCESS);
3677
3678 vm_object_lock(fourk_object);
3679 vm_object_mapping_end(fourk_object);
3680 }
3681 vm_object_unlock(fourk_object);
3682 }
3683 }
3684
3685 assert(map_locked == TRUE);
3686
3687 if (!keep_map_locked) {
3688 vm_map_unlock(map);
3689 map_locked = FALSE;
3690 }
3691
3692 /*
3693 * We can't hold the map lock if we enter this block.
3694 */
3695
3696 if (result == KERN_SUCCESS) {
3697
3698 /* Wire down the new entry if the user
3699 * requested all new map entries be wired.
3700 */
3701 if ((map->wiring_required)||(superpage_size)) {
3702 assert(!keep_map_locked);
3703 pmap_empty = FALSE; /* pmap won't be empty */
3704 kr = vm_map_wire_kernel(map, start, end,
3705 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3706 TRUE);
3707 result = kr;
3708 }
3709
3710 }
3711
3712 if (result != KERN_SUCCESS) {
3713 if (new_mapping_established) {
3714 /*
3715 * We have to get rid of the new mappings since we
3716 * won't make them available to the user.
			 * Try to do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
3719 */
3720 zap_new_map = vm_map_create(PMAP_NULL,
3721 *address,
3722 *address + size,
3723 map->hdr.entries_pageable);
3724 vm_map_set_page_shift(zap_new_map,
3725 VM_MAP_PAGE_SHIFT(map));
3726 vm_map_disable_hole_optimization(zap_new_map);
3727
3728 if (!map_locked) {
3729 vm_map_lock(map);
3730 map_locked = TRUE;
3731 }
3732 (void) vm_map_delete(map, *address, *address+size,
3733 (VM_MAP_REMOVE_SAVE_ENTRIES |
3734 VM_MAP_REMOVE_NO_MAP_ALIGN),
3735 zap_new_map);
3736 }
3737 if (zap_old_map != VM_MAP_NULL &&
3738 zap_old_map->hdr.nentries != 0) {
3739 vm_map_entry_t entry1, entry2;
3740
3741 /*
3742 * The new mapping failed. Attempt to restore
3743 * the old mappings, saved in the "zap_old_map".
3744 */
3745 if (!map_locked) {
3746 vm_map_lock(map);
3747 map_locked = TRUE;
3748 }
3749
3750 /* first check if the coast is still clear */
3751 start = vm_map_first_entry(zap_old_map)->vme_start;
3752 end = vm_map_last_entry(zap_old_map)->vme_end;
3753 if (vm_map_lookup_entry(map, start, &entry1) ||
3754 vm_map_lookup_entry(map, end, &entry2) ||
3755 entry1 != entry2) {
3756 /*
3757 * Part of that range has already been
3758 * re-mapped: we can't restore the old
3759 * mappings...
3760 */
3761 vm_map_enter_restore_failures++;
3762 } else {
3763 /*
3764 * Transfer the saved map entries from
3765 * "zap_old_map" to the original "map",
3766 * inserting them all after "entry1".
3767 */
3768 for (entry2 = vm_map_first_entry(zap_old_map);
3769 entry2 != vm_map_to_entry(zap_old_map);
3770 entry2 = vm_map_first_entry(zap_old_map)) {
3771 vm_map_size_t entry_size;
3772
3773 entry_size = (entry2->vme_end -
3774 entry2->vme_start);
3775 vm_map_store_entry_unlink(zap_old_map,
3776 entry2);
3777 zap_old_map->size -= entry_size;
3778 vm_map_store_entry_link(map, entry1, entry2,
3779 VM_MAP_KERNEL_FLAGS_NONE);
3780 map->size += entry_size;
3781 entry1 = entry2;
3782 }
3783 if (map->wiring_required) {
3784 /*
3785 * XXX TODO: we should rewire the
3786 * old pages here...
3787 */
3788 }
3789 vm_map_enter_restore_successes++;
3790 }
3791 }
3792 }
3793
3794 /*
3795 * The caller is responsible for releasing the lock if it requested to
3796 * keep the map locked.
3797 */
3798 if (map_locked && !keep_map_locked) {
3799 vm_map_unlock(map);
3800 }
3801
3802 /*
3803 * Get rid of the "zap_maps" and all the map entries that
3804 * they may still contain.
3805 */
3806 if (zap_old_map != VM_MAP_NULL) {
3807 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3808 zap_old_map = VM_MAP_NULL;
3809 }
3810 if (zap_new_map != VM_MAP_NULL) {
3811 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3812 zap_new_map = VM_MAP_NULL;
3813 }
3814
3815 return result;
3816
3817#undef RETURN
3818}
3819#endif /* __arm64__ */
3820
3821/*
3822 * Counters for the prefault optimization.
3823 */
3824int64_t vm_prefault_nb_pages = 0;
3825int64_t vm_prefault_nb_bailout = 0;
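/*
 * "vm_prefault_nb_pages" counts pages successfully pre-entered into the
 * pmap by the prefault loop in vm_map_enter_mem_object_helper() below;
 * "vm_prefault_nb_bailout" counts the times that loop bailed out after a
 * pmap_enter_options() failure.  Both are bumped with OSIncrementAtomic64().
 */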
3826
3827static kern_return_t
3828vm_map_enter_mem_object_helper(
3829 vm_map_t target_map,
3830 vm_map_offset_t *address,
3831 vm_map_size_t initial_size,
3832 vm_map_offset_t mask,
3833 int flags,
3834 vm_map_kernel_flags_t vmk_flags,
3835 vm_tag_t tag,
3836 ipc_port_t port,
3837 vm_object_offset_t offset,
3838 boolean_t copy,
3839 vm_prot_t cur_protection,
3840 vm_prot_t max_protection,
3841 vm_inherit_t inheritance,
3842 upl_page_list_ptr_t page_list,
3843 unsigned int page_list_count)
3844{
3845 vm_map_address_t map_addr;
3846 vm_map_size_t map_size;
3847 vm_object_t object;
3848 vm_object_size_t size;
3849 kern_return_t result;
3850 boolean_t mask_cur_protection, mask_max_protection;
3851 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3852 vm_map_offset_t offset_in_mapping = 0;
3853#if __arm64__
3854 boolean_t fourk = vmk_flags.vmkf_fourk;
3855#endif /* __arm64__ */
3856
3857 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3858
3859 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3860 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3861 cur_protection &= ~VM_PROT_IS_MASK;
3862 max_protection &= ~VM_PROT_IS_MASK;
3863
3864 /*
3865 * Check arguments for validity
3866 */
3867 if ((target_map == VM_MAP_NULL) ||
3868 (cur_protection & ~VM_PROT_ALL) ||
3869 (max_protection & ~VM_PROT_ALL) ||
3870 (inheritance > VM_INHERIT_LAST_VALID) ||
3871 (try_prefault && (copy || !page_list)) ||
3872 initial_size == 0) {
3873 return KERN_INVALID_ARGUMENT;
3874 }
3875
3876#if __arm64__
3877 if (fourk) {
3878 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3879 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3880 } else
3881#endif /* __arm64__ */
3882 {
3883 map_addr = vm_map_trunc_page(*address,
3884 VM_MAP_PAGE_MASK(target_map));
3885 map_size = vm_map_round_page(initial_size,
3886 VM_MAP_PAGE_MASK(target_map));
3887 }
3888 size = vm_object_round_page(initial_size);
3889
3890 /*
3891 * Find the vm object (if any) corresponding to this port.
3892 */
3893 if (!IP_VALID(port)) {
3894 object = VM_OBJECT_NULL;
3895 offset = 0;
3896 copy = FALSE;
3897 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
3898 vm_named_entry_t named_entry;
3899
3900 named_entry = (vm_named_entry_t) port->ip_kobject;
3901
3902 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3903 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3904 offset += named_entry->data_offset;
3905 }
3906
3907 /* a few checks to make sure user is obeying rules */
3908 if (size == 0) {
3909 if (offset >= named_entry->size)
3910 return KERN_INVALID_RIGHT;
3911 size = named_entry->size - offset;
3912 }
3913 if (mask_max_protection) {
3914 max_protection &= named_entry->protection;
3915 }
3916 if (mask_cur_protection) {
3917 cur_protection &= named_entry->protection;
3918 }
3919 if ((named_entry->protection & max_protection) !=
3920 max_protection)
3921 return KERN_INVALID_RIGHT;
3922 if ((named_entry->protection & cur_protection) !=
3923 cur_protection)
3924 return KERN_INVALID_RIGHT;
3925 if (offset + size < offset) {
3926 /* overflow */
3927 return KERN_INVALID_ARGUMENT;
3928 }
3929 if (named_entry->size < (offset + initial_size)) {
3930 return KERN_INVALID_ARGUMENT;
3931 }
3932
3933 if (named_entry->is_copy) {
3934 /* for a vm_map_copy, we can only map it whole */
3935 if ((size != named_entry->size) &&
3936 (vm_map_round_page(size,
3937 VM_MAP_PAGE_MASK(target_map)) ==
3938 named_entry->size)) {
3939 /* XXX FBDP use the rounded size... */
3940 size = vm_map_round_page(
3941 size,
3942 VM_MAP_PAGE_MASK(target_map));
3943 }
3944
3945 if (!(flags & VM_FLAGS_ANYWHERE) &&
3946 (offset != 0 ||
3947 size != named_entry->size)) {
3948 /*
3949 * XXX for a mapping at a "fixed" address,
3950 * we can't trim after mapping the whole
3951 * memory entry, so reject a request for a
3952 * partial mapping.
3953 */
3954 return KERN_INVALID_ARGUMENT;
3955 }
3956 }
3957
3958		/* the caller's "offset" parameter is defined to be the */
3959		/* offset from the beginning of the named entry's offset in the object */
3960 offset = offset + named_entry->offset;
3961
3962 if (! VM_MAP_PAGE_ALIGNED(size,
3963 VM_MAP_PAGE_MASK(target_map))) {
3964 /*
3965 * Let's not map more than requested;
3966 * vm_map_enter() will handle this "not map-aligned"
3967 * case.
3968 */
3969 map_size = size;
3970 }
3971
3972 named_entry_lock(named_entry);
3973 if (named_entry->is_sub_map) {
3974 vm_map_t submap;
3975
3976 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3977 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3978 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3979 }
3980
3981 submap = named_entry->backing.map;
3982 vm_map_lock(submap);
3983 vm_map_reference(submap);
3984 vm_map_unlock(submap);
3985 named_entry_unlock(named_entry);
3986
3987 vmk_flags.vmkf_submap = TRUE;
3988
3989 result = vm_map_enter(target_map,
3990 &map_addr,
3991 map_size,
3992 mask,
3993 flags,
3994 vmk_flags,
3995 tag,
3996 (vm_object_t)(uintptr_t) submap,
3997 offset,
3998 copy,
3999 cur_protection,
4000 max_protection,
4001 inheritance);
4002 if (result != KERN_SUCCESS) {
4003 vm_map_deallocate(submap);
4004 } else {
4005 /*
4006 * No need to lock "submap" just to check its
4007 * "mapped" flag: that flag is never reset
4008 * once it's been set and if we race, we'll
4009 * just end up setting it twice, which is OK.
4010 */
4011 if (submap->mapped_in_other_pmaps == FALSE &&
4012 vm_map_pmap(submap) != PMAP_NULL &&
4013 vm_map_pmap(submap) !=
4014 vm_map_pmap(target_map)) {
4015 /*
4016 * This submap is being mapped in a map
4017 * that uses a different pmap.
4018 * Set its "mapped_in_other_pmaps" flag
4019 * to indicate that we now need to
4020 * remove mappings from all pmaps rather
4021 * than just the submap's pmap.
4022 */
4023 vm_map_lock(submap);
4024 submap->mapped_in_other_pmaps = TRUE;
4025 vm_map_unlock(submap);
4026 }
4027 *address = map_addr;
4028 }
4029 return result;
4030
4031 } else if (named_entry->is_copy) {
4032 kern_return_t kr;
4033 vm_map_copy_t copy_map;
4034 vm_map_entry_t copy_entry;
4035 vm_map_offset_t copy_addr;
4036
4037 if (flags & ~(VM_FLAGS_FIXED |
4038 VM_FLAGS_ANYWHERE |
4039 VM_FLAGS_OVERWRITE |
4040 VM_FLAGS_RETURN_4K_DATA_ADDR |
4041 VM_FLAGS_RETURN_DATA_ADDR |
4042 VM_FLAGS_ALIAS_MASK)) {
4043 named_entry_unlock(named_entry);
4044 return KERN_INVALID_ARGUMENT;
4045 }
4046
4047 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4048 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4049 offset_in_mapping = offset - vm_object_trunc_page(offset);
4050 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4051 offset_in_mapping &= ~((signed)(0xFFF));
4052 offset = vm_object_trunc_page(offset);
4053 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4054 }
4055
4056 copy_map = named_entry->backing.copy;
4057 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4058 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4059 /* unsupported type; should not happen */
4060 printf("vm_map_enter_mem_object: "
4061 "memory_entry->backing.copy "
4062 "unsupported type 0x%x\n",
4063 copy_map->type);
4064 named_entry_unlock(named_entry);
4065 return KERN_INVALID_ARGUMENT;
4066 }
4067
4068 /* reserve a contiguous range */
4069 kr = vm_map_enter(target_map,
4070 &map_addr,
4071 /* map whole mem entry, trim later: */
4072 named_entry->size,
4073 mask,
4074 flags & (VM_FLAGS_ANYWHERE |
4075 VM_FLAGS_OVERWRITE |
4076 VM_FLAGS_RETURN_4K_DATA_ADDR |
4077 VM_FLAGS_RETURN_DATA_ADDR),
4078 vmk_flags,
4079 tag,
4080 VM_OBJECT_NULL,
4081 0,
4082 FALSE, /* copy */
4083 cur_protection,
4084 max_protection,
4085 inheritance);
4086 if (kr != KERN_SUCCESS) {
4087 named_entry_unlock(named_entry);
4088 return kr;
4089 }
4090
4091 copy_addr = map_addr;
4092
4093 for (copy_entry = vm_map_copy_first_entry(copy_map);
4094 copy_entry != vm_map_copy_to_entry(copy_map);
4095 copy_entry = copy_entry->vme_next) {
4096 int remap_flags;
4097 vm_map_kernel_flags_t vmk_remap_flags;
4098 vm_map_t copy_submap;
4099 vm_object_t copy_object;
4100 vm_map_size_t copy_size;
4101 vm_object_offset_t copy_offset;
4102 int copy_vm_alias;
4103
4104 remap_flags = 0;
4105 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4106
4107 copy_object = VME_OBJECT(copy_entry);
4108 copy_offset = VME_OFFSET(copy_entry);
4109 copy_size = (copy_entry->vme_end -
4110 copy_entry->vme_start);
4111 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4112 if (copy_vm_alias == 0) {
4113 /*
4114 * Caller does not want a specific
4115 * alias for this new mapping: use
4116 * the alias of the original mapping.
4117 */
4118 copy_vm_alias = VME_ALIAS(copy_entry);
4119 }
4120
4121 /* sanity check */
4122 if ((copy_addr + copy_size) >
4123 (map_addr +
4124 named_entry->size /* XXX full size */ )) {
4125 /* over-mapping too much !? */
4126 kr = KERN_INVALID_ARGUMENT;
4127 /* abort */
4128 break;
4129 }
4130
4131 /* take a reference on the object */
4132 if (copy_entry->is_sub_map) {
4133 vmk_remap_flags.vmkf_submap = TRUE;
4134 copy_submap = VME_SUBMAP(copy_entry);
4135 vm_map_lock(copy_submap);
4136 vm_map_reference(copy_submap);
4137 vm_map_unlock(copy_submap);
4138 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4139 } else if (!copy &&
4140 copy_object != VM_OBJECT_NULL &&
4141 (copy_entry->needs_copy ||
4142 copy_object->shadowed ||
4143 (!copy_object->true_share &&
4144 !copy_entry->is_shared &&
4145 copy_object->vo_size > copy_size))) {
4146 /*
4147 * We need to resolve our side of this
4148 * "symmetric" copy-on-write now; we
4149 * need a new object to map and share,
4150 * instead of the current one which
4151 * might still be shared with the
4152 * original mapping.
4153 *
4154 * Note: A "vm_map_copy_t" does not
4155 * have a lock but we're protected by
4156 * the named entry's lock here.
4157 */
4158 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4159 VME_OBJECT_SHADOW(copy_entry, copy_size);
4160 if (!copy_entry->needs_copy &&
4161 copy_entry->protection & VM_PROT_WRITE) {
4162 vm_prot_t prot;
4163
4164 prot = copy_entry->protection & ~VM_PROT_WRITE;
4165 vm_object_pmap_protect(copy_object,
4166 copy_offset,
4167 copy_size,
4168 PMAP_NULL,
4169 0,
4170 prot);
4171 }
4172
4173 copy_entry->needs_copy = FALSE;
4174 copy_entry->is_shared = TRUE;
4175 copy_object = VME_OBJECT(copy_entry);
4176 copy_offset = VME_OFFSET(copy_entry);
4177 vm_object_lock(copy_object);
4178 vm_object_reference_locked(copy_object);
4179 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4180 /* we're about to make a shared mapping of this object */
4181 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4182 copy_object->true_share = TRUE;
4183 }
4184 vm_object_unlock(copy_object);
4185 } else {
4186 /*
4187 * We already have the right object
4188 * to map.
4189 */
4190 copy_object = VME_OBJECT(copy_entry);
4191 vm_object_reference(copy_object);
4192 }
4193
4194 /* over-map the object into destination */
4195 remap_flags |= flags;
4196 remap_flags |= VM_FLAGS_FIXED;
4197 remap_flags |= VM_FLAGS_OVERWRITE;
4198 remap_flags &= ~VM_FLAGS_ANYWHERE;
4199 if (!copy && !copy_entry->is_sub_map) {
4200 /*
4201 * copy-on-write should have been
4202 * resolved at this point, or we would
4203 * end up sharing instead of copying.
4204 */
4205 assert(!copy_entry->needs_copy);
4206 }
4207#if !CONFIG_EMBEDDED
4208 if (copy_entry->used_for_jit) {
4209 vmk_remap_flags.vmkf_map_jit = TRUE;
4210 }
4211#endif /* !CONFIG_EMBEDDED */
4212 kr = vm_map_enter(target_map,
4213 &copy_addr,
4214 copy_size,
4215 (vm_map_offset_t) 0,
4216 remap_flags,
4217 vmk_remap_flags,
4218 copy_vm_alias,
4219 copy_object,
4220 copy_offset,
4221 copy,
4222 cur_protection,
4223 max_protection,
4224 inheritance);
4225 if (kr != KERN_SUCCESS) {
4226 if (copy_entry->is_sub_map) {
4227 vm_map_deallocate(copy_submap);
4228 } else {
4229 vm_object_deallocate(copy_object);
4230 }
4231 /* abort */
4232 break;
4233 }
4234
4235 /* next mapping */
4236 copy_addr += copy_size;
4237 }
4238
4239 if (kr == KERN_SUCCESS) {
4240 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4241 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4242 *address = map_addr + offset_in_mapping;
4243 } else {
4244 *address = map_addr;
4245 }
4246
4247 if (offset) {
4248 /*
4249 * Trim in front, from 0 to "offset".
4250 */
4251 vm_map_remove(target_map,
4252 map_addr,
4253 map_addr + offset,
4254 VM_MAP_REMOVE_NO_FLAGS);
4255 *address += offset;
4256 }
4257 if (offset + map_size < named_entry->size) {
4258 /*
4259 * Trim in back, from
4260 * "offset + map_size" to
4261 * "named_entry->size".
4262 */
4263 vm_map_remove(target_map,
4264 (map_addr +
4265 offset + map_size),
4266 (map_addr +
4267 named_entry->size),
4268 VM_MAP_REMOVE_NO_FLAGS);
4269 }
4270 }
4271 named_entry_unlock(named_entry);
4272
4273 if (kr != KERN_SUCCESS) {
4274 if (! (flags & VM_FLAGS_OVERWRITE)) {
4275 /* deallocate the contiguous range */
4276 (void) vm_deallocate(target_map,
4277 map_addr,
4278 map_size);
4279 }
4280 }
4281
4282 return kr;
4283
4284 } else {
4285 unsigned int access;
4286 vm_prot_t protections;
4287 unsigned int wimg_mode;
4288
4289 /* we are mapping a VM object */
4290
4291 protections = named_entry->protection & VM_PROT_ALL;
4292 access = GET_MAP_MEM(named_entry->protection);
4293
4294 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4295 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4296 offset_in_mapping = offset - vm_object_trunc_page(offset);
4297 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4298 offset_in_mapping &= ~((signed)(0xFFF));
4299 offset = vm_object_trunc_page(offset);
4300 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4301 }
4302
4303 object = named_entry->backing.object;
4304 assert(object != VM_OBJECT_NULL);
4305 vm_object_lock(object);
4306 named_entry_unlock(named_entry);
4307
4308 vm_object_reference_locked(object);
4309
4310 wimg_mode = object->wimg_bits;
4311 vm_prot_to_wimg(access, &wimg_mode);
4312 if (object->wimg_bits != wimg_mode)
4313 vm_object_change_wimg_mode(object, wimg_mode);
4314
4315 vm_object_unlock(object);
4316 }
4317 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4318 /*
4319 * JMM - This is temporary until we unify named entries
4320 * and raw memory objects.
4321 *
4322 * Detected fake ip_kotype for a memory object. In
4323 * this case, the port isn't really a port at all, but
4324 * instead is just a raw memory object.
4325 */
4326 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4327 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4328 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4329 }
4330
4331 object = memory_object_to_vm_object((memory_object_t)port);
4332 if (object == VM_OBJECT_NULL)
4333 return KERN_INVALID_OBJECT;
4334 vm_object_reference(object);
4335
4336 /* wait for object (if any) to be ready */
4337 if (object != VM_OBJECT_NULL) {
4338 if (object == kernel_object) {
4339 printf("Warning: Attempt to map kernel object"
4340 " by a non-private kernel entity\n");
4341 return KERN_INVALID_OBJECT;
4342 }
4343 if (!object->pager_ready) {
4344 vm_object_lock(object);
4345
4346 while (!object->pager_ready) {
4347 vm_object_wait(object,
4348 VM_OBJECT_EVENT_PAGER_READY,
4349 THREAD_UNINT);
4350 vm_object_lock(object);
4351 }
4352 vm_object_unlock(object);
4353 }
4354 }
4355 } else {
4356 return KERN_INVALID_OBJECT;
4357 }
4358
4359 if (object != VM_OBJECT_NULL &&
4360 object->named &&
4361 object->pager != MEMORY_OBJECT_NULL &&
4362 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4363 memory_object_t pager;
4364 vm_prot_t pager_prot;
4365 kern_return_t kr;
4366
4367 /*
4368 * For "named" VM objects, let the pager know that the
4369 * memory object is being mapped. Some pagers need to keep
4370 * track of this, to know when they can reclaim the memory
4371 * object, for example.
4372 * VM calls memory_object_map() for each mapping (specifying
4373 * the protection of each mapping) and calls
4374 * memory_object_last_unmap() when all the mappings are gone.
4375 */
4376 pager_prot = max_protection;
4377 if (copy) {
4378 /*
4379 * Copy-On-Write mapping: won't modify the
4380 * memory object.
4381 */
4382 pager_prot &= ~VM_PROT_WRITE;
4383 }
4384 vm_object_lock(object);
4385 pager = object->pager;
4386 if (object->named &&
4387 pager != MEMORY_OBJECT_NULL &&
4388 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4389 assert(object->pager_ready);
4390 vm_object_mapping_wait(object, THREAD_UNINT);
4391 vm_object_mapping_begin(object);
4392 vm_object_unlock(object);
4393
4394 kr = memory_object_map(pager, pager_prot);
4395 assert(kr == KERN_SUCCESS);
4396
4397 vm_object_lock(object);
4398 vm_object_mapping_end(object);
4399 }
4400 vm_object_unlock(object);
4401 }
4402
4403 /*
4404 * Perform the copy if requested
4405 */
4406
4407 if (copy) {
4408 vm_object_t new_object;
4409 vm_object_offset_t new_offset;
4410
4411 result = vm_object_copy_strategically(object, offset,
4412 map_size,
4413 &new_object, &new_offset,
4414 &copy);
4415
4416
4417 if (result == KERN_MEMORY_RESTART_COPY) {
4418 boolean_t success;
4419 boolean_t src_needs_copy;
4420
4421 /*
4422 * XXX
4423 * We currently ignore src_needs_copy.
4424 * This really is the issue of how to make
4425 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4426 * non-kernel users to use. Solution forthcoming.
4427 * In the meantime, since we don't allow non-kernel
4428 * memory managers to specify symmetric copy,
4429 * we won't run into problems here.
4430 */
4431 new_object = object;
4432 new_offset = offset;
4433 success = vm_object_copy_quickly(&new_object,
4434 new_offset,
4435 map_size,
4436 &src_needs_copy,
4437 &copy);
4438 assert(success);
4439 result = KERN_SUCCESS;
4440 }
4441 /*
4442 * Throw away the reference to the
4443 * original object, as it won't be mapped.
4444 */
4445
4446 vm_object_deallocate(object);
4447
4448 if (result != KERN_SUCCESS) {
4449 return result;
4450 }
4451
4452 object = new_object;
4453 offset = new_offset;
4454 }
4455
4456 /*
4457 * If non-kernel users want to try to prefault pages, the mapping and prefault
4458	 * need to be atomic.
4459 */
4460 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4461 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4462
4463#if __arm64__
4464 if (fourk) {
4465 /* map this object in a "4K" pager */
4466 result = vm_map_enter_fourk(target_map,
4467 &map_addr,
4468 map_size,
4469 (vm_map_offset_t) mask,
4470 flags,
4471 vmk_flags,
4472 tag,
4473 object,
4474 offset,
4475 copy,
4476 cur_protection,
4477 max_protection,
4478 inheritance);
4479 } else
4480#endif /* __arm64__ */
4481 {
4482 result = vm_map_enter(target_map,
4483 &map_addr, map_size,
4484 (vm_map_offset_t)mask,
4485 flags,
4486 vmk_flags,
4487 tag,
4488 object, offset,
4489 copy,
4490 cur_protection, max_protection,
4491 inheritance);
4492 }
4493 if (result != KERN_SUCCESS)
4494 vm_object_deallocate(object);
4495
4496 /*
4497 * Try to prefault, and do not forget to release the vm map lock.
4498 */
4499 if (result == KERN_SUCCESS && try_prefault) {
4500 mach_vm_address_t va = map_addr;
4501 kern_return_t kr = KERN_SUCCESS;
4502 unsigned int i = 0;
4503 int pmap_options;
4504
4505 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4506 if (object->internal) {
4507 pmap_options |= PMAP_OPTIONS_INTERNAL;
4508 }
4509
4510 for (i = 0; i < page_list_count; ++i) {
4511 if (!UPL_VALID_PAGE(page_list, i)) {
4512 if (kernel_prefault) {
4513 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4514 result = KERN_MEMORY_ERROR;
4515 break;
4516 }
4517 } else {
4518 /*
4519				 * If this call fails, we should stop
4520				 * trying to optimize: other calls are likely
4521				 * going to fail too.
4522				 *
4523				 * We are not going to report an error for
4524				 * such a failure though. That's an optimization,
4525				 * not something critical.
4526 */
4527 kr = pmap_enter_options(target_map->pmap,
4528 va, UPL_PHYS_PAGE(page_list, i),
4529 cur_protection, VM_PROT_NONE,
4530 0, TRUE, pmap_options, NULL);
4531 if (kr != KERN_SUCCESS) {
4532 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4533 if (kernel_prefault) {
4534 result = kr;
4535 }
4536 break;
4537 }
4538 OSIncrementAtomic64(&vm_prefault_nb_pages);
4539 }
4540
4541 /* Next virtual address */
4542 va += PAGE_SIZE;
4543 }
4544 if (vmk_flags.vmkf_keep_map_locked) {
4545 vm_map_unlock(target_map);
4546 }
4547 }
4548
4549 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4550 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4551 *address = map_addr + offset_in_mapping;
4552 } else {
4553 *address = map_addr;
4554 }
4555 return result;
4556}
4557
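/*
 * vm_map_enter_mem_object:
 *
 * Exported wrapper around vm_map_enter_mem_object_helper() for the common
 * case where no prefault page list is supplied.
 *
 * Example (hypothetical caller, for illustration only; "port", "size" and
 * the protections are placeholders):
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_mem_object(target_map, &addr, size, 0,
 *		VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *		VM_KERN_MEMORY_NONE, port, 0, FALSE,
 *		VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */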
4558kern_return_t
4559vm_map_enter_mem_object(
4560 vm_map_t target_map,
4561 vm_map_offset_t *address,
4562 vm_map_size_t initial_size,
4563 vm_map_offset_t mask,
4564 int flags,
4565 vm_map_kernel_flags_t vmk_flags,
4566 vm_tag_t tag,
4567 ipc_port_t port,
4568 vm_object_offset_t offset,
4569 boolean_t copy,
4570 vm_prot_t cur_protection,
4571 vm_prot_t max_protection,
4572 vm_inherit_t inheritance)
4573{
4574 kern_return_t ret;
4575
4576 ret = vm_map_enter_mem_object_helper(target_map,
4577 address,
4578 initial_size,
4579 mask,
4580 flags,
4581 vmk_flags,
4582 tag,
4583 port,
4584 offset,
4585 copy,
4586 cur_protection,
4587 max_protection,
4588 inheritance,
4589 NULL,
4590 0);
4591
4592#if KASAN
4593 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4594 kasan_notify_address(*address, initial_size);
4595 }
4596#endif
4597
4598 return ret;
4599}
4600
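/*
 * vm_map_enter_mem_object_prefault:
 *
 * Like vm_map_enter_mem_object(), but takes a UPL page list so the pages
 * backing the new mapping can be entered into the pmap up front; it forces
 * copy == FALSE and VM_INHERIT_DEFAULT.
 *
 * Example (hypothetical caller, for illustration only; "port", "size",
 * "page_list" and "page_list_count" are placeholders):
 *
 *	kr = vm_map_enter_mem_object_prefault(target_map, &addr, size, 0,
 *		VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *		VM_KERN_MEMORY_NONE, port, 0,
 *		VM_PROT_DEFAULT, VM_PROT_ALL,
 *		page_list, page_list_count);
 */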
4601kern_return_t
4602vm_map_enter_mem_object_prefault(
4603 vm_map_t target_map,
4604 vm_map_offset_t *address,
4605 vm_map_size_t initial_size,
4606 vm_map_offset_t mask,
4607 int flags,
4608 vm_map_kernel_flags_t vmk_flags,
4609 vm_tag_t tag,
4610 ipc_port_t port,
4611 vm_object_offset_t offset,
4612 vm_prot_t cur_protection,
4613 vm_prot_t max_protection,
4614 upl_page_list_ptr_t page_list,
4615 unsigned int page_list_count)
4616{
4617 kern_return_t ret;
4618
4619 ret = vm_map_enter_mem_object_helper(target_map,
4620 address,
4621 initial_size,
4622 mask,
4623 flags,
4624 vmk_flags,
4625 tag,
4626 port,
4627 offset,
4628 FALSE,
4629 cur_protection,
4630 max_protection,
4631 VM_INHERIT_DEFAULT,
4632 page_list,
4633 page_list_count);
4634
4635#if KASAN
4636 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4637 kasan_notify_address(*address, initial_size);
4638 }
4639#endif
4640
4641 return ret;
4642}
4643
4644
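/*
 * vm_map_enter_mem_object_control:
 *
 * Map the VM object designated by a memory object control into
 * "target_map".  Unlike vm_map_enter_mem_object(), the backing object is
 * named directly by its pager control rather than by an IPC port.
 */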
4645kern_return_t
4646vm_map_enter_mem_object_control(
4647 vm_map_t target_map,
4648 vm_map_offset_t *address,
4649 vm_map_size_t initial_size,
4650 vm_map_offset_t mask,
4651 int flags,
4652 vm_map_kernel_flags_t vmk_flags,
4653 vm_tag_t tag,
4654 memory_object_control_t control,
4655 vm_object_offset_t offset,
4656 boolean_t copy,
4657 vm_prot_t cur_protection,
4658 vm_prot_t max_protection,
4659 vm_inherit_t inheritance)
4660{
4661 vm_map_address_t map_addr;
4662 vm_map_size_t map_size;
4663 vm_object_t object;
4664 vm_object_size_t size;
4665 kern_return_t result;
4666 memory_object_t pager;
4667 vm_prot_t pager_prot;
4668 kern_return_t kr;
4669#if __arm64__
4670 boolean_t fourk = vmk_flags.vmkf_fourk;
4671#endif /* __arm64__ */
4672
4673 /*
4674 * Check arguments for validity
4675 */
4676 if ((target_map == VM_MAP_NULL) ||
4677 (cur_protection & ~VM_PROT_ALL) ||
4678 (max_protection & ~VM_PROT_ALL) ||
4679 (inheritance > VM_INHERIT_LAST_VALID) ||
4680 initial_size == 0) {
4681 return KERN_INVALID_ARGUMENT;
4682 }
4683
4684#if __arm64__
4685 if (fourk) {
4686 map_addr = vm_map_trunc_page(*address,
4687 FOURK_PAGE_MASK);
4688 map_size = vm_map_round_page(initial_size,
4689 FOURK_PAGE_MASK);
4690 } else
4691#endif /* __arm64__ */
4692 {
4693 map_addr = vm_map_trunc_page(*address,
4694 VM_MAP_PAGE_MASK(target_map));
4695 map_size = vm_map_round_page(initial_size,
4696 VM_MAP_PAGE_MASK(target_map));
4697 }
4698 size = vm_object_round_page(initial_size);
4699
4700 object = memory_object_control_to_vm_object(control);
4701
4702 if (object == VM_OBJECT_NULL)
4703 return KERN_INVALID_OBJECT;
4704
4705 if (object == kernel_object) {
4706 printf("Warning: Attempt to map kernel object"
4707 " by a non-private kernel entity\n");
4708 return KERN_INVALID_OBJECT;
4709 }
4710
4711 vm_object_lock(object);
4712 object->ref_count++;
4713 vm_object_res_reference(object);
4714
4715 /*
4716 * For "named" VM objects, let the pager know that the
4717 * memory object is being mapped. Some pagers need to keep
4718 * track of this, to know when they can reclaim the memory
4719 * object, for example.
4720 * VM calls memory_object_map() for each mapping (specifying
4721 * the protection of each mapping) and calls
4722 * memory_object_last_unmap() when all the mappings are gone.
4723 */
4724 pager_prot = max_protection;
4725 if (copy) {
4726 pager_prot &= ~VM_PROT_WRITE;
4727 }
4728 pager = object->pager;
4729 if (object->named &&
4730 pager != MEMORY_OBJECT_NULL &&
4731 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4732 assert(object->pager_ready);
4733 vm_object_mapping_wait(object, THREAD_UNINT);
4734 vm_object_mapping_begin(object);
4735 vm_object_unlock(object);
4736
4737 kr = memory_object_map(pager, pager_prot);
4738 assert(kr == KERN_SUCCESS);
4739
4740 vm_object_lock(object);
4741 vm_object_mapping_end(object);
4742 }
4743 vm_object_unlock(object);
4744
4745 /*
4746 * Perform the copy if requested
4747 */
4748
4749 if (copy) {
4750 vm_object_t new_object;
4751 vm_object_offset_t new_offset;
4752
4753 result = vm_object_copy_strategically(object, offset, size,
4754 &new_object, &new_offset,
4755 &copy);
4756
4757
4758 if (result == KERN_MEMORY_RESTART_COPY) {
4759 boolean_t success;
4760 boolean_t src_needs_copy;
4761
4762 /*
4763 * XXX
4764 * We currently ignore src_needs_copy.
4765 * This really is the issue of how to make
4766 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4767 * non-kernel users to use. Solution forthcoming.
4768 * In the meantime, since we don't allow non-kernel
4769 * memory managers to specify symmetric copy,
4770 * we won't run into problems here.
4771 */
4772 new_object = object;
4773 new_offset = offset;
4774 success = vm_object_copy_quickly(&new_object,
4775 new_offset, size,
4776 &src_needs_copy,
4777 &copy);
4778 assert(success);
4779 result = KERN_SUCCESS;
4780 }
4781 /*
4782 * Throw away the reference to the
4783 * original object, as it won't be mapped.
4784 */
4785
4786 vm_object_deallocate(object);
4787
4788 if (result != KERN_SUCCESS) {
4789 return result;
4790 }
4791
4792 object = new_object;
4793 offset = new_offset;
4794 }
4795
4796#if __arm64__
4797 if (fourk) {
4798 result = vm_map_enter_fourk(target_map,
4799 &map_addr,
4800 map_size,
4801 (vm_map_offset_t)mask,
4802 flags,
4803 vmk_flags,
4804 tag,
4805 object, offset,
4806 copy,
4807 cur_protection, max_protection,
4808 inheritance);
4809 } else
4810#endif /* __arm64__ */
4811 {
4812 result = vm_map_enter(target_map,
4813 &map_addr, map_size,
4814 (vm_map_offset_t)mask,
4815 flags,
4816 vmk_flags,
4817 tag,
4818 object, offset,
4819 copy,
4820 cur_protection, max_protection,
4821 inheritance);
4822 }
4823 if (result != KERN_SUCCESS)
4824 vm_object_deallocate(object);
4825 *address = map_addr;
4826
4827 return result;
4828}
4829
4830
4831#if VM_CPM
4832
4833#ifdef MACH_ASSERT
4834extern pmap_paddr_t avail_start, avail_end;
4835#endif
4836
4837/*
4838 * Allocate memory in the specified map, with the caveat that
4839 * the memory is physically contiguous. This call may fail
4840 * if the system can't find sufficient contiguous memory.
4841 * This call may cause or lead to heart-stopping amounts of
4842 * paging activity.
4843 *
4844 * Memory obtained from this call should be freed in the
4845 * normal way, viz., via vm_deallocate.
4846 */
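/*
 * Example (hypothetical caller, for illustration only):
 *
 *	vm_map_offset_t cpm_addr = 0;
 *	kr = vm_map_enter_cpm(kernel_map, &cpm_addr,
 *		4 * PAGE_SIZE, VM_FLAGS_ANYWHERE);
 *	...
 *	(void) vm_deallocate(kernel_map, cpm_addr, 4 * PAGE_SIZE);
 */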
4847kern_return_t
4848vm_map_enter_cpm(
4849 vm_map_t map,
4850 vm_map_offset_t *addr,
4851 vm_map_size_t size,
4852 int flags)
4853{
4854 vm_object_t cpm_obj;
4855 pmap_t pmap;
4856 vm_page_t m, pages;
4857 kern_return_t kr;
4858 vm_map_offset_t va, start, end, offset;
4859#if MACH_ASSERT
4860 vm_map_offset_t prev_addr = 0;
4861#endif /* MACH_ASSERT */
4862
4863 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4864 vm_tag_t tag;
4865
4866 VM_GET_FLAGS_ALIAS(flags, tag);
4867
4868 if (size == 0) {
4869 *addr = 0;
4870 return KERN_SUCCESS;
4871 }
4872 if (anywhere)
4873 *addr = vm_map_min(map);
4874 else
4875 *addr = vm_map_trunc_page(*addr,
4876 VM_MAP_PAGE_MASK(map));
4877 size = vm_map_round_page(size,
4878 VM_MAP_PAGE_MASK(map));
4879
4880 /*
4881 * LP64todo - cpm_allocate should probably allow
4882 * allocations of >4GB, but not with the current
4883 * algorithm, so just cast down the size for now.
4884 */
4885 if (size > VM_MAX_ADDRESS)
4886 return KERN_RESOURCE_SHORTAGE;
4887 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
4888 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
4889 return kr;
4890
4891 cpm_obj = vm_object_allocate((vm_object_size_t)size);
4892 assert(cpm_obj != VM_OBJECT_NULL);
4893 assert(cpm_obj->internal);
4894 assert(cpm_obj->vo_size == (vm_object_size_t)size);
4895 assert(cpm_obj->can_persist == FALSE);
4896 assert(cpm_obj->pager_created == FALSE);
4897 assert(cpm_obj->pageout == FALSE);
4898 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4899
4900 /*
4901 * Insert pages into object.
4902 */
4903
4904 vm_object_lock(cpm_obj);
4905 for (offset = 0; offset < size; offset += PAGE_SIZE) {
4906 m = pages;
4907 pages = NEXT_PAGE(m);
4908 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
4909
4910 assert(!m->vmp_gobbled);
4911 assert(!m->vmp_wanted);
4912 assert(!m->vmp_pageout);
4913 assert(!m->vmp_tabled);
4914 assert(VM_PAGE_WIRED(m));
4915 assert(m->vmp_busy);
4916 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
4917
4918 m->vmp_busy = FALSE;
4919 vm_page_insert(m, cpm_obj, offset);
4920 }
4921 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
4922 vm_object_unlock(cpm_obj);
4923
4924 /*
4925 * Hang onto a reference on the object in case a
4926 * multi-threaded application for some reason decides
4927 * to deallocate the portion of the address space into
4928 * which we will insert this object.
4929 *
4930 * Unfortunately, we must insert the object now before
4931 * we can talk to the pmap module about which addresses
4932 * must be wired down. Hence, the race with a multi-
4933 * threaded app.
4934 */
4935 vm_object_reference(cpm_obj);
4936
4937 /*
4938 * Insert object into map.
4939 */
4940
4941 kr = vm_map_enter(
4942 map,
4943 addr,
4944 size,
4945 (vm_map_offset_t)0,
4946 flags,
4947 VM_MAP_KERNEL_FLAGS_NONE,
		tag,
4948		cpm_obj,
4949 (vm_object_offset_t)0,
4950 FALSE,
4951 VM_PROT_ALL,
4952 VM_PROT_ALL,
4953 VM_INHERIT_DEFAULT);
4954
4955 if (kr != KERN_SUCCESS) {
4956 /*
4957 * A CPM object doesn't have can_persist set,
4958 * so all we have to do is deallocate it to
4959 * free up these pages.
4960 */
4961 assert(cpm_obj->pager_created == FALSE);
4962 assert(cpm_obj->can_persist == FALSE);
4963 assert(cpm_obj->pageout == FALSE);
4964 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4965 vm_object_deallocate(cpm_obj); /* kill acquired ref */
4966 vm_object_deallocate(cpm_obj); /* kill creation ref */
4967 }
4968
4969 /*
4970 * Inform the physical mapping system that the
4971 * range of addresses may not fault, so that
4972 * page tables and such can be locked down as well.
4973 */
4974 start = *addr;
4975 end = start + size;
4976 pmap = vm_map_pmap(map);
4977 pmap_pageable(pmap, start, end, FALSE);
4978
4979 /*
4980 * Enter each page into the pmap, to avoid faults.
4981 * Note that this loop could be coded more efficiently,
4982 * if the need arose, rather than looking up each page
4983 * again.
4984 */
4985 for (offset = 0, va = start; offset < size;
4986 va += PAGE_SIZE, offset += PAGE_SIZE) {
4987 int type_of_fault;
4988
4989 vm_object_lock(cpm_obj);
4990 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4991 assert(m != VM_PAGE_NULL);
4992
4993 vm_page_zero_fill(m);
4994
4995 type_of_fault = DBG_ZERO_FILL_FAULT;
4996
4997 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
4998 VM_PAGE_WIRED(m),
4999 FALSE, /* change_wiring */
5000 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5001 FALSE, /* no_cache */
5002 FALSE, /* cs_bypass */
5003 0, /* user_tag */
5004 0, /* pmap_options */
5005 NULL, /* need_retry */
5006 &type_of_fault);
5007
5008 vm_object_unlock(cpm_obj);
5009 }
5010
5011#if MACH_ASSERT
5012 /*
5013 * Verify ordering in address space.
5014 */
5015 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5016 vm_object_lock(cpm_obj);
5017 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5018 vm_object_unlock(cpm_obj);
5019 if (m == VM_PAGE_NULL)
5020 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5021 cpm_obj, (uint64_t)offset);
5022 assert(m->vmp_tabled);
5023 assert(!m->vmp_busy);
5024 assert(!m->vmp_wanted);
5025 assert(!m->vmp_fictitious);
5026 assert(!m->vmp_private);
5027 assert(!m->vmp_absent);
5028 assert(!m->vmp_error);
5029 assert(!m->vmp_cleaning);
5030 assert(!m->vmp_laundry);
5031 assert(!m->vmp_precious);
5032 assert(!m->vmp_clustered);
5033 if (offset != 0) {
5034 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5035 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5036 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5037 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5038 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5039 panic("vm_allocate_cpm: pages not contig!");
5040 }
5041 }
5042 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5043 }
5044#endif /* MACH_ASSERT */
5045
5046 vm_object_deallocate(cpm_obj); /* kill extra ref */
5047
5048 return kr;
5049}
5050
5051
5052#else /* VM_CPM */
5053
5054/*
5055 * Interface is defined in all cases, but unless the kernel
5056 * is built explicitly for this option, the interface does
5057 * nothing.
5058 */
5059
5060kern_return_t
5061vm_map_enter_cpm(
5062 __unused vm_map_t map,
5063 __unused vm_map_offset_t *addr,
5064 __unused vm_map_size_t size,
5065 __unused int flags)
5066{
5067 return KERN_FAILURE;
5068}
5069#endif /* VM_CPM */
5070
5071/* Not used without nested pmaps */
5072#ifndef NO_NESTED_PMAP
5073/*
5074 * Clip and unnest a portion of a nested submap mapping.
5075 */
5076
5077
5078static void
5079vm_map_clip_unnest(
5080 vm_map_t map,
5081 vm_map_entry_t entry,
5082 vm_map_offset_t start_unnest,
5083 vm_map_offset_t end_unnest)
5084{
5085 vm_map_offset_t old_start_unnest = start_unnest;
5086 vm_map_offset_t old_end_unnest = end_unnest;
5087
5088 assert(entry->is_sub_map);
5089 assert(VME_SUBMAP(entry) != NULL);
5090 assert(entry->use_pmap);
5091
5092 /*
5093 * Query the platform for the optimal unnest range.
5094 * DRK: There's some duplication of effort here, since
5095 * callers may have adjusted the range to some extent. This
5096 * routine was introduced to support 1GiB subtree nesting
5097 * for x86 platforms, which can also nest on 2MiB boundaries
5098 * depending on size/alignment.
5099 */
5100 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5101 assert(VME_SUBMAP(entry)->is_nested_map);
5102 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5103 log_unnest_badness(map,
5104 old_start_unnest,
5105 old_end_unnest,
5106 VME_SUBMAP(entry)->is_nested_map,
5107 (entry->vme_start +
5108 VME_SUBMAP(entry)->lowest_unnestable_start -
5109 VME_OFFSET(entry)));
5110 }
5111
5112 if (entry->vme_start > start_unnest ||
5113 entry->vme_end < end_unnest) {
5114 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5115 "bad nested entry: start=0x%llx end=0x%llx\n",
5116 (long long)start_unnest, (long long)end_unnest,
5117 (long long)entry->vme_start, (long long)entry->vme_end);
5118 }
5119
5120 if (start_unnest > entry->vme_start) {
5121 _vm_map_clip_start(&map->hdr,
5122 entry,
5123 start_unnest);
5124 if (map->holelistenabled) {
5125 vm_map_store_update_first_free(map, NULL, FALSE);
5126 } else {
5127 vm_map_store_update_first_free(map, map->first_free, FALSE);
5128 }
5129 }
5130 if (entry->vme_end > end_unnest) {
5131 _vm_map_clip_end(&map->hdr,
5132 entry,
5133 end_unnest);
5134 if (map->holelistenabled) {
5135 vm_map_store_update_first_free(map, NULL, FALSE);
5136 } else {
5137 vm_map_store_update_first_free(map, map->first_free, FALSE);
5138 }
5139 }
5140
5141 pmap_unnest(map->pmap,
5142 entry->vme_start,
5143 entry->vme_end - entry->vme_start);
5144 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
5145 /* clean up parent map/maps */
5146 vm_map_submap_pmap_clean(
5147 map, entry->vme_start,
5148 entry->vme_end,
5149 VME_SUBMAP(entry),
5150 VME_OFFSET(entry));
5151 }
5152 entry->use_pmap = FALSE;
5153 if ((map->pmap != kernel_pmap) &&
5154 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5155 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5156 }
5157}
5158#endif /* NO_NESTED_PMAP */
5159
5160/*
5161 * vm_map_clip_start: [ internal use only ]
5162 *
5163 * Asserts that the given entry begins at or after
5164 * the specified address; if necessary,
5165 * it splits the entry into two.
5166 */
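/*
 * Illustration: clipping the entry
 *	[ vme_start ......................... vme_end )
 * at "startaddr" yields
 *	[ vme_start .. startaddr ) [ startaddr .. vme_end )
 * where the original entry keeps the upper portion (so that it now starts
 * at "startaddr") and a new entry is inserted before it for the lower
 * portion.
 */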
5167void
5168vm_map_clip_start(
5169 vm_map_t map,
5170 vm_map_entry_t entry,
5171 vm_map_offset_t startaddr)
5172{
5173#ifndef NO_NESTED_PMAP
5174 if (entry->is_sub_map &&
5175 entry->use_pmap &&
5176 startaddr >= entry->vme_start) {
5177 vm_map_offset_t start_unnest, end_unnest;
5178
5179 /*
5180 * Make sure "startaddr" is no longer in a nested range
5181 * before we clip. Unnest only the minimum range the platform
5182 * can handle.
5183 * vm_map_clip_unnest may perform additional adjustments to
5184 * the unnest range.
5185 */
5186 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5187 end_unnest = start_unnest + pmap_nesting_size_min;
5188 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5189 }
5190#endif /* NO_NESTED_PMAP */
5191 if (startaddr > entry->vme_start) {
5192 if (VME_OBJECT(entry) &&
5193 !entry->is_sub_map &&
5194 VME_OBJECT(entry)->phys_contiguous) {
5195 pmap_remove(map->pmap,
5196 (addr64_t)(entry->vme_start),
5197 (addr64_t)(entry->vme_end));
5198 }
5199 if (entry->vme_atomic) {
5200 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5201 }
5202
5203 DTRACE_VM5(
5204 vm_map_clip_start,
5205 vm_map_t, map,
5206 vm_map_offset_t, entry->vme_start,
5207 vm_map_offset_t, entry->vme_end,
5208 vm_map_offset_t, startaddr,
5209 int, VME_ALIAS(entry));
5210
5211 _vm_map_clip_start(&map->hdr, entry, startaddr);
5212 if (map->holelistenabled) {
5213 vm_map_store_update_first_free(map, NULL, FALSE);
5214 } else {
5215 vm_map_store_update_first_free(map, map->first_free, FALSE);
5216 }
5217 }
5218}
5219
5220
5221#define vm_map_copy_clip_start(copy, entry, startaddr) \
5222 MACRO_BEGIN \
5223 if ((startaddr) > (entry)->vme_start) \
5224 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5225 MACRO_END
5226
5227/*
5228 * This routine is called only when it is known that
5229 * the entry must be split.
5230 */
5231static void
5232_vm_map_clip_start(
5233 struct vm_map_header *map_header,
5234 vm_map_entry_t entry,
5235 vm_map_offset_t start)
5236{
5237 vm_map_entry_t new_entry;
5238
5239 /*
5240 * Split off the front portion --
5241 * note that we must insert the new
5242 * entry BEFORE this one, so that
5243 * this entry has the specified starting
5244 * address.
5245 */
5246
5247 if (entry->map_aligned) {
5248 assert(VM_MAP_PAGE_ALIGNED(start,
5249 VM_MAP_HDR_PAGE_MASK(map_header)));
5250 }
5251
5252 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5253 vm_map_entry_copy_full(new_entry, entry);
5254
5255 new_entry->vme_end = start;
5256 assert(new_entry->vme_start < new_entry->vme_end);
5257 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5258 assert(start < entry->vme_end);
5259 entry->vme_start = start;
5260
5261 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5262
5263 if (entry->is_sub_map)
5264 vm_map_reference(VME_SUBMAP(new_entry));
5265 else
5266 vm_object_reference(VME_OBJECT(new_entry));
5267}
5268
5269
5270/*
5271 * vm_map_clip_end: [ internal use only ]
5272 *
5273 * Asserts that the given entry ends at or before
5274 * the specified address; if necessary,
5275 * it splits the entry into two.
5276 */
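/*
 * Illustration: clipping the entry
 *	[ vme_start ......................... vme_end )
 * at "endaddr" yields
 *	[ vme_start .. endaddr ) [ endaddr .. vme_end )
 * where the original entry keeps the lower portion (so that it now ends
 * at "endaddr") and a new entry is inserted after it for the upper
 * portion.
 */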
5277void
5278vm_map_clip_end(
5279 vm_map_t map,
5280 vm_map_entry_t entry,
5281 vm_map_offset_t endaddr)
5282{
5283 if (endaddr > entry->vme_end) {
5284 /*
5285 * Within the scope of this clipping, limit "endaddr" to
5286 * the end of this map entry...
5287 */
5288 endaddr = entry->vme_end;
5289 }
5290#ifndef NO_NESTED_PMAP
5291 if (entry->is_sub_map && entry->use_pmap) {
5292 vm_map_offset_t start_unnest, end_unnest;
5293
5294 /*
5295 * Make sure the range between the start of this entry and
5296 * the new "endaddr" is no longer nested before we clip.
5297 * Unnest only the minimum range the platform can handle.
5298 * vm_map_clip_unnest may perform additional adjustments to
5299 * the unnest range.
5300 */
5301 start_unnest = entry->vme_start;
5302 end_unnest =
5303 (endaddr + pmap_nesting_size_min - 1) &
5304 ~(pmap_nesting_size_min - 1);
5305 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5306 }
5307#endif /* NO_NESTED_PMAP */
5308 if (endaddr < entry->vme_end) {
5309 if (VME_OBJECT(entry) &&
5310 !entry->is_sub_map &&
5311 VME_OBJECT(entry)->phys_contiguous) {
5312 pmap_remove(map->pmap,
5313 (addr64_t)(entry->vme_start),
5314 (addr64_t)(entry->vme_end));
5315 }
5316 if (entry->vme_atomic) {
5317 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5318 }
5319 DTRACE_VM5(
5320 vm_map_clip_end,
5321 vm_map_t, map,
5322 vm_map_offset_t, entry->vme_start,
5323 vm_map_offset_t, entry->vme_end,
5324 vm_map_offset_t, endaddr,
5325 int, VME_ALIAS(entry));
5326
5327 _vm_map_clip_end(&map->hdr, entry, endaddr);
5328 if (map->holelistenabled) {
5329 vm_map_store_update_first_free(map, NULL, FALSE);
5330 } else {
5331 vm_map_store_update_first_free(map, map->first_free, FALSE);
5332 }
5333 }
5334}
5335
5336
5337#define vm_map_copy_clip_end(copy, entry, endaddr) \
5338 MACRO_BEGIN \
5339 if ((endaddr) < (entry)->vme_end) \
5340 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5341 MACRO_END
5342
5343/*
5344 * This routine is called only when it is known that
5345 * the entry must be split.
5346 */
5347static void
5348_vm_map_clip_end(
5349 struct vm_map_header *map_header,
5350 vm_map_entry_t entry,
5351 vm_map_offset_t end)
5352{
5353 vm_map_entry_t new_entry;
5354
5355 /*
5356 * Create a new entry and insert it
5357 * AFTER the specified entry
5358 */
5359
5360 if (entry->map_aligned) {
5361 assert(VM_MAP_PAGE_ALIGNED(end,
5362 VM_MAP_HDR_PAGE_MASK(map_header)));
5363 }
5364
5365 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5366 vm_map_entry_copy_full(new_entry, entry);
5367
5368 assert(entry->vme_start < end);
5369 new_entry->vme_start = entry->vme_end = end;
5370 VME_OFFSET_SET(new_entry,
5371 VME_OFFSET(new_entry) + (end - entry->vme_start));
5372 assert(new_entry->vme_start < new_entry->vme_end);
5373
5374 _vm_map_store_entry_link(map_header, entry, new_entry);
5375
5376 if (entry->is_sub_map)
5377 vm_map_reference(VME_SUBMAP(new_entry));
5378 else
5379 vm_object_reference(VME_OBJECT(new_entry));
5380}
5381
5382
5383/*
5384 * VM_MAP_RANGE_CHECK: [ internal use only ]
5385 *
5386 * Asserts that the starting and ending region
5387 * addresses fall within the valid range of the map.
5388 */
5389#define VM_MAP_RANGE_CHECK(map, start, end) \
5390 MACRO_BEGIN \
5391 if (start < vm_map_min(map)) \
5392 start = vm_map_min(map); \
5393 if (end > vm_map_max(map)) \
5394 end = vm_map_max(map); \
5395 if (start > end) \
5396 start = end; \
5397 MACRO_END
5398
5399/*
5400 * vm_map_range_check: [ internal use only ]
5401 *
5402 * Check that the region defined by the specified start and
5403 *	end addresses is wholly contained within a single map
5404 *	entry or a set of adjacent map entries of the specified map,
5405 * i.e. the specified region contains no unmapped space.
5406 * If any or all of the region is unmapped, FALSE is returned.
5407 * Otherwise, TRUE is returned and if the output argument 'entry'
5408 * is not NULL it points to the map entry containing the start
5409 * of the region.
5410 *
5411 * The map is locked for reading on entry and is left locked.
5412 */
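/*
 * Typical use (for illustration), with the map already read-locked:
 *
 *	if (!vm_map_range_check(map, start, end, &entry))
 *		return KERN_INVALID_ADDRESS;
 */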
5413static boolean_t
5414vm_map_range_check(
5415 vm_map_t map,
5416 vm_map_offset_t start,
5417 vm_map_offset_t end,
5418 vm_map_entry_t *entry)
5419{
5420 vm_map_entry_t cur;
5421 vm_map_offset_t prev;
5422
5423 /*
5424 * Basic sanity checks first
5425 */
5426 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
5427 return (FALSE);
5428
5429 /*
5430 * Check first if the region starts within a valid
5431 * mapping for the map.
5432 */
5433 if (!vm_map_lookup_entry(map, start, &cur))
5434 return (FALSE);
5435
5436 /*
5437 * Optimize for the case that the region is contained
5438 * in a single map entry.
5439 */
5440 if (entry != (vm_map_entry_t *) NULL)
5441 *entry = cur;
5442 if (end <= cur->vme_end)
5443 return (TRUE);
5444
5445 /*
5446 * If the region is not wholly contained within a
5447 * single entry, walk the entries looking for holes.
5448 */
5449 prev = cur->vme_end;
5450 cur = cur->vme_next;
5451 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5452 if (end <= cur->vme_end)
5453 return (TRUE);
5454 prev = cur->vme_end;
5455 cur = cur->vme_next;
5456 }
5457 return (FALSE);
5458}
5459
5460/*
5461 * vm_map_submap: [ kernel use only ]
5462 *
5463 * Mark the given range as handled by a subordinate map.
5464 *
5465 * This range must have been created with vm_map_find using
5466 * the vm_submap_object, and no other operations may have been
5467 * performed on this range prior to calling vm_map_submap.
5468 *
5469 * Only a limited number of operations can be performed
5470 *	within this range after calling vm_map_submap:
5471 * vm_fault
5472 * [Don't try vm_map_copyin!]
5473 *
5474 * To remove a submapping, one must first remove the
5475 * range from the superior map, and then destroy the
5476 * submap (if desired). [Better yet, don't try it.]
5477 */
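/*
 * Example (hypothetical caller, for illustration only), after the range
 * [start, end) has been reserved with vm_map_find() on vm_submap_object:
 *
 *	kr = vm_map_submap(parent_map, start, end, child_map,
 *			   0, FALSE);
 */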
5478kern_return_t
5479vm_map_submap(
5480 vm_map_t map,
5481 vm_map_offset_t start,
5482 vm_map_offset_t end,
5483 vm_map_t submap,
5484 vm_map_offset_t offset,
5485#ifdef NO_NESTED_PMAP
5486 __unused
5487#endif /* NO_NESTED_PMAP */
5488 boolean_t use_pmap)
5489{
5490 vm_map_entry_t entry;
5491 kern_return_t result = KERN_INVALID_ARGUMENT;
5492 vm_object_t object;
5493
5494 vm_map_lock(map);
5495
5496 if (! vm_map_lookup_entry(map, start, &entry)) {
5497 entry = entry->vme_next;
5498 }
5499
5500 if (entry == vm_map_to_entry(map) ||
5501 entry->is_sub_map) {
5502 vm_map_unlock(map);
5503 return KERN_INVALID_ARGUMENT;
5504 }
5505
5506 vm_map_clip_start(map, entry, start);
5507 vm_map_clip_end(map, entry, end);
5508
5509 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5510 (!entry->is_sub_map) &&
5511 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5512 (object->resident_page_count == 0) &&
5513 (object->copy == VM_OBJECT_NULL) &&
5514 (object->shadow == VM_OBJECT_NULL) &&
5515 (!object->pager_created)) {
5516 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5517 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5518 vm_object_deallocate(object);
5519 entry->is_sub_map = TRUE;
5520 entry->use_pmap = FALSE;
5521 VME_SUBMAP_SET(entry, submap);
5522 vm_map_reference(submap);
5523 if (submap->mapped_in_other_pmaps == FALSE &&
5524 vm_map_pmap(submap) != PMAP_NULL &&
5525 vm_map_pmap(submap) != vm_map_pmap(map)) {
5526 /*
5527 * This submap is being mapped in a map
5528 * that uses a different pmap.
5529 * Set its "mapped_in_other_pmaps" flag
5530 * to indicate that we now need to
5531 * remove mappings from all pmaps rather
5532 * than just the submap's pmap.
5533 */
5534 submap->mapped_in_other_pmaps = TRUE;
5535 }
5536
5537#ifndef NO_NESTED_PMAP
5538 if (use_pmap) {
5539 /* nest if platform code will allow */
5540 if(submap->pmap == NULL) {
5541 ledger_t ledger = map->pmap->ledger;
5542 submap->pmap = pmap_create(ledger,
5543 (vm_map_size_t) 0, FALSE);
5544 if(submap->pmap == PMAP_NULL) {
5545 vm_map_unlock(map);
5546 return(KERN_NO_SPACE);
5547 }
5548#if defined(__arm__) || defined(__arm64__)
5549 pmap_set_nested(submap->pmap);
5550#endif
5551 }
5552 result = pmap_nest(map->pmap,
5553 (VME_SUBMAP(entry))->pmap,
5554 (addr64_t)start,
5555 (addr64_t)start,
5556 (uint64_t)(end - start));
5557 if(result)
5558 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5559 entry->use_pmap = TRUE;
5560 }
5561#else /* NO_NESTED_PMAP */
5562 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5563#endif /* NO_NESTED_PMAP */
5564 result = KERN_SUCCESS;
5565 }
5566 vm_map_unlock(map);
5567
5568 return(result);
5569}
5570
5571/*
5572 * vm_map_protect:
5573 *
5574 * Sets the protection of the specified address
5575 * region in the target map. If "set_max" is
5576 * specified, the maximum protection is to be set;
5577 * otherwise, only the current protection is affected.
5578 */
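/*
 * Example (hypothetical caller, for illustration only): make a range
 * read-only without changing its maximum protection:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 */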
5579kern_return_t
5580vm_map_protect(
5581 vm_map_t map,
5582 vm_map_offset_t start,
5583 vm_map_offset_t end,
5584 vm_prot_t new_prot,
5585 boolean_t set_max)
5586{
5587 vm_map_entry_t current;
5588 vm_map_offset_t prev;
5589 vm_map_entry_t entry;
5590 vm_prot_t new_max;
5591 int pmap_options = 0;
5592 kern_return_t kr;
5593
5594 XPR(XPR_VM_MAP,
5595 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
5596 map, start, end, new_prot, set_max);
5597
5598 if (new_prot & VM_PROT_COPY) {
5599 vm_map_offset_t new_start;
5600 vm_prot_t cur_prot, max_prot;
5601 vm_map_kernel_flags_t kflags;
5602
5603 /* LP64todo - see below */
5604 if (start >= map->max_offset) {
5605 return KERN_INVALID_ADDRESS;
5606 }
5607
5608#if VM_PROTECT_WX_FAIL
5609 if ((new_prot & VM_PROT_EXECUTE) &&
5610 map != kernel_map &&
5611 cs_process_enforcement(NULL)) {
5612 DTRACE_VM3(cs_wx,
5613 uint64_t, (uint64_t) start,
5614 uint64_t, (uint64_t) end,
5615 vm_prot_t, new_prot);
5616 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5617 proc_selfpid(),
5618 (current_task()->bsd_info
5619 ? proc_name_address(current_task()->bsd_info)
5620 : "?"),
5621 __FUNCTION__);
5622 return KERN_PROTECTION_FAILURE;
5623 }
5624#endif /* VM_PROTECT_WX_FAIL */
5625
5626 /*
5627 * Let vm_map_remap_extract() know that it will need to:
5628 * + make a copy of the mapping
5629 * + add VM_PROT_WRITE to the max protections
5630 * + remove any protections that are no longer allowed from the
5631 * max protections (to avoid any WRITE/EXECUTE conflict, for
5632 * example).
5633 * Note that "max_prot" is an IN/OUT parameter only for this
5634 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5635 * only.
5636 */
5637 max_prot = new_prot & VM_PROT_ALL;
5638 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5639 kflags.vmkf_remap_prot_copy = TRUE;
5640 kflags.vmkf_overwrite_immutable = TRUE;
5641 new_start = start;
5642 kr = vm_map_remap(map,
5643 &new_start,
5644 end - start,
5645 0, /* mask */
5646 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5647 kflags,
5648 0,
5649 map,
5650 start,
5651 TRUE, /* copy-on-write remapping! */
5652 &cur_prot,
5653 &max_prot,
5654 VM_INHERIT_DEFAULT);
5655 if (kr != KERN_SUCCESS) {
5656 return kr;
5657 }
5658 new_prot &= ~VM_PROT_COPY;
5659 }
5660
5661 vm_map_lock(map);
5662
5663 /* LP64todo - remove this check when vm_map_commpage64()
5664 * no longer has to stuff in a map_entry for the commpage
5665 * above the map's max_offset.
5666 */
5667 if (start >= map->max_offset) {
5668 vm_map_unlock(map);
5669 return(KERN_INVALID_ADDRESS);
5670 }
5671
5672 while(1) {
5673 /*
5674 * Lookup the entry. If it doesn't start in a valid
5675 * entry, return an error.
5676 */
5677 if (! vm_map_lookup_entry(map, start, &entry)) {
5678 vm_map_unlock(map);
5679 return(KERN_INVALID_ADDRESS);
5680 }
5681
5682 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
5683 start = SUPERPAGE_ROUND_DOWN(start);
5684 continue;
5685 }
5686 break;
5687 }
5688 if (entry->superpage_size)
5689 end = SUPERPAGE_ROUND_UP(end);
5690
5691 /*
5692 * Make a first pass to check for protection and address
5693 * violations.
5694 */
5695
5696 current = entry;
5697 prev = current->vme_start;
5698 while ((current != vm_map_to_entry(map)) &&
5699 (current->vme_start < end)) {
5700
5701 /*
5702 * If there is a hole, return an error.
5703 */
5704 if (current->vme_start != prev) {
5705 vm_map_unlock(map);
5706 return(KERN_INVALID_ADDRESS);
5707 }
5708
5709 new_max = current->max_protection;
5710 if ((new_prot & new_max) != new_prot) {
5711 vm_map_unlock(map);
5712 return(KERN_PROTECTION_FAILURE);
5713 }
5714
5715 if ((new_prot & VM_PROT_WRITE) &&
5716 (new_prot & VM_PROT_EXECUTE) &&
5717#if !CONFIG_EMBEDDED
5718 map != kernel_map &&
5719 cs_process_enforcement(NULL) &&
5720#endif /* !CONFIG_EMBEDDED */
5721 !(current->used_for_jit)) {
5722 DTRACE_VM3(cs_wx,
5723 uint64_t, (uint64_t) current->vme_start,
5724 uint64_t, (uint64_t) current->vme_end,
5725 vm_prot_t, new_prot);
5726 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5727 proc_selfpid(),
5728 (current_task()->bsd_info
5729 ? proc_name_address(current_task()->bsd_info)
5730 : "?"),
5731 __FUNCTION__);
5732 new_prot &= ~VM_PROT_EXECUTE;
5733#if VM_PROTECT_WX_FAIL
5734 vm_map_unlock(map);
5735 return KERN_PROTECTION_FAILURE;
5736#endif /* VM_PROTECT_WX_FAIL */
5737 }
5738
5739 /*
5740 * If the task has requested executable lockdown,
5741		 * deny either of the following:
5742		 * - adding executable protections, or
5743		 * - adding write protections to an existing executable mapping.
5744 */
5745 if (map->map_disallow_new_exec == TRUE) {
5746 if ((new_prot & VM_PROT_EXECUTE) ||
5747 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5748 vm_map_unlock(map);
5749 return(KERN_PROTECTION_FAILURE);
5750 }
5751 }
5752
5753 prev = current->vme_end;
5754 current = current->vme_next;
5755 }
5756
5757#if __arm64__
5758 if (end > prev &&
5759 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5760 vm_map_entry_t prev_entry;
5761
5762 prev_entry = current->vme_prev;
5763 if (prev_entry != vm_map_to_entry(map) &&
5764 !prev_entry->map_aligned &&
5765 (vm_map_round_page(prev_entry->vme_end,
5766 VM_MAP_PAGE_MASK(map))
5767 == end)) {
5768 /*
5769 * The last entry in our range is not "map-aligned"
5770 * but it would have reached all the way to "end"
5771 * if it had been map-aligned, so this is not really
5772 * a hole in the range and we can proceed.
5773 */
5774 prev = end;
5775 }
5776 }
5777#endif /* __arm64__ */
5778
5779 if (end > prev) {
5780 vm_map_unlock(map);
5781 return(KERN_INVALID_ADDRESS);
5782 }
5783
5784 /*
5785 * Go back and fix up protections.
5786 * Clip to start here if the range starts within
5787 * the entry.
5788 */
5789
5790 current = entry;
5791 if (current != vm_map_to_entry(map)) {
5792 /* clip and unnest if necessary */
5793 vm_map_clip_start(map, current, start);
5794 }
5795
5796 while ((current != vm_map_to_entry(map)) &&
5797 (current->vme_start < end)) {
5798
5799 vm_prot_t old_prot;
5800
5801 vm_map_clip_end(map, current, end);
5802
5803 if (current->is_sub_map) {
5804 /* clipping did unnest if needed */
5805 assert(!current->use_pmap);
5806 }
5807
5808 old_prot = current->protection;
5809
5810 if (set_max) {
5811 current->max_protection = new_prot;
5812 current->protection = new_prot & old_prot;
5813 } else {
5814 current->protection = new_prot;
5815 }
5816
5817 /*
5818 * Update physical map if necessary.
5819 * If the request is to turn off write protection,
5820 * we won't do it for real (in pmap). This is because
5821 * it would cause copy-on-write to fail. We've already
5822	 * set the new protection in the map, so if a
5823 * write-protect fault occurred, it will be fixed up
5824 * properly, COW or not.
5825 */
5826 if (current->protection != old_prot) {
5827			/* Look one level in: we support nested pmaps */
5828 /* from mapped submaps which are direct entries */
5829 /* in our map */
5830
5831 vm_prot_t prot;
5832
5833 prot = current->protection;
5834 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5835 prot &= ~VM_PROT_WRITE;
5836 } else {
5837 assert(!VME_OBJECT(current)->code_signed);
5838 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5839 }
5840
5841 if (override_nx(map, VME_ALIAS(current)) && prot)
5842 prot |= VM_PROT_EXECUTE;
5843
5844#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5845 if (!(old_prot & VM_PROT_EXECUTE) &&
5846 (prot & VM_PROT_EXECUTE) &&
5847 panic_on_unsigned_execute &&
5848 (proc_selfcsflags() & CS_KILL)) {
5849 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5850 }
5851#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5852
5853 if (pmap_has_prot_policy(prot)) {
5854 if (current->wired_count) {
5855 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5856 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5857 }
5858
5859 /* If the pmap layer cares about this
5860 * protection type, force a fault for
5861 * each page so that vm_fault will
5862 * repopulate the page with the full
5863 * set of protections.
5864 */
5865 /*
5866 * TODO: We don't seem to need this,
5867 * but this is due to an internal
5868 * implementation detail of
5869 * pmap_protect. Do we want to rely
5870 * on this?
5871 */
5872 prot = VM_PROT_NONE;
5873 }
5874
5875 if (current->is_sub_map && current->use_pmap) {
5876 pmap_protect(VME_SUBMAP(current)->pmap,
5877 current->vme_start,
5878 current->vme_end,
5879 prot);
5880 } else {
5881 if (prot & VM_PROT_WRITE) {
5882 if (VME_OBJECT(current) == compressor_object) {
5883 /*
5884 * For write requests on the
5885						 * compressor, we will ask the
5886 * pmap layer to prevent us from
5887 * taking a write fault when we
5888 * attempt to access the mapping
5889 * next.
5890 */
5891 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
5892 }
5893 }
5894
5895 pmap_protect_options(map->pmap,
5896 current->vme_start,
5897 current->vme_end,
5898 prot,
5899 pmap_options,
5900 NULL);
5901 }
5902 }
5903 current = current->vme_next;
5904 }
5905
5906 current = entry;
5907 while ((current != vm_map_to_entry(map)) &&
5908 (current->vme_start <= end)) {
5909 vm_map_simplify_entry(map, current);
5910 current = current->vme_next;
5911 }
5912
5913 vm_map_unlock(map);
5914 return(KERN_SUCCESS);
5915}
5916
5917/*
5918 * vm_map_inherit:
5919 *
5920 * Sets the inheritance of the specified address
5921 * range in the target map. Inheritance
5922 * affects how the map will be shared with
5923 * child maps at the time of vm_map_fork.
5924 */
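/*
 * Illustrative sketch (hypothetical caller, not from this file): marking a
 * range so that children created by vm_map_fork() share it rather than
 * copy it:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *
 * Note that VM_INHERIT_COPY is rejected below for ranges containing submap
 * entries, so such a caller should expect KERN_INVALID_ARGUMENT there.
 */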
5925kern_return_t
5926vm_map_inherit(
5927 vm_map_t map,
5928 vm_map_offset_t start,
5929 vm_map_offset_t end,
5930 vm_inherit_t new_inheritance)
5931{
5932 vm_map_entry_t entry;
5933 vm_map_entry_t temp_entry;
5934
5935 vm_map_lock(map);
5936
5937 VM_MAP_RANGE_CHECK(map, start, end);
5938
5939 if (vm_map_lookup_entry(map, start, &temp_entry)) {
5940 entry = temp_entry;
5941 }
5942 else {
5943 temp_entry = temp_entry->vme_next;
5944 entry = temp_entry;
5945 }
5946
5947 /* first check entire range for submaps which can't support the */
5948 /* given inheritance. */
5949 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5950 if(entry->is_sub_map) {
5951 if(new_inheritance == VM_INHERIT_COPY) {
5952 vm_map_unlock(map);
5953 return(KERN_INVALID_ARGUMENT);
5954 }
5955 }
5956
5957 entry = entry->vme_next;
5958 }
5959
5960 entry = temp_entry;
5961 if (entry != vm_map_to_entry(map)) {
5962 /* clip and unnest if necessary */
5963 vm_map_clip_start(map, entry, start);
5964 }
5965
5966 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5967 vm_map_clip_end(map, entry, end);
5968 if (entry->is_sub_map) {
5969 /* clip did unnest if needed */
5970 assert(!entry->use_pmap);
5971 }
5972
5973 entry->inheritance = new_inheritance;
5974
5975 entry = entry->vme_next;
5976 }
5977
5978 vm_map_unlock(map);
5979 return(KERN_SUCCESS);
5980}
5981
5982/*
5983 * Update the accounting for the amount of wired memory in this map. If the user has
5984 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
5985 */
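/*
 * Hypothetical illustration of the first-wire check below: user-wiring a
 * 64KB entry into a map that has already user-wired 1MB succeeds only if
 *
 *	64KB + 1MB <= MIN(map->user_wire_limit, vm_user_wire_limit)
 *
 * and the resulting system-wide wired total stays below both
 * vm_global_user_wire_limit and (max_mem - vm_global_no_user_wire_amount).
 */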
5986
5987static kern_return_t
5988add_wire_counts(
5989 vm_map_t map,
5990 vm_map_entry_t entry,
5991 boolean_t user_wire)
5992{
5993 vm_map_size_t size;
5994
5995 if (user_wire) {
5996 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
5997
5998 /*
5999 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6000 * this map entry.
6001 */
6002
6003 if (entry->user_wired_count == 0) {
6004 size = entry->vme_end - entry->vme_start;
6005
6006 /*
6007 * Since this is the first time the user is wiring this map entry, check to see if we're
6008 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6009 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
6010 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6011 * limit, then we fail.
6012 */
6013
6014 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6015 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
6016 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
6017 return KERN_RESOURCE_SHORTAGE;
6018
6019 /*
6020 * The first time the user wires an entry, we also increment the wired_count and add this to
6021 * the total that has been wired in the map.
6022 */
6023
6024 if (entry->wired_count >= MAX_WIRE_COUNT)
6025 return KERN_FAILURE;
6026
6027 entry->wired_count++;
6028 map->user_wire_size += size;
6029 }
6030
6031 if (entry->user_wired_count >= MAX_WIRE_COUNT)
6032 return KERN_FAILURE;
6033
6034 entry->user_wired_count++;
6035
6036 } else {
6037
6038 /*
6039 * The kernel's wiring the memory. Just bump the count and continue.
6040 */
6041
6042 if (entry->wired_count >= MAX_WIRE_COUNT)
6043 panic("vm_map_wire: too many wirings");
6044
6045 entry->wired_count++;
6046 }
6047
6048 return KERN_SUCCESS;
6049}
6050
6051/*
6052 * Update the memory wiring accounting now that the given map entry is being unwired.
6053 */
6054
6055static void
6056subtract_wire_counts(
6057 vm_map_t map,
6058 vm_map_entry_t entry,
6059 boolean_t user_wire)
6060{
6061
6062 if (user_wire) {
6063
6064 /*
6065 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6066 */
6067
6068 if (entry->user_wired_count == 1) {
6069
6070 /*
6071 * We're removing the last user wire reference. Decrement the wired_count and the total
6072 * user wired memory for this map.
6073 */
6074
6075 assert(entry->wired_count >= 1);
6076 entry->wired_count--;
6077 map->user_wire_size -= entry->vme_end - entry->vme_start;
6078 }
6079
6080 assert(entry->user_wired_count >= 1);
6081 entry->user_wired_count--;
6082
6083 } else {
6084
6085 /*
6086 * The kernel is unwiring the memory. Just update the count.
6087 */
6088
6089 assert(entry->wired_count >= 1);
6090 entry->wired_count--;
6091 }
6092}
6093
6094int cs_executable_wire = 0;
6095
6096/*
6097 * vm_map_wire:
6098 *
6099 * Sets the pageability of the specified address range in the
6100 * target map as wired. Regions specified as not pageable require
6101 * locked-down physical memory and physical page maps. The
6102 * access_type variable indicates types of accesses that must not
6103 * generate page faults. This is checked against protection of
6104 * memory being locked-down.
6105 *
6106 * The map must not be locked, but a reference must remain to the
6107 * map throughout the call.
6108 */
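/*
 * Illustrative example (hypothetical caller, not part of this file):
 *
 *	kr = vm_map_wire_kernel(map, addr, addr + PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_OSFMK, FALSE);
 *
 * wires one page for read/write access under an explicit allocation tag;
 * vm_map_wire_external() is the variant that derives the tag from the
 * caller's backtrace via vm_tag_bt().
 */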
6109static kern_return_t
6110vm_map_wire_nested(
6111 vm_map_t map,
6112 vm_map_offset_t start,
6113 vm_map_offset_t end,
6114 vm_prot_t caller_prot,
6115 vm_tag_t tag,
6116 boolean_t user_wire,
6117 pmap_t map_pmap,
6118 vm_map_offset_t pmap_addr,
6119 ppnum_t *physpage_p)
6120{
6121 vm_map_entry_t entry;
6122 vm_prot_t access_type;
6123 struct vm_map_entry *first_entry, tmp_entry;
6124 vm_map_t real_map;
6125 vm_map_offset_t s,e;
6126 kern_return_t rc;
6127 boolean_t need_wakeup;
6128 boolean_t main_map = FALSE;
6129 wait_interrupt_t interruptible_state;
6130 thread_t cur_thread;
6131 unsigned int last_timestamp;
6132 vm_map_size_t size;
6133 boolean_t wire_and_extract;
6134
6135 access_type = (caller_prot & VM_PROT_ALL);
6136
6137 wire_and_extract = FALSE;
6138 if (physpage_p != NULL) {
6139 /*
6140 * The caller wants the physical page number of the
6141 * wired page. We return only one physical page number
6142 * so this works for only one page at a time.
6143 */
6144 if ((end - start) != PAGE_SIZE) {
6145 return KERN_INVALID_ARGUMENT;
6146 }
6147 wire_and_extract = TRUE;
6148 *physpage_p = 0;
6149 }
6150
6151 vm_map_lock(map);
6152 if(map_pmap == NULL)
6153 main_map = TRUE;
6154 last_timestamp = map->timestamp;
6155
6156 VM_MAP_RANGE_CHECK(map, start, end);
6157 assert(page_aligned(start));
6158 assert(page_aligned(end));
6159 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6160 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6161 if (start == end) {
6162 /* We wired what the caller asked for, zero pages */
6163 vm_map_unlock(map);
6164 return KERN_SUCCESS;
6165 }
6166
6167 need_wakeup = FALSE;
6168 cur_thread = current_thread();
6169
6170 s = start;
6171 rc = KERN_SUCCESS;
6172
6173 if (vm_map_lookup_entry(map, s, &first_entry)) {
6174 entry = first_entry;
6175 /*
6176 * vm_map_clip_start will be done later.
6177 * We don't want to unnest any nested submaps here !
6178 */
6179 } else {
6180 /* Start address is not in map */
6181 rc = KERN_INVALID_ADDRESS;
6182 goto done;
6183 }
6184
6185 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6186 /*
6187 * At this point, we have wired from "start" to "s".
6188 * We still need to wire from "s" to "end".
6189 *
6190 * "entry" hasn't been clipped, so it could start before "s"
6191 * and/or end after "end".
6192 */
6193
6194 /* "e" is how far we want to wire in this entry */
6195 e = entry->vme_end;
6196 if (e > end)
6197 e = end;
6198
6199 /*
6200 * If another thread is wiring/unwiring this entry then
6201		 * block after informing the other thread to wake us up.
6202 */
6203 if (entry->in_transition) {
6204 wait_result_t wait_result;
6205
6206 /*
6207 * We have not clipped the entry. Make sure that
6208 * the start address is in range so that the lookup
6209 * below will succeed.
6210 * "s" is the current starting point: we've already
6211 * wired from "start" to "s" and we still have
6212 * to wire from "s" to "end".
6213 */
6214
6215 entry->needs_wakeup = TRUE;
6216
6217 /*
6218 * wake up anybody waiting on entries that we have
6219 * already wired.
6220 */
6221 if (need_wakeup) {
6222 vm_map_entry_wakeup(map);
6223 need_wakeup = FALSE;
6224 }
6225 /*
6226 * User wiring is interruptible
6227 */
6228 wait_result = vm_map_entry_wait(map,
6229 (user_wire) ? THREAD_ABORTSAFE :
6230 THREAD_UNINT);
6231 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6232 /*
6233 * undo the wirings we have done so far
6234 * We do not clear the needs_wakeup flag,
6235 * because we cannot tell if we were the
6236 * only one waiting.
6237 */
6238 rc = KERN_FAILURE;
6239 goto done;
6240 }
6241
6242 /*
6243 * Cannot avoid a lookup here. reset timestamp.
6244 */
6245 last_timestamp = map->timestamp;
6246
6247 /*
6248 * The entry could have been clipped, look it up again.
6249			 * Worst that can happen is that it may not exist anymore.
6250 */
6251 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6252 /*
6253				 * User: undo everything up to the previous
6254				 * entry. Let vm_map_unwire worry about
6255 * checking the validity of the range.
6256 */
6257 rc = KERN_FAILURE;
6258 goto done;
6259 }
6260 entry = first_entry;
6261 continue;
6262 }
6263
6264 if (entry->is_sub_map) {
6265 vm_map_offset_t sub_start;
6266 vm_map_offset_t sub_end;
6267 vm_map_offset_t local_start;
6268 vm_map_offset_t local_end;
6269 pmap_t pmap;
6270
6271 if (wire_and_extract) {
6272 /*
6273 * Wiring would result in copy-on-write
6274 * which would not be compatible with
6275 * the sharing we have with the original
6276 * provider of this memory.
6277 */
6278 rc = KERN_INVALID_ARGUMENT;
6279 goto done;
6280 }
6281
6282 vm_map_clip_start(map, entry, s);
6283 vm_map_clip_end(map, entry, end);
6284
6285 sub_start = VME_OFFSET(entry);
6286 sub_end = entry->vme_end;
6287 sub_end += VME_OFFSET(entry) - entry->vme_start;
6288
6289 local_end = entry->vme_end;
6290 if(map_pmap == NULL) {
6291 vm_object_t object;
6292 vm_object_offset_t offset;
6293 vm_prot_t prot;
6294 boolean_t wired;
6295 vm_map_entry_t local_entry;
6296 vm_map_version_t version;
6297 vm_map_t lookup_map;
6298
6299 if(entry->use_pmap) {
6300 pmap = VME_SUBMAP(entry)->pmap;
6301 /* ppc implementation requires that */
6302					/* submap's pmap address ranges line */
6303					/* up with the parent map */
6304#ifdef notdef
6305 pmap_addr = sub_start;
6306#endif
6307 pmap_addr = s;
6308 } else {
6309 pmap = map->pmap;
6310 pmap_addr = s;
6311 }
6312
6313 if (entry->wired_count) {
6314 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6315 goto done;
6316
6317 /*
6318 * The map was not unlocked:
6319 * no need to goto re-lookup.
6320 * Just go directly to next entry.
6321 */
6322 entry = entry->vme_next;
6323 s = entry->vme_start;
6324 continue;
6325
6326 }
6327
6328 /* call vm_map_lookup_locked to */
6329 /* cause any needs copy to be */
6330 /* evaluated */
6331 local_start = entry->vme_start;
6332 lookup_map = map;
6333 vm_map_lock_write_to_read(map);
6334 if(vm_map_lookup_locked(
6335 &lookup_map, local_start,
6336 access_type | VM_PROT_COPY,
6337 OBJECT_LOCK_EXCLUSIVE,
6338 &version, &object,
6339 &offset, &prot, &wired,
6340 NULL,
6341 &real_map)) {
6342
6343 vm_map_unlock_read(lookup_map);
6344 assert(map_pmap == NULL);
6345 vm_map_unwire(map, start,
6346 s, user_wire);
6347 return(KERN_FAILURE);
6348 }
6349 vm_object_unlock(object);
6350 if(real_map != lookup_map)
6351 vm_map_unlock(real_map);
6352 vm_map_unlock_read(lookup_map);
6353 vm_map_lock(map);
6354
6355 /* we unlocked, so must re-lookup */
6356 if (!vm_map_lookup_entry(map,
6357 local_start,
6358 &local_entry)) {
6359 rc = KERN_FAILURE;
6360 goto done;
6361 }
6362
6363 /*
6364 * entry could have been "simplified",
6365 * so re-clip
6366 */
6367 entry = local_entry;
6368 assert(s == local_start);
6369 vm_map_clip_start(map, entry, s);
6370 vm_map_clip_end(map, entry, end);
6371 /* re-compute "e" */
6372 e = entry->vme_end;
6373 if (e > end)
6374 e = end;
6375
6376 /* did we have a change of type? */
6377 if (!entry->is_sub_map) {
6378 last_timestamp = map->timestamp;
6379 continue;
6380 }
6381 } else {
6382 local_start = entry->vme_start;
6383 pmap = map_pmap;
6384 }
6385
6386 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6387 goto done;
6388
6389 entry->in_transition = TRUE;
6390
6391 vm_map_unlock(map);
6392 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6393 sub_start, sub_end,
6394 caller_prot, tag,
6395 user_wire, pmap, pmap_addr,
6396 NULL);
6397 vm_map_lock(map);
6398
6399 /*
6400 * Find the entry again. It could have been clipped
6401 * after we unlocked the map.
6402 */
6403 if (!vm_map_lookup_entry(map, local_start,
6404 &first_entry))
6405 panic("vm_map_wire: re-lookup failed");
6406 entry = first_entry;
6407
6408 assert(local_start == s);
6409 /* re-compute "e" */
6410 e = entry->vme_end;
6411 if (e > end)
6412 e = end;
6413
6414 last_timestamp = map->timestamp;
6415 while ((entry != vm_map_to_entry(map)) &&
6416 (entry->vme_start < e)) {
6417 assert(entry->in_transition);
6418 entry->in_transition = FALSE;
6419 if (entry->needs_wakeup) {
6420 entry->needs_wakeup = FALSE;
6421 need_wakeup = TRUE;
6422 }
6423 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6424 subtract_wire_counts(map, entry, user_wire);
6425 }
6426 entry = entry->vme_next;
6427 }
6428 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6429 goto done;
6430 }
6431
6432 /* no need to relookup again */
6433 s = entry->vme_start;
6434 continue;
6435 }
6436
6437 /*
6438 * If this entry is already wired then increment
6439 * the appropriate wire reference count.
6440 */
6441 if (entry->wired_count) {
6442
6443 if ((entry->protection & access_type) != access_type) {
6444 /* found a protection problem */
6445
6446 /*
6447 * XXX FBDP
6448 * We should always return an error
6449 * in this case but since we didn't
6450 * enforce it before, let's do
6451 * it only for the new "wire_and_extract"
6452 * code path for now...
6453 */
6454 if (wire_and_extract) {
6455 rc = KERN_PROTECTION_FAILURE;
6456 goto done;
6457 }
6458 }
6459
6460 /*
6461 * entry is already wired down, get our reference
6462 * after clipping to our range.
6463 */
6464 vm_map_clip_start(map, entry, s);
6465 vm_map_clip_end(map, entry, end);
6466
6467 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6468 goto done;
6469
6470 if (wire_and_extract) {
6471 vm_object_t object;
6472 vm_object_offset_t offset;
6473 vm_page_t m;
6474
6475 /*
6476 * We don't have to "wire" the page again
6477				 * but we still have to "extract" its
6478 * physical page number, after some sanity
6479 * checks.
6480 */
6481 assert((entry->vme_end - entry->vme_start)
6482 == PAGE_SIZE);
6483 assert(!entry->needs_copy);
6484 assert(!entry->is_sub_map);
6485 assert(VME_OBJECT(entry));
6486 if (((entry->vme_end - entry->vme_start)
6487 != PAGE_SIZE) ||
6488 entry->needs_copy ||
6489 entry->is_sub_map ||
6490 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6491 rc = KERN_INVALID_ARGUMENT;
6492 goto done;
6493 }
6494
6495 object = VME_OBJECT(entry);
6496 offset = VME_OFFSET(entry);
6497 /* need exclusive lock to update m->dirty */
6498 if (entry->protection & VM_PROT_WRITE) {
6499 vm_object_lock(object);
6500 } else {
6501 vm_object_lock_shared(object);
6502 }
6503 m = vm_page_lookup(object, offset);
6504 assert(m != VM_PAGE_NULL);
6505 assert(VM_PAGE_WIRED(m));
6506 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6507 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6508 if (entry->protection & VM_PROT_WRITE) {
6509 vm_object_lock_assert_exclusive(
6510 object);
6511 m->vmp_dirty = TRUE;
6512 }
6513 } else {
6514 /* not already wired !? */
6515 *physpage_p = 0;
6516 }
6517 vm_object_unlock(object);
6518 }
6519
6520 /* map was not unlocked: no need to relookup */
6521 entry = entry->vme_next;
6522 s = entry->vme_start;
6523 continue;
6524 }
6525
6526 /*
6527 * Unwired entry or wire request transmitted via submap
6528 */
6529
6530 /*
6531 * Wiring would copy the pages to the shadow object.
6532 * The shadow object would not be code-signed so
6533 * attempting to execute code from these copied pages
6534 * would trigger a code-signing violation.
6535 */
6536
6537 if ((entry->protection & VM_PROT_EXECUTE)
6538#if !CONFIG_EMBEDDED
6539 &&
6540 map != kernel_map &&
6541 cs_process_enforcement(NULL)
6542#endif /* !CONFIG_EMBEDDED */
6543 ) {
6544#if MACH_ASSERT
6545 printf("pid %d[%s] wiring executable range from "
6546 "0x%llx to 0x%llx: rejected to preserve "
6547 "code-signing\n",
6548 proc_selfpid(),
6549 (current_task()->bsd_info
6550 ? proc_name_address(current_task()->bsd_info)
6551 : "?"),
6552 (uint64_t) entry->vme_start,
6553 (uint64_t) entry->vme_end);
6554#endif /* MACH_ASSERT */
6555 DTRACE_VM2(cs_executable_wire,
6556 uint64_t, (uint64_t)entry->vme_start,
6557 uint64_t, (uint64_t)entry->vme_end);
6558 cs_executable_wire++;
6559 rc = KERN_PROTECTION_FAILURE;
6560 goto done;
6561 }
6562
6563 /*
6564 * Perform actions of vm_map_lookup that need the write
6565 * lock on the map: create a shadow object for a
6566 * copy-on-write region, or an object for a zero-fill
6567 * region.
6568 */
6569 size = entry->vme_end - entry->vme_start;
6570 /*
6571 * If wiring a copy-on-write page, we need to copy it now
6572 * even if we're only (currently) requesting read access.
6573 * This is aggressive, but once it's wired we can't move it.
6574 */
6575 if (entry->needs_copy) {
6576 if (wire_and_extract) {
6577 /*
6578 * We're supposed to share with the original
6579 * provider so should not be "needs_copy"
6580 */
6581 rc = KERN_INVALID_ARGUMENT;
6582 goto done;
6583 }
6584
6585 VME_OBJECT_SHADOW(entry, size);
6586 entry->needs_copy = FALSE;
6587 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6588 if (wire_and_extract) {
6589 /*
6590 * We're supposed to share with the original
6591 * provider so should already have an object.
6592 */
6593 rc = KERN_INVALID_ARGUMENT;
6594 goto done;
6595 }
6596 VME_OBJECT_SET(entry, vm_object_allocate(size));
6597 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6598 assert(entry->use_pmap);
6599 }
6600
6601 vm_map_clip_start(map, entry, s);
6602 vm_map_clip_end(map, entry, end);
6603
6604 /* re-compute "e" */
6605 e = entry->vme_end;
6606 if (e > end)
6607 e = end;
6608
6609 /*
6610 * Check for holes and protection mismatch.
6611 * Holes: Next entry should be contiguous unless this
6612 * is the end of the region.
6613 * Protection: Access requested must be allowed, unless
6614 * wiring is by protection class
6615 */
6616 if ((entry->vme_end < end) &&
6617 ((entry->vme_next == vm_map_to_entry(map)) ||
6618 (entry->vme_next->vme_start > entry->vme_end))) {
6619 /* found a hole */
6620 rc = KERN_INVALID_ADDRESS;
6621 goto done;
6622 }
6623 if ((entry->protection & access_type) != access_type) {
6624 /* found a protection problem */
6625 rc = KERN_PROTECTION_FAILURE;
6626 goto done;
6627 }
6628
6629 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6630
6631 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6632 goto done;
6633
6634 entry->in_transition = TRUE;
6635
6636 /*
6637 * This entry might get split once we unlock the map.
6638 * In vm_fault_wire(), we need the current range as
6639 * defined by this entry. In order for this to work
6640 * along with a simultaneous clip operation, we make a
6641 * temporary copy of this entry and use that for the
6642 * wiring. Note that the underlying objects do not
6643 * change during a clip.
6644 */
6645 tmp_entry = *entry;
6646
6647 /*
6648		 * The in_transition state guarantees that the entry
6649		 * (or entries for this range, if a split occurred) will be
6650 * there when the map lock is acquired for the second time.
6651 */
6652 vm_map_unlock(map);
6653
6654 if (!user_wire && cur_thread != THREAD_NULL)
6655 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6656 else
6657 interruptible_state = THREAD_UNINT;
6658
6659 if(map_pmap)
6660 rc = vm_fault_wire(map,
6661 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6662 physpage_p);
6663 else
6664 rc = vm_fault_wire(map,
6665 &tmp_entry, caller_prot, tag, map->pmap,
6666 tmp_entry.vme_start,
6667 physpage_p);
6668
6669 if (!user_wire && cur_thread != THREAD_NULL)
6670 thread_interrupt_level(interruptible_state);
6671
6672 vm_map_lock(map);
6673
6674 if (last_timestamp+1 != map->timestamp) {
6675 /*
6676 * Find the entry again. It could have been clipped
6677 * after we unlocked the map.
6678 */
6679 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6680 &first_entry))
6681 panic("vm_map_wire: re-lookup failed");
6682
6683 entry = first_entry;
6684 }
6685
6686 last_timestamp = map->timestamp;
6687
6688 while ((entry != vm_map_to_entry(map)) &&
6689 (entry->vme_start < tmp_entry.vme_end)) {
6690 assert(entry->in_transition);
6691 entry->in_transition = FALSE;
6692 if (entry->needs_wakeup) {
6693 entry->needs_wakeup = FALSE;
6694 need_wakeup = TRUE;
6695 }
6696 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6697 subtract_wire_counts(map, entry, user_wire);
6698 }
6699 entry = entry->vme_next;
6700 }
6701
6702 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6703 goto done;
6704 }
6705
6706 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6707 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6708 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6709 /* found a "new" hole */
6710 s = tmp_entry.vme_end;
6711 rc = KERN_INVALID_ADDRESS;
6712 goto done;
6713 }
6714
6715 s = entry->vme_start;
6716
6717 } /* end while loop through map entries */
6718
6719done:
6720 if (rc == KERN_SUCCESS) {
6721 /* repair any damage we may have made to the VM map */
6722 vm_map_simplify_range(map, start, end);
6723 }
6724
6725 vm_map_unlock(map);
6726
6727 /*
6728 * wake up anybody waiting on entries we wired.
6729 */
6730 if (need_wakeup)
6731 vm_map_entry_wakeup(map);
6732
6733 if (rc != KERN_SUCCESS) {
6734 /* undo what has been wired so far */
6735 vm_map_unwire_nested(map, start, s, user_wire,
6736 map_pmap, pmap_addr);
6737 if (physpage_p) {
6738 *physpage_p = 0;
6739 }
6740 }
6741
6742 return rc;
6743
6744}
6745
6746kern_return_t
6747vm_map_wire_external(
6748 vm_map_t map,
6749 vm_map_offset_t start,
6750 vm_map_offset_t end,
6751 vm_prot_t caller_prot,
6752 boolean_t user_wire)
6753{
6754 kern_return_t kret;
6755
6756 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6757 user_wire, (pmap_t)NULL, 0, NULL);
6758 return kret;
6759}
6760
6761kern_return_t
6762vm_map_wire_kernel(
6763 vm_map_t map,
6764 vm_map_offset_t start,
6765 vm_map_offset_t end,
6766 vm_prot_t caller_prot,
6767 vm_tag_t tag,
6768 boolean_t user_wire)
6769{
6770 kern_return_t kret;
6771
6772 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6773 user_wire, (pmap_t)NULL, 0, NULL);
6774 return kret;
6775}
6776
6777kern_return_t
6778vm_map_wire_and_extract_external(
6779 vm_map_t map,
6780 vm_map_offset_t start,
6781 vm_prot_t caller_prot,
6782 boolean_t user_wire,
6783 ppnum_t *physpage_p)
6784{
6785 kern_return_t kret;
6786
6787 kret = vm_map_wire_nested(map,
6788 start,
6789 start+VM_MAP_PAGE_SIZE(map),
6790 caller_prot,
6791 vm_tag_bt(),
6792 user_wire,
6793 (pmap_t)NULL,
6794 0,
6795 physpage_p);
6796 if (kret != KERN_SUCCESS &&
6797 physpage_p != NULL) {
6798 *physpage_p = 0;
6799 }
6800 return kret;
6801}
6802
6803kern_return_t
6804vm_map_wire_and_extract_kernel(
6805 vm_map_t map,
6806 vm_map_offset_t start,
6807 vm_prot_t caller_prot,
6808 vm_tag_t tag,
6809 boolean_t user_wire,
6810 ppnum_t *physpage_p)
6811{
6812 kern_return_t kret;
6813
6814 kret = vm_map_wire_nested(map,
6815 start,
6816 start+VM_MAP_PAGE_SIZE(map),
6817 caller_prot,
6818 tag,
6819 user_wire,
6820 (pmap_t)NULL,
6821 0,
6822 physpage_p);
6823 if (kret != KERN_SUCCESS &&
6824 physpage_p != NULL) {
6825 *physpage_p = 0;
6826 }
6827 return kret;
6828}
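
/*
 * Illustrative sketch (hypothetical caller): wire a single page and fetch
 * its physical page number:
 *
 *	ppnum_t pn = 0;
 *	kr = vm_map_wire_and_extract_kernel(map, addr,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_OSFMK, FALSE, &pn);
 *
 * Only one VM_MAP_PAGE_SIZE(map) worth of address space is wired, and the
 * wrappers above reset *physpage_p to 0 if the wiring fails.
 */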
6829
6830/*
6831 * vm_map_unwire:
6832 *
6833 * Sets the pageability of the specified address range in the target
6834 * map as pageable. Regions specified must have been wired previously.
6835 *
6836 * The map must not be locked, but a reference must remain to the map
6837 * throughout the call.
6838 *
6839 * Kernel will panic on failures. User unwire ignores holes and
6840 * unwired and in-transition entries to avoid losing memory by leaving
6841 * it unwired.
6842 */
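/*
 * Illustrative example (hypothetical caller): undoing a user wiring made
 * earlier on the same range:
 *
 *	kr = vm_map_unwire(map, start, end, TRUE);
 *
 * Passing FALSE instead unwires a kernel wiring and, as noted above, will
 * panic on holes or already-unwired entries.
 */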
6843static kern_return_t
6844vm_map_unwire_nested(
6845 vm_map_t map,
6846 vm_map_offset_t start,
6847 vm_map_offset_t end,
6848 boolean_t user_wire,
6849 pmap_t map_pmap,
6850 vm_map_offset_t pmap_addr)
6851{
6852 vm_map_entry_t entry;
6853 struct vm_map_entry *first_entry, tmp_entry;
6854 boolean_t need_wakeup;
6855 boolean_t main_map = FALSE;
6856 unsigned int last_timestamp;
6857
6858 vm_map_lock(map);
6859 if(map_pmap == NULL)
6860 main_map = TRUE;
6861 last_timestamp = map->timestamp;
6862
6863 VM_MAP_RANGE_CHECK(map, start, end);
6864 assert(page_aligned(start));
6865 assert(page_aligned(end));
6866 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6867 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6868
6869 if (start == end) {
6870 /* We unwired what the caller asked for: zero pages */
6871 vm_map_unlock(map);
6872 return KERN_SUCCESS;
6873 }
6874
6875 if (vm_map_lookup_entry(map, start, &first_entry)) {
6876 entry = first_entry;
6877 /*
6878 * vm_map_clip_start will be done later.
6879 * We don't want to unnest any nested sub maps here !
6880 */
6881 }
6882 else {
6883 if (!user_wire) {
6884 panic("vm_map_unwire: start not found");
6885 }
6886 /* Start address is not in map. */
6887 vm_map_unlock(map);
6888 return(KERN_INVALID_ADDRESS);
6889 }
6890
6891 if (entry->superpage_size) {
6892 /* superpages are always wired */
6893 vm_map_unlock(map);
6894 return KERN_INVALID_ADDRESS;
6895 }
6896
6897 need_wakeup = FALSE;
6898 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6899 if (entry->in_transition) {
6900 /*
6901 * 1)
6902 * Another thread is wiring down this entry. Note
6903			 * that, were it not for the other thread, we would
6904 * be unwiring an unwired entry. This is not
6905 * permitted. If we wait, we will be unwiring memory
6906 * we did not wire.
6907 *
6908 * 2)
6909 * Another thread is unwiring this entry. We did not
6910 * have a reference to it, because if we did, this
6911			 * entry would not be getting unwired now.
6912 */
6913 if (!user_wire) {
6914 /*
6915 * XXX FBDP
6916 * This could happen: there could be some
6917 * overlapping vslock/vsunlock operations
6918 * going on.
6919 * We should probably just wait and retry,
6920 * but then we have to be careful that this
6921 * entry could get "simplified" after
6922 * "in_transition" gets unset and before
6923 * we re-lookup the entry, so we would
6924 * have to re-clip the entry to avoid
6925 * re-unwiring what we have already unwired...
6926 * See vm_map_wire_nested().
6927 *
6928 * Or we could just ignore "in_transition"
6929				 * here and proceed to decrement the wired
6930 * count(s) on this entry. That should be fine
6931 * as long as "wired_count" doesn't drop all
6932 * the way to 0 (and we should panic if THAT
6933 * happens).
6934 */
6935 panic("vm_map_unwire: in_transition entry");
6936 }
6937
6938 entry = entry->vme_next;
6939 continue;
6940 }
6941
6942 if (entry->is_sub_map) {
6943 vm_map_offset_t sub_start;
6944 vm_map_offset_t sub_end;
6945 vm_map_offset_t local_end;
6946 pmap_t pmap;
6947
6948 vm_map_clip_start(map, entry, start);
6949 vm_map_clip_end(map, entry, end);
6950
6951 sub_start = VME_OFFSET(entry);
6952 sub_end = entry->vme_end - entry->vme_start;
6953 sub_end += VME_OFFSET(entry);
6954 local_end = entry->vme_end;
6955 if(map_pmap == NULL) {
6956 if(entry->use_pmap) {
6957 pmap = VME_SUBMAP(entry)->pmap;
6958 pmap_addr = sub_start;
6959 } else {
6960 pmap = map->pmap;
6961 pmap_addr = start;
6962 }
6963 if (entry->wired_count == 0 ||
6964 (user_wire && entry->user_wired_count == 0)) {
6965 if (!user_wire)
6966 panic("vm_map_unwire: entry is unwired");
6967 entry = entry->vme_next;
6968 continue;
6969 }
6970
6971 /*
6972 * Check for holes
6973 * Holes: Next entry should be contiguous unless
6974 * this is the end of the region.
6975 */
6976 if (((entry->vme_end < end) &&
6977 ((entry->vme_next == vm_map_to_entry(map)) ||
6978 (entry->vme_next->vme_start
6979 > entry->vme_end)))) {
6980 if (!user_wire)
6981 panic("vm_map_unwire: non-contiguous region");
6982/*
6983 entry = entry->vme_next;
6984 continue;
6985*/
6986 }
6987
6988 subtract_wire_counts(map, entry, user_wire);
6989
6990 if (entry->wired_count != 0) {
6991 entry = entry->vme_next;
6992 continue;
6993 }
6994
6995 entry->in_transition = TRUE;
6996 tmp_entry = *entry;/* see comment in vm_map_wire() */
6997
6998 /*
6999 * We can unlock the map now. The in_transition state
7000				 * guarantees existence of the entry.
7001 */
7002 vm_map_unlock(map);
7003 vm_map_unwire_nested(VME_SUBMAP(entry),
7004 sub_start, sub_end, user_wire, pmap, pmap_addr);
7005 vm_map_lock(map);
7006
7007 if (last_timestamp+1 != map->timestamp) {
7008 /*
7009 * Find the entry again. It could have been
7010 * clipped or deleted after we unlocked the map.
7011 */
7012 if (!vm_map_lookup_entry(map,
7013 tmp_entry.vme_start,
7014 &first_entry)) {
7015 if (!user_wire)
7016 panic("vm_map_unwire: re-lookup failed");
7017 entry = first_entry->vme_next;
7018 } else
7019 entry = first_entry;
7020 }
7021 last_timestamp = map->timestamp;
7022
7023 /*
7024 * clear transition bit for all constituent entries
7025 * that were in the original entry (saved in
7026 * tmp_entry). Also check for waiters.
7027 */
7028 while ((entry != vm_map_to_entry(map)) &&
7029 (entry->vme_start < tmp_entry.vme_end)) {
7030 assert(entry->in_transition);
7031 entry->in_transition = FALSE;
7032 if (entry->needs_wakeup) {
7033 entry->needs_wakeup = FALSE;
7034 need_wakeup = TRUE;
7035 }
7036 entry = entry->vme_next;
7037 }
7038 continue;
7039 } else {
7040 vm_map_unlock(map);
7041 vm_map_unwire_nested(VME_SUBMAP(entry),
7042 sub_start, sub_end, user_wire, map_pmap,
7043 pmap_addr);
7044 vm_map_lock(map);
7045
7046 if (last_timestamp+1 != map->timestamp) {
7047 /*
7048 * Find the entry again. It could have been
7049 * clipped or deleted after we unlocked the map.
7050 */
7051 if (!vm_map_lookup_entry(map,
7052 tmp_entry.vme_start,
7053 &first_entry)) {
7054 if (!user_wire)
7055 panic("vm_map_unwire: re-lookup failed");
7056 entry = first_entry->vme_next;
7057 } else
7058 entry = first_entry;
7059 }
7060 last_timestamp = map->timestamp;
7061 }
7062 }
7063
7064
7065 if ((entry->wired_count == 0) ||
7066 (user_wire && entry->user_wired_count == 0)) {
7067 if (!user_wire)
7068 panic("vm_map_unwire: entry is unwired");
7069
7070 entry = entry->vme_next;
7071 continue;
7072 }
7073
7074 assert(entry->wired_count > 0 &&
7075 (!user_wire || entry->user_wired_count > 0));
7076
7077 vm_map_clip_start(map, entry, start);
7078 vm_map_clip_end(map, entry, end);
7079
7080 /*
7081 * Check for holes
7082 * Holes: Next entry should be contiguous unless
7083 * this is the end of the region.
7084 */
7085 if (((entry->vme_end < end) &&
7086 ((entry->vme_next == vm_map_to_entry(map)) ||
7087 (entry->vme_next->vme_start > entry->vme_end)))) {
7088
7089 if (!user_wire)
7090 panic("vm_map_unwire: non-contiguous region");
7091 entry = entry->vme_next;
7092 continue;
7093 }
7094
7095 subtract_wire_counts(map, entry, user_wire);
7096
7097 if (entry->wired_count != 0) {
7098 entry = entry->vme_next;
7099 continue;
7100 }
7101
7102 if(entry->zero_wired_pages) {
7103 entry->zero_wired_pages = FALSE;
7104 }
7105
7106 entry->in_transition = TRUE;
7107 tmp_entry = *entry; /* see comment in vm_map_wire() */
7108
7109 /*
7110 * We can unlock the map now. The in_transition state
7111		 * guarantees existence of the entry.
7112 */
7113 vm_map_unlock(map);
7114 if(map_pmap) {
7115 vm_fault_unwire(map,
7116 &tmp_entry, FALSE, map_pmap, pmap_addr);
7117 } else {
7118 vm_fault_unwire(map,
7119 &tmp_entry, FALSE, map->pmap,
7120 tmp_entry.vme_start);
7121 }
7122 vm_map_lock(map);
7123
7124 if (last_timestamp+1 != map->timestamp) {
7125 /*
7126 * Find the entry again. It could have been clipped
7127 * or deleted after we unlocked the map.
7128 */
7129 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7130 &first_entry)) {
7131 if (!user_wire)
7132 panic("vm_map_unwire: re-lookup failed");
7133 entry = first_entry->vme_next;
7134 } else
7135 entry = first_entry;
7136 }
7137 last_timestamp = map->timestamp;
7138
7139 /*
7140 * clear transition bit for all constituent entries that
7141 * were in the original entry (saved in tmp_entry). Also
7142 * check for waiters.
7143 */
7144 while ((entry != vm_map_to_entry(map)) &&
7145 (entry->vme_start < tmp_entry.vme_end)) {
7146 assert(entry->in_transition);
7147 entry->in_transition = FALSE;
7148 if (entry->needs_wakeup) {
7149 entry->needs_wakeup = FALSE;
7150 need_wakeup = TRUE;
7151 }
7152 entry = entry->vme_next;
7153 }
7154 }
7155
7156 /*
7157 * We might have fragmented the address space when we wired this
7158 * range of addresses. Attempt to re-coalesce these VM map entries
7159 * with their neighbors now that they're no longer wired.
7160 * Under some circumstances, address space fragmentation can
7161 * prevent VM object shadow chain collapsing, which can cause
7162 * swap space leaks.
7163 */
7164 vm_map_simplify_range(map, start, end);
7165
7166 vm_map_unlock(map);
7167 /*
7168 * wake up anybody waiting on entries that we have unwired.
7169 */
7170 if (need_wakeup)
7171 vm_map_entry_wakeup(map);
7172 return(KERN_SUCCESS);
7173
7174}
7175
7176kern_return_t
7177vm_map_unwire(
7178 vm_map_t map,
7179 vm_map_offset_t start,
7180 vm_map_offset_t end,
7181 boolean_t user_wire)
7182{
7183 return vm_map_unwire_nested(map, start, end,
7184 user_wire, (pmap_t)NULL, 0);
7185}
7186
7187
7188/*
7189 * vm_map_entry_delete: [ internal use only ]
7190 *
7191 * Deallocate the given entry from the target map.
7192 */
7193static void
7194vm_map_entry_delete(
7195 vm_map_t map,
7196 vm_map_entry_t entry)
7197{
7198 vm_map_offset_t s, e;
7199 vm_object_t object;
7200 vm_map_t submap;
7201
7202 s = entry->vme_start;
7203 e = entry->vme_end;
7204 assert(page_aligned(s));
7205 assert(page_aligned(e));
7206 if (entry->map_aligned == TRUE) {
7207 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7208 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7209 }
7210 assert(entry->wired_count == 0);
7211 assert(entry->user_wired_count == 0);
7212 assert(!entry->permanent);
7213
7214 if (entry->is_sub_map) {
7215 object = NULL;
7216 submap = VME_SUBMAP(entry);
7217 } else {
7218 submap = NULL;
7219 object = VME_OBJECT(entry);
7220 }
7221
7222 vm_map_store_entry_unlink(map, entry);
7223 map->size -= e - s;
7224
7225 vm_map_entry_dispose(map, entry);
7226
7227 vm_map_unlock(map);
7228 /*
7229 * Deallocate the object only after removing all
7230 * pmap entries pointing to its pages.
7231 */
7232 if (submap)
7233 vm_map_deallocate(submap);
7234 else
7235 vm_object_deallocate(object);
7236
7237}
7238
7239void
7240vm_map_submap_pmap_clean(
7241 vm_map_t map,
7242 vm_map_offset_t start,
7243 vm_map_offset_t end,
7244 vm_map_t sub_map,
7245 vm_map_offset_t offset)
7246{
7247 vm_map_offset_t submap_start;
7248 vm_map_offset_t submap_end;
7249 vm_map_size_t remove_size;
7250 vm_map_entry_t entry;
7251
7252 submap_end = offset + (end - start);
7253 submap_start = offset;
7254
7255 vm_map_lock_read(sub_map);
7256 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
7257
7258 remove_size = (entry->vme_end - entry->vme_start);
7259 if(offset > entry->vme_start)
7260 remove_size -= offset - entry->vme_start;
7261
7262
7263 if(submap_end < entry->vme_end) {
7264 remove_size -=
7265 entry->vme_end - submap_end;
7266 }
7267 if(entry->is_sub_map) {
7268 vm_map_submap_pmap_clean(
7269 sub_map,
7270 start,
7271 start + remove_size,
7272 VME_SUBMAP(entry),
7273 VME_OFFSET(entry));
7274 } else {
7275
7276 if((map->mapped_in_other_pmaps) && (map->map_refcnt)
7277 && (VME_OBJECT(entry) != NULL)) {
7278 vm_object_pmap_protect_options(
7279 VME_OBJECT(entry),
7280 (VME_OFFSET(entry) +
7281 offset -
7282 entry->vme_start),
7283 remove_size,
7284 PMAP_NULL,
7285 entry->vme_start,
7286 VM_PROT_NONE,
7287 PMAP_OPTIONS_REMOVE);
7288 } else {
7289 pmap_remove(map->pmap,
7290 (addr64_t)start,
7291 (addr64_t)(start + remove_size));
7292 }
7293 }
7294 }
7295
7296 entry = entry->vme_next;
7297
7298 while((entry != vm_map_to_entry(sub_map))
7299 && (entry->vme_start < submap_end)) {
7300 remove_size = (entry->vme_end - entry->vme_start);
7301 if(submap_end < entry->vme_end) {
7302 remove_size -= entry->vme_end - submap_end;
7303 }
7304 if(entry->is_sub_map) {
7305 vm_map_submap_pmap_clean(
7306 sub_map,
7307 (start + entry->vme_start) - offset,
7308 ((start + entry->vme_start) - offset) + remove_size,
7309 VME_SUBMAP(entry),
7310 VME_OFFSET(entry));
7311 } else {
7312 if((map->mapped_in_other_pmaps) && (map->map_refcnt)
7313 && (VME_OBJECT(entry) != NULL)) {
7314 vm_object_pmap_protect_options(
7315 VME_OBJECT(entry),
7316 VME_OFFSET(entry),
7317 remove_size,
7318 PMAP_NULL,
7319 entry->vme_start,
7320 VM_PROT_NONE,
7321 PMAP_OPTIONS_REMOVE);
7322 } else {
7323 pmap_remove(map->pmap,
7324 (addr64_t)((start + entry->vme_start)
7325 - offset),
7326 (addr64_t)(((start + entry->vme_start)
7327 - offset) + remove_size));
7328 }
7329 }
7330 entry = entry->vme_next;
7331 }
7332 vm_map_unlock_read(sub_map);
7333 return;
7334}
7335
7336/*
7337 * virt_memory_guard_ast:
7338 *
7339 * Handle the AST callout for a virtual memory guard.
7340 * raise an EXC_GUARD exception and terminate the task
7341 * Raise an EXC_GUARD exception and terminate the task
7342 */
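/*
 * Rough decision sketch for the task_exc_guard bits consulted below
 * (names abbreviated to their TASK_EXC_GUARD_VM_* suffixes):
 *
 *	DELIVER clear            -> return without raising anything
 *	ONCE set                 -> DELIVER is atomically cleared first
 *	CORPSE set, FATAL clear  -> deliver via corpse fork (task_violated_guard)
 *	otherwise                -> synchronous task_exception_notify(EXC_GUARD)
 *	FATAL set                -> the task is killed afterwards
 */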
7343void
7344virt_memory_guard_ast(
7345 thread_t thread,
7346 mach_exception_data_type_t code,
7347 mach_exception_data_type_t subcode)
7348{
7349 task_t task = thread->task;
7350 assert(task != kernel_task);
7351 assert(task == current_task());
7352 uint32_t behavior;
7353
7354 behavior = task->task_exc_guard;
7355
7356 /* Is delivery enabled */
7357 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7358 return;
7359 }
7360
7361 /* If only once, make sure we're that once */
7362 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7363 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7364
7365 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7366 break;
7367 }
7368 behavior = task->task_exc_guard;
7369 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7370 return;
7371 }
7372 }
7373
7374 /* Raise exception via corpse fork or synchronously */
7375 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7376 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7377 task_violated_guard(code, subcode, NULL);
7378 } else {
7379 task_exception_notify(EXC_GUARD, code, subcode);
7380 }
7381
7382 /* Terminate the task if desired */
7383 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7384 task_bsdtask_kill(current_task());
7385 }
7386}
7387
7388/*
7389 * vm_map_guard_exception:
7390 *
7391 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7392 *
7393 * Right now, we do this when we find nothing mapped, or a
7394 * gap in the mapping when a user address space deallocate
7395 * was requested. We report the address of the first gap found.
7396 */
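/*
 * Sketch of the encoding performed below, using the macros from
 * <kern/exc_guard.h>: the exception code packs the guard type and flavor,
 * and the subcode carries the address of the offending gap:
 *
 *	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_VIRT_MEMORY);
 *	EXC_GUARD_ENCODE_FLAVOR(code, reason);
 *	EXC_GUARD_ENCODE_TARGET(code, target);
 *	subcode = (uint64_t)gap_start;
 */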
7397static void
7398vm_map_guard_exception(
7399 vm_map_offset_t gap_start,
7400 unsigned reason)
7401{
7402 mach_exception_code_t code = 0;
7403 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7404 unsigned int target = 0; /* should we pass in pid associated with map? */
7405 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7406
7407 /* Can't deliver exceptions to kernel task */
7408 if (current_task() == kernel_task)
7409 return;
7410
7411 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7412 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7413 EXC_GUARD_ENCODE_TARGET(code, target);
7414 thread_guard_violation(current_thread(), code, subcode);
7415}
7416
7417/*
7418 * vm_map_delete: [ internal use only ]
7419 *
7420 * Deallocates the given address range from the target map.
7421 * Removes all user wirings. Unwires one kernel wiring if
7422 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7423 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7424 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7425 *
7426 * This routine is called with map locked and leaves map locked.
7427 */
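/*
 * Illustrative sketch (hypothetical internal caller): tearing down a range
 * while tolerating gaps and preserving any nested pmap might look like:
 *
 *	kr = vm_map_delete(map, start, end,
 *	    VM_MAP_REMOVE_GAPS_OK | VM_MAP_REMOVE_NO_UNNESTING,
 *	    VM_MAP_NULL);
 *
 * As noted above, the map must already be locked by the caller.
 */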
7428static kern_return_t
7429vm_map_delete(
7430 vm_map_t map,
7431 vm_map_offset_t start,
7432 vm_map_offset_t end,
7433 int flags,
7434 vm_map_t zap_map)
7435{
7436 vm_map_entry_t entry, next;
7437 struct vm_map_entry *first_entry, tmp_entry;
7438 vm_map_offset_t s;
7439 vm_object_t object;
7440 boolean_t need_wakeup;
7441 unsigned int last_timestamp = ~0; /* unlikely value */
7442 int interruptible;
7443 vm_map_offset_t gap_start;
7444 vm_map_offset_t save_start = start;
7445 vm_map_offset_t save_end = end;
7446	const vm_map_offset_t FIND_GAP = 1;	/* a non-page-aligned value */
7447	const vm_map_offset_t GAPS_OK = 2;	/* a different non-page-aligned value */
7448
7449 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK))
7450 gap_start = FIND_GAP;
7451 else
7452 gap_start = GAPS_OK;
7453
7454 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7455 THREAD_ABORTSAFE : THREAD_UNINT;
7456
7457 /*
7458 * All our DMA I/O operations in IOKit are currently done by
7459 * wiring through the map entries of the task requesting the I/O.
7460 * Because of this, we must always wait for kernel wirings
7461 * to go away on the entries before deleting them.
7462 *
7463 * Any caller who wants to actually remove a kernel wiring
7464 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7465 * properly remove one wiring instead of blasting through
7466 * them all.
7467 */
7468 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7469
7470 while(1) {
7471 /*
7472 * Find the start of the region, and clip it
7473 */
7474 if (vm_map_lookup_entry(map, start, &first_entry)) {
7475 entry = first_entry;
7476 if (map == kalloc_map &&
7477 (entry->vme_start != start ||
7478 entry->vme_end != end)) {
7479 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7480 "mismatched entry %p [0x%llx:0x%llx]\n",
7481 map,
7482 (uint64_t)start,
7483 (uint64_t)end,
7484 entry,
7485 (uint64_t)entry->vme_start,
7486 (uint64_t)entry->vme_end);
7487 }
7488
7489 /*
7490 * If in a superpage, extend the range to include the start of the mapping.
7491 */
7492 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7493 start = SUPERPAGE_ROUND_DOWN(start);
7494 continue;
7495 }
7496
7497 if (start == entry->vme_start) {
7498 /*
7499 * No need to clip. We don't want to cause
7500 * any unnecessary unnesting in this case...
7501 */
7502 } else {
7503 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7504 entry->map_aligned &&
7505 !VM_MAP_PAGE_ALIGNED(
7506 start,
7507 VM_MAP_PAGE_MASK(map))) {
7508 /*
7509 * The entry will no longer be
7510 * map-aligned after clipping
7511 * and the caller said it's OK.
7512 */
7513 entry->map_aligned = FALSE;
7514 }
7515 if (map == kalloc_map) {
7516 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7517 " clipping %p at 0x%llx\n",
7518 map,
7519 (uint64_t)start,
7520 (uint64_t)end,
7521 entry,
7522 (uint64_t)start);
7523 }
7524 vm_map_clip_start(map, entry, start);
7525 }
7526
7527 /*
7528 * Fix the lookup hint now, rather than each
7529 * time through the loop.
7530 */
7531 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7532
7533 } else {
7534
7535 if (map->pmap == kernel_pmap &&
7536 map->map_refcnt != 0) {
7537 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7538 "no map entry at 0x%llx\n",
7539 map,
7540 (uint64_t)start,
7541 (uint64_t)end,
7542 (uint64_t)start);
7543 }
7544 entry = first_entry->vme_next;
7545 if (gap_start == FIND_GAP)
7546 gap_start = start;
7547 }
7548 break;
7549 }
7550 if (entry->superpage_size)
7551 end = SUPERPAGE_ROUND_UP(end);
7552
7553 need_wakeup = FALSE;
7554 /*
7555 * Step through all entries in this region
7556 */
7557 s = entry->vme_start;
7558 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7559 /*
7560 * At this point, we have deleted all the memory entries
7561 * between "start" and "s". We still need to delete
7562 * all memory entries between "s" and "end".
7563 * While we were blocked and the map was unlocked, some
7564 * new memory entries could have been re-allocated between
7565 * "start" and "s" and we don't want to mess with those.
7566 * Some of those entries could even have been re-assembled
7567 * with an entry after "s" (in vm_map_simplify_entry()), so
7568 * we may have to vm_map_clip_start() again.
7569 */
7570
7571 if (entry->vme_start >= s) {
7572 /*
7573 * This entry starts on or after "s"
7574 * so no need to clip its start.
7575 */
7576 } else {
7577 /*
7578 * This entry has been re-assembled by a
7579 * vm_map_simplify_entry(). We need to
7580 * re-clip its start.
7581 */
7582 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7583 entry->map_aligned &&
7584 !VM_MAP_PAGE_ALIGNED(s,
7585 VM_MAP_PAGE_MASK(map))) {
7586 /*
7587 * The entry will no longer be map-aligned
7588 * after clipping and the caller said it's OK.
7589 */
7590 entry->map_aligned = FALSE;
7591 }
7592 if (map == kalloc_map) {
7593 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7594 "clipping %p at 0x%llx\n",
7595 map,
7596 (uint64_t)start,
7597 (uint64_t)end,
7598 entry,
7599 (uint64_t)s);
7600 }
7601 vm_map_clip_start(map, entry, s);
7602 }
7603 if (entry->vme_end <= end) {
7604 /*
7605 * This entry is going away completely, so no need
7606 * to clip and possibly cause an unnecessary unnesting.
7607 */
7608 } else {
7609 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7610 entry->map_aligned &&
7611 !VM_MAP_PAGE_ALIGNED(end,
7612 VM_MAP_PAGE_MASK(map))) {
7613 /*
7614 * The entry will no longer be map-aligned
7615 * after clipping and the caller said it's OK.
7616 */
7617 entry->map_aligned = FALSE;
7618 }
7619 if (map == kalloc_map) {
7620 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7621 "clipping %p at 0x%llx\n",
7622 map,
7623 (uint64_t)start,
7624 (uint64_t)end,
7625 entry,
7626 (uint64_t)end);
7627 }
7628 vm_map_clip_end(map, entry, end);
7629 }
7630
7631 if (entry->permanent) {
7632 if (map->pmap == kernel_pmap) {
7633 panic("%s(%p,0x%llx,0x%llx): "
7634 "attempt to remove permanent "
7635 "VM map entry "
7636 "%p [0x%llx:0x%llx]\n",
7637 __FUNCTION__,
7638 map,
7639 (uint64_t) start,
7640 (uint64_t) end,
7641 entry,
7642 (uint64_t) entry->vme_start,
7643 (uint64_t) entry->vme_end);
7644 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7645// printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7646 entry->permanent = FALSE;
7647#if PMAP_CS
7648 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7649 entry->permanent = FALSE;
7650
7651 printf("%d[%s] %s(0x%llx,0x%llx): "
7652 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7653 "prot 0x%x/0x%x\n",
7654 proc_selfpid(),
7655 (current_task()->bsd_info
7656 ? proc_name_address(current_task()->bsd_info)
7657 : "?"),
7658 __FUNCTION__,
7659 (uint64_t) start,
7660 (uint64_t) end,
7661 (uint64_t)entry->vme_start,
7662 (uint64_t)entry->vme_end,
7663 entry->protection,
7664 entry->max_protection);
7665#endif
7666 } else {
7667 if (vm_map_executable_immutable_verbose) {
7668 printf("%d[%s] %s(0x%llx,0x%llx): "
7669 "permanent entry [0x%llx:0x%llx] "
7670 "prot 0x%x/0x%x\n",
7671 proc_selfpid(),
7672 (current_task()->bsd_info
7673 ? proc_name_address(current_task()->bsd_info)
7674 : "?"),
7675 __FUNCTION__,
7676 (uint64_t) start,
7677 (uint64_t) end,
7678 (uint64_t)entry->vme_start,
7679 (uint64_t)entry->vme_end,
7680 entry->protection,
7681 entry->max_protection);
7682 }
7683 /*
7684 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7685 */
7686 DTRACE_VM5(vm_map_delete_permanent,
7687 vm_map_offset_t, entry->vme_start,
7688 vm_map_offset_t, entry->vme_end,
7689 vm_prot_t, entry->protection,
7690 vm_prot_t, entry->max_protection,
7691 int, VME_ALIAS(entry));
7692 }
7693 }
7694
7695
7696 if (entry->in_transition) {
7697 wait_result_t wait_result;
7698
7699 /*
7700 * Another thread is wiring/unwiring this entry.
7701 * Let the other thread know we are waiting.
7702 */
7703 assert(s == entry->vme_start);
7704 entry->needs_wakeup = TRUE;
7705
7706 /*
7707 * wake up anybody waiting on entries that we have
7708 * already unwired/deleted.
7709 */
7710 if (need_wakeup) {
7711 vm_map_entry_wakeup(map);
7712 need_wakeup = FALSE;
7713 }
7714
7715 wait_result = vm_map_entry_wait(map, interruptible);
7716
7717 if (interruptible &&
7718 wait_result == THREAD_INTERRUPTED) {
7719 /*
7720 * We do not clear the needs_wakeup flag,
7721 * since we cannot tell if we were the only one.
7722 */
7723 return KERN_ABORTED;
7724 }
7725
7726 /*
7727 * The entry could have been clipped or it
7728 * may not exist anymore. Look it up again.
7729 */
7730 if (!vm_map_lookup_entry(map, s, &first_entry)) {
7731 /*
7732 * User: use the next entry
7733 */
7734 if (gap_start == FIND_GAP)
7735 gap_start = s;
7736 entry = first_entry->vme_next;
7737 s = entry->vme_start;
7738 } else {
7739 entry = first_entry;
7740 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7741 }
7742 last_timestamp = map->timestamp;
7743 continue;
7744 } /* end in_transition */
7745
7746 if (entry->wired_count) {
7747 boolean_t user_wire;
7748
7749 user_wire = entry->user_wired_count > 0;
7750
7751 /*
7752 * Remove a kernel wiring if requested
7753 */
7754 if (flags & VM_MAP_REMOVE_KUNWIRE) {
7755 entry->wired_count--;
7756 }
7757
7758 /*
7759 * Remove all user wirings for proper accounting
7760 */
7761 if (entry->user_wired_count > 0) {
7762 while (entry->user_wired_count)
7763 subtract_wire_counts(map, entry, user_wire);
7764 }
7765
7766 if (entry->wired_count != 0) {
7767 assert(map != kernel_map);
7768 /*
7769 * Cannot continue. Typical case is when
7770				 * a user thread has physical I/O pending
7771 * on this page. Either wait for the
7772 * kernel wiring to go away or return an
7773 * error.
7774 */
7775 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7776 wait_result_t wait_result;
7777
7778 assert(s == entry->vme_start);
7779 entry->needs_wakeup = TRUE;
7780 wait_result = vm_map_entry_wait(map,
7781 interruptible);
7782
7783 if (interruptible &&
7784 wait_result == THREAD_INTERRUPTED) {
7785 /*
7786 * We do not clear the
7787 * needs_wakeup flag, since we
7788 * cannot tell if we were the
7789 * only one.
7790 */
7791 return KERN_ABORTED;
7792 }
7793
7794 /*
7795 * The entry could have been clipped or
7796 * it may not exist anymore. Look it
7797 * up again.
7798 */
7799 if (!vm_map_lookup_entry(map, s,
7800 &first_entry)) {
7801 assert(map != kernel_map);
7802 /*
7803 * User: use the next entry
7804 */
7805 if (gap_start == FIND_GAP)
7806 gap_start = s;
7807 entry = first_entry->vme_next;
7808 s = entry->vme_start;
7809 } else {
7810 entry = first_entry;
7811 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7812 }
7813 last_timestamp = map->timestamp;
7814 continue;
7815 }
7816 else {
7817 return KERN_FAILURE;
7818 }
7819 }
7820
7821 entry->in_transition = TRUE;
7822 /*
7823 * copy current entry. see comment in vm_map_wire()
7824 */
7825 tmp_entry = *entry;
7826 assert(s == entry->vme_start);
7827
7828 /*
7829 * We can unlock the map now. The in_transition
7830			 * state guarantees existence of the entry.
7831 */
7832 vm_map_unlock(map);
7833
7834 if (tmp_entry.is_sub_map) {
7835 vm_map_t sub_map;
7836 vm_map_offset_t sub_start, sub_end;
7837 pmap_t pmap;
7838 vm_map_offset_t pmap_addr;
7839
7840
7841 sub_map = VME_SUBMAP(&tmp_entry);
7842 sub_start = VME_OFFSET(&tmp_entry);
7843 sub_end = sub_start + (tmp_entry.vme_end -
7844 tmp_entry.vme_start);
7845 if (tmp_entry.use_pmap) {
7846 pmap = sub_map->pmap;
7847 pmap_addr = tmp_entry.vme_start;
7848 } else {
7849 pmap = map->pmap;
7850 pmap_addr = tmp_entry.vme_start;
7851 }
7852 (void) vm_map_unwire_nested(sub_map,
7853 sub_start, sub_end,
7854 user_wire,
7855 pmap, pmap_addr);
7856 } else {
7857
7858 if (VME_OBJECT(&tmp_entry) == kernel_object) {
7859 pmap_protect_options(
7860 map->pmap,
7861 tmp_entry.vme_start,
7862 tmp_entry.vme_end,
7863 VM_PROT_NONE,
7864 PMAP_OPTIONS_REMOVE,
7865 NULL);
7866 }
7867 vm_fault_unwire(map, &tmp_entry,
7868 VME_OBJECT(&tmp_entry) == kernel_object,
7869 map->pmap, tmp_entry.vme_start);
7870 }
7871
7872 vm_map_lock(map);
7873
7874 if (last_timestamp+1 != map->timestamp) {
7875 /*
7876 * Find the entry again. It could have
7877 * been clipped after we unlocked the map.
7878 */
7879 if (!vm_map_lookup_entry(map, s, &first_entry)){
7880 assert((map != kernel_map) &&
7881 (!entry->is_sub_map));
7882 if (gap_start == FIND_GAP)
7883 gap_start = s;
7884 first_entry = first_entry->vme_next;
7885 s = first_entry->vme_start;
7886 } else {
7887 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7888 }
7889 } else {
7890 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7891 first_entry = entry;
7892 }
7893
7894 last_timestamp = map->timestamp;
7895
7896 entry = first_entry;
7897 while ((entry != vm_map_to_entry(map)) &&
7898 (entry->vme_start < tmp_entry.vme_end)) {
7899 assert(entry->in_transition);
7900 entry->in_transition = FALSE;
7901 if (entry->needs_wakeup) {
7902 entry->needs_wakeup = FALSE;
7903 need_wakeup = TRUE;
7904 }
7905 entry = entry->vme_next;
7906 }
7907 /*
7908 * We have unwired the entry(s). Go back and
7909 * delete them.
7910 */
7911 entry = first_entry;
7912 continue;
7913 }
7914
7915 /* entry is unwired */
7916 assert(entry->wired_count == 0);
7917 assert(entry->user_wired_count == 0);
7918
7919 assert(s == entry->vme_start);
7920
7921 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
7922 /*
7923 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
7924 * vm_map_delete(), some map entries might have been
7925 * transferred to a "zap_map", which doesn't have a
7926 * pmap. The original pmap has already been flushed
7927 * in the vm_map_delete() call targeting the original
7928 * map, but when we get to destroying the "zap_map",
7929 * we don't have any pmap to flush, so let's just skip
7930 * all this.
7931 */
7932 } else if (entry->is_sub_map) {
7933 if (entry->use_pmap) {
7934#ifndef NO_NESTED_PMAP
7935 int pmap_flags;
7936
7937 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
7938 /*
7939 * This is the final cleanup of the
7940 * address space being terminated.
7941 * No new mappings are expected and
7942 * we don't really need to unnest the
7943 * shared region (and lose the "global"
7944 * pmap mappings, if applicable).
7945 *
7946 * Tell the pmap layer that we're
7947 * "clean" wrt nesting.
7948 */
7949 pmap_flags = PMAP_UNNEST_CLEAN;
7950 } else {
7951 /*
7952 * We're unmapping part of the nested
7953 * shared region, so we can't keep the
7954 * nested pmap.
7955 */
7956 pmap_flags = 0;
7957 }
7958 pmap_unnest_options(
7959 map->pmap,
7960 (addr64_t)entry->vme_start,
7961 entry->vme_end - entry->vme_start,
7962 pmap_flags);
7963#endif /* NO_NESTED_PMAP */
7964 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
7965 /* clean up parent map/maps */
7966 vm_map_submap_pmap_clean(
7967 map, entry->vme_start,
7968 entry->vme_end,
7969 VME_SUBMAP(entry),
7970 VME_OFFSET(entry));
7971 }
7972 } else {
7973 vm_map_submap_pmap_clean(
7974 map, entry->vme_start, entry->vme_end,
7975 VME_SUBMAP(entry),
7976 VME_OFFSET(entry));
7977 }
7978 } else if (VME_OBJECT(entry) != kernel_object &&
7979 VME_OBJECT(entry) != compressor_object) {
7980 object = VME_OBJECT(entry);
7981 if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
7982 vm_object_pmap_protect_options(
7983 object, VME_OFFSET(entry),
7984 entry->vme_end - entry->vme_start,
7985 PMAP_NULL,
7986 entry->vme_start,
7987 VM_PROT_NONE,
7988 PMAP_OPTIONS_REMOVE);
7989 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
7990 (map->pmap == kernel_pmap)) {
7991 /* Remove translations associated
7992 * with this range unless the entry
7993 * does not have an object, or
7994 * it's the kernel map or a descendant
7995 * since the platform could potentially
7996 * create "backdoor" mappings invisible
7997 * to the VM. It is expected that
7998 * objectless, non-kernel ranges
7999 * do not have such VM invisible
8000 * translations.
8001 */
8002 pmap_remove_options(map->pmap,
8003 (addr64_t)entry->vme_start,
8004 (addr64_t)entry->vme_end,
8005 PMAP_OPTIONS_REMOVE);
8006 }
8007 }
8008
8009 if (entry->iokit_acct) {
8010 /* alternate accounting */
8011 DTRACE_VM4(vm_map_iokit_unmapped_region,
8012 vm_map_t, map,
8013 vm_map_offset_t, entry->vme_start,
8014 vm_map_offset_t, entry->vme_end,
8015 int, VME_ALIAS(entry));
8016 vm_map_iokit_unmapped_region(map,
8017 (entry->vme_end -
8018 entry->vme_start));
8019 entry->iokit_acct = FALSE;
8020 entry->use_pmap = FALSE;
8021 }
8022
8023 /*
8024 * All pmap mappings for this map entry must have been
8025 * cleared by now.
8026 */
8027#if DEBUG
8028 assert(vm_map_pmap_is_empty(map,
8029 entry->vme_start,
8030 entry->vme_end));
8031#endif /* DEBUG */
8032
8033 next = entry->vme_next;
8034
8035 if (map->pmap == kernel_pmap &&
8036 map->map_refcnt != 0 &&
8037 entry->vme_end < end &&
8038 (next == vm_map_to_entry(map) ||
8039 next->vme_start != entry->vme_end)) {
8040 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8041 "hole after %p at 0x%llx\n",
8042 map,
8043 (uint64_t)start,
8044 (uint64_t)end,
8045 entry,
8046 (uint64_t)entry->vme_end);
8047 }
8048
8049 /*
8050 * If the desired range didn't end with "entry", then there is a gap if
8051 * we wrapped around to the start of the map or if "entry" and "next"
8052 * aren't contiguous.
8053 *
8054 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8055		 * For example, devices with 4K hardware pages where map entry sizes are now all 16K.
8056 */
8057 if (gap_start == FIND_GAP &&
8058 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8059 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8060 gap_start = entry->vme_end;
8061 }
8062 s = next->vme_start;
8063 last_timestamp = map->timestamp;
8064
8065 if (entry->permanent) {
8066 /*
8067 * A permanent entry can not be removed, so leave it
8068 * in place but remove all access permissions.
8069 */
8070 entry->protection = VM_PROT_NONE;
8071 entry->max_protection = VM_PROT_NONE;
8072 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8073 zap_map != VM_MAP_NULL) {
8074 vm_map_size_t entry_size;
8075 /*
8076 * The caller wants to save the affected VM map entries
8077 * into the "zap_map". The caller will take care of
8078 * these entries.
8079 */
8080 /* unlink the entry from "map" ... */
8081 vm_map_store_entry_unlink(map, entry);
8082 /* ... and add it to the end of the "zap_map" */
8083 vm_map_store_entry_link(zap_map,
8084 vm_map_last_entry(zap_map),
8085 entry,
8086 VM_MAP_KERNEL_FLAGS_NONE);
8087 entry_size = entry->vme_end - entry->vme_start;
8088 map->size -= entry_size;
8089 zap_map->size += entry_size;
8090 /* we didn't unlock the map, so no timestamp increase */
8091 last_timestamp--;
8092 } else {
8093 vm_map_entry_delete(map, entry);
8094 /* vm_map_entry_delete unlocks the map */
8095 vm_map_lock(map);
8096 }
8097
8098 entry = next;
8099
8100 if(entry == vm_map_to_entry(map)) {
8101 break;
8102 }
8103 if (last_timestamp + 1 != map->timestamp) {
8104 /*
8105 * We are responsible for deleting everything
8106 * from the given space. If someone has interfered,
8107 * we pick up where we left off. Back fills should
8108 * be all right for anyone, except map_delete, and
8109 * we have to assume that the task has been fully
8110			 * disabled before we get here.
8111 */
8112 if (!vm_map_lookup_entry(map, s, &entry)){
8113 entry = entry->vme_next;
8114
8115 /*
8116 * Nothing found for s. If we weren't already done, then there is a gap.
8117 */
8118 if (gap_start == FIND_GAP && s < end)
8119 gap_start = s;
8120 s = entry->vme_start;
8121 } else {
8122 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8123 }
8124 /*
8125			 * Others can not only allocate behind us; we can
8126			 * also see coalescing while we don't hold the map lock.
8127 */
8128 if (entry == vm_map_to_entry(map)) {
8129 break;
8130 }
8131 }
8132 last_timestamp = map->timestamp;
8133 }
8134
8135 if (map->wait_for_space)
8136 thread_wakeup((event_t) map);
8137 /*
8138 * wake up anybody waiting on entries that we have already deleted.
8139 */
8140 if (need_wakeup)
8141 vm_map_entry_wakeup(map);
8142
8143 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8144 DTRACE_VM3(kern_vm_deallocate_gap,
8145 vm_map_offset_t, gap_start,
8146 vm_map_offset_t, save_start,
8147 vm_map_offset_t, save_end);
8148 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8149#if defined(DEVELOPMENT) || defined(DEBUG)
8150 /* log just once if not checking, otherwise log each one */
8151 if (!map->warned_delete_gap ||
8152 (task_exc_guard_default & TASK_EXC_GUARD_VM_ALL) != 0) {
8153 printf("vm_map_delete: map %p [%p...%p] nothing at %p\n",
8154 (void *)map, (void *)save_start, (void *)save_end,
8155 (void *)gap_start);
8156 if (!map->warned_delete_gap) {
8157 map->warned_delete_gap = 1;
8158 }
8159 }
8160#endif
8161 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8162 }
8163 }
8164
8165 return KERN_SUCCESS;
8166}
8167
8168/*
8169 * vm_map_remove:
8170 *
8171 * Remove the given address range from the target map.
8172 * This is the exported form of vm_map_delete.
8173 */
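/*
 * Illustrative usage sketch (call-site conventions assumed, not taken
 * from this file): callers typically pass page-rounded bounds and a
 * flags word built from the VM_MAP_REMOVE_* bits, e.g.
 *
 *	kr = vm_map_remove(map,
 *			   vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
 *			   vm_map_round_page(addr + size,
 *					     VM_MAP_PAGE_MASK(map)),
 *			   VM_MAP_NO_FLAGS);
 *
 * VM_MAP_NO_FLAGS is assumed here as the common "no special handling"
 * value; the exact flag set is caller-specific.
 */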
8174kern_return_t
8175vm_map_remove(
8176 vm_map_t map,
8177 vm_map_offset_t start,
8178 vm_map_offset_t end,
8179 boolean_t flags)
8180{
8181 kern_return_t result;
8182
8183 vm_map_lock(map);
8184 VM_MAP_RANGE_CHECK(map, start, end);
8185 /*
8186 * For the zone_map, the kernel controls the allocation/freeing of memory.
8187 * Any free to the zone_map should be within the bounds of the map and
8188 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8189 * free to the zone_map into a no-op, there is a problem and we should
8190 * panic.
8191 */
8192 if ((map == zone_map) && (start == end))
8193 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8194 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8195 vm_map_unlock(map);
8196
8197 return(result);
8198}
8199
8200/*
8201 * vm_map_remove_locked:
8202 *
8203 *	Remove the given address range from the target map, which must already be locked by the caller.
8204 *	This is the exported form of vm_map_delete for callers that hold the map lock.
8205 */
8206kern_return_t
8207vm_map_remove_locked(
8208 vm_map_t map,
8209 vm_map_offset_t start,
8210 vm_map_offset_t end,
8211 boolean_t flags)
8212{
8213 kern_return_t result;
8214
8215 VM_MAP_RANGE_CHECK(map, start, end);
8216 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8217 return(result);
8218}
8219
8220
8221/*
8222 * Routine: vm_map_copy_allocate
8223 *
8224 * Description:
8225 * Allocates and initializes a map copy object.
8226 */
8227static vm_map_copy_t
8228vm_map_copy_allocate(void)
8229{
8230 vm_map_copy_t new_copy;
8231
8232 new_copy = zalloc(vm_map_copy_zone);
8233 bzero(new_copy, sizeof (*new_copy));
8234 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8235 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8236 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8237 return new_copy;
8238}
8239
8240/*
8241 * Routine: vm_map_copy_discard
8242 *
8243 * Description:
8244 * Dispose of a map copy object (returned by
8245 * vm_map_copyin).
8246 */
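/*
 * Usage note: a copy object returned by vm_map_copyin() that is not
 * consumed by a successful vm_map_copyout() or vm_map_copy_overwrite()
 * must eventually be passed to vm_map_copy_discard() by its holder;
 * otherwise the map entries and VM objects it references are leaked.
 */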
8247void
8248vm_map_copy_discard(
8249 vm_map_copy_t copy)
8250{
8251 if (copy == VM_MAP_COPY_NULL)
8252 return;
8253
8254 switch (copy->type) {
8255 case VM_MAP_COPY_ENTRY_LIST:
8256 while (vm_map_copy_first_entry(copy) !=
8257 vm_map_copy_to_entry(copy)) {
8258 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8259
8260 vm_map_copy_entry_unlink(copy, entry);
8261 if (entry->is_sub_map) {
8262 vm_map_deallocate(VME_SUBMAP(entry));
8263 } else {
8264 vm_object_deallocate(VME_OBJECT(entry));
8265 }
8266 vm_map_copy_entry_dispose(copy, entry);
8267 }
8268 break;
8269 case VM_MAP_COPY_OBJECT:
8270 vm_object_deallocate(copy->cpy_object);
8271 break;
8272 case VM_MAP_COPY_KERNEL_BUFFER:
8273
8274 /*
8275 * The vm_map_copy_t and possibly the data buffer were
8276 * allocated by a single call to kalloc(), i.e. the
8277 * vm_map_copy_t was not allocated out of the zone.
8278 */
8279 if (copy->size > msg_ool_size_small || copy->offset)
8280 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8281 (long long)copy->size, (long long)copy->offset);
8282 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8283 return;
8284 }
8285 zfree(vm_map_copy_zone, copy);
8286}
8287
8288/*
8289 * Routine: vm_map_copy_copy
8290 *
8291 * Description:
8292 * Move the information in a map copy object to
8293 * a new map copy object, leaving the old one
8294 * empty.
8295 *
8296 * This is used by kernel routines that need
8297 * to look at out-of-line data (in copyin form)
8298 * before deciding whether to return SUCCESS.
8299 * If the routine returns FAILURE, the original
8300 * copy object will be deallocated; therefore,
8301 * these routines must make a copy of the copy
8302 * object and leave the original empty so that
8303 * deallocation will not fail.
8304 */
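/*
 * Illustrative pattern (sketch only; "examine" is a hypothetical
 * helper, not a routine in this file):
 *
 *	vm_map_copy_t saved = vm_map_copy_copy(copy);
 *	kr = examine(saved);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(saved);
 *		return kr;	// "copy" is now empty, so the caller's
 *				// later deallocation of it cannot fail
 *	}
 *
 * On success the routine goes on to consume "saved" in place of the
 * original copy object.
 */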
8305vm_map_copy_t
8306vm_map_copy_copy(
8307 vm_map_copy_t copy)
8308{
8309 vm_map_copy_t new_copy;
8310
8311 if (copy == VM_MAP_COPY_NULL)
8312 return VM_MAP_COPY_NULL;
8313
8314 /*
8315 * Allocate a new copy object, and copy the information
8316 * from the old one into it.
8317 */
8318
8319 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8320 *new_copy = *copy;
8321
8322 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8323 /*
8324 * The links in the entry chain must be
8325 * changed to point to the new copy object.
8326 */
8327 vm_map_copy_first_entry(copy)->vme_prev
8328 = vm_map_copy_to_entry(new_copy);
8329 vm_map_copy_last_entry(copy)->vme_next
8330 = vm_map_copy_to_entry(new_copy);
8331 }
8332
8333 /*
8334 * Change the old copy object into one that contains
8335 * nothing to be deallocated.
8336 */
8337 copy->type = VM_MAP_COPY_OBJECT;
8338 copy->cpy_object = VM_OBJECT_NULL;
8339
8340 /*
8341 * Return the new object.
8342 */
8343 return new_copy;
8344}
8345
8346static kern_return_t
8347vm_map_overwrite_submap_recurse(
8348 vm_map_t dst_map,
8349 vm_map_offset_t dst_addr,
8350 vm_map_size_t dst_size)
8351{
8352 vm_map_offset_t dst_end;
8353 vm_map_entry_t tmp_entry;
8354 vm_map_entry_t entry;
8355 kern_return_t result;
8356 boolean_t encountered_sub_map = FALSE;
8357
8358
8359
8360 /*
8361 * Verify that the destination is all writeable
8362 * initially. We have to trunc the destination
8363 * address and round the copy size or we'll end up
8364 * splitting entries in strange ways.
8365 */
8366
8367 dst_end = vm_map_round_page(dst_addr + dst_size,
8368 VM_MAP_PAGE_MASK(dst_map));
8369 vm_map_lock(dst_map);
8370
8371start_pass_1:
8372 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8373 vm_map_unlock(dst_map);
8374 return(KERN_INVALID_ADDRESS);
8375 }
8376
8377 vm_map_clip_start(dst_map,
8378 tmp_entry,
8379 vm_map_trunc_page(dst_addr,
8380 VM_MAP_PAGE_MASK(dst_map)));
8381 if (tmp_entry->is_sub_map) {
8382 /* clipping did unnest if needed */
8383 assert(!tmp_entry->use_pmap);
8384 }
8385
8386 for (entry = tmp_entry;;) {
8387 vm_map_entry_t next;
8388
8389 next = entry->vme_next;
8390 while(entry->is_sub_map) {
8391 vm_map_offset_t sub_start;
8392 vm_map_offset_t sub_end;
8393 vm_map_offset_t local_end;
8394
8395 if (entry->in_transition) {
8396 /*
8397 * Say that we are waiting, and wait for entry.
8398 */
8399 entry->needs_wakeup = TRUE;
8400 vm_map_entry_wait(dst_map, THREAD_UNINT);
8401
8402 goto start_pass_1;
8403 }
8404
8405 encountered_sub_map = TRUE;
8406 sub_start = VME_OFFSET(entry);
8407
8408 if(entry->vme_end < dst_end)
8409 sub_end = entry->vme_end;
8410 else
8411 sub_end = dst_end;
8412 sub_end -= entry->vme_start;
8413 sub_end += VME_OFFSET(entry);
8414 local_end = entry->vme_end;
8415 vm_map_unlock(dst_map);
8416
8417 result = vm_map_overwrite_submap_recurse(
8418 VME_SUBMAP(entry),
8419 sub_start,
8420 sub_end - sub_start);
8421
8422 if(result != KERN_SUCCESS)
8423 return result;
8424 if (dst_end <= entry->vme_end)
8425 return KERN_SUCCESS;
8426 vm_map_lock(dst_map);
8427 if(!vm_map_lookup_entry(dst_map, local_end,
8428 &tmp_entry)) {
8429 vm_map_unlock(dst_map);
8430 return(KERN_INVALID_ADDRESS);
8431 }
8432 entry = tmp_entry;
8433 next = entry->vme_next;
8434 }
8435
8436 if ( ! (entry->protection & VM_PROT_WRITE)) {
8437 vm_map_unlock(dst_map);
8438 return(KERN_PROTECTION_FAILURE);
8439 }
8440
8441 /*
8442 * If the entry is in transition, we must wait
8443 * for it to exit that state. Anything could happen
8444 * when we unlock the map, so start over.
8445 */
8446 if (entry->in_transition) {
8447
8448 /*
8449 * Say that we are waiting, and wait for entry.
8450 */
8451 entry->needs_wakeup = TRUE;
8452 vm_map_entry_wait(dst_map, THREAD_UNINT);
8453
8454 goto start_pass_1;
8455 }
8456
8457/*
8458 * our range is contained completely within this map entry
8459 */
8460 if (dst_end <= entry->vme_end) {
8461 vm_map_unlock(dst_map);
8462 return KERN_SUCCESS;
8463 }
8464/*
8465 * check that the specified range is a contiguous region
8466 */
8467 if ((next == vm_map_to_entry(dst_map)) ||
8468 (next->vme_start != entry->vme_end)) {
8469 vm_map_unlock(dst_map);
8470 return(KERN_INVALID_ADDRESS);
8471 }
8472
8473 /*
8474 * Check for permanent objects in the destination.
8475 */
8476 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8477 ((!VME_OBJECT(entry)->internal) ||
8478 (VME_OBJECT(entry)->true_share))) {
8479 if(encountered_sub_map) {
8480 vm_map_unlock(dst_map);
8481 return(KERN_FAILURE);
8482 }
8483 }
8484
8485
8486 entry = next;
8487 }/* for */
8488 vm_map_unlock(dst_map);
8489 return(KERN_SUCCESS);
8490}
8491
8492/*
8493 * Routine: vm_map_copy_overwrite
8494 *
8495 * Description:
8496 * Copy the memory described by the map copy
8497 * object (copy; returned by vm_map_copyin) onto
8498 * the specified destination region (dst_map, dst_addr).
8499 * The destination must be writeable.
8500 *
8501 * Unlike vm_map_copyout, this routine actually
8502 * writes over previously-mapped memory. If the
8503 * previous mapping was to a permanent (user-supplied)
8504 * memory object, it is preserved.
8505 *
8506 * The attributes (protection and inheritance) of the
8507 * destination region are preserved.
8508 *
8509 * If successful, consumes the copy object.
8510 * Otherwise, the caller is responsible for it.
8511 *
8512 * Implementation notes:
8513 * To overwrite aligned temporary virtual memory, it is
8514 * sufficient to remove the previous mapping and insert
8515 * the new copy. This replacement is done either on
8516 * the whole region (if no permanent virtual memory
8517 * objects are embedded in the destination region) or
8518 * in individual map entries.
8519 *
8520 *	To overwrite permanent virtual memory, it is necessary
8521 * to copy each page, as the external memory management
8522 * interface currently does not provide any optimizations.
8523 *
8524 * Unaligned memory also has to be copied. It is possible
8525 * to use 'vm_trickery' to copy the aligned data. This is
8526 * not done but not hard to implement.
8527 *
8528 * Once a page of permanent memory has been overwritten,
8529 * it is impossible to interrupt this function; otherwise,
8530 * the call would be neither atomic nor location-independent.
8531 * The kernel-state portion of a user thread must be
8532 * interruptible.
8533 *
8534 * It may be expensive to forward all requests that might
8535 * overwrite permanent memory (vm_write, vm_copy) to
8536 * uninterruptible kernel threads. This routine may be
8537 * called by interruptible threads; however, success is
8538 * not guaranteed -- if the request cannot be performed
8539 * atomically and interruptibly, an error indication is
8540 * returned.
8541 */
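/*
 * Illustrative pairing (a sketch under typical usage, not a call site
 * from this file): overwriting an existing mapping with data copied
 * from another range usually looks like
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS)
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *
 * On success the copy object is consumed; on failure the caller still
 * owns "copy" and must vm_map_copy_discard() it.
 */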
8542
8543static kern_return_t
8544vm_map_copy_overwrite_nested(
8545 vm_map_t dst_map,
8546 vm_map_address_t dst_addr,
8547 vm_map_copy_t copy,
8548 boolean_t interruptible,
8549 pmap_t pmap,
8550 boolean_t discard_on_success)
8551{
8552 vm_map_offset_t dst_end;
8553 vm_map_entry_t tmp_entry;
8554 vm_map_entry_t entry;
8555 kern_return_t kr;
8556 boolean_t aligned = TRUE;
8557 boolean_t contains_permanent_objects = FALSE;
8558 boolean_t encountered_sub_map = FALSE;
8559 vm_map_offset_t base_addr;
8560 vm_map_size_t copy_size;
8561 vm_map_size_t total_size;
8562
8563
8564 /*
8565 * Check for null copy object.
8566 */
8567
8568 if (copy == VM_MAP_COPY_NULL)
8569 return(KERN_SUCCESS);
8570
8571 /*
8572 * Check for special kernel buffer allocated
8573 * by new_ipc_kmsg_copyin.
8574 */
8575
8576 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8577 return(vm_map_copyout_kernel_buffer(
8578 dst_map, &dst_addr,
8579 copy, copy->size, TRUE, discard_on_success));
8580 }
8581
8582 /*
8583 * Only works for entry lists at the moment. Will
8584 * support page lists later.
8585 */
8586
8587 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8588
8589 if (copy->size == 0) {
8590 if (discard_on_success)
8591 vm_map_copy_discard(copy);
8592 return(KERN_SUCCESS);
8593 }
8594
8595 /*
8596 * Verify that the destination is all writeable
8597 * initially. We have to trunc the destination
8598 * address and round the copy size or we'll end up
8599 * splitting entries in strange ways.
8600 */
8601
8602 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8603 VM_MAP_PAGE_MASK(dst_map)) ||
8604 !VM_MAP_PAGE_ALIGNED(copy->offset,
8605 VM_MAP_PAGE_MASK(dst_map)) ||
8606 !VM_MAP_PAGE_ALIGNED(dst_addr,
8607 VM_MAP_PAGE_MASK(dst_map)))
8608 {
8609 aligned = FALSE;
8610 dst_end = vm_map_round_page(dst_addr + copy->size,
8611 VM_MAP_PAGE_MASK(dst_map));
8612 } else {
8613 dst_end = dst_addr + copy->size;
8614 }
8615
8616 vm_map_lock(dst_map);
8617
8618 /* LP64todo - remove this check when vm_map_commpage64()
8619 * no longer has to stuff in a map_entry for the commpage
8620 * above the map's max_offset.
8621 */
8622 if (dst_addr >= dst_map->max_offset) {
8623 vm_map_unlock(dst_map);
8624 return(KERN_INVALID_ADDRESS);
8625 }
8626
8627start_pass_1:
8628 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8629 vm_map_unlock(dst_map);
8630 return(KERN_INVALID_ADDRESS);
8631 }
8632 vm_map_clip_start(dst_map,
8633 tmp_entry,
8634 vm_map_trunc_page(dst_addr,
8635 VM_MAP_PAGE_MASK(dst_map)));
8636 for (entry = tmp_entry;;) {
8637 vm_map_entry_t next = entry->vme_next;
8638
8639 while(entry->is_sub_map) {
8640 vm_map_offset_t sub_start;
8641 vm_map_offset_t sub_end;
8642 vm_map_offset_t local_end;
8643
8644 if (entry->in_transition) {
8645
8646 /*
8647 * Say that we are waiting, and wait for entry.
8648 */
8649 entry->needs_wakeup = TRUE;
8650 vm_map_entry_wait(dst_map, THREAD_UNINT);
8651
8652 goto start_pass_1;
8653 }
8654
8655 local_end = entry->vme_end;
8656 if (!(entry->needs_copy)) {
8657				/* if needs_copy we are a COW submap */
8658				/* in such a case we just replace so */
8659				/* there is no need for the */
8660				/* following check. */
8661 encountered_sub_map = TRUE;
8662 sub_start = VME_OFFSET(entry);
8663
8664 if(entry->vme_end < dst_end)
8665 sub_end = entry->vme_end;
8666 else
8667 sub_end = dst_end;
8668 sub_end -= entry->vme_start;
8669 sub_end += VME_OFFSET(entry);
8670 vm_map_unlock(dst_map);
8671
8672 kr = vm_map_overwrite_submap_recurse(
8673 VME_SUBMAP(entry),
8674 sub_start,
8675 sub_end - sub_start);
8676 if(kr != KERN_SUCCESS)
8677 return kr;
8678 vm_map_lock(dst_map);
8679 }
8680
8681 if (dst_end <= entry->vme_end)
8682 goto start_overwrite;
8683 if(!vm_map_lookup_entry(dst_map, local_end,
8684 &entry)) {
8685 vm_map_unlock(dst_map);
8686 return(KERN_INVALID_ADDRESS);
8687 }
8688 next = entry->vme_next;
8689 }
8690
8691 if ( ! (entry->protection & VM_PROT_WRITE)) {
8692 vm_map_unlock(dst_map);
8693 return(KERN_PROTECTION_FAILURE);
8694 }
8695
8696 /*
8697 * If the entry is in transition, we must wait
8698 * for it to exit that state. Anything could happen
8699 * when we unlock the map, so start over.
8700 */
8701 if (entry->in_transition) {
8702
8703 /*
8704 * Say that we are waiting, and wait for entry.
8705 */
8706 entry->needs_wakeup = TRUE;
8707 vm_map_entry_wait(dst_map, THREAD_UNINT);
8708
8709 goto start_pass_1;
8710 }
8711
8712/*
8713 * our range is contained completely within this map entry
8714 */
8715 if (dst_end <= entry->vme_end)
8716 break;
8717/*
8718 * check that the specified range is a contiguous region
8719 */
8720 if ((next == vm_map_to_entry(dst_map)) ||
8721 (next->vme_start != entry->vme_end)) {
8722 vm_map_unlock(dst_map);
8723 return(KERN_INVALID_ADDRESS);
8724 }
8725
8726
8727 /*
8728 * Check for permanent objects in the destination.
8729 */
8730 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8731 ((!VME_OBJECT(entry)->internal) ||
8732 (VME_OBJECT(entry)->true_share))) {
8733 contains_permanent_objects = TRUE;
8734 }
8735
8736 entry = next;
8737 }/* for */
8738
8739start_overwrite:
8740 /*
8741 * If there are permanent objects in the destination, then
8742 * the copy cannot be interrupted.
8743 */
8744
8745 if (interruptible && contains_permanent_objects) {
8746 vm_map_unlock(dst_map);
8747 return(KERN_FAILURE); /* XXX */
8748 }
8749
8750 /*
8751 *
8752	 * Make a second pass, overwriting the data.
8753 * At the beginning of each loop iteration,
8754 * the next entry to be overwritten is "tmp_entry"
8755 * (initially, the value returned from the lookup above),
8756 * and the starting address expected in that entry
8757 * is "start".
8758 */
8759
8760 total_size = copy->size;
8761 if(encountered_sub_map) {
8762 copy_size = 0;
8763 /* re-calculate tmp_entry since we've had the map */
8764 /* unlocked */
8765 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8766 vm_map_unlock(dst_map);
8767 return(KERN_INVALID_ADDRESS);
8768 }
8769 } else {
8770 copy_size = copy->size;
8771 }
8772
8773 base_addr = dst_addr;
8774 while(TRUE) {
8775 /* deconstruct the copy object and do in parts */
8776		/* only in the sub_map, interruptible case */
8777 vm_map_entry_t copy_entry;
8778 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8779 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8780 int nentries;
8781 int remaining_entries = 0;
8782 vm_map_offset_t new_offset = 0;
8783
8784 for (entry = tmp_entry; copy_size == 0;) {
8785 vm_map_entry_t next;
8786
8787 next = entry->vme_next;
8788
8789 /* tmp_entry and base address are moved along */
8790 /* each time we encounter a sub-map. Otherwise */
8791			/* entry can outpace tmp_entry, and the copy_size */
8792 /* may reflect the distance between them */
8793 /* if the current entry is found to be in transition */
8794 /* we will start over at the beginning or the last */
8795 /* encounter of a submap as dictated by base_addr */
8796 /* we will zero copy_size accordingly. */
8797 if (entry->in_transition) {
8798 /*
8799 * Say that we are waiting, and wait for entry.
8800 */
8801 entry->needs_wakeup = TRUE;
8802 vm_map_entry_wait(dst_map, THREAD_UNINT);
8803
8804 if(!vm_map_lookup_entry(dst_map, base_addr,
8805 &tmp_entry)) {
8806 vm_map_unlock(dst_map);
8807 return(KERN_INVALID_ADDRESS);
8808 }
8809 copy_size = 0;
8810 entry = tmp_entry;
8811 continue;
8812 }
8813 if (entry->is_sub_map) {
8814 vm_map_offset_t sub_start;
8815 vm_map_offset_t sub_end;
8816 vm_map_offset_t local_end;
8817
8818 if (entry->needs_copy) {
8819 /* if this is a COW submap */
8820					/* just back the range with an */
8821					/* anonymous entry */
8822 if(entry->vme_end < dst_end)
8823 sub_end = entry->vme_end;
8824 else
8825 sub_end = dst_end;
8826 if(entry->vme_start < base_addr)
8827 sub_start = base_addr;
8828 else
8829 sub_start = entry->vme_start;
8830 vm_map_clip_end(
8831 dst_map, entry, sub_end);
8832 vm_map_clip_start(
8833 dst_map, entry, sub_start);
8834 assert(!entry->use_pmap);
8835 assert(!entry->iokit_acct);
8836 entry->use_pmap = TRUE;
8837 entry->is_sub_map = FALSE;
8838 vm_map_deallocate(
8839 VME_SUBMAP(entry));
8840 VME_OBJECT_SET(entry, NULL);
8841 VME_OFFSET_SET(entry, 0);
8842 entry->is_shared = FALSE;
8843 entry->needs_copy = FALSE;
8844 entry->protection = VM_PROT_DEFAULT;
8845 entry->max_protection = VM_PROT_ALL;
8846 entry->wired_count = 0;
8847 entry->user_wired_count = 0;
8848 if(entry->inheritance
8849 == VM_INHERIT_SHARE)
8850 entry->inheritance = VM_INHERIT_COPY;
8851 continue;
8852 }
8853 /* first take care of any non-sub_map */
8854 /* entries to send */
8855 if(base_addr < entry->vme_start) {
8856 /* stuff to send */
8857 copy_size =
8858 entry->vme_start - base_addr;
8859 break;
8860 }
8861 sub_start = VME_OFFSET(entry);
8862
8863 if(entry->vme_end < dst_end)
8864 sub_end = entry->vme_end;
8865 else
8866 sub_end = dst_end;
8867 sub_end -= entry->vme_start;
8868 sub_end += VME_OFFSET(entry);
8869 local_end = entry->vme_end;
8870 vm_map_unlock(dst_map);
8871 copy_size = sub_end - sub_start;
8872
8873 /* adjust the copy object */
8874 if (total_size > copy_size) {
8875 vm_map_size_t local_size = 0;
8876 vm_map_size_t entry_size;
8877
8878 nentries = 1;
8879 new_offset = copy->offset;
8880 copy_entry = vm_map_copy_first_entry(copy);
8881 while(copy_entry !=
8882 vm_map_copy_to_entry(copy)){
8883 entry_size = copy_entry->vme_end -
8884 copy_entry->vme_start;
8885 if((local_size < copy_size) &&
8886 ((local_size + entry_size)
8887 >= copy_size)) {
8888 vm_map_copy_clip_end(copy,
8889 copy_entry,
8890 copy_entry->vme_start +
8891 (copy_size - local_size));
8892 entry_size = copy_entry->vme_end -
8893 copy_entry->vme_start;
8894 local_size += entry_size;
8895 new_offset += entry_size;
8896 }
8897 if(local_size >= copy_size) {
8898 next_copy = copy_entry->vme_next;
8899 copy_entry->vme_next =
8900 vm_map_copy_to_entry(copy);
8901 previous_prev =
8902 copy->cpy_hdr.links.prev;
8903 copy->cpy_hdr.links.prev = copy_entry;
8904 copy->size = copy_size;
8905 remaining_entries =
8906 copy->cpy_hdr.nentries;
8907 remaining_entries -= nentries;
8908 copy->cpy_hdr.nentries = nentries;
8909 break;
8910 } else {
8911 local_size += entry_size;
8912 new_offset += entry_size;
8913 nentries++;
8914 }
8915 copy_entry = copy_entry->vme_next;
8916 }
8917 }
8918
8919 if((entry->use_pmap) && (pmap == NULL)) {
8920 kr = vm_map_copy_overwrite_nested(
8921 VME_SUBMAP(entry),
8922 sub_start,
8923 copy,
8924 interruptible,
8925 VME_SUBMAP(entry)->pmap,
8926 TRUE);
8927 } else if (pmap != NULL) {
8928 kr = vm_map_copy_overwrite_nested(
8929 VME_SUBMAP(entry),
8930 sub_start,
8931 copy,
8932 interruptible, pmap,
8933 TRUE);
8934 } else {
8935 kr = vm_map_copy_overwrite_nested(
8936 VME_SUBMAP(entry),
8937 sub_start,
8938 copy,
8939 interruptible,
8940 dst_map->pmap,
8941 TRUE);
8942 }
8943 if(kr != KERN_SUCCESS) {
8944 if(next_copy != NULL) {
8945 copy->cpy_hdr.nentries +=
8946 remaining_entries;
8947 copy->cpy_hdr.links.prev->vme_next =
8948 next_copy;
8949 copy->cpy_hdr.links.prev
8950 = previous_prev;
8951 copy->size = total_size;
8952 }
8953 return kr;
8954 }
8955 if (dst_end <= local_end) {
8956 return(KERN_SUCCESS);
8957 }
8958 /* otherwise copy no longer exists, it was */
8959 /* destroyed after successful copy_overwrite */
8960 copy = vm_map_copy_allocate();
8961 copy->type = VM_MAP_COPY_ENTRY_LIST;
8962 copy->offset = new_offset;
8963
8964 /*
8965 * XXX FBDP
8966 * this does not seem to deal with
8967 * the VM map store (R&B tree)
8968 */
8969
8970 total_size -= copy_size;
8971 copy_size = 0;
8972 /* put back remainder of copy in container */
8973 if(next_copy != NULL) {
8974 copy->cpy_hdr.nentries = remaining_entries;
8975 copy->cpy_hdr.links.next = next_copy;
8976 copy->cpy_hdr.links.prev = previous_prev;
8977 copy->size = total_size;
8978 next_copy->vme_prev =
8979 vm_map_copy_to_entry(copy);
8980 next_copy = NULL;
8981 }
8982 base_addr = local_end;
8983 vm_map_lock(dst_map);
8984 if(!vm_map_lookup_entry(dst_map,
8985 local_end, &tmp_entry)) {
8986 vm_map_unlock(dst_map);
8987 return(KERN_INVALID_ADDRESS);
8988 }
8989 entry = tmp_entry;
8990 continue;
8991 }
8992 if (dst_end <= entry->vme_end) {
8993 copy_size = dst_end - base_addr;
8994 break;
8995 }
8996
8997 if ((next == vm_map_to_entry(dst_map)) ||
8998 (next->vme_start != entry->vme_end)) {
8999 vm_map_unlock(dst_map);
9000 return(KERN_INVALID_ADDRESS);
9001 }
9002
9003 entry = next;
9004 }/* for */
9005
9006 next_copy = NULL;
9007 nentries = 1;
9008
9009 /* adjust the copy object */
9010 if (total_size > copy_size) {
9011 vm_map_size_t local_size = 0;
9012 vm_map_size_t entry_size;
9013
9014 new_offset = copy->offset;
9015 copy_entry = vm_map_copy_first_entry(copy);
9016 while(copy_entry != vm_map_copy_to_entry(copy)) {
9017 entry_size = copy_entry->vme_end -
9018 copy_entry->vme_start;
9019 if((local_size < copy_size) &&
9020 ((local_size + entry_size)
9021 >= copy_size)) {
9022 vm_map_copy_clip_end(copy, copy_entry,
9023 copy_entry->vme_start +
9024 (copy_size - local_size));
9025 entry_size = copy_entry->vme_end -
9026 copy_entry->vme_start;
9027 local_size += entry_size;
9028 new_offset += entry_size;
9029 }
9030 if(local_size >= copy_size) {
9031 next_copy = copy_entry->vme_next;
9032 copy_entry->vme_next =
9033 vm_map_copy_to_entry(copy);
9034 previous_prev =
9035 copy->cpy_hdr.links.prev;
9036 copy->cpy_hdr.links.prev = copy_entry;
9037 copy->size = copy_size;
9038 remaining_entries =
9039 copy->cpy_hdr.nentries;
9040 remaining_entries -= nentries;
9041 copy->cpy_hdr.nentries = nentries;
9042 break;
9043 } else {
9044 local_size += entry_size;
9045 new_offset += entry_size;
9046 nentries++;
9047 }
9048 copy_entry = copy_entry->vme_next;
9049 }
9050 }
9051
9052 if (aligned) {
9053 pmap_t local_pmap;
9054
9055 if(pmap)
9056 local_pmap = pmap;
9057 else
9058 local_pmap = dst_map->pmap;
9059
9060 if ((kr = vm_map_copy_overwrite_aligned(
9061 dst_map, tmp_entry, copy,
9062 base_addr, local_pmap)) != KERN_SUCCESS) {
9063 if(next_copy != NULL) {
9064 copy->cpy_hdr.nentries +=
9065 remaining_entries;
9066 copy->cpy_hdr.links.prev->vme_next =
9067 next_copy;
9068 copy->cpy_hdr.links.prev =
9069 previous_prev;
9070 copy->size += copy_size;
9071 }
9072 return kr;
9073 }
9074 vm_map_unlock(dst_map);
9075 } else {
9076 /*
9077 * Performance gain:
9078 *
9079 * if the copy and dst address are misaligned but the same
9080 * offset within the page we can copy_not_aligned the
9081 * misaligned parts and copy aligned the rest. If they are
9082 * aligned but len is unaligned we simply need to copy
9083 * the end bit unaligned. We'll need to split the misaligned
9084			 * bits of the region in this case!
9085 */
9086 /* ALWAYS UNLOCKS THE dst_map MAP */
9087 kr = vm_map_copy_overwrite_unaligned(
9088 dst_map,
9089 tmp_entry,
9090 copy,
9091 base_addr,
9092 discard_on_success);
9093 if (kr != KERN_SUCCESS) {
9094 if(next_copy != NULL) {
9095 copy->cpy_hdr.nentries +=
9096 remaining_entries;
9097 copy->cpy_hdr.links.prev->vme_next =
9098 next_copy;
9099 copy->cpy_hdr.links.prev =
9100 previous_prev;
9101 copy->size += copy_size;
9102 }
9103 return kr;
9104 }
9105 }
9106 total_size -= copy_size;
9107 if(total_size == 0)
9108 break;
9109 base_addr += copy_size;
9110 copy_size = 0;
9111 copy->offset = new_offset;
9112 if(next_copy != NULL) {
9113 copy->cpy_hdr.nentries = remaining_entries;
9114 copy->cpy_hdr.links.next = next_copy;
9115 copy->cpy_hdr.links.prev = previous_prev;
9116 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9117 copy->size = total_size;
9118 }
9119 vm_map_lock(dst_map);
9120 while(TRUE) {
9121 if (!vm_map_lookup_entry(dst_map,
9122 base_addr, &tmp_entry)) {
9123 vm_map_unlock(dst_map);
9124 return(KERN_INVALID_ADDRESS);
9125 }
9126 if (tmp_entry->in_transition) {
9127 entry->needs_wakeup = TRUE;
9128 vm_map_entry_wait(dst_map, THREAD_UNINT);
9129 } else {
9130 break;
9131 }
9132 }
9133 vm_map_clip_start(dst_map,
9134 tmp_entry,
9135 vm_map_trunc_page(base_addr,
9136 VM_MAP_PAGE_MASK(dst_map)));
9137
9138 entry = tmp_entry;
9139 } /* while */
9140
9141 /*
9142 * Throw away the vm_map_copy object
9143 */
9144 if (discard_on_success)
9145 vm_map_copy_discard(copy);
9146
9147 return(KERN_SUCCESS);
9148}/* vm_map_copy_overwrite */
9149
9150kern_return_t
9151vm_map_copy_overwrite(
9152 vm_map_t dst_map,
9153 vm_map_offset_t dst_addr,
9154 vm_map_copy_t copy,
9155 boolean_t interruptible)
9156{
9157 vm_map_size_t head_size, tail_size;
9158 vm_map_copy_t head_copy, tail_copy;
9159 vm_map_offset_t head_addr, tail_addr;
9160 vm_map_entry_t entry;
9161 kern_return_t kr;
9162 vm_map_offset_t effective_page_mask, effective_page_size;
9163
9164 head_size = 0;
9165 tail_size = 0;
9166 head_copy = NULL;
9167 tail_copy = NULL;
9168 head_addr = 0;
9169 tail_addr = 0;
9170
9171 if (interruptible ||
9172 copy == VM_MAP_COPY_NULL ||
9173 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9174 /*
9175 * We can't split the "copy" map if we're interruptible
9176 * or if we don't have a "copy" map...
9177 */
9178 blunt_copy:
9179 return vm_map_copy_overwrite_nested(dst_map,
9180 dst_addr,
9181 copy,
9182 interruptible,
9183 (pmap_t) NULL,
9184 TRUE);
9185 }
9186
9187 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9188 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9189 effective_page_mask);
9190 effective_page_size = effective_page_mask + 1;
9191
9192 if (copy->size < 3 * effective_page_size) {
9193 /*
9194 * Too small to bother with optimizing...
9195 */
9196 goto blunt_copy;
9197 }
9198
9199 if ((dst_addr & effective_page_mask) !=
9200 (copy->offset & effective_page_mask)) {
9201 /*
9202 * Incompatible mis-alignment of source and destination...
9203 */
9204 goto blunt_copy;
9205 }
9206
9207 /*
9208 * Proper alignment or identical mis-alignment at the beginning.
9209 * Let's try and do a small unaligned copy first (if needed)
9210 * and then an aligned copy for the rest.
9211 */
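	/*
	 * Worked example (illustrative numbers only): with a 16K effective
	 * page size and a destination/copy offset of 0x600 into the page,
	 * head_size = 0x4000 - 0x600 = 0x3A00 bytes brings us to the next
	 * page boundary; any final partial page becomes the tail.  Everything
	 * in between is handled by the aligned path below.
	 */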
9212 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9213 head_addr = dst_addr;
9214 head_size = (effective_page_size -
9215 (copy->offset & effective_page_mask));
9216 head_size = MIN(head_size, copy->size);
9217 }
9218 if (!vm_map_page_aligned(copy->offset + copy->size,
9219 effective_page_mask)) {
9220 /*
9221 * Mis-alignment at the end.
9222 * Do an aligned copy up to the last page and
9223 * then an unaligned copy for the remaining bytes.
9224 */
9225 tail_size = ((copy->offset + copy->size) &
9226 effective_page_mask);
9227 tail_size = MIN(tail_size, copy->size);
9228 tail_addr = dst_addr + copy->size - tail_size;
9229 assert(tail_addr >= head_addr + head_size);
9230 }
9231 assert(head_size + tail_size <= copy->size);
9232
9233 if (head_size + tail_size == copy->size) {
9234 /*
9235 * It's all unaligned, no optimization possible...
9236 */
9237 goto blunt_copy;
9238 }
9239
9240 /*
9241 * Can't optimize if there are any submaps in the
9242 * destination due to the way we free the "copy" map
9243 * progressively in vm_map_copy_overwrite_nested()
9244 * in that case.
9245 */
9246 vm_map_lock_read(dst_map);
9247 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9248 vm_map_unlock_read(dst_map);
9249 goto blunt_copy;
9250 }
9251 for (;
9252	     (entry != vm_map_to_entry(dst_map) &&
9253 entry->vme_start < dst_addr + copy->size);
9254 entry = entry->vme_next) {
9255 if (entry->is_sub_map) {
9256 vm_map_unlock_read(dst_map);
9257 goto blunt_copy;
9258 }
9259 }
9260 vm_map_unlock_read(dst_map);
9261
9262 if (head_size) {
9263 /*
9264 * Unaligned copy of the first "head_size" bytes, to reach
9265 * a page boundary.
9266 */
9267
9268 /*
9269 * Extract "head_copy" out of "copy".
9270 */
9271 head_copy = vm_map_copy_allocate();
9272 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9273 head_copy->cpy_hdr.entries_pageable =
9274 copy->cpy_hdr.entries_pageable;
9275 vm_map_store_init(&head_copy->cpy_hdr);
9276
9277 entry = vm_map_copy_first_entry(copy);
9278 if (entry->vme_end < copy->offset + head_size) {
9279 head_size = entry->vme_end - copy->offset;
9280 }
9281
9282 head_copy->offset = copy->offset;
9283 head_copy->size = head_size;
9284 copy->offset += head_size;
9285 copy->size -= head_size;
9286
9287 vm_map_copy_clip_end(copy, entry, copy->offset);
9288 vm_map_copy_entry_unlink(copy, entry);
9289 vm_map_copy_entry_link(head_copy,
9290 vm_map_copy_to_entry(head_copy),
9291 entry);
9292
9293 /*
9294 * Do the unaligned copy.
9295 */
9296 kr = vm_map_copy_overwrite_nested(dst_map,
9297 head_addr,
9298 head_copy,
9299 interruptible,
9300 (pmap_t) NULL,
9301 FALSE);
9302 if (kr != KERN_SUCCESS)
9303 goto done;
9304 }
9305
9306 if (tail_size) {
9307 /*
9308 * Extract "tail_copy" out of "copy".
9309 */
9310 tail_copy = vm_map_copy_allocate();
9311 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9312 tail_copy->cpy_hdr.entries_pageable =
9313 copy->cpy_hdr.entries_pageable;
9314 vm_map_store_init(&tail_copy->cpy_hdr);
9315
9316 tail_copy->offset = copy->offset + copy->size - tail_size;
9317 tail_copy->size = tail_size;
9318
9319 copy->size -= tail_size;
9320
9321 entry = vm_map_copy_last_entry(copy);
9322 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9323 entry = vm_map_copy_last_entry(copy);
9324 vm_map_copy_entry_unlink(copy, entry);
9325 vm_map_copy_entry_link(tail_copy,
9326 vm_map_copy_last_entry(tail_copy),
9327 entry);
9328 }
9329
9330 /*
9331 * Copy most (or possibly all) of the data.
9332 */
9333 kr = vm_map_copy_overwrite_nested(dst_map,
9334 dst_addr + head_size,
9335 copy,
9336 interruptible,
9337 (pmap_t) NULL,
9338 FALSE);
9339 if (kr != KERN_SUCCESS) {
9340 goto done;
9341 }
9342
9343 if (tail_size) {
9344 kr = vm_map_copy_overwrite_nested(dst_map,
9345 tail_addr,
9346 tail_copy,
9347 interruptible,
9348 (pmap_t) NULL,
9349 FALSE);
9350 }
9351
9352done:
9353 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9354 if (kr == KERN_SUCCESS) {
9355 /*
9356 * Discard all the copy maps.
9357 */
9358 if (head_copy) {
9359 vm_map_copy_discard(head_copy);
9360 head_copy = NULL;
9361 }
9362 vm_map_copy_discard(copy);
9363 if (tail_copy) {
9364 vm_map_copy_discard(tail_copy);
9365 tail_copy = NULL;
9366 }
9367 } else {
9368 /*
9369 * Re-assemble the original copy map.
9370 */
9371 if (head_copy) {
9372 entry = vm_map_copy_first_entry(head_copy);
9373 vm_map_copy_entry_unlink(head_copy, entry);
9374 vm_map_copy_entry_link(copy,
9375 vm_map_copy_to_entry(copy),
9376 entry);
9377 copy->offset -= head_size;
9378 copy->size += head_size;
9379 vm_map_copy_discard(head_copy);
9380 head_copy = NULL;
9381 }
9382 if (tail_copy) {
9383 entry = vm_map_copy_last_entry(tail_copy);
9384 vm_map_copy_entry_unlink(tail_copy, entry);
9385 vm_map_copy_entry_link(copy,
9386 vm_map_copy_last_entry(copy),
9387 entry);
9388 copy->size += tail_size;
9389 vm_map_copy_discard(tail_copy);
9390 tail_copy = NULL;
9391 }
9392 }
9393 return kr;
9394}
9395
9396
9397/*
9398 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9399 *
9400 *	Description:
9401 * Physically copy unaligned data
9402 *
9403 * Implementation:
9404 * Unaligned parts of pages have to be physically copied. We use
9405 *	a modified form of vm_fault_copy (which understands non-aligned
9406 *	page offsets and sizes) to do the copy.  We attempt to copy as
9407 *	much memory in one go as possible; however, vm_fault_copy copies
9408 *	within one memory object, so we have to find the smallest of "amount left",
9409 *	"source object data size" and "target object data size".  With
9410 *	unaligned data we don't need to split regions, therefore the source
9411 *	(copy) object should be one map entry; the target range may be split
9412 *	over multiple map entries, however.  In any event we are pessimistic
9413 * about these assumptions.
9414 *
9415 * Assumptions:
9416 *	dst_map is locked on entry and is returned locked on success,
9417 * unlocked on error.
9418 */
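/*
 * In effect, each iteration of the loop below copies
 *
 *	copy_size = MIN(amount_left, MIN(dst_size, src_size))
 *
 * bytes, where dst_size is what remains of the current destination map
 * entry and src_size is what remains of the current source copy entry
 * (see the explicit comparisons in the loop body).
 */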
9419
9420static kern_return_t
9421vm_map_copy_overwrite_unaligned(
9422 vm_map_t dst_map,
9423 vm_map_entry_t entry,
9424 vm_map_copy_t copy,
9425 vm_map_offset_t start,
9426 boolean_t discard_on_success)
9427{
9428 vm_map_entry_t copy_entry;
9429 vm_map_entry_t copy_entry_next;
9430 vm_map_version_t version;
9431 vm_object_t dst_object;
9432 vm_object_offset_t dst_offset;
9433 vm_object_offset_t src_offset;
9434 vm_object_offset_t entry_offset;
9435 vm_map_offset_t entry_end;
9436 vm_map_size_t src_size,
9437 dst_size,
9438 copy_size,
9439 amount_left;
9440 kern_return_t kr = KERN_SUCCESS;
9441
9442
9443 copy_entry = vm_map_copy_first_entry(copy);
9444
9445 vm_map_lock_write_to_read(dst_map);
9446
9447 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9448 amount_left = copy->size;
9449/*
9450 * Unaligned, so we never clipped this entry; we need the offset into
9451 * the vm_object, not just the data.
9452 */
9453 while (amount_left > 0) {
9454
9455 if (entry == vm_map_to_entry(dst_map)) {
9456 vm_map_unlock_read(dst_map);
9457 return KERN_INVALID_ADDRESS;
9458 }
9459
9460 /* "start" must be within the current map entry */
9461 assert ((start>=entry->vme_start) && (start<entry->vme_end));
9462
9463 dst_offset = start - entry->vme_start;
9464
9465 dst_size = entry->vme_end - start;
9466
9467 src_size = copy_entry->vme_end -
9468 (copy_entry->vme_start + src_offset);
9469
9470 if (dst_size < src_size) {
9471/*
9472 * we can only copy dst_size bytes before
9473 * we have to get the next destination entry
9474 */
9475 copy_size = dst_size;
9476 } else {
9477/*
9478 * we can only copy src_size bytes before
9479 * we have to get the next source copy entry
9480 */
9481 copy_size = src_size;
9482 }
9483
9484 if (copy_size > amount_left) {
9485 copy_size = amount_left;
9486 }
9487/*
9488 *	Entry needs copy: create a shadow object for the
9489 *	copy-on-write region.
9490 */
9491 if (entry->needs_copy &&
9492 ((entry->protection & VM_PROT_WRITE) != 0))
9493 {
9494 if (vm_map_lock_read_to_write(dst_map)) {
9495 vm_map_lock_read(dst_map);
9496 goto RetryLookup;
9497 }
9498 VME_OBJECT_SHADOW(entry,
9499 (vm_map_size_t)(entry->vme_end
9500 - entry->vme_start));
9501 entry->needs_copy = FALSE;
9502 vm_map_lock_write_to_read(dst_map);
9503 }
9504 dst_object = VME_OBJECT(entry);
9505/*
9506 *	Unlike with the virtual (aligned) copy, we're going
9507 *	to fault on it, therefore we need a target object.
9508 */
9509 if (dst_object == VM_OBJECT_NULL) {
9510 if (vm_map_lock_read_to_write(dst_map)) {
9511 vm_map_lock_read(dst_map);
9512 goto RetryLookup;
9513 }
9514 dst_object = vm_object_allocate((vm_map_size_t)
9515 entry->vme_end - entry->vme_start);
9516 VME_OBJECT(entry) = dst_object;
9517 VME_OFFSET_SET(entry, 0);
9518 assert(entry->use_pmap);
9519 vm_map_lock_write_to_read(dst_map);
9520 }
9521/*
9522 * Take an object reference and unlock map. The "entry" may
9523 * disappear or change when the map is unlocked.
9524 */
9525 vm_object_reference(dst_object);
9526 version.main_timestamp = dst_map->timestamp;
9527 entry_offset = VME_OFFSET(entry);
9528 entry_end = entry->vme_end;
9529 vm_map_unlock_read(dst_map);
9530/*
9531 * Copy as much as possible in one pass
9532 */
9533 kr = vm_fault_copy(
9534 VME_OBJECT(copy_entry),
9535 VME_OFFSET(copy_entry) + src_offset,
9536 &copy_size,
9537 dst_object,
9538 entry_offset + dst_offset,
9539 dst_map,
9540 &version,
9541 THREAD_UNINT );
9542
9543 start += copy_size;
9544 src_offset += copy_size;
9545 amount_left -= copy_size;
9546/*
9547 * Release the object reference
9548 */
9549 vm_object_deallocate(dst_object);
9550/*
9551 * If a hard error occurred, return it now
9552 */
9553 if (kr != KERN_SUCCESS)
9554 return kr;
9555
9556 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9557 || amount_left == 0)
9558 {
9559/*
9560 * all done with this copy entry, dispose.
9561 */
9562 copy_entry_next = copy_entry->vme_next;
9563
9564 if (discard_on_success) {
9565 vm_map_copy_entry_unlink(copy, copy_entry);
9566 assert(!copy_entry->is_sub_map);
9567 vm_object_deallocate(VME_OBJECT(copy_entry));
9568 vm_map_copy_entry_dispose(copy, copy_entry);
9569 }
9570
9571 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9572 amount_left) {
9573/*
9574 *	not finished copying but ran out of source
9575 */
9576 return KERN_INVALID_ADDRESS;
9577 }
9578
9579 copy_entry = copy_entry_next;
9580
9581 src_offset = 0;
9582 }
9583
9584 if (amount_left == 0)
9585 return KERN_SUCCESS;
9586
9587 vm_map_lock_read(dst_map);
9588 if (version.main_timestamp == dst_map->timestamp) {
9589 if (start == entry_end) {
9590/*
9591 * destination region is split. Use the version
9592 * information to avoid a lookup in the normal
9593 * case.
9594 */
9595 entry = entry->vme_next;
9596/*
9597 * should be contiguous. Fail if we encounter
9598 * a hole in the destination.
9599 */
9600 if (start != entry->vme_start) {
9601 vm_map_unlock_read(dst_map);
9602 return KERN_INVALID_ADDRESS ;
9603 }
9604 }
9605 } else {
9606/*
9607 * Map version check failed.
9608 *	We must look up the entry because somebody
9609 * might have changed the map behind our backs.
9610 */
9611 RetryLookup:
9612 if (!vm_map_lookup_entry(dst_map, start, &entry))
9613 {
9614 vm_map_unlock_read(dst_map);
9615 return KERN_INVALID_ADDRESS ;
9616 }
9617 }
9618 }/* while */
9619
9620 return KERN_SUCCESS;
9621}/* vm_map_copy_overwrite_unaligned */
9622
9623/*
9624 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9625 *
9626 * Description:
9627 * Does all the vm_trickery possible for whole pages.
9628 *
9629 * Implementation:
9630 *
9631 * If there are no permanent objects in the destination,
9632 * and the source and destination map entry zones match,
9633 * and the destination map entry is not shared,
9634 * then the map entries can be deleted and replaced
9635 * with those from the copy. The following code is the
9636 * basic idea of what to do, but there are lots of annoying
9637 * little details about getting protection and inheritance
9638 * right. Should add protection, inheritance, and sharing checks
9639 * to the above pass and make sure that no wiring is involved.
9640 */
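/*
 * Concretely, the fast path below performs a virtual copy: it drops the
 * destination entry's object reference and installs the copy entry's
 * object/offset in its place.  The slow path falls back to vm_fault_copy()
 * for a physical, page-by-page copy when the destination is shared or
 * permanent, or when the tradeoff heuristics decide a physical copy is
 * cheaper.
 */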
9641
9642int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9643int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9644int vm_map_copy_overwrite_aligned_src_large = 0;
9645
9646static kern_return_t
9647vm_map_copy_overwrite_aligned(
9648 vm_map_t dst_map,
9649 vm_map_entry_t tmp_entry,
9650 vm_map_copy_t copy,
9651 vm_map_offset_t start,
9652 __unused pmap_t pmap)
9653{
9654 vm_object_t object;
9655 vm_map_entry_t copy_entry;
9656 vm_map_size_t copy_size;
9657 vm_map_size_t size;
9658 vm_map_entry_t entry;
9659
9660 while ((copy_entry = vm_map_copy_first_entry(copy))
9661 != vm_map_copy_to_entry(copy))
9662 {
9663 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9664
9665 entry = tmp_entry;
9666 if (entry->is_sub_map) {
9667 /* unnested when clipped earlier */
9668 assert(!entry->use_pmap);
9669 }
9670 if (entry == vm_map_to_entry(dst_map)) {
9671 vm_map_unlock(dst_map);
9672 return KERN_INVALID_ADDRESS;
9673 }
9674 size = (entry->vme_end - entry->vme_start);
9675 /*
9676 * Make sure that no holes popped up in the
9677 * address map, and that the protection is
9678 * still valid, in case the map was unlocked
9679 * earlier.
9680 */
9681
9682 if ((entry->vme_start != start) || ((entry->is_sub_map)
9683 && !entry->needs_copy)) {
9684 vm_map_unlock(dst_map);
9685 return(KERN_INVALID_ADDRESS);
9686 }
9687 assert(entry != vm_map_to_entry(dst_map));
9688
9689 /*
9690 * Check protection again
9691 */
9692
9693 if ( ! (entry->protection & VM_PROT_WRITE)) {
9694 vm_map_unlock(dst_map);
9695 return(KERN_PROTECTION_FAILURE);
9696 }
9697
9698 /*
9699 * Adjust to source size first
9700 */
9701
9702 if (copy_size < size) {
9703 if (entry->map_aligned &&
9704 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9705 VM_MAP_PAGE_MASK(dst_map))) {
9706 /* no longer map-aligned */
9707 entry->map_aligned = FALSE;
9708 }
9709 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9710 size = copy_size;
9711 }
9712
9713 /*
9714 * Adjust to destination size
9715 */
9716
9717 if (size < copy_size) {
9718 vm_map_copy_clip_end(copy, copy_entry,
9719 copy_entry->vme_start + size);
9720 copy_size = size;
9721 }
9722
9723 assert((entry->vme_end - entry->vme_start) == size);
9724 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9725 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9726
9727 /*
9728 * If the destination contains temporary unshared memory,
9729 * we can perform the copy by throwing it away and
9730 * installing the source data.
9731 */
9732
9733 object = VME_OBJECT(entry);
9734 if ((!entry->is_shared &&
9735 ((object == VM_OBJECT_NULL) ||
9736 (object->internal && !object->true_share))) ||
9737 entry->needs_copy) {
9738 vm_object_t old_object = VME_OBJECT(entry);
9739 vm_object_offset_t old_offset = VME_OFFSET(entry);
9740 vm_object_offset_t offset;
9741
9742 /*
9743 * Ensure that the source and destination aren't
9744 * identical
9745 */
9746 if (old_object == VME_OBJECT(copy_entry) &&
9747 old_offset == VME_OFFSET(copy_entry)) {
9748 vm_map_copy_entry_unlink(copy, copy_entry);
9749 vm_map_copy_entry_dispose(copy, copy_entry);
9750
9751 if (old_object != VM_OBJECT_NULL)
9752 vm_object_deallocate(old_object);
9753
9754 start = tmp_entry->vme_end;
9755 tmp_entry = tmp_entry->vme_next;
9756 continue;
9757 }
9758
9759#if !CONFIG_EMBEDDED
9760#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9761#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9762 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9763 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9764 copy_size <= __TRADEOFF1_COPY_SIZE) {
9765 /*
9766 * Virtual vs. Physical copy tradeoff #1.
9767 *
9768 * Copying only a few pages out of a large
9769 * object: do a physical copy instead of
9770 * a virtual copy, to avoid possibly keeping
9771 * the entire large object alive because of
9772 * those few copy-on-write pages.
9773 */
9774 vm_map_copy_overwrite_aligned_src_large++;
9775 goto slow_copy;
9776 }
9777#endif /* !CONFIG_EMBEDDED */
9778
9779 if ((dst_map->pmap != kernel_pmap) &&
9780 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9781 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
9782 vm_object_t new_object, new_shadow;
9783
9784 /*
9785 * We're about to map something over a mapping
9786 * established by malloc()...
9787 */
9788 new_object = VME_OBJECT(copy_entry);
9789 if (new_object != VM_OBJECT_NULL) {
9790 vm_object_lock_shared(new_object);
9791 }
9792 while (new_object != VM_OBJECT_NULL &&
9793#if !CONFIG_EMBEDDED
9794 !new_object->true_share &&
9795 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9796#endif /* !CONFIG_EMBEDDED */
9797 new_object->internal) {
9798 new_shadow = new_object->shadow;
9799 if (new_shadow == VM_OBJECT_NULL) {
9800 break;
9801 }
9802 vm_object_lock_shared(new_shadow);
9803 vm_object_unlock(new_object);
9804 new_object = new_shadow;
9805 }
9806 if (new_object != VM_OBJECT_NULL) {
9807 if (!new_object->internal) {
9808 /*
9809 * The new mapping is backed
9810 * by an external object. We
9811 * don't want malloc'ed memory
9812 * to be replaced with such a
9813 * non-anonymous mapping, so
9814 * let's go off the optimized
9815 * path...
9816 */
9817 vm_map_copy_overwrite_aligned_src_not_internal++;
9818 vm_object_unlock(new_object);
9819 goto slow_copy;
9820 }
9821#if !CONFIG_EMBEDDED
9822 if (new_object->true_share ||
9823 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9824 /*
9825 * Same if there's a "true_share"
9826 * object in the shadow chain, or
9827 * an object with a non-default
9828 * (SYMMETRIC) copy strategy.
9829 */
9830 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9831 vm_object_unlock(new_object);
9832 goto slow_copy;
9833 }
9834#endif /* !CONFIG_EMBEDDED */
9835 vm_object_unlock(new_object);
9836 }
9837 /*
9838 * The new mapping is still backed by
9839 * anonymous (internal) memory, so it's
9840 * OK to substitute it for the original
9841 * malloc() mapping.
9842 */
9843 }
9844
9845 if (old_object != VM_OBJECT_NULL) {
9846 if(entry->is_sub_map) {
9847 if(entry->use_pmap) {
9848#ifndef NO_NESTED_PMAP
9849 pmap_unnest(dst_map->pmap,
9850 (addr64_t)entry->vme_start,
9851 entry->vme_end - entry->vme_start);
9852#endif /* NO_NESTED_PMAP */
9853 if(dst_map->mapped_in_other_pmaps) {
9854 /* clean up parent */
9855 /* map/maps */
9856 vm_map_submap_pmap_clean(
9857 dst_map, entry->vme_start,
9858 entry->vme_end,
9859 VME_SUBMAP(entry),
9860 VME_OFFSET(entry));
9861 }
9862 } else {
9863 vm_map_submap_pmap_clean(
9864 dst_map, entry->vme_start,
9865 entry->vme_end,
9866 VME_SUBMAP(entry),
9867 VME_OFFSET(entry));
9868 }
9869 vm_map_deallocate(VME_SUBMAP(entry));
9870 } else {
9871 if(dst_map->mapped_in_other_pmaps) {
9872 vm_object_pmap_protect_options(
9873 VME_OBJECT(entry),
9874 VME_OFFSET(entry),
9875 entry->vme_end
9876 - entry->vme_start,
9877 PMAP_NULL,
9878 entry->vme_start,
9879 VM_PROT_NONE,
9880 PMAP_OPTIONS_REMOVE);
9881 } else {
9882 pmap_remove_options(
9883 dst_map->pmap,
9884 (addr64_t)(entry->vme_start),
9885 (addr64_t)(entry->vme_end),
9886 PMAP_OPTIONS_REMOVE);
9887 }
9888 vm_object_deallocate(old_object);
9889 }
9890 }
9891
9892 if (entry->iokit_acct) {
9893 /* keep using iokit accounting */
9894 entry->use_pmap = FALSE;
9895 } else {
9896 /* use pmap accounting */
9897 entry->use_pmap = TRUE;
9898 }
9899 entry->is_sub_map = FALSE;
9900 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
9901 object = VME_OBJECT(entry);
9902 entry->needs_copy = copy_entry->needs_copy;
9903 entry->wired_count = 0;
9904 entry->user_wired_count = 0;
9905 offset = VME_OFFSET(copy_entry);
9906 VME_OFFSET_SET(entry, offset);
9907
9908 vm_map_copy_entry_unlink(copy, copy_entry);
9909 vm_map_copy_entry_dispose(copy, copy_entry);
9910
9911 /*
9912			 * We could try to push pages into the pmap at this point, BUT
9913			 * this optimization only saved on average 2 us per page if ALL
9914			 * the pages in the source were currently mapped
9915			 * and ALL the pages in the dest were touched.  If fewer than
9916			 * 2/3 of the pages were touched, this optimization actually cost
9917			 * more cycles.  It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
9918 */
9919
9920 /*
9921 * Set up for the next iteration. The map
9922 * has not been unlocked, so the next
9923 * address should be at the end of this
9924 * entry, and the next map entry should be
9925 * the one following it.
9926 */
9927
9928 start = tmp_entry->vme_end;
9929 tmp_entry = tmp_entry->vme_next;
9930 } else {
9931 vm_map_version_t version;
9932 vm_object_t dst_object;
9933 vm_object_offset_t dst_offset;
9934 kern_return_t r;
9935
9936 slow_copy:
9937 if (entry->needs_copy) {
9938 VME_OBJECT_SHADOW(entry,
9939 (entry->vme_end -
9940 entry->vme_start));
9941 entry->needs_copy = FALSE;
9942 }
9943
9944 dst_object = VME_OBJECT(entry);
9945 dst_offset = VME_OFFSET(entry);
9946
9947 /*
9948 * Take an object reference, and record
9949 * the map version information so that the
9950 * map can be safely unlocked.
9951 */
9952
9953 if (dst_object == VM_OBJECT_NULL) {
9954 /*
9955 * We would usually have just taken the
9956 * optimized path above if the destination
9957 * object has not been allocated yet. But we
9958 * now disable that optimization if the copy
9959 * entry's object is not backed by anonymous
9960 * memory to avoid replacing malloc'ed
9961 * (i.e. re-usable) anonymous memory with a
9962 * not-so-anonymous mapping.
9963 * So we have to handle this case here and
9964 * allocate a new VM object for this map entry.
9965 */
9966 dst_object = vm_object_allocate(
9967 entry->vme_end - entry->vme_start);
9968 dst_offset = 0;
9969 VME_OBJECT_SET(entry, dst_object);
9970 VME_OFFSET_SET(entry, dst_offset);
9971 assert(entry->use_pmap);
9972
9973 }
9974
9975 vm_object_reference(dst_object);
9976
9977 /* account for unlock bumping up timestamp */
9978 version.main_timestamp = dst_map->timestamp + 1;
9979
9980 vm_map_unlock(dst_map);
9981
9982 /*
9983 * Copy as much as possible in one pass
9984 */
9985
9986 copy_size = size;
9987 r = vm_fault_copy(
9988 VME_OBJECT(copy_entry),
9989 VME_OFFSET(copy_entry),
9990 &copy_size,
9991 dst_object,
9992 dst_offset,
9993 dst_map,
9994 &version,
9995 THREAD_UNINT );
9996
9997 /*
9998 * Release the object reference
9999 */
10000
10001 vm_object_deallocate(dst_object);
10002
10003 /*
10004 * If a hard error occurred, return it now
10005 */
10006
10007 if (r != KERN_SUCCESS)
10008 return(r);
10009
10010 if (copy_size != 0) {
10011 /*
10012 * Dispose of the copied region
10013 */
10014
10015 vm_map_copy_clip_end(copy, copy_entry,
10016 copy_entry->vme_start + copy_size);
10017 vm_map_copy_entry_unlink(copy, copy_entry);
10018 vm_object_deallocate(VME_OBJECT(copy_entry));
10019 vm_map_copy_entry_dispose(copy, copy_entry);
10020 }
10021
10022 /*
10023 * Pick up in the destination map where we left off.
10024 *
10025 * Use the version information to avoid a lookup
10026 * in the normal case.
10027 */
10028
10029 start += copy_size;
10030 vm_map_lock(dst_map);
10031 if (version.main_timestamp == dst_map->timestamp &&
10032 copy_size != 0) {
10033 /* We can safely use saved tmp_entry value */
10034
10035 if (tmp_entry->map_aligned &&
10036 !VM_MAP_PAGE_ALIGNED(
10037 start,
10038 VM_MAP_PAGE_MASK(dst_map))) {
10039 /* no longer map-aligned */
10040 tmp_entry->map_aligned = FALSE;
10041 }
10042 vm_map_clip_end(dst_map, tmp_entry, start);
10043 tmp_entry = tmp_entry->vme_next;
10044 } else {
10045 /* Must do lookup of tmp_entry */
10046
10047 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10048 vm_map_unlock(dst_map);
10049 return(KERN_INVALID_ADDRESS);
10050 }
10051 if (tmp_entry->map_aligned &&
10052 !VM_MAP_PAGE_ALIGNED(
10053 start,
10054 VM_MAP_PAGE_MASK(dst_map))) {
10055 /* no longer map-aligned */
10056 tmp_entry->map_aligned = FALSE;
10057 }
10058 vm_map_clip_start(dst_map, tmp_entry, start);
10059 }
10060 }
10061 }/* while */
10062
10063 return(KERN_SUCCESS);
10064}/* vm_map_copy_overwrite_aligned */
10065
10066/*
10067 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10068 *
10069 * Description:
10070 * Copy in data to a kernel buffer from space in the
10071 * source map. The original space may be optionally
10072 * deallocated.
10073 *
10074 * If successful, returns a new copy object.
10075 */
10076static kern_return_t
10077vm_map_copyin_kernel_buffer(
10078 vm_map_t src_map,
10079 vm_map_offset_t src_addr,
10080 vm_map_size_t len,
10081 boolean_t src_destroy,
10082 vm_map_copy_t *copy_result)
10083{
10084 kern_return_t kr;
10085 vm_map_copy_t copy;
10086 vm_size_t kalloc_size;
10087
10088 if (len > msg_ool_size_small)
10089 return KERN_INVALID_ARGUMENT;
10090
10091 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10092
10093 copy = (vm_map_copy_t)kalloc(kalloc_size);
10094 if (copy == VM_MAP_COPY_NULL)
10095 return KERN_RESOURCE_SHORTAGE;
10096 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10097 copy->size = len;
10098 copy->offset = 0;
10099
10100 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10101 if (kr != KERN_SUCCESS) {
10102 kfree(copy, kalloc_size);
10103 return kr;
10104 }
10105 if (src_destroy) {
10106 (void) vm_map_remove(
10107 src_map,
10108 vm_map_trunc_page(src_addr,
10109 VM_MAP_PAGE_MASK(src_map)),
10110 vm_map_round_page(src_addr + len,
10111 VM_MAP_PAGE_MASK(src_map)),
10112 (VM_MAP_REMOVE_INTERRUPTIBLE |
10113 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10114 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10115 }
10116 *copy_result = copy;
10117 return KERN_SUCCESS;
10118}
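/*
 * Note (added commentary, not part of the original source): the copy
 * header and the inlined data share a single kalloc() allocation of
 * cpy_kdata_hdr_sz + len bytes, which is why the matching release in
 * vm_map_copyout_kernel_buffer() below frees copy_size + cpy_kdata_hdr_sz
 * rather than the size of the header alone.
 */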
10119
10120/*
10121 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10122 *
10123 * Description:
10124 * Copy out data from a kernel buffer into space in the
10125 * destination map. The space may be optionally dynamically
10126 * allocated.
10127 *
10128 * If successful, consumes the copy object.
10129 * Otherwise, the caller is responsible for it.
10130 */
10131static int vm_map_copyout_kernel_buffer_failures = 0;
10132static kern_return_t
10133vm_map_copyout_kernel_buffer(
10134 vm_map_t map,
10135 vm_map_address_t *addr, /* IN/OUT */
10136 vm_map_copy_t copy,
10137 vm_map_size_t copy_size,
10138 boolean_t overwrite,
10139 boolean_t consume_on_success)
10140{
10141 kern_return_t kr = KERN_SUCCESS;
10142 thread_t thread = current_thread();
10143
10144 assert(copy->size == copy_size);
10145
10146 /*
10147 * check for corrupted vm_map_copy structure
10148 */
10149 if (copy_size > msg_ool_size_small || copy->offset)
10150 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10151 (long long)copy->size, (long long)copy->offset);
10152
10153 if (!overwrite) {
10154
10155 /*
10156 * Allocate space in the target map for the data
10157 */
10158 *addr = 0;
10159 kr = vm_map_enter(map,
10160 addr,
10161 vm_map_round_page(copy_size,
10162 VM_MAP_PAGE_MASK(map)),
10163 (vm_map_offset_t) 0,
10164 VM_FLAGS_ANYWHERE,
10165 VM_MAP_KERNEL_FLAGS_NONE,
10166 VM_KERN_MEMORY_NONE,
10167 VM_OBJECT_NULL,
10168 (vm_object_offset_t) 0,
10169 FALSE,
10170 VM_PROT_DEFAULT,
10171 VM_PROT_ALL,
10172 VM_INHERIT_DEFAULT);
10173 if (kr != KERN_SUCCESS)
10174 return kr;
10175#if KASAN
10176 if (map->pmap == kernel_pmap) {
10177 kasan_notify_address(*addr, copy->size);
10178 }
10179#endif
10180 }
10181
10182 /*
10183 * Copyout the data from the kernel buffer to the target map.
10184 */
10185 if (thread->map == map) {
10186
10187 /*
10188 * If the target map is the current map, just do
10189 * the copy.
10190 */
10191 assert((vm_size_t)copy_size == copy_size);
10192 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10193 kr = KERN_INVALID_ADDRESS;
10194 }
10195 }
10196 else {
10197 vm_map_t oldmap;
10198
10199 /*
10200 * If the target map is another map, assume the
10201 * target's address space identity for the duration
10202 * of the copy.
10203 */
10204 vm_map_reference(map);
10205 oldmap = vm_map_switch(map);
10206
10207 assert((vm_size_t)copy_size == copy_size);
10208 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10209 vm_map_copyout_kernel_buffer_failures++;
10210 kr = KERN_INVALID_ADDRESS;
10211 }
10212
10213 (void) vm_map_switch(oldmap);
10214 vm_map_deallocate(map);
10215 }
10216
10217 if (kr != KERN_SUCCESS) {
10218 /* the copy failed, clean up */
10219 if (!overwrite) {
10220 /*
10221 * Deallocate the space we allocated in the target map.
10222 */
10223 (void) vm_map_remove(
10224 map,
10225 vm_map_trunc_page(*addr,
10226 VM_MAP_PAGE_MASK(map)),
10227 vm_map_round_page((*addr +
10228 vm_map_round_page(copy_size,
10229 VM_MAP_PAGE_MASK(map))),
10230 VM_MAP_PAGE_MASK(map)),
10231 VM_MAP_REMOVE_NO_FLAGS);
10232 *addr = 0;
10233 }
10234 } else {
10235 /* copy was successful, discard the copy structure */
10236 if (consume_on_success) {
10237 kfree(copy, copy_size + cpy_kdata_hdr_sz);
10238 }
10239 }
10240
10241 return kr;
10242}
10243
10244/*
10245 * Routine: vm_map_copy_insert [internal use only]
10246 *
10247 * Description:
10248 * Link a copy chain ("copy") into a map at the
10249 * specified location (after "where").
10250 * Side effects:
10251 * The copy chain is destroyed.
10252 */
10253static void
10254vm_map_copy_insert(
10255 vm_map_t map,
10256 vm_map_entry_t after_where,
10257 vm_map_copy_t copy)
10258{
10259 vm_map_entry_t entry;
10260
10261 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10262 entry = vm_map_copy_first_entry(copy);
10263 vm_map_copy_entry_unlink(copy, entry);
10264 vm_map_store_entry_link(map, after_where, entry,
10265 VM_MAP_KERNEL_FLAGS_NONE);
10266 after_where = entry;
10267 }
10268 zfree(vm_map_copy_zone, copy);
10269}
10270
10271void
10272vm_map_copy_remap(
10273 vm_map_t map,
10274 vm_map_entry_t where,
10275 vm_map_copy_t copy,
10276 vm_map_offset_t adjustment,
10277 vm_prot_t cur_prot,
10278 vm_prot_t max_prot,
10279 vm_inherit_t inheritance)
10280{
10281 vm_map_entry_t copy_entry, new_entry;
10282
10283 for (copy_entry = vm_map_copy_first_entry(copy);
10284 copy_entry != vm_map_copy_to_entry(copy);
10285 copy_entry = copy_entry->vme_next) {
10286 /* get a new VM map entry for the map */
10287 new_entry = vm_map_entry_create(map,
10288 !map->hdr.entries_pageable);
10289 /* copy the "copy entry" to the new entry */
10290 vm_map_entry_copy(new_entry, copy_entry);
10291 /* adjust "start" and "end" */
10292 new_entry->vme_start += adjustment;
10293 new_entry->vme_end += adjustment;
10294 /* clear some attributes */
10295 new_entry->inheritance = inheritance;
10296 new_entry->protection = cur_prot;
10297 new_entry->max_protection = max_prot;
10298 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10299 /* take an extra reference on the entry's "object" */
10300 if (new_entry->is_sub_map) {
10301 assert(!new_entry->use_pmap); /* not nested */
10302 vm_map_lock(VME_SUBMAP(new_entry));
10303 vm_map_reference(VME_SUBMAP(new_entry));
10304 vm_map_unlock(VME_SUBMAP(new_entry));
10305 } else {
10306 vm_object_reference(VME_OBJECT(new_entry));
10307 }
10308 /* insert the new entry in the map */
10309 vm_map_store_entry_link(map, where, new_entry,
10310 VM_MAP_KERNEL_FLAGS_NONE);
10311 /* continue inserting the "copy entries" after the new entry */
10312 where = new_entry;
10313 }
10314}
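/*
 * Note (added commentary, not part of the original source):
 * vm_map_copy_insert() above moves the copy's own entries into the map
 * and then destroys the copy, whereas vm_map_copy_remap() clones each
 * copy entry into a freshly created map entry and leaves "copy" intact.
 * vm_map_copyout_internal() below chooses between the two based on its
 * "consume_on_success" argument.
 */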
10315
10316
10317/*
10318 * Returns true if *size matches (or is in the range of) copy->size.
10319 * Upon returning true, the *size field is updated with the actual size of the
10320 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10321 */
10322boolean_t
10323vm_map_copy_validate_size(
10324 vm_map_t dst_map,
10325 vm_map_copy_t copy,
10326 vm_map_size_t *size)
10327{
10328 if (copy == VM_MAP_COPY_NULL)
10329 return FALSE;
10330 vm_map_size_t copy_sz = copy->size;
10331 vm_map_size_t sz = *size;
10332 switch (copy->type) {
10333 case VM_MAP_COPY_OBJECT:
10334 case VM_MAP_COPY_KERNEL_BUFFER:
10335 if (sz == copy_sz)
10336 return TRUE;
10337 break;
10338 case VM_MAP_COPY_ENTRY_LIST:
10339 /*
10340 * potential page-size rounding prevents us from exactly
10341 * validating this flavor of vm_map_copy, but we can at least
10342 * assert that it's within a range.
10343 */
10344 if (copy_sz >= sz &&
10345 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10346 *size = copy_sz;
10347 return TRUE;
10348 }
10349 break;
10350 default:
10351 break;
10352 }
10353 return FALSE;
10354}
10355
10356/*
10357 * Routine: vm_map_copyout_size
10358 *
10359 * Description:
10360 * Copy out a copy chain ("copy") into newly-allocated
10361 * space in the destination map. Uses a prevalidated
10362 * size for the copy object (vm_map_copy_validate_size).
10363 *
10364 * If successful, consumes the copy object.
10365 * Otherwise, the caller is responsible for it.
10366 */
10367kern_return_t
10368vm_map_copyout_size(
10369 vm_map_t dst_map,
10370 vm_map_address_t *dst_addr, /* OUT */
10371 vm_map_copy_t copy,
10372 vm_map_size_t copy_size)
10373{
10374 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10375 TRUE, /* consume_on_success */
10376 VM_PROT_DEFAULT,
10377 VM_PROT_ALL,
10378 VM_INHERIT_DEFAULT);
10379}
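/*
 * Illustrative sketch (added commentary, not part of the original source):
 * a hypothetical caller that received a size from elsewhere can pair
 * vm_map_copy_validate_size() with vm_map_copyout_size() above.  On
 * success, vm_map_copy_validate_size() updates the size to the copy's
 * actual size, which must then be passed through unchanged.  The function
 * and variable names below are invented for illustration only.
 *
 *	static kern_return_t
 *	copyout_with_expected_size(
 *		vm_map_t		dst_map,
 *		vm_map_copy_t		copy,
 *		vm_map_size_t		expected_size,
 *		vm_map_address_t	*dst_addr)
 *	{
 *		if (!vm_map_copy_validate_size(dst_map, copy, &expected_size)) {
 *			return KERN_INVALID_ARGUMENT;
 *		}
 *		return vm_map_copyout_size(dst_map, dst_addr, copy, expected_size);
 *	}
 */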
10380
10381/*
10382 * Routine: vm_map_copyout
10383 *
10384 * Description:
10385 * Copy out a copy chain ("copy") into newly-allocated
10386 * space in the destination map.
10387 *
10388 * If successful, consumes the copy object.
10389 * Otherwise, the caller is responsible for it.
10390 */
10391kern_return_t
10392vm_map_copyout(
10393 vm_map_t dst_map,
10394 vm_map_address_t *dst_addr, /* OUT */
10395 vm_map_copy_t copy)
10396{
10397 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10398 TRUE, /* consume_on_success */
10399 VM_PROT_DEFAULT,
10400 VM_PROT_ALL,
10401 VM_INHERIT_DEFAULT);
10402}
10403
10404kern_return_t
10405vm_map_copyout_internal(
10406 vm_map_t dst_map,
10407 vm_map_address_t *dst_addr, /* OUT */
10408 vm_map_copy_t copy,
10409 vm_map_size_t copy_size,
10410 boolean_t consume_on_success,
10411 vm_prot_t cur_protection,
10412 vm_prot_t max_protection,
10413 vm_inherit_t inheritance)
10414{
10415 vm_map_size_t size;
10416 vm_map_size_t adjustment;
10417 vm_map_offset_t start;
10418 vm_object_offset_t vm_copy_start;
10419 vm_map_entry_t last;
10420 vm_map_entry_t entry;
10421 vm_map_entry_t hole_entry;
10422
10423 /*
10424 * Check for null copy object.
10425 */
10426
10427 if (copy == VM_MAP_COPY_NULL) {
10428 *dst_addr = 0;
10429 return(KERN_SUCCESS);
10430 }
10431
10432 if (copy->size != copy_size) {
10433 *dst_addr = 0;
10434 return KERN_FAILURE;
10435 }
10436
10437 /*
10438 * Check for special copy object, created
10439 * by vm_map_copyin_object.
10440 */
10441
10442 if (copy->type == VM_MAP_COPY_OBJECT) {
10443 vm_object_t object = copy->cpy_object;
10444 kern_return_t kr;
10445 vm_object_offset_t offset;
10446
10447 offset = vm_object_trunc_page(copy->offset);
10448 size = vm_map_round_page((copy_size +
10449 (vm_map_size_t)(copy->offset -
10450 offset)),
10451 VM_MAP_PAGE_MASK(dst_map));
10452 *dst_addr = 0;
10453 kr = vm_map_enter(dst_map, dst_addr, size,
10454 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10455 VM_MAP_KERNEL_FLAGS_NONE,
10456 VM_KERN_MEMORY_NONE,
10457 object, offset, FALSE,
10458 VM_PROT_DEFAULT, VM_PROT_ALL,
10459 VM_INHERIT_DEFAULT);
10460 if (kr != KERN_SUCCESS)
10461 return(kr);
10462 /* Account for a non-page-aligned copy object */
10463 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10464 if (consume_on_success)
10465 zfree(vm_map_copy_zone, copy);
10466 return(KERN_SUCCESS);
10467 }
10468
10469 /*
10470 * Check for special kernel buffer allocated
10471 * by new_ipc_kmsg_copyin.
10472 */
10473
10474 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10475 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10476 copy, copy_size, FALSE,
10477 consume_on_success);
10478 }
10479
10480
10481 /*
10482 * Find space for the data
10483 */
10484
10485 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10486 VM_MAP_COPY_PAGE_MASK(copy));
10487 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10488 VM_MAP_COPY_PAGE_MASK(copy))
10489 - vm_copy_start;
10490
10491
10492StartAgain: ;
10493
10494 vm_map_lock(dst_map);
10495 if( dst_map->disable_vmentry_reuse == TRUE) {
10496 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10497 last = entry;
10498 } else {
10499 if (dst_map->holelistenabled) {
10500 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10501
10502 if (hole_entry == NULL) {
10503 /*
10504 * No more space in the map?
10505 */
10506 vm_map_unlock(dst_map);
10507 return(KERN_NO_SPACE);
10508 }
10509
10510 last = hole_entry;
10511 start = last->vme_start;
10512 } else {
10513 assert(first_free_is_valid(dst_map));
10514 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10515 vm_map_min(dst_map) : last->vme_end;
10516 }
10517 start = vm_map_round_page(start,
10518 VM_MAP_PAGE_MASK(dst_map));
10519 }
10520
10521 while (TRUE) {
10522 vm_map_entry_t next = last->vme_next;
10523 vm_map_offset_t end = start + size;
10524
10525 if ((end > dst_map->max_offset) || (end < start)) {
10526 if (dst_map->wait_for_space) {
10527 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10528 assert_wait((event_t) dst_map,
10529 THREAD_INTERRUPTIBLE);
10530 vm_map_unlock(dst_map);
10531 thread_block(THREAD_CONTINUE_NULL);
10532 goto StartAgain;
10533 }
10534 }
10535 vm_map_unlock(dst_map);
10536 return(KERN_NO_SPACE);
10537 }
10538
10539 if (dst_map->holelistenabled) {
10540 if (last->vme_end >= end)
10541 break;
10542 } else {
10543 /*
10544 * If there are no more entries, we must win.
10545 *
10546 * OR
10547 *
10548 * If there is another entry, it must be
10549 * after the end of the potential new region.
10550 */
10551
10552 if (next == vm_map_to_entry(dst_map))
10553 break;
10554
10555 if (next->vme_start >= end)
10556 break;
10557 }
10558
10559 last = next;
10560
10561 if (dst_map->holelistenabled) {
10562 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10563 /*
10564 * Wrapped around
10565 */
10566 vm_map_unlock(dst_map);
10567 return(KERN_NO_SPACE);
10568 }
10569 start = last->vme_start;
10570 } else {
10571 start = last->vme_end;
10572 }
10573 start = vm_map_round_page(start,
10574 VM_MAP_PAGE_MASK(dst_map));
10575 }
10576
10577 if (dst_map->holelistenabled) {
10578 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10579 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10580 }
10581 }
10582
10583
10584 adjustment = start - vm_copy_start;
10585 if (! consume_on_success) {
10586 /*
10587 * We're not allowed to consume "copy", so we'll have to
10588 * copy its map entries into the destination map below.
10589 * No need to re-allocate map entries from the correct
10590 * (pageable or not) zone, since we'll get new map entries
10591 * during the transfer.
10592 * We'll also adjust the map entries' "start" and "end"
10593 * during the transfer, to keep "copy"'s entries consistent
10594 * with its "offset".
10595 */
10596 goto after_adjustments;
10597 }
10598
10599 /*
10600 * Since we're going to just drop the map
10601 * entries from the copy into the destination
10602 * map, they must come from the same pool.
10603 */
10604
10605 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10606 /*
10607 * Mismatches occur when dealing with the default
10608 * pager.
10609 */
10610 zone_t old_zone;
10611 vm_map_entry_t next, new;
10612
10613 /*
10614 * Find the zone that the copies were allocated from
10615 */
10616
10617 entry = vm_map_copy_first_entry(copy);
10618
10619 /*
10620 * Reinitialize the copy so that vm_map_copy_entry_link
10621 * will work.
10622 */
10623 vm_map_store_copy_reset(copy, entry);
10624 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10625
10626 /*
10627 * Copy each entry.
10628 */
10629 while (entry != vm_map_copy_to_entry(copy)) {
10630 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10631 vm_map_entry_copy_full(new, entry);
10632 assert(!new->iokit_acct);
10633 if (new->is_sub_map) {
10634 /* clr address space specifics */
10635 new->use_pmap = FALSE;
10636 }
10637 vm_map_copy_entry_link(copy,
10638 vm_map_copy_last_entry(copy),
10639 new);
10640 next = entry->vme_next;
10641 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10642 zfree(old_zone, entry);
10643 entry = next;
10644 }
10645 }
10646
10647 /*
10648 * Adjust the addresses in the copy chain, and
10649 * reset the region attributes.
10650 */
10651
10652 for (entry = vm_map_copy_first_entry(copy);
10653 entry != vm_map_copy_to_entry(copy);
10654 entry = entry->vme_next) {
10655 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10656 /*
10657 * We're injecting this copy entry into a map that
10658 * has the standard page alignment, so clear
10659 * "map_aligned" (which might have been inherited
10660 * from the original map entry).
10661 */
10662 entry->map_aligned = FALSE;
10663 }
10664
10665 entry->vme_start += adjustment;
10666 entry->vme_end += adjustment;
10667
10668 if (entry->map_aligned) {
10669 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10670 VM_MAP_PAGE_MASK(dst_map)));
10671 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10672 VM_MAP_PAGE_MASK(dst_map)));
10673 }
10674
10675 entry->inheritance = VM_INHERIT_DEFAULT;
10676 entry->protection = VM_PROT_DEFAULT;
10677 entry->max_protection = VM_PROT_ALL;
10678 entry->behavior = VM_BEHAVIOR_DEFAULT;
10679
10680 /*
10681 * If the entry is now wired,
10682 * map the pages into the destination map.
10683 */
10684 if (entry->wired_count != 0) {
10685 vm_map_offset_t va;
10686 vm_object_offset_t offset;
10687 vm_object_t object;
10688 vm_prot_t prot;
10689 int type_of_fault;
10690
10691 object = VME_OBJECT(entry);
10692 offset = VME_OFFSET(entry);
10693 va = entry->vme_start;
10694
10695 pmap_pageable(dst_map->pmap,
10696 entry->vme_start,
10697 entry->vme_end,
10698 TRUE);
10699
10700 while (va < entry->vme_end) {
10701 vm_page_t m;
10702 struct vm_object_fault_info fault_info = {};
10703
10704 /*
10705 * Look up the page in the object.
10706 * Assert that the page will be found in the
10707 * top object:
10708 * either
10709 * the object was newly created by
10710 * vm_object_copy_slowly, and has
10711 * copies of all of the pages from
10712 * the source object
10713 * or
10714 * the object was moved from the old
10715 * map entry; because the old map
10716 * entry was wired, all of the pages
10717 * were in the top-level object.
10718 * (XXX not true if we wire pages for
10719 * reading)
10720 */
10721 vm_object_lock(object);
10722
10723 m = vm_page_lookup(object, offset);
10724 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10725 m->vmp_absent)
10726 panic("vm_map_copyout: wiring %p", m);
10727
10728 prot = entry->protection;
10729
10730 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10731 prot)
10732 prot |= VM_PROT_EXECUTE;
10733
10734 type_of_fault = DBG_CACHE_HIT_FAULT;
10735
10736 fault_info.user_tag = VME_ALIAS(entry);
10737 fault_info.pmap_options = 0;
10738 if (entry->iokit_acct ||
10739 (!entry->is_sub_map && !entry->use_pmap)) {
10740 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10741 }
10742
10743 vm_fault_enter(m,
10744 dst_map->pmap,
10745 va,
10746 prot,
10747 prot,
10748 VM_PAGE_WIRED(m),
10749 FALSE, /* change_wiring */
10750 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10751 &fault_info,
10752 NULL, /* need_retry */
10753 &type_of_fault);
10754
10755 vm_object_unlock(object);
10756
10757 offset += PAGE_SIZE_64;
10758 va += PAGE_SIZE;
10759 }
10760 }
10761 }
10762
10763after_adjustments:
10764
10765 /*
10766 * Correct the page alignment for the result
10767 */
10768
10769 *dst_addr = start + (copy->offset - vm_copy_start);
10770
10771#if KASAN
10772 kasan_notify_address(*dst_addr, size);
10773#endif
10774
10775 /*
10776 * Update the hints and the map size
10777 */
10778
10779 if (consume_on_success) {
10780 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10781 } else {
10782 SAVE_HINT_MAP_WRITE(dst_map, last);
10783 }
10784
10785 dst_map->size += size;
10786
10787 /*
10788 * Link in the copy
10789 */
10790
10791 if (consume_on_success) {
10792 vm_map_copy_insert(dst_map, last, copy);
10793 } else {
10794 vm_map_copy_remap(dst_map, last, copy, adjustment,
10795 cur_protection, max_protection,
10796 inheritance);
10797 }
10798
10799 vm_map_unlock(dst_map);
10800
10801 /*
10802 * XXX If wiring_required, call vm_map_pageable
10803 */
10804
10805 return(KERN_SUCCESS);
10806}
10807
10808/*
10809 * Routine: vm_map_copyin
10810 *
10811 * Description:
10812 * see vm_map_copyin_common. Exported via Unsupported.exports.
10813 *
10814 */
10815
10816#undef vm_map_copyin
10817
10818kern_return_t
10819vm_map_copyin(
10820 vm_map_t src_map,
10821 vm_map_address_t src_addr,
10822 vm_map_size_t len,
10823 boolean_t src_destroy,
10824 vm_map_copy_t *copy_result) /* OUT */
10825{
10826 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
10827 FALSE, copy_result, FALSE));
10828}
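/*
 * Illustrative sketch (added commentary, not part of the original source):
 * a hypothetical in-kernel helper that moves "len" bytes from one map into
 * a freshly allocated range of another, using vm_map_copyin() above and
 * vm_map_copyout() earlier in this file.  Since vm_map_copyout() consumes
 * the copy object only on success, the helper discards it on failure.  All
 * names below are invented for illustration only.
 *
 *	static kern_return_t
 *	move_region_example(
 *		vm_map_t		src_map,
 *		vm_map_address_t	src_addr,
 *		vm_map_size_t		len,
 *		vm_map_t		dst_map,
 *		vm_map_address_t	*dst_addr)
 *	{
 *		vm_map_copy_t	copy;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_copyin(src_map, src_addr, len, TRUE, &copy);
 *		if (kr != KERN_SUCCESS) {
 *			return kr;
 *		}
 *		kr = vm_map_copyout(dst_map, dst_addr, copy);
 *		if (kr != KERN_SUCCESS) {
 *			vm_map_copy_discard(copy);
 *		}
 *		return kr;
 *	}
 *
 * Passing TRUE for src_destroy removes the source range once it has been
 * captured; passing FALSE would leave the source mapping in place.
 */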
10829
10830/*
10831 * Routine: vm_map_copyin_common
10832 *
10833 * Description:
10834 * Copy the specified region (src_addr, len) from the
10835 * source address space (src_map), possibly removing
10836 * the region from the source address space (src_destroy).
10837 *
10838 * Returns:
10839 * A vm_map_copy_t object (copy_result), suitable for
10840 * insertion into another address space (using vm_map_copyout),
10841 * copying over another address space region (using
10842 * vm_map_copy_overwrite). If the copy is unused, it
10843 * should be destroyed (using vm_map_copy_discard).
10844 *
10845 * In/out conditions:
10846 * The source map should not be locked on entry.
10847 */
10848
10849typedef struct submap_map {
10850 vm_map_t parent_map;
10851 vm_map_offset_t base_start;
10852 vm_map_offset_t base_end;
10853 vm_map_size_t base_len;
10854 struct submap_map *next;
10855} submap_map_t;
10856
10857kern_return_t
10858vm_map_copyin_common(
10859 vm_map_t src_map,
10860 vm_map_address_t src_addr,
10861 vm_map_size_t len,
10862 boolean_t src_destroy,
10863 __unused boolean_t src_volatile,
10864 vm_map_copy_t *copy_result, /* OUT */
10865 boolean_t use_maxprot)
10866{
10867 int flags;
10868
10869 flags = 0;
10870 if (src_destroy) {
10871 flags |= VM_MAP_COPYIN_SRC_DESTROY;
10872 }
10873 if (use_maxprot) {
10874 flags |= VM_MAP_COPYIN_USE_MAXPROT;
10875 }
10876 return vm_map_copyin_internal(src_map,
10877 src_addr,
10878 len,
10879 flags,
10880 copy_result);
10881}
10882kern_return_t
10883vm_map_copyin_internal(
10884 vm_map_t src_map,
10885 vm_map_address_t src_addr,
10886 vm_map_size_t len,
10887 int flags,
10888 vm_map_copy_t *copy_result) /* OUT */
10889{
10890 vm_map_entry_t tmp_entry; /* Result of last map lookup --
10891 * in multi-level lookup, this
10892 * entry contains the actual
10893 * vm_object/offset.
10894 */
10895 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
10896
10897 vm_map_offset_t src_start; /* Start of current entry --
10898 * where copy is taking place now
10899 */
10900 vm_map_offset_t src_end; /* End of entire region to be
10901 * copied */
10902 vm_map_offset_t src_base;
10903 vm_map_t base_map = src_map;
10904 boolean_t map_share=FALSE;
10905 submap_map_t *parent_maps = NULL;
10906
10907 vm_map_copy_t copy; /* Resulting copy */
10908 vm_map_address_t copy_addr;
10909 vm_map_size_t copy_size;
10910 boolean_t src_destroy;
10911 boolean_t use_maxprot;
10912 boolean_t preserve_purgeable;
10913 boolean_t entry_was_shared;
10914 vm_map_entry_t saved_src_entry;
10915
10916 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
10917 return KERN_INVALID_ARGUMENT;
10918 }
10919
10920 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
10921 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
10922 preserve_purgeable =
10923 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
10924
10925 /*
10926 * Check for copies of zero bytes.
10927 */
10928
10929 if (len == 0) {
10930 *copy_result = VM_MAP_COPY_NULL;
10931 return(KERN_SUCCESS);
10932 }
10933
10934 /*
10935 * Check that the end address doesn't overflow
10936 */
10937 src_end = src_addr + len;
10938 if (src_end < src_addr)
10939 return KERN_INVALID_ADDRESS;
10940
10941 /*
10942 * Compute (page aligned) start and end of region
10943 */
10944 src_start = vm_map_trunc_page(src_addr,
10945 VM_MAP_PAGE_MASK(src_map));
10946 src_end = vm_map_round_page(src_end,
10947 VM_MAP_PAGE_MASK(src_map));
10948
10949 /*
10950 * If the copy is sufficiently small, use a kernel buffer instead
10951 * of making a virtual copy. The theory is that the cost of
10952 * setting up VM (and taking C-O-W faults) dominates the copy cost
10953 * for small regions.
10954 */
10955 if ((len < msg_ool_size_small) &&
10956 !use_maxprot &&
10957 !preserve_purgeable &&
10958 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
10959 /*
10960 * Since the "msg_ool_size_small" threshold was increased and
10961 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
10962 * address space limits, we revert to doing a virtual copy if the
10963 * copied range goes beyond those limits. Otherwise, mach_vm_read()
10964 * of the commpage would now fail when it used to work.
10965 */
10966 (src_start >= vm_map_min(src_map) &&
10967 src_start < vm_map_max(src_map) &&
10968 src_end >= vm_map_min(src_map) &&
10969 src_end < vm_map_max(src_map)))
10970 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
10971 src_destroy, copy_result);
10972
10973 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
10974
10975 /*
10976 * Allocate a header element for the list.
10977 *
10978 * Use the start and end in the header to
10979 * remember the endpoints prior to rounding.
10980 */
10981
10982 copy = vm_map_copy_allocate();
10983 copy->type = VM_MAP_COPY_ENTRY_LIST;
10984 copy->cpy_hdr.entries_pageable = TRUE;
10985#if 00
10986 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
10987#else
10988 /*
10989 * The copy entries can be broken down for a variety of reasons,
10990 * so we can't guarantee that they will remain map-aligned...
10991 * We will need to adjust the first copy_entry's "vme_start" and
10992 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
10993 * rather than the original map's alignment.
10994 */
10995 copy->cpy_hdr.page_shift = PAGE_SHIFT;
10996#endif
10997
10998 vm_map_store_init( &(copy->cpy_hdr) );
10999
11000 copy->offset = src_addr;
11001 copy->size = len;
11002
11003 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11004
11005#define RETURN(x) \
11006 MACRO_BEGIN \
11007 vm_map_unlock(src_map); \
11008 if(src_map != base_map) \
11009 vm_map_deallocate(src_map); \
11010 if (new_entry != VM_MAP_ENTRY_NULL) \
11011 vm_map_copy_entry_dispose(copy,new_entry); \
11012 vm_map_copy_discard(copy); \
11013 { \
11014 submap_map_t *_ptr; \
11015 \
11016 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11017 parent_maps=parent_maps->next; \
11018 if (_ptr->parent_map != base_map) \
11019 vm_map_deallocate(_ptr->parent_map); \
11020 kfree(_ptr, sizeof(submap_map_t)); \
11021 } \
11022 } \
11023 MACRO_RETURN(x); \
11024 MACRO_END
11025
11026 /*
11027 * Find the beginning of the region.
11028 */
11029
11030 vm_map_lock(src_map);
11031
11032 /*
11033 * Lookup the original "src_addr" rather than the truncated
11034 * "src_start", in case "src_start" falls in a non-map-aligned
11035 * map entry *before* the map entry that contains "src_addr"...
11036 */
11037 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
11038 RETURN(KERN_INVALID_ADDRESS);
11039 if(!tmp_entry->is_sub_map) {
11040 /*
11041 * ... but clip to the map-rounded "src_start" rather than
11042 * "src_addr" to preserve map-alignment. We'll adjust the
11043 * first copy entry at the end, if needed.
11044 */
11045 vm_map_clip_start(src_map, tmp_entry, src_start);
11046 }
11047 if (src_start < tmp_entry->vme_start) {
11048 /*
11049 * Move "src_start" up to the start of the
11050 * first map entry to copy.
11051 */
11052 src_start = tmp_entry->vme_start;
11053 }
11054 /* set for later submap fix-up */
11055 copy_addr = src_start;
11056
11057 /*
11058 * Go through entries until we get to the end.
11059 */
11060
11061 while (TRUE) {
11062 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11063 vm_map_size_t src_size; /* Size of source
11064 * map entry (in both
11065 * maps)
11066 */
11067
11068 vm_object_t src_object; /* Object to copy */
11069 vm_object_offset_t src_offset;
11070
11071 boolean_t src_needs_copy; /* Should source map
11072 * be made read-only
11073 * for copy-on-write?
11074 */
11075
11076 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11077
11078 boolean_t was_wired; /* Was source wired? */
11079 vm_map_version_t version; /* Version before locks
11080 * dropped to make copy
11081 */
11082 kern_return_t result; /* Return value from
11083 * copy_strategically.
11084 */
11085 while(tmp_entry->is_sub_map) {
11086 vm_map_size_t submap_len;
11087 submap_map_t *ptr;
11088
11089 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11090 ptr->next = parent_maps;
11091 parent_maps = ptr;
11092 ptr->parent_map = src_map;
11093 ptr->base_start = src_start;
11094 ptr->base_end = src_end;
11095 submap_len = tmp_entry->vme_end - src_start;
11096 if(submap_len > (src_end-src_start))
11097 submap_len = src_end-src_start;
11098 ptr->base_len = submap_len;
11099
11100 src_start -= tmp_entry->vme_start;
11101 src_start += VME_OFFSET(tmp_entry);
11102 src_end = src_start + submap_len;
11103 src_map = VME_SUBMAP(tmp_entry);
11104 vm_map_lock(src_map);
11105 /* keep an outstanding reference for all maps in */
11106 /* the tree of parent maps, except the base map */
11107 vm_map_reference(src_map);
11108 vm_map_unlock(ptr->parent_map);
11109 if (!vm_map_lookup_entry(
11110 src_map, src_start, &tmp_entry))
11111 RETURN(KERN_INVALID_ADDRESS);
11112 map_share = TRUE;
11113 if(!tmp_entry->is_sub_map)
11114 vm_map_clip_start(src_map, tmp_entry, src_start);
11115 src_entry = tmp_entry;
11116 }
11117 /* we are now in the lowest level submap... */
11118
11119 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11120 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11121 /* This is not supported for now. In the future */
11122 /* we will need to detect the phys_contig */
11123 /* condition and then upgrade copy_slowly */
11124 /* to do a physical copy from the device-memory- */
11125 /* based object. We can piggy-back off of */
11126 /* the was_wired boolean to set up the */
11127 /* proper handling. */
11128 RETURN(KERN_PROTECTION_FAILURE);
11129 }
11130 /*
11131 * Create a new address map entry to hold the result.
11132 * Fill in the fields from the appropriate source entries.
11133 * We must unlock the source map to do this if we need
11134 * to allocate a map entry.
11135 */
11136 if (new_entry == VM_MAP_ENTRY_NULL) {
11137 version.main_timestamp = src_map->timestamp;
11138 vm_map_unlock(src_map);
11139
11140 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11141
11142 vm_map_lock(src_map);
11143 if ((version.main_timestamp + 1) != src_map->timestamp) {
11144 if (!vm_map_lookup_entry(src_map, src_start,
11145 &tmp_entry)) {
11146 RETURN(KERN_INVALID_ADDRESS);
11147 }
11148 if (!tmp_entry->is_sub_map)
11149 vm_map_clip_start(src_map, tmp_entry, src_start);
11150 continue; /* restart w/ new tmp_entry */
11151 }
11152 }
11153
11154 /*
11155 * Verify that the region can be read.
11156 */
11157 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11158 !use_maxprot) ||
11159 (src_entry->max_protection & VM_PROT_READ) == 0)
11160 RETURN(KERN_PROTECTION_FAILURE);
11161
11162 /*
11163 * Clip against the endpoints of the entire region.
11164 */
11165
11166 vm_map_clip_end(src_map, src_entry, src_end);
11167
11168 src_size = src_entry->vme_end - src_start;
11169 src_object = VME_OBJECT(src_entry);
11170 src_offset = VME_OFFSET(src_entry);
11171 was_wired = (src_entry->wired_count != 0);
11172
11173 vm_map_entry_copy(new_entry, src_entry);
11174 if (new_entry->is_sub_map) {
11175 /* clr address space specifics */
11176 new_entry->use_pmap = FALSE;
11177 } else {
11178 /*
11179 * We're dealing with a copy-on-write operation,
11180 * so the resulting mapping should not inherit the
11181 * original mapping's accounting settings.
11182 * "iokit_acct" should have been cleared in
11183 * vm_map_entry_copy().
11184 * "use_pmap" should be reset to its default (TRUE)
11185 * so that the new mapping gets accounted for in
11186 * the task's memory footprint.
11187 */
11188 assert(!new_entry->iokit_acct);
11189 new_entry->use_pmap = TRUE;
11190 }
11191
11192 /*
11193 * Attempt non-blocking copy-on-write optimizations.
11194 */
11195
11196 if (src_destroy &&
11197 (src_object == VM_OBJECT_NULL ||
11198 (src_object->internal &&
11199 src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11200 !map_share))) {
11201 /*
11202 * If we are destroying the source, and the object
11203 * is internal, we can move the object reference
11204 * from the source to the copy. The copy is
11205 * copy-on-write only if the source is.
11206 * We make another reference to the object, because
11207 * destroying the source entry will deallocate it.
11208 */
11209 vm_object_reference(src_object);
11210
11211 /*
11212 * The copy is always unwired; the new vm_map_copy
11213 * entry has its wired count set to zero.
11214 */
11215
11216 goto CopySuccessful;
11217 }
11218
11219
11220 RestartCopy:
11221 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
11222 src_object, new_entry, VME_OBJECT(new_entry),
11223 was_wired, 0);
11224 if ((src_object == VM_OBJECT_NULL ||
11225 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11226 vm_object_copy_quickly(
11227 &VME_OBJECT(new_entry),
11228 src_offset,
11229 src_size,
11230 &src_needs_copy,
11231 &new_entry_needs_copy)) {
11232
11233 new_entry->needs_copy = new_entry_needs_copy;
11234
11235 /*
11236 * Handle copy-on-write obligations
11237 */
11238
11239 if (src_needs_copy && !tmp_entry->needs_copy) {
11240 vm_prot_t prot;
11241
11242 prot = src_entry->protection & ~VM_PROT_WRITE;
11243
11244 if (override_nx(src_map, VME_ALIAS(src_entry))
11245 && prot)
11246 prot |= VM_PROT_EXECUTE;
11247
11248 vm_object_pmap_protect(
11249 src_object,
11250 src_offset,
11251 src_size,
11252 (src_entry->is_shared ?
11253 PMAP_NULL
11254 : src_map->pmap),
11255 src_entry->vme_start,
11256 prot);
11257
11258 assert(tmp_entry->wired_count == 0);
11259 tmp_entry->needs_copy = TRUE;
11260 }
11261
11262 /*
11263 * The map has never been unlocked, so it's safe
11264 * to move to the next entry rather than doing
11265 * another lookup.
11266 */
11267
11268 goto CopySuccessful;
11269 }
11270
11271 entry_was_shared = tmp_entry->is_shared;
11272
11273 /*
11274 * Take an object reference, so that we may
11275 * release the map lock(s).
11276 */
11277
11278 assert(src_object != VM_OBJECT_NULL);
11279 vm_object_reference(src_object);
11280
11281 /*
11282 * Record the timestamp for later verification.
11283 * Unlock the map.
11284 */
11285
11286 version.main_timestamp = src_map->timestamp;
11287 vm_map_unlock(src_map); /* Increments timestamp once! */
11288 saved_src_entry = src_entry;
11289 tmp_entry = VM_MAP_ENTRY_NULL;
11290 src_entry = VM_MAP_ENTRY_NULL;
11291
11292 /*
11293 * Perform the copy
11294 */
11295
11296 if (was_wired) {
11297 CopySlowly:
11298 vm_object_lock(src_object);
11299 result = vm_object_copy_slowly(
11300 src_object,
11301 src_offset,
11302 src_size,
11303 THREAD_UNINT,
11304 &VME_OBJECT(new_entry));
11305 VME_OFFSET_SET(new_entry, 0);
11306 new_entry->needs_copy = FALSE;
11307 }
11308 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11309 (entry_was_shared || map_share)) {
11310 vm_object_t new_object;
11311
11312 vm_object_lock_shared(src_object);
11313 new_object = vm_object_copy_delayed(
11314 src_object,
11315 src_offset,
11316 src_size,
11317 TRUE);
11318 if (new_object == VM_OBJECT_NULL)
11319 goto CopySlowly;
11320
11321 VME_OBJECT_SET(new_entry, new_object);
11322 assert(new_entry->wired_count == 0);
11323 new_entry->needs_copy = TRUE;
11324 assert(!new_entry->iokit_acct);
11325 assert(new_object->purgable == VM_PURGABLE_DENY);
11326 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11327 result = KERN_SUCCESS;
11328
11329 } else {
11330 vm_object_offset_t new_offset;
11331 new_offset = VME_OFFSET(new_entry);
11332 result = vm_object_copy_strategically(src_object,
11333 src_offset,
11334 src_size,
11335 &VME_OBJECT(new_entry),
11336 &new_offset,
11337 &new_entry_needs_copy);
11338 if (new_offset != VME_OFFSET(new_entry)) {
11339 VME_OFFSET_SET(new_entry, new_offset);
11340 }
11341
11342 new_entry->needs_copy = new_entry_needs_copy;
11343 }
11344
11345 if (result == KERN_SUCCESS &&
11346 preserve_purgeable &&
11347 src_object->purgable != VM_PURGABLE_DENY) {
11348 vm_object_t new_object;
11349
11350 new_object = VME_OBJECT(new_entry);
11351 assert(new_object != src_object);
11352 vm_object_lock(new_object);
11353 assert(new_object->ref_count == 1);
11354 assert(new_object->shadow == VM_OBJECT_NULL);
11355 assert(new_object->copy == VM_OBJECT_NULL);
11356 assert(new_object->vo_owner == NULL);
11357
11358 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11359 new_object->true_share = TRUE;
11360 /* start as non-volatile with no owner... */
11361 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11362 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11363 /* ... and move to src_object's purgeable state */
11364 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11365 int state;
11366 state = src_object->purgable;
11367 vm_object_purgable_control(
11368 new_object,
11369 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11370 &state);
11371 }
11372 vm_object_unlock(new_object);
11373 new_object = VM_OBJECT_NULL;
11374 /* no pmap accounting for purgeable objects */
11375 new_entry->use_pmap = FALSE;
11376 }
11377
11378 if (result != KERN_SUCCESS &&
11379 result != KERN_MEMORY_RESTART_COPY) {
11380 vm_map_lock(src_map);
11381 RETURN(result);
11382 }
11383
11384 /*
11385 * Throw away the extra reference
11386 */
11387
11388 vm_object_deallocate(src_object);
11389
11390 /*
11391 * Verify that the map has not substantially
11392 * changed while the copy was being made.
11393 */
11394
11395 vm_map_lock(src_map);
11396
11397 if ((version.main_timestamp + 1) == src_map->timestamp) {
11398 /* src_map hasn't changed: src_entry is still valid */
11399 src_entry = saved_src_entry;
11400 goto VerificationSuccessful;
11401 }
11402
11403 /*
11404 * Simple version comparison failed.
11405 *
11406 * Retry the lookup and verify that the
11407 * same object/offset are still present.
11408 *
11409 * [Note: a memory manager that colludes with
11410 * the calling task can detect that we have
11411 * cheated. While the map was unlocked, the
11412 * mapping could have been changed and restored.]
11413 */
11414
11415 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11416 if (result != KERN_MEMORY_RESTART_COPY) {
11417 vm_object_deallocate(VME_OBJECT(new_entry));
11418 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11419 /* reset accounting state */
11420 new_entry->iokit_acct = FALSE;
11421 new_entry->use_pmap = TRUE;
11422 }
11423 RETURN(KERN_INVALID_ADDRESS);
11424 }
11425
11426 src_entry = tmp_entry;
11427 vm_map_clip_start(src_map, src_entry, src_start);
11428
11429 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11430 !use_maxprot) ||
11431 ((src_entry->max_protection & VM_PROT_READ) == 0))
11432 goto VerificationFailed;
11433
11434 if (src_entry->vme_end < new_entry->vme_end) {
11435 /*
11436 * This entry might have been shortened
11437 * (vm_map_clip_end) or been replaced with
11438 * an entry that ends closer to "src_start"
11439 * than before.
11440 * Adjust "new_entry" accordingly; copying
11441 * less memory would be correct but we also
11442 * redo the copy (see below) if the new entry
11443 * no longer points at the same object/offset.
11444 */
11445 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11446 VM_MAP_COPY_PAGE_MASK(copy)));
11447 new_entry->vme_end = src_entry->vme_end;
11448 src_size = new_entry->vme_end - src_start;
11449 } else if (src_entry->vme_end > new_entry->vme_end) {
11450 /*
11451 * This entry might have been extended
11452 * (vm_map_entry_simplify() or coalesce)
11453 * or been replaced with an entry that ends farther
11454 * from "src_start" than before.
11455 *
11456 * We've called vm_object_copy_*() only on
11457 * the previous <start:end> range, so we can't
11458 * just extend new_entry. We have to re-do
11459 * the copy based on the new entry as if it was
11460 * pointing at a different object/offset (see
11461 * "Verification failed" below).
11462 */
11463 }
11464
11465 if ((VME_OBJECT(src_entry) != src_object) ||
11466 (VME_OFFSET(src_entry) != src_offset) ||
11467 (src_entry->vme_end > new_entry->vme_end)) {
11468
11469 /*
11470 * Verification failed.
11471 *
11472 * Start over with this top-level entry.
11473 */
11474
11475 VerificationFailed: ;
11476
11477 vm_object_deallocate(VME_OBJECT(new_entry));
11478 tmp_entry = src_entry;
11479 continue;
11480 }
11481
11482 /*
11483 * Verification succeeded.
11484 */
11485
11486 VerificationSuccessful: ;
11487
11488 if (result == KERN_MEMORY_RESTART_COPY)
11489 goto RestartCopy;
11490
11491 /*
11492 * Copy succeeded.
11493 */
11494
11495 CopySuccessful: ;
11496
11497 /*
11498 * Link in the new copy entry.
11499 */
11500
11501 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11502 new_entry);
11503
11504 /*
11505 * Determine whether the entire region
11506 * has been copied.
11507 */
11508 src_base = src_start;
11509 src_start = new_entry->vme_end;
11510 new_entry = VM_MAP_ENTRY_NULL;
11511 while ((src_start >= src_end) && (src_end != 0)) {
11512 submap_map_t *ptr;
11513
11514 if (src_map == base_map) {
11515 /* back to the top */
11516 break;
11517 }
11518
11519 ptr = parent_maps;
11520 assert(ptr != NULL);
11521 parent_maps = parent_maps->next;
11522
11523 /* fix up the damage we did in that submap */
11524 vm_map_simplify_range(src_map,
11525 src_base,
11526 src_end);
11527
11528 vm_map_unlock(src_map);
11529 vm_map_deallocate(src_map);
11530 vm_map_lock(ptr->parent_map);
11531 src_map = ptr->parent_map;
11532 src_base = ptr->base_start;
11533 src_start = ptr->base_start + ptr->base_len;
11534 src_end = ptr->base_end;
11535 if (!vm_map_lookup_entry(src_map,
11536 src_start,
11537 &tmp_entry) &&
11538 (src_end > src_start)) {
11539 RETURN(KERN_INVALID_ADDRESS);
11540 }
11541 kfree(ptr, sizeof(submap_map_t));
11542 if (parent_maps == NULL)
11543 map_share = FALSE;
11544 src_entry = tmp_entry->vme_prev;
11545 }
11546
11547 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11548 (src_start >= src_addr + len) &&
11549 (src_addr + len != 0)) {
11550 /*
11551 * Stop copying now, even though we haven't reached
11552 * "src_end". We'll adjust the end of the last copy
11553 * entry at the end, if needed.
11554 *
11555 * If src_map's alignment is different from the
11556 * system's page-alignment, there could be
11557 * extra non-map-aligned map entries between
11558 * the original (non-rounded) "src_addr + len"
11559 * and the rounded "src_end".
11560 * We do not want to copy those map entries since
11561 * they're not part of the copied range.
11562 */
11563 break;
11564 }
11565
11566 if ((src_start >= src_end) && (src_end != 0))
11567 break;
11568
11569 /*
11570 * Verify that there are no gaps in the region
11571 */
11572
11573 tmp_entry = src_entry->vme_next;
11574 if ((tmp_entry->vme_start != src_start) ||
11575 (tmp_entry == vm_map_to_entry(src_map))) {
11576 RETURN(KERN_INVALID_ADDRESS);
11577 }
11578 }
11579
11580 /*
11581 * If the source should be destroyed, do it now, since the
11582 * copy was successful.
11583 */
11584 if (src_destroy) {
11585 (void) vm_map_delete(
11586 src_map,
11587 vm_map_trunc_page(src_addr,
11588 VM_MAP_PAGE_MASK(src_map)),
11589 src_end,
11590 ((src_map == kernel_map) ?
11591 VM_MAP_REMOVE_KUNWIRE :
11592 VM_MAP_REMOVE_NO_FLAGS),
11593 VM_MAP_NULL);
11594 } else {
11595 /* fix up the damage we did in the base map */
11596 vm_map_simplify_range(
11597 src_map,
11598 vm_map_trunc_page(src_addr,
11599 VM_MAP_PAGE_MASK(src_map)),
11600 vm_map_round_page(src_end,
11601 VM_MAP_PAGE_MASK(src_map)));
11602 }
11603
11604 vm_map_unlock(src_map);
11605 tmp_entry = VM_MAP_ENTRY_NULL;
11606
11607 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11608 vm_map_offset_t original_start, original_offset, original_end;
11609
11610 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11611
11612 /* adjust alignment of first copy_entry's "vme_start" */
11613 tmp_entry = vm_map_copy_first_entry(copy);
11614 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11615 vm_map_offset_t adjustment;
11616
11617 original_start = tmp_entry->vme_start;
11618 original_offset = VME_OFFSET(tmp_entry);
11619
11620 /* map-align the start of the first copy entry... */
11621 adjustment = (tmp_entry->vme_start -
11622 vm_map_trunc_page(
11623 tmp_entry->vme_start,
11624 VM_MAP_PAGE_MASK(src_map)));
11625 tmp_entry->vme_start -= adjustment;
11626 VME_OFFSET_SET(tmp_entry,
11627 VME_OFFSET(tmp_entry) - adjustment);
11628 copy_addr -= adjustment;
11629 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11630 /* ... adjust for mis-aligned start of copy range */
11631 adjustment =
11632 (vm_map_trunc_page(copy->offset,
11633 PAGE_MASK) -
11634 vm_map_trunc_page(copy->offset,
11635 VM_MAP_PAGE_MASK(src_map)));
11636 if (adjustment) {
11637 assert(page_aligned(adjustment));
11638 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11639 tmp_entry->vme_start += adjustment;
11640 VME_OFFSET_SET(tmp_entry,
11641 (VME_OFFSET(tmp_entry) +
11642 adjustment));
11643 copy_addr += adjustment;
11644 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11645 }
11646
11647 /*
11648 * Assert that the adjustments haven't exposed
11649 * more than was originally copied...
11650 */
11651 assert(tmp_entry->vme_start >= original_start);
11652 assert(VME_OFFSET(tmp_entry) >= original_offset);
11653 /*
11654 * ... and that it did not adjust outside of
11655 * a single 16K page.
11656 */
11657 assert(vm_map_trunc_page(tmp_entry->vme_start,
11658 VM_MAP_PAGE_MASK(src_map)) ==
11659 vm_map_trunc_page(original_start,
11660 VM_MAP_PAGE_MASK(src_map)));
11661 }
11662
11663 /* adjust alignment of last copy_entry's "vme_end" */
11664 tmp_entry = vm_map_copy_last_entry(copy);
11665 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11666 vm_map_offset_t adjustment;
11667
11668 original_end = tmp_entry->vme_end;
11669
11670 /* map-align the end of the last copy entry... */
11671 tmp_entry->vme_end =
11672 vm_map_round_page(tmp_entry->vme_end,
11673 VM_MAP_PAGE_MASK(src_map));
11674 /* ... adjust for mis-aligned end of copy range */
11675 adjustment =
11676 (vm_map_round_page((copy->offset +
11677 copy->size),
11678 VM_MAP_PAGE_MASK(src_map)) -
11679 vm_map_round_page((copy->offset +
11680 copy->size),
11681 PAGE_MASK));
11682 if (adjustment) {
11683 assert(page_aligned(adjustment));
11684 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11685 tmp_entry->vme_end -= adjustment;
11686 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11687 }
11688
11689 /*
11690 * Assert that the adjustments haven't exposed
11691 * more than was originally copied...
11692 */
11693 assert(tmp_entry->vme_end <= original_end);
11694 /*
11695 * ... and that it did not adjust outside of
11696 * a single 16K page.
11697 */
11698 assert(vm_map_round_page(tmp_entry->vme_end,
11699 VM_MAP_PAGE_MASK(src_map)) ==
11700 vm_map_round_page(original_end,
11701 VM_MAP_PAGE_MASK(src_map)));
11702 }
11703 }
11704
11705 /* Fix up the start and end points in the copy. This is */
11706 /* necessary when the various entries in the copy object */
11707 /* were picked up from different sub-maps. */
11708
11709 tmp_entry = vm_map_copy_first_entry(copy);
11710 copy_size = 0; /* compute actual size */
11711 while (tmp_entry != vm_map_copy_to_entry(copy)) {
11712 assert(VM_MAP_PAGE_ALIGNED(
11713 copy_addr + (tmp_entry->vme_end -
11714 tmp_entry->vme_start),
11715 VM_MAP_COPY_PAGE_MASK(copy)));
11716 assert(VM_MAP_PAGE_ALIGNED(
11717 copy_addr,
11718 VM_MAP_COPY_PAGE_MASK(copy)));
11719
11720 /*
11721 * The copy_entries will be injected directly into the
11722 * destination map and might not be "map aligned" there...
11723 */
11724 tmp_entry->map_aligned = FALSE;
11725
11726 tmp_entry->vme_end = copy_addr +
11727 (tmp_entry->vme_end - tmp_entry->vme_start);
11728 tmp_entry->vme_start = copy_addr;
11729 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11730 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11731 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11732 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11733 }
11734
11735 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11736 copy_size < copy->size) {
11737 /*
11738 * The actual size of the VM map copy is smaller than what
11739 * was requested by the caller. This must be because some
11740 * PAGE_SIZE-sized pages are missing at the end of the last
11741 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11742 * The caller might not have been aware of those missing
11743 * pages and might not want to be aware of it, which is
11744 * fine as long as they don't try to access (and crash on)
11745 * those missing pages.
11746 * Let's adjust the size of the "copy", to avoid failing
11747 * in vm_map_copyout() or vm_map_copy_overwrite().
11748 */
11749 assert(vm_map_round_page(copy_size,
11750 VM_MAP_PAGE_MASK(src_map)) ==
11751 vm_map_round_page(copy->size,
11752 VM_MAP_PAGE_MASK(src_map)));
11753 copy->size = copy_size;
11754 }
11755
11756 *copy_result = copy;
11757 return(KERN_SUCCESS);
11758
11759#undef RETURN
11760}
11761
11762kern_return_t
11763vm_map_copy_extract(
11764 vm_map_t src_map,
11765 vm_map_address_t src_addr,
11766 vm_map_size_t len,
11767 vm_map_copy_t *copy_result, /* OUT */
11768 vm_prot_t *cur_prot, /* OUT */
11769 vm_prot_t *max_prot)
11770{
11771 vm_map_offset_t src_start, src_end;
11772 vm_map_copy_t copy;
11773 kern_return_t kr;
11774
11775 /*
11776 * Check for copies of zero bytes.
11777 */
11778
11779 if (len == 0) {
11780 *copy_result = VM_MAP_COPY_NULL;
11781 return(KERN_SUCCESS);
11782 }
11783
11784 /*
11785 * Check that the end address doesn't overflow
11786 */
11787 src_end = src_addr + len;
11788 if (src_end < src_addr)
11789 return KERN_INVALID_ADDRESS;
11790
11791 /*
11792 * Compute (page aligned) start and end of region
11793 */
11794 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11795 src_end = vm_map_round_page(src_end, PAGE_MASK);
11796
11797 /*
11798 * Allocate a header element for the list.
11799 *
11800 * Use the start and end in the header to
11801 * remember the endpoints prior to rounding.
11802 */
11803
11804 copy = vm_map_copy_allocate();
11805 copy->type = VM_MAP_COPY_ENTRY_LIST;
11806 copy->cpy_hdr.entries_pageable = TRUE;
11807
11808 vm_map_store_init(&copy->cpy_hdr);
11809
11810 copy->offset = 0;
11811 copy->size = len;
11812
11813 kr = vm_map_remap_extract(src_map,
11814 src_addr,
11815 len,
11816 FALSE, /* copy */
11817 &copy->cpy_hdr,
11818 cur_prot,
11819 max_prot,
11820 VM_INHERIT_SHARE,
11821 TRUE, /* pageable */
11822 FALSE, /* same_map */
11823 VM_MAP_KERNEL_FLAGS_NONE);
11824 if (kr != KERN_SUCCESS) {
11825 vm_map_copy_discard(copy);
11826 return kr;
11827 }
11828
11829 *copy_result = copy;
11830 return KERN_SUCCESS;
11831}
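/*
 * Illustrative sketch (added commentary, not part of the original source):
 * vm_map_copy_extract() above reports the current and maximum protections
 * of the extracted range, which a hypothetical caller can inspect before
 * deciding how to expose the resulting copy chain.  "cur", "max" and the
 * surrounding logic are invented for illustration only.
 *
 *	vm_prot_t	cur, max;
 *	vm_map_copy_t	copy;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copy_extract(src_map, src_addr, len, &copy, &cur, &max);
 *	if (kr == KERN_SUCCESS && !(cur & VM_PROT_WRITE)) {
 *		... the extracted range is not currently writable ...
 *	}
 */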
11832
11833/*
11834 * vm_map_copyin_object:
11835 *
11836 * Create a copy object from an object.
11837 * Our caller donates an object reference.
11838 */
11839
11840kern_return_t
11841vm_map_copyin_object(
11842 vm_object_t object,
11843 vm_object_offset_t offset, /* offset of region in object */
11844 vm_object_size_t size, /* size of region in object */
11845 vm_map_copy_t *copy_result) /* OUT */
11846{
11847 vm_map_copy_t copy; /* Resulting copy */
11848
11849 /*
11850 * We drop the object into a special copy object
11851 * that contains the object directly.
11852 */
11853
11854 copy = vm_map_copy_allocate();
11855 copy->type = VM_MAP_COPY_OBJECT;
11856 copy->cpy_object = object;
11857 copy->offset = offset;
11858 copy->size = size;
11859
11860 *copy_result = copy;
11861 return(KERN_SUCCESS);
11862}
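/*
 * Illustrative sketch (added commentary, not part of the original source):
 * since the caller donates its object reference to the copy, a
 * hypothetical caller that wants to keep using "object" afterwards must
 * take an extra reference before wrapping it:
 *
 *	vm_map_copy_t	copy;
 *
 *	vm_object_reference(object);
 *	(void) vm_map_copyin_object(object, 0, size, &copy);
 *
 * Here "object" and "size" stand in for the caller's own state.
 */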
11863
11864static void
11865vm_map_fork_share(
11866 vm_map_t old_map,
11867 vm_map_entry_t old_entry,
11868 vm_map_t new_map)
11869{
11870 vm_object_t object;
11871 vm_map_entry_t new_entry;
11872
11873 /*
11874 * New sharing code. The new map entry
11875 * references the original object. Internal
11876 * objects use the asynchronous copy algorithm
11877 * for future copies. First make sure we have
11878 * the right object. If we need a shadow,
11879 * or someone else already has one, then
11880 * make a new shadow and share it.
11881 */
11882
11883 object = VME_OBJECT(old_entry);
11884 if (old_entry->is_sub_map) {
11885 assert(old_entry->wired_count == 0);
11886#ifndef NO_NESTED_PMAP
11887 if(old_entry->use_pmap) {
11888 kern_return_t result;
11889
11890 result = pmap_nest(new_map->pmap,
11891 (VME_SUBMAP(old_entry))->pmap,
11892 (addr64_t)old_entry->vme_start,
11893 (addr64_t)old_entry->vme_start,
11894 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
11895 if(result)
11896 panic("vm_map_fork_share: pmap_nest failed!");
11897 }
11898#endif /* NO_NESTED_PMAP */
11899 } else if (object == VM_OBJECT_NULL) {
11900 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
11901 old_entry->vme_start));
11902 VME_OFFSET_SET(old_entry, 0);
11903 VME_OBJECT_SET(old_entry, object);
11904 old_entry->use_pmap = TRUE;
11905// assert(!old_entry->needs_copy);
11906 } else if (object->copy_strategy !=
11907 MEMORY_OBJECT_COPY_SYMMETRIC) {
11908
11909 /*
11910 * We are already using an asymmetric
11911 * copy, and therefore we already have
11912 * the right object.
11913 */
11914
11915 assert(! old_entry->needs_copy);
11916 }
11917 else if (old_entry->needs_copy || /* case 1 */
11918 object->shadowed || /* case 2 */
11919 (!object->true_share && /* case 3 */
11920 !old_entry->is_shared &&
11921 (object->vo_size >
11922 (vm_map_size_t)(old_entry->vme_end -
11923 old_entry->vme_start)))) {
11924
11925 /*
11926 * We need to create a shadow.
11927 * There are three cases here.
11928 * In the first case, we need to
11929 * complete a deferred symmetrical
11930 * copy that we participated in.
11931 * In the second and third cases,
11932 * we need to create the shadow so
11933 * that changes that we make to the
11934 * object do not interfere with
11935 * any symmetrical copies which
11936	 * have occurred (case 2) or which
11937 * might occur (case 3).
11938 *
11939 * The first case is when we had
11940 * deferred shadow object creation
11941 * via the entry->needs_copy mechanism.
11942 * This mechanism only works when
11943 * only one entry points to the source
11944 * object, and we are about to create
11945 * a second entry pointing to the
11946 * same object. The problem is that
11947 * there is no way of mapping from
11948 * an object to the entries pointing
11949 * to it. (Deferred shadow creation
11950	 * works with one entry because it occurs
11951 * at fault time, and we walk from the
11952 * entry to the object when handling
11953 * the fault.)
11954 *
11955 * The second case is when the object
11956 * to be shared has already been copied
11957 * with a symmetric copy, but we point
11958 * directly to the object without
11959 * needs_copy set in our entry. (This
11960 * can happen because different ranges
11961 * of an object can be pointed to by
11962 * different entries. In particular,
11963 * a single entry pointing to an object
11964 * can be split by a call to vm_inherit,
11965 * which, combined with task_create, can
11966 * result in the different entries
11967 * having different needs_copy values.)
11968 * The shadowed flag in the object allows
11969 * us to detect this case. The problem
11970 * with this case is that if this object
11971 * has or will have shadows, then we
11972 * must not perform an asymmetric copy
11973 * of this object, since such a copy
11974 * allows the object to be changed, which
11975 * will break the previous symmetrical
11976 * copies (which rely upon the object
11977 * not changing). In a sense, the shadowed
11978 * flag says "don't change this object".
11979 * We fix this by creating a shadow
11980 * object for this object, and sharing
11981 * that. This works because we are free
11982 * to change the shadow object (and thus
11983 * to use an asymmetric copy strategy);
11984 * this is also semantically correct,
11985 * since this object is temporary, and
11986 * therefore a copy of the object is
11987 * as good as the object itself. (This
11988 * is not true for permanent objects,
11989 * since the pager needs to see changes,
11990 * which won't happen if the changes
11991 * are made to a copy.)
11992 *
11993 * The third case is when the object
11994 * to be shared has parts sticking
11995 * outside of the entry we're working
11996 * with, and thus may in the future
11997 * be subject to a symmetrical copy.
11998 * (This is a preemptive version of
11999 * case 2.)
12000 */
12001 VME_OBJECT_SHADOW(old_entry,
12002 (vm_map_size_t) (old_entry->vme_end -
12003 old_entry->vme_start));
12004
12005 /*
12006 * If we're making a shadow for other than
12007 * copy on write reasons, then we have
12008 * to remove write permission.
12009 */
12010
12011 if (!old_entry->needs_copy &&
12012 (old_entry->protection & VM_PROT_WRITE)) {
12013 vm_prot_t prot;
12014
12015 assert(!pmap_has_prot_policy(old_entry->protection));
12016
12017 prot = old_entry->protection & ~VM_PROT_WRITE;
12018
12019 assert(!pmap_has_prot_policy(prot));
12020
12021 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
12022 prot |= VM_PROT_EXECUTE;
12023
12024
12025 if (old_map->mapped_in_other_pmaps) {
12026 vm_object_pmap_protect(
12027 VME_OBJECT(old_entry),
12028 VME_OFFSET(old_entry),
12029 (old_entry->vme_end -
12030 old_entry->vme_start),
12031 PMAP_NULL,
12032 old_entry->vme_start,
12033 prot);
12034 } else {
12035 pmap_protect(old_map->pmap,
12036 old_entry->vme_start,
12037 old_entry->vme_end,
12038 prot);
12039 }
12040 }
12041
12042 old_entry->needs_copy = FALSE;
12043 object = VME_OBJECT(old_entry);
12044 }
12045
12046
12047 /*
12048 * If object was using a symmetric copy strategy,
12049 * change its copy strategy to the default
12050 * asymmetric copy strategy, which is copy_delay
12051 * in the non-norma case and copy_call in the
12052 * norma case. Bump the reference count for the
12053 * new entry.
12054 */
12055
12056 if(old_entry->is_sub_map) {
12057 vm_map_lock(VME_SUBMAP(old_entry));
12058 vm_map_reference(VME_SUBMAP(old_entry));
12059 vm_map_unlock(VME_SUBMAP(old_entry));
12060 } else {
12061 vm_object_lock(object);
12062 vm_object_reference_locked(object);
12063 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12064 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12065 }
12066 vm_object_unlock(object);
12067 }
12068
12069 /*
12070 * Clone the entry, using object ref from above.
12071 * Mark both entries as shared.
12072 */
12073
12074 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12075 * map or descendants */
12076 vm_map_entry_copy(new_entry, old_entry);
12077 old_entry->is_shared = TRUE;
12078 new_entry->is_shared = TRUE;
12079
12080 /*
12081 * We're dealing with a shared mapping, so the resulting mapping
12082 * should inherit some of the original mapping's accounting settings.
12083 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12084 * "use_pmap" should stay the same as before (if it hasn't been reset
12085 * to TRUE when we cleared "iokit_acct").
12086 */
12087 assert(!new_entry->iokit_acct);
12088
12089 /*
12090	 * If the old entry's inheritance is VM_INHERIT_NONE,
12091	 * the new entry is for a corpse fork, so remove the
12092	 * write permission from the new entry.
12093 */
12094 if (old_entry->inheritance == VM_INHERIT_NONE) {
12095
12096 new_entry->protection &= ~VM_PROT_WRITE;
12097 new_entry->max_protection &= ~VM_PROT_WRITE;
12098 }
12099
12100 /*
12101 * Insert the entry into the new map -- we
12102 * know we're inserting at the end of the new
12103 * map.
12104 */
12105
12106 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12107 VM_MAP_KERNEL_FLAGS_NONE);
12108
12109 /*
12110 * Update the physical map
12111 */
12112
12113 if (old_entry->is_sub_map) {
12114 /* Bill Angell pmap support goes here */
12115 } else {
12116 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12117 old_entry->vme_end - old_entry->vme_start,
12118 old_entry->vme_start);
12119 }
12120}
12121
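/*
 *	vm_map_fork_copy:
 *
 *	Slow path for copying one entry's range from "old_map" into
 *	"new_map" during fork: copy the range with vm_map_copyin_internal()
 *	using maximum protections, insert the resulting copy after the last
 *	entry of "new_map", and advance *old_entry_p past the copied region.
 *	Returns TRUE if the copy was made, FALSE if the range could not be
 *	copied (the caller just moves on to the next entry).  "old_map" is
 *	locked on entry and on return, but is unlocked while the copy is
 *	made.
 */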
12122static boolean_t
12123vm_map_fork_copy(
12124 vm_map_t old_map,
12125 vm_map_entry_t *old_entry_p,
12126 vm_map_t new_map,
12127 int vm_map_copyin_flags)
12128{
12129 vm_map_entry_t old_entry = *old_entry_p;
12130 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12131 vm_map_offset_t start = old_entry->vme_start;
12132 vm_map_copy_t copy;
12133 vm_map_entry_t last = vm_map_last_entry(new_map);
12134
12135 vm_map_unlock(old_map);
12136 /*
12137 * Use maxprot version of copyin because we
12138 * care about whether this memory can ever
12139 * be accessed, not just whether it's accessible
12140 * right now.
12141 */
12142 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12143 if (vm_map_copyin_internal(old_map, start, entry_size,
12144 vm_map_copyin_flags, &copy)
12145 != KERN_SUCCESS) {
12146 /*
12147 * The map might have changed while it
12148 * was unlocked, check it again. Skip
12149 * any blank space or permanently
12150 * unreadable region.
12151 */
12152 vm_map_lock(old_map);
12153 if (!vm_map_lookup_entry(old_map, start, &last) ||
12154 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12155 last = last->vme_next;
12156 }
12157 *old_entry_p = last;
12158
12159 /*
12160 * XXX For some error returns, want to
12161 * XXX skip to the next element. Note
12162 * that INVALID_ADDRESS and
12163 * PROTECTION_FAILURE are handled above.
12164 */
12165
12166 return FALSE;
12167 }
12168
12169 /*
12170 * Insert the copy into the new map
12171 */
12172
12173 vm_map_copy_insert(new_map, last, copy);
12174
12175 /*
12176 * Pick up the traversal at the end of
12177 * the copied region.
12178 */
12179
12180 vm_map_lock(old_map);
12181 start += entry_size;
12182 if (! vm_map_lookup_entry(old_map, start, &last)) {
12183 last = last->vme_next;
12184 } else {
12185 if (last->vme_start == start) {
12186 /*
12187 * No need to clip here and we don't
12188 * want to cause any unnecessary
12189 * unnesting...
12190 */
12191 } else {
12192 vm_map_clip_start(old_map, last, start);
12193 }
12194 }
12195 *old_entry_p = last;
12196
12197 return TRUE;
12198}
12199
12200/*
12201 * vm_map_fork:
12202 *
12203 * Create and return a new map based on the old
12204 * map, according to the inheritance values on the
12205 * regions in that map and the options.
12206 *
12207 * The source map must not be locked.
12208 */
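/*
 * Minimal usage sketch (illustrative only, not part of this file's code):
 * a fork-style caller that already holds a ledger for the child would
 * create the child's address map roughly as follows.  "child_ledger" and
 * "parent_map" are hypothetical names; the option bits shown are the ones
 * accepted below.
 *
 *	vm_map_t child_map;
 *
 *	child_map = vm_map_fork(child_ledger, parent_map,
 *				VM_MAP_FORK_SHARE_IF_INHERIT_NONE);
 *	if (child_map == VM_MAP_NULL) {
 *		// an unsupported option bit was passed
 *	}
 */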
12209vm_map_t
12210vm_map_fork(
12211 ledger_t ledger,
12212 vm_map_t old_map,
12213 int options)
12214{
12215 pmap_t new_pmap;
12216 vm_map_t new_map;
12217 vm_map_entry_t old_entry;
12218 vm_map_size_t new_size = 0, entry_size;
12219 vm_map_entry_t new_entry;
12220 boolean_t src_needs_copy;
12221 boolean_t new_entry_needs_copy;
12222 boolean_t pmap_is64bit;
12223 int vm_map_copyin_flags;
12224 vm_inherit_t old_entry_inheritance;
12225 int map_create_options;
12226 kern_return_t footprint_collect_kr;
12227
12228 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12229 VM_MAP_FORK_PRESERVE_PURGEABLE |
12230 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12231 /* unsupported option */
12232 return VM_MAP_NULL;
12233 }
12234
12235 pmap_is64bit =
12236#if defined(__i386__) || defined(__x86_64__)
12237 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12238#elif defined(__arm64__)
12239 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12240#elif defined(__arm__)
12241 FALSE;
12242#else
12243#error Unknown architecture.
12244#endif
12245
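	/* create the child's pmap with the same 32/64-bit-ness as the parent */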
12246 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
12247
12248 vm_map_reference_swap(old_map);
12249 vm_map_lock(old_map);
12250
12251 map_create_options = 0;
12252 if (old_map->hdr.entries_pageable) {
12253 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12254 }
12255 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12256 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12257 footprint_collect_kr = KERN_SUCCESS;
12258 }
12259 new_map = vm_map_create_options(new_pmap,
12260 old_map->min_offset,
12261 old_map->max_offset,
12262 map_create_options);
12263 vm_map_lock(new_map);
12264 vm_commit_pagezero_status(new_map);
12265 /* inherit the parent map's page size */
12266 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12267 for (
12268 old_entry = vm_map_first_entry(old_map);
12269 old_entry != vm_map_to_entry(old_map);
12270 ) {
12271
12272 entry_size = old_entry->vme_end - old_entry->vme_start;
12273
12274 old_entry_inheritance = old_entry->inheritance;
12275 /*
12276		 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12277 * share VM_INHERIT_NONE entries that are not backed by a
12278 * device pager.
12279 */
12280 if (old_entry_inheritance == VM_INHERIT_NONE &&
12281 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12282 !(!old_entry->is_sub_map &&
12283 VME_OBJECT(old_entry) != NULL &&
12284 VME_OBJECT(old_entry)->pager != NULL &&
12285 is_device_pager_ops(
12286 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12287 old_entry_inheritance = VM_INHERIT_SHARE;
12288 }
12289
12290 if (old_entry_inheritance != VM_INHERIT_NONE &&
12291 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12292 footprint_collect_kr == KERN_SUCCESS) {
12293 /*
12294 * The corpse won't have old_map->pmap to query
12295 * footprint information, so collect that data now
12296 * and store it in new_map->vmmap_corpse_footprint
12297 * for later autopsy.
12298 */
12299 footprint_collect_kr =
12300 vm_map_corpse_footprint_collect(old_map,
12301 old_entry,
12302 new_map);
12303 }
12304
12305 switch (old_entry_inheritance) {
12306 case VM_INHERIT_NONE:
12307 break;
12308
12309 case VM_INHERIT_SHARE:
12310 vm_map_fork_share(old_map, old_entry, new_map);
12311 new_size += entry_size;
12312 break;
12313
12314 case VM_INHERIT_COPY:
12315
12316 /*
12317 * Inline the copy_quickly case;
12318 * upon failure, fall back on call
12319 * to vm_map_fork_copy.
12320 */
12321
12322 if(old_entry->is_sub_map)
12323 break;
12324 if ((old_entry->wired_count != 0) ||
12325 ((VME_OBJECT(old_entry) != NULL) &&
12326 (VME_OBJECT(old_entry)->true_share))) {
12327 goto slow_vm_map_fork_copy;
12328 }
12329
12330 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12331 vm_map_entry_copy(new_entry, old_entry);
12332 if (new_entry->is_sub_map) {
12333 /* clear address space specifics */
12334 new_entry->use_pmap = FALSE;
12335 } else {
12336 /*
12337 * We're dealing with a copy-on-write operation,
12338 * so the resulting mapping should not inherit
12339 * the original mapping's accounting settings.
12340 * "iokit_acct" should have been cleared in
12341 * vm_map_entry_copy().
12342 * "use_pmap" should be reset to its default
12343 * (TRUE) so that the new mapping gets
12344 * accounted for in the task's memory footprint.
12345 */
12346 assert(!new_entry->iokit_acct);
12347 new_entry->use_pmap = TRUE;
12348 }
12349
12350 if (! vm_object_copy_quickly(
12351 &VME_OBJECT(new_entry),
12352 VME_OFFSET(old_entry),
12353 (old_entry->vme_end -
12354 old_entry->vme_start),
12355 &src_needs_copy,
12356 &new_entry_needs_copy)) {
12357 vm_map_entry_dispose(new_map, new_entry);
12358 goto slow_vm_map_fork_copy;
12359 }
12360
12361 /*
12362 * Handle copy-on-write obligations
12363 */
12364
12365 if (src_needs_copy && !old_entry->needs_copy) {
12366 vm_prot_t prot;
12367
12368 assert(!pmap_has_prot_policy(old_entry->protection));
12369
12370 prot = old_entry->protection & ~VM_PROT_WRITE;
12371
12372 if (override_nx(old_map, VME_ALIAS(old_entry))
12373 && prot)
12374 prot |= VM_PROT_EXECUTE;
12375
12376 assert(!pmap_has_prot_policy(prot));
12377
12378 vm_object_pmap_protect(
12379 VME_OBJECT(old_entry),
12380 VME_OFFSET(old_entry),
12381 (old_entry->vme_end -
12382 old_entry->vme_start),
12383 ((old_entry->is_shared
12384 || old_map->mapped_in_other_pmaps)
12385 ? PMAP_NULL :
12386 old_map->pmap),
12387 old_entry->vme_start,
12388 prot);
12389
12390 assert(old_entry->wired_count == 0);
12391 old_entry->needs_copy = TRUE;
12392 }
12393 new_entry->needs_copy = new_entry_needs_copy;
12394
12395 /*
12396 * Insert the entry at the end
12397 * of the map.
12398 */
12399
12400 vm_map_store_entry_link(new_map,
12401 vm_map_last_entry(new_map),
12402 new_entry,
12403 VM_MAP_KERNEL_FLAGS_NONE);
12404 new_size += entry_size;
12405 break;
12406
12407 slow_vm_map_fork_copy:
12408 vm_map_copyin_flags = 0;
12409 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12410 vm_map_copyin_flags |=
12411 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12412 }
12413 if (vm_map_fork_copy(old_map,
12414 &old_entry,
12415 new_map,
12416 vm_map_copyin_flags)) {
12417 new_size += entry_size;
12418 }
12419 continue;
12420 }
12421 old_entry = old_entry->vme_next;
12422 }
12423
12424#if defined(__arm64__)
12425 pmap_insert_sharedpage(new_map->pmap);
12426#endif
12427
12428 new_map->size = new_size;
12429
12430 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12431 vm_map_corpse_footprint_collect_done(new_map);
12432 }
12433
12434 vm_map_unlock(new_map);
12435 vm_map_unlock(old_map);
12436 vm_map_deallocate(old_map);
12437
12438 return(new_map);
12439}
12440
12441/*
12442 * vm_map_exec:
12443 *
12444 *	Set up the "new_map" with the proper execution environment according
12445 *	to the type of executable (platform, 64-bit, chroot environment).
12446 * Map the comm page and shared region, etc...
12447 */
12448kern_return_t
12449vm_map_exec(
12450 vm_map_t new_map,
12451 task_t task,
12452 boolean_t is64bit,
12453 void *fsroot,
12454 cpu_type_t cpu,
12455 cpu_subtype_t cpu_subtype)
12456{
12457 SHARED_REGION_TRACE_DEBUG(
12458 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12459 (void *)VM_KERNEL_ADDRPERM(current_task()),
12460 (void *)VM_KERNEL_ADDRPERM(new_map),
12461 (void *)VM_KERNEL_ADDRPERM(task),
12462 (void *)VM_KERNEL_ADDRPERM(fsroot),
12463 cpu,
12464 cpu_subtype));
12465 (void) vm_commpage_enter(new_map, task, is64bit);
12466 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
12467 SHARED_REGION_TRACE_DEBUG(
12468 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12469 (void *)VM_KERNEL_ADDRPERM(current_task()),
12470 (void *)VM_KERNEL_ADDRPERM(new_map),
12471 (void *)VM_KERNEL_ADDRPERM(task),
12472 (void *)VM_KERNEL_ADDRPERM(fsroot),
12473 cpu,
12474 cpu_subtype));
12475 return KERN_SUCCESS;
12476}
12477
12478/*
12479 * vm_map_lookup_locked:
12480 *
12481 * Finds the VM object, offset, and
12482 * protection for a given virtual address in the
12483 * specified map, assuming a page fault of the
12484 * type specified.
12485 *
12486 * Returns the (object, offset, protection) for
12487 * this address, whether it is wired down, and whether
12488 * this map has the only reference to the data in question.
12489 * In order to later verify this lookup, a "version"
12490 * is returned.
12491 *
12492 * The map MUST be locked by the caller and WILL be
12493 * locked on exit. In order to guarantee the
12494 * existence of the returned object, it is returned
12495 * locked.
12496 *
12497 * If a lookup is requested with "write protection"
12498 * specified, the map may be changed to perform virtual
12499 * copying operations, although the data referenced will
12500 * remain the same.
12501 */
12502kern_return_t
12503vm_map_lookup_locked(
12504 vm_map_t *var_map, /* IN/OUT */
12505 vm_map_offset_t vaddr,
12506 vm_prot_t fault_type,
12507 int object_lock_type,
12508 vm_map_version_t *out_version, /* OUT */
12509 vm_object_t *object, /* OUT */
12510 vm_object_offset_t *offset, /* OUT */
12511 vm_prot_t *out_prot, /* OUT */
12512 boolean_t *wired, /* OUT */
12513 vm_object_fault_info_t fault_info, /* OUT */
12514 vm_map_t *real_map)
12515{
12516 vm_map_entry_t entry;
12517 vm_map_t map = *var_map;
12518 vm_map_t old_map = *var_map;
12519 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12520 vm_map_offset_t cow_parent_vaddr = 0;
12521 vm_map_offset_t old_start = 0;
12522 vm_map_offset_t old_end = 0;
12523 vm_prot_t prot;
12524 boolean_t mask_protections;
12525 boolean_t force_copy;
12526 vm_prot_t original_fault_type;
12527
12528 /*
12529 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12530 * as a mask against the mapping's actual protections, not as an
12531 * absolute value.
12532 */
12533 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12534 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12535 fault_type &= VM_PROT_ALL;
12536 original_fault_type = fault_type;
12537
12538 *real_map = map;
12539
12540RetryLookup:
12541 fault_type = original_fault_type;
12542
12543 /*
12544 * If the map has an interesting hint, try it before calling
12545 * full blown lookup routine.
12546 */
12547 entry = map->hint;
12548
12549 if ((entry == vm_map_to_entry(map)) ||
12550 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12551 vm_map_entry_t tmp_entry;
12552
12553 /*
12554 * Entry was either not a valid hint, or the vaddr
12555 * was not contained in the entry, so do a full lookup.
12556 */
12557 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12558 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
12559 vm_map_unlock(cow_sub_map_parent);
12560 if((*real_map != map)
12561 && (*real_map != cow_sub_map_parent))
12562 vm_map_unlock(*real_map);
12563 return KERN_INVALID_ADDRESS;
12564 }
12565
12566 entry = tmp_entry;
12567 }
12568 if(map == old_map) {
12569 old_start = entry->vme_start;
12570 old_end = entry->vme_end;
12571 }
12572
12573 /*
12574 * Handle submaps. Drop lock on upper map, submap is
12575 * returned locked.
12576 */
12577
12578submap_recurse:
12579 if (entry->is_sub_map) {
12580 vm_map_offset_t local_vaddr;
12581 vm_map_offset_t end_delta;
12582 vm_map_offset_t start_delta;
12583 vm_map_entry_t submap_entry;
12584 vm_prot_t subentry_protection;
12585 vm_prot_t subentry_max_protection;
12586 boolean_t mapped_needs_copy=FALSE;
12587
12588 local_vaddr = vaddr;
12589
12590 if ((entry->use_pmap &&
12591 ! ((fault_type & VM_PROT_WRITE) ||
12592 force_copy))) {
12593 /* if real_map equals map we unlock below */
12594 if ((*real_map != map) &&
12595 (*real_map != cow_sub_map_parent))
12596 vm_map_unlock(*real_map);
12597 *real_map = VME_SUBMAP(entry);
12598 }
12599
12600 if(entry->needs_copy &&
12601 ((fault_type & VM_PROT_WRITE) ||
12602 force_copy)) {
12603 if (!mapped_needs_copy) {
12604 if (vm_map_lock_read_to_write(map)) {
12605 vm_map_lock_read(map);
12606 *real_map = map;
12607 goto RetryLookup;
12608 }
12609 vm_map_lock_read(VME_SUBMAP(entry));
12610 *var_map = VME_SUBMAP(entry);
12611 cow_sub_map_parent = map;
12612 /* reset base to map before cow object */
12613 /* this is the map which will accept */
12614 /* the new cow object */
12615 old_start = entry->vme_start;
12616 old_end = entry->vme_end;
12617 cow_parent_vaddr = vaddr;
12618 mapped_needs_copy = TRUE;
12619 } else {
12620 vm_map_lock_read(VME_SUBMAP(entry));
12621 *var_map = VME_SUBMAP(entry);
12622 if((cow_sub_map_parent != map) &&
12623 (*real_map != map))
12624 vm_map_unlock(map);
12625 }
12626 } else {
12627 vm_map_lock_read(VME_SUBMAP(entry));
12628 *var_map = VME_SUBMAP(entry);
12629			/* leave the map locked if it is a target */
12630			/* cow sub_map (above); otherwise, just */
12631			/* follow the maps down to the object. */
12632			/* Here we unlock, knowing we are not */
12633			/* revisiting the map. */
12634 if((*real_map != map) && (map != cow_sub_map_parent))
12635 vm_map_unlock_read(map);
12636 }
12637
12638 map = *var_map;
12639
12640 /* calculate the offset in the submap for vaddr */
12641 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12642
12643 RetrySubMap:
12644 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12645 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
12646 vm_map_unlock(cow_sub_map_parent);
12647 }
12648 if((*real_map != map)
12649 && (*real_map != cow_sub_map_parent)) {
12650 vm_map_unlock(*real_map);
12651 }
12652 *real_map = map;
12653 return KERN_INVALID_ADDRESS;
12654 }
12655
12656 /* find the attenuated shadow of the underlying object */
12657 /* on our target map */
12658
12659		/* in plain English: the submap object may extend beyond the */
12660		/* region mapped by the entry, or may only fill a portion */
12661 /* of it. For our purposes, we only care if the object */
12662 /* doesn't fill. In this case the area which will */
12663 /* ultimately be clipped in the top map will only need */
12664 /* to be as big as the portion of the underlying entry */
12665 /* which is mapped */
12666 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12667 submap_entry->vme_start - VME_OFFSET(entry) : 0;
12668
12669 end_delta =
12670 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12671 submap_entry->vme_end ?
12672 0 : (VME_OFFSET(entry) +
12673 (old_end - old_start))
12674 - submap_entry->vme_end;
12675
12676 old_start += start_delta;
12677 old_end -= end_delta;
12678
12679 if(submap_entry->is_sub_map) {
12680 entry = submap_entry;
12681 vaddr = local_vaddr;
12682 goto submap_recurse;
12683 }
12684
12685 if (((fault_type & VM_PROT_WRITE) ||
12686 force_copy)
12687 && cow_sub_map_parent) {
12688
12689 vm_object_t sub_object, copy_object;
12690 vm_object_offset_t copy_offset;
12691 vm_map_offset_t local_start;
12692 vm_map_offset_t local_end;
12693 boolean_t copied_slowly = FALSE;
12694
12695 if (vm_map_lock_read_to_write(map)) {
12696 vm_map_lock_read(map);
12697 old_start -= start_delta;
12698 old_end += end_delta;
12699 goto RetrySubMap;
12700 }
12701
12702
12703 sub_object = VME_OBJECT(submap_entry);
12704 if (sub_object == VM_OBJECT_NULL) {
12705 sub_object =
12706 vm_object_allocate(
12707 (vm_map_size_t)
12708 (submap_entry->vme_end -
12709 submap_entry->vme_start));
12710 VME_OBJECT_SET(submap_entry, sub_object);
12711 VME_OFFSET_SET(submap_entry, 0);
12712 assert(!submap_entry->is_sub_map);
12713 assert(submap_entry->use_pmap);
12714 }
12715 local_start = local_vaddr -
12716 (cow_parent_vaddr - old_start);
12717 local_end = local_vaddr +
12718 (old_end - cow_parent_vaddr);
12719 vm_map_clip_start(map, submap_entry, local_start);
12720 vm_map_clip_end(map, submap_entry, local_end);
12721 if (submap_entry->is_sub_map) {
12722 /* unnesting was done when clipping */
12723 assert(!submap_entry->use_pmap);
12724 }
12725
12726			/* This is the COW case; let's connect */
12727 /* an entry in our space to the underlying */
12728 /* object in the submap, bypassing the */
12729 /* submap. */
12730
12731
12732 if(submap_entry->wired_count != 0 ||
12733 (sub_object->copy_strategy ==
12734 MEMORY_OBJECT_COPY_NONE)) {
12735 vm_object_lock(sub_object);
12736 vm_object_copy_slowly(sub_object,
12737 VME_OFFSET(submap_entry),
12738 (submap_entry->vme_end -
12739 submap_entry->vme_start),
12740 FALSE,
12741 &copy_object);
12742 copied_slowly = TRUE;
12743 } else {
12744
12745 /* set up shadow object */
12746 copy_object = sub_object;
12747 vm_object_lock(sub_object);
12748 vm_object_reference_locked(sub_object);
12749 sub_object->shadowed = TRUE;
12750 vm_object_unlock(sub_object);
12751
12752 assert(submap_entry->wired_count == 0);
12753 submap_entry->needs_copy = TRUE;
12754
12755 prot = submap_entry->protection;
12756 assert(!pmap_has_prot_policy(prot));
12757 prot = prot & ~VM_PROT_WRITE;
12758 assert(!pmap_has_prot_policy(prot));
12759
12760 if (override_nx(old_map,
12761 VME_ALIAS(submap_entry))
12762 && prot)
12763 prot |= VM_PROT_EXECUTE;
12764
12765 vm_object_pmap_protect(
12766 sub_object,
12767 VME_OFFSET(submap_entry),
12768 submap_entry->vme_end -
12769 submap_entry->vme_start,
12770 (submap_entry->is_shared
12771 || map->mapped_in_other_pmaps) ?
12772 PMAP_NULL : map->pmap,
12773 submap_entry->vme_start,
12774 prot);
12775 }
12776
12777 /*
12778 * Adjust the fault offset to the submap entry.
12779 */
12780 copy_offset = (local_vaddr -
12781 submap_entry->vme_start +
12782 VME_OFFSET(submap_entry));
12783
12784			/* This works differently from the */
12785			/* normal submap case. We go back */
12786			/* to the parent of the cow map and */
12787			/* clip out the target portion of */
12788			/* the sub_map, substituting the */
12789			/* new copy object. */
12790
12791 subentry_protection = submap_entry->protection;
12792 subentry_max_protection = submap_entry->max_protection;
12793 vm_map_unlock(map);
12794 submap_entry = NULL; /* not valid after map unlock */
12795
12796 local_start = old_start;
12797 local_end = old_end;
12798 map = cow_sub_map_parent;
12799 *var_map = cow_sub_map_parent;
12800 vaddr = cow_parent_vaddr;
12801 cow_sub_map_parent = NULL;
12802
12803 if(!vm_map_lookup_entry(map,
12804 vaddr, &entry)) {
12805 vm_object_deallocate(
12806 copy_object);
12807 vm_map_lock_write_to_read(map);
12808 return KERN_INVALID_ADDRESS;
12809 }
12810
12811 /* clip out the portion of space */
12812 /* mapped by the sub map which */
12813 /* corresponds to the underlying */
12814 /* object */
12815
12816 /*
12817 * Clip (and unnest) the smallest nested chunk
12818 * possible around the faulting address...
12819 */
12820 local_start = vaddr & ~(pmap_nesting_size_min - 1);
12821 local_end = local_start + pmap_nesting_size_min;
12822 /*
12823 * ... but don't go beyond the "old_start" to "old_end"
12824 * range, to avoid spanning over another VM region
12825 * with a possibly different VM object and/or offset.
12826 */
12827 if (local_start < old_start) {
12828 local_start = old_start;
12829 }
12830 if (local_end > old_end) {
12831 local_end = old_end;
12832 }
12833 /*
12834 * Adjust copy_offset to the start of the range.
12835 */
12836 copy_offset -= (vaddr - local_start);
12837
12838 vm_map_clip_start(map, entry, local_start);
12839 vm_map_clip_end(map, entry, local_end);
12840 if (entry->is_sub_map) {
12841 /* unnesting was done when clipping */
12842 assert(!entry->use_pmap);
12843 }
12844
12845 /* substitute copy object for */
12846 /* shared map entry */
12847 vm_map_deallocate(VME_SUBMAP(entry));
12848 assert(!entry->iokit_acct);
12849 entry->is_sub_map = FALSE;
12850 entry->use_pmap = TRUE;
12851 VME_OBJECT_SET(entry, copy_object);
12852
12853 /* propagate the submap entry's protections */
12854 if (entry->protection != VM_PROT_READ) {
12855 /*
12856 * Someone has already altered the top entry's
12857 * protections via vm_protect(VM_PROT_COPY).
12858 * Respect these new values and ignore the
12859 * submap entry's protections.
12860 */
12861 } else {
12862 /*
12863 * Regular copy-on-write: propagate the submap
12864 * entry's protections to the top map entry.
12865 */
12866 entry->protection |= subentry_protection;
12867 }
12868 entry->max_protection |= subentry_max_protection;
12869
12870 if ((entry->protection & VM_PROT_WRITE) &&
12871 (entry->protection & VM_PROT_EXECUTE) &&
12872#if !CONFIG_EMBEDDED
12873 map != kernel_map &&
12874 cs_process_enforcement(NULL) &&
12875#endif /* !CONFIG_EMBEDDED */
12876 !(entry->used_for_jit)) {
12877 DTRACE_VM3(cs_wx,
12878 uint64_t, (uint64_t)entry->vme_start,
12879 uint64_t, (uint64_t)entry->vme_end,
12880 vm_prot_t, entry->protection);
12881 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
12882 proc_selfpid(),
12883 (current_task()->bsd_info
12884 ? proc_name_address(current_task()->bsd_info)
12885 : "?"),
12886 __FUNCTION__);
12887 entry->protection &= ~VM_PROT_EXECUTE;
12888 }
12889
12890 if(copied_slowly) {
12891 VME_OFFSET_SET(entry, local_start - old_start);
12892 entry->needs_copy = FALSE;
12893 entry->is_shared = FALSE;
12894 } else {
12895 VME_OFFSET_SET(entry, copy_offset);
12896 assert(entry->wired_count == 0);
12897 entry->needs_copy = TRUE;
12898 if(entry->inheritance == VM_INHERIT_SHARE)
12899 entry->inheritance = VM_INHERIT_COPY;
12900 if (map != old_map)
12901 entry->is_shared = TRUE;
12902 }
12903 if(entry->inheritance == VM_INHERIT_SHARE)
12904 entry->inheritance = VM_INHERIT_COPY;
12905
12906 vm_map_lock_write_to_read(map);
12907 } else {
12908 if((cow_sub_map_parent)
12909 && (cow_sub_map_parent != *real_map)
12910 && (cow_sub_map_parent != map)) {
12911 vm_map_unlock(cow_sub_map_parent);
12912 }
12913 entry = submap_entry;
12914 vaddr = local_vaddr;
12915 }
12916 }
12917
12918 /*
12919 * Check whether this task is allowed to have
12920 * this page.
12921 */
12922
12923 prot = entry->protection;
12924
12925 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
12926 /*
12927 * HACK -- if not a stack, then allow execution
12928 */
12929 prot |= VM_PROT_EXECUTE;
12930 }
12931
12932 if (mask_protections) {
12933 fault_type &= prot;
12934 if (fault_type == VM_PROT_NONE) {
12935 goto protection_failure;
12936 }
12937 }
12938 if (((fault_type & prot) != fault_type)
12939#if __arm64__
12940 /* prefetch abort in execute-only page */
12941 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
12942#endif
12943 ) {
12944 protection_failure:
12945 if (*real_map != map) {
12946 vm_map_unlock(*real_map);
12947 }
12948 *real_map = map;
12949
12950 if ((fault_type & VM_PROT_EXECUTE) && prot)
12951 log_stack_execution_failure((addr64_t)vaddr, prot);
12952
12953 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
12954 return KERN_PROTECTION_FAILURE;
12955 }
12956
12957 /*
12958 * If this page is not pageable, we have to get
12959 * it for all possible accesses.
12960 */
12961
12962 *wired = (entry->wired_count != 0);
12963 if (*wired)
12964 fault_type = prot;
12965
12966 /*
12967 * If the entry was copy-on-write, we either ...
12968 */
12969
12970 if (entry->needs_copy) {
12971 /*
12972 * If we want to write the page, we may as well
12973 * handle that now since we've got the map locked.
12974 *
12975 * If we don't need to write the page, we just
12976 * demote the permissions allowed.
12977 */
12978
12979 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
12980 /*
12981 * Make a new object, and place it in the
12982 * object chain. Note that no new references
12983 * have appeared -- one just moved from the
12984 * map to the new object.
12985 */
12986
12987 if (vm_map_lock_read_to_write(map)) {
12988 vm_map_lock_read(map);
12989 goto RetryLookup;
12990 }
12991
12992 if (VME_OBJECT(entry)->shadowed == FALSE) {
12993 vm_object_lock(VME_OBJECT(entry));
12994 VME_OBJECT(entry)->shadowed = TRUE;
12995 vm_object_unlock(VME_OBJECT(entry));
12996 }
12997 VME_OBJECT_SHADOW(entry,
12998 (vm_map_size_t) (entry->vme_end -
12999 entry->vme_start));
13000 entry->needs_copy = FALSE;
13001
13002 vm_map_lock_write_to_read(map);
13003 }
13004 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13005 /*
13006 * We're attempting to read a copy-on-write
13007 * page -- don't allow writes.
13008 */
13009
13010 prot &= (~VM_PROT_WRITE);
13011 }
13012 }
13013
13014 /*
13015 * Create an object if necessary.
13016 */
13017 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13018
13019 if (vm_map_lock_read_to_write(map)) {
13020 vm_map_lock_read(map);
13021 goto RetryLookup;
13022 }
13023
13024 VME_OBJECT_SET(entry,
13025 vm_object_allocate(
13026 (vm_map_size_t)(entry->vme_end -
13027 entry->vme_start)));
13028 VME_OFFSET_SET(entry, 0);
13029 assert(entry->use_pmap);
13030 vm_map_lock_write_to_read(map);
13031 }
13032
13033 /*
13034 * Return the object/offset from this entry. If the entry
13035 * was copy-on-write or empty, it has been fixed up. Also
13036 * return the protection.
13037 */
13038
13039 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13040 *object = VME_OBJECT(entry);
13041 *out_prot = prot;
13042
13043 if (fault_info) {
13044 fault_info->interruptible = THREAD_UNINT; /* for now... */
13045 /* ... the caller will change "interruptible" if needed */
13046 fault_info->cluster_size = 0;
13047 fault_info->user_tag = VME_ALIAS(entry);
13048 fault_info->pmap_options = 0;
13049 if (entry->iokit_acct ||
13050 (!entry->is_sub_map && !entry->use_pmap)) {
13051 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13052 }
13053 fault_info->behavior = entry->behavior;
13054 fault_info->lo_offset = VME_OFFSET(entry);
13055 fault_info->hi_offset =
13056 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13057 fault_info->no_cache = entry->no_cache;
13058 fault_info->stealth = FALSE;
13059 fault_info->io_sync = FALSE;
13060 if (entry->used_for_jit ||
13061 entry->vme_resilient_codesign) {
13062 fault_info->cs_bypass = TRUE;
13063 } else {
13064 fault_info->cs_bypass = FALSE;
13065 }
13066 fault_info->pmap_cs_associated = FALSE;
13067#if CONFIG_PMAP_CS
13068 if (entry->pmap_cs_associated) {
13069 /*
13070 * The pmap layer will validate this page
13071 * before allowing it to be executed from.
13072 */
13073 fault_info->pmap_cs_associated = TRUE;
13074 }
13075#endif /* CONFIG_PMAP_CS */
13076 fault_info->mark_zf_absent = FALSE;
13077 fault_info->batch_pmap_op = FALSE;
13078 }
13079
13080 /*
13081 * Lock the object to prevent it from disappearing
13082 */
13083 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
13084 vm_object_lock(*object);
13085 else
13086 vm_object_lock_shared(*object);
13087
13088 /*
13089 * Save the version number
13090 */
13091
13092 out_version->main_timestamp = map->timestamp;
13093
13094 return KERN_SUCCESS;
13095}
13096
13097
13098/*
13099 * vm_map_verify:
13100 *
13101 * Verifies that the map in question has not changed
13102 * since the given version. The map has to be locked
13103 * ("shared" mode is fine) before calling this function
13104 * and it will be returned locked too.
13105 */
13106boolean_t
13107vm_map_verify(
13108 vm_map_t map,
13109 vm_map_version_t *version) /* REF */
13110{
13111 boolean_t result;
13112
13113 vm_map_lock_assert_held(map);
13114 result = (map->timestamp == version->main_timestamp);
13115
13116 return(result);
13117}
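
/*
 * Minimal sketch of the lookup/verify pattern (illustrative only; error
 * handling and the extra "real_map" unlock are elided, and the variable
 * names are hypothetical): a fault-style caller records the version
 * returned by vm_map_lookup_locked(), drops the map lock while it works
 * on the object, then re-takes the lock and checks that the map has not
 * changed before relying on the earlier lookup.
 *
 *	vm_map_version_t	version;
 *	vm_object_t		object;
 *	vm_object_offset_t	offset;
 *	vm_prot_t		prot;
 *	boolean_t		wired;
 *	vm_map_t		real_map;
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
 *				  OBJECT_LOCK_EXCLUSIVE, &version, &object,
 *				  &offset, &prot, &wired, NULL, &real_map);
 *	... use the object, which is returned locked ...
 *	vm_map_unlock_read(map);
 *	... do work that requires dropping the map lock ...
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed underneath us: redo the lookup
 *	}
 */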
13118
13119/*
13120 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13121 * Goes away after regular vm_region_recurse function migrates to
13122 * 64 bits
13123 * vm_region_recurse: A form of vm_region which follows the
13124 * submaps in a target map
13125 *
13126 */
13127
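/*
 * Minimal usage sketch (illustrative only; "target_map" is a hypothetical
 * map reference and error handling is elided): callers typically walk an
 * address space by feeding the returned address and size back in, with
 * "depth" set before each call to the maximum submap depth they care
 * about.
 *
 *	vm_map_offset_t			address = 0;
 *	vm_map_size_t			size;
 *	natural_t			depth;
 *	vm_region_submap_info_data_64_t	info;
 *	mach_msg_type_number_t		count;
 *
 *	for (;;) {
 *		depth = 99;	// max submap depth to descend
 *		count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
 *		if (vm_map_region_recurse_64(target_map, &address, &size,
 *			&depth, (vm_region_submap_info_64_t)&info,
 *			&count) != KERN_SUCCESS)
 *			break;
 *		// the region covers [address, address + size)
 *		address += size;
 *	}
 */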
13128kern_return_t
13129vm_map_region_recurse_64(
13130 vm_map_t map,
13131 vm_map_offset_t *address, /* IN/OUT */
13132 vm_map_size_t *size, /* OUT */
13133 natural_t *nesting_depth, /* IN/OUT */
13134 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13135 mach_msg_type_number_t *count) /* IN/OUT */
13136{
13137 mach_msg_type_number_t original_count;
13138 vm_region_extended_info_data_t extended;
13139 vm_map_entry_t tmp_entry;
13140 vm_map_offset_t user_address;
13141 unsigned int user_max_depth;
13142
13143 /*
13144 * "curr_entry" is the VM map entry preceding or including the
13145 * address we're looking for.
13146 * "curr_map" is the map or sub-map containing "curr_entry".
13147 * "curr_address" is the equivalent of the top map's "user_address"
13148 * in the current map.
13149 * "curr_offset" is the cumulated offset of "curr_map" in the
13150 * target task's address space.
13151 * "curr_depth" is the depth of "curr_map" in the chain of
13152 * sub-maps.
13153 *
13154 * "curr_max_below" and "curr_max_above" limit the range (around
13155 * "curr_address") we should take into account in the current (sub)map.
13156 * They limit the range to what's visible through the map entries
13157 * we've traversed from the top map to the current map.
13158	 *
13159 */
13160 vm_map_entry_t curr_entry;
13161 vm_map_address_t curr_address;
13162 vm_map_offset_t curr_offset;
13163 vm_map_t curr_map;
13164 unsigned int curr_depth;
13165 vm_map_offset_t curr_max_below, curr_max_above;
13166 vm_map_offset_t curr_skip;
13167
13168 /*
13169 * "next_" is the same as "curr_" but for the VM region immediately
13170 * after the address we're looking for. We need to keep track of this
13171 * too because we want to return info about that region if the
13172 * address we're looking for is not mapped.
13173 */
13174 vm_map_entry_t next_entry;
13175 vm_map_offset_t next_offset;
13176 vm_map_offset_t next_address;
13177 vm_map_t next_map;
13178 unsigned int next_depth;
13179 vm_map_offset_t next_max_below, next_max_above;
13180 vm_map_offset_t next_skip;
13181
13182 boolean_t look_for_pages;
13183 vm_region_submap_short_info_64_t short_info;
13184 boolean_t do_region_footprint;
13185
13186 if (map == VM_MAP_NULL) {
13187 /* no address space to work on */
13188 return KERN_INVALID_ARGUMENT;
13189 }
13190
13191
13192 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13193 /*
13194 * "info" structure is not big enough and
13195 * would overflow
13196 */
13197 return KERN_INVALID_ARGUMENT;
13198 }
13199
13200 do_region_footprint = task_self_region_footprint();
13201 original_count = *count;
13202
13203 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13204 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13205 look_for_pages = FALSE;
13206 short_info = (vm_region_submap_short_info_64_t) submap_info;
13207 submap_info = NULL;
13208 } else {
13209 look_for_pages = TRUE;
13210 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13211 short_info = NULL;
13212
13213 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13214 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13215 }
13216 }
13217
13218 user_address = *address;
13219 user_max_depth = *nesting_depth;
13220
13221 if (not_in_kdp) {
13222 vm_map_lock_read(map);
13223 }
13224
13225recurse_again:
13226 curr_entry = NULL;
13227 curr_map = map;
13228 curr_address = user_address;
13229 curr_offset = 0;
13230 curr_skip = 0;
13231 curr_depth = 0;
13232 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13233 curr_max_below = curr_address;
13234
13235 next_entry = NULL;
13236 next_map = NULL;
13237 next_address = 0;
13238 next_offset = 0;
13239 next_skip = 0;
13240 next_depth = 0;
13241 next_max_above = (vm_map_offset_t) -1;
13242 next_max_below = (vm_map_offset_t) -1;
13243
13244 for (;;) {
13245 if (vm_map_lookup_entry(curr_map,
13246 curr_address,
13247 &tmp_entry)) {
13248 /* tmp_entry contains the address we're looking for */
13249 curr_entry = tmp_entry;
13250 } else {
13251 vm_map_offset_t skip;
13252 /*
13253 * The address is not mapped. "tmp_entry" is the
13254 * map entry preceding the address. We want the next
13255 * one, if it exists.
13256 */
13257 curr_entry = tmp_entry->vme_next;
13258
13259 if (curr_entry == vm_map_to_entry(curr_map) ||
13260 (curr_entry->vme_start >=
13261 curr_address + curr_max_above)) {
13262 /* no next entry at this level: stop looking */
13263 if (not_in_kdp) {
13264 vm_map_unlock_read(curr_map);
13265 }
13266 curr_entry = NULL;
13267 curr_map = NULL;
13268 curr_skip = 0;
13269 curr_offset = 0;
13270 curr_depth = 0;
13271 curr_max_above = 0;
13272 curr_max_below = 0;
13273 break;
13274 }
13275
13276 /* adjust current address and offset */
13277 skip = curr_entry->vme_start - curr_address;
13278 curr_address = curr_entry->vme_start;
13279 curr_skip += skip;
13280 curr_offset += skip;
13281 curr_max_above -= skip;
13282 curr_max_below = 0;
13283 }
13284
13285 /*
13286 * Is the next entry at this level closer to the address (or
13287 * deeper in the submap chain) than the one we had
13288 * so far ?
13289 */
13290 tmp_entry = curr_entry->vme_next;
13291 if (tmp_entry == vm_map_to_entry(curr_map)) {
13292 /* no next entry at this level */
13293 } else if (tmp_entry->vme_start >=
13294 curr_address + curr_max_above) {
13295 /*
13296 * tmp_entry is beyond the scope of what we mapped of
13297 * this submap in the upper level: ignore it.
13298 */
13299 } else if ((next_entry == NULL) ||
13300 (tmp_entry->vme_start + curr_offset <=
13301 next_entry->vme_start + next_offset)) {
13302 /*
13303 * We didn't have a "next_entry" or this one is
13304 * closer to the address we're looking for:
13305 * use this "tmp_entry" as the new "next_entry".
13306 */
13307 if (next_entry != NULL) {
13308 /* unlock the last "next_map" */
13309 if (next_map != curr_map && not_in_kdp) {
13310 vm_map_unlock_read(next_map);
13311 }
13312 }
13313 next_entry = tmp_entry;
13314 next_map = curr_map;
13315 next_depth = curr_depth;
13316 next_address = next_entry->vme_start;
13317 next_skip = curr_skip;
13318 next_skip += (next_address - curr_address);
13319 next_offset = curr_offset;
13320 next_offset += (next_address - curr_address);
13321 next_max_above = MIN(next_max_above, curr_max_above);
13322 next_max_above = MIN(next_max_above,
13323 next_entry->vme_end - next_address);
13324 next_max_below = MIN(next_max_below, curr_max_below);
13325 next_max_below = MIN(next_max_below,
13326 next_address - next_entry->vme_start);
13327 }
13328
13329 /*
13330 * "curr_max_{above,below}" allow us to keep track of the
13331 * portion of the submap that is actually mapped at this level:
13332 * the rest of that submap is irrelevant to us, since it's not
13333 * mapped here.
13334 * The relevant portion of the map starts at
13335 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13336 */
13337 curr_max_above = MIN(curr_max_above,
13338 curr_entry->vme_end - curr_address);
13339 curr_max_below = MIN(curr_max_below,
13340 curr_address - curr_entry->vme_start);
13341
13342 if (!curr_entry->is_sub_map ||
13343 curr_depth >= user_max_depth) {
13344 /*
13345 * We hit a leaf map or we reached the maximum depth
13346 * we could, so stop looking. Keep the current map
13347 * locked.
13348 */
13349 break;
13350 }
13351
13352 /*
13353 * Get down to the next submap level.
13354 */
13355
13356 /*
13357 * Lock the next level and unlock the current level,
13358 * unless we need to keep it locked to access the "next_entry"
13359 * later.
13360 */
13361 if (not_in_kdp) {
13362 vm_map_lock_read(VME_SUBMAP(curr_entry));
13363 }
13364 if (curr_map == next_map) {
13365 /* keep "next_map" locked in case we need it */
13366 } else {
13367 /* release this map */
13368 if (not_in_kdp)
13369 vm_map_unlock_read(curr_map);
13370 }
13371
13372 /*
13373 * Adjust the offset. "curr_entry" maps the submap
13374 * at relative address "curr_entry->vme_start" in the
13375 * curr_map but skips the first "VME_OFFSET(curr_entry)"
13376 * bytes of the submap.
13377 * "curr_offset" always represents the offset of a virtual
13378 * address in the curr_map relative to the absolute address
13379 * space (i.e. the top-level VM map).
13380 */
13381 curr_offset +=
13382 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13383 curr_address = user_address + curr_offset;
13384 /* switch to the submap */
13385 curr_map = VME_SUBMAP(curr_entry);
13386 curr_depth++;
13387 curr_entry = NULL;
13388 }
13389
13390// LP64todo: all the current tools are 32bit, obviously never worked for 64b
13391// so probably should be a real 32b ID vs. ptr.
13392// Current users just check for equality
13393
13394 if (curr_entry == NULL) {
13395 /* no VM region contains the address... */
13396
13397 if (do_region_footprint && /* we want footprint numbers */
13398 next_entry == NULL && /* & there are no more regions */
13399 /* & we haven't already provided our fake region: */
13400 user_address <= vm_map_last_entry(map)->vme_end) {
13401 ledger_amount_t nonvol, nonvol_compressed;
13402 /*
13403 * Add a fake memory region to account for
13404 * purgeable memory that counts towards this
13405 * task's memory footprint, i.e. the resident
13406 * compressed pages of non-volatile objects
13407 * owned by that task.
13408 */
13409 ledger_get_balance(
13410 map->pmap->ledger,
13411 task_ledgers.purgeable_nonvolatile,
13412 &nonvol);
13413 ledger_get_balance(
13414 map->pmap->ledger,
13415 task_ledgers.purgeable_nonvolatile_compressed,
13416 &nonvol_compressed);
13417 if (nonvol + nonvol_compressed == 0) {
13418 /* no purgeable memory usage to report */
13419 return KERN_INVALID_ADDRESS;
13420 }
13421 /* fake region to show nonvolatile footprint */
13422 if (look_for_pages) {
13423 submap_info->protection = VM_PROT_DEFAULT;
13424 submap_info->max_protection = VM_PROT_DEFAULT;
13425 submap_info->inheritance = VM_INHERIT_DEFAULT;
13426 submap_info->offset = 0;
13427 submap_info->user_tag = -1;
13428 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
13429 submap_info->pages_shared_now_private = 0;
13430 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
13431 submap_info->pages_dirtied = submap_info->pages_resident;
13432 submap_info->ref_count = 1;
13433 submap_info->shadow_depth = 0;
13434 submap_info->external_pager = 0;
13435 submap_info->share_mode = SM_PRIVATE;
13436 submap_info->is_submap = 0;
13437 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13438 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13439 submap_info->user_wired_count = 0;
13440 submap_info->pages_reusable = 0;
13441 } else {
13442 short_info->user_tag = -1;
13443 short_info->offset = 0;
13444 short_info->protection = VM_PROT_DEFAULT;
13445 short_info->inheritance = VM_INHERIT_DEFAULT;
13446 short_info->max_protection = VM_PROT_DEFAULT;
13447 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13448 short_info->user_wired_count = 0;
13449 short_info->is_submap = 0;
13450 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13451 short_info->external_pager = 0;
13452 short_info->shadow_depth = 0;
13453 short_info->share_mode = SM_PRIVATE;
13454 short_info->ref_count = 1;
13455 }
13456 *nesting_depth = 0;
13457 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
13458// *address = user_address;
13459 *address = vm_map_last_entry(map)->vme_end;
13460 return KERN_SUCCESS;
13461 }
13462
13463 if (next_entry == NULL) {
13464 /* ... and no VM region follows it either */
13465 return KERN_INVALID_ADDRESS;
13466 }
13467 /* ... gather info about the next VM region */
13468 curr_entry = next_entry;
13469 curr_map = next_map; /* still locked ... */
13470 curr_address = next_address;
13471 curr_skip = next_skip;
13472 curr_offset = next_offset;
13473 curr_depth = next_depth;
13474 curr_max_above = next_max_above;
13475 curr_max_below = next_max_below;
13476 } else {
13477 /* we won't need "next_entry" after all */
13478 if (next_entry != NULL) {
13479 /* release "next_map" */
13480 if (next_map != curr_map && not_in_kdp) {
13481 vm_map_unlock_read(next_map);
13482 }
13483 }
13484 }
13485 next_entry = NULL;
13486 next_map = NULL;
13487 next_offset = 0;
13488 next_skip = 0;
13489 next_depth = 0;
13490 next_max_below = -1;
13491 next_max_above = -1;
13492
13493 if (curr_entry->is_sub_map &&
13494 curr_depth < user_max_depth) {
13495 /*
13496 * We're not as deep as we could be: we must have
13497 * gone back up after not finding anything mapped
13498		 * below the original top-level map entry's range.
13499 * Let's move "curr_address" forward and recurse again.
13500 */
13501 user_address = curr_address;
13502 goto recurse_again;
13503 }
13504
13505 *nesting_depth = curr_depth;
13506 *size = curr_max_above + curr_max_below;
13507 *address = user_address + curr_skip - curr_max_below;
13508
13509// LP64todo: all the current tools are 32bit, obviously never worked for 64b
13510// so probably should be a real 32b ID vs. ptr.
13511// Current users just check for equality
13512#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13513
13514 if (look_for_pages) {
13515 submap_info->user_tag = VME_ALIAS(curr_entry);
13516 submap_info->offset = VME_OFFSET(curr_entry);
13517 submap_info->protection = curr_entry->protection;
13518 submap_info->inheritance = curr_entry->inheritance;
13519 submap_info->max_protection = curr_entry->max_protection;
13520 submap_info->behavior = curr_entry->behavior;
13521 submap_info->user_wired_count = curr_entry->user_wired_count;
13522 submap_info->is_submap = curr_entry->is_sub_map;
13523 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13524 } else {
13525 short_info->user_tag = VME_ALIAS(curr_entry);
13526 short_info->offset = VME_OFFSET(curr_entry);
13527 short_info->protection = curr_entry->protection;
13528 short_info->inheritance = curr_entry->inheritance;
13529 short_info->max_protection = curr_entry->max_protection;
13530 short_info->behavior = curr_entry->behavior;
13531 short_info->user_wired_count = curr_entry->user_wired_count;
13532 short_info->is_submap = curr_entry->is_sub_map;
13533 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13534 }
13535
13536 extended.pages_resident = 0;
13537 extended.pages_swapped_out = 0;
13538 extended.pages_shared_now_private = 0;
13539 extended.pages_dirtied = 0;
13540 extended.pages_reusable = 0;
13541 extended.external_pager = 0;
13542 extended.shadow_depth = 0;
13543 extended.share_mode = SM_EMPTY;
13544 extended.ref_count = 0;
13545
13546 if (not_in_kdp) {
13547 if (!curr_entry->is_sub_map) {
13548 vm_map_offset_t range_start, range_end;
13549 range_start = MAX((curr_address - curr_max_below),
13550 curr_entry->vme_start);
13551 range_end = MIN((curr_address + curr_max_above),
13552 curr_entry->vme_end);
13553 vm_map_region_walk(curr_map,
13554 range_start,
13555 curr_entry,
13556 (VME_OFFSET(curr_entry) +
13557 (range_start -
13558 curr_entry->vme_start)),
13559 range_end - range_start,
13560 &extended,
13561 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13562 if (extended.external_pager &&
13563 extended.ref_count == 2 &&
13564 extended.share_mode == SM_SHARED) {
13565 extended.share_mode = SM_PRIVATE;
13566 }
13567 } else {
13568 if (curr_entry->use_pmap) {
13569 extended.share_mode = SM_TRUESHARED;
13570 } else {
13571 extended.share_mode = SM_PRIVATE;
13572 }
13573 extended.ref_count = VME_SUBMAP(curr_entry)->map_refcnt;
13574 }
13575 }
13576
13577 if (look_for_pages) {
13578 submap_info->pages_resident = extended.pages_resident;
13579 submap_info->pages_swapped_out = extended.pages_swapped_out;
13580 submap_info->pages_shared_now_private =
13581 extended.pages_shared_now_private;
13582 submap_info->pages_dirtied = extended.pages_dirtied;
13583 submap_info->external_pager = extended.external_pager;
13584 submap_info->shadow_depth = extended.shadow_depth;
13585 submap_info->share_mode = extended.share_mode;
13586 submap_info->ref_count = extended.ref_count;
13587
13588 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13589 submap_info->pages_reusable = extended.pages_reusable;
13590 }
13591 } else {
13592 short_info->external_pager = extended.external_pager;
13593 short_info->shadow_depth = extended.shadow_depth;
13594 short_info->share_mode = extended.share_mode;
13595 short_info->ref_count = extended.ref_count;
13596 }
13597
13598 if (not_in_kdp) {
13599 vm_map_unlock_read(curr_map);
13600 }
13601
13602 return KERN_SUCCESS;
13603}
13604
13605/*
13606 * vm_region:
13607 *
13608 * User call to obtain information about a region in
13609 *	a task's address map.  Several flavors of region
13610 *	information are supported.
13611 *
13612 * XXX The reserved and behavior fields cannot be filled
13613 * in until the vm merge from the IK is completed, and
13614 * vm_reserve is implemented.
13615 */
13616
13617kern_return_t
13618vm_map_region(
13619 vm_map_t map,
13620 vm_map_offset_t *address, /* IN/OUT */
13621 vm_map_size_t *size, /* OUT */
13622 vm_region_flavor_t flavor, /* IN */
13623 vm_region_info_t info, /* OUT */
13624 mach_msg_type_number_t *count, /* IN/OUT */
13625 mach_port_t *object_name) /* OUT */
13626{
13627 vm_map_entry_t tmp_entry;
13628 vm_map_entry_t entry;
13629 vm_map_offset_t start;
13630
13631 if (map == VM_MAP_NULL)
13632 return(KERN_INVALID_ARGUMENT);
13633
13634 switch (flavor) {
13635
13636 case VM_REGION_BASIC_INFO:
13637 /* legacy for old 32-bit objects info */
13638 {
13639 vm_region_basic_info_t basic;
13640
13641 if (*count < VM_REGION_BASIC_INFO_COUNT)
13642 return(KERN_INVALID_ARGUMENT);
13643
13644 basic = (vm_region_basic_info_t) info;
13645 *count = VM_REGION_BASIC_INFO_COUNT;
13646
13647 vm_map_lock_read(map);
13648
13649 start = *address;
13650 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13651 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13652 vm_map_unlock_read(map);
13653 return(KERN_INVALID_ADDRESS);
13654 }
13655 } else {
13656 entry = tmp_entry;
13657 }
13658
13659 start = entry->vme_start;
13660
13661 basic->offset = (uint32_t)VME_OFFSET(entry);
13662 basic->protection = entry->protection;
13663 basic->inheritance = entry->inheritance;
13664 basic->max_protection = entry->max_protection;
13665 basic->behavior = entry->behavior;
13666 basic->user_wired_count = entry->user_wired_count;
13667 basic->reserved = entry->is_sub_map;
13668 *address = start;
13669 *size = (entry->vme_end - start);
13670
13671 if (object_name) *object_name = IP_NULL;
13672 if (entry->is_sub_map) {
13673 basic->shared = FALSE;
13674 } else {
13675 basic->shared = entry->is_shared;
13676 }
13677
13678 vm_map_unlock_read(map);
13679 return(KERN_SUCCESS);
13680 }
13681
13682 case VM_REGION_BASIC_INFO_64:
13683 {
13684 vm_region_basic_info_64_t basic;
13685
13686 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
13687 return(KERN_INVALID_ARGUMENT);
13688
13689 basic = (vm_region_basic_info_64_t) info;
13690 *count = VM_REGION_BASIC_INFO_COUNT_64;
13691
13692 vm_map_lock_read(map);
13693
13694 start = *address;
13695 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13696 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13697 vm_map_unlock_read(map);
13698 return(KERN_INVALID_ADDRESS);
13699 }
13700 } else {
13701 entry = tmp_entry;
13702 }
13703
13704 start = entry->vme_start;
13705
13706 basic->offset = VME_OFFSET(entry);
13707 basic->protection = entry->protection;
13708 basic->inheritance = entry->inheritance;
13709 basic->max_protection = entry->max_protection;
13710 basic->behavior = entry->behavior;
13711 basic->user_wired_count = entry->user_wired_count;
13712 basic->reserved = entry->is_sub_map;
13713 *address = start;
13714 *size = (entry->vme_end - start);
13715
13716 if (object_name) *object_name = IP_NULL;
13717 if (entry->is_sub_map) {
13718 basic->shared = FALSE;
13719 } else {
13720 basic->shared = entry->is_shared;
13721 }
13722
13723 vm_map_unlock_read(map);
13724 return(KERN_SUCCESS);
13725 }
13726 case VM_REGION_EXTENDED_INFO:
13727 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
13728 return(KERN_INVALID_ARGUMENT);
13729 /*fallthru*/
13730 case VM_REGION_EXTENDED_INFO__legacy:
13731 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
13732 return KERN_INVALID_ARGUMENT;
13733
13734 {
13735 vm_region_extended_info_t extended;
13736 mach_msg_type_number_t original_count;
13737
13738 extended = (vm_region_extended_info_t) info;
13739
13740 vm_map_lock_read(map);
13741
13742 start = *address;
13743 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13744 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13745 vm_map_unlock_read(map);
13746 return(KERN_INVALID_ADDRESS);
13747 }
13748 } else {
13749 entry = tmp_entry;
13750 }
13751 start = entry->vme_start;
13752
13753 extended->protection = entry->protection;
13754 extended->user_tag = VME_ALIAS(entry);
13755 extended->pages_resident = 0;
13756 extended->pages_swapped_out = 0;
13757 extended->pages_shared_now_private = 0;
13758 extended->pages_dirtied = 0;
13759 extended->external_pager = 0;
13760 extended->shadow_depth = 0;
13761
13762 original_count = *count;
13763 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13764 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13765 } else {
13766 extended->pages_reusable = 0;
13767 *count = VM_REGION_EXTENDED_INFO_COUNT;
13768 }
13769
13770 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13771
13772 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
13773 extended->share_mode = SM_PRIVATE;
13774
13775 if (object_name)
13776 *object_name = IP_NULL;
13777 *address = start;
13778 *size = (entry->vme_end - start);
13779
13780 vm_map_unlock_read(map);
13781 return(KERN_SUCCESS);
13782 }
13783 case VM_REGION_TOP_INFO:
13784 {
13785 vm_region_top_info_t top;
13786
13787 if (*count < VM_REGION_TOP_INFO_COUNT)
13788 return(KERN_INVALID_ARGUMENT);
13789
13790 top = (vm_region_top_info_t) info;
13791 *count = VM_REGION_TOP_INFO_COUNT;
13792
13793 vm_map_lock_read(map);
13794
13795 start = *address;
13796 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13797 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13798 vm_map_unlock_read(map);
13799 return(KERN_INVALID_ADDRESS);
13800 }
13801 } else {
13802 entry = tmp_entry;
13804 }
13805 start = entry->vme_start;
13806
13807 top->private_pages_resident = 0;
13808 top->shared_pages_resident = 0;
13809
13810 vm_map_region_top_walk(entry, top);
13811
13812 if (object_name)
13813 *object_name = IP_NULL;
13814 *address = start;
13815 *size = (entry->vme_end - start);
13816
13817 vm_map_unlock_read(map);
13818 return(KERN_SUCCESS);
13819 }
13820 default:
13821 return(KERN_INVALID_ARGUMENT);
13822 }
13823}
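/*
 * Usage sketch (added commentary, not part of the original source):
 * user processes normally reach vm_map_region() through the
 * mach_vm_region() MIG routine.  A minimal caller, assuming the
 * standard Mach headers, might look like:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size = 0;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name = MACH_PORT_NULL;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_region(mach_task_self(), &addr, &size,
 *	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
 *	    &count, &object_name);
 *
 * On success, "addr" and "size" describe the first region at or above
 * the requested address and "info" carries the fields filled in above.
 */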
13824
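/*
 * Added note (not part of the original source): OBJ_RESIDENT_COUNT()
 * estimates how many of an object's pages to charge to a mapping that
 * spans "entry_size" pages.  For an "all_reusable" object only the wired
 * pages are charged; otherwise the reusable pages are subtracted from the
 * resident pages.  The result is clamped to the size of the mapping,
 * since a mapping can never be charged more pages than it covers.
 */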
13825#define OBJ_RESIDENT_COUNT(obj, entry_size) \
13826 MIN((entry_size), \
13827 ((obj)->all_reusable ? \
13828 (obj)->wired_page_count : \
13829 (obj)->resident_page_count - (obj)->reusable_page_count))
13830
13831void
13832vm_map_region_top_walk(
13833 vm_map_entry_t entry,
13834 vm_region_top_info_t top)
13835{
13836
13837 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
13838 top->share_mode = SM_EMPTY;
13839 top->ref_count = 0;
13840 top->obj_id = 0;
13841 return;
13842 }
13843
13844 {
13845 struct vm_object *obj, *tmp_obj;
13846 int ref_count;
13847 uint32_t entry_size;
13848
13849 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
13850
13851 obj = VME_OBJECT(entry);
13852
13853 vm_object_lock(obj);
13854
13855 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13856 ref_count--;
13857
13858 assert(obj->reusable_page_count <= obj->resident_page_count);
13859 if (obj->shadow) {
13860 if (ref_count == 1)
13861 top->private_pages_resident =
13862 OBJ_RESIDENT_COUNT(obj, entry_size);
13863 else
13864 top->shared_pages_resident =
13865 OBJ_RESIDENT_COUNT(obj, entry_size);
13866 top->ref_count = ref_count;
13867 top->share_mode = SM_COW;
13868
13869 while ((tmp_obj = obj->shadow)) {
13870 vm_object_lock(tmp_obj);
13871 vm_object_unlock(obj);
13872 obj = tmp_obj;
13873
13874 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13875 ref_count--;
13876
13877 assert(obj->reusable_page_count <= obj->resident_page_count);
13878 top->shared_pages_resident +=
13879 OBJ_RESIDENT_COUNT(obj, entry_size);
13880 top->ref_count += ref_count - 1;
13881 }
13882 } else {
13883 if (entry->superpage_size) {
13884 top->share_mode = SM_LARGE_PAGE;
13885 top->shared_pages_resident = 0;
13886 top->private_pages_resident = entry_size;
13887 } else if (entry->needs_copy) {
13888 top->share_mode = SM_COW;
13889 top->shared_pages_resident =
13890 OBJ_RESIDENT_COUNT(obj, entry_size);
13891 } else {
13892 if (ref_count == 1 ||
13893 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
13894 top->share_mode = SM_PRIVATE;
13895 top->private_pages_resident =
13896 OBJ_RESIDENT_COUNT(obj,
13897 entry_size);
13898 } else {
13899 top->share_mode = SM_SHARED;
13900 top->shared_pages_resident =
13901 OBJ_RESIDENT_COUNT(obj,
13902 entry_size);
13903 }
13904 }
13905 top->ref_count = ref_count;
13906 }
13907 /* XXX K64: obj_id will be truncated */
13908 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
13909
13910 vm_object_unlock(obj);
13911 }
13912}
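/*
 * Added note (not part of the original source): the classification above
 * maps directly onto the vm_region_top_info fields.  For example, an
 * entry backed by a single unshadowed object with a reference count of 1
 * is reported as SM_PRIVATE with its resident pages counted as private,
 * while an entry whose object has a shadow chain is reported as SM_COW,
 * with the shadow objects' resident pages accumulated into
 * shared_pages_resident.
 */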
13913
13914void
13915vm_map_region_walk(
13916 vm_map_t map,
13917 vm_map_offset_t va,
13918 vm_map_entry_t entry,
13919 vm_object_offset_t offset,
13920 vm_object_size_t range,
13921 vm_region_extended_info_t extended,
13922 boolean_t look_for_pages,
13923 mach_msg_type_number_t count)
13924{
13925 struct vm_object *obj, *tmp_obj;
13926 vm_map_offset_t last_offset;
13927 int i;
13928 int ref_count;
13929 struct vm_object *shadow_object;
13930 int shadow_depth;
13931 boolean_t do_region_footprint;
13932
13933 do_region_footprint = task_self_region_footprint();
13934
13935 if ((VME_OBJECT(entry) == 0) ||
13936 (entry->is_sub_map) ||
13937 (VME_OBJECT(entry)->phys_contiguous &&
13938 !entry->superpage_size)) {
13939 extended->share_mode = SM_EMPTY;
13940 extended->ref_count = 0;
13941 return;
13942 }
13943
13944 if (entry->superpage_size) {
13945 extended->shadow_depth = 0;
13946 extended->share_mode = SM_LARGE_PAGE;
13947 extended->ref_count = 1;
13948 extended->external_pager = 0;
13949 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
13951 return;
13952 }
13953
13954 obj = VME_OBJECT(entry);
13955
13956 vm_object_lock(obj);
13957
13958 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13959 ref_count--;
13960
13961 if (look_for_pages) {
13962 for (last_offset = offset + range;
13963 offset < last_offset;
13964 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
13965
13966 if (do_region_footprint) {
13967 int disp;
13968
13969 disp = 0;
13970 if (map->has_corpse_footprint) {
13971 /*
13972 * Query the page info data we saved
13973 * while forking the corpse.
13974 */
13975 vm_map_corpse_footprint_query_page_info(
13976 map,
13977 va,
13978 &disp);
13979 } else {
13980 /*
13981 * Query the pmap.
13982 */
13983 pmap_query_page_info(map->pmap,
13984 va,
13985 &disp);
13986 }
13987 if (disp & PMAP_QUERY_PAGE_PRESENT) {
13988 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
13989 extended->pages_resident++;
13990 }
13991 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
13992 extended->pages_reusable++;
13993 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
13994 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
13995 /* alternate accounting */
13996 } else {
13997 extended->pages_dirtied++;
13998 }
13999 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14000 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14001 /* alternate accounting */
14002 } else {
14003 extended->pages_swapped_out++;
14004 }
14005 }
14006 /* deal with alternate accounting */
14007 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14008 /* && not tagged as no-footprint? */
14009 VM_OBJECT_OWNER(obj) != NULL &&
14010 VM_OBJECT_OWNER(obj)->map == map) {
14011 if ((((va
14012 - entry->vme_start
14013 + VME_OFFSET(entry))
14014 / PAGE_SIZE) <
14015 (obj->resident_page_count +
14016 vm_compressor_pager_get_count(obj->pager)))) {
14017 /*
14018 * Non-volatile purgeable object owned
14019 * by this task: report the first
14020 * "#resident + #compressed" pages as
14021 * "resident" (to show that they
14022 * contribute to the footprint) but not
14023 * "dirty" (to avoid double-counting
14024 * with the fake "non-volatile" region
14025 * we'll report at the end of the
14026 * address space to account for all
14027 * (mapped or not) non-volatile memory
14028					 * owned by this task).
14029 */
14030 extended->pages_resident++;
14031 }
14032 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14033 obj->purgable == VM_PURGABLE_EMPTY) &&
14034 /* && not tagged as no-footprint? */
14035 VM_OBJECT_OWNER(obj) != NULL &&
14036 VM_OBJECT_OWNER(obj)->map == map) {
14037 if ((((va
14038 - entry->vme_start
14039 + VME_OFFSET(entry))
14040 / PAGE_SIZE) <
14041 obj->wired_page_count)) {
14042 /*
14043 * Volatile|empty purgeable object owned
14044 * by this task: report the first
14045 * "#wired" pages as "resident" (to
14046 * show that they contribute to the
14047 * footprint) but not "dirty" (to avoid
14048 * double-counting with the fake
14049 * "non-volatile" region we'll report
14050 * at the end of the address space to
14051 * account for all (mapped or not)
14052 * non-volatile memory owned by this
14053					 * task).
14054 */
14055 extended->pages_resident++;
14056 }
14057 } else if (obj->purgable != VM_PURGABLE_DENY) {
14058 /*
14059 * Pages from purgeable objects
14060 * will be reported as dirty
14061 * appropriately in an extra
14062 * fake memory region at the end of
14063 * the address space.
14064 */
14065 } else if (entry->iokit_acct) {
14066 /*
14067 * IOKit mappings are considered
14068 * as fully dirty for footprint's
14069 * sake.
14070 */
14071 extended->pages_dirtied++;
14072 }
14073 continue;
14074 }
14075
14076 vm_map_region_look_for_page(map, va, obj,
14077 offset, ref_count,
14078 0, extended, count);
14079 }
14080
14081 if (do_region_footprint) {
14082 goto collect_object_info;
14083 }
14084
14085 } else {
14086 collect_object_info:
14087 shadow_object = obj->shadow;
14088 shadow_depth = 0;
14089
14090 if ( !(obj->pager_trusted) && !(obj->internal))
14091 extended->external_pager = 1;
14092
14093 if (shadow_object != VM_OBJECT_NULL) {
14094 vm_object_lock(shadow_object);
14095 for (;
14096 shadow_object != VM_OBJECT_NULL;
14097 shadow_depth++) {
14098 vm_object_t next_shadow;
14099
14100 if ( !(shadow_object->pager_trusted) &&
14101 !(shadow_object->internal))
14102 extended->external_pager = 1;
14103
14104 next_shadow = shadow_object->shadow;
14105 if (next_shadow) {
14106 vm_object_lock(next_shadow);
14107 }
14108 vm_object_unlock(shadow_object);
14109 shadow_object = next_shadow;
14110 }
14111 }
14112 extended->shadow_depth = shadow_depth;
14113 }
14114
14115 if (extended->shadow_depth || entry->needs_copy)
14116 extended->share_mode = SM_COW;
14117 else {
14118 if (ref_count == 1)
14119 extended->share_mode = SM_PRIVATE;
14120 else {
14121 if (obj->true_share)
14122 extended->share_mode = SM_TRUESHARED;
14123 else
14124 extended->share_mode = SM_SHARED;
14125 }
14126 }
14127 extended->ref_count = ref_count - extended->shadow_depth;
14128
14129 for (i = 0; i < extended->shadow_depth; i++) {
14130 if ((tmp_obj = obj->shadow) == 0)
14131 break;
14132 vm_object_lock(tmp_obj);
14133 vm_object_unlock(obj);
14134
14135 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
14136 ref_count--;
14137
14138 extended->ref_count += ref_count;
14139 obj = tmp_obj;
14140 }
14141 vm_object_unlock(obj);
14142
14143 if (extended->share_mode == SM_SHARED) {
14144 vm_map_entry_t cur;
14145 vm_map_entry_t last;
14146 int my_refs;
14147
14148 obj = VME_OBJECT(entry);
14149 last = vm_map_to_entry(map);
14150 my_refs = 0;
14151
14152 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
14153 ref_count--;
14154 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
14155 my_refs += vm_map_region_count_obj_refs(cur, obj);
14156
14157 if (my_refs == ref_count)
14158 extended->share_mode = SM_PRIVATE_ALIASED;
14159 else if (my_refs > 1)
14160 extended->share_mode = SM_SHARED_ALIASED;
14161 }
14162}
14163
14164
14165/* object is locked on entry and locked on return */
14166
14167
14168static void
14169vm_map_region_look_for_page(
14170 __unused vm_map_t map,
14171 __unused vm_map_offset_t va,
14172 vm_object_t object,
14173 vm_object_offset_t offset,
14174 int max_refcnt,
14175 int depth,
14176 vm_region_extended_info_t extended,
14177 mach_msg_type_number_t count)
14178{
14179 vm_page_t p;
14180 vm_object_t shadow;
14181 int ref_count;
14182 vm_object_t caller_object;
14183
14184 shadow = object->shadow;
14185 caller_object = object;
14186
14187
14188 while (TRUE) {
14189
14190 if ( !(object->pager_trusted) && !(object->internal))
14191 extended->external_pager = 1;
14192
14193 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14194 if (shadow && (max_refcnt == 1))
14195 extended->pages_shared_now_private++;
14196
14197 if (!p->vmp_fictitious &&
14198 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
14199 extended->pages_dirtied++;
14200 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14201 if (p->vmp_reusable || object->all_reusable) {
14202 extended->pages_reusable++;
14203 }
14204 }
14205
14206 extended->pages_resident++;
14207
14208 if(object != caller_object)
14209 vm_object_unlock(object);
14210
14211 return;
14212 }
14213 if (object->internal &&
14214 object->alive &&
14215 !object->terminating &&
14216 object->pager_ready) {
14217
14218 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14219 == VM_EXTERNAL_STATE_EXISTS) {
14220 /* the pager has that page */
14221 extended->pages_swapped_out++;
14222 if (object != caller_object)
14223 vm_object_unlock(object);
14224 return;
14225 }
14226 }
14227
14228 if (shadow) {
14229 vm_object_lock(shadow);
14230
14231 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
14232 ref_count--;
14233
14234 if (++depth > extended->shadow_depth)
14235 extended->shadow_depth = depth;
14236
14237 if (ref_count > max_refcnt)
14238 max_refcnt = ref_count;
14239
14240 if(object != caller_object)
14241 vm_object_unlock(object);
14242
14243 offset = offset + object->vo_shadow_offset;
14244 object = shadow;
14245 shadow = object->shadow;
14246 continue;
14247 }
14248 if(object != caller_object)
14249 vm_object_unlock(object);
14250 break;
14251 }
14252}
14253
14254static int
14255vm_map_region_count_obj_refs(
14256 vm_map_entry_t entry,
14257 vm_object_t object)
14258{
14259 int ref_count;
14260 vm_object_t chk_obj;
14261 vm_object_t tmp_obj;
14262
14263 if (VME_OBJECT(entry) == 0)
14264 return(0);
14265
14266 if (entry->is_sub_map)
14267 return(0);
14268 else {
14269 ref_count = 0;
14270
14271 chk_obj = VME_OBJECT(entry);
14272 vm_object_lock(chk_obj);
14273
14274 while (chk_obj) {
14275 if (chk_obj == object)
14276 ref_count++;
14277 tmp_obj = chk_obj->shadow;
14278 if (tmp_obj)
14279 vm_object_lock(tmp_obj);
14280 vm_object_unlock(chk_obj);
14281
14282 chk_obj = tmp_obj;
14283 }
14284 }
14285 return(ref_count);
14286}
14287
14288
14289/*
14290 * Routine: vm_map_simplify
14291 *
14292 * Description:
14293 * Attempt to simplify the map representation in
14294 * the vicinity of the given starting address.
14295 * Note:
14296 * This routine is intended primarily to keep the
14297 * kernel maps more compact -- they generally don't
14298 * benefit from the "expand a map entry" technology
14299 * at allocation time because the adjacent entry
14300 * is often wired down.
14301 */
14302void
14303vm_map_simplify_entry(
14304 vm_map_t map,
14305 vm_map_entry_t this_entry)
14306{
14307 vm_map_entry_t prev_entry;
14308
14309 counter(c_vm_map_simplify_entry_called++);
14310
14311 prev_entry = this_entry->vme_prev;
14312
14313 if ((this_entry != vm_map_to_entry(map)) &&
14314 (prev_entry != vm_map_to_entry(map)) &&
14315
14316 (prev_entry->vme_end == this_entry->vme_start) &&
14317
14318 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14319 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14320 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14321 prev_entry->vme_start))
14322 == VME_OFFSET(this_entry)) &&
14323
14324 (prev_entry->behavior == this_entry->behavior) &&
14325 (prev_entry->needs_copy == this_entry->needs_copy) &&
14326 (prev_entry->protection == this_entry->protection) &&
14327 (prev_entry->max_protection == this_entry->max_protection) &&
14328 (prev_entry->inheritance == this_entry->inheritance) &&
14329 (prev_entry->use_pmap == this_entry->use_pmap) &&
14330 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14331 (prev_entry->no_cache == this_entry->no_cache) &&
14332 (prev_entry->permanent == this_entry->permanent) &&
14333 (prev_entry->map_aligned == this_entry->map_aligned) &&
14334 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14335 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14336 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14337 /* from_reserved_zone: OK if that field doesn't match */
14338 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14339 (prev_entry->vme_resilient_codesign ==
14340 this_entry->vme_resilient_codesign) &&
14341 (prev_entry->vme_resilient_media ==
14342 this_entry->vme_resilient_media) &&
14343
14344 (prev_entry->wired_count == this_entry->wired_count) &&
14345 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14346
14347 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14348 (prev_entry->in_transition == FALSE) &&
14349 (this_entry->in_transition == FALSE) &&
14350 (prev_entry->needs_wakeup == FALSE) &&
14351 (this_entry->needs_wakeup == FALSE) &&
14352 (prev_entry->is_shared == FALSE) &&
14353 (this_entry->is_shared == FALSE) &&
14354 (prev_entry->superpage_size == FALSE) &&
14355 (this_entry->superpage_size == FALSE)
14356 ) {
14357 vm_map_store_entry_unlink(map, prev_entry);
14358 assert(prev_entry->vme_start < this_entry->vme_end);
14359 if (prev_entry->map_aligned)
14360 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14361 VM_MAP_PAGE_MASK(map)));
14362 this_entry->vme_start = prev_entry->vme_start;
14363 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14364
14365 if (map->holelistenabled) {
14366 vm_map_store_update_first_free(map, this_entry, TRUE);
14367 }
14368
14369 if (prev_entry->is_sub_map) {
14370 vm_map_deallocate(VME_SUBMAP(prev_entry));
14371 } else {
14372 vm_object_deallocate(VME_OBJECT(prev_entry));
14373 }
14374 vm_map_entry_dispose(map, prev_entry);
14375 SAVE_HINT_MAP_WRITE(map, this_entry);
14376 counter(c_vm_map_simplified++);
14377 }
14378}
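/*
 * Added example (illustrative, not part of the original source): a typical
 * case handled above is re-merging entries that were split by
 * vm_map_clip_start()/vm_map_clip_end() during a wiring or protection
 * operation.  Once the two halves end up with identical attributes again
 * (same object, contiguous offsets, same protections, wiring, alias,
 * etc.), the previous entry is unlinked and disposed of, and this_entry
 * is extended backwards to cover both ranges, leaving a single entry.
 */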
14379
14380void
14381vm_map_simplify(
14382 vm_map_t map,
14383 vm_map_offset_t start)
14384{
14385 vm_map_entry_t this_entry;
14386
14387 vm_map_lock(map);
14388 if (vm_map_lookup_entry(map, start, &this_entry)) {
14389 vm_map_simplify_entry(map, this_entry);
14390 vm_map_simplify_entry(map, this_entry->vme_next);
14391 }
14392 counter(c_vm_map_simplify_called++);
14393 vm_map_unlock(map);
14394}
14395
14396static void
14397vm_map_simplify_range(
14398 vm_map_t map,
14399 vm_map_offset_t start,
14400 vm_map_offset_t end)
14401{
14402 vm_map_entry_t entry;
14403
14404 /*
14405 * The map should be locked (for "write") by the caller.
14406 */
14407
14408 if (start >= end) {
14409 /* invalid address range */
14410 return;
14411 }
14412
14413 start = vm_map_trunc_page(start,
14414 VM_MAP_PAGE_MASK(map));
14415 end = vm_map_round_page(end,
14416 VM_MAP_PAGE_MASK(map));
14417
14418 if (!vm_map_lookup_entry(map, start, &entry)) {
14419 /* "start" is not mapped and "entry" ends before "start" */
14420 if (entry == vm_map_to_entry(map)) {
14421 /* start with first entry in the map */
14422 entry = vm_map_first_entry(map);
14423 } else {
14424 /* start with next entry */
14425 entry = entry->vme_next;
14426 }
14427 }
14428
14429 while (entry != vm_map_to_entry(map) &&
14430 entry->vme_start <= end) {
14431 /* try and coalesce "entry" with its previous entry */
14432 vm_map_simplify_entry(map, entry);
14433 entry = entry->vme_next;
14434 }
14435}
14436
14437
14438/*
14439 * Routine: vm_map_machine_attribute
14440 * Purpose:
14441 * Provide machine-specific attributes to mappings,
14442 * such as cachability etc. for machines that provide
14443 * them. NUMA architectures and machines with big/strange
14444 * caches will use this.
14445 * Note:
14446 *	Responsibilities for locking and checking are handled here;
14447 *	everything else is handled in the pmap module. If any non-volatile
14448 * information must be kept, the pmap module should handle
14449 * it itself. [This assumes that attributes do not
14450 * need to be inherited, which seems ok to me]
14451 */
14452kern_return_t
14453vm_map_machine_attribute(
14454 vm_map_t map,
14455 vm_map_offset_t start,
14456 vm_map_offset_t end,
14457 vm_machine_attribute_t attribute,
14458 vm_machine_attribute_val_t* value) /* IN/OUT */
14459{
14460 kern_return_t ret;
14461 vm_map_size_t sync_size;
14462 vm_map_entry_t entry;
14463
14464 if (start < vm_map_min(map) || end > vm_map_max(map))
14465 return KERN_INVALID_ADDRESS;
14466
14467 /* Figure how much memory we need to flush (in page increments) */
14468 sync_size = end - start;
14469
14470 vm_map_lock(map);
14471
14472 if (attribute != MATTR_CACHE) {
14473 /* If we don't have to find physical addresses, we */
14474 /* don't have to do an explicit traversal here. */
14475 ret = pmap_attribute(map->pmap, start, end-start,
14476 attribute, value);
14477 vm_map_unlock(map);
14478 return ret;
14479 }
14480
14481 ret = KERN_SUCCESS; /* Assume it all worked */
14482
14483 while(sync_size) {
14484 if (vm_map_lookup_entry(map, start, &entry)) {
14485 vm_map_size_t sub_size;
14486 if((entry->vme_end - start) > sync_size) {
14487 sub_size = sync_size;
14488 sync_size = 0;
14489 } else {
14490 sub_size = entry->vme_end - start;
14491 sync_size -= sub_size;
14492 }
14493 if(entry->is_sub_map) {
14494 vm_map_offset_t sub_start;
14495 vm_map_offset_t sub_end;
14496
14497 sub_start = (start - entry->vme_start)
14498 + VME_OFFSET(entry);
14499 sub_end = sub_start + sub_size;
14500 vm_map_machine_attribute(
14501 VME_SUBMAP(entry),
14502 sub_start,
14503 sub_end,
14504 attribute, value);
14505 } else {
14506 if (VME_OBJECT(entry)) {
14507 vm_page_t m;
14508 vm_object_t object;
14509 vm_object_t base_object;
14510 vm_object_t last_object;
14511 vm_object_offset_t offset;
14512 vm_object_offset_t base_offset;
14513 vm_map_size_t range;
14514 range = sub_size;
14515 offset = (start - entry->vme_start)
14516 + VME_OFFSET(entry);
14517 base_offset = offset;
14518 object = VME_OBJECT(entry);
14519 base_object = object;
14520 last_object = NULL;
14521
14522 vm_object_lock(object);
14523
14524 while (range) {
14525 m = vm_page_lookup(
14526 object, offset);
14527
14528 if (m && !m->vmp_fictitious) {
14529 ret =
14530 pmap_attribute_cache_sync(
14531 VM_PAGE_GET_PHYS_PAGE(m),
14532 PAGE_SIZE,
14533 attribute, value);
14534
14535 } else if (object->shadow) {
14536 offset = offset + object->vo_shadow_offset;
14537 last_object = object;
14538 object = object->shadow;
14539 vm_object_lock(last_object->shadow);
14540 vm_object_unlock(last_object);
14541 continue;
14542 }
14543 range -= PAGE_SIZE;
14544
14545 if (base_object != object) {
14546 vm_object_unlock(object);
14547 vm_object_lock(base_object);
14548 object = base_object;
14549 }
14550 /* Bump to the next page */
14551 base_offset += PAGE_SIZE;
14552 offset = base_offset;
14553 }
14554 vm_object_unlock(object);
14555 }
14556 }
14557 start += sub_size;
14558 } else {
14559 vm_map_unlock(map);
14560 return KERN_FAILURE;
14561 }
14562
14563 }
14564
14565 vm_map_unlock(map);
14566
14567 return ret;
14568}
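/*
 * Usage sketch (added commentary; the user-level entry point named here
 * is an assumption, not taken from this file): the vm_machine_attribute()
 * MIG routine is the usual way to reach this code, e.g. to request a
 * cache flush over a range:
 *
 *	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr;
 *
 *	kr = vm_machine_attribute(mach_task_self(),
 *	    (vm_address_t)addr, (vm_size_t)len, MATTR_CACHE, &value);
 *
 * MATTR_CACHE requests take the explicit page-by-page traversal above;
 * other attributes are handed straight to pmap_attribute().
 */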
14569
14570/*
14571 * vm_map_behavior_set:
14572 *
14573 * Sets the paging reference behavior of the specified address
14574 * range in the target map. Paging reference behavior affects
14575 * how pagein operations resulting from faults on the map will be
14576 * clustered.
14577 */
14578kern_return_t
14579vm_map_behavior_set(
14580 vm_map_t map,
14581 vm_map_offset_t start,
14582 vm_map_offset_t end,
14583 vm_behavior_t new_behavior)
14584{
14585 vm_map_entry_t entry;
14586 vm_map_entry_t temp_entry;
14587
14588 XPR(XPR_VM_MAP,
14589 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
14590 map, start, end, new_behavior, 0);
14591
14592 if (start > end ||
14593 start < vm_map_min(map) ||
14594 end > vm_map_max(map)) {
14595 return KERN_NO_SPACE;
14596 }
14597
14598 switch (new_behavior) {
14599
14600 /*
14601 * This first block of behaviors all set a persistent state on the specified
14602 * memory range. All we have to do here is to record the desired behavior
14603 * in the vm_map_entry_t's.
14604 */
14605
14606 case VM_BEHAVIOR_DEFAULT:
14607 case VM_BEHAVIOR_RANDOM:
14608 case VM_BEHAVIOR_SEQUENTIAL:
14609 case VM_BEHAVIOR_RSEQNTL:
14610 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14611 vm_map_lock(map);
14612
14613 /*
14614 * The entire address range must be valid for the map.
14615 * Note that vm_map_range_check() does a
14616 * vm_map_lookup_entry() internally and returns the
14617 * entry containing the start of the address range if
14618 * the entire range is valid.
14619 */
14620 if (vm_map_range_check(map, start, end, &temp_entry)) {
14621 entry = temp_entry;
14622 vm_map_clip_start(map, entry, start);
14623 }
14624 else {
14625 vm_map_unlock(map);
14626 return(KERN_INVALID_ADDRESS);
14627 }
14628
14629 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14630 vm_map_clip_end(map, entry, end);
14631 if (entry->is_sub_map) {
14632 assert(!entry->use_pmap);
14633 }
14634
14635 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
14636 entry->zero_wired_pages = TRUE;
14637 } else {
14638 entry->behavior = new_behavior;
14639 }
14640 entry = entry->vme_next;
14641 }
14642
14643 vm_map_unlock(map);
14644 break;
14645
14646 /*
14647 * The rest of these are different from the above in that they cause
14648 * an immediate action to take place as opposed to setting a behavior that
14649 * affects future actions.
14650 */
14651
14652 case VM_BEHAVIOR_WILLNEED:
14653 return vm_map_willneed(map, start, end);
14654
14655 case VM_BEHAVIOR_DONTNEED:
14656 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14657
14658 case VM_BEHAVIOR_FREE:
14659 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14660
14661 case VM_BEHAVIOR_REUSABLE:
14662 return vm_map_reusable_pages(map, start, end);
14663
14664 case VM_BEHAVIOR_REUSE:
14665 return vm_map_reuse_pages(map, start, end);
14666
14667 case VM_BEHAVIOR_CAN_REUSE:
14668 return vm_map_can_reuse(map, start, end);
14669
14670#if MACH_ASSERT
14671 case VM_BEHAVIOR_PAGEOUT:
14672 return vm_map_pageout(map, start, end);
14673#endif /* MACH_ASSERT */
14674
14675 default:
14676 return(KERN_INVALID_ARGUMENT);
14677 }
14678
14679 return(KERN_SUCCESS);
14680}
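/*
 * Added note (hedged, not part of the original source): user space
 * normally selects these behaviors through madvise(2); the BSD layer is
 * expected to translate the POSIX advice values into the VM_BEHAVIOR_*
 * constants handled above, e.g. MADV_SEQUENTIAL -> VM_BEHAVIOR_SEQUENTIAL,
 * MADV_WILLNEED -> VM_BEHAVIOR_WILLNEED, MADV_FREE -> VM_BEHAVIOR_FREE,
 * MADV_FREE_REUSABLE -> VM_BEHAVIOR_REUSABLE.  A minimal caller:
 *
 *	#include <sys/mman.h>
 *
 *	(void) madvise(buf, len, MADV_SEQUENTIAL);
 */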
14681
14682
14683/*
14684 * Internals for madvise(MADV_WILLNEED) system call.
14685 *
14686 * The present implementation is to do a read-ahead if the mapping corresponds
14687 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
14688 * and basically ignore the "advice" (which we are always free to do).
14689 */
14690
14691
14692static kern_return_t
14693vm_map_willneed(
14694 vm_map_t map,
14695 vm_map_offset_t start,
14696 vm_map_offset_t end
14697)
14698{
14699 vm_map_entry_t entry;
14700 vm_object_t object;
14701 memory_object_t pager;
14702 struct vm_object_fault_info fault_info = {};
14703 kern_return_t kr;
14704 vm_object_size_t len;
14705 vm_object_offset_t offset;
14706
14707 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14708 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14709 fault_info.stealth = TRUE;
14710
14711 /*
14712 * The MADV_WILLNEED operation doesn't require any changes to the
14713 * vm_map_entry_t's, so the read lock is sufficient.
14714 */
14715
14716 vm_map_lock_read(map);
14717
14718 /*
14719 * The madvise semantics require that the address range be fully
14720 * allocated with no holes. Otherwise, we're required to return
14721 * an error.
14722 */
14723
14724 if (! vm_map_range_check(map, start, end, &entry)) {
14725 vm_map_unlock_read(map);
14726 return KERN_INVALID_ADDRESS;
14727 }
14728
14729 /*
14730 * Examine each vm_map_entry_t in the range.
14731 */
14732 for (; entry != vm_map_to_entry(map) && start < end; ) {
14733
14734 /*
14735 * The first time through, the start address could be anywhere
14736 * within the vm_map_entry we found. So adjust the offset to
14737 * correspond. After that, the offset will always be zero to
14738 * correspond to the beginning of the current vm_map_entry.
14739 */
14740 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14741
14742 /*
14743 * Set the length so we don't go beyond the end of the
14744 * map_entry or beyond the end of the range we were given.
14745		 * This range could also span multiple map entries, all of which
14746		 * may map different files, so make sure we only do the right amount
14747 * of I/O for each object. Note that it's possible for there
14748 * to be multiple map entries all referring to the same object
14749 * but with different page permissions, but it's not worth
14750 * trying to optimize that case.
14751 */
14752 len = MIN(entry->vme_end - start, end - start);
14753
14754 if ((vm_size_t) len != len) {
14755 /* 32-bit overflow */
14756 len = (vm_size_t) (0 - PAGE_SIZE);
14757 }
14758 fault_info.cluster_size = (vm_size_t) len;
14759 fault_info.lo_offset = offset;
14760 fault_info.hi_offset = offset + len;
14761 fault_info.user_tag = VME_ALIAS(entry);
14762 fault_info.pmap_options = 0;
14763 if (entry->iokit_acct ||
14764 (!entry->is_sub_map && !entry->use_pmap)) {
14765 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14766 }
14767
14768 /*
14769 * If there's no read permission to this mapping, then just
14770 * skip it.
14771 */
14772 if ((entry->protection & VM_PROT_READ) == 0) {
14773 entry = entry->vme_next;
14774 start = entry->vme_start;
14775 continue;
14776 }
14777
14778 /*
14779 * Find the file object backing this map entry. If there is
14780 * none, then we simply ignore the "will need" advice for this
14781 * entry and go on to the next one.
14782 */
14783 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
14784 entry = entry->vme_next;
14785 start = entry->vme_start;
14786 continue;
14787 }
14788
14789 /*
14790 * The data_request() could take a long time, so let's
14791 * release the map lock to avoid blocking other threads.
14792 */
14793 vm_map_unlock_read(map);
14794
14795 vm_object_paging_begin(object);
14796 pager = object->pager;
14797 vm_object_unlock(object);
14798
14799 /*
14800 * Get the data from the object asynchronously.
14801 *
14802 * Note that memory_object_data_request() places limits on the
14803 * amount of I/O it will do. Regardless of the len we
14804 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
14805 * silently truncates the len to that size. This isn't
14806 * necessarily bad since madvise shouldn't really be used to
14807 * page in unlimited amounts of data. Other Unix variants
14808 * limit the willneed case as well. If this turns out to be an
14809 * issue for developers, then we can always adjust the policy
14810 * here and still be backwards compatible since this is all
14811 * just "advice".
14812 */
14813 kr = memory_object_data_request(
14814 pager,
14815 offset + object->paging_offset,
14816 0, /* ignored */
14817 VM_PROT_READ,
14818 (memory_object_fault_info_t)&fault_info);
14819
14820 vm_object_lock(object);
14821 vm_object_paging_end(object);
14822 vm_object_unlock(object);
14823
14824 /*
14825 * If we couldn't do the I/O for some reason, just give up on
14826 * the madvise. We still return success to the user since
14827 * madvise isn't supposed to fail when the advice can't be
14828 * taken.
14829 */
14830 if (kr != KERN_SUCCESS) {
14831 return KERN_SUCCESS;
14832 }
14833
14834 start += len;
14835 if (start >= end) {
14836 /* done */
14837 return KERN_SUCCESS;
14838 }
14839
14840 /* look up next entry */
14841 vm_map_lock_read(map);
14842 if (! vm_map_lookup_entry(map, start, &entry)) {
14843 /*
14844 * There's a new hole in the address range.
14845 */
14846 vm_map_unlock_read(map);
14847 return KERN_INVALID_ADDRESS;
14848 }
14849 }
14850
14851 vm_map_unlock_read(map);
14852 return KERN_SUCCESS;
14853}
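/*
 * Usage sketch (added commentary, not part of the original source): the
 * read-ahead above is what services madvise(MADV_WILLNEED) for a mapped
 * regular file.  The file path below is purely illustrative:
 *
 *	#include <fcntl.h>
 *	#include <sys/mman.h>
 *
 *	int fd = open("/some/file", O_RDONLY);
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p != MAP_FAILED)
 *		(void) madvise(p, len, MADV_WILLNEED);
 *
 * For anonymous memory the advice is accepted but ignored, as described
 * in the block comment above the function.
 */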
14854
14855static boolean_t
14856vm_map_entry_is_reusable(
14857 vm_map_entry_t entry)
14858{
14859 /* Only user map entries */
14860
14861 vm_object_t object;
14862
14863 if (entry->is_sub_map) {
14864 return FALSE;
14865 }
14866
14867 switch (VME_ALIAS(entry)) {
14868 case VM_MEMORY_MALLOC:
14869 case VM_MEMORY_MALLOC_SMALL:
14870 case VM_MEMORY_MALLOC_LARGE:
14871 case VM_MEMORY_REALLOC:
14872 case VM_MEMORY_MALLOC_TINY:
14873 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
14874 case VM_MEMORY_MALLOC_LARGE_REUSED:
14875 /*
14876 * This is a malloc() memory region: check if it's still
14877 * in its original state and can be re-used for more
14878 * malloc() allocations.
14879 */
14880 break;
14881 default:
14882 /*
14883 * Not a malloc() memory region: let the caller decide if
14884 * it's re-usable.
14885 */
14886 return TRUE;
14887 }
14888
14889 if (/*entry->is_shared ||*/
14890 entry->is_sub_map ||
14891 entry->in_transition ||
14892 entry->protection != VM_PROT_DEFAULT ||
14893 entry->max_protection != VM_PROT_ALL ||
14894 entry->inheritance != VM_INHERIT_DEFAULT ||
14895 entry->no_cache ||
14896 entry->permanent ||
14897 entry->superpage_size != FALSE ||
14898 entry->zero_wired_pages ||
14899 entry->wired_count != 0 ||
14900 entry->user_wired_count != 0) {
14901 return FALSE;
14902 }
14903
14904 object = VME_OBJECT(entry);
14905 if (object == VM_OBJECT_NULL) {
14906 return TRUE;
14907 }
14908 if (
14909#if 0
14910 /*
14911 * Let's proceed even if the VM object is potentially
14912 * shared.
14913 * We check for this later when processing the actual
14914 * VM pages, so the contents will be safe if shared.
14915 *
14916 * But we can still mark this memory region as "reusable" to
14917 * acknowledge that the caller did let us know that the memory
14918 * could be re-used and should not be penalized for holding
14919 * on to it. This allows its "resident size" to not include
14920 * the reusable range.
14921 */
14922 object->ref_count == 1 &&
14923#endif
14924 object->wired_page_count == 0 &&
14925 object->copy == VM_OBJECT_NULL &&
14926 object->shadow == VM_OBJECT_NULL &&
14927 object->internal &&
14928 object->purgable == VM_PURGABLE_DENY &&
14929 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
14930 !object->true_share &&
14931 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
14932 !object->code_signed) {
14933 return TRUE;
14934 }
14935 return FALSE;
14936
14937
14938}
14939
14940static kern_return_t
14941vm_map_reuse_pages(
14942 vm_map_t map,
14943 vm_map_offset_t start,
14944 vm_map_offset_t end)
14945{
14946 vm_map_entry_t entry;
14947 vm_object_t object;
14948 vm_object_offset_t start_offset, end_offset;
14949
14950 /*
14951 * The MADV_REUSE operation doesn't require any changes to the
14952 * vm_map_entry_t's, so the read lock is sufficient.
14953 */
14954
14955 vm_map_lock_read(map);
14956 assert(map->pmap != kernel_pmap); /* protect alias access */
14957
14958 /*
14959 * The madvise semantics require that the address range be fully
14960 * allocated with no holes. Otherwise, we're required to return
14961 * an error.
14962 */
14963
14964 if (!vm_map_range_check(map, start, end, &entry)) {
14965 vm_map_unlock_read(map);
14966 vm_page_stats_reusable.reuse_pages_failure++;
14967 return KERN_INVALID_ADDRESS;
14968 }
14969
14970 /*
14971 * Examine each vm_map_entry_t in the range.
14972 */
14973 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14974 entry = entry->vme_next) {
14975 /*
14976 * Sanity check on the VM map entry.
14977 */
14978 if (! vm_map_entry_is_reusable(entry)) {
14979 vm_map_unlock_read(map);
14980 vm_page_stats_reusable.reuse_pages_failure++;
14981 return KERN_INVALID_ADDRESS;
14982 }
14983
14984 /*
14985 * The first time through, the start address could be anywhere
14986 * within the vm_map_entry we found. So adjust the offset to
14987 * correspond.
14988 */
14989 if (entry->vme_start < start) {
14990 start_offset = start - entry->vme_start;
14991 } else {
14992 start_offset = 0;
14993 }
14994 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14995 start_offset += VME_OFFSET(entry);
14996 end_offset += VME_OFFSET(entry);
14997
14998 assert(!entry->is_sub_map);
14999 object = VME_OBJECT(entry);
15000 if (object != VM_OBJECT_NULL) {
15001 vm_object_lock(object);
15002 vm_object_reuse_pages(object, start_offset, end_offset,
15003 TRUE);
15004 vm_object_unlock(object);
15005 }
15006
15007 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15008 /*
15009 * XXX
15010 * We do not hold the VM map exclusively here.
15011 * The "alias" field is not that critical, so it's
15012 * safe to update it here, as long as it is the only
15013 * one that can be modified while holding the VM map
15014 * "shared".
15015 */
15016 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15017 }
15018 }
15019
15020 vm_map_unlock_read(map);
15021 vm_page_stats_reusable.reuse_pages_success++;
15022 return KERN_SUCCESS;
15023}
15024
15025
15026static kern_return_t
15027vm_map_reusable_pages(
15028 vm_map_t map,
15029 vm_map_offset_t start,
15030 vm_map_offset_t end)
15031{
15032 vm_map_entry_t entry;
15033 vm_object_t object;
15034 vm_object_offset_t start_offset, end_offset;
15035 vm_map_offset_t pmap_offset;
15036
15037 /*
15038 * The MADV_REUSABLE operation doesn't require any changes to the
15039 * vm_map_entry_t's, so the read lock is sufficient.
15040 */
15041
15042 vm_map_lock_read(map);
15043 assert(map->pmap != kernel_pmap); /* protect alias access */
15044
15045 /*
15046 * The madvise semantics require that the address range be fully
15047 * allocated with no holes. Otherwise, we're required to return
15048 * an error.
15049 */
15050
15051 if (!vm_map_range_check(map, start, end, &entry)) {
15052 vm_map_unlock_read(map);
15053 vm_page_stats_reusable.reusable_pages_failure++;
15054 return KERN_INVALID_ADDRESS;
15055 }
15056
15057 /*
15058 * Examine each vm_map_entry_t in the range.
15059 */
15060 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15061 entry = entry->vme_next) {
15062 int kill_pages = 0;
15063
15064 /*
15065 * Sanity check on the VM map entry.
15066 */
15067 if (! vm_map_entry_is_reusable(entry)) {
15068 vm_map_unlock_read(map);
15069 vm_page_stats_reusable.reusable_pages_failure++;
15070 return KERN_INVALID_ADDRESS;
15071 }
15072
15073 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15074 /* not writable: can't discard contents */
15075 vm_map_unlock_read(map);
15076 vm_page_stats_reusable.reusable_nonwritable++;
15077 vm_page_stats_reusable.reusable_pages_failure++;
15078 return KERN_PROTECTION_FAILURE;
15079 }
15080
15081 /*
15082 * The first time through, the start address could be anywhere
15083 * within the vm_map_entry we found. So adjust the offset to
15084 * correspond.
15085 */
15086 if (entry->vme_start < start) {
15087 start_offset = start - entry->vme_start;
15088 pmap_offset = start;
15089 } else {
15090 start_offset = 0;
15091 pmap_offset = entry->vme_start;
15092 }
15093 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15094 start_offset += VME_OFFSET(entry);
15095 end_offset += VME_OFFSET(entry);
15096
15097 assert(!entry->is_sub_map);
15098 object = VME_OBJECT(entry);
15099 if (object == VM_OBJECT_NULL)
15100 continue;
15101
15102
15103 vm_object_lock(object);
15104 if (((object->ref_count == 1) ||
15105 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15106 object->copy == VM_OBJECT_NULL)) &&
15107 object->shadow == VM_OBJECT_NULL &&
15108 /*
15109 * "iokit_acct" entries are billed for their virtual size
15110 * (rather than for their resident pages only), so they
15111 * wouldn't benefit from making pages reusable, and it
15112 * would be hard to keep track of pages that are both
15113 * "iokit_acct" and "reusable" in the pmap stats and
15114 * ledgers.
15115 */
15116 !(entry->iokit_acct ||
15117 (!entry->is_sub_map && !entry->use_pmap))) {
15118 if (object->ref_count != 1) {
15119 vm_page_stats_reusable.reusable_shared++;
15120 }
15121 kill_pages = 1;
15122 } else {
15123 kill_pages = -1;
15124 }
15125 if (kill_pages != -1) {
15126 vm_object_deactivate_pages(object,
15127 start_offset,
15128 end_offset - start_offset,
15129 kill_pages,
15130 TRUE /*reusable_pages*/,
15131 map->pmap,
15132 pmap_offset);
15133 } else {
15134 vm_page_stats_reusable.reusable_pages_shared++;
15135 }
15136 vm_object_unlock(object);
15137
15138 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15139 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15140 /*
15141 * XXX
15142 * We do not hold the VM map exclusively here.
15143 * The "alias" field is not that critical, so it's
15144 * safe to update it here, as long as it is the only
15145 * one that can be modified while holding the VM map
15146 * "shared".
15147 */
15148 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15149 }
15150 }
15151
15152 vm_map_unlock_read(map);
15153 vm_page_stats_reusable.reusable_pages_success++;
15154 return KERN_SUCCESS;
15155}
15156
15157
15158static kern_return_t
15159vm_map_can_reuse(
15160 vm_map_t map,
15161 vm_map_offset_t start,
15162 vm_map_offset_t end)
15163{
15164 vm_map_entry_t entry;
15165
15166 /*
15167 * The MADV_REUSABLE operation doesn't require any changes to the
15168 * vm_map_entry_t's, so the read lock is sufficient.
15169 */
15170
15171 vm_map_lock_read(map);
15172 assert(map->pmap != kernel_pmap); /* protect alias access */
15173
15174 /*
15175 * The madvise semantics require that the address range be fully
15176 * allocated with no holes. Otherwise, we're required to return
15177 * an error.
15178 */
15179
15180 if (!vm_map_range_check(map, start, end, &entry)) {
15181 vm_map_unlock_read(map);
15182 vm_page_stats_reusable.can_reuse_failure++;
15183 return KERN_INVALID_ADDRESS;
15184 }
15185
15186 /*
15187 * Examine each vm_map_entry_t in the range.
15188 */
15189 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15190 entry = entry->vme_next) {
15191 /*
15192 * Sanity check on the VM map entry.
15193 */
15194 if (! vm_map_entry_is_reusable(entry)) {
15195 vm_map_unlock_read(map);
15196 vm_page_stats_reusable.can_reuse_failure++;
15197 return KERN_INVALID_ADDRESS;
15198 }
15199 }
15200
15201 vm_map_unlock_read(map);
15202 vm_page_stats_reusable.can_reuse_success++;
15203 return KERN_SUCCESS;
15204}
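/*
 * Usage sketch (added commentary, not part of the original source): the
 * reuse/reusable routines above back the Darwin-specific madvise() advice
 * values used by malloc-style allocators.  Assuming the <sys/mman.h>
 * definitions of MADV_FREE_REUSABLE, MADV_FREE_REUSE and MADV_CAN_REUSE,
 * the typical pattern is:
 *
 *	madvise(chunk, chunk_size, MADV_FREE_REUSABLE);	// contents may go
 *	...
 *	madvise(chunk, chunk_size, MADV_FREE_REUSE);	// reclaim before reuse
 *
 * MADV_CAN_REUSE only asks whether the range would qualify
 * (vm_map_can_reuse()) without changing any page state.
 */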
15205
15206
15207#if MACH_ASSERT
15208static kern_return_t
15209vm_map_pageout(
15210 vm_map_t map,
15211 vm_map_offset_t start,
15212 vm_map_offset_t end)
15213{
15214 vm_map_entry_t entry;
15215
15216 /*
15217 * The MADV_PAGEOUT operation doesn't require any changes to the
15218 * vm_map_entry_t's, so the read lock is sufficient.
15219 */
15220
15221 vm_map_lock_read(map);
15222
15223 /*
15224 * The madvise semantics require that the address range be fully
15225 * allocated with no holes. Otherwise, we're required to return
15226 * an error.
15227 */
15228
15229 if (!vm_map_range_check(map, start, end, &entry)) {
15230 vm_map_unlock_read(map);
15231 return KERN_INVALID_ADDRESS;
15232 }
15233
15234 /*
15235 * Examine each vm_map_entry_t in the range.
15236 */
15237 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15238 entry = entry->vme_next) {
15239 vm_object_t object;
15240
15241 /*
15242 * Sanity check on the VM map entry.
15243 */
15244 if (entry->is_sub_map) {
15245 vm_map_t submap;
15246 vm_map_offset_t submap_start;
15247 vm_map_offset_t submap_end;
15248 vm_map_entry_t submap_entry;
15249
15250 submap = VME_SUBMAP(entry);
15251 submap_start = VME_OFFSET(entry);
15252 submap_end = submap_start + (entry->vme_end -
15253 entry->vme_start);
15254
15255 vm_map_lock_read(submap);
15256
15257 if (! vm_map_range_check(submap,
15258 submap_start,
15259 submap_end,
15260 &submap_entry)) {
15261 vm_map_unlock_read(submap);
15262 vm_map_unlock_read(map);
15263 return KERN_INVALID_ADDRESS;
15264 }
15265
15266 object = VME_OBJECT(submap_entry);
15267 if (submap_entry->is_sub_map ||
15268 object == VM_OBJECT_NULL ||
15269 !object->internal) {
15270 vm_map_unlock_read(submap);
15271 continue;
15272 }
15273
15274 vm_object_pageout(object);
15275
15276 vm_map_unlock_read(submap);
15277 submap = VM_MAP_NULL;
15278 submap_entry = VM_MAP_ENTRY_NULL;
15279 continue;
15280 }
15281
15282 object = VME_OBJECT(entry);
15283 if (entry->is_sub_map ||
15284 object == VM_OBJECT_NULL ||
15285 !object->internal) {
15286 continue;
15287 }
15288
15289 vm_object_pageout(object);
15290 }
15291
15292 vm_map_unlock_read(map);
15293 return KERN_SUCCESS;
15294}
15295#endif /* MACH_ASSERT */
15296
15297
15298/*
15299 * Routine: vm_map_entry_insert
15300 *
15301 *	Description:	This routine inserts a new vm_map_entry into a locked map.
15302 */
15303vm_map_entry_t
15304vm_map_entry_insert(
15305 vm_map_t map,
15306 vm_map_entry_t insp_entry,
15307 vm_map_offset_t start,
15308 vm_map_offset_t end,
15309 vm_object_t object,
15310 vm_object_offset_t offset,
15311 boolean_t needs_copy,
15312 boolean_t is_shared,
15313 boolean_t in_transition,
15314 vm_prot_t cur_protection,
15315 vm_prot_t max_protection,
15316 vm_behavior_t behavior,
15317 vm_inherit_t inheritance,
15318 unsigned wired_count,
15319 boolean_t no_cache,
15320 boolean_t permanent,
15321 unsigned int superpage_size,
15322 boolean_t clear_map_aligned,
15323 boolean_t is_submap,
15324 boolean_t used_for_jit,
15325 int alias)
15326{
15327 vm_map_entry_t new_entry;
15328
15329 assert(insp_entry != (vm_map_entry_t)0);
15330 vm_map_lock_assert_exclusive(map);
15331
15332#if DEVELOPMENT || DEBUG
15333 vm_object_offset_t end_offset = 0;
15334 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15335#endif /* DEVELOPMENT || DEBUG */
15336
15337 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15338
15339 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15340 new_entry->map_aligned = TRUE;
15341 } else {
15342 new_entry->map_aligned = FALSE;
15343 }
15344 if (clear_map_aligned &&
15345 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15346 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15347 new_entry->map_aligned = FALSE;
15348 }
15349
15350 new_entry->vme_start = start;
15351 new_entry->vme_end = end;
15352 assert(page_aligned(new_entry->vme_start));
15353 assert(page_aligned(new_entry->vme_end));
15354 if (new_entry->map_aligned) {
15355 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15356 VM_MAP_PAGE_MASK(map)));
15357 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15358 VM_MAP_PAGE_MASK(map)));
15359 }
15360 assert(new_entry->vme_start < new_entry->vme_end);
15361
15362 VME_OBJECT_SET(new_entry, object);
15363 VME_OFFSET_SET(new_entry, offset);
15364 new_entry->is_shared = is_shared;
15365 new_entry->is_sub_map = is_submap;
15366 new_entry->needs_copy = needs_copy;
15367 new_entry->in_transition = in_transition;
15368 new_entry->needs_wakeup = FALSE;
15369 new_entry->inheritance = inheritance;
15370 new_entry->protection = cur_protection;
15371 new_entry->max_protection = max_protection;
15372 new_entry->behavior = behavior;
15373 new_entry->wired_count = wired_count;
15374 new_entry->user_wired_count = 0;
15375 if (is_submap) {
15376 /*
15377 * submap: "use_pmap" means "nested".
15378 * default: false.
15379 */
15380 new_entry->use_pmap = FALSE;
15381 } else {
15382 /*
15383 * object: "use_pmap" means "use pmap accounting" for footprint.
15384 * default: true.
15385 */
15386 new_entry->use_pmap = TRUE;
15387 }
15388 VME_ALIAS_SET(new_entry, alias);
15389 new_entry->zero_wired_pages = FALSE;
15390 new_entry->no_cache = no_cache;
15391 new_entry->permanent = permanent;
15392 if (superpage_size)
15393 new_entry->superpage_size = TRUE;
15394 else
15395 new_entry->superpage_size = FALSE;
15396 if (used_for_jit){
15397#if CONFIG_EMBEDDED
15398 if (!(map->jit_entry_exists))
15399#endif /* CONFIG_EMBEDDED */
15400 {
15401 new_entry->used_for_jit = TRUE;
15402 map->jit_entry_exists = TRUE;
15403
15404 /* Tell the pmap that it supports JIT. */
15405 pmap_set_jit_entitled(map->pmap);
15406 }
15407 } else {
15408 new_entry->used_for_jit = FALSE;
15409 }
15410 new_entry->pmap_cs_associated = FALSE;
15411 new_entry->iokit_acct = FALSE;
15412 new_entry->vme_resilient_codesign = FALSE;
15413 new_entry->vme_resilient_media = FALSE;
15414 new_entry->vme_atomic = FALSE;
15415
15416 /*
15417 * Insert the new entry into the list.
15418 */
15419
15420 vm_map_store_entry_link(map, insp_entry, new_entry,
15421 VM_MAP_KERNEL_FLAGS_NONE);
15422 map->size += end - start;
15423
15424 /*
15425 * Update the free space hint and the lookup hint.
15426 */
15427
15428 SAVE_HINT_MAP_WRITE(map, new_entry);
15429 return new_entry;
15430}
15431
15432/*
15433 * Routine: vm_map_remap_extract
15434 *
15435 *	Description:	This routine returns a vm_map_entry list from a map.
15436 */
15437static kern_return_t
15438vm_map_remap_extract(
15439 vm_map_t map,
15440 vm_map_offset_t addr,
15441 vm_map_size_t size,
15442 boolean_t copy,
15443 struct vm_map_header *map_header,
15444 vm_prot_t *cur_protection,
15445 vm_prot_t *max_protection,
15446 /* What, no behavior? */
15447 vm_inherit_t inheritance,
15448 boolean_t pageable,
15449 boolean_t same_map,
15450 vm_map_kernel_flags_t vmk_flags)
15451{
15452 kern_return_t result;
15453 vm_map_size_t mapped_size;
15454 vm_map_size_t tmp_size;
15455 vm_map_entry_t src_entry; /* result of last map lookup */
15456 vm_map_entry_t new_entry;
15457 vm_object_offset_t offset;
15458 vm_map_offset_t map_address;
15459 vm_map_offset_t src_start; /* start of entry to map */
15460 vm_map_offset_t src_end; /* end of region to be mapped */
15461 vm_object_t object;
15462 vm_map_version_t version;
15463 boolean_t src_needs_copy;
15464 boolean_t new_entry_needs_copy;
15465 vm_map_entry_t saved_src_entry;
15466 boolean_t src_entry_was_wired;
15467 vm_prot_t max_prot_for_prot_copy;
15468
15469 assert(map != VM_MAP_NULL);
15470 assert(size != 0);
15471 assert(size == vm_map_round_page(size, PAGE_MASK));
15472 assert(inheritance == VM_INHERIT_NONE ||
15473 inheritance == VM_INHERIT_COPY ||
15474 inheritance == VM_INHERIT_SHARE);
15475
15476 /*
15477 * Compute start and end of region.
15478 */
15479 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15480 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15481
15482
15483 /*
15484 * Initialize map_header.
15485 */
15486 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15487 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15488 map_header->nentries = 0;
15489 map_header->entries_pageable = pageable;
15490 map_header->page_shift = PAGE_SHIFT;
15491
15492 vm_map_store_init( map_header );
15493
15494 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15495 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15496 } else {
15497 max_prot_for_prot_copy = VM_PROT_NONE;
15498 }
15499 *cur_protection = VM_PROT_ALL;
15500 *max_protection = VM_PROT_ALL;
15501
15502 map_address = 0;
15503 mapped_size = 0;
15504 result = KERN_SUCCESS;
15505
15506 /*
15507 * The specified source virtual space might correspond to
15508	 * multiple map entries, so we need to loop over them.
15509 */
15510 vm_map_lock(map);
15511 while (mapped_size != size) {
15512 vm_map_size_t entry_size;
15513
15514 /*
15515 * Find the beginning of the region.
15516 */
15517 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
15518 result = KERN_INVALID_ADDRESS;
15519 break;
15520 }
15521
15522 if (src_start < src_entry->vme_start ||
15523 (mapped_size && src_start != src_entry->vme_start)) {
15524 result = KERN_INVALID_ADDRESS;
15525 break;
15526 }
15527
15528 tmp_size = size - mapped_size;
15529 if (src_end > src_entry->vme_end)
15530 tmp_size -= (src_end - src_entry->vme_end);
15531
15532 entry_size = (vm_map_size_t)(src_entry->vme_end -
15533 src_entry->vme_start);
15534
15535 if(src_entry->is_sub_map) {
15536 vm_map_reference(VME_SUBMAP(src_entry));
15537 object = VM_OBJECT_NULL;
15538 } else {
15539 object = VME_OBJECT(src_entry);
15540 if (src_entry->iokit_acct) {
15541 /*
15542 * This entry uses "IOKit accounting".
15543 */
15544 } else if (object != VM_OBJECT_NULL &&
15545 object->purgable != VM_PURGABLE_DENY) {
15546 /*
15547 * Purgeable objects have their own accounting:
15548 * no pmap accounting for them.
15549 */
15550 assertf(!src_entry->use_pmap,
15551 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15552 map,
15553 src_entry,
15554 (uint64_t)src_entry->vme_start,
15555 (uint64_t)src_entry->vme_end,
15556 src_entry->protection,
15557 src_entry->max_protection,
15558 VME_ALIAS(src_entry));
15559 } else {
15560 /*
15561 * Not IOKit or purgeable:
15562 * must be accounted by pmap stats.
15563 */
15564 assertf(src_entry->use_pmap,
15565 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15566 map,
15567 src_entry,
15568 (uint64_t)src_entry->vme_start,
15569 (uint64_t)src_entry->vme_end,
15570 src_entry->protection,
15571 src_entry->max_protection,
15572 VME_ALIAS(src_entry));
15573 }
15574
15575 if (object == VM_OBJECT_NULL) {
15576 object = vm_object_allocate(entry_size);
15577 VME_OFFSET_SET(src_entry, 0);
15578 VME_OBJECT_SET(src_entry, object);
15579 assert(src_entry->use_pmap);
15580 } else if (object->copy_strategy !=
15581 MEMORY_OBJECT_COPY_SYMMETRIC) {
15582 /*
15583 * We are already using an asymmetric
15584 * copy, and therefore we already have
15585 * the right object.
15586 */
15587 assert(!src_entry->needs_copy);
15588 } else if (src_entry->needs_copy || object->shadowed ||
15589 (object->internal && !object->true_share &&
15590 !src_entry->is_shared &&
15591 object->vo_size > entry_size)) {
15592
15593 VME_OBJECT_SHADOW(src_entry, entry_size);
15594 assert(src_entry->use_pmap);
15595
15596 if (!src_entry->needs_copy &&
15597 (src_entry->protection & VM_PROT_WRITE)) {
15598 vm_prot_t prot;
15599
15600 assert(!pmap_has_prot_policy(src_entry->protection));
15601
15602 prot = src_entry->protection & ~VM_PROT_WRITE;
15603
15604 if (override_nx(map,
15605 VME_ALIAS(src_entry))
15606 && prot)
15607 prot |= VM_PROT_EXECUTE;
15608
15609 assert(!pmap_has_prot_policy(prot));
15610
15611 if(map->mapped_in_other_pmaps) {
15612 vm_object_pmap_protect(
15613 VME_OBJECT(src_entry),
15614 VME_OFFSET(src_entry),
15615 entry_size,
15616 PMAP_NULL,
15617 src_entry->vme_start,
15618 prot);
15619 } else {
15620 pmap_protect(vm_map_pmap(map),
15621 src_entry->vme_start,
15622 src_entry->vme_end,
15623 prot);
15624 }
15625 }
15626
15627 object = VME_OBJECT(src_entry);
15628 src_entry->needs_copy = FALSE;
15629 }
15630
15631
15632 vm_object_lock(object);
15633 vm_object_reference_locked(object); /* object ref. for new entry */
15634 if (object->copy_strategy ==
15635 MEMORY_OBJECT_COPY_SYMMETRIC) {
15636 object->copy_strategy =
15637 MEMORY_OBJECT_COPY_DELAY;
15638 }
15639 vm_object_unlock(object);
15640 }
15641
15642 offset = (VME_OFFSET(src_entry) +
15643 (src_start - src_entry->vme_start));
15644
15645 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15646 vm_map_entry_copy(new_entry, src_entry);
15647 if (new_entry->is_sub_map) {
15648 /* clr address space specifics */
15649 new_entry->use_pmap = FALSE;
15650 } else if (copy) {
15651 /*
15652 * We're dealing with a copy-on-write operation,
15653 * so the resulting mapping should not inherit the
15654 * original mapping's accounting settings.
15655 * "use_pmap" should be reset to its default (TRUE)
15656 * so that the new mapping gets accounted for in
15657 * the task's memory footprint.
15658 */
15659 new_entry->use_pmap = TRUE;
15660 }
15661 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15662 assert(!new_entry->iokit_acct);
15663
15664 new_entry->map_aligned = FALSE;
15665
15666 new_entry->vme_start = map_address;
15667 new_entry->vme_end = map_address + tmp_size;
15668 assert(new_entry->vme_start < new_entry->vme_end);
15669 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15670 /*
15671 * Remapping for vm_map_protect(VM_PROT_COPY)
15672 * to convert a read-only mapping into a
15673 * copy-on-write version of itself but
15674 * with write access:
15675 * keep the original inheritance and add
15676 * VM_PROT_WRITE to the max protection.
15677 */
15678 new_entry->inheritance = src_entry->inheritance;
15679 new_entry->protection &= max_prot_for_prot_copy;
15680 new_entry->max_protection |= VM_PROT_WRITE;
15681 } else {
15682 new_entry->inheritance = inheritance;
15683 }
15684 VME_OFFSET_SET(new_entry, offset);
15685
15686 /*
15687 * The new region has to be copied now if required.
15688 */
15689 RestartCopy:
15690 if (!copy) {
15691 /*
15692 * Cannot allow an entry describing a JIT
15693 * region to be shared across address spaces.
15694 */
15695 if (src_entry->used_for_jit == TRUE && !same_map) {
15696#if CONFIG_EMBEDDED
15697 result = KERN_INVALID_ARGUMENT;
15698 break;
15699#endif /* CONFIG_EMBEDDED */
15700 }
15701 src_entry->is_shared = TRUE;
15702 new_entry->is_shared = TRUE;
15703 if (!(new_entry->is_sub_map))
15704 new_entry->needs_copy = FALSE;
15705
15706 } else if (src_entry->is_sub_map) {
15707 /* make this a COW sub_map if not already */
15708 assert(new_entry->wired_count == 0);
15709 new_entry->needs_copy = TRUE;
15710 object = VM_OBJECT_NULL;
15711 } else if (src_entry->wired_count == 0 &&
15712 vm_object_copy_quickly(&VME_OBJECT(new_entry),
15713 VME_OFFSET(new_entry),
15714 (new_entry->vme_end -
15715 new_entry->vme_start),
15716 &src_needs_copy,
15717 &new_entry_needs_copy)) {
15718
15719 new_entry->needs_copy = new_entry_needs_copy;
15720 new_entry->is_shared = FALSE;
15721 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15722
15723 /*
15724 * Handle copy_on_write semantics.
15725 */
15726 if (src_needs_copy && !src_entry->needs_copy) {
15727 vm_prot_t prot;
15728
15729 assert(!pmap_has_prot_policy(src_entry->protection));
15730
15731 prot = src_entry->protection & ~VM_PROT_WRITE;
15732
15733 if (override_nx(map,
15734 VME_ALIAS(src_entry))
15735 && prot)
15736 prot |= VM_PROT_EXECUTE;
15737
15738 assert(!pmap_has_prot_policy(prot));
15739
15740 vm_object_pmap_protect(object,
15741 offset,
15742 entry_size,
15743 ((src_entry->is_shared
15744 || map->mapped_in_other_pmaps) ?
15745 PMAP_NULL : map->pmap),
15746 src_entry->vme_start,
15747 prot);
15748
15749 assert(src_entry->wired_count == 0);
15750 src_entry->needs_copy = TRUE;
15751 }
15752 /*
15753 * Throw away the old object reference of the new entry.
15754 */
15755 vm_object_deallocate(object);
15756
15757 } else {
15758 new_entry->is_shared = FALSE;
15759 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15760
15761 src_entry_was_wired = (src_entry->wired_count > 0);
15762 saved_src_entry = src_entry;
15763 src_entry = VM_MAP_ENTRY_NULL;
15764
15765 /*
15766 * The map can be safely unlocked since we
15767 * already hold a reference on the object.
15768 *
15769 * Record the timestamp of the map for later
15770 * verification, and unlock the map.
15771 */
15772 version.main_timestamp = map->timestamp;
15773 vm_map_unlock(map); /* Increments timestamp once! */
15774
15775 /*
15776 * Perform the copy.
15777 */
15778 if (src_entry_was_wired > 0) {
15779 vm_object_lock(object);
15780 result = vm_object_copy_slowly(
15781 object,
15782 offset,
15783 (new_entry->vme_end -
15784 new_entry->vme_start),
15785 THREAD_UNINT,
15786 &VME_OBJECT(new_entry));
15787
15788 VME_OFFSET_SET(new_entry, 0);
15789 new_entry->needs_copy = FALSE;
15790 } else {
15791 vm_object_offset_t new_offset;
15792
15793 new_offset = VME_OFFSET(new_entry);
15794 result = vm_object_copy_strategically(
15795 object,
15796 offset,
15797 (new_entry->vme_end -
15798 new_entry->vme_start),
15799 &VME_OBJECT(new_entry),
15800 &new_offset,
15801 &new_entry_needs_copy);
15802 if (new_offset != VME_OFFSET(new_entry)) {
15803 VME_OFFSET_SET(new_entry, new_offset);
15804 }
15805
15806 new_entry->needs_copy = new_entry_needs_copy;
15807 }
15808
15809 /*
15810 * Throw away the old object reference of the new entry.
15811 */
15812 vm_object_deallocate(object);
15813
15814 if (result != KERN_SUCCESS &&
15815 result != KERN_MEMORY_RESTART_COPY) {
15816 _vm_map_entry_dispose(map_header, new_entry);
15817 vm_map_lock(map);
15818 break;
15819 }
15820
15821 /*
15822 * Verify that the map has not substantially
15823 * changed while the copy was being made.
15824 */
15825
15826 vm_map_lock(map);
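			/*
			 * vm_map_unlock() above bumped the timestamp once, so
			 * any further change means another thread modified the
			 * map while it was unlocked and "src_entry" may be stale.
			 */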
15827 if (version.main_timestamp + 1 != map->timestamp) {
15828 /*
15829 * Simple version comparison failed.
15830 *
15831 * Retry the lookup and verify that the
15832 * same object/offset are still present.
15833 */
15834 saved_src_entry = VM_MAP_ENTRY_NULL;
15835 vm_object_deallocate(VME_OBJECT(new_entry));
15836 _vm_map_entry_dispose(map_header, new_entry);
15837 if (result == KERN_MEMORY_RESTART_COPY)
15838 result = KERN_SUCCESS;
15839 continue;
15840 }
15841 /* map hasn't changed: src_entry is still valid */
15842 src_entry = saved_src_entry;
15843 saved_src_entry = VM_MAP_ENTRY_NULL;
15844
15845 if (result == KERN_MEMORY_RESTART_COPY) {
15846 vm_object_reference(object);
15847 goto RestartCopy;
15848 }
15849 }
15850
15851 _vm_map_store_entry_link(map_header,
15852 map_header->links.prev, new_entry);
15853
		/* Protections for submap mappings are irrelevant here */
15855 if( !src_entry->is_sub_map ) {
15856 *cur_protection &= src_entry->protection;
15857 *max_protection &= src_entry->max_protection;
15858 }
15859 map_address += tmp_size;
15860 mapped_size += tmp_size;
15861 src_start += tmp_size;
15862
15863 } /* end while */
15864
15865 vm_map_unlock(map);
15866 if (result != KERN_SUCCESS) {
15867 /*
15868 * Free all allocated elements.
15869 */
15870 for (src_entry = map_header->links.next;
15871 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
15872 src_entry = new_entry) {
15873 new_entry = src_entry->vme_next;
15874 _vm_map_store_entry_unlink(map_header, src_entry);
15875 if (src_entry->is_sub_map) {
15876 vm_map_deallocate(VME_SUBMAP(src_entry));
15877 } else {
15878 vm_object_deallocate(VME_OBJECT(src_entry));
15879 }
15880 _vm_map_entry_dispose(map_header, src_entry);
15881 }
15882 }
15883 return result;
15884}
15885
/*
 * Routine: vm_map_remap
 *
 * Map a portion of a task's address space.
 * The mapped region must not overlap more than
 * one VM memory object. Protections and
 * inheritance attributes remain the same
 * as in the original task and are returned as
 * out parameters.
 * Source and target maps can be identical.
 * Other attributes are identical to those of vm_map().
 */
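/*
 * Usage sketch (illustrative; the parameter values are assumptions, not taken
 * verbatim from any particular caller): the mach_vm_remap() path reaches this
 * routine roughly as
 *
 *	kr = vm_map_remap(target_map, &address, size, 0,
 *	                  VM_FLAGS_ANYWHERE, vmk_flags, tag,
 *	                  src_map, memory_address, copy,
 *	                  &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
 *
 * On success, cur_prot/max_prot are narrowed to the intersection of the
 * protections of the source entries that were covered.
 */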
15897kern_return_t
15898vm_map_remap(
15899 vm_map_t target_map,
15900 vm_map_address_t *address,
15901 vm_map_size_t size,
15902 vm_map_offset_t mask,
15903 int flags,
15904 vm_map_kernel_flags_t vmk_flags,
15905 vm_tag_t tag,
15906 vm_map_t src_map,
15907 vm_map_offset_t memory_address,
15908 boolean_t copy,
15909 vm_prot_t *cur_protection,
15910 vm_prot_t *max_protection,
15911 vm_inherit_t inheritance)
15912{
15913 kern_return_t result;
15914 vm_map_entry_t entry;
15915 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
15916 vm_map_entry_t new_entry;
15917 struct vm_map_header map_header;
15918 vm_map_offset_t offset_in_mapping;
15919
15920 if (target_map == VM_MAP_NULL)
15921 return KERN_INVALID_ARGUMENT;
15922
15923 switch (inheritance) {
15924 case VM_INHERIT_NONE:
15925 case VM_INHERIT_COPY:
15926 case VM_INHERIT_SHARE:
15927 if (size != 0 && src_map != VM_MAP_NULL)
15928 break;
15929 /*FALL THRU*/
15930 default:
15931 return KERN_INVALID_ARGUMENT;
15932 }
15933
15934 /*
15935 * If the user is requesting that we return the address of the
15936 * first byte of the data (rather than the base of the page),
15937 * then we use different rounding semantics: specifically,
15938 * we assume that (memory_address, size) describes a region
15939 * all of whose pages we must cover, rather than a base to be truncated
15940 * down and a size to be added to that base. So we figure out
15941 * the highest page that the requested region includes and make
15942 * sure that the size will cover it.
15943 *
 * The key example we're worried about is of the form:
15945 *
15946 * memory_address = 0x1ff0, size = 0x20
15947 *
15948 * With the old semantics, we round down the memory_address to 0x1000
15949 * and round up the size to 0x1000, resulting in our covering *only*
15950 * page 0x1000. With the new semantics, we'd realize that the region covers
15951 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
15952 * 0x1000 and page 0x2000 in the region we remap.
15953 */
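	/*
	 * Working through that example with 4KB pages:
	 * offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0, and
	 * size = round_page(0x1ff0 + 0x20 - 0x1000) = round_page(0x1010)
	 *      = 0x2000, which covers both pages as intended.
	 */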
15954 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15955 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
15956 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
15957 } else {
15958 size = vm_map_round_page(size, PAGE_MASK);
15959 }
15960 if (size == 0) {
15961 return KERN_INVALID_ARGUMENT;
15962 }
15963
15964 result = vm_map_remap_extract(src_map, memory_address,
15965 size, copy, &map_header,
15966 cur_protection,
15967 max_protection,
15968 inheritance,
15969 target_map->hdr.entries_pageable,
15970 src_map == target_map,
15971 vmk_flags);
15972
15973 if (result != KERN_SUCCESS) {
15974 return result;
15975 }
15976
15977 /*
15978 * Allocate/check a range of free virtual address
15979 * space for the target
15980 */
15981 *address = vm_map_trunc_page(*address,
15982 VM_MAP_PAGE_MASK(target_map));
15983 vm_map_lock(target_map);
15984 result = vm_map_remap_range_allocate(target_map, address, size,
15985 mask, flags, vmk_flags, tag,
15986 &insp_entry);
15987
15988 for (entry = map_header.links.next;
15989 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
15990 entry = new_entry) {
15991 new_entry = entry->vme_next;
15992 _vm_map_store_entry_unlink(&map_header, entry);
15993 if (result == KERN_SUCCESS) {
15994 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15995 /* no codesigning -> read-only access */
15996 entry->max_protection = VM_PROT_READ;
15997 entry->protection = VM_PROT_READ;
15998 entry->vme_resilient_codesign = TRUE;
15999 }
16000 entry->vme_start += *address;
16001 entry->vme_end += *address;
16002 assert(!entry->map_aligned);
16003 vm_map_store_entry_link(target_map, insp_entry, entry,
16004 vmk_flags);
16005 insp_entry = entry;
16006 } else {
16007 if (!entry->is_sub_map) {
16008 vm_object_deallocate(VME_OBJECT(entry));
16009 } else {
16010 vm_map_deallocate(VME_SUBMAP(entry));
16011 }
16012 _vm_map_entry_dispose(&map_header, entry);
16013 }
16014 }
16015
16016 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16017 *cur_protection = VM_PROT_READ;
16018 *max_protection = VM_PROT_READ;
16019 }
16020
16021 if( target_map->disable_vmentry_reuse == TRUE) {
16022 assert(!target_map->is_nested_map);
16023 if( target_map->highest_entry_end < insp_entry->vme_end ){
16024 target_map->highest_entry_end = insp_entry->vme_end;
16025 }
16026 }
16027
16028 if (result == KERN_SUCCESS) {
16029 target_map->size += size;
16030 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16031
16032#if PMAP_CS
16033 if (*max_protection & VM_PROT_EXECUTE) {
16034 vm_map_address_t region_start = 0, region_size = 0;
16035 struct pmap_cs_code_directory *region_cd = NULL;
16036 vm_map_address_t base = 0;
16037 struct pmap_cs_lookup_results results = {};
16038 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16039 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16040
16041 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16042 region_size = results.region_size;
16043 region_start = results.region_start;
16044 region_cd = results.region_cd_entry;
16045 base = results.base;
16046
16047 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16048 *cur_protection = VM_PROT_READ;
16049 *max_protection = VM_PROT_READ;
16050 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16051 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16052 page_addr, page_addr+assoc_size, *address,
16053 region_start, region_size,
16054 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16055 );
16056 }
16057 }
16058#endif
16059
16060 }
16061 vm_map_unlock(target_map);
16062
16063 if (result == KERN_SUCCESS && target_map->wiring_required)
16064 result = vm_map_wire_kernel(target_map, *address,
16065 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16066 TRUE);
16067
16068 /*
16069 * If requested, return the address of the data pointed to by the
16070 * request, rather than the base of the resulting page.
16071 */
16072 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16073 *address += offset_in_mapping;
16074 }
16075
16076 return result;
16077}
16078
16079/*
16080 * Routine: vm_map_remap_range_allocate
16081 *
16082 * Description:
16083 * Allocate a range in the specified virtual address map.
 * Returns the address and the map entry just before the allocated
 * range.
16086 *
16087 * Map must be locked.
16088 */
16089
16090static kern_return_t
16091vm_map_remap_range_allocate(
16092 vm_map_t map,
16093 vm_map_address_t *address, /* IN/OUT */
16094 vm_map_size_t size,
16095 vm_map_offset_t mask,
16096 int flags,
16097 vm_map_kernel_flags_t vmk_flags,
16098 __unused vm_tag_t tag,
16099 vm_map_entry_t *map_entry) /* OUT */
16100{
16101 vm_map_entry_t entry;
16102 vm_map_offset_t start;
16103 vm_map_offset_t end;
16104 vm_map_offset_t desired_empty_end;
16105 kern_return_t kr;
16106 vm_map_entry_t hole_entry;
16107
16108StartAgain: ;
16109
16110 start = *address;
16111
16112 if (flags & VM_FLAGS_ANYWHERE)
16113 {
16114 if (flags & VM_FLAGS_RANDOM_ADDR)
16115 {
16116 /*
16117 * Get a random start address.
16118 */
16119 kr = vm_map_random_address_for_size(map, address, size);
16120 if (kr != KERN_SUCCESS) {
16121 return(kr);
16122 }
16123 start = *address;
16124 }
16125
16126 /*
16127 * Calculate the first possible address.
16128 */
16129
16130 if (start < map->min_offset)
16131 start = map->min_offset;
16132 if (start > map->max_offset)
16133 return(KERN_NO_SPACE);
16134
16135 /*
16136 * Look for the first possible address;
16137 * if there's already something at this
16138 * address, we have to start after it.
16139 */
16140
16141 if( map->disable_vmentry_reuse == TRUE) {
16142 VM_MAP_HIGHEST_ENTRY(map, entry, start);
16143 } else {
16144
16145 if (map->holelistenabled) {
16146 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16147
16148 if (hole_entry == NULL) {
16149 /*
16150 * No more space in the map?
16151 */
16152 return(KERN_NO_SPACE);
16153 } else {
16154
16155 boolean_t found_hole = FALSE;
16156
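					/*
					 * Walk the circular hole list for the
					 * first hole that ends beyond "start".
					 * If that hole begins at or after
					 * "start", bump "start" up to the
					 * hole's beginning.
					 */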
16157 do {
16158 if (hole_entry->vme_start >= start) {
16159 start = hole_entry->vme_start;
16160 found_hole = TRUE;
16161 break;
16162 }
16163
16164 if (hole_entry->vme_end > start) {
16165 found_hole = TRUE;
16166 break;
16167 }
16168 hole_entry = hole_entry->vme_next;
16169
16170 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16171
16172 if (found_hole == FALSE) {
16173 return (KERN_NO_SPACE);
16174 }
16175
16176 entry = hole_entry;
16177 }
16178 } else {
16179 assert(first_free_is_valid(map));
16180 if (start == map->min_offset) {
16181 if ((entry = map->first_free) != vm_map_to_entry(map))
16182 start = entry->vme_end;
16183 } else {
16184 vm_map_entry_t tmp_entry;
16185 if (vm_map_lookup_entry(map, start, &tmp_entry))
16186 start = tmp_entry->vme_end;
16187 entry = tmp_entry;
16188 }
16189 }
16190 start = vm_map_round_page(start,
16191 VM_MAP_PAGE_MASK(map));
16192 }
16193
16194 /*
16195 * In any case, the "entry" always precedes
16196 * the proposed new region throughout the
16197 * loop:
16198 */
16199
16200 while (TRUE) {
16201 vm_map_entry_t next;
16202
16203 /*
16204 * Find the end of the proposed new region.
16205 * Be sure we didn't go beyond the end, or
16206 * wrap around the address.
16207 */
16208
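			/*
			 * Align "start" up to the requested mask boundary,
			 * e.g. with mask == 0xFFF:
			 * 0x1234 -> (0x1234 + 0xFFF) & ~0xFFF == 0x2000.
			 */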
16209 end = ((start + mask) & ~mask);
16210 end = vm_map_round_page(end,
16211 VM_MAP_PAGE_MASK(map));
16212 if (end < start)
16213 return(KERN_NO_SPACE);
16214 start = end;
16215 end += size;
16216
16217 /* We want an entire page of empty space, but don't increase the allocation size. */
16218 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16219
16220 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16221 if (map->wait_for_space) {
16222 if (size <= (map->max_offset -
16223 map->min_offset)) {
16224 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16225 vm_map_unlock(map);
16226 thread_block(THREAD_CONTINUE_NULL);
16227 vm_map_lock(map);
16228 goto StartAgain;
16229 }
16230 }
16231
16232 return(KERN_NO_SPACE);
16233 }
16234
16235 next = entry->vme_next;
16236
16237 if (map->holelistenabled) {
16238 if (entry->vme_end >= desired_empty_end)
16239 break;
16240 } else {
16241 /*
16242 * If there are no more entries, we must win.
16243 *
16244 * OR
16245 *
16246 * If there is another entry, it must be
16247 * after the end of the potential new region.
16248 */
16249
16250 if (next == vm_map_to_entry(map))
16251 break;
16252
16253 if (next->vme_start >= desired_empty_end)
16254 break;
16255 }
16256
16257 /*
16258 * Didn't fit -- move to the next entry.
16259 */
16260
16261 entry = next;
16262
16263 if (map->holelistenabled) {
16264 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16265 /*
16266 * Wrapped around
16267 */
16268 return(KERN_NO_SPACE);
16269 }
16270 start = entry->vme_start;
16271 } else {
16272 start = entry->vme_end;
16273 }
16274 }
16275
16276 if (map->holelistenabled) {
16277
16278 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16279 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16280 }
16281 }
16282
16283 *address = start;
16284
16285 } else {
16286 vm_map_entry_t temp_entry;
16287
16288 /*
16289 * Verify that:
16290 * the address doesn't itself violate
16291 * the mask requirement.
16292 */
16293
16294 if ((start & mask) != 0)
16295 return(KERN_NO_SPACE);
16296
16297
16298 /*
16299 * ... the address is within bounds
16300 */
16301
16302 end = start + size;
16303
16304 if ((start < map->min_offset) ||
16305 (end > map->max_offset) ||
16306 (start >= end)) {
16307 return(KERN_INVALID_ADDRESS);
16308 }
16309
16310 /*
16311 * If we're asked to overwrite whatever was mapped in that
16312 * range, first deallocate that range.
16313 */
16314 if (flags & VM_FLAGS_OVERWRITE) {
16315 vm_map_t zap_map;
16316 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16317
16318 /*
16319 * We use a "zap_map" to avoid having to unlock
16320 * the "map" in vm_map_delete(), which would compromise
16321 * the atomicity of the "deallocate" and then "remap"
16322 * combination.
16323 */
16324 zap_map = vm_map_create(PMAP_NULL,
16325 start,
16326 end,
16327 map->hdr.entries_pageable);
16328 if (zap_map == VM_MAP_NULL) {
16329 return KERN_RESOURCE_SHORTAGE;
16330 }
16331 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16332 vm_map_disable_hole_optimization(zap_map);
16333
16334 if (vmk_flags.vmkf_overwrite_immutable) {
16335 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16336 }
16337 kr = vm_map_delete(map, start, end,
16338 remove_flags,
16339 zap_map);
16340 if (kr == KERN_SUCCESS) {
16341 vm_map_destroy(zap_map,
16342 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16343 zap_map = VM_MAP_NULL;
16344 }
16345 }
16346
16347 /*
16348 * ... the starting address isn't allocated
16349 */
16350
16351 if (vm_map_lookup_entry(map, start, &temp_entry))
16352 return(KERN_NO_SPACE);
16353
16354 entry = temp_entry;
16355
16356 /*
16357 * ... the next region doesn't overlap the
16358 * end point.
16359 */
16360
16361 if ((entry->vme_next != vm_map_to_entry(map)) &&
16362 (entry->vme_next->vme_start < end))
16363 return(KERN_NO_SPACE);
16364 }
16365 *map_entry = entry;
16366 return(KERN_SUCCESS);
16367}
16368
16369/*
16370 * vm_map_switch:
16371 *
16372 * Set the address map for the current thread to the specified map
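 * Returns the previous map so the caller can switch back to it later.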
16373 */
16374
16375vm_map_t
16376vm_map_switch(
16377 vm_map_t map)
16378{
16379 int mycpu;
16380 thread_t thread = current_thread();
16381 vm_map_t oldmap = thread->map;
16382
16383 mp_disable_preemption();
16384 mycpu = cpu_number();
16385
16386 /*
16387 * Deactivate the current map and activate the requested map
16388 */
16389 PMAP_SWITCH_USER(thread, map, mycpu);
16390
16391 mp_enable_preemption();
16392 return(oldmap);
16393}
16394
16395
16396/*
16397 * Routine: vm_map_write_user
16398 *
16399 * Description:
16400 * Copy out data from a kernel space into space in the
16401 * destination map. The space must already exist in the
16402 * destination map.
16403 * NOTE: This routine should only be called by threads
 * which can block on a page fault, i.e. kernel-mode user
16405 * threads.
16406 *
16407 */
16408kern_return_t
16409vm_map_write_user(
16410 vm_map_t map,
16411 void *src_p,
16412 vm_map_address_t dst_addr,
16413 vm_size_t size)
16414{
16415 kern_return_t kr = KERN_SUCCESS;
16416
16417 if(current_map() == map) {
16418 if (copyout(src_p, dst_addr, size)) {
16419 kr = KERN_INVALID_ADDRESS;
16420 }
16421 } else {
16422 vm_map_t oldmap;
16423
16424 /* take on the identity of the target map while doing */
16425 /* the transfer */
16426
16427 vm_map_reference(map);
16428 oldmap = vm_map_switch(map);
16429 if (copyout(src_p, dst_addr, size)) {
16430 kr = KERN_INVALID_ADDRESS;
16431 }
16432 vm_map_switch(oldmap);
16433 vm_map_deallocate(map);
16434 }
16435 return kr;
16436}
16437
16438/*
16439 * Routine: vm_map_read_user
16440 *
16441 * Description:
16442 * Copy in data from a user space source map into the
16443 * kernel map. The space must already exist in the
16444 * kernel map.
16445 * NOTE: This routine should only be called by threads
 * which can block on a page fault, i.e. kernel-mode user
16447 * threads.
16448 *
16449 */
16450kern_return_t
16451vm_map_read_user(
16452 vm_map_t map,
16453 vm_map_address_t src_addr,
16454 void *dst_p,
16455 vm_size_t size)
16456{
16457 kern_return_t kr = KERN_SUCCESS;
16458
16459 if(current_map() == map) {
16460 if (copyin(src_addr, dst_p, size)) {
16461 kr = KERN_INVALID_ADDRESS;
16462 }
16463 } else {
16464 vm_map_t oldmap;
16465
16466 /* take on the identity of the target map while doing */
16467 /* the transfer */
16468
16469 vm_map_reference(map);
16470 oldmap = vm_map_switch(map);
16471 if (copyin(src_addr, dst_p, size)) {
16472 kr = KERN_INVALID_ADDRESS;
16473 }
16474 vm_map_switch(oldmap);
16475 vm_map_deallocate(map);
16476 }
16477 return kr;
16478}
16479
16480
16481/*
16482 * vm_map_check_protection:
16483 *
16484 * Assert that the target map allows the specified
16485 * privilege on the entire address region given.
16486 * The entire region must be allocated.
16487 */
16488boolean_t
16489vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16490 vm_map_offset_t end, vm_prot_t protection)
16491{
16492 vm_map_entry_t entry;
16493 vm_map_entry_t tmp_entry;
16494
16495 vm_map_lock(map);
16496
16497 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
16498 {
16499 vm_map_unlock(map);
16500 return (FALSE);
16501 }
16502
16503 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16504 vm_map_unlock(map);
16505 return(FALSE);
16506 }
16507
16508 entry = tmp_entry;
16509
16510 while (start < end) {
16511 if (entry == vm_map_to_entry(map)) {
16512 vm_map_unlock(map);
16513 return(FALSE);
16514 }
16515
16516 /*
16517 * No holes allowed!
16518 */
16519
16520 if (start < entry->vme_start) {
16521 vm_map_unlock(map);
16522 return(FALSE);
16523 }
16524
16525 /*
16526 * Check protection associated with entry.
16527 */
16528
16529 if ((entry->protection & protection) != protection) {
16530 vm_map_unlock(map);
16531 return(FALSE);
16532 }
16533
16534 /* go to next entry */
16535
16536 start = entry->vme_end;
16537 entry = entry->vme_next;
16538 }
16539 vm_map_unlock(map);
16540 return(TRUE);
16541}
16542
16543kern_return_t
16544vm_map_purgable_control(
16545 vm_map_t map,
16546 vm_map_offset_t address,
16547 vm_purgable_t control,
16548 int *state)
16549{
16550 vm_map_entry_t entry;
16551 vm_object_t object;
16552 kern_return_t kr;
16553 boolean_t was_nonvolatile;
16554
16555 /*
16556 * Vet all the input parameters and current type and state of the
 * underlying object. Return with an error if anything is amiss.
16558 */
16559 if (map == VM_MAP_NULL)
16560 return(KERN_INVALID_ARGUMENT);
16561
16562 if (control != VM_PURGABLE_SET_STATE &&
16563 control != VM_PURGABLE_GET_STATE &&
16564 control != VM_PURGABLE_PURGE_ALL &&
16565 control != VM_PURGABLE_SET_STATE_FROM_KERNEL)
16566 return(KERN_INVALID_ARGUMENT);
16567
16568 if (control == VM_PURGABLE_PURGE_ALL) {
16569 vm_purgeable_object_purge_all();
16570 return KERN_SUCCESS;
16571 }
16572
16573 if ((control == VM_PURGABLE_SET_STATE ||
16574 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16575 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16576 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
16577 return(KERN_INVALID_ARGUMENT);
16578
16579 vm_map_lock_read(map);
16580
16581 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16582
16583 /*
16584 * Must pass a valid non-submap address.
16585 */
16586 vm_map_unlock_read(map);
16587 return(KERN_INVALID_ADDRESS);
16588 }
16589
16590 if ((entry->protection & VM_PROT_WRITE) == 0) {
16591 /*
16592 * Can't apply purgable controls to something you can't write.
16593 */
16594 vm_map_unlock_read(map);
16595 return(KERN_PROTECTION_FAILURE);
16596 }
16597
16598 object = VME_OBJECT(entry);
16599 if (object == VM_OBJECT_NULL ||
16600 object->purgable == VM_PURGABLE_DENY) {
16601 /*
16602 * Object must already be present and be purgeable.
16603 */
16604 vm_map_unlock_read(map);
16605 return KERN_INVALID_ARGUMENT;
16606 }
16607
16608 vm_object_lock(object);
16609
16610#if 00
16611 if (VME_OFFSET(entry) != 0 ||
16612 entry->vme_end - entry->vme_start != object->vo_size) {
16613 /*
16614 * Can only apply purgable controls to the whole (existing)
16615 * object at once.
16616 */
16617 vm_map_unlock_read(map);
16618 vm_object_unlock(object);
16619 return KERN_INVALID_ARGUMENT;
16620 }
16621#endif
16622
16623 assert(!entry->is_sub_map);
16624 assert(!entry->use_pmap); /* purgeable has its own accounting */
16625
16626 vm_map_unlock_read(map);
16627
16628 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16629
16630 kr = vm_object_purgable_control(object, control, state);
16631
16632 if (was_nonvolatile &&
16633 object->purgable != VM_PURGABLE_NONVOLATILE &&
16634 map->pmap == kernel_pmap) {
16635#if DEBUG
16636 object->vo_purgeable_volatilizer = kernel_task;
16637#endif /* DEBUG */
16638 }
16639
16640 vm_object_unlock(object);
16641
16642 return kr;
16643}
16644
16645kern_return_t
16646vm_map_page_query_internal(
16647 vm_map_t target_map,
16648 vm_map_offset_t offset,
16649 int *disposition,
16650 int *ref_count)
16651{
16652 kern_return_t kr;
16653 vm_page_info_basic_data_t info;
16654 mach_msg_type_number_t count;
16655
16656 count = VM_PAGE_INFO_BASIC_COUNT;
16657 kr = vm_map_page_info(target_map,
16658 offset,
16659 VM_PAGE_INFO_BASIC,
16660 (vm_page_info_t) &info,
16661 &count);
16662 if (kr == KERN_SUCCESS) {
16663 *disposition = info.disposition;
16664 *ref_count = info.ref_count;
16665 } else {
16666 *disposition = 0;
16667 *ref_count = 0;
16668 }
16669
16670 return kr;
16671}
16672
16673kern_return_t
16674vm_map_page_info(
16675 vm_map_t map,
16676 vm_map_offset_t offset,
16677 vm_page_info_flavor_t flavor,
16678 vm_page_info_t info,
16679 mach_msg_type_number_t *count)
16680{
16681 return (vm_map_page_range_info_internal(map,
16682 offset, /* start of range */
16683 (offset + 1), /* this will get rounded in the call to the page boundary */
16684 flavor,
16685 info,
16686 count));
16687}
16688
16689kern_return_t
16690vm_map_page_range_info_internal(
16691 vm_map_t map,
16692 vm_map_offset_t start_offset,
16693 vm_map_offset_t end_offset,
16694 vm_page_info_flavor_t flavor,
16695 vm_page_info_t info,
16696 mach_msg_type_number_t *count)
16697{
16698 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
16699 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16700 vm_page_t m = VM_PAGE_NULL;
16701 kern_return_t retval = KERN_SUCCESS;
16702 int disposition = 0;
16703 int ref_count = 0;
16704 int depth = 0, info_idx = 0;
16705 vm_page_info_basic_t basic_info = 0;
16706 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16707 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16708 boolean_t do_region_footprint;
16709
16710 switch (flavor) {
16711 case VM_PAGE_INFO_BASIC:
16712 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
16713 /*
16714 * The "vm_page_info_basic_data" structure was not
16715 * properly padded, so allow the size to be off by
16716 * one to maintain backwards binary compatibility...
16717 */
16718 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
16719 return KERN_INVALID_ARGUMENT;
16720 }
16721 break;
16722 default:
16723 return KERN_INVALID_ARGUMENT;
16724 }
16725
16726 do_region_footprint = task_self_region_footprint();
16727 disposition = 0;
16728 ref_count = 0;
16729 depth = 0;
16730 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
16731 retval = KERN_SUCCESS;
16732
16733 offset_in_page = start_offset & PAGE_MASK;
16734 start = vm_map_trunc_page(start_offset, PAGE_MASK);
16735 end = vm_map_round_page(end_offset, PAGE_MASK);
16736
16737 assert ((end - start) <= MAX_PAGE_RANGE_QUERY);
16738
16739 vm_map_lock_read(map);
16740
16741 for (curr_s_offset = start; curr_s_offset < end;) {
16742 /*
16743 * New lookup needs reset of these variables.
16744 */
16745 curr_object = object = VM_OBJECT_NULL;
16746 offset_in_object = 0;
16747 ref_count = 0;
16748 depth = 0;
16749
16750 if (do_region_footprint &&
16751 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
16752 ledger_amount_t nonvol_compressed;
16753
16754 /*
16755 * Request for "footprint" info about a page beyond
16756 * the end of address space: this must be for
16757 * the fake region vm_map_region_recurse_64()
16758 * reported to account for non-volatile purgeable
16759 * memory owned by this task.
16760 */
16761 disposition = 0;
16762 nonvol_compressed = 0;
16763 ledger_get_balance(
16764 map->pmap->ledger,
16765 task_ledgers.purgeable_nonvolatile_compressed,
16766 &nonvol_compressed);
16767 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
16768 (unsigned) nonvol_compressed) {
16769 /*
16770 * We haven't reported all the "non-volatile
16771 * compressed" pages yet, so report this fake
16772 * page as "compressed".
16773 */
16774 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16775 } else {
16776 /*
16777 * We've reported all the non-volatile
 * compressed pages but not all the non-volatile
 * pages, so report this fake page as
16780 * "resident dirty".
16781 */
16782 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16783 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16784 disposition |= VM_PAGE_QUERY_PAGE_REF;
16785 }
16786 switch (flavor) {
16787 case VM_PAGE_INFO_BASIC:
16788 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16789 basic_info->disposition = disposition;
16790 basic_info->ref_count = 1;
16791 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
16792 basic_info->offset = 0;
16793 basic_info->depth = 0;
16794
16795 info_idx++;
16796 break;
16797 }
16798 curr_s_offset += PAGE_SIZE;
16799 continue;
16800 }
16801
16802 /*
16803 * First, find the map entry covering "curr_s_offset", going down
16804 * submaps if necessary.
16805 */
16806 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
16807 /* no entry -> no object -> no page */
16808
16809 if (curr_s_offset < vm_map_min(map)) {
16810 /*
16811 * Illegal address that falls below map min.
16812 */
16813 curr_e_offset = MIN(end, vm_map_min(map));
16814
16815 } else if (curr_s_offset >= vm_map_max(map)) {
16816 /*
16817 * Illegal address that falls on/after map max.
16818 */
16819 curr_e_offset = end;
16820
16821 } else if (map_entry == vm_map_to_entry(map)) {
16822 /*
16823 * Hit a hole.
16824 */
16825 if (map_entry->vme_next == vm_map_to_entry(map)) {
16826 /*
16827 * Empty map.
16828 */
16829 curr_e_offset = MIN(map->max_offset, end);
16830 } else {
16831 /*
16832 * Hole at start of the map.
16833 */
16834 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16835 }
16836 } else {
16837 if (map_entry->vme_next == vm_map_to_entry(map)) {
16838 /*
16839 * Hole at the end of the map.
16840 */
16841 curr_e_offset = MIN(map->max_offset, end);
16842 } else {
16843 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16844 }
16845 }
16846
16847 assert(curr_e_offset >= curr_s_offset);
16848
16849 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16850
16851 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16852
16853 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16854
16855 curr_s_offset = curr_e_offset;
16856
16857 info_idx += num_pages;
16858
16859 continue;
16860 }
16861
16862 /* compute offset from this map entry's start */
16863 offset_in_object = curr_s_offset - map_entry->vme_start;
16864
16865 /* compute offset into this map entry's object (or submap) */
16866 offset_in_object += VME_OFFSET(map_entry);
16867
16868 if (map_entry->is_sub_map) {
16869 vm_map_t sub_map = VM_MAP_NULL;
16870 vm_page_info_t submap_info = 0;
16871 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
16872
16873 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
16874
16875 submap_s_offset = offset_in_object;
16876 submap_e_offset = submap_s_offset + range_len;
16877
16878 sub_map = VME_SUBMAP(map_entry);
16879
16880 vm_map_reference(sub_map);
16881 vm_map_unlock_read(map);
16882
16883 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16884
16885 retval = vm_map_page_range_info_internal(sub_map,
16886 submap_s_offset,
16887 submap_e_offset,
16888 VM_PAGE_INFO_BASIC,
16889 (vm_page_info_t) submap_info,
16890 count);
16891
16892 assert(retval == KERN_SUCCESS);
16893
16894 vm_map_lock_read(map);
16895 vm_map_deallocate(sub_map);
16896
16897 /* Move the "info" index by the number of pages we inspected.*/
16898 info_idx += range_len >> PAGE_SHIFT;
16899
16900 /* Move our current offset by the size of the range we inspected.*/
16901 curr_s_offset += range_len;
16902
16903 continue;
16904 }
16905
16906 object = VME_OBJECT(map_entry);
16907 if (object == VM_OBJECT_NULL) {
16908
16909 /*
16910 * We don't have an object here and, hence,
16911 * no pages to inspect. We'll fill up the
16912 * info structure appropriately.
16913 */
16914
16915 curr_e_offset = MIN(map_entry->vme_end, end);
16916
16917 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16918
16919 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16920
16921 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16922
16923 curr_s_offset = curr_e_offset;
16924
16925 info_idx += num_pages;
16926
16927 continue;
16928 }
16929
16930 if (do_region_footprint) {
16931 int pmap_disp;
16932
16933 disposition = 0;
16934 pmap_disp = 0;
16935 if (map->has_corpse_footprint) {
16936 /*
16937 * Query the page info data we saved
16938 * while forking the corpse.
16939 */
16940 vm_map_corpse_footprint_query_page_info(
16941 map,
16942 curr_s_offset,
16943 &pmap_disp);
16944 } else {
16945 /*
16946 * Query the pmap.
16947 */
16948 pmap_query_page_info(map->pmap,
16949 curr_s_offset,
16950 &pmap_disp);
16951 }
16952 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
16953 /* && not tagged as no-footprint? */
16954 VM_OBJECT_OWNER(object) != NULL &&
16955 VM_OBJECT_OWNER(object)->map == map) {
16956 if ((((curr_s_offset
16957 - map_entry->vme_start
16958 + VME_OFFSET(map_entry))
16959 / PAGE_SIZE) <
16960 (object->resident_page_count +
16961 vm_compressor_pager_get_count(object->pager)))) {
16962 /*
16963 * Non-volatile purgeable object owned
16964 * by this task: report the first
16965 * "#resident + #compressed" pages as
16966 * "resident" (to show that they
16967 * contribute to the footprint) but not
16968 * "dirty" (to avoid double-counting
16969 * with the fake "non-volatile" region
16970 * we'll report at the end of the
16971 * address space to account for all
16972 * (mapped or not) non-volatile memory
16973 * owned by this task.
16974 */
16975 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16976 }
16977 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
16978 object->purgable == VM_PURGABLE_EMPTY) &&
16979 /* && not tagged as no-footprint? */
16980 VM_OBJECT_OWNER(object) != NULL &&
16981 VM_OBJECT_OWNER(object)->map == map) {
16982 if ((((curr_s_offset
16983 - map_entry->vme_start
16984 + VME_OFFSET(map_entry))
16985 / PAGE_SIZE) <
16986 object->wired_page_count)) {
16987 /*
16988 * Volatile|empty purgeable object owned
16989 * by this task: report the first
16990 * "#wired" pages as "resident" (to
16991 * show that they contribute to the
16992 * footprint) but not "dirty" (to avoid
16993 * double-counting with the fake
16994 * "non-volatile" region we'll report
16995 * at the end of the address space to
16996 * account for all (mapped or not)
16997 * non-volatile memory owned by this
16998 * task.
16999 */
17000 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17001 }
17002 } else if (map_entry->iokit_acct &&
17003 object->internal &&
17004 object->purgable == VM_PURGABLE_DENY) {
17005 /*
17006 * Non-purgeable IOKit memory: phys_footprint
17007 * includes the entire virtual mapping.
17008 */
17009 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17010 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17011 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17012 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17013 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17014 /* alternate accounting */
17015#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17016 if (map->pmap->footprint_was_suspended ||
17017 /*
17018 * XXX corpse does not know if original
17019 * pmap had its footprint suspended...
17020 */
17021 map->has_corpse_footprint) {
17022 /*
17023 * The assertion below can fail if dyld
17024 * suspended footprint accounting
17025 * while doing some adjustments to
17026 * this page; the mapping would say
17027 * "use pmap accounting" but the page
17028 * would be marked "alternate
17029 * accounting".
17030 */
17031 } else
17032#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17033 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17034 pmap_disp = 0;
17035 } else {
17036 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17037 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17038 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17039 disposition |= VM_PAGE_QUERY_PAGE_REF;
17040 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17041 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17042 } else {
17043 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17044 }
17045 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17046 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17047 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17048 }
17049 }
17050 switch (flavor) {
17051 case VM_PAGE_INFO_BASIC:
17052 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17053 basic_info->disposition = disposition;
17054 basic_info->ref_count = 1;
17055 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17056 basic_info->offset = 0;
17057 basic_info->depth = 0;
17058
17059 info_idx++;
17060 break;
17061 }
17062 curr_s_offset += PAGE_SIZE;
17063 continue;
17064 }
17065
17066 vm_object_reference(object);
17067 /*
17068 * Shared mode -- so we can allow other readers
17069 * to grab the lock too.
17070 */
17071 vm_object_lock_shared(object);
17072
17073 curr_e_offset = MIN(map_entry->vme_end, end);
17074
17075 vm_map_unlock_read(map);
17076
17077 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17078
17079 curr_object = object;
17080
17081 for (; curr_s_offset < curr_e_offset;) {
17082
17083 if (object == curr_object) {
17084 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17085 } else {
17086 ref_count = curr_object->ref_count;
17087 }
17088
17089 curr_offset_in_object = offset_in_object;
17090
17091 for (;;) {
17092 m = vm_page_lookup(curr_object, curr_offset_in_object);
17093
17094 if (m != VM_PAGE_NULL) {
17095
17096 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17097 break;
17098
17099 } else {
17100 if (curr_object->internal &&
17101 curr_object->alive &&
17102 !curr_object->terminating &&
17103 curr_object->pager_ready) {
17104
17105 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17106 == VM_EXTERNAL_STATE_EXISTS) {
17107 /* the pager has that page */
17108 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17109 break;
17110 }
17111 }
17112
17113 /*
17114 * Go down the VM object shadow chain until we find the page
17115 * we're looking for.
17116 */
17117
17118 if (curr_object->shadow != VM_OBJECT_NULL) {
17119 vm_object_t shadow = VM_OBJECT_NULL;
17120
17121 curr_offset_in_object += curr_object->vo_shadow_offset;
17122 shadow = curr_object->shadow;
17123
17124 vm_object_lock_shared(shadow);
17125 vm_object_unlock(curr_object);
17126
17127 curr_object = shadow;
17128 depth++;
17129 continue;
17130 } else {
17131
17132 break;
17133 }
17134 }
17135 }
17136
			/*
			 * The ref_count is not strictly accurate: it counts the
			 * entities holding a reference on the object, which may
			 * not be mapping the object, or may not be mapping the
			 * section holding the target page.  It is still a
			 * ballpark number and, although an over-count, it picks
			 * up the copy-on-write cases.
			 *
			 * We could also get a picture of page sharing from
			 * pmap_attributes, but that would under-count, since
			 * only faulted-in mappings would show up.
			 */
17146
17147 if ((curr_object == object) && curr_object->shadow)
17148 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17149
17150 if (! curr_object->internal)
17151 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17152
17153 if (m != VM_PAGE_NULL) {
17154
17155 if (m->vmp_fictitious) {
17156
17157 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17158
17159 } else {
17160 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
17161 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17162
17163 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
17164 disposition |= VM_PAGE_QUERY_PAGE_REF;
17165
17166 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q)
17167 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17168
17169 if (m->vmp_cs_validated)
17170 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17171 if (m->vmp_cs_tainted)
17172 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17173 if (m->vmp_cs_nx)
17174 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17175 }
17176 }
17177
17178 switch (flavor) {
17179 case VM_PAGE_INFO_BASIC:
17180 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17181 basic_info->disposition = disposition;
17182 basic_info->ref_count = ref_count;
17183 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17184 VM_KERNEL_ADDRPERM(curr_object);
17185 basic_info->offset =
17186 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17187 basic_info->depth = depth;
17188
17189 info_idx++;
17190 break;
17191 }
17192
17193 disposition = 0;
17194 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17195
17196 /*
17197 * Move to next offset in the range and in our object.
17198 */
17199 curr_s_offset += PAGE_SIZE;
17200 offset_in_object += PAGE_SIZE;
17201 curr_offset_in_object = offset_in_object;
17202
17203 if (curr_object != object) {
17204
17205 vm_object_unlock(curr_object);
17206
17207 curr_object = object;
17208
17209 vm_object_lock_shared(curr_object);
17210 } else {
17211
17212 vm_object_lock_yield_shared(curr_object);
17213 }
17214 }
17215
17216 vm_object_unlock(curr_object);
17217 vm_object_deallocate(curr_object);
17218
17219 vm_map_lock_read(map);
17220 }
17221
17222 vm_map_unlock_read(map);
17223 return retval;
17224}
17225
17226/*
17227 * vm_map_msync
17228 *
 * Synchronises the specified memory range with its backing store
 * image by either flushing or cleaning the contents to the
 * appropriate memory manager, engaging in a memory object
 * synchronize dialog with the manager. The client doesn't return
 * until the manager issues the m_o_s_completed message. MIG
 * magically converts the user task parameter to the task's
 * address map.
17235 *
17236 * interpretation of sync_flags
17237 * VM_SYNC_INVALIDATE - discard pages, only return precious
17238 * pages to manager.
17239 *
17240 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17241 * - discard pages, write dirty or precious
17242 * pages back to memory manager.
17243 *
17244 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17245 * - write dirty or precious pages back to
17246 * the memory manager.
17247 *
17248 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17249 * is a hole in the region, and we would
17250 * have returned KERN_SUCCESS, return
17251 * KERN_INVALID_ADDRESS instead.
17252 *
17253 * NOTE
17254 * The memory object attributes have not yet been implemented, this
17255 * function will have to deal with the invalidate attribute
17256 *
17257 * RETURNS
17258 * KERN_INVALID_TASK Bad task parameter
17259 * KERN_INVALID_ARGUMENT both sync and async were specified.
17260 * KERN_SUCCESS The usual.
17261 * KERN_INVALID_ADDRESS There was a hole in the region.
17262 */
17263
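/*
 * For reference (an observation about callers, not something enforced here):
 * the BSD msync(2) path typically maps MS_ASYNC, MS_SYNC and MS_INVALIDATE
 * onto VM_SYNC_ASYNCHRONOUS, VM_SYNC_SYNCHRONOUS and VM_SYNC_INVALIDATE
 * before calling this routine.
 */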
17264kern_return_t
17265vm_map_msync(
17266 vm_map_t map,
17267 vm_map_address_t address,
17268 vm_map_size_t size,
17269 vm_sync_t sync_flags)
17270{
17271 vm_map_entry_t entry;
17272 vm_map_size_t amount_left;
17273 vm_object_offset_t offset;
17274 boolean_t do_sync_req;
17275 boolean_t had_hole = FALSE;
17276 vm_map_offset_t pmap_offset;
17277
17278 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17279 (sync_flags & VM_SYNC_SYNCHRONOUS))
17280 return(KERN_INVALID_ARGUMENT);
17281
17282 /*
17283 * align address and size on page boundaries
17284 */
17285 size = (vm_map_round_page(address + size,
17286 VM_MAP_PAGE_MASK(map)) -
17287 vm_map_trunc_page(address,
17288 VM_MAP_PAGE_MASK(map)));
17289 address = vm_map_trunc_page(address,
17290 VM_MAP_PAGE_MASK(map));
17291
17292 if (map == VM_MAP_NULL)
17293 return(KERN_INVALID_TASK);
17294
17295 if (size == 0)
17296 return(KERN_SUCCESS);
17297
17298 amount_left = size;
17299
17300 while (amount_left > 0) {
17301 vm_object_size_t flush_size;
17302 vm_object_t object;
17303
17304 vm_map_lock(map);
17305 if (!vm_map_lookup_entry(map,
17306 address,
17307 &entry)) {
17308
17309 vm_map_size_t skip;
17310
17311 /*
17312 * hole in the address map.
17313 */
17314 had_hole = TRUE;
17315
17316 if (sync_flags & VM_SYNC_KILLPAGES) {
17317 /*
17318 * For VM_SYNC_KILLPAGES, there should be
17319 * no holes in the range, since we couldn't
17320 * prevent someone else from allocating in
17321 * that hole and we wouldn't want to "kill"
17322 * their pages.
17323 */
17324 vm_map_unlock(map);
17325 break;
17326 }
17327
17328 /*
17329 * Check for empty map.
17330 */
17331 if (entry == vm_map_to_entry(map) &&
17332 entry->vme_next == entry) {
17333 vm_map_unlock(map);
17334 break;
17335 }
17336 /*
17337 * Check that we don't wrap and that
17338 * we have at least one real map entry.
17339 */
17340 if ((map->hdr.nentries == 0) ||
17341 (entry->vme_next->vme_start < address)) {
17342 vm_map_unlock(map);
17343 break;
17344 }
17345 /*
17346 * Move up to the next entry if needed
17347 */
17348 skip = (entry->vme_next->vme_start - address);
17349 if (skip >= amount_left)
17350 amount_left = 0;
17351 else
17352 amount_left -= skip;
17353 address = entry->vme_next->vme_start;
17354 vm_map_unlock(map);
17355 continue;
17356 }
17357
17358 offset = address - entry->vme_start;
17359 pmap_offset = address;
17360
17361 /*
17362 * do we have more to flush than is contained in this
17363 * entry ?
17364 */
17365 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17366 flush_size = entry->vme_end -
17367 (entry->vme_start + offset);
17368 } else {
17369 flush_size = amount_left;
17370 }
17371 amount_left -= flush_size;
17372 address += flush_size;
17373
17374 if (entry->is_sub_map == TRUE) {
17375 vm_map_t local_map;
17376 vm_map_offset_t local_offset;
17377
17378 local_map = VME_SUBMAP(entry);
17379 local_offset = VME_OFFSET(entry);
17380 vm_map_unlock(map);
17381 if (vm_map_msync(
17382 local_map,
17383 local_offset,
17384 flush_size,
17385 sync_flags) == KERN_INVALID_ADDRESS) {
17386 had_hole = TRUE;
17387 }
17388 continue;
17389 }
17390 object = VME_OBJECT(entry);
17391
17392 /*
17393 * We can't sync this object if the object has not been
17394 * created yet
17395 */
17396 if (object == VM_OBJECT_NULL) {
17397 vm_map_unlock(map);
17398 continue;
17399 }
17400 offset += VME_OFFSET(entry);
17401
17402 vm_object_lock(object);
17403
17404 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17405 int kill_pages = 0;
17406 boolean_t reusable_pages = FALSE;
17407
17408 if (sync_flags & VM_SYNC_KILLPAGES) {
17409 if (((object->ref_count == 1) ||
17410 ((object->copy_strategy !=
17411 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17412 (object->copy == VM_OBJECT_NULL))) &&
17413 (object->shadow == VM_OBJECT_NULL)) {
17414 if (object->ref_count != 1) {
17415 vm_page_stats_reusable.free_shared++;
17416 }
17417 kill_pages = 1;
17418 } else {
17419 kill_pages = -1;
17420 }
17421 }
17422 if (kill_pages != -1)
17423 vm_object_deactivate_pages(
17424 object,
17425 offset,
17426 (vm_object_size_t) flush_size,
17427 kill_pages,
17428 reusable_pages,
17429 map->pmap,
17430 pmap_offset);
17431 vm_object_unlock(object);
17432 vm_map_unlock(map);
17433 continue;
17434 }
17435 /*
17436 * We can't sync this object if there isn't a pager.
17437 * Don't bother to sync internal objects, since there can't
17438 * be any "permanent" storage for these objects anyway.
17439 */
17440 if ((object->pager == MEMORY_OBJECT_NULL) ||
17441 (object->internal) || (object->private)) {
17442 vm_object_unlock(object);
17443 vm_map_unlock(map);
17444 continue;
17445 }
17446 /*
17447 * keep reference on the object until syncing is done
17448 */
17449 vm_object_reference_locked(object);
17450 vm_object_unlock(object);
17451
17452 vm_map_unlock(map);
17453
17454 do_sync_req = vm_object_sync(object,
17455 offset,
17456 flush_size,
17457 sync_flags & VM_SYNC_INVALIDATE,
17458 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17459 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17460 sync_flags & VM_SYNC_SYNCHRONOUS);
17461
17462 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17463 /*
17464 * clear out the clustering and read-ahead hints
17465 */
17466 vm_object_lock(object);
17467
17468 object->pages_created = 0;
17469 object->pages_used = 0;
17470 object->sequential = 0;
17471 object->last_alloc = 0;
17472
17473 vm_object_unlock(object);
17474 }
17475 vm_object_deallocate(object);
17476 } /* while */
17477
17478 /* for proper msync() behaviour */
17479 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
17480 return(KERN_INVALID_ADDRESS);
17481
17482 return(KERN_SUCCESS);
} /* vm_map_msync */
17484
17485/*
17486 * Routine: convert_port_entry_to_map
17487 * Purpose:
17488 * Convert from a port specifying an entry or a task
17489 * to a map. Doesn't consume the port ref; produces a map ref,
17490 * which may be null. Unlike convert_port_to_map, the
 * port may be either task or named-entry backed.
17492 * Conditions:
17493 * Nothing locked.
17494 */
17495
17496
17497vm_map_t
17498convert_port_entry_to_map(
17499 ipc_port_t port)
17500{
17501 vm_map_t map;
17502 vm_named_entry_t named_entry;
17503 uint32_t try_failed_count = 0;
17504
17505 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17506 while(TRUE) {
17507 ip_lock(port);
17508 if(ip_active(port) && (ip_kotype(port)
17509 == IKOT_NAMED_ENTRY)) {
17510 named_entry =
17511 (vm_named_entry_t)port->ip_kobject;
17512 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17513 ip_unlock(port);
17514
17515 try_failed_count++;
17516 mutex_pause(try_failed_count);
17517 continue;
17518 }
17519 named_entry->ref_count++;
17520 lck_mtx_unlock(&(named_entry)->Lock);
17521 ip_unlock(port);
17522 if ((named_entry->is_sub_map) &&
17523 (named_entry->protection
17524 & VM_PROT_WRITE)) {
17525 map = named_entry->backing.map;
17526 } else {
17527 mach_destroy_memory_entry(port);
17528 return VM_MAP_NULL;
17529 }
17530 vm_map_reference_swap(map);
17531 mach_destroy_memory_entry(port);
17532 break;
17533 }
17534 else
17535 return VM_MAP_NULL;
17536 }
17537 }
17538 else
17539 map = convert_port_to_map(port);
17540
17541 return map;
17542}
17543
17544/*
17545 * Routine: convert_port_entry_to_object
17546 * Purpose:
17547 * Convert from a port specifying a named entry to an
 * object. Doesn't consume the port ref; produces an object ref,
 * which may be null.
17550 * Conditions:
17551 * Nothing locked.
17552 */
17553
17554
17555vm_object_t
17556convert_port_entry_to_object(
17557 ipc_port_t port)
17558{
17559 vm_object_t object = VM_OBJECT_NULL;
17560 vm_named_entry_t named_entry;
17561 uint32_t try_failed_count = 0;
17562
17563 if (IP_VALID(port) &&
17564 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17565 try_again:
17566 ip_lock(port);
17567 if (ip_active(port) &&
17568 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17569 named_entry = (vm_named_entry_t)port->ip_kobject;
17570 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17571 ip_unlock(port);
17572 try_failed_count++;
17573 mutex_pause(try_failed_count);
17574 goto try_again;
17575 }
17576 named_entry->ref_count++;
17577 lck_mtx_unlock(&(named_entry)->Lock);
17578 ip_unlock(port);
17579 if (!(named_entry->is_sub_map) &&
17580 !(named_entry->is_copy) &&
17581 (named_entry->protection & VM_PROT_WRITE)) {
17582 object = named_entry->backing.object;
17583 vm_object_reference(object);
17584 }
17585 mach_destroy_memory_entry(port);
17586 }
17587 }
17588
17589 return object;
17590}
17591
17592/*
17593 * Export routines to other components for the things we access locally through
17594 * macros.
17595 */
17596#undef current_map
17597vm_map_t
17598current_map(void)
17599{
17600 return (current_map_fast());
17601}
17602
17603/*
17604 * vm_map_reference:
17605 *
17606 * Most code internal to the osfmk will go through a
17607 * macro defining this. This is always here for the
17608 * use of other kernel components.
17609 */
17610#undef vm_map_reference
17611void
17612vm_map_reference(
17613 vm_map_t map)
17614{
17615 if (map == VM_MAP_NULL)
17616 return;
17617
17618 lck_mtx_lock(&map->s_lock);
17619#if TASK_SWAPPER
17620 assert(map->res_count > 0);
17621 assert(map->map_refcnt >= map->res_count);
17622 map->res_count++;
17623#endif
17624 map->map_refcnt++;
17625 lck_mtx_unlock(&map->s_lock);
17626}
17627
17628/*
17629 * vm_map_deallocate:
17630 *
17631 * Removes a reference from the specified map,
17632 * destroying it if no references remain.
17633 * The map should not be locked.
17634 */
17635void
17636vm_map_deallocate(
17637 vm_map_t map)
17638{
17639 unsigned int ref;
17640
17641 if (map == VM_MAP_NULL)
17642 return;
17643
17644 lck_mtx_lock(&map->s_lock);
17645 ref = --map->map_refcnt;
17646 if (ref > 0) {
17647 vm_map_res_deallocate(map);
17648 lck_mtx_unlock(&map->s_lock);
17649 return;
17650 }
17651 assert(map->map_refcnt == 0);
17652 lck_mtx_unlock(&map->s_lock);
17653
17654#if TASK_SWAPPER
17655 /*
17656 * The map residence count isn't decremented here because
17657 * the vm_map_delete below will traverse the entire map,
17658 * deleting entries, and the residence counts on objects
17659 * and sharing maps will go away then.
17660 */
17661#endif
17662
17663 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17664}
17665
17666
17667void
17668vm_map_disable_NX(vm_map_t map)
17669{
17670 if (map == NULL)
17671 return;
17672 if (map->pmap == NULL)
17673 return;
17674
17675 pmap_disable_NX(map->pmap);
17676}
17677
17678void
17679vm_map_disallow_data_exec(vm_map_t map)
17680{
17681 if (map == NULL)
17682 return;
17683
17684 map->map_disallow_data_exec = TRUE;
17685}
17686
17687/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17688 * more descriptive.
17689 */
17690void
17691vm_map_set_32bit(vm_map_t map)
17692{
17693#if defined(__arm__) || defined(__arm64__)
17694 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17695#else
17696 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
17697#endif
17698}
17699
17700
17701void
17702vm_map_set_64bit(vm_map_t map)
17703{
17704#if defined(__arm__) || defined(__arm64__)
17705 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
17706#else
17707 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
17708#endif
17709}
17710
17711/*
17712 * Expand the maximum size of an existing map to the maximum supported.
17713 */
17714void
17715vm_map_set_jumbo(vm_map_t map)
17716{
17717#if defined (__arm64__)
17718 vm_map_set_max_addr(map, ~0);
17719#else /* arm64 */
17720 (void) map;
17721#endif
17722}
17723
17724/*
17725 * Expand the maximum size of an existing map.
17726 */
17727void
17728vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
17729{
17730#if defined(__arm64__)
17731 vm_map_offset_t max_supported_offset = 0;
17732 vm_map_offset_t old_max_offset = map->max_offset;
17733 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
17734
17735 new_max_offset = trunc_page(new_max_offset);
17736
17737 /* The address space cannot be shrunk using this routine. */
17738 if (old_max_offset >= new_max_offset) {
17739 return;
17740 }
17741
17742 if (max_supported_offset < new_max_offset) {
17743 new_max_offset = max_supported_offset;
17744 }
17745
17746 map->max_offset = new_max_offset;
17747
17748 if (map->holes_list->prev->vme_end == old_max_offset) {
17749 /*
17750 * There is already a hole at the end of the map; simply make it bigger.
17751 */
17752 map->holes_list->prev->vme_end = map->max_offset;
17753 } else {
17754 /*
17755 * There is no hole at the end, so we need to create a new hole
17756 * for the new empty space we're creating.
17757 */
17758 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
17759 new_hole->start = old_max_offset;
17760 new_hole->end = map->max_offset;
17761 new_hole->prev = map->holes_list->prev;
17762 new_hole->next = (struct vm_map_entry *)map->holes_list;
17763 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
17764 map->holes_list->prev = (struct vm_map_entry *)new_hole;
17765 }
17766#else
17767 (void)map;
17768 (void)new_max_offset;
17769#endif
17770}
17771
17772vm_map_offset_t
17773vm_compute_max_offset(boolean_t is64)
17774{
17775#if defined(__arm__) || defined(__arm64__)
17776 return (pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE));
17777#else
17778 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
17779#endif
17780}
17781
17782void
17783vm_map_get_max_aslr_slide_section(
17784 vm_map_t map __unused,
17785 int64_t *max_sections,
17786 int64_t *section_size)
17787{
17788#if defined(__arm64__)
17789 *max_sections = 3;
17790 *section_size = ARM_TT_TWIG_SIZE;
17791#else
17792 *max_sections = 1;
17793 *section_size = 0;
17794#endif
17795}
17796
17797uint64_t
17798vm_map_get_max_aslr_slide_pages(vm_map_t map)
17799{
17800#if defined(__arm64__)
17801 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
17802 * limited embedded address space; this is also meant to minimize pmap
17803 * memory usage on 16KB page systems.
17804 */
17805 return (1 << (24 - VM_MAP_PAGE_SHIFT(map)));
17806#else
17807 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17808#endif
17809}
17810
17811uint64_t
17812vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
17813{
17814#if defined(__arm64__)
17815 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
17816 * of independent entropy on 16KB page systems.
17817 */
17818 return (1 << (22 - VM_MAP_PAGE_SHIFT(map)));
17819#else
17820 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17821#endif
17822}
17823
17824#ifndef __arm__
17825boolean_t
17826vm_map_is_64bit(
17827 vm_map_t map)
17828{
17829 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
17830}
17831#endif
17832
17833boolean_t
17834vm_map_has_hard_pagezero(
17835 vm_map_t map,
17836 vm_map_offset_t pagezero_size)
17837{
17838 /*
17839 * XXX FBDP
17840 * We should lock the VM map (for read) here but we can get away
17841 * with it for now because there can't really be any race condition:
17842 * the VM map's min_offset is changed only when the VM map is created
17843 * and when the zero page is established (when the binary gets loaded),
17844 * and this routine gets called only when the task terminates and the
17845 * VM map is being torn down, and when a new map is created via
17846 * load_machfile()/execve().
17847 */
17848 return (map->min_offset >= pagezero_size);
17849}
17850
17851/*
17852  * Raise a VM map's maximum offset.
17853 */
17854kern_return_t
17855vm_map_raise_max_offset(
17856 vm_map_t map,
17857 vm_map_offset_t new_max_offset)
17858{
17859 kern_return_t ret;
17860
17861 vm_map_lock(map);
17862 ret = KERN_INVALID_ADDRESS;
17863
17864 if (new_max_offset >= map->max_offset) {
17865 if (!vm_map_is_64bit(map)) {
17866 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
17867 map->max_offset = new_max_offset;
17868 ret = KERN_SUCCESS;
17869 }
17870 } else {
17871 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
17872 map->max_offset = new_max_offset;
17873 ret = KERN_SUCCESS;
17874 }
17875 }
17876 }
17877
17878 vm_map_unlock(map);
17879 return ret;
17880}
17881
17882
17883/*
17884 * Raise a VM map's minimum offset.
17885 * To strictly enforce "page zero" reservation.
17886 */
17887kern_return_t
17888vm_map_raise_min_offset(
17889 vm_map_t map,
17890 vm_map_offset_t new_min_offset)
17891{
17892 vm_map_entry_t first_entry;
17893
17894 new_min_offset = vm_map_round_page(new_min_offset,
17895 VM_MAP_PAGE_MASK(map));
17896
17897 vm_map_lock(map);
17898
17899 if (new_min_offset < map->min_offset) {
17900 /*
17901 * Can't move min_offset backwards, as that would expose
17902 * a part of the address space that was previously, and for
17903 * possibly good reasons, inaccessible.
17904 */
17905 vm_map_unlock(map);
17906 return KERN_INVALID_ADDRESS;
17907 }
17908 if (new_min_offset >= map->max_offset) {
17909 /* can't go beyond the end of the address space */
17910 vm_map_unlock(map);
17911 return KERN_INVALID_ADDRESS;
17912 }
17913
17914 first_entry = vm_map_first_entry(map);
17915 if (first_entry != vm_map_to_entry(map) &&
17916 first_entry->vme_start < new_min_offset) {
17917 /*
17918 * Some memory was already allocated below the new
17919 		 * minimum offset. It's too late to change it now...
17920 */
17921 vm_map_unlock(map);
17922 return KERN_NO_SPACE;
17923 }
17924
17925 map->min_offset = new_min_offset;
17926
17927 assert(map->holes_list);
17928 map->holes_list->start = new_min_offset;
17929 assert(new_min_offset < map->holes_list->end);
17930
17931 vm_map_unlock(map);
17932
17933 return KERN_SUCCESS;
17934}
17935
17936/*
17937 * Set the limit on the maximum amount of user wired memory allowed for this map.
17938 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
17939  * the kernel. The limits are checked on the Mach VM side, so we keep a copy here to
17940  * avoid reaching over to the BSD data structures.
17941 */
17942
17943void
17944vm_map_set_user_wire_limit(vm_map_t map,
17945 vm_size_t limit)
17946{
17947 map->user_wire_limit = limit;
17948}
17949
17950
17951void vm_map_switch_protect(vm_map_t map,
17952 boolean_t val)
17953{
17954 vm_map_lock(map);
17955 	map->switch_protect = val;
17956 vm_map_unlock(map);
17957}
17958
17959/*
17960 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
17961 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
17962 * bump both counters.
17963 */
17964void
17965vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
17966{
17967 pmap_t pmap = vm_map_pmap(map);
17968
17969 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17970 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17971}
17972
17973void
17974vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
17975{
17976 pmap_t pmap = vm_map_pmap(map);
17977
17978 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17979 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17980}
17981
17982/* Add (generate) code signature for memory range */
17983#if CONFIG_DYNAMIC_CODE_SIGNING
17984kern_return_t vm_map_sign(vm_map_t map,
17985 vm_map_offset_t start,
17986 vm_map_offset_t end)
17987{
17988 vm_map_entry_t entry;
17989 vm_page_t m;
17990 vm_object_t object;
17991
17992 /*
17993 * Vet all the input parameters and current type and state of the
17994 	 * underlying object. Return with an error if anything is amiss.
17995 */
17996 if (map == VM_MAP_NULL)
17997 return(KERN_INVALID_ARGUMENT);
17998
17999 vm_map_lock_read(map);
18000
18001 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18002 /*
18003 * Must pass a valid non-submap address.
18004 */
18005 vm_map_unlock_read(map);
18006 return(KERN_INVALID_ADDRESS);
18007 }
18008
18009 	if ((entry->vme_start > start) || (entry->vme_end < end)) {
18010 /*
18011 * Map entry doesn't cover the requested range. Not handling
18012 * this situation currently.
18013 */
18014 vm_map_unlock_read(map);
18015 return(KERN_INVALID_ARGUMENT);
18016 }
18017
18018 object = VME_OBJECT(entry);
18019 if (object == VM_OBJECT_NULL) {
18020 /*
18021 * Object must already be present or we can't sign.
18022 */
18023 vm_map_unlock_read(map);
18024 return KERN_INVALID_ARGUMENT;
18025 }
18026
18027 vm_object_lock(object);
18028 vm_map_unlock_read(map);
18029
18030 	while (start < end) {
18031 uint32_t refmod;
18032
18033 m = vm_page_lookup(object,
18034 start - entry->vme_start + VME_OFFSET(entry));
18035 		if (m == VM_PAGE_NULL) {
18036 			/* should we try to fault in a page here? We can probably
18037 			 * demand that it exists and is locked for this request */
18038 vm_object_unlock(object);
18039 return KERN_FAILURE;
18040 }
18041 /* deal with special page status */
18042 if (m->vmp_busy ||
18043 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18044 vm_object_unlock(object);
18045 return KERN_FAILURE;
18046 }
18047
18048 /* Page is OK... now "validate" it */
18049 /* This is the place where we'll call out to create a code
18050 * directory, later */
18051 m->vmp_cs_validated = TRUE;
18052
18053 /* The page is now "clean" for codesigning purposes. That means
18054 * we don't consider it as modified (wpmapped) anymore. But
18055 * we'll disconnect the page so we note any future modification
18056 * attempts. */
18057 m->vmp_wpmapped = FALSE;
18058 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18059
18060 /* Pull the dirty status from the pmap, since we cleared the
18061 * wpmapped bit */
18062 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18063 SET_PAGE_DIRTY(m, FALSE);
18064 }
18065
18066 /* On to the next page */
18067 start += PAGE_SIZE;
18068 }
18069 vm_object_unlock(object);
18070
18071 return KERN_SUCCESS;
18072}
18073#endif
18074
18075kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18076{
18077 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
18078 vm_map_entry_t next_entry;
18079 kern_return_t kr = KERN_SUCCESS;
18080 vm_map_t zap_map;
18081
18082 vm_map_lock(map);
18083
18084 /*
18085 * We use a "zap_map" to avoid having to unlock
18086 * the "map" in vm_map_delete().
18087 */
18088 zap_map = vm_map_create(PMAP_NULL,
18089 map->min_offset,
18090 map->max_offset,
18091 map->hdr.entries_pageable);
18092
18093 	if (zap_map == VM_MAP_NULL) {
18094 		vm_map_unlock(map);
18095 		return KERN_RESOURCE_SHORTAGE;
	}
18096
18097 vm_map_set_page_shift(zap_map,
18098 VM_MAP_PAGE_SHIFT(map));
18099 vm_map_disable_hole_optimization(zap_map);
18100
18101 for (entry = vm_map_first_entry(map);
18102 entry != vm_map_to_entry(map);
18103 entry = next_entry) {
18104 next_entry = entry->vme_next;
18105
18106 if (VME_OBJECT(entry) &&
18107 !entry->is_sub_map &&
18108 (VME_OBJECT(entry)->internal == TRUE) &&
18109 (VME_OBJECT(entry)->ref_count == 1)) {
18110
18111 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18112 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18113
18114 (void)vm_map_delete(map,
18115 entry->vme_start,
18116 entry->vme_end,
18117 VM_MAP_REMOVE_SAVE_ENTRIES,
18118 zap_map);
18119 }
18120 }
18121
18122 vm_map_unlock(map);
18123
18124 /*
18125 	 * Get rid of the "zap_map" and all the map entries that
18126 	 * it may still contain.
18127 */
18128 if (zap_map != VM_MAP_NULL) {
18129 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18130 zap_map = VM_MAP_NULL;
18131 }
18132
18133 return kr;
18134}
18135
18136
18137#if DEVELOPMENT || DEBUG
18138
18139int
18140vm_map_disconnect_page_mappings(
18141 vm_map_t map,
18142 boolean_t do_unnest)
18143{
18144 vm_map_entry_t entry;
18145 int page_count = 0;
18146
18147 if (do_unnest == TRUE) {
18148#ifndef NO_NESTED_PMAP
18149 vm_map_lock(map);
18150
18151 for (entry = vm_map_first_entry(map);
18152 entry != vm_map_to_entry(map);
18153 entry = entry->vme_next) {
18154
18155 if (entry->is_sub_map && entry->use_pmap) {
18156 /*
18157 				 * Make sure the range between the start of this entry and
18158 				 * the end of this entry is no longer nested, so that
18159 				 * we will only remove mappings from the pmap in use by
18160 				 * this task.
18161 */
18162 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18163 }
18164 }
18165 vm_map_unlock(map);
18166#endif
18167 }
18168 vm_map_lock_read(map);
18169
18170 page_count = map->pmap->stats.resident_count;
18171
18172 for (entry = vm_map_first_entry(map);
18173 entry != vm_map_to_entry(map);
18174 entry = entry->vme_next) {
18175
18176 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18177 (VME_OBJECT(entry)->phys_contiguous))) {
18178 continue;
18179 }
18180 if (entry->is_sub_map)
18181 assert(!entry->use_pmap);
18182
18183 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18184 }
18185 vm_map_unlock_read(map);
18186
18187 return page_count;
18188}
18189
18190#endif
18191
18192
18193#if CONFIG_FREEZE
18194
18195
18196int c_freezer_swapout_page_count;
18197int c_freezer_compression_count = 0;
18198AbsoluteTime c_freezer_last_yield_ts = 0;
18199
18200extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18201extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18202
18203kern_return_t
18204vm_map_freeze(
18205 vm_map_t map,
18206 unsigned int *purgeable_count,
18207 unsigned int *wired_count,
18208 unsigned int *clean_count,
18209 unsigned int *dirty_count,
18210 __unused unsigned int dirty_budget,
18211 unsigned int *shared_count,
18212 int *freezer_error_code,
18213 boolean_t eval_only)
18214{
18215 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18216 kern_return_t kr = KERN_SUCCESS;
18217 boolean_t evaluation_phase = TRUE;
18218 vm_object_t cur_shared_object = NULL;
18219 int cur_shared_obj_ref_cnt = 0;
18220 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18221
18222 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18223
18224 /*
18225 * We need the exclusive lock here so that we can
18226 * block any page faults or lookups while we are
18227 * in the middle of freezing this vm map.
18228 */
18229 vm_map_lock(map);
18230
18231 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18232
18233 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18234 if (vm_compressor_low_on_space()) {
18235 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18236 }
18237
18238 if (vm_swap_low_on_space()) {
18239 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18240 }
18241
18242 kr = KERN_NO_SPACE;
18243 goto done;
18244 }
18245
18246 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18247 /*
18248 * In-memory compressor backing the freezer. No disk.
18249 * So no need to do the evaluation phase.
18250 */
18251 evaluation_phase = FALSE;
18252
18253 if (eval_only == TRUE) {
18254 /*
18255 * We don't support 'eval_only' mode
18256 * in this non-swap config.
18257 */
18258 *freezer_error_code = FREEZER_ERROR_GENERIC;
18259 kr = KERN_INVALID_ARGUMENT;
18260 goto done;
18261 }
18262
18263 c_freezer_compression_count = 0;
18264 clock_get_uptime(&c_freezer_last_yield_ts);
18265 }
18266again:
18267
18268 for (entry2 = vm_map_first_entry(map);
18269 entry2 != vm_map_to_entry(map);
18270 entry2 = entry2->vme_next) {
18271
18272 vm_object_t src_object = VME_OBJECT(entry2);
18273
18274 if (src_object &&
18275 !entry2->is_sub_map &&
18276 !src_object->phys_contiguous) {
18277 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18278
18279 if (src_object->internal == TRUE) {
18280
18281 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18282 /*
18283 * Pages belonging to this object could be swapped to disk.
18284 * Make sure it's not a shared object because we could end
18285 * up just bringing it back in again.
18286 *
18287 * We try to optimize somewhat by checking for objects that are mapped
18288 					 * more than once within our own map. But we don't do full searches;
18289 * we just look at the entries following our current entry.
18290 */
18291 if (src_object->ref_count > 1) {
18292 if (src_object != cur_shared_object) {
18293 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18294 dirty_shared_count += obj_pages_snapshot;
18295
18296 cur_shared_object = src_object;
18297 cur_shared_obj_ref_cnt = 1;
18298 continue;
18299 } else {
18300 cur_shared_obj_ref_cnt++;
18301 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18302 /*
18303 * Fall through to below and treat this object as private.
18304 							 * So deduct its pages from our shared total and add them to the
18305 * private total.
18306 */
18307
18308 dirty_shared_count -= obj_pages_snapshot;
18309 dirty_private_count += obj_pages_snapshot;
18310 } else {
18311 continue;
18312 }
18313 }
18314 }
18315
18316
18317 if (src_object->ref_count == 1) {
18318 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18319 }
18320
18321 if (evaluation_phase == TRUE) {
18322
18323 continue;
18324 }
18325 }
18326
18327 vm_object_compressed_freezer_pageout(src_object);
18328
18329 *wired_count += src_object->wired_page_count;
18330
18331 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18332 if (vm_compressor_low_on_space()) {
18333 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18334 }
18335
18336 if (vm_swap_low_on_space()) {
18337 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18338 }
18339
18340 kr = KERN_NO_SPACE;
18341 break;
18342 }
18343 }
18344 }
18345 }
18346
18347 if (evaluation_phase) {
18348
18349 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18350
18351 if (dirty_shared_count > shared_pages_threshold) {
18352 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18353 kr = KERN_FAILURE;
18354 goto done;
18355 }
18356
18357 if (dirty_shared_count &&
18358 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18359 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18360 kr = KERN_FAILURE;
18361 goto done;
18362 }
18363
18364 evaluation_phase = FALSE;
18365 dirty_shared_count = dirty_private_count = 0;
18366
18367 c_freezer_compression_count = 0;
18368 clock_get_uptime(&c_freezer_last_yield_ts);
18369
18370 if (eval_only) {
18371 kr = KERN_SUCCESS;
18372 goto done;
18373 }
18374
18375 goto again;
18376
18377 } else {
18378
18379 kr = KERN_SUCCESS;
18380 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18381 }
18382
18383done:
18384 vm_map_unlock(map);
18385
18386 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18387 vm_object_compressed_freezer_done();
18388
18389 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18390 /*
18391 * reset the counter tracking the # of swapped compressed pages
18392 * because we are now done with this freeze session and task.
18393 */
18394
18395 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18396 c_freezer_swapout_page_count = 0;
18397 }
18398 }
18399 return kr;
18400}
18401
18402#endif
18403
18404/*
18405 * vm_map_entry_should_cow_for_true_share:
18406 *
18407 * Determines if the map entry should be clipped and setup for copy-on-write
18408 * to avoid applying "true_share" to a large VM object when only a subset is
18409 * targeted.
18410 *
18411 * For now, we target only the map entries created for the Objective C
18412 * Garbage Collector, which initially have the following properties:
18413 * - alias == VM_MEMORY_MALLOC
18414 * - wired_count == 0
18415 * - !needs_copy
18416 * and a VM object with:
18417 * - internal
18418 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18419 * - !true_share
18420 * - vo_size == ANON_CHUNK_SIZE
18421 *
18422 * Only non-kernel map entries.
18423 */
18424boolean_t
18425vm_map_entry_should_cow_for_true_share(
18426 vm_map_entry_t entry)
18427{
18428 vm_object_t object;
18429
18430 if (entry->is_sub_map) {
18431 /* entry does not point at a VM object */
18432 return FALSE;
18433 }
18434
18435 if (entry->needs_copy) {
18436 /* already set for copy_on_write: done! */
18437 return FALSE;
18438 }
18439
18440 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18441 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18442 /* not a malloc heap or Obj-C Garbage Collector heap */
18443 return FALSE;
18444 }
18445
18446 if (entry->wired_count) {
18447 /* wired: can't change the map entry... */
18448 vm_counters.should_cow_but_wired++;
18449 return FALSE;
18450 }
18451
18452 object = VME_OBJECT(entry);
18453
18454 if (object == VM_OBJECT_NULL) {
18455 /* no object yet... */
18456 return FALSE;
18457 }
18458
18459 if (!object->internal) {
18460 /* not an internal object */
18461 return FALSE;
18462 }
18463
18464 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18465 /* not the default copy strategy */
18466 return FALSE;
18467 }
18468
18469 if (object->true_share) {
18470 /* already true_share: too late to avoid it */
18471 return FALSE;
18472 }
18473
18474 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18475 object->vo_size != ANON_CHUNK_SIZE) {
18476 /* ... not an object created for the ObjC Garbage Collector */
18477 return FALSE;
18478 }
18479
18480 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18481 object->vo_size != 2048 * 4096) {
18482 /* ... not a "MALLOC_SMALL" heap */
18483 return FALSE;
18484 }
18485
18486 /*
18487 * All the criteria match: we have a large object being targeted for "true_share".
18488 * To limit the adverse side-effects linked with "true_share", tell the caller to
18489 	 * try to avoid setting up the entire object for "true_share" by clipping the
18490 * targeted range and setting it up for copy-on-write.
18491 */
18492 return TRUE;
18493}
18494
18495vm_map_offset_t
18496vm_map_round_page_mask(
18497 vm_map_offset_t offset,
18498 vm_map_offset_t mask)
18499{
18500 return VM_MAP_ROUND_PAGE(offset, mask);
18501}
18502
18503vm_map_offset_t
18504vm_map_trunc_page_mask(
18505 vm_map_offset_t offset,
18506 vm_map_offset_t mask)
18507{
18508 return VM_MAP_TRUNC_PAGE(offset, mask);
18509}
18510
18511boolean_t
18512vm_map_page_aligned(
18513 vm_map_offset_t offset,
18514 vm_map_offset_t mask)
18515{
18516 return ((offset) & mask) == 0;
18517}
18518
18519int
18520vm_map_page_shift(
18521 vm_map_t map)
18522{
18523 return VM_MAP_PAGE_SHIFT(map);
18524}
18525
18526int
18527vm_map_page_size(
18528 vm_map_t map)
18529{
18530 return VM_MAP_PAGE_SIZE(map);
18531}
18532
18533vm_map_offset_t
18534vm_map_page_mask(
18535 vm_map_t map)
18536{
18537 return VM_MAP_PAGE_MASK(map);
18538}
18539
18540kern_return_t
18541vm_map_set_page_shift(
18542 vm_map_t map,
18543 int pageshift)
18544{
18545 if (map->hdr.nentries != 0) {
18546 /* too late to change page size */
18547 return KERN_FAILURE;
18548 }
18549
18550 map->hdr.page_shift = pageshift;
18551
18552 return KERN_SUCCESS;
18553}
18554
18555kern_return_t
18556vm_map_query_volatile(
18557 vm_map_t map,
18558 mach_vm_size_t *volatile_virtual_size_p,
18559 mach_vm_size_t *volatile_resident_size_p,
18560 mach_vm_size_t *volatile_compressed_size_p,
18561 mach_vm_size_t *volatile_pmap_size_p,
18562 mach_vm_size_t *volatile_compressed_pmap_size_p)
18563{
18564 mach_vm_size_t volatile_virtual_size;
18565 mach_vm_size_t volatile_resident_count;
18566 mach_vm_size_t volatile_compressed_count;
18567 mach_vm_size_t volatile_pmap_count;
18568 mach_vm_size_t volatile_compressed_pmap_count;
18569 mach_vm_size_t resident_count;
18570 vm_map_entry_t entry;
18571 vm_object_t object;
18572
18573 /* map should be locked by caller */
18574
18575 volatile_virtual_size = 0;
18576 volatile_resident_count = 0;
18577 volatile_compressed_count = 0;
18578 volatile_pmap_count = 0;
18579 volatile_compressed_pmap_count = 0;
18580
18581 for (entry = vm_map_first_entry(map);
18582 entry != vm_map_to_entry(map);
18583 entry = entry->vme_next) {
18584 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
18585
18586 if (entry->is_sub_map) {
18587 continue;
18588 }
18589 if (! (entry->protection & VM_PROT_WRITE)) {
18590 continue;
18591 }
18592 object = VME_OBJECT(entry);
18593 if (object == VM_OBJECT_NULL) {
18594 continue;
18595 }
18596 if (object->purgable != VM_PURGABLE_VOLATILE &&
18597 object->purgable != VM_PURGABLE_EMPTY) {
18598 continue;
18599 }
18600 if (VME_OFFSET(entry)) {
18601 /*
18602 * If the map entry has been split and the object now
18603 * appears several times in the VM map, we don't want
18604 * to count the object's resident_page_count more than
18605 			 * once. We count it only for the first one, starting
18606 			 * at offset 0, and ignore the other VM map entries.
18607 */
18608 continue;
18609 }
18610 resident_count = object->resident_page_count;
18611 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18612 resident_count = 0;
18613 } else {
18614 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18615 }
18616
18617 volatile_virtual_size += entry->vme_end - entry->vme_start;
18618 volatile_resident_count += resident_count;
18619 if (object->pager) {
18620 volatile_compressed_count +=
18621 vm_compressor_pager_get_count(object->pager);
18622 }
18623 pmap_compressed_bytes = 0;
18624 pmap_resident_bytes =
18625 pmap_query_resident(map->pmap,
18626 entry->vme_start,
18627 entry->vme_end,
18628 &pmap_compressed_bytes);
18629 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
18630 volatile_compressed_pmap_count += (pmap_compressed_bytes
18631 / PAGE_SIZE);
18632 }
18633
18634 /* map is still locked on return */
18635
18636 *volatile_virtual_size_p = volatile_virtual_size;
18637 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
18638 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
18639 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
18640 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
18641
18642 return KERN_SUCCESS;
18643}
18644
18645void
18646vm_map_sizes(vm_map_t map,
18647 vm_map_size_t * psize,
18648 vm_map_size_t * pfree,
18649 vm_map_size_t * plargest_free)
18650{
18651 vm_map_entry_t entry;
18652 vm_map_offset_t prev;
18653 vm_map_size_t free, total_free, largest_free;
18654 boolean_t end;
18655
18656 if (!map)
18657 {
18658 *psize = *pfree = *plargest_free = 0;
18659 return;
18660 }
18661 total_free = largest_free = 0;
18662
18663 vm_map_lock_read(map);
18664 if (psize) *psize = map->max_offset - map->min_offset;
18665
18666 prev = map->min_offset;
18667 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
18668 {
18669 end = (entry == vm_map_to_entry(map));
18670
18671 if (end) free = entry->vme_end - prev;
18672 else free = entry->vme_start - prev;
18673
18674 total_free += free;
18675 if (free > largest_free) largest_free = free;
18676
18677 if (end) break;
18678 prev = entry->vme_end;
18679 }
18680 vm_map_unlock_read(map);
18681 if (pfree) *pfree = total_free;
18682 if (plargest_free) *plargest_free = largest_free;
18683}
18684
18685#if VM_SCAN_FOR_SHADOW_CHAIN
18686int vm_map_shadow_max(vm_map_t map);
18687int vm_map_shadow_max(
18688 vm_map_t map)
18689{
18690 int shadows, shadows_max;
18691 vm_map_entry_t entry;
18692 vm_object_t object, next_object;
18693
18694 if (map == NULL)
18695 return 0;
18696
18697 shadows_max = 0;
18698
18699 vm_map_lock_read(map);
18700
18701 for (entry = vm_map_first_entry(map);
18702 entry != vm_map_to_entry(map);
18703 entry = entry->vme_next) {
18704 if (entry->is_sub_map) {
18705 continue;
18706 }
18707 object = VME_OBJECT(entry);
18708 if (object == NULL) {
18709 continue;
18710 }
18711 vm_object_lock_shared(object);
18712 for (shadows = 0;
18713 object->shadow != NULL;
18714 shadows++, object = next_object) {
18715 next_object = object->shadow;
18716 vm_object_lock_shared(next_object);
18717 vm_object_unlock(object);
18718 }
18719 vm_object_unlock(object);
18720 if (shadows > shadows_max) {
18721 shadows_max = shadows;
18722 }
18723 }
18724
18725 vm_map_unlock_read(map);
18726
18727 return shadows_max;
18728}
18729#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
18730
18731void vm_commit_pagezero_status(vm_map_t lmap) {
18732 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
18733}
18734
18735#if __x86_64__
18736void
18737vm_map_set_high_start(
18738 vm_map_t map,
18739 vm_map_offset_t high_start)
18740{
18741 map->vmmap_high_start = high_start;
18742}
18743#endif /* __x86_64__ */
18744
18745#if PMAP_CS
18746kern_return_t
18747vm_map_entry_cs_associate(
18748 vm_map_t map,
18749 vm_map_entry_t entry,
18750 vm_map_kernel_flags_t vmk_flags)
18751{
18752 vm_object_t cs_object, cs_shadow;
18753 vm_object_offset_t cs_offset;
18754 void *cs_blobs;
18755 struct vnode *cs_vnode;
18756 kern_return_t cs_ret;
18757
18758 if (map->pmap == NULL ||
18759 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
18760 VME_OBJECT(entry) == VM_OBJECT_NULL ||
18761 ! (entry->protection & VM_PROT_EXECUTE)) {
18762 return KERN_SUCCESS;
18763 }
18764
18765 vm_map_lock_assert_exclusive(map);
18766
18767 if (entry->used_for_jit) {
18768 cs_ret = pmap_cs_associate(map->pmap,
18769 PMAP_CS_ASSOCIATE_JIT,
18770 entry->vme_start,
18771 entry->vme_end - entry->vme_start);
18772 goto done;
18773 }
18774
18775 if (vmk_flags.vmkf_remap_prot_copy) {
18776 cs_ret = pmap_cs_associate(map->pmap,
18777 PMAP_CS_ASSOCIATE_COW,
18778 entry->vme_start,
18779 entry->vme_end - entry->vme_start);
18780 goto done;
18781 }
18782
18783 vm_object_lock_shared(VME_OBJECT(entry));
18784 cs_offset = VME_OFFSET(entry);
18785 for (cs_object = VME_OBJECT(entry);
18786 (cs_object != VM_OBJECT_NULL &&
18787 !cs_object->code_signed);
18788 cs_object = cs_shadow) {
18789 cs_shadow = cs_object->shadow;
18790 if (cs_shadow != VM_OBJECT_NULL) {
18791 cs_offset += cs_object->vo_shadow_offset;
18792 vm_object_lock_shared(cs_shadow);
18793 }
18794 vm_object_unlock(cs_object);
18795 }
18796 if (cs_object == VM_OBJECT_NULL) {
18797 return KERN_SUCCESS;
18798 }
18799
18800 cs_offset += cs_object->paging_offset;
18801 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
18802 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
18803 &cs_blobs);
18804 assert(cs_ret == KERN_SUCCESS);
18805 cs_ret = cs_associate_blob_with_mapping(map->pmap,
18806 entry->vme_start,
18807 (entry->vme_end -
18808 entry->vme_start),
18809 cs_offset,
18810 cs_blobs);
18811 vm_object_unlock(cs_object);
18812 cs_object = VM_OBJECT_NULL;
18813
18814 done:
18815 if (cs_ret == KERN_SUCCESS) {
18816 DTRACE_VM2(vm_map_entry_cs_associate_success,
18817 vm_map_offset_t, entry->vme_start,
18818 vm_map_offset_t, entry->vme_end);
18819 if (vm_map_executable_immutable) {
18820 /*
18821 * Prevent this executable
18822 * mapping from being unmapped
18823 * or modified.
18824 */
18825 entry->permanent = TRUE;
18826 }
18827 /*
18828 		 * pmap says it will check the
18829 * code-signing validity of pages
18830 * faulted in via this mapping, so
18831 * this map entry should be marked so
18832 * that vm_fault() bypasses code-signing
18833 * validation for faults coming through
18834 * this mapping.
18835 */
18836 entry->pmap_cs_associated = TRUE;
18837 } else if (cs_ret == KERN_NOT_SUPPORTED) {
18838 /*
18839 * pmap won't check the code-signing
18840 * validity of pages faulted in via
18841 * this mapping, so VM should keep
18842 * doing it.
18843 */
18844 DTRACE_VM3(vm_map_entry_cs_associate_off,
18845 vm_map_offset_t, entry->vme_start,
18846 vm_map_offset_t, entry->vme_end,
18847 int, cs_ret);
18848 } else {
18849 /*
18850 * A real error: do not allow
18851 * execution in this mapping.
18852 */
18853 DTRACE_VM3(vm_map_entry_cs_associate_failure,
18854 vm_map_offset_t, entry->vme_start,
18855 vm_map_offset_t, entry->vme_end,
18856 int, cs_ret);
18857 entry->protection &= ~VM_PROT_EXECUTE;
18858 entry->max_protection &= ~VM_PROT_EXECUTE;
18859 }
18860
18861 return cs_ret;
18862}
18863#endif /* PMAP_CS */
18864
18865/*
18866 * FORKED CORPSE FOOTPRINT
18867 *
18868 * A forked corpse gets a copy of the original VM map but its pmap is mostly
18869 * empty since it never ran and never got to fault in any pages.
18870 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
18871 * a forked corpse would therefore return very little information.
18872 *
18873 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
18874 * to vm_map_fork() to collect footprint information from the original VM map
18875 * and its pmap, and store it in the forked corpse's VM map. That information
18876 * is stored in place of the VM map's "hole list" since we'll never need to
18877  * look up holes in the corpse's map.
18878 *
18879 * The corpse's footprint info looks like this:
18880 *
18881 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
18882 * as follows:
18883 * +---------------------------------------+
18884 * header-> | cf_size |
18885 * +-------------------+-------------------+
18886 * | cf_last_region | cf_last_zeroes |
18887 * +-------------------+-------------------+
18888 * region1-> | cfr_vaddr |
18889 * +-------------------+-------------------+
18890 * | cfr_num_pages | d0 | d1 | d2 | d3 |
18891 * +---------------------------------------+
18892 * | d4 | d5 | ... |
18893 * +---------------------------------------+
18894 * | ... |
18895 * +-------------------+-------------------+
18896 * | dy | dz | na | na | cfr_vaddr... | <-region2
18897 * +-------------------+-------------------+
18898 * | cfr_vaddr (ctd) | cfr_num_pages |
18899 * +---------------------------------------+
18900 * | d0 | d1 ... |
18901 * +---------------------------------------+
18902 * ...
18903 * +---------------------------------------+
18904 * last region-> | cfr_vaddr |
18905 * +---------------------------------------+
18906 * + cfr_num_pages | d0 | d1 | d2 | d3 |
18907 * +---------------------------------------+
18908 * ...
18909 * +---------------------------------------+
18910 * | dx | dy | dz | na | na | na | na | na |
18911 * +---------------------------------------+
18912 *
18913 * where:
18914 * cf_size: total size of the buffer (rounded to page size)
18915 * cf_last_region: offset in the buffer of the last "region" sub-header
18916 * cf_last_zeroes: number of trailing "zero" dispositions at the end
18917 * of last region
18918 * cfr_vaddr: virtual address of the start of the covered "region"
18919 * cfr_num_pages: number of pages in the covered "region"
18920 * d*: disposition of the page at that virtual address
18921 * Regions in the buffer are word-aligned.
18922 *
18923 * We estimate the size of the buffer based on the number of memory regions
18924 * and the virtual size of the address space. While copying each memory region
18925 * during vm_map_fork(), we also collect the footprint info for that region
18926 * and store it in the buffer, packing it as much as possible (coalescing
18927 * contiguous memory regions to avoid having too many region headers and
18928 * avoiding long streaks of "zero" page dispositions by splitting footprint
18929  * "regions"), so the number of regions in the footprint buffer might not match
18930 * the number of memory regions in the address space.
18931 *
18932 * We also have to copy the original task's "nonvolatile" ledgers since that's
18933 * part of the footprint and will need to be reported to any tool asking for
18934 * the footprint information of the forked corpse.
18935 */
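
/*
 * Two fields used by the code below but not shown in the layout above
 * (their authoritative definitions live with the structure declarations
 * elsewhere):
 *	cf_hint_region:    offset of the region hit by the last lookup,
 *	                   used as the starting point for the next lookup
 *	cfr_disposition[]: the per-page "d*" bytes, holding the
 *	                   PMAP_QUERY_PAGE_* disposition bits
 */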
18936
18937uint64_t vm_map_corpse_footprint_count = 0;
18938uint64_t vm_map_corpse_footprint_size_avg = 0;
18939uint64_t vm_map_corpse_footprint_size_max = 0;
18940uint64_t vm_map_corpse_footprint_full = 0;
18941uint64_t vm_map_corpse_footprint_no_buf = 0;
18942
18943/*
18944 * vm_map_corpse_footprint_new_region:
18945 * closes the current footprint "region" and creates a new one
18946 *
18947 * Returns NULL if there's not enough space in the buffer for a new region.
18948 */
18949static struct vm_map_corpse_footprint_region *
18950vm_map_corpse_footprint_new_region(
18951 struct vm_map_corpse_footprint_header *footprint_header)
18952{
18953 uintptr_t footprint_edge;
18954 uint32_t new_region_offset;
18955 struct vm_map_corpse_footprint_region *footprint_region;
18956 struct vm_map_corpse_footprint_region *new_footprint_region;
18957
18958 footprint_edge = ((uintptr_t)footprint_header +
18959 footprint_header->cf_size);
18960 footprint_region = ((struct vm_map_corpse_footprint_region *)
18961 ((char *)footprint_header +
18962 footprint_header->cf_last_region));
18963 assert((uintptr_t)footprint_region + sizeof (*footprint_region) <=
18964 footprint_edge);
18965
18966 /* get rid of trailing zeroes in the last region */
18967 assert(footprint_region->cfr_num_pages >=
18968 footprint_header->cf_last_zeroes);
18969 footprint_region->cfr_num_pages -=
18970 footprint_header->cf_last_zeroes;
18971 footprint_header->cf_last_zeroes = 0;
18972
18973 /* reuse this region if it's now empty */
18974 if (footprint_region->cfr_num_pages == 0) {
18975 return footprint_region;
18976 }
18977
18978 /* compute offset of new region */
18979 new_region_offset = footprint_header->cf_last_region;
18980 new_region_offset += sizeof (*footprint_region);
18981 new_region_offset += footprint_region->cfr_num_pages;
18982 new_region_offset = roundup(new_region_offset, sizeof (int));
18983
18984 /* check if we're going over the edge */
18985 if (((uintptr_t)footprint_header +
18986 new_region_offset +
18987 sizeof (*footprint_region)) >=
18988 footprint_edge) {
18989 /* over the edge: no new region */
18990 return NULL;
18991 }
18992
18993 /* adjust offset of last region in header */
18994 footprint_header->cf_last_region = new_region_offset;
18995
18996 new_footprint_region = (struct vm_map_corpse_footprint_region *)
18997 ((char *)footprint_header +
18998 footprint_header->cf_last_region);
18999 new_footprint_region->cfr_vaddr = 0;
19000 new_footprint_region->cfr_num_pages = 0;
19001 /* caller needs to initialize new region */
19002
19003 return new_footprint_region;
19004}
19005
19006/*
19007 * vm_map_corpse_footprint_collect:
19008 * collect footprint information for "old_entry" in "old_map" and
19009 * stores it in "new_map"'s vmmap_footprint_info.
19010 */
19011kern_return_t
19012vm_map_corpse_footprint_collect(
19013 vm_map_t old_map,
19014 vm_map_entry_t old_entry,
19015 vm_map_t new_map)
19016{
19017 vm_map_offset_t va;
19018 int disp;
19019 kern_return_t kr;
19020 struct vm_map_corpse_footprint_header *footprint_header;
19021 struct vm_map_corpse_footprint_region *footprint_region;
19022 struct vm_map_corpse_footprint_region *new_footprint_region;
19023 unsigned char *next_disp_p;
19024 uintptr_t footprint_edge;
19025 uint32_t num_pages_tmp;
19026
19027 va = old_entry->vme_start;
19028
19029 vm_map_lock_assert_exclusive(old_map);
19030 vm_map_lock_assert_exclusive(new_map);
19031
19032 assert(new_map->has_corpse_footprint);
19033 assert(!old_map->has_corpse_footprint);
19034 if (!new_map->has_corpse_footprint ||
19035 old_map->has_corpse_footprint) {
19036 /*
19037 * This can only transfer footprint info from a
19038 * map with a live pmap to a map with a corpse footprint.
19039 */
19040 return KERN_NOT_SUPPORTED;
19041 }
19042
19043 if (new_map->vmmap_corpse_footprint == NULL) {
19044 vm_offset_t buf;
19045 vm_size_t buf_size;
19046
19047 buf = 0;
19048 buf_size = (sizeof (*footprint_header) +
19049 (old_map->hdr.nentries
19050 *
19051 (sizeof (*footprint_region) +
19052 			     3)) /* potential alignment for each region */
19053 +
19054 ((old_map->size / PAGE_SIZE)
19055 *
19056 sizeof (char))); /* disposition for each page */
19057// printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19058 buf_size = round_page(buf_size);
19059
19060 /* limit buffer to 1 page to validate overflow detection */
19061// buf_size = PAGE_SIZE;
19062
19063 /* limit size to a somewhat sane amount */
19064#if CONFIG_EMBEDDED
19065#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
19066#else /* CONFIG_EMBEDDED */
19067#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
19068#endif /* CONFIG_EMBEDDED */
19069 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19070 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19071 }
19072
19073 /*
19074 * Allocate the pageable buffer (with a trailing guard page).
19075 * It will be zero-filled on demand.
19076 */
19077 kr = kernel_memory_allocate(kernel_map,
19078 &buf,
19079 (buf_size
19080 + PAGE_SIZE), /* trailing guard page */
19081 0, /* mask */
19082 KMA_PAGEABLE | KMA_GUARD_LAST,
19083 VM_KERN_MEMORY_DIAG);
19084 if (kr != KERN_SUCCESS) {
19085 vm_map_corpse_footprint_no_buf++;
19086 return kr;
19087 }
19088
19089 /* initialize header and 1st region */
19090 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19091 new_map->vmmap_corpse_footprint = footprint_header;
19092
19093 footprint_header->cf_size = buf_size;
19094 footprint_header->cf_last_region =
19095 sizeof (*footprint_header);
19096 footprint_header->cf_last_zeroes = 0;
19097
19098 footprint_region = (struct vm_map_corpse_footprint_region *)
19099 ((char *)footprint_header +
19100 footprint_header->cf_last_region);
19101 footprint_region->cfr_vaddr = 0;
19102 footprint_region->cfr_num_pages = 0;
19103 } else {
19104 /* retrieve header and last region */
19105 footprint_header = (struct vm_map_corpse_footprint_header *)
19106 new_map->vmmap_corpse_footprint;
19107 footprint_region = (struct vm_map_corpse_footprint_region *)
19108 ((char *)footprint_header +
19109 footprint_header->cf_last_region);
19110 }
19111 footprint_edge = ((uintptr_t)footprint_header +
19112 footprint_header->cf_size);
19113
19114 if ((footprint_region->cfr_vaddr +
19115 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19116 PAGE_SIZE))
19117 != old_entry->vme_start) {
19118 uint64_t num_pages_delta;
19119 uint32_t region_offset_delta;
19120
19121 /*
19122 * Not the next contiguous virtual address:
19123 * start a new region or store "zero" dispositions for
19124 * the missing pages?
19125 */
19126 /* size of gap in actual page dispositions */
19127 num_pages_delta = (((old_entry->vme_start -
19128 footprint_region->cfr_vaddr) / PAGE_SIZE)
19129 - footprint_region->cfr_num_pages);
19130 /* size of gap as a new footprint region header */
19131 region_offset_delta =
19132 (sizeof (*footprint_region) +
19133 roundup((footprint_region->cfr_num_pages -
19134 footprint_header->cf_last_zeroes),
19135 sizeof (int)) -
19136 (footprint_region->cfr_num_pages -
19137 footprint_header->cf_last_zeroes));
19138// printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19139 if (region_offset_delta < num_pages_delta ||
19140 os_add3_overflow(footprint_region->cfr_num_pages,
19141 (uint32_t) num_pages_delta,
19142 1,
19143 &num_pages_tmp)) {
19144 /*
19145 * Storing data for this gap would take more space
19146 * than inserting a new footprint region header:
19147 * let's start a new region and save space. If it's a
19148 * tie, let's avoid using a new region, since that
19149 * would require more region hops to find the right
19150 * range during lookups.
19151 *
19152 * If the current region's cfr_num_pages would overflow
19153 * if we added "zero" page dispositions for the gap,
19154 * no choice but to start a new region.
19155 */
19156// printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19157 new_footprint_region =
19158 vm_map_corpse_footprint_new_region(footprint_header);
19159 /* check that we're not going over the edge */
19160 if (new_footprint_region == NULL) {
19161 goto over_the_edge;
19162 }
19163 footprint_region = new_footprint_region;
19164 /* initialize new region as empty */
19165 footprint_region->cfr_vaddr = old_entry->vme_start;
19166 footprint_region->cfr_num_pages = 0;
19167 } else {
19168 /*
19169 * Store "zero" page dispositions for the missing
19170 * pages.
19171 */
19172// printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19173 for (; num_pages_delta > 0; num_pages_delta--) {
19174 next_disp_p =
19175 ((unsigned char *) footprint_region +
19176 sizeof (*footprint_region) +
19177 footprint_region->cfr_num_pages);
19178 /* check that we're not going over the edge */
19179 if ((uintptr_t)next_disp_p >= footprint_edge) {
19180 goto over_the_edge;
19181 }
19182 /* store "zero" disposition for this gap page */
19183 footprint_region->cfr_num_pages++;
19184 *next_disp_p = (unsigned char) 0;
19185 footprint_header->cf_last_zeroes++;
19186 }
19187 }
19188 }
19189
19190 for (va = old_entry->vme_start;
19191 va < old_entry->vme_end;
19192 va += PAGE_SIZE) {
19193 vm_object_t object;
19194
19195 object = VME_OBJECT(old_entry);
19196 if (!old_entry->is_sub_map &&
19197 old_entry->iokit_acct &&
19198 object != VM_OBJECT_NULL &&
19199 object->internal &&
19200 object->purgable == VM_PURGABLE_DENY) {
19201 /*
19202 * Non-purgeable IOKit memory: phys_footprint
19203 * includes the entire virtual mapping.
19204 * Since the forked corpse's VM map entry will not
19205 * have "iokit_acct", pretend that this page's
19206 * disposition is "present & internal", so that it
19207 * shows up in the forked corpse's footprint.
19208 */
19209 disp = (PMAP_QUERY_PAGE_PRESENT |
19210 PMAP_QUERY_PAGE_INTERNAL);
19211 } else {
19212 disp = 0;
19213 pmap_query_page_info(old_map->pmap,
19214 va,
19215 &disp);
19216 }
19217
19218// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19219
19220 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19221 /*
19222 * Ignore "zero" dispositions at start of
19223 * region: just move start of region.
19224 */
19225 footprint_region->cfr_vaddr += PAGE_SIZE;
19226 continue;
19227 }
19228
19229 /* would region's cfr_num_pages overflow? */
19230 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19231 &num_pages_tmp)) {
19232 /* overflow: create a new region */
19233 new_footprint_region =
19234 vm_map_corpse_footprint_new_region(
19235 footprint_header);
19236 if (new_footprint_region == NULL) {
19237 goto over_the_edge;
19238 }
19239 footprint_region = new_footprint_region;
19240 footprint_region->cfr_vaddr = va;
19241 footprint_region->cfr_num_pages = 0;
19242 }
19243
19244 next_disp_p = ((unsigned char *)footprint_region +
19245 sizeof (*footprint_region) +
19246 footprint_region->cfr_num_pages);
19247 /* check that we're not going over the edge */
19248 if ((uintptr_t)next_disp_p >= footprint_edge) {
19249 goto over_the_edge;
19250 }
19251 		/* store this disposition */
19252 *next_disp_p = (unsigned char) disp;
19253 footprint_region->cfr_num_pages++;
19254
19255 if (disp != 0) {
19256 /* non-zero disp: break the current zero streak */
19257 footprint_header->cf_last_zeroes = 0;
19258 /* done */
19259 continue;
19260 }
19261
19262 /* zero disp: add to the current streak of zeroes */
19263 footprint_header->cf_last_zeroes++;
19264 if ((footprint_header->cf_last_zeroes +
19265 roundup((footprint_region->cfr_num_pages -
19266 footprint_header->cf_last_zeroes) &
19267 (sizeof (int) - 1),
19268 sizeof (int))) <
19269 (sizeof (*footprint_header))) {
19270 /*
19271 * There are not enough trailing "zero" dispositions
19272 * (+ the extra padding we would need for the previous
19273 * region); creating a new region would not save space
19274 * at this point, so let's keep this "zero" disposition
19275 * in this region and reconsider later.
19276 */
19277 continue;
19278 }
19279 /*
19280 * Create a new region to avoid having too many consecutive
19281 * "zero" dispositions.
19282 */
19283 new_footprint_region =
19284 vm_map_corpse_footprint_new_region(footprint_header);
19285 if (new_footprint_region == NULL) {
19286 goto over_the_edge;
19287 }
19288 footprint_region = new_footprint_region;
19289 /* initialize the new region as empty ... */
19290 footprint_region->cfr_num_pages = 0;
19291 /* ... and skip this "zero" disp */
19292 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19293 }
19294
19295 return KERN_SUCCESS;
19296
19297over_the_edge:
19298// printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19299 vm_map_corpse_footprint_full++;
19300 return KERN_RESOURCE_SHORTAGE;
19301}
19302
19303/*
19304 * vm_map_corpse_footprint_collect_done:
19305 * completes the footprint collection by getting rid of any remaining
19306 * trailing "zero" dispositions and trimming the unused part of the
19307 * kernel buffer
19308 */
19309void
19310vm_map_corpse_footprint_collect_done(
19311 vm_map_t new_map)
19312{
19313 struct vm_map_corpse_footprint_header *footprint_header;
19314 struct vm_map_corpse_footprint_region *footprint_region;
19315 vm_size_t buf_size, actual_size;
19316 kern_return_t kr;
19317
19318 assert(new_map->has_corpse_footprint);
19319 if (!new_map->has_corpse_footprint ||
19320 new_map->vmmap_corpse_footprint == NULL) {
19321 return;
19322 }
19323
19324 footprint_header = (struct vm_map_corpse_footprint_header *)
19325 new_map->vmmap_corpse_footprint;
19326 buf_size = footprint_header->cf_size;
19327
19328 footprint_region = (struct vm_map_corpse_footprint_region *)
19329 ((char *)footprint_header +
19330 footprint_header->cf_last_region);
19331
19332 /* get rid of trailing zeroes in last region */
19333 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19334 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19335 footprint_header->cf_last_zeroes = 0;
19336
19337 actual_size = (vm_size_t)(footprint_header->cf_last_region +
19338 sizeof (*footprint_region) +
19339 footprint_region->cfr_num_pages);
19340
19341// printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19342 vm_map_corpse_footprint_size_avg =
19343 (((vm_map_corpse_footprint_size_avg *
19344 vm_map_corpse_footprint_count) +
19345 actual_size) /
19346 (vm_map_corpse_footprint_count + 1));
19347 vm_map_corpse_footprint_count++;
19348 if (actual_size > vm_map_corpse_footprint_size_max) {
19349 vm_map_corpse_footprint_size_max = actual_size;
19350 }
19351
19352 actual_size = round_page(actual_size);
19353 if (buf_size > actual_size) {
19354 kr = vm_deallocate(kernel_map,
19355 ((vm_address_t)footprint_header +
19356 actual_size +
19357 PAGE_SIZE), /* trailing guard page */
19358 (buf_size - actual_size));
19359 assertf(kr == KERN_SUCCESS,
19360 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19361 footprint_header,
19362 (uint64_t) buf_size,
19363 (uint64_t) actual_size,
19364 kr);
19365 kr = vm_protect(kernel_map,
19366 ((vm_address_t)footprint_header +
19367 actual_size),
19368 PAGE_SIZE,
19369 FALSE, /* set_maximum */
19370 VM_PROT_NONE);
19371 assertf(kr == KERN_SUCCESS,
19372 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19373 footprint_header,
19374 (uint64_t) buf_size,
19375 (uint64_t) actual_size,
19376 kr);
19377 }
19378
19379 footprint_header->cf_size = actual_size;
19380}
19381
19382/*
19383 * vm_map_corpse_footprint_query_page_info:
19384  * retrieves the disposition of the page at virtual address "va"
19385 * in the forked corpse's VM map
19386 *
19387 * This is the equivalent of pmap_query_page_info() for a forked corpse.
19388 */
19389kern_return_t
19390vm_map_corpse_footprint_query_page_info(
19391 vm_map_t map,
19392 vm_map_offset_t va,
19393 int *disp)
19394{
19395 struct vm_map_corpse_footprint_header *footprint_header;
19396 struct vm_map_corpse_footprint_region *footprint_region;
19397 uint32_t footprint_region_offset;
19398 vm_map_offset_t region_start, region_end;
19399 int disp_idx;
19400 kern_return_t kr;
19401
19402 if (!map->has_corpse_footprint) {
19403 *disp = 0;
19404 kr = KERN_INVALID_ARGUMENT;
19405 goto done;
19406 }
19407
19408 footprint_header = map->vmmap_corpse_footprint;
19409 if (footprint_header == NULL) {
19410 *disp = 0;
19411// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19412 kr = KERN_INVALID_ARGUMENT;
19413 goto done;
19414 }
19415
19416 /* start looking at the hint ("cf_hint_region") */
19417 footprint_region_offset = footprint_header->cf_hint_region;
19418
19419lookup_again:
19420 if (footprint_region_offset < sizeof (*footprint_header)) {
19421 /* hint too low: start from 1st region */
19422 footprint_region_offset = sizeof (*footprint_header);
19423 }
19424 if (footprint_region_offset >= footprint_header->cf_last_region) {
19425 /* hint too high: re-start from 1st region */
19426 footprint_region_offset = sizeof (*footprint_header);
19427 }
19428 footprint_region = (struct vm_map_corpse_footprint_region *)
19429 ((char *)footprint_header + footprint_region_offset);
19430 region_start = footprint_region->cfr_vaddr;
19431 region_end = (region_start +
19432 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19433 PAGE_SIZE));
19434 if (va < region_start &&
19435 footprint_region_offset != sizeof (*footprint_header)) {
19436 /* our range starts before the hint region */
19437
19438 /* reset the hint (in a racy way...) */
19439 footprint_header->cf_hint_region = sizeof (*footprint_header);
19440 /* lookup "va" again from 1st region */
19441 footprint_region_offset = sizeof (*footprint_header);
19442 goto lookup_again;
19443 }
19444
19445 while (va >= region_end) {
19446 if (footprint_region_offset >= footprint_header->cf_last_region) {
19447 break;
19448 }
19449 /* skip the region's header */
19450 footprint_region_offset += sizeof (*footprint_region);
19451 /* skip the region's page dispositions */
19452 footprint_region_offset += footprint_region->cfr_num_pages;
19453 /* align to next word boundary */
19454 footprint_region_offset =
19455 roundup(footprint_region_offset,
19456 sizeof (int));
19457 footprint_region = (struct vm_map_corpse_footprint_region *)
19458 ((char *)footprint_header + footprint_region_offset);
19459 region_start = footprint_region->cfr_vaddr;
19460 region_end = (region_start +
19461 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19462 PAGE_SIZE));
19463 }
19464 if (va < region_start || va >= region_end) {
19465 /* page not found */
19466 *disp = 0;
19467// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19468 kr = KERN_SUCCESS;
19469 goto done;
19470 }
19471
19472 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19473 footprint_header->cf_hint_region = footprint_region_offset;
19474
19475 /* get page disposition for "va" in this region */
19476 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19477 *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19478
19479 kr = KERN_SUCCESS;
19480done:
19481// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19482 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19483 DTRACE_VM4(footprint_query_page_info,
19484 vm_map_t, map,
19485 vm_map_offset_t, va,
19486 int, *disp,
19487 kern_return_t, kr);
19488
19489 return kr;
19490}
19491
19492
19493static void
19494vm_map_corpse_footprint_destroy(
19495 vm_map_t map)
19496{
19497 if (map->has_corpse_footprint &&
19498 map->vmmap_corpse_footprint != 0) {
19499 struct vm_map_corpse_footprint_header *footprint_header;
19500 vm_size_t buf_size;
19501 kern_return_t kr;
19502
19503 footprint_header = map->vmmap_corpse_footprint;
19504 buf_size = footprint_header->cf_size;
19505 kr = vm_deallocate(kernel_map,
19506 (vm_offset_t) map->vmmap_corpse_footprint,
19507 ((vm_size_t) buf_size
19508 + PAGE_SIZE)); /* trailing guard page */
19509 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19510 map->vmmap_corpse_footprint = 0;
19511 map->has_corpse_footprint = FALSE;
19512 }
19513}
19514
19515/*
19516 * vm_map_copy_footprint_ledgers:
19517 * copies any ledger that's relevant to the memory footprint of "old_task"
19518 * into the forked corpse's task ("new_task")
19519 */
19520void
19521vm_map_copy_footprint_ledgers(
19522 task_t old_task,
19523 task_t new_task)
19524{
19525 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19526 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19527 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19528 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19529 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19530 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19531 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19532 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19533 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19534 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19535 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19536 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19537}
19538
19539/*
19540 * vm_map_copy_ledger:
19541 * copy a single ledger from "old_task" to "new_task"
19542 */
19543void
19544vm_map_copy_ledger(
19545 task_t old_task,
19546 task_t new_task,
19547 int ledger_entry)
19548{
19549 ledger_amount_t old_balance, new_balance, delta;
19550
19551 assert(new_task->map->has_corpse_footprint);
19552 if (!new_task->map->has_corpse_footprint)
19553 return;
19554
19555 /* turn off sanity checks for the ledger we're about to mess with */
19556 ledger_disable_panic_on_negative(new_task->ledger,
19557 ledger_entry);
19558
19559 /* adjust "new_task" to match "old_task" */
19560 ledger_get_balance(old_task->ledger,
19561 ledger_entry,
19562 &old_balance);
19563 ledger_get_balance(new_task->ledger,
19564 ledger_entry,
19565 &new_balance);
19566 if (new_balance == old_balance) {
19567 /* new == old: done */
19568 } else if (new_balance > old_balance) {
19569 /* new > old ==> new -= new - old */
19570 delta = new_balance - old_balance;
19571 ledger_debit(new_task->ledger,
19572 ledger_entry,
19573 delta);
19574 } else {
19575 /* new < old ==> new += old - new */
19576 delta = old_balance - new_balance;
19577 ledger_credit(new_task->ledger,
19578 ledger_entry,
19579 delta);
19580 }
19581}
19582