1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67#include <libkern/OSDebug.h>
68
69#include <mach/clock_types.h>
70#include <mach/vm_prot.h>
71#include <mach/vm_statistics.h>
72#include <mach/sdt.h>
73#include <kern/counter.h>
74#include <kern/host_statistics.h>
75#include <kern/sched_prim.h>
76#include <kern/policy_internal.h>
77#include <kern/task.h>
78#include <kern/thread.h>
79#include <kern/kalloc.h>
80#include <kern/zalloc_internal.h>
81#include <kern/ledger.h>
82#include <kern/ecc.h>
83#include <vm/pmap.h>
84#include <vm/vm_init.h>
85#include <vm/vm_map.h>
86#include <vm/vm_page.h>
87#include <vm/vm_pageout.h>
88#include <vm/vm_kern.h> /* kmem_alloc() */
89#include <kern/misc_protos.h>
90#include <mach_debug/zone_info.h>
91#include <vm/cpm.h>
92#include <pexpert/pexpert.h>
93#include <pexpert/device_tree.h>
94#include <san/kasan.h>
95#include <os/log.h>
96
97#include <vm/vm_protos.h>
98#include <vm/memory_object.h>
99#include <vm/vm_purgeable_internal.h>
100#include <vm/vm_compressor.h>
101#if defined (__x86_64__)
102#include <i386/misc_protos.h>
103#endif
104
105#if CONFIG_PHANTOM_CACHE
106#include <vm/vm_phantom_cache.h>
107#endif
108
109#if HIBERNATION
110#include <IOKit/IOHibernatePrivate.h>
111#include <machine/pal_hibernate.h>
112#endif /* HIBERNATION */
113
114#include <sys/kdebug.h>
115
116#if defined(HAS_APPLE_PAC)
117#include <ptrauth.h>
118#endif
119#if defined(__arm64__)
120#include <arm/cpu_internal.h>
121#endif /* defined(__arm64__) */
122
123#if MACH_ASSERT
124
125TUNABLE(bool, vm_check_refs_on_free, "vm_check_refs_on_free", true);
126#define ASSERT_PMAP_FREE(mem) pmap_assert_free(VM_PAGE_GET_PHYS_PAGE(mem))
127
128#else /* MACH_ASSERT */
129
130#define ASSERT_PMAP_FREE(mem) /* nothing */
131
132#endif /* MACH_ASSERT */
133
134extern boolean_t vm_pageout_running;
135extern thread_t vm_pageout_scan_thread;
136extern bool vps_dynamic_priority_enabled;
137
138char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
139char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
140char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
141char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
142
143#if CONFIG_SECLUDED_MEMORY
144struct vm_page_secluded_data vm_page_secluded;
145#endif /* CONFIG_SECLUDED_MEMORY */
146
147#if DEVELOPMENT || DEBUG
148extern struct memory_object_pager_ops shared_region_pager_ops;
149unsigned int shared_region_pagers_resident_count = 0;
150unsigned int shared_region_pagers_resident_peak = 0;
151#endif /* DEVELOPMENT || DEBUG */
152
153
154
155int PERCPU_DATA(start_color);
156vm_page_t PERCPU_DATA(free_pages);
157boolean_t hibernate_cleaning_in_progress = FALSE;
158
159uint32_t vm_lopage_free_count = 0;
160uint32_t vm_lopage_free_limit = 0;
161uint32_t vm_lopage_lowater = 0;
162boolean_t vm_lopage_refill = FALSE;
163boolean_t vm_lopage_needed = FALSE;
164
165int speculative_age_index = 0;
166int speculative_steal_index = 0;
167struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
168
169boolean_t hibernation_vmqueues_inspection = FALSE; /* Tracks if the hibernation code is looking at the VM queues.
170 * Updated and checked behind the vm_page_queues_lock. */
171
172static void vm_page_free_prepare(vm_page_t page);
173static vm_page_t vm_page_grab_fictitious_common(ppnum_t, boolean_t);
174
175static void vm_tag_init(void);
176
177/* for debugging purposes */
178SECURITY_READ_ONLY_EARLY(uint32_t) vm_packed_from_vm_pages_array_mask =
179 VM_PAGE_PACKED_FROM_ARRAY;
180SECURITY_READ_ONLY_EARLY(vm_packing_params_t) vm_page_packing_params =
181 VM_PACKING_PARAMS(VM_PAGE_PACKED_PTR);
182
183/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
186 */
187
188/*
189 * These variables record the values returned by vm_page_bootstrap,
190 * for debugging purposes. The implementation of pmap_steal_memory
191 * and pmap_startup here also uses them internally.
192 */
193
194vm_offset_t virtual_space_start;
195vm_offset_t virtual_space_end;
196uint32_t vm_page_pages;
197
198/*
199 * The vm_page_lookup() routine, which provides for fast
200 * (virtual memory object, offset) to page lookup, employs
201 * the following hash table. The vm_page_{insert,remove}
202 * routines install and remove associations in the table.
203 * [This table is often called the virtual-to-physical,
204 * or VP, table.]
205 */
206typedef struct {
207 vm_page_packed_t page_list;
208#if MACH_PAGE_HASH_STATS
209 int cur_count; /* current count */
210 int hi_count; /* high water mark */
211#endif /* MACH_PAGE_HASH_STATS */
212} vm_page_bucket_t;
213
214
215#define BUCKETS_PER_LOCK 16
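/*
 * For illustration: each spin lock covers BUCKETS_PER_LOCK consecutive
 * buckets, so bucket i is guarded by vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
 * with the value above, buckets 0..15 share lock 0, buckets 16..31 share
 * lock 1, and so on.
 */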
216
217SECURITY_READ_ONLY_LATE(vm_page_bucket_t *) vm_page_buckets; /* Array of buckets */
218SECURITY_READ_ONLY_LATE(unsigned int) vm_page_bucket_count = 0; /* How big is array? */
219SECURITY_READ_ONLY_LATE(unsigned int) vm_page_hash_mask; /* Mask for hash function */
220SECURITY_READ_ONLY_LATE(unsigned int) vm_page_hash_shift; /* Shift for hash function */
221SECURITY_READ_ONLY_LATE(uint32_t) vm_page_bucket_hash; /* Basic bucket hash */
222SECURITY_READ_ONLY_LATE(unsigned int) vm_page_bucket_lock_count = 0; /* How big is array of locks? */
223
224#ifndef VM_TAG_ACTIVE_UPDATE
225#error VM_TAG_ACTIVE_UPDATE
226#endif
227#ifndef VM_TAG_SIZECLASSES
228#error VM_TAG_SIZECLASSES
229#endif
230
231/* for debugging */
232SECURITY_READ_ONLY_LATE(bool) vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
233SECURITY_READ_ONLY_LATE(lck_spin_t *) vm_page_bucket_locks;
234
235vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
236vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
237#if VM_TAG_SIZECLASSES
238static vm_allocation_zone_total_t **vm_allocation_zone_totals;
239#endif /* VM_TAG_SIZECLASSES */
240
241vm_tag_t vm_allocation_tag_highest;
242
243#if VM_PAGE_BUCKETS_CHECK
244boolean_t vm_page_buckets_check_ready = FALSE;
245#if VM_PAGE_FAKE_BUCKETS
246vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
247vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
248#endif /* VM_PAGE_FAKE_BUCKETS */
249#endif /* VM_PAGE_BUCKETS_CHECK */
250
251#if MACH_PAGE_HASH_STATS
252/* This routine is only for debug. It is intended to be called by
253 * hand by a developer using a kernel debugger. This routine prints
254 * out vm_page_hash table statistics to the kernel debug console.
255 */
256void
257hash_debug(void)
258{
259 int i;
260 int numbuckets = 0;
261 int highsum = 0;
262 int maxdepth = 0;
263
264 for (i = 0; i < vm_page_bucket_count; i++) {
265 if (vm_page_buckets[i].hi_count) {
266 numbuckets++;
267 highsum += vm_page_buckets[i].hi_count;
268 if (vm_page_buckets[i].hi_count > maxdepth) {
269 maxdepth = vm_page_buckets[i].hi_count;
270 }
271 }
272 }
273 printf("Total number of buckets: %d\n", vm_page_bucket_count);
274 printf("Number used buckets: %d = %d%%\n",
275 numbuckets, 100 * numbuckets / vm_page_bucket_count);
276 printf("Number unused buckets: %d = %d%%\n",
277 vm_page_bucket_count - numbuckets,
278 100 * (vm_page_bucket_count - numbuckets) / vm_page_bucket_count);
279 printf("Sum of bucket max depth: %d\n", highsum);
280 printf("Average bucket depth: %d.%2d\n",
281 highsum / vm_page_bucket_count,
282 highsum % vm_page_bucket_count);
283 printf("Maximum bucket depth: %d\n", maxdepth);
284}
285#endif /* MACH_PAGE_HASH_STATS */
286
287/*
288 * The virtual page size is currently implemented as a runtime
289 * variable, but is constant once initialized using vm_set_page_size.
290 * This initialization must be done in the machine-dependent
291 * bootstrap sequence, before calling other machine-independent
292 * initializations.
293 *
294 * All references to the virtual page size outside this
295 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
296 * constants.
297 */
298#if defined(__arm64__)
299vm_size_t page_size;
300vm_size_t page_mask;
301int page_shift;
302#else
303vm_size_t page_size = PAGE_SIZE;
304vm_size_t page_mask = PAGE_MASK;
305int page_shift = PAGE_SHIFT;
306#endif
307
308SECURITY_READ_ONLY_LATE(vm_page_t) vm_pages = VM_PAGE_NULL;
309SECURITY_READ_ONLY_LATE(vm_page_t) vm_page_array_beginning_addr;
310vm_page_t vm_page_array_ending_addr;
311
312unsigned int vm_pages_count = 0;
313
314/*
315 * Resident pages that represent real memory
316 * are allocated from a set of free lists,
317 * one per color.
318 */
319unsigned int vm_colors;
320unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
321unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
322unsigned int vm_free_magazine_refill_limit = 0;
323
324
325struct vm_page_queue_free_head {
326 vm_page_queue_head_t qhead;
327} VM_PAGE_PACKED_ALIGNED;
328
329struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
330
331
332unsigned int vm_page_free_wanted;
333unsigned int vm_page_free_wanted_privileged;
334#if CONFIG_SECLUDED_MEMORY
335unsigned int vm_page_free_wanted_secluded;
336#endif /* CONFIG_SECLUDED_MEMORY */
337unsigned int vm_page_free_count;
338
339unsigned int vm_page_realtime_count;
340
341/*
342 * Occasionally, the virtual memory system uses
343 * resident page structures that do not refer to
344 * real pages, for example to leave a page with
345 * important state information in the VP table.
346 *
347 * These page structures are allocated the way
348 * most other kernel structures are.
349 */
350SECURITY_READ_ONLY_LATE(zone_t) vm_page_zone;
351vm_locks_array_t vm_page_locks;
352
353LCK_ATTR_DECLARE(vm_page_lck_attr, 0, 0);
354LCK_GRP_DECLARE(vm_page_lck_grp_free, "vm_page_free");
355LCK_GRP_DECLARE(vm_page_lck_grp_queue, "vm_page_queue");
356LCK_GRP_DECLARE(vm_page_lck_grp_local, "vm_page_queue_local");
357LCK_GRP_DECLARE(vm_page_lck_grp_purge, "vm_page_purge");
358LCK_GRP_DECLARE(vm_page_lck_grp_alloc, "vm_page_alloc");
359LCK_GRP_DECLARE(vm_page_lck_grp_bucket, "vm_page_bucket");
360LCK_SPIN_DECLARE_ATTR(vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
361LCK_TICKET_DECLARE(vm_allocation_sites_lock, &vm_page_lck_grp_bucket);
362
363unsigned int vm_page_local_q_soft_limit = 250;
364unsigned int vm_page_local_q_hard_limit = 500;
365struct vpl *__zpercpu vm_page_local_q;
366
367/* N.B. Guard and fictitious pages must not
368 * be assigned a zero phys_page value.
369 */
370/*
371 * Fictitious pages don't have a physical address,
372 * but we must initialize phys_page to something.
373 * For debugging, this should be a strange value
374 * that the pmap module can recognize in assertions.
375 */
376const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
377
378/*
379 * Guard pages are not accessible so they don't
380 * need a physical address, but we need to enter
381 * one in the pmap.
382 * Let's make it recognizable and make sure that
383 * we don't use a real physical page with that
384 * physical address.
385 */
386const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
387
388/*
389 * Resident page structures are also chained on
390 * queues that are used by the page replacement
391 * system (pageout daemon). These queues are
392 * defined here, but are shared by the pageout
393 * module. The inactive queue is broken into
 * file-backed and anonymous queues for convenience, as the
 * pageout daemon often assigns a higher
 * importance to anonymous pages (they are less likely to be picked).
397 */
398vm_page_queue_head_t vm_page_queue_active VM_PAGE_PACKED_ALIGNED;
399vm_page_queue_head_t vm_page_queue_inactive VM_PAGE_PACKED_ALIGNED;
400#if CONFIG_SECLUDED_MEMORY
401vm_page_queue_head_t vm_page_queue_secluded VM_PAGE_PACKED_ALIGNED;
402#endif /* CONFIG_SECLUDED_MEMORY */
403vm_page_queue_head_t vm_page_queue_anonymous VM_PAGE_PACKED_ALIGNED; /* inactive memory queue for anonymous pages */
404vm_page_queue_head_t vm_page_queue_throttled VM_PAGE_PACKED_ALIGNED;
405
406queue_head_t vm_objects_wired;
407
408void vm_update_darkwake_mode(boolean_t);
409
410vm_page_queue_head_t vm_page_queue_donate VM_PAGE_PACKED_ALIGNED;
411uint32_t vm_page_donate_mode;
412uint32_t vm_page_donate_target, vm_page_donate_target_high, vm_page_donate_target_low;
413uint32_t vm_page_donate_count;
414bool vm_page_donate_queue_ripe;
415
416
417vm_page_queue_head_t vm_page_queue_background VM_PAGE_PACKED_ALIGNED;
418uint32_t vm_page_background_target;
419uint32_t vm_page_background_target_snapshot;
420uint32_t vm_page_background_count;
421uint64_t vm_page_background_promoted_count;
422
423uint32_t vm_page_background_internal_count;
424uint32_t vm_page_background_external_count;
425
426uint32_t vm_page_background_mode;
427uint32_t vm_page_background_exclude_external;
428
429unsigned int vm_page_active_count;
430unsigned int vm_page_inactive_count;
431unsigned int vm_page_kernelcache_count;
432#if CONFIG_SECLUDED_MEMORY
433unsigned int vm_page_secluded_count;
434unsigned int vm_page_secluded_count_free;
435unsigned int vm_page_secluded_count_inuse;
436unsigned int vm_page_secluded_count_over_target;
437#endif /* CONFIG_SECLUDED_MEMORY */
438unsigned int vm_page_anonymous_count;
439unsigned int vm_page_throttled_count;
440unsigned int vm_page_speculative_count;
441
442unsigned int vm_page_wire_count;
443unsigned int vm_page_wire_count_on_boot = 0;
444unsigned int vm_page_stolen_count = 0;
445unsigned int vm_page_wire_count_initial;
446unsigned int vm_page_gobble_count = 0;
447unsigned int vm_page_kern_lpage_count = 0;
448
449uint64_t booter_size; /* external so it can be found in core dumps */
450
451#define VM_PAGE_WIRE_COUNT_WARNING 0
452#define VM_PAGE_GOBBLE_COUNT_WARNING 0
453
454unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
455unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
456uint64_t vm_page_purged_count = 0; /* total count of purged pages */
457
458unsigned int vm_page_xpmapped_external_count = 0;
459unsigned int vm_page_external_count = 0;
460unsigned int vm_page_internal_count = 0;
461unsigned int vm_page_pageable_external_count = 0;
462unsigned int vm_page_pageable_internal_count = 0;
463
464#if DEVELOPMENT || DEBUG
465unsigned int vm_page_speculative_recreated = 0;
466unsigned int vm_page_speculative_created = 0;
467unsigned int vm_page_speculative_used = 0;
468#endif
469
470vm_page_queue_head_t vm_page_queue_cleaned VM_PAGE_PACKED_ALIGNED;
471
472unsigned int vm_page_cleaned_count = 0;
473
474uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
475ppnum_t max_valid_low_ppnum = PPNUM_MAX;
476
477
478/*
479 * Several page replacement parameters are also
480 * shared with this module, so that page allocation
481 * (done here in vm_page_alloc) can trigger the
482 * pageout daemon.
483 */
484unsigned int vm_page_free_target = 0;
485unsigned int vm_page_free_min = 0;
486unsigned int vm_page_throttle_limit = 0;
487unsigned int vm_page_inactive_target = 0;
488#if CONFIG_SECLUDED_MEMORY
489unsigned int vm_page_secluded_target = 0;
490#endif /* CONFIG_SECLUDED_MEMORY */
491unsigned int vm_page_anonymous_min = 0;
492unsigned int vm_page_free_reserved = 0;
493
494
495/*
496 * The VM system has a couple of heuristics for deciding
497 * that pages are "uninteresting" and should be placed
498 * on the inactive queue as likely candidates for replacement.
499 * These variables let the heuristics be controlled at run-time
500 * to make experimentation easier.
501 */
502
503boolean_t vm_page_deactivate_hint = TRUE;
504
505struct vm_page_stats_reusable vm_page_stats_reusable;
506
507/*
508 * vm_set_page_size:
509 *
510 * Sets the page size, perhaps based upon the memory
511 * size. Must be called before any use of page-size
512 * dependent functions.
513 *
514 * Sets page_shift and page_mask from page_size.
515 */
516void
517vm_set_page_size(void)
518{
519 page_size = PAGE_SIZE;
520 page_mask = PAGE_MASK;
521 page_shift = PAGE_SHIFT;
522
523 if ((page_mask & page_size) != 0) {
524 panic("vm_set_page_size: page size not a power of two");
525 }
526
527 for (page_shift = 0;; page_shift++) {
528 if ((1U << page_shift) == page_size) {
529 break;
530 }
531 }
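	/*
	 * At this point (1U << page_shift) == page_size; e.g. a 16 KB page
	 * size yields page_shift == 14 and page_mask == 0x3FFF.
	 */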
532}
533
534#if defined (__x86_64__)
535
536#define MAX_CLUMP_SIZE 16
537#define DEFAULT_CLUMP_SIZE 4
538
539unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
540
541#if DEVELOPMENT || DEBUG
542unsigned long vm_clump_stats[MAX_CLUMP_SIZE + 1];
543unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
544
545static inline void
546vm_clump_update_stats(unsigned int c)
547{
548 assert(c <= vm_clump_size);
549 if (c > 0 && c <= vm_clump_size) {
550 vm_clump_stats[c] += c;
551 }
552 vm_clump_allocs += c;
553}
554#endif /* if DEVELOPMENT || DEBUG */
555
/* Called once to set up the VM clump knobs */
557static void
558vm_page_setup_clump( void )
559{
560 unsigned int override, n;
561
562 vm_clump_size = DEFAULT_CLUMP_SIZE;
563 if (PE_parse_boot_argn("clump_size", &override, sizeof(override))) {
564 vm_clump_size = override;
565 }
566
567 if (vm_clump_size > MAX_CLUMP_SIZE) {
568 panic("vm_page_setup_clump:: clump_size is too large!");
569 }
570 if (vm_clump_size < 1) {
571 panic("vm_page_setup_clump:: clump_size must be >= 1");
572 }
573 if ((vm_clump_size & (vm_clump_size - 1)) != 0) {
574 panic("vm_page_setup_clump:: clump_size must be a power of 2");
575 }
576
577 vm_clump_promote_threshold = vm_clump_size;
578 vm_clump_mask = vm_clump_size - 1;
579 for (vm_clump_shift = 0, n = vm_clump_size; n > 1; n >>= 1, vm_clump_shift++) {
580 ;
581 }
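	/*
	 * E.g. the default clump of 4 pages yields vm_clump_mask == 0x3 and
	 * vm_clump_shift == 2.
	 */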
582
583#if DEVELOPMENT || DEBUG
584 bzero(vm_clump_stats, sizeof(vm_clump_stats));
585 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
586#endif /* if DEVELOPMENT || DEBUG */
587}
588
589#endif /* #if defined (__x86_64__) */
590
591#define COLOR_GROUPS_TO_STEAL 4
592
/* Called once during startup, once the cache geometry is known.
 */
595static void
596vm_page_set_colors( void )
597{
598 unsigned int n, override;
599
600#if defined (__x86_64__)
601 /* adjust #colors because we need to color outside the clump boundary */
602 vm_cache_geometry_colors >>= vm_clump_shift;
603#endif
	if (PE_parse_boot_argn("colors", &override, sizeof(override))) {     /* colors specified as a boot-arg? */
605 n = override;
606 } else if (vm_cache_geometry_colors) { /* do we know what the cache geometry is? */
607 n = vm_cache_geometry_colors;
608 } else {
609 n = DEFAULT_COLORS; /* use default if all else fails */
610 }
611 if (n == 0) {
612 n = 1;
613 }
614 if (n > MAX_COLORS) {
615 n = MAX_COLORS;
616 }
617
618 /* the count must be a power of 2 */
619 if ((n & (n - 1)) != 0) {
620 n = DEFAULT_COLORS; /* use default if all else fails */
621 }
622 vm_colors = n;
623 vm_color_mask = n - 1;
624
625 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
626
627#if defined (__x86_64__)
628 /* adjust for reduction in colors due to clumping and multiple cores */
629 if (real_ncpus) {
630 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
631 }
632#endif
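	/*
	 * Illustrative numbers: vm_colors == 8 gives an initial refill limit
	 * of 8 * COLOR_GROUPS_TO_STEAL == 32 pages, further scaled on x86 by
	 * the clump size and the number of CPUs.
	 */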
633}
634
635/*
636 * During single threaded early boot we don't initialize all pages.
637 * This avoids some delay during boot. They'll be initialized and
638 * added to the free list as needed or after we are multithreaded by
639 * what becomes the pageout thread.
640 */
641static boolean_t fill = FALSE;
642static unsigned int fillval;
643uint_t vm_delayed_count = 0; /* when non-zero, indicates we may have more pages to init */
644ppnum_t delay_above_pnum = PPNUM_MAX;
645
646/*
 * On x86 the first 8 GB initializes quickly and gives us plenty of low memory, plus memory above it, to start off with.
648 * If ARM ever uses delayed page initialization, this value may need to be quite different.
649 */
650#define DEFAULT_DELAY_ABOVE_PHYS_GB (8)
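/*
 * With 4 KB pages, 8 GB corresponds to a delay_above_pnum of
 * 8 * (1 GB / 4 KB) == 0x200000 physical page numbers.
 */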
651
652/*
653 * When we have to dip into more delayed pages due to low memory, free up
654 * a large chunk to get things back to normal. This avoids contention on the
655 * delayed code allocating page by page.
656 */
657#define VM_DELAY_PAGE_CHUNK ((1024 * 1024 * 1024) / PAGE_SIZE)
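/*
 * E.g. with 4 KB pages this chunk is 262,144 pages; with 16 KB pages it is
 * 65,536 pages -- 1 GB worth of memory either way.
 */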
658
659/*
660 * Get and initialize the next delayed page.
661 */
662static vm_page_t
663vm_get_delayed_page(int grab_options)
664{
665 vm_page_t p;
666 ppnum_t pnum;
667
668 /*
669 * Get a new page if we have one.
670 */
671 vm_free_page_lock();
672 if (vm_delayed_count == 0) {
673 vm_free_page_unlock();
674 return NULL;
675 }
676
	if (!pmap_next_page(&pnum)) {
678 vm_delayed_count = 0;
679 vm_free_page_unlock();
680 return NULL;
681 }
682
683
684 assert(vm_delayed_count > 0);
685 --vm_delayed_count;
686
687#if defined(__x86_64__)
688 /* x86 cluster code requires increasing phys_page in vm_pages[] */
689 if (vm_pages_count > 0) {
690 assert(pnum > vm_pages[vm_pages_count - 1].vmp_phys_page);
691 }
692#endif
693 p = &vm_pages[vm_pages_count];
694 assert(p < vm_page_array_ending_addr);
	vm_page_init(p, pnum, FALSE);
696 ++vm_pages_count;
697 ++vm_page_pages;
698 vm_free_page_unlock();
699
700 /*
701 * These pages were initially counted as wired, undo that now.
702 */
703 if (grab_options & VM_PAGE_GRAB_Q_LOCK_HELD) {
704 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
705 } else {
706 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
707 vm_page_lockspin_queues();
708 }
709 --vm_page_wire_count;
710 --vm_page_wire_count_initial;
711 if (vm_page_wire_count_on_boot != 0) {
712 --vm_page_wire_count_on_boot;
713 }
714 if (!(grab_options & VM_PAGE_GRAB_Q_LOCK_HELD)) {
715 vm_page_unlock_queues();
716 }
717
718
719 if (fill) {
		fillPage(pnum, fillval);
721 }
722 return p;
723}
724
725static void vm_page_module_init_delayed(void);
726
727/*
728 * Free all remaining delayed pages to the free lists.
729 */
730void
731vm_free_delayed_pages(void)
732{
733 vm_page_t p;
734 vm_page_t list = NULL;
735 uint_t cnt = 0;
736 vm_offset_t start_free_va;
737 int64_t free_size;
738
739 while ((p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE)) != NULL) {
740 if (vm_himemory_mode) {
			vm_page_release(p, FALSE);
742 } else {
743 p->vmp_snext = list;
744 list = p;
745 }
746 ++cnt;
747 }
748
749 /*
	 * Free the pages in reverse order if not in himemory mode,
	 * so the low-memory pages end up first on the free lists (LIFO).
752 */
753 while (list != NULL) {
754 p = list;
755 list = p->vmp_snext;
756 p->vmp_snext = NULL;
		vm_page_release(p, FALSE);
758 }
759#if DEVELOPMENT || DEBUG
760 kprintf("vm_free_delayed_pages: initialized %d free pages\n", cnt);
761#endif
762
763 /*
764 * Free up any unused full pages at the end of the vm_pages[] array
765 */
	start_free_va = round_page((vm_offset_t)&vm_pages[vm_pages_count]);
767
768#if defined(__x86_64__)
769 /*
770 * Since x86 might have used large pages for vm_pages[], we can't
771 * free starting in the middle of a partially used large page.
772 */
773 if (pmap_query_pagesize(kernel_pmap, start_free_va) == I386_LPGBYTES) {
774 start_free_va = ((start_free_va + I386_LPGMASK) & ~I386_LPGMASK);
775 }
776#endif
777 if (start_free_va < (vm_offset_t)vm_page_array_ending_addr) {
778 free_size = trunc_page((vm_offset_t)vm_page_array_ending_addr - start_free_va);
779 if (free_size > 0) {
780 ml_static_mfree(start_free_va, (vm_offset_t)free_size);
781 vm_page_array_ending_addr = (void *)start_free_va;
782
783 /*
784 * Note there's no locking here, as only this thread will ever change this value.
785 * The reader, vm_page_diagnose, doesn't grab any locks for the counts it looks at.
786 */
787 vm_page_stolen_count -= (free_size >> PAGE_SHIFT);
788
789#if DEVELOPMENT || DEBUG
790 kprintf("Freeing final unused %ld bytes from vm_pages[] at 0x%lx\n",
791 (long)free_size, (long)start_free_va);
792#endif
793 }
794 }
795
796
797 /*
798 * now we can create the VM page array zone
799 */
800 vm_page_module_init_delayed();
801}
802
803/*
804 * Try and free up enough delayed pages to match a contig memory allocation.
805 */
806static void
807vm_free_delayed_pages_contig(
808 uint_t npages,
809 ppnum_t max_pnum,
810 ppnum_t pnum_mask)
811{
812 vm_page_t p;
813 ppnum_t pnum;
814 uint_t cnt = 0;
815
816 /*
817 * Treat 0 as the absolute max page number.
818 */
819 if (max_pnum == 0) {
820 max_pnum = PPNUM_MAX;
821 }
822
823 /*
824 * Free till we get a properly aligned start page
825 */
826 for (;;) {
827 p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
828 if (p == NULL) {
829 return;
830 }
		pnum = VM_PAGE_GET_PHYS_PAGE(p);
		vm_page_release(p, FALSE);
833 if (pnum >= max_pnum) {
834 return;
835 }
836 if ((pnum & pnum_mask) == 0) {
837 break;
838 }
839 }
840
841 /*
842 * Having a healthy pool of free pages will help performance. We don't
843 * want to fall back to the delayed code for every page allocation.
844 */
845 if (vm_page_free_count < VM_DELAY_PAGE_CHUNK) {
846 npages += VM_DELAY_PAGE_CHUNK;
847 }
848
849 /*
850 * Now free up the pages
851 */
852 for (cnt = 1; cnt < npages; ++cnt) {
853 p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
854 if (p == NULL) {
855 return;
856 }
		vm_page_release(p, FALSE);
858 }
859}
860
861#define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
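/*
 * ROUNDUP_NEXTP2 rounds a 32-bit value of 2 or more up to the next power of
 * two, e.g. ROUNDUP_NEXTP2(5) == 8 and ROUNDUP_NEXTP2(8) == 8.
 */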
862
863void
864vm_page_init_local_q(unsigned int num_cpus)
865{
866 struct vpl *t_local_q;
867
868 /*
869 * no point in this for a uni-processor system
870 */
871 if (num_cpus >= 2) {
872 ml_cpu_info_t cpu_info;
873
874 /*
875 * Force the allocation alignment to a cacheline,
876 * because the `vpl` struct has a lock and will be taken
877 * cross CPU so we want to isolate the rest of the per-CPU
878 * data to avoid false sharing due to this lock being taken.
879 */
880
		ml_cpu_get_info(&cpu_info);

		t_local_q = zalloc_percpu_permanent(sizeof(struct vpl),
		    cpu_info.cache_line_size - 1);
885
886 zpercpu_foreach(lq, t_local_q) {
887 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
888 vm_page_queue_init(&lq->vpl_queue);
889 }
890
891 /* make the initialization visible to all cores */
892 os_atomic_store(&vm_page_local_q, t_local_q, release);
893 }
894}
895
896/*
897 * vm_init_before_launchd
898 *
899 * This should be called right before launchd is loaded.
900 */
901void
902vm_init_before_launchd()
903{
904 vm_page_lockspin_queues();
905 vm_page_wire_count_on_boot = vm_page_wire_count;
906 vm_page_unlock_queues();
907}
908
909
910/*
911 * vm_page_bootstrap:
912 *
913 * Initializes the resident memory module.
914 *
915 * Allocates memory for the page cells, and
916 * for the object/offset-to-page hash table headers.
917 * Each page cell is initialized and placed on the free list.
918 * Returns the range of available kernel virtual memory.
919 */
920__startup_func
921void
922vm_page_bootstrap(
923 vm_offset_t *startp,
924 vm_offset_t *endp)
925{
926 unsigned int i;
927 unsigned int log1;
928 unsigned int log2;
929 unsigned int size;
930
931 /*
932 * Initialize the page queues.
933 */
934
	lck_mtx_init(&vm_page_queue_free_lock, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init(&vm_page_queue_lock, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init(&vm_purgeable_queue_lock, &vm_page_lck_grp_purge, &vm_page_lck_attr);
938
939 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
940 int group;
941
942 purgeable_queues[i].token_q_head = 0;
943 purgeable_queues[i].token_q_tail = 0;
944 for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
945 queue_init(&purgeable_queues[i].objq[group]);
946 }
947
948 purgeable_queues[i].type = i;
949 purgeable_queues[i].new_pages = 0;
950#if MACH_ASSERT
951 purgeable_queues[i].debug_count_tokens = 0;
952 purgeable_queues[i].debug_count_objects = 0;
953#endif
954 }
956 purgeable_nonvolatile_count = 0;
957 queue_init(&purgeable_nonvolatile_queue);
958
959 for (i = 0; i < MAX_COLORS; i++) {
960 vm_page_queue_init(&vm_page_queue_free[i].qhead);
961 }
962
963 vm_page_queue_init(&vm_lopage_queue_free);
964 vm_page_queue_init(&vm_page_queue_active);
965 vm_page_queue_init(&vm_page_queue_inactive);
966#if CONFIG_SECLUDED_MEMORY
967 vm_page_queue_init(&vm_page_queue_secluded);
968#endif /* CONFIG_SECLUDED_MEMORY */
969 vm_page_queue_init(&vm_page_queue_cleaned);
970 vm_page_queue_init(&vm_page_queue_throttled);
971 vm_page_queue_init(&vm_page_queue_anonymous);
972 queue_init(&vm_objects_wired);
973
974 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
975 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
976
977 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
978 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
979 }
980
981 vm_page_queue_init(&vm_page_queue_donate);
982 vm_page_queue_init(&vm_page_queue_background);
983
984 vm_page_background_count = 0;
985 vm_page_background_internal_count = 0;
986 vm_page_background_external_count = 0;
987 vm_page_background_promoted_count = 0;
988
989 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
990
991 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX) {
992 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
993 }
994
995#if defined(__LP64__)
996 vm_page_background_mode = VM_PAGE_BG_ENABLED;
997 vm_page_donate_mode = VM_PAGE_DONATE_ENABLED;
998#else
999 vm_page_background_mode = VM_PAGE_BG_DISABLED;
1000 vm_page_donate_mode = VM_PAGE_DONATE_DISABLED;
1001#endif
1002 vm_page_background_exclude_external = 0;
1003
	PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
	PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
	PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
1007
1008 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && vm_page_background_mode != VM_PAGE_BG_ENABLED) {
1009 vm_page_background_mode = VM_PAGE_BG_DISABLED;
1010 }
1011
	PE_parse_boot_argn("vm_page_donate_mode", &vm_page_donate_mode, sizeof(vm_page_donate_mode));
1013 if (vm_page_donate_mode != VM_PAGE_DONATE_DISABLED && vm_page_donate_mode != VM_PAGE_DONATE_ENABLED) {
1014 vm_page_donate_mode = VM_PAGE_DONATE_DISABLED;
1015 }
1016
1017 vm_page_donate_target_high = VM_PAGE_DONATE_TARGET_HIGHWATER;
1018 vm_page_donate_target_low = VM_PAGE_DONATE_TARGET_LOWWATER;
1019 vm_page_donate_target = vm_page_donate_target_high;
1020 vm_page_donate_count = 0;
1021
1022 vm_page_free_wanted = 0;
1023 vm_page_free_wanted_privileged = 0;
1024#if CONFIG_SECLUDED_MEMORY
1025 vm_page_free_wanted_secluded = 0;
1026#endif /* CONFIG_SECLUDED_MEMORY */
1027
1028#if defined (__x86_64__)
1029 /* this must be called before vm_page_set_colors() */
1030 vm_page_setup_clump();
1031#endif
1032
1033 vm_page_set_colors();
1034
	bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
1036 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1037 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1038 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1039
	bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
1041 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1042 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1043 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1044 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1045 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
1046 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
1047#if CONFIG_SECLUDED_MEMORY
1048 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1049#endif /* CONFIG_SECLUDED_MEMORY */
1050
	bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
1052 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1053 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1054 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1055 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1056 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
1057#if CONFIG_SECLUDED_MEMORY
1058 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1059#endif /* CONFIG_SECLUDED_MEMORY */
1060
	bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
1062 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1063 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1064 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1065 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1066#if CONFIG_SECLUDED_MEMORY
1067 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1068#endif /* CONFIG_SECLUDED_MEMORY */
1069
1070 for (vm_tag_t t = 0; t < VM_KERN_MEMORY_FIRST_DYNAMIC; t++) {
1071 vm_allocation_sites_static[t].refcount = 2;
1072 vm_allocation_sites_static[t].tag = t;
1073 vm_allocation_sites[t] = &vm_allocation_sites_static[t];
1074 }
1075 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
1076 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
1077 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
1078
1079 /*
1080 * Steal memory for the map and zone subsystems.
1081 */
	kernel_startup_initialize_upto(STARTUP_SUB_PMAP_STEAL);
1083
1084 /*
1085 * Allocate (and initialize) the virtual-to-physical
1086 * table hash buckets.
1087 *
1088 * The number of buckets should be a power of two to
1089 * get a good hash function. The following computation
1090 * chooses the first power of two that is greater
1091 * than the number of physical pages in the system.
1092 */
1093
1094 if (vm_page_bucket_count == 0) {
1095 unsigned int npages = pmap_free_pages();
1096
1097 vm_page_bucket_count = 1;
1098 while (vm_page_bucket_count < npages) {
1099 vm_page_bucket_count <<= 1;
1100 }
1101 }
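	/*
	 * For illustration: a system with about 3 million free physical pages
	 * ends up with 2^22 (4,194,304) buckets here, and therefore
	 * 4,194,304 / BUCKETS_PER_LOCK == 262,144 bucket locks below.
	 */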
1102 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
1103
1104 vm_page_hash_mask = vm_page_bucket_count - 1;
1105
1106 /*
1107 * Calculate object shift value for hashing algorithm:
1108 * O = log2(sizeof(struct vm_object))
1109 * B = log2(vm_page_bucket_count)
1110 * hash shifts the object left by
1111 * B/2 - O
1112 */
1113 size = vm_page_bucket_count;
1114 for (log1 = 0; size > 1; log1++) {
1115 size /= 2;
1116 }
1117 size = sizeof(struct vm_object);
1118 for (log2 = 0; size > 1; log2++) {
1119 size /= 2;
1120 }
1121 vm_page_hash_shift = log1 / 2 - log2 + 1;
1122
1123 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
1124 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;              /* Set the low bit - it must always be 1 to ensure a unique series */
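	/*
	 * Worked example (hypothetical sizes): with 2^20 buckets (log1 == 20)
	 * and a 256-byte struct vm_object (log2 == 8), vm_page_hash_shift is
	 * 20/2 - 8 + 1 == 3, and vm_page_bucket_hash is
	 * (1 << 10) | (1 << 5) | 1 == 0x421.
	 */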
1126
1127 if (vm_page_hash_mask & vm_page_bucket_count) {
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
1129 }
1130
1131#if VM_PAGE_BUCKETS_CHECK
1132#if VM_PAGE_FAKE_BUCKETS
1133 /*
1134 * Allocate a decoy set of page buckets, to detect
1135 * any stomping there.
1136 */
1137 vm_page_fake_buckets = (vm_page_bucket_t *)
1138 pmap_steal_memory(vm_page_bucket_count *
1139 sizeof(vm_page_bucket_t), 0);
1140 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
1141 vm_page_fake_buckets_end =
1142 vm_map_round_page((vm_page_fake_buckets_start +
1143 (vm_page_bucket_count *
1144 sizeof(vm_page_bucket_t))),
1145 PAGE_MASK);
1146 char *cp;
1147 for (cp = (char *)vm_page_fake_buckets_start;
1148 cp < (char *)vm_page_fake_buckets_end;
1149 cp++) {
1150 *cp = 0x5a;
1151 }
1152#endif /* VM_PAGE_FAKE_BUCKETS */
1153#endif /* VM_PAGE_BUCKETS_CHECK */
1154
	kernel_debug_string_early("vm_page_buckets");
	vm_page_buckets = (vm_page_bucket_t *)
	    pmap_steal_memory(vm_page_bucket_count *
	    sizeof(vm_page_bucket_t), 0);

	kernel_debug_string_early("vm_page_bucket_locks");
	vm_page_bucket_locks = (lck_spin_t *)
	    pmap_steal_memory(vm_page_bucket_lock_count *
	    sizeof(lck_spin_t), 0);
1164
1165 for (i = 0; i < vm_page_bucket_count; i++) {
1166 vm_page_bucket_t *bucket = &vm_page_buckets[i];
1167
1168 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1169#if MACH_PAGE_HASH_STATS
1170 bucket->cur_count = 0;
1171 bucket->hi_count = 0;
1172#endif /* MACH_PAGE_HASH_STATS */
1173 }
1174
1175 for (i = 0; i < vm_page_bucket_lock_count; i++) {
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
1177 }
1178
1179 vm_tag_init();
1180
1181#if VM_PAGE_BUCKETS_CHECK
1182 vm_page_buckets_check_ready = TRUE;
1183#endif /* VM_PAGE_BUCKETS_CHECK */
1184
1185 /*
1186 * Machine-dependent code allocates the resident page table.
1187 * It uses vm_page_init to initialize the page frames.
1188 * The code also returns to us the virtual space available
1189 * to the kernel. We don't trust the pmap module
1190 * to get the alignment right.
1191 */
1192
	kernel_debug_string_early("pmap_startup");
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);
1197
1198 *startp = virtual_space_start;
1199 *endp = virtual_space_end;
1200
1201 /*
1202 * Compute the initial "wire" count.
1203 * Up until now, the pages which have been set aside are not under
1204 * the VM system's control, so although they aren't explicitly
1205 * wired, they nonetheless can't be moved. At this moment,
1206 * all VM managed pages are "free", courtesy of pmap_startup.
1207 */
1208 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
1209 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) -
1210 vm_page_free_count - vm_lopage_free_count;
1211#if CONFIG_SECLUDED_MEMORY
1212 vm_page_wire_count -= vm_page_secluded_count;
1213#endif
1214 vm_page_wire_count_initial = vm_page_wire_count;
1215
1216 /* capture this for later use */
1217 booter_size = ml_get_booter_memory_size();
1218
	printf("vm_page_bootstrap: %d free pages, %d wired pages, (up to %d of which are delayed free)\n",
	    vm_page_free_count, vm_page_wire_count, vm_delayed_count);

	kernel_debug_string_early("vm_page_bootstrap complete");
1223}
1224
1225#ifndef MACHINE_PAGES
1226/*
1227 * This is the early boot time allocator for data structures needed to bootstrap the VM system.
1228 * On x86 it will allocate large pages if size is sufficiently large. We don't need to do this
1229 * on ARM yet, due to the combination of a large base page size and smaller RAM devices.
1230 */
1231static void *
1232pmap_steal_memory_internal(
1233 vm_size_t size,
1234 vm_size_t alignment,
1235 boolean_t might_free,
1236 unsigned int flags,
1237 pmap_mapping_type_t mapping_type)
1238{
1239 kern_return_t kr;
1240 vm_offset_t addr;
1241 vm_offset_t map_addr;
1242 ppnum_t phys_page;
1243 unsigned int pmap_flags;
1244
1245 /*
1246 * Size needs to be aligned to word size.
1247 */
1248 size = (size + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
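	/*
	 * E.g. on LP64 a request for 13 bytes is rounded up to 16 here.
	 */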
1249
1250 /*
1251 * Alignment defaults to word size if not specified.
1252 */
1253 if (alignment == 0) {
1254 alignment = sizeof(void*);
1255 }
1256
1257 /*
1258 * Alignment must be no greater than a page and must be a power of two.
1259 */
1260 assert(alignment <= PAGE_SIZE);
1261 assert((alignment & (alignment - 1)) == 0);
1262
1263 /*
1264 * On the first call, get the initial values for virtual address space
1265 * and page align them.
1266 */
1267 if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
1271
1272#if defined(__x86_64__)
1273 /*
1274 * Release remaining unused section of preallocated KVA and the 4K page tables
1275 * that map it. This makes the VA available for large page mappings.
1276 */
1277 Idle_PTs_release(virtual_space_start, virtual_space_end);
1278#endif
1279 }
1280
1281 /*
1282 * Allocate the virtual space for this request. On x86, we'll align to a large page
1283 * address if the size is big enough to back with at least 1 large page.
1284 */
1285#if defined(__x86_64__)
1286 if (size >= I386_LPGBYTES) {
1287 virtual_space_start = ((virtual_space_start + I386_LPGMASK) & ~I386_LPGMASK);
1288 }
1289#endif
1290 virtual_space_start = (virtual_space_start + (alignment - 1)) & ~(alignment - 1);
1291 addr = virtual_space_start;
1292 virtual_space_start += size;
1293
1294 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1295
1296 /*
1297 * Allocate and map physical pages to back the new virtual space.
1298 */
	map_addr = round_page(addr);
1300 while (map_addr < addr + size) {
1301#if defined(__x86_64__)
1302 /*
1303 * Back with a large page if properly aligned on x86
1304 */
1305 if ((map_addr & I386_LPGMASK) == 0 &&
1306 map_addr + I386_LPGBYTES <= addr + size &&
1307 pmap_pre_expand_large(kernel_pmap, map_addr) == KERN_SUCCESS &&
1308 pmap_next_page_large(&phys_page) == KERN_SUCCESS) {
1309 kr = pmap_enter(kernel_pmap, map_addr, phys_page,
1310 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
1311 VM_WIMG_USE_DEFAULT | VM_MEM_SUPERPAGE, FALSE, mapping_type);
1312
1313 if (kr != KERN_SUCCESS) {
1314 panic("pmap_steal_memory: pmap_enter() large failed, new_addr=%#lx, phys_page=%u",
1315 (unsigned long)map_addr, phys_page);
1316 }
1317 map_addr += I386_LPGBYTES;
1318 vm_page_wire_count += I386_LPGBYTES >> PAGE_SHIFT;
1319 vm_page_stolen_count += I386_LPGBYTES >> PAGE_SHIFT;
1320 vm_page_kern_lpage_count++;
1321 continue;
1322 }
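		/*
		 * Note: one 2 MB large page accounts for
		 * I386_LPGBYTES >> PAGE_SHIFT == 512 4 KB pages of wired and
		 * stolen memory above.
		 */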
1323#endif
1324
		if (!pmap_next_page_hi(&phys_page, might_free)) {
1326 panic("pmap_steal_memory() size: 0x%llx", (uint64_t)size);
1327 }
1328
1329#if defined(__x86_64__)
1330 pmap_pre_expand(kernel_pmap, map_addr);
1331#endif
1332 pmap_flags = flags ? flags : VM_WIMG_USE_DEFAULT;
1333
		kr = pmap_enter(kernel_pmap, map_addr, phys_page,
		    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
		    pmap_flags, FALSE, mapping_type);
1337
1338 if (kr != KERN_SUCCESS) {
1339 panic("pmap_steal_memory() pmap_enter failed, map_addr=%#lx, phys_page=%u",
1340 (unsigned long)map_addr, phys_page);
1341 }
1342 map_addr += PAGE_SIZE;
1343
1344 /*
1345 * Account for newly stolen memory
1346 */
1347 vm_page_wire_count++;
1348 vm_page_stolen_count++;
1349 }
1350
1351#if defined(__x86_64__)
1352 /*
1353 * The call with might_free is currently the last use of pmap_steal_memory*().
1354 * Notify the pmap layer to record which high pages were allocated so far.
1355 */
1356 if (might_free) {
1357 pmap_hi_pages_done();
1358 }
1359#endif
1360#if KASAN
1361 kasan_notify_address(round_page(addr), size);
1362#endif
1363 return (void *) addr;
1364}
1365
1366void *
1367pmap_steal_memory(
1368 vm_size_t size,
1369 vm_size_t alignment)
1370{
	return pmap_steal_memory_internal(size, alignment, FALSE, 0, PMAP_MAPPING_TYPE_RESTRICTED);
1372}
1373
1374void *
1375pmap_steal_freeable_memory(
1376 vm_size_t size)
1377{
	return pmap_steal_memory_internal(size, 0, TRUE, 0, PMAP_MAPPING_TYPE_RESTRICTED);
1379}
1380
1381void *
1382pmap_steal_zone_memory(
1383 vm_size_t size,
1384 vm_size_t alignment)
1385{
1386 unsigned int flags = 0;
1387
1388
	return pmap_steal_memory_internal(size, alignment, FALSE, flags, PMAP_MAPPING_TYPE_RESTRICTED);
1390}
1391
1392
1393#if CONFIG_SECLUDED_MEMORY
1394/* boot-args to control secluded memory */
1395TUNABLE_DT(unsigned int, secluded_mem_mb, "/defaults", "kern.secluded_mem_mb", "secluded_mem_mb", 0, TUNABLE_DT_NONE);
1396/* IOKit can use secluded memory */
1397TUNABLE(bool, secluded_for_iokit, "secluded_for_iokit", true);
1398/* apps can use secluded memory */
1399TUNABLE(bool, secluded_for_apps, "secluded_for_apps", true);
/* filecache can use secluded memory */
1401TUNABLE(secluded_filecache_mode_t, secluded_for_filecache, "secluded_for_filecache", SECLUDED_FILECACHE_RDONLY);
1402uint64_t secluded_shutoff_trigger = 0;
1403uint64_t secluded_shutoff_headroom = 150 * 1024 * 1024; /* original value from N56 */
1404#endif /* CONFIG_SECLUDED_MEMORY */
1405
1406
1407#if defined(__arm64__)
1408extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1409unsigned int vm_first_phys_ppnum = 0;
1410#endif
1411
1412void vm_page_release_startup(vm_page_t mem);
1413void
1414pmap_startup(
1415 vm_offset_t *startp,
1416 vm_offset_t *endp)
1417{
1418 unsigned int i, npages;
1419 ppnum_t phys_page;
1420 uint64_t mem_sz;
1421 uint64_t start_ns;
1422 uint64_t now_ns;
1423 uint_t low_page_count = 0;
1424
1425#if defined(__LP64__)
1426 /*
1427 * make sure we are aligned on a 64 byte boundary
1428 * for VM_PAGE_PACK_PTR (it clips off the low-order
1429 * 6 bits of the pointer)
1430 */
1431 if (virtual_space_start != virtual_space_end) {
		virtual_space_start = round_page(virtual_space_start);
1433 }
1434#endif
1435
1436 /*
1437 * We calculate how many page frames we will have
1438 * and then allocate the page structures in one chunk.
1439 *
1440 * Note that the calculation here doesn't take into account
1441 * the memory needed to map what's being allocated, i.e. the page
1442 * table entries. So the actual number of pages we get will be
1443 * less than this. To do someday: include that in the computation.
1444 *
1445 * Also for ARM, we don't use the count of free_pages, but rather the
1446 * range from last page to first page (ignore holes due to retired pages).
1447 */
1448#if defined(__arm64__)
1449 mem_sz = pmap_free_pages_span() * (uint64_t)PAGE_SIZE;
1450#else /* defined(__arm64__) */
1451 mem_sz = pmap_free_pages() * (uint64_t)PAGE_SIZE;
1452#endif /* defined(__arm64__) */
	mem_sz += round_page(virtual_space_start) - virtual_space_start;       /* Account for any slop */
1454 npages = (uint_t)(mem_sz / (PAGE_SIZE + sizeof(*vm_pages))); /* scaled to include the vm_page_ts */
1455
1456
	vm_pages = (vm_page_t) pmap_steal_freeable_memory(npages * sizeof *vm_pages);
1458
1459 /*
1460 * Check if we want to initialize pages to a known value
1461 */
	if (PE_parse_boot_argn("fill", &fillval, sizeof(fillval))) {
1463 fill = TRUE;
1464 }
1465#if DEBUG
1466 /* This slows down booting the DEBUG kernel, particularly on
1467 * large memory systems, but is worthwhile in deterministically
1468 * trapping uninitialized memory usage.
1469 */
1470 if (!fill) {
1471 fill = TRUE;
1472 fillval = 0xDEB8F177;
1473 }
1474#endif
1475 if (fill) {
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1477 }
1478
1479#if CONFIG_SECLUDED_MEMORY
1480 /*
1481 * Figure out how much secluded memory to have before we start
	 * releasing pages to the free lists.
1483 * The default, if specified nowhere else, is no secluded mem.
1484 */
1485 vm_page_secluded_target = (unsigned int)atop_64(secluded_mem_mb * 1024ULL * 1024ULL);
1486
1487 /*
1488 * Allow a really large app to effectively use secluded memory until it exits.
1489 */
1490 if (vm_page_secluded_target != 0) {
1491 /*
1492 * Get an amount from boot-args, else use 1/2 of max_mem.
1493 * 1/2 max_mem was chosen from a Peace daemon tentpole test which
1494 * used munch to induce jetsam thrashing of false idle daemons on N56.
1495 */
1496 int secluded_shutoff_mb;
1497 if (PE_parse_boot_argn("secluded_shutoff_mb", &secluded_shutoff_mb,
1498 sizeof(secluded_shutoff_mb))) {
1499 secluded_shutoff_trigger = (uint64_t)secluded_shutoff_mb * 1024 * 1024;
1500 } else {
1501 secluded_shutoff_trigger = max_mem / 2;
1502 }
1503
1504 /* ensure the headroom value is sensible and avoid underflows */
1505 assert(secluded_shutoff_trigger == 0 || secluded_shutoff_trigger > secluded_shutoff_headroom);
1506 }
1507
1508#endif /* CONFIG_SECLUDED_MEMORY */
1509
1510#if defined(__x86_64__)
1511
1512 /*
1513 * Decide how much memory we delay freeing at boot time.
1514 */
1515 uint32_t delay_above_gb;
1516 if (!PE_parse_boot_argn("delay_above_gb", &delay_above_gb, sizeof(delay_above_gb))) {
1517 delay_above_gb = DEFAULT_DELAY_ABOVE_PHYS_GB;
1518 }
1519
1520 if (delay_above_gb == 0) {
1521 delay_above_pnum = PPNUM_MAX;
1522 } else {
1523 delay_above_pnum = delay_above_gb * (1024 * 1024 * 1024 / PAGE_SIZE);
1524 }
1525
1526 /* make sure we have sane breathing room: 1G above low memory */
1527 if (delay_above_pnum <= max_valid_low_ppnum) {
1528 delay_above_pnum = max_valid_low_ppnum + ((1024 * 1024 * 1024) >> PAGE_SHIFT);
1529 }
1530
1531 if (delay_above_pnum < PPNUM_MAX) {
1532 printf("pmap_startup() delaying init/free of page nums > 0x%x\n", delay_above_pnum);
1533 }
1534
1535#endif /* defined(__x86_64__) */
1536
1537 /*
1538 * Initialize and release the page frames.
1539 */
	kernel_debug_string_early("page_frame_init");
1541
1542 vm_page_array_beginning_addr = &vm_pages[0];
1543 vm_page_array_ending_addr = &vm_pages[npages]; /* used by ptr packing/unpacking code */
1544#if VM_PAGE_PACKED_FROM_ARRAY
1545 if (npages >= VM_PAGE_PACKED_FROM_ARRAY) {
1546 panic("pmap_startup(): too many pages to support vm_page packing");
1547 }
1548#endif
1549
1550 vm_delayed_count = 0;
1551
	absolutetime_to_nanoseconds(mach_absolute_time(), &start_ns);
1553 vm_pages_count = 0;
1554 for (i = 0; i < npages; i++) {
1555 /* Did we run out of pages? */
		if (!pmap_next_page(&phys_page)) {
1557 break;
1558 }
1559
1560 if (phys_page < max_valid_low_ppnum) {
1561 ++low_page_count;
1562 }
1563
1564 /* Are we at high enough pages to delay the rest? */
1565 if (low_page_count > vm_lopage_free_limit && phys_page > delay_above_pnum) {
1566 vm_delayed_count = pmap_free_pages();
1567 break;
1568 }
1569
1570#if defined(__arm64__)
1571 if (i == 0) {
1572 vm_first_phys_ppnum = phys_page;
1573 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr,
1574 (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1575 }
1576#endif /* defined(__arm64__) */
1577
1578#if defined(__x86_64__)
1579 /* The x86 clump freeing code requires increasing ppn's to work correctly */
1580 if (i > 0) {
1581 assert(phys_page > vm_pages[i - 1].vmp_phys_page);
1582 }
1583#endif
1584 ++vm_pages_count;
		vm_page_init(&vm_pages[i], phys_page, FALSE);
		if (fill) {
			fillPage(phys_page, fillval);
		}
		if (vm_himemory_mode) {
			vm_page_release_startup(&vm_pages[i]);
1591 }
1592 }
1593 vm_page_pages = vm_pages_count; /* used to report to user space */
1594
1595 if (!vm_himemory_mode) {
1596 do {
1597 if (!VMP_ERROR_GET(&vm_pages[--i])) { /* skip retired pages */
				vm_page_release_startup(&vm_pages[i]);
1599 }
1600 } while (i != 0);
1601 }
1602
	absolutetime_to_nanoseconds(mach_absolute_time(), &now_ns);
	printf("pmap_startup() init/release time: %lld microsec\n", (now_ns - start_ns) / NSEC_PER_USEC);
	printf("pmap_startup() delayed init/release of %d pages\n", vm_delayed_count);
1606
1607#if defined(__LP64__)
1608 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0]) {
1609 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1610 }
1611
1612 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count - 1]))) != &vm_pages[vm_pages_count - 1]) {
1613 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count - 1]);
1614 }
1615#endif
1616
1617 VM_CHECK_MEMORYSTATUS;
1618
1619 /*
1620 * We have to re-align virtual_space_start,
1621 * because pmap_steal_memory has been using it.
1622 */
	virtual_space_start = round_page(virtual_space_start);
1624 *startp = virtual_space_start;
1625 *endp = virtual_space_end;
1626}
1627#endif /* MACHINE_PAGES */
1628
1629/*
1630 * Create the zone that represents the vm_pages[] array. Nothing ever allocates
1631 * or frees to this zone. It's just here for reporting purposes via zprint command.
1632 * This needs to be done after all initially delayed pages are put on the free lists.
1633 */
1634static void
1635vm_page_module_init_delayed(void)
1636{
	(void)zone_create_ext("vm pages array", sizeof(struct vm_page),
	    ZC_KASAN_NOREDZONE | ZC_KASAN_NOQUARANTINE, ZONE_ID_VM_PAGES, ^(zone_t z) {
		uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;

		zone_set_exhaustible(z, 0, true);
1642 /*
1643 * Reflect size and usage information for vm_pages[].
1644 */
1645
1646 z->z_elems_avail = (uint32_t)(vm_page_array_ending_addr - vm_pages);
1647 z->z_elems_free = z->z_elems_avail - vm_pages_count;
1648 zpercpu_get_cpu(z->z_stats, 0)->zs_mem_allocated =
1649 vm_pages_count * sizeof(struct vm_page);
1650 vm_page_array_zone_data_size = (uint64_t)vm_page_array_ending_addr - (uint64_t)vm_pages;
1651 vm_page_zone_pages = atop(round_page((vm_offset_t)vm_page_array_zone_data_size));
1652 z->z_wired_cur += vm_page_zone_pages;
1653 z->z_wired_hwm = z->z_wired_cur;
1654 z->z_va_cur = z->z_wired_cur;
1655 /* since zone accounts for these, take them out of stolen */
1656 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1657 });
1658}
1659
1660/*
1661 * Create the vm_pages zone. This is used for the vm_page structures for the pages
 * that are scavenged from other boot-time usages by ml_static_mfree(). As such,
1663 * this needs to happen in early VM bootstrap.
1664 */
1665
1666__startup_func
1667static void
1668vm_page_module_init(void)
1669{
1670 vm_size_t vm_page_with_ppnum_size;
1671
1672 /*
1673 * Since the pointers to elements in this zone will be packed, they
1674 * must have appropriate size. Not strictly what sizeof() reports.
1675 */
1676 vm_page_with_ppnum_size =
1677 (sizeof(struct vm_page_with_ppnum) + (VM_PAGE_PACKED_PTR_ALIGNMENT - 1)) &
1678 ~(VM_PAGE_PACKED_PTR_ALIGNMENT - 1);
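	/*
	 * E.g. assuming a 64-byte VM_PAGE_PACKED_PTR_ALIGNMENT, an 80-byte
	 * struct vm_page_with_ppnum is rounded up to 128 bytes here.
	 */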
1679
1680 vm_page_zone = zone_create_ext(name: "vm pages", size: vm_page_with_ppnum_size,
1681 flags: ZC_ALIGNMENT_REQUIRED | ZC_VM | ZC_NOTBITAG,
1682 ZONE_ID_ANY, extra_setup: ^(zone_t z) {
1683 /*
1684 * The number "10" is a small number that is larger than the number
1685 * of fictitious pages that any single caller will attempt to allocate
1686 * without blocking.
1687 *
1688 * The largest such number at the moment is kmem_alloc()
1689 * when 2 guard pages are asked for. 10 is simply a somewhat larger number,
1690 * taking into account the 50% hysteresis the zone allocator uses.
1691 *
1692 * Note: this works at all because the zone allocator
1693 * doesn't ever allocate fictitious pages.
1694 */
1695 zone_raise_reserve(zone_or_view: z, min_elements: 10);
1696 });
1697}
1698STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_page_module_init);
1699
1700/*
1701 * Routine: vm_page_create
1702 * Purpose:
1703 * After the VM system is up, machine-dependent code
1704 * may stumble across more physical memory: for example,
1705 * memory that it was reserving for a frame buffer.
1706 * vm_page_create turns this memory into available pages.
1707 */
1708
1709void
1710vm_page_create(
1711 ppnum_t start,
1712 ppnum_t end)
1713{
1714 ppnum_t phys_page;
1715 vm_page_t m;
1716
1717 for (phys_page = start;
1718 phys_page < end;
1719 phys_page++) {
1720 m = vm_page_grab_fictitious_common(phys_page, TRUE);
1721 m->vmp_fictitious = FALSE;
1722 pmap_clear_noencrypt(pn: phys_page);
1723
1724
1725 vm_free_page_lock();
1726 vm_page_pages++;
1727 vm_free_page_unlock();
1728 vm_page_release(page: m, FALSE);
1729 }
1730}
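/*
 * Usage sketch (hypothetical caller, not code from this file): a driver
 * that gives back a reserved physical range simply hands the page-number
 * range to vm_page_create(); the range is half-open, matching the loop
 * above (first_ppnum included, last_ppnum excluded):
 *
 *	vm_page_create(first_ppnum, last_ppnum);
 */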
1731
1732
1733/*
1734 * vm_page_hash:
1735 *
1736 * Distributes the object/offset key pair among hash buckets.
1737 *
1738 * NOTE: The bucket count must be a power of 2
1739 */
1740#define vm_page_hash(object, offset) (\
1741 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1742 & vm_page_hash_mask)
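/*
 * Readability sketch only (not compiled): the macro above is equivalent to
 * the function-style form below. vm_page_bucket_hash and vm_page_hash_mask
 * are the existing globals the macro already uses; because the bucket count
 * is a power of 2 (see the NOTE above) and vm_page_hash_mask is one less
 * than it, the final mask is a cheap substitute for a modulo.
 *
 *	static inline natural_t
 *	vm_page_hash_sketch(vm_object_t object, vm_object_offset_t offset)
 *	{
 *		natural_t obj_term  = (natural_t)((uintptr_t)object * vm_page_bucket_hash);
 *		natural_t page_term = (uint32_t)atop_64(offset) ^ vm_page_bucket_hash;
 *
 *		return (obj_term + page_term) & vm_page_hash_mask;
 *	}
 */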
1743
1744
1745/*
1746 * vm_page_insert: [ internal use only ]
1747 *
1748 * Inserts the given mem entry into the object/object-page
1749 * table and object list.
1750 *
1751 * The object must be locked.
1752 */
1753void
1754vm_page_insert(
1755 vm_page_t mem,
1756 vm_object_t object,
1757 vm_object_offset_t offset)
1758{
1759 vm_page_insert_internal(page: mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1760}
1761
1762void
1763vm_page_insert_wired(
1764 vm_page_t mem,
1765 vm_object_t object,
1766 vm_object_offset_t offset,
1767 vm_tag_t tag)
1768{
1769 vm_page_insert_internal(page: mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1770}
1771
1772void
1773vm_page_insert_internal(
1774 vm_page_t mem,
1775 vm_object_t object,
1776 vm_object_offset_t offset,
1777 vm_tag_t tag,
1778 boolean_t queues_lock_held,
1779 boolean_t insert_in_hash,
1780 boolean_t batch_pmap_op,
1781 boolean_t batch_accounting,
1782 uint64_t *delayed_ledger_update)
1783{
1784 vm_page_bucket_t *bucket;
1785 lck_spin_t *bucket_lock;
1786 int hash_id;
1787 task_t owner;
1788 int ledger_idx_volatile;
1789 int ledger_idx_nonvolatile;
1790 int ledger_idx_volatile_compressed;
1791 int ledger_idx_nonvolatile_compressed;
1792 boolean_t do_footprint;
1793
1794#if 0
1795 /*
1796 * we may not hold the page queue lock
1797 * so this check isn't safe to make
1798 */
1799 VM_PAGE_CHECK(mem);
1800#endif
1801
1802 assertf(page_aligned(offset), "0x%llx\n", offset);
1803
1804 assert(!VM_PAGE_WIRED(mem) || mem->vmp_private || mem->vmp_fictitious || (tag != VM_KERN_MEMORY_NONE));
1805
1806 vm_object_lock_assert_exclusive(object);
1807 LCK_MTX_ASSERT(&vm_page_queue_lock,
1808 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1809 : LCK_MTX_ASSERT_NOTOWNED);
1810
1811 if (queues_lock_held == FALSE) {
1812 assert(!VM_PAGE_PAGEABLE(mem));
1813 }
1814
1815 if (insert_in_hash == TRUE) {
1816#if DEBUG || VM_PAGE_BUCKETS_CHECK
1817 if (mem->vmp_tabled || mem->vmp_object) {
1818 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1819 "already in (obj=%p,off=0x%llx)",
1820 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
1821 }
1822#endif
1823 if (object->internal && (offset >= object->vo_size)) {
1824 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1825 mem, object, offset, object->vo_size);
1826 }
1827
1828 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1829
1830 /*
1831 * Record the object/offset pair in this page
1832 */
1833
1834 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1835 mem->vmp_offset = offset;
1836
1837#if CONFIG_SECLUDED_MEMORY
1838 if (object->eligible_for_secluded) {
1839 vm_page_secluded.eligible_for_secluded++;
1840 }
1841#endif /* CONFIG_SECLUDED_MEMORY */
1842
1843 /*
1844 * Insert it into the object/offset hash table
1845 */
1846 hash_id = vm_page_hash(object, offset);
1847 bucket = &vm_page_buckets[hash_id];
1848 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1849
1850 lck_spin_lock_grp(lck: bucket_lock, grp: &vm_page_lck_grp_bucket);
1851
1852 mem->vmp_next_m = bucket->page_list;
1853 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1854 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1855
1856#if MACH_PAGE_HASH_STATS
1857 if (++bucket->cur_count > bucket->hi_count) {
1858 bucket->hi_count = bucket->cur_count;
1859 }
1860#endif /* MACH_PAGE_HASH_STATS */
1861 mem->vmp_hashed = TRUE;
1862 lck_spin_unlock(lck: bucket_lock);
1863 }
1864
1865 {
1866 unsigned int cache_attr;
1867
1868 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1869
1870 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1871 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1872 }
1873 }
1874 /*
1875 * Now link into the object's list of backed pages.
1876 */
1877 vm_page_queue_enter(&object->memq, mem, vmp_listq);
1878 object->memq_hint = mem;
1879 mem->vmp_tabled = TRUE;
1880
1881 /*
1882 * Show that the object has one more resident page.
1883 */
1884
1885 object->resident_page_count++;
1886 if (VM_PAGE_WIRED(mem)) {
1887 assert(mem->vmp_wire_count > 0);
1888 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1889 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1890 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1891 }
1892 assert(object->resident_page_count >= object->wired_page_count);
1893
1894#if DEVELOPMENT || DEBUG
1895 if (object->object_is_shared_cache &&
1896 object->pager != NULL &&
1897 object->pager->mo_pager_ops == &shared_region_pager_ops) {
1898 int new, old;
1899 assert(!object->internal);
1900 new = OSAddAtomic(+1, &shared_region_pagers_resident_count);
1901 do {
1902 old = shared_region_pagers_resident_peak;
1903 } while (old < new &&
1904 !OSCompareAndSwap(old, new, &shared_region_pagers_resident_peak));
1905 }
1906#endif /* DEVELOPMENT || DEBUG */
1907
1908 if (batch_accounting == FALSE) {
1909 if (object->internal) {
1910 OSAddAtomic(1, &vm_page_internal_count);
1911 } else {
1912 OSAddAtomic(1, &vm_page_external_count);
1913 }
1914 }
1915
1916 /*
1917 * It wouldn't make sense to insert a "reusable" page in
1918 * an object (the page would have been marked "reusable" only
1919 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1920 * in the object at that time).
1921 * But a page could be inserted in an "all_reusable" object, if
1922 * something faults it in (a vm_read() from another task or a
1923 * "use-after-free" issue in user space, for example). It can
1924 * also happen if we're relocating a page from that object to
1925 * a different physical page during a physically-contiguous
1926 * allocation.
1927 */
1928 assert(!mem->vmp_reusable);
1929 if (object->all_reusable) {
1930 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1931 }
1932
1933 if (object->purgable == VM_PURGABLE_DENY &&
1934 !object->vo_ledger_tag) {
1935 owner = TASK_NULL;
1936 } else {
1937 owner = VM_OBJECT_OWNER(object);
1938 vm_object_ledger_tag_ledgers(object,
1939 ledger_idx_volatile: &ledger_idx_volatile,
1940 ledger_idx_nonvolatile: &ledger_idx_nonvolatile,
1941 ledger_idx_volatile_compressed: &ledger_idx_volatile_compressed,
1942 ledger_idx_nonvolatile_compressed: &ledger_idx_nonvolatile_compressed,
1943 do_footprint: &do_footprint);
1944 }
1945 if (owner &&
1946 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1947 object->purgable == VM_PURGABLE_DENY ||
1948 VM_PAGE_WIRED(mem))) {
1949 if (delayed_ledger_update) {
1950 *delayed_ledger_update += PAGE_SIZE;
1951 } else {
1952 /* more non-volatile bytes */
1953 ledger_credit(ledger: owner->ledger,
1954 entry: ledger_idx_nonvolatile,
1955 PAGE_SIZE);
1956 if (do_footprint) {
1957 /* more footprint */
1958 ledger_credit(ledger: owner->ledger,
1959 entry: task_ledgers.phys_footprint,
1960 PAGE_SIZE);
1961 }
1962 }
1963 } else if (owner &&
1964 (object->purgable == VM_PURGABLE_VOLATILE ||
1965 object->purgable == VM_PURGABLE_EMPTY)) {
1966 assert(!VM_PAGE_WIRED(mem));
1967 /* more volatile bytes */
1968 ledger_credit(ledger: owner->ledger,
1969 entry: ledger_idx_volatile,
1970 PAGE_SIZE);
1971 }
1972
1973 if (object->purgable == VM_PURGABLE_VOLATILE) {
1974 if (VM_PAGE_WIRED(mem)) {
1975 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1976 } else {
1977 OSAddAtomic(+1, &vm_page_purgeable_count);
1978 }
1979 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1980 mem->vmp_q_state == VM_PAGE_ON_THROTTLED_Q) {
1981 /*
1982 * This page belongs to a purged VM object but hasn't
1983 * been purged (because it was "busy").
1984 * It's in the "throttled" queue and hence not
1985 * visible to vm_pageout_scan(). Move it to a pageable
1986 * queue, so that it can eventually be reclaimed, instead
1987 * of lingering in the "empty" object.
1988 */
1989 if (queues_lock_held == FALSE) {
1990 vm_page_lockspin_queues();
1991 }
1992 vm_page_deactivate(page: mem);
1993 if (queues_lock_held == FALSE) {
1994 vm_page_unlock_queues();
1995 }
1996 }
1997
1998#if VM_OBJECT_TRACKING_OP_MODIFIED
1999 if (vm_object_tracking_btlog &&
2000 object->internal &&
2001 object->resident_page_count == 0 &&
2002 object->pager == NULL &&
2003 object->shadow != NULL &&
2004 object->shadow->vo_copy == object) {
2005 btlog_record(vm_object_tracking_btlog, object,
2006 VM_OBJECT_TRACKING_OP_MODIFIED,
2007 btref_get(__builtin_frame_address(0), 0));
2008 }
2009#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
2010}
2011
2012/*
2013 * vm_page_replace:
2014 *
2015 * Exactly like vm_page_insert, except that we first
2016 * remove any existing page at the given offset in object.
2017 *
2018 * The object must be locked.
2019 */
2020void
2021vm_page_replace(
2022 vm_page_t mem,
2023 vm_object_t object,
2024 vm_object_offset_t offset)
2025{
2026 vm_page_bucket_t *bucket;
2027 vm_page_t found_m = VM_PAGE_NULL;
2028 lck_spin_t *bucket_lock;
2029 int hash_id;
2030
2031#if 0
2032 /*
2033 * we don't hold the page queue lock
2034 * so this check isn't safe to make
2035 */
2036 VM_PAGE_CHECK(mem);
2037#endif
2038 vm_object_lock_assert_exclusive(object);
2039#if DEBUG || VM_PAGE_BUCKETS_CHECK
2040 if (mem->vmp_tabled || mem->vmp_object) {
2041 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
2042 "already in (obj=%p,off=0x%llx)",
2043 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
2044 }
2045#endif
2046 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2047
2048 assert(!VM_PAGE_PAGEABLE(mem));
2049
2050 /*
2051 * Record the object/offset pair in this page
2052 */
2053 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
2054 mem->vmp_offset = offset;
2055
2056 /*
2057 * Insert it into the object/offset hash table,
2058 * replacing any page that might have been there.
2059 */
2060
2061 hash_id = vm_page_hash(object, offset);
2062 bucket = &vm_page_buckets[hash_id];
2063 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2064
2065 lck_spin_lock_grp(lck: bucket_lock, grp: &vm_page_lck_grp_bucket);
2066
2067 if (bucket->page_list) {
2068 vm_page_packed_t *mp = &bucket->page_list;
2069 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
2070
2071 do {
2072 /*
2073 * compare packed object pointers
2074 */
2075 if (m->vmp_object == mem->vmp_object && m->vmp_offset == offset) {
2076 /*
2077 * Remove old page from hash list
2078 */
2079 *mp = m->vmp_next_m;
2080 m->vmp_hashed = FALSE;
2081 m->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2082
2083 found_m = m;
2084 break;
2085 }
2086 mp = &m->vmp_next_m;
2087 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
2088
2089 mem->vmp_next_m = bucket->page_list;
2090 } else {
2091 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2092 }
2093 /*
2094 * insert new page at head of hash list
2095 */
2096 bucket->page_list = VM_PAGE_PACK_PTR(mem);
2097 mem->vmp_hashed = TRUE;
2098
2099 lck_spin_unlock(lck: bucket_lock);
2100
2101 if (found_m) {
2102 /*
2103 * there was already a page at the specified
2104 * offset for this object... remove it from
2105 * the object and free it back to the free list
2106 */
2107 vm_page_free_unlocked(page: found_m, FALSE);
2108 }
2109 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
2110}
2111
2112/*
2113 * vm_page_remove: [ internal use only ]
2114 *
2115 * Removes the given mem entry from the object/offset-page
2116 * table and the object page list.
2117 *
2118 * The object must be locked.
2119 */
2120
2121void
2122vm_page_remove(
2123 vm_page_t mem,
2124 boolean_t remove_from_hash)
2125{
2126 vm_page_bucket_t *bucket;
2127 vm_page_t this;
2128 lck_spin_t *bucket_lock;
2129 int hash_id;
2130 task_t owner;
2131 vm_object_t m_object;
2132 int ledger_idx_volatile;
2133 int ledger_idx_nonvolatile;
2134 int ledger_idx_volatile_compressed;
2135 int ledger_idx_nonvolatile_compressed;
2136 int do_footprint;
2137
2138 m_object = VM_PAGE_OBJECT(mem);
2139
2140 vm_object_lock_assert_exclusive(m_object);
2141 assert(mem->vmp_tabled);
2142 assert(!mem->vmp_cleaning);
2143 assert(!mem->vmp_laundry);
2144
2145 if (VM_PAGE_PAGEABLE(mem)) {
2146 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2147 }
2148#if 0
2149 /*
2150 * we don't hold the page queue lock
2151 * so this check isn't safe to make
2152 */
2153 VM_PAGE_CHECK(mem);
2154#endif
2155 if (remove_from_hash == TRUE) {
2156 /*
2157 * Remove from the object/offset hash table
2158 */
2159 hash_id = vm_page_hash(m_object, mem->vmp_offset);
2160 bucket = &vm_page_buckets[hash_id];
2161 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2162
2163 lck_spin_lock_grp(lck: bucket_lock, grp: &vm_page_lck_grp_bucket);
2164
2165 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
2166 /* optimize for common case */
2167
2168 bucket->page_list = mem->vmp_next_m;
2169 } else {
2170 vm_page_packed_t *prev;
2171
2172 for (prev = &this->vmp_next_m;
2173 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
2174 prev = &this->vmp_next_m) {
2175 continue;
2176 }
2177 *prev = this->vmp_next_m;
2178 }
2179#if MACH_PAGE_HASH_STATS
2180 bucket->cur_count--;
2181#endif /* MACH_PAGE_HASH_STATS */
2182 mem->vmp_hashed = FALSE;
2183 this->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
2184 lck_spin_unlock(lck: bucket_lock);
2185 }
2186 /*
2187 * Now remove from the object's list of backed pages.
2188 */
2189
2190 vm_page_remove_internal(page: mem);
2191
2192 /*
2193 * And show that the object has one fewer resident
2194 * page.
2195 */
2196
2197 assert(m_object->resident_page_count > 0);
2198 m_object->resident_page_count--;
2199
2200#if DEVELOPMENT || DEBUG
2201 if (m_object->object_is_shared_cache &&
2202 m_object->pager != NULL &&
2203 m_object->pager->mo_pager_ops == &shared_region_pager_ops) {
2204 assert(!m_object->internal);
2205 OSAddAtomic(-1, &shared_region_pagers_resident_count);
2206 }
2207#endif /* DEVELOPMENT || DEBUG */
2208
2209 if (m_object->internal) {
2210#if DEBUG
2211 assert(vm_page_internal_count);
2212#endif /* DEBUG */
2213
2214 OSAddAtomic(-1, &vm_page_internal_count);
2215 } else {
2216 assert(vm_page_external_count);
2217 OSAddAtomic(-1, &vm_page_external_count);
2218
2219 if (mem->vmp_xpmapped) {
2220 assert(vm_page_xpmapped_external_count);
2221 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
2222 }
2223 }
2224 if (!m_object->internal &&
2225 m_object->cached_list.next &&
2226 m_object->cached_list.prev) {
2227 if (m_object->resident_page_count == 0) {
2228 vm_object_cache_remove(m_object);
2229 }
2230 }
2231
2232 if (VM_PAGE_WIRED(mem)) {
2233 assert(mem->vmp_wire_count > 0);
2234 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
2235 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
2236 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
2237 }
2238 assert(m_object->resident_page_count >=
2239 m_object->wired_page_count);
2240 if (mem->vmp_reusable) {
2241 assert(m_object->reusable_page_count > 0);
2242 m_object->reusable_page_count--;
2243 assert(m_object->reusable_page_count <=
2244 m_object->resident_page_count);
2245 mem->vmp_reusable = FALSE;
2246 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
2247 vm_page_stats_reusable.reused_remove++;
2248 } else if (m_object->all_reusable) {
2249 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
2250 vm_page_stats_reusable.reused_remove++;
2251 }
2252
2253 if (m_object->purgable == VM_PURGABLE_DENY &&
2254 !m_object->vo_ledger_tag) {
2255 owner = TASK_NULL;
2256 } else {
2257 owner = VM_OBJECT_OWNER(m_object);
2258 vm_object_ledger_tag_ledgers(object: m_object,
2259 ledger_idx_volatile: &ledger_idx_volatile,
2260 ledger_idx_nonvolatile: &ledger_idx_nonvolatile,
2261 ledger_idx_volatile_compressed: &ledger_idx_volatile_compressed,
2262 ledger_idx_nonvolatile_compressed: &ledger_idx_nonvolatile_compressed,
2263 do_footprint: &do_footprint);
2264 }
2265 if (owner &&
2266 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
2267 m_object->purgable == VM_PURGABLE_DENY ||
2268 VM_PAGE_WIRED(mem))) {
2269 /* less non-volatile bytes */
2270 ledger_debit(ledger: owner->ledger,
2271 entry: ledger_idx_nonvolatile,
2272 PAGE_SIZE);
2273 if (do_footprint) {
2274 /* less footprint */
2275 ledger_debit(ledger: owner->ledger,
2276 entry: task_ledgers.phys_footprint,
2277 PAGE_SIZE);
2278 }
2279 } else if (owner &&
2280 (m_object->purgable == VM_PURGABLE_VOLATILE ||
2281 m_object->purgable == VM_PURGABLE_EMPTY)) {
2282 assert(!VM_PAGE_WIRED(mem));
2283 /* less volatile bytes */
2284 ledger_debit(ledger: owner->ledger,
2285 entry: ledger_idx_volatile,
2286 PAGE_SIZE);
2287 }
2288 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
2289 if (VM_PAGE_WIRED(mem)) {
2290 assert(vm_page_purgeable_wired_count > 0);
2291 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2292 } else {
2293 assert(vm_page_purgeable_count > 0);
2294 OSAddAtomic(-1, &vm_page_purgeable_count);
2295 }
2296 }
2297
2298 if (m_object->set_cache_attr == TRUE) {
2299 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(m: mem), 0);
2300 }
2301
2302 mem->vmp_tabled = FALSE;
2303 mem->vmp_object = 0;
2304 mem->vmp_offset = (vm_object_offset_t) -1;
2305}
2306
2307
2308/*
2309 * vm_page_lookup:
2310 *
2311 * Returns the page associated with the object/offset
2312 * pair specified; if none is found, VM_PAGE_NULL is returned.
2313 *
2314 * The object must be locked. No side effects.
2315 */
2316
2317#define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
2318
2319#if DEBUG_VM_PAGE_LOOKUP
2320
2321struct {
2322 uint64_t vpl_total;
2323 uint64_t vpl_empty_obj;
2324 uint64_t vpl_bucket_NULL;
2325 uint64_t vpl_hit_hint;
2326 uint64_t vpl_hit_hint_next;
2327 uint64_t vpl_hit_hint_prev;
2328 uint64_t vpl_fast;
2329 uint64_t vpl_slow;
2330 uint64_t vpl_hit;
2331 uint64_t vpl_miss;
2332
2333 uint64_t vpl_fast_elapsed;
2334 uint64_t vpl_slow_elapsed;
2335} vm_page_lookup_stats __attribute__((aligned(8)));
2336
2337#endif
2338
2339#define KDP_VM_PAGE_WALK_MAX 1000
2340
2341vm_page_t
2342kdp_vm_page_lookup(
2343 vm_object_t object,
2344 vm_object_offset_t offset)
2345{
2346 vm_page_t cur_page;
2347 int num_traversed = 0;
2348
2349 if (not_in_kdp) {
2350 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
2351 }
2352
2353 vm_page_queue_iterate(&object->memq, cur_page, vmp_listq) {
2354 if (cur_page->vmp_offset == offset) {
2355 return cur_page;
2356 }
2357 num_traversed++;
2358
2359 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
2360 return VM_PAGE_NULL;
2361 }
2362 }
2363
2364 return VM_PAGE_NULL;
2365}
2366
2367vm_page_t
2368vm_page_lookup(
2369 vm_object_t object,
2370 vm_object_offset_t offset)
2371{
2372 vm_page_t mem;
2373 vm_page_bucket_t *bucket;
2374 vm_page_queue_entry_t qe;
2375 lck_spin_t *bucket_lock = NULL;
2376 int hash_id;
2377#if DEBUG_VM_PAGE_LOOKUP
2378 uint64_t start, elapsed;
2379
2380 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2381#endif
2382
2383 if (VM_KERNEL_ADDRESS(offset)) {
2384 offset = VM_KERNEL_STRIP_UPTR(offset);
2385 }
2386
2387 vm_object_lock_assert_held(object);
2388 assertf(page_aligned(offset), "offset 0x%llx\n", offset);
2389
2390 if (object->resident_page_count == 0) {
2391#if DEBUG_VM_PAGE_LOOKUP
2392 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2393#endif
2394 return VM_PAGE_NULL;
2395 }
2396
2397 mem = object->memq_hint;
2398
2399 if (mem != VM_PAGE_NULL) {
2400 assert(VM_PAGE_OBJECT(mem) == object);
2401
2402 if (mem->vmp_offset == offset) {
2403#if DEBUG_VM_PAGE_LOOKUP
2404 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2405#endif
2406 return mem;
2407 }
2408 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->vmp_listq);
2409
2410 if (!vm_page_queue_end(&object->memq, qe)) {
2411 vm_page_t next_page;
2412
2413 next_page = (vm_page_t)((uintptr_t)qe);
2414 assert(VM_PAGE_OBJECT(next_page) == object);
2415
2416 if (next_page->vmp_offset == offset) {
2417 object->memq_hint = next_page; /* new hint */
2418#if DEBUG_VM_PAGE_LOOKUP
2419 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2420#endif
2421 return next_page;
2422 }
2423 }
2424 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->vmp_listq);
2425
2426 if (!vm_page_queue_end(&object->memq, qe)) {
2427 vm_page_t prev_page;
2428
2429 prev_page = (vm_page_t)((uintptr_t)qe);
2430 assert(VM_PAGE_OBJECT(prev_page) == object);
2431
2432 if (prev_page->vmp_offset == offset) {
2433 object->memq_hint = prev_page; /* new hint */
2434#if DEBUG_VM_PAGE_LOOKUP
2435 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2436#endif
2437 return prev_page;
2438 }
2439 }
2440 }
2441 /*
2442 * Search the hash table for this object/offset pair
2443 */
2444 hash_id = vm_page_hash(object, offset);
2445 bucket = &vm_page_buckets[hash_id];
2446
2447 /*
2448 * since we hold the object lock, we are guaranteed that no
2449 * new pages can be inserted into this object... this in turn
2450 * guarantees that the page we're looking for can't exist
2451 * if the bucket it hashes to is currently NULL even when looked
2452 * at outside the scope of the hash bucket lock... this is a
2453 * really cheap optimization to avoid taking the lock
2454 */
2455 if (!bucket->page_list) {
2456#if DEBUG_VM_PAGE_LOOKUP
2457 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2458#endif
2459 return VM_PAGE_NULL;
2460 }
2461
2462#if DEBUG_VM_PAGE_LOOKUP
2463 start = mach_absolute_time();
2464#endif
2465 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
2466 /*
2467 * on average, it's roughly 3 times faster to walk a short memq list
2468 * than to take the spin lock and go through the hash list
2469 */
2470 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2471
2472 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2473 if (mem->vmp_offset == offset) {
2474 break;
2475 }
2476
2477 mem = (vm_page_t)vm_page_queue_next(&mem->vmp_listq);
2478 }
2479 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2480 mem = NULL;
2481 }
2482 } else {
2483 vm_page_object_t packed_object;
2484
2485 packed_object = VM_PAGE_PACK_OBJECT(object);
2486
2487 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2488
2489 lck_spin_lock_grp(lck: bucket_lock, grp: &vm_page_lck_grp_bucket);
2490
2491 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2492 mem != VM_PAGE_NULL;
2493 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m))) {
2494#if 0
2495 /*
2496 * we don't hold the page queue lock
2497 * so this check isn't safe to make
2498 */
2499 VM_PAGE_CHECK(mem);
2500#endif
2501 if ((mem->vmp_object == packed_object) && (mem->vmp_offset == offset)) {
2502 break;
2503 }
2504 }
2505 lck_spin_unlock(lck: bucket_lock);
2506 }
2507
2508#if DEBUG_VM_PAGE_LOOKUP
2509 elapsed = mach_absolute_time() - start;
2510
2511 if (bucket_lock) {
2512 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2513 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2514 } else {
2515 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2516 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2517 }
2518 if (mem != VM_PAGE_NULL) {
2519 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2520 } else {
2521 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2522 }
2523#endif
2524 if (mem != VM_PAGE_NULL) {
2525 assert(VM_PAGE_OBJECT(mem) == object);
2526
2527 object->memq_hint = mem;
2528 }
2529 return mem;
2530}
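/*
 * Usage sketch (illustrative only): callers must hold the object lock
 * across the lookup and for as long as they rely on the result, e.g.:
 *
 *	vm_object_lock(object);
 *	m = vm_page_lookup(object, offset);
 *	if (m != VM_PAGE_NULL) {
 *		... operate on m while the object remains locked ...
 *	}
 *	vm_object_unlock(object);
 */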
2531
2532
2533/*
2534 * vm_page_rename:
2535 *
2536 * Move the given memory entry from its
2537 * current object to the specified target object/offset.
2538 *
2539 * The object must be locked.
2540 */
2541void
2542vm_page_rename(
2543 vm_page_t mem,
2544 vm_object_t new_object,
2545 vm_object_offset_t new_offset)
2546{
2547 boolean_t internal_to_external, external_to_internal;
2548 vm_tag_t tag;
2549 vm_object_t m_object;
2550
2551 m_object = VM_PAGE_OBJECT(mem);
2552
2553 assert(m_object != new_object);
2554 assert(m_object);
2555
2556 /*
2557 * Changes to mem->vmp_object require the page queues lock because
2558 * the pageout daemon uses that lock to get the page's object.
2559 */
2560 vm_page_lockspin_queues();
2561
2562 internal_to_external = FALSE;
2563 external_to_internal = FALSE;
2564
2565 if (mem->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2566 /*
2567 * it's much easier to get the vm_page_pageable_xxx accounting correct
2568 * if we first move the page to the active queue... it's going to end
2569 * up there anyway, and we don't call vm_page_rename() frequently enough
2570 * for this to matter.
2571 */
2572 vm_page_queues_remove(mem, FALSE);
2573 vm_page_activate(page: mem);
2574 }
2575 if (VM_PAGE_PAGEABLE(mem)) {
2576 if (m_object->internal && !new_object->internal) {
2577 internal_to_external = TRUE;
2578 }
2579 if (!m_object->internal && new_object->internal) {
2580 external_to_internal = TRUE;
2581 }
2582 }
2583
2584 tag = m_object->wire_tag;
2585 vm_page_remove(mem, TRUE);
2586 vm_page_insert_internal(mem, object: new_object, offset: new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2587
2588 if (internal_to_external) {
2589 vm_page_pageable_internal_count--;
2590 vm_page_pageable_external_count++;
2591 } else if (external_to_internal) {
2592 vm_page_pageable_external_count--;
2593 vm_page_pageable_internal_count++;
2594 }
2595
2596 vm_page_unlock_queues();
2597}
2598
2599/*
2600 * vm_page_init:
2601 *
2602 * Initialize the fields in a new page.
2603 * This takes a structure with random values and initializes it
2604 * so that it can be given to vm_page_release or vm_page_insert.
2605 */
2606void
2607vm_page_init(
2608 vm_page_t mem,
2609 ppnum_t phys_page,
2610 boolean_t lopage)
2611{
2612 uint_t i;
2613 uintptr_t *p;
2614
2615 assert(phys_page);
2616
2617#if DEBUG
2618 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2619 if (!(pmap_valid_page(phys_page))) {
2620 panic("vm_page_init: non-DRAM phys_page 0x%x", phys_page);
2621 }
2622 }
2623#endif /* DEBUG */
2624
2625 /*
2626 * Initialize the fields of the vm_page. If adding any new fields to vm_page,
2627 * try to use initial values which match 0. This minimizes the number of writes
2628 * needed for boot-time initialization.
2629 *
2630 * Kernel bzero() isn't an inline yet, so do it by hand for performance.
2631 */
2632 assert(VM_PAGE_NOT_ON_Q == 0);
2633 assert(sizeof(*mem) % sizeof(uintptr_t) == 0);
2634 for (p = (uintptr_t *)(void *)mem, i = sizeof(*mem) / sizeof(uintptr_t); i != 0; --i) {
2635 *p++ = 0;
2636 }
2637 mem->vmp_offset = (vm_object_offset_t)-1;
2638 mem->vmp_busy = TRUE;
2639 mem->vmp_lopage = lopage;
2640
2641 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2642#if 0
2643 /*
2644 * we're leaving this turned off for now... currently pages
2645 * come off the free list and are either immediately dirtied/referenced
2646 * due to zero-fill or COW faults, or are used to read or write files...
2647 * in the file I/O case, the UPL mechanism takes care of clearing
2648 * the state of the HW ref/mod bits in a somewhat fragile way.
2649 * Since we may change the way this works in the future (to toughen it up),
2650 * I'm leaving this as a reminder of where these bits could get cleared
2651 */
2652
2653 /*
2654 * make sure both the h/w referenced and modified bits are
2655 * clear at this point... we are especially dependent on
2656 * not finding a 'stale' h/w modified in a number of spots
2657 * once this page goes back into use
2658 */
2659 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2660#endif
2661}
2662
2663/*
2664 * vm_page_grab_fictitious:
2665 *
2666 * Allocate a fictitious page from the vm_page zone.
2667 * Returns VM_PAGE_NULL if the zone allocation fails.
2668 */
2669
2670static vm_page_t
2671vm_page_grab_fictitious_common(ppnum_t phys_addr, boolean_t canwait)
2672{
2673 vm_page_t m;
2674
2675 m = zalloc_flags(vm_page_zone, canwait ? Z_WAITOK : Z_NOWAIT);
2676 if (m) {
2677 vm_page_init(mem: m, phys_page: phys_addr, FALSE);
2678 m->vmp_fictitious = TRUE;
2679 }
2680 return m;
2681}
2682
2683vm_page_t
2684vm_page_grab_fictitious(boolean_t canwait)
2685{
2686 return vm_page_grab_fictitious_common(phys_addr: vm_page_fictitious_addr, canwait);
2687}
2688
2689int vm_guard_count;
2690
2691
2692vm_page_t
2693vm_page_grab_guard(boolean_t canwait)
2694{
2695 vm_page_t page;
2696 page = vm_page_grab_fictitious_common(phys_addr: vm_page_guard_addr, canwait);
2697 if (page) {
2698 OSAddAtomic(1, &vm_guard_count);
2699 }
2700 return page;
2701}
2702
2703
2704/*
2705 * vm_page_release_fictitious:
2706 *
2707 * Release a fictitious page to the zone pool
2708 */
2709void
2710vm_page_release_fictitious(
2711 vm_page_t m)
2712{
2713 assert((m->vmp_q_state == VM_PAGE_NOT_ON_Q) || (m->vmp_q_state == VM_PAGE_IS_WIRED));
2714 assert(m->vmp_fictitious);
2715 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2716 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2717 assert(!m->vmp_realtime);
2718
2719 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {
2720 OSAddAtomic(-1, &vm_guard_count);
2721 }
2722
2723 zfree(vm_page_zone, m);
2724}
2725
2726/*
2727 * vm_pool_low():
2728 *
2729 * Return true if it is not likely that a non-vm_privileged thread
2730 * can get memory without blocking. Advisory only, since the
2731 * situation may change under us.
2732 */
2733bool
2734vm_pool_low(void)
2735{
2736 /* No locking, at worst we will fib. */
2737 return vm_page_free_count <= vm_page_free_reserved;
2738}
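/*
 * Illustrative use only (hypothetical caller policy, not code from this
 * file): a non-privileged caller that prefers to back off rather than
 * risk waiting for memory can treat the result as a hint:
 *
 *	if (vm_pool_low()) {
 *		return KERN_RESOURCE_SHORTAGE;
 *	}
 *	mem = vm_page_grab();
 *
 * Since the check is advisory, the grab may still come up empty; callers
 * must handle that case regardless.
 */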
2739
2740boolean_t vm_darkwake_mode = FALSE;
2741
2742/*
2743 * vm_update_darkwake_mode():
2744 *
2745 * Tells the VM that the system is in / out of darkwake.
2746 *
2747 * Today, the VM only lowers/raises the background queue target
2748 * so as to favor consuming more/less background pages when
2749 * darkwake is ON/OFF.
2750 *
2751 * We might need to do more things in the future.
2752 */
2753
2754void
2755vm_update_darkwake_mode(boolean_t darkwake_mode)
2756{
2757#if XNU_TARGET_OS_OSX && defined(__arm64__)
2758#pragma unused(darkwake_mode)
2759 assert(vm_darkwake_mode == FALSE);
2760 /*
2761 * Darkwake mode isn't supported on Apple Silicon macOS.
2762 */
2763 return;
2764#else /* XNU_TARGET_OS_OSX && __arm64__ */
2765 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2766
2767 vm_page_lockspin_queues();
2768
2769 if (vm_darkwake_mode == darkwake_mode) {
2770 /*
2771 * No change.
2772 */
2773 vm_page_unlock_queues();
2774 return;
2775 }
2776
2777 vm_darkwake_mode = darkwake_mode;
2778
2779 if (vm_darkwake_mode == TRUE) {
2780 /* save background target to restore later */
2781 vm_page_background_target_snapshot = vm_page_background_target;
2782
2783 /* target is set to 0...no protection for background pages */
2784 vm_page_background_target = 0;
2785 } else if (vm_darkwake_mode == FALSE) {
2786 if (vm_page_background_target_snapshot) {
2787 vm_page_background_target = vm_page_background_target_snapshot;
2788 }
2789 }
2790 vm_page_unlock_queues();
2791#endif
2792}
2793
2794void
2795vm_page_update_special_state(vm_page_t mem)
2796{
2797 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR || mem->vmp_on_specialq == VM_PAGE_SPECIAL_Q_EMPTY) {
2798 return;
2799 }
2800
2801 int mode = mem->vmp_on_specialq;
2802
2803 switch (mode) {
2804 case VM_PAGE_SPECIAL_Q_BG:
2805 {
2806 task_t my_task = current_task_early();
2807
2808 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
2809 return;
2810 }
2811
2812 if (my_task) {
2813 if (task_get_darkwake_mode(my_task)) {
2814 return;
2815 }
2816 }
2817
2818 if (my_task) {
2819 if (proc_get_effective_task_policy(task: my_task, TASK_POLICY_DARWIN_BG)) {
2820 return;
2821 }
2822 }
2823 vm_page_lockspin_queues();
2824
2825 vm_page_background_promoted_count++;
2826
2827 vm_page_remove_from_specialq(mem);
2828 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
2829
2830 vm_page_unlock_queues();
2831 break;
2832 }
2833
2834 case VM_PAGE_SPECIAL_Q_DONATE:
2835 {
2836 task_t my_task = current_task_early();
2837
2838 if (vm_page_donate_mode == VM_PAGE_DONATE_DISABLED) {
2839 return;
2840 }
2841
2842 if (my_task->donates_own_pages == false) {
2843 vm_page_lockspin_queues();
2844
2845 vm_page_remove_from_specialq(mem);
2846 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
2847
2848 vm_page_unlock_queues();
2849 }
2850 break;
2851 }
2852
2853 default:
2854 {
2855 assert(VM_PAGE_UNPACK_PTR(mem->vmp_specialq.next) == (uintptr_t)NULL &&
2856 VM_PAGE_UNPACK_PTR(mem->vmp_specialq.prev) == (uintptr_t)NULL);
2857 break;
2858 }
2859 }
2860}
2861
2862
2863void
2864vm_page_assign_special_state(vm_page_t mem, int mode)
2865{
2866 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
2867 return;
2868 }
2869
2870 switch (mode) {
2871 case VM_PAGE_SPECIAL_Q_BG:
2872 {
2873 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
2874 return;
2875 }
2876
2877 task_t my_task = current_task_early();
2878
2879 if (my_task) {
2880 if (task_get_darkwake_mode(my_task)) {
2881 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_BG;
2882 return;
2883 }
2884 }
2885
2886 if (my_task) {
2887 mem->vmp_on_specialq = (proc_get_effective_task_policy(task: my_task, TASK_POLICY_DARWIN_BG) ? VM_PAGE_SPECIAL_Q_BG : VM_PAGE_SPECIAL_Q_EMPTY);
2888 }
2889 break;
2890 }
2891
2892 case VM_PAGE_SPECIAL_Q_DONATE:
2893 {
2894 if (vm_page_donate_mode == VM_PAGE_DONATE_DISABLED) {
2895 return;
2896 }
2897 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
2898 break;
2899 }
2900
2901 default:
2902 break;
2903 }
2904}
2905
2906
2907void
2908vm_page_remove_from_specialq(
2909 vm_page_t mem)
2910{
2911 vm_object_t m_object;
2912 unsigned short mode;
2913
2914 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2915
2916 mode = mem->vmp_on_specialq;
2917
2918 switch (mode) {
2919 case VM_PAGE_SPECIAL_Q_BG:
2920 {
2921 if (mem->vmp_specialq.next && mem->vmp_specialq.prev) {
2922 vm_page_queue_remove(&vm_page_queue_background, mem, vmp_specialq);
2923
2924 mem->vmp_specialq.next = 0;
2925 mem->vmp_specialq.prev = 0;
2926
2927 vm_page_background_count--;
2928
2929 m_object = VM_PAGE_OBJECT(mem);
2930
2931 if (m_object->internal) {
2932 vm_page_background_internal_count--;
2933 } else {
2934 vm_page_background_external_count--;
2935 }
2936 }
2937 break;
2938 }
2939
2940 case VM_PAGE_SPECIAL_Q_DONATE:
2941 {
2942 if (mem->vmp_specialq.next && mem->vmp_specialq.prev) {
2943 vm_page_queue_remove((vm_page_queue_head_t*)&vm_page_queue_donate, mem, vmp_specialq);
2944 mem->vmp_specialq.next = 0;
2945 mem->vmp_specialq.prev = 0;
2946 vm_page_donate_count--;
2947 if (vm_page_donate_queue_ripe && (vm_page_donate_count < vm_page_donate_target)) {
2948 assert(vm_page_donate_target == vm_page_donate_target_low);
2949 vm_page_donate_target = vm_page_donate_target_high;
2950 vm_page_donate_queue_ripe = false;
2951 }
2952 }
2953
2954 break;
2955 }
2956
2957 default:
2958 {
2959 assert(VM_PAGE_UNPACK_PTR(mem->vmp_specialq.next) == (uintptr_t)NULL &&
2960 VM_PAGE_UNPACK_PTR(mem->vmp_specialq.prev) == (uintptr_t)NULL);
2961 break;
2962 }
2963 }
2964}
2965
2966
2967void
2968vm_page_add_to_specialq(
2969 vm_page_t mem,
2970 boolean_t first)
2971{
2972 vm_object_t m_object;
2973
2974 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2975
2976 if (mem->vmp_specialq.next && mem->vmp_specialq.prev) {
2977 return;
2978 }
2979
2980 int mode = mem->vmp_on_specialq;
2981
2982 switch (mode) {
2983 case VM_PAGE_SPECIAL_Q_BG:
2984 {
2985 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
2986 return;
2987 }
2988
2989 m_object = VM_PAGE_OBJECT(mem);
2990
2991 if (vm_page_background_exclude_external && !m_object->internal) {
2992 return;
2993 }
2994
2995 if (first == TRUE) {
2996 vm_page_queue_enter_first(&vm_page_queue_background, mem, vmp_specialq);
2997 } else {
2998 vm_page_queue_enter(&vm_page_queue_background, mem, vmp_specialq);
2999 }
3000 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_BG;
3001
3002 vm_page_background_count++;
3003
3004 if (m_object->internal) {
3005 vm_page_background_internal_count++;
3006 } else {
3007 vm_page_background_external_count++;
3008 }
3009 break;
3010 }
3011
3012 case VM_PAGE_SPECIAL_Q_DONATE:
3013 {
3014 if (first == TRUE) {
3015 vm_page_queue_enter_first((vm_page_queue_head_t*)&vm_page_queue_donate, mem, vmp_specialq);
3016 } else {
3017 vm_page_queue_enter((vm_page_queue_head_t*)&vm_page_queue_donate, mem, vmp_specialq);
3018 }
3019 vm_page_donate_count++;
3020 if (!vm_page_donate_queue_ripe && (vm_page_donate_count > vm_page_donate_target)) {
3021 assert(vm_page_donate_target == vm_page_donate_target_high);
3022 vm_page_donate_target = vm_page_donate_target_low;
3023 vm_page_donate_queue_ripe = true;
3024 }
3025 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
3026 break;
3027 }
3028
3029 default:
3030 break;
3031 }
3032}
3033
3034/*
3035 * This can be switched to FALSE to help debug drivers
3036 * that are having problems with memory > 4G.
3037 */
3038boolean_t vm_himemory_mode = TRUE;
3039
3040/*
3041 * this interface exists to support hardware controllers
3042 * incapable of generating DMAs with more than 32 bits
3043 * of address on platforms with physical memory > 4G...
3044 */
3045unsigned int vm_lopages_allocated_q = 0;
3046unsigned int vm_lopages_allocated_cpm_success = 0;
3047unsigned int vm_lopages_allocated_cpm_failed = 0;
3048vm_page_queue_head_t vm_lopage_queue_free VM_PAGE_PACKED_ALIGNED;
3049
3050vm_page_t
3051vm_page_grablo(void)
3052{
3053 vm_page_t mem;
3054
3055 if (vm_lopage_needed == FALSE) {
3056 return vm_page_grab();
3057 }
3058
3059 vm_free_page_lock_spin();
3060
3061 if (!vm_page_queue_empty(&vm_lopage_queue_free)) {
3062 vm_page_queue_remove_first(&vm_lopage_queue_free, mem, vmp_pageq);
3063 assert(vm_lopage_free_count);
3064 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
3065 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3066
3067 vm_lopage_free_count--;
3068 vm_lopages_allocated_q++;
3069
3070 if (vm_lopage_free_count < vm_lopage_lowater) {
3071 vm_lopage_refill = TRUE;
3072 }
3073
3074 vm_free_page_unlock();
3075
3076 if (current_task()->donates_own_pages) {
3077 vm_page_assign_special_state(mem, VM_PAGE_SPECIAL_Q_DONATE);
3078 } else {
3079 vm_page_assign_special_state(mem, VM_PAGE_SPECIAL_Q_BG);
3080 }
3081 } else {
3082 vm_free_page_unlock();
3083
3084 if (cpm_allocate(PAGE_SIZE, list: &mem, atop(PPNUM_MAX), pnum_mask: 0, FALSE, flags: KMA_LOMEM) != KERN_SUCCESS) {
3085 vm_free_page_lock_spin();
3086 vm_lopages_allocated_cpm_failed++;
3087 vm_free_page_unlock();
3088
3089 return VM_PAGE_NULL;
3090 }
3091 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3092
3093 mem->vmp_busy = TRUE;
3094
3095 vm_page_lockspin_queues();
3096
3097 mem->vmp_gobbled = FALSE;
3098 vm_page_gobble_count--;
3099 vm_page_wire_count--;
3100
3101 vm_lopages_allocated_cpm_success++;
3102 vm_page_unlock_queues();
3103 }
3104 assert(mem->vmp_busy);
3105 assert(!mem->vmp_pmapped);
3106 assert(!mem->vmp_wpmapped);
3107 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
3108
3109 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3110
3111 counter_inc(&vm_page_grab_count);
3112 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
3113
3114 return mem;
3115}
3116
3117/*
3118 * vm_page_grab:
3119 *
3120 * first try to grab a page from the per-cpu free list...
3121 * this must be done while pre-emption is disabled... if
3122 * a page is available, we're done...
3123 * if no page is available, grab the vm_page_queue_free_lock
3124 * and see if current number of free pages would allow us
3125 * to grab at least 1... if not, return VM_PAGE_NULL as before...
3126 * if there are pages available, disable preemption and
3127 * recheck the state of the per-cpu free list... we could
3128 * have been preempted and moved to a different cpu, or
3129 * some other thread could have re-filled it... if still
3130 * empty, figure out how many pages we can steal from the
3131 * global free queue and move to the per-cpu queue...
3132 * return 1 of these pages when done... only wake up the
3133 * pageout_scan thread if we moved pages from the global
3134 * list... no need for the wakeup if we've satisfied the
3135 * request from the per-cpu queue.
3136 */
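/*
 * Condensed sketch of the path described above (illustrative only; the
 * real implementation below also handles delayed boot-time pages, the
 * secluded pool, diagnostic counters and pageout wakeups):
 *
 *	restart:
 *		disable_preemption();
 *		if ((mem = *PERCPU_GET(free_pages)) != NULL) {
 *			*PERCPU_GET(free_pages) = mem->vmp_snext;
 *			enable_preemption();
 *			return mem;			(fast path, no lock taken)
 *		}
 *		enable_preemption();
 *
 *		vm_free_page_lock_spin();
 *		if (vm_page_free_count < vm_page_free_reserved &&
 *		    !(current_thread()->options & TH_OPT_VMPRIV)) {
 *			vm_free_page_unlock();
 *			return VM_PAGE_NULL;		(reserve is kept for pageout)
 *		}
 *		move up to vm_free_magazine_refill_limit pages from the global
 *		color queues onto this CPU's free_pages list;
 *		vm_free_page_unlock();
 *		goto restart;
 */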
3137
3138#if CONFIG_SECLUDED_MEMORY
3139vm_page_t vm_page_grab_secluded(void);
3140#endif /* CONFIG_SECLUDED_MEMORY */
3141
3142static inline void
3143vm_page_grab_diags(void);
3144
3145/*
3146 * vm_page_validate_no_references:
3147 *
3148 * Make sure the physical page has no refcounts.
3149 *
3150 */
3151static inline void
3152vm_page_validate_no_references(
3153 vm_page_t mem)
3154{
3155 bool is_freed;
3156
3157 if (mem->vmp_fictitious) {
3158 return;
3159 }
3160
3161 pmap_paddr_t paddr = ptoa(VM_PAGE_GET_PHYS_PAGE(mem));
3162
3163#if CONFIG_SPTM
3164 is_freed = pmap_is_page_free(paddr);
3165#else
3166 is_freed = pmap_verify_free(pn: VM_PAGE_GET_PHYS_PAGE(m: mem));
3167#endif /* CONFIG_SPTM */
3168
3169 if (!is_freed) {
3170 /*
3171 * There is a redundancy here, but we are going to panic anyway,
3172 * and ASSERT_PMAP_FREE traces useful information. So, we keep this
3173 * behavior.
3174 */
3175 ASSERT_PMAP_FREE(mem);
3176 panic("%s: page 0x%llx is referenced", __func__, paddr);
3177 }
3178}
3179
3180vm_page_t
3181vm_page_grab(void)
3182{
3183 return vm_page_grab_options(VM_PAGE_GRAB_OPTIONS_NONE);
3184}
3185
3186#if HIBERNATION
3187boolean_t hibernate_rebuild_needed = FALSE;
3188#endif /* HIBERNATION */
3189
3190vm_page_t
3191vm_page_grab_options(
3192 int grab_options)
3193{
3194 vm_page_t mem;
3195
3196restart:
3197 disable_preemption();
3198
3199 if ((mem = *PERCPU_GET(free_pages))) {
3200 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
3201
3202#if HIBERNATION
3203 if (hibernate_rebuild_needed) {
3204 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3205 }
3206#endif /* HIBERNATION */
3207
3208 vm_page_grab_diags();
3209
3210 vm_offset_t pcpu_base = current_percpu_base();
3211 counter_inc_preemption_disabled(&vm_page_grab_count);
3212 *PERCPU_GET_WITH_BASE(pcpu_base, free_pages) = mem->vmp_snext;
3213 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
3214
3215 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3216 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3217 enable_preemption();
3218
3219 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3220 assert(mem->vmp_tabled == FALSE);
3221 assert(mem->vmp_object == 0);
3222 assert(!mem->vmp_laundry);
3223 assert(mem->vmp_busy);
3224 assert(!mem->vmp_pmapped);
3225 assert(!mem->vmp_wpmapped);
3226 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
3227 assert(!mem->vmp_realtime);
3228
3229 vm_page_validate_no_references(mem);
3230
3231 task_t cur_task = current_task_early();
3232 if (cur_task && cur_task != kernel_task) {
3233 if (cur_task->donates_own_pages) {
3234 vm_page_assign_special_state(mem, VM_PAGE_SPECIAL_Q_DONATE);
3235 } else {
3236 vm_page_assign_special_state(mem, VM_PAGE_SPECIAL_Q_BG);
3237 }
3238 }
3239 return mem;
3240 }
3241 enable_preemption();
3242
3243
3244 /*
3245 * Optionally produce warnings if the wire or gobble
3246 * counts exceed some threshold.
3247 */
3248#if VM_PAGE_WIRE_COUNT_WARNING
3249 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
3250 printf("mk: vm_page_grab(): high wired page count of %d\n",
3251 vm_page_wire_count);
3252 }
3253#endif
3254#if VM_PAGE_GOBBLE_COUNT_WARNING
3255 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
3256 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
3257 vm_page_gobble_count);
3258 }
3259#endif
3260
3261 /*
3262 * If free count is low and we have delayed pages from early boot,
3263 * get one of those instead.
3264 */
3265 if (__improbable(vm_delayed_count > 0 &&
3266 vm_page_free_count <= vm_page_free_target &&
3267 (mem = vm_get_delayed_page(grab_options)) != NULL)) {
3268 assert(!mem->vmp_realtime);
3269 return mem;
3270 }
3271
3272 vm_free_page_lock_spin();
3273
3274 /*
3275 * Only let privileged threads (involved in pageout)
3276 * dip into the reserved pool.
3277 */
3278 if ((vm_page_free_count < vm_page_free_reserved) &&
3279 !(current_thread()->options & TH_OPT_VMPRIV)) {
3280 /* no page for us in the free queue... */
3281 vm_free_page_unlock();
3282 mem = VM_PAGE_NULL;
3283
3284#if CONFIG_SECLUDED_MEMORY
3285 /* ... but can we try and grab from the secluded queue? */
3286 if (vm_page_secluded_count > 0 &&
3287 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
3288 task_can_use_secluded_mem(current_task(), TRUE))) {
3289 mem = vm_page_grab_secluded();
3290 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
3291 vm_page_secluded.grab_for_iokit++;
3292 if (mem) {
3293 vm_page_secluded.grab_for_iokit_success++;
3294 }
3295 }
3296 if (mem) {
3297 VM_CHECK_MEMORYSTATUS;
3298
3299 vm_page_grab_diags();
3300 counter_inc(&vm_page_grab_count);
3301 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
3302
3303 assert(!mem->vmp_realtime);
3304 return mem;
3305 }
3306 }
3307#else /* CONFIG_SECLUDED_MEMORY */
3308 (void) grab_options;
3309#endif /* CONFIG_SECLUDED_MEMORY */
3310 } else {
3311 vm_page_t head;
3312 vm_page_t tail;
3313 unsigned int pages_to_steal;
3314 unsigned int color;
3315 unsigned int clump_end, sub_count;
3316
3317 while (vm_page_free_count == 0) {
3318 vm_free_page_unlock();
3319 /*
3320 * must be a privileged thread to be
3321 * in this state since a non-privileged
3322 * thread would have bailed if we were
3323 * under the vm_page_free_reserved mark
3324 */
3325 VM_PAGE_WAIT();
3326 vm_free_page_lock_spin();
3327 }
3328
3329 /*
3330 * Need to repopulate the per-CPU free list from the global free list.
3331 * Note we don't do any processing of pending retirement pages here.
3332 * That'll happen in the code above when the page comes off the per-CPU list.
3333 */
3334 disable_preemption();
3335
3336 /*
3337 * If we got preempted the cache might now have pages.
3338 */
3339 if ((mem = *PERCPU_GET(free_pages))) {
3340 vm_free_page_unlock();
3341 enable_preemption();
3342 goto restart;
3343 }
3344
3345 if (vm_page_free_count <= vm_page_free_reserved) {
3346 pages_to_steal = 1;
3347 } else {
3348 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved)) {
3349 pages_to_steal = vm_free_magazine_refill_limit;
3350 } else {
3351 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
3352 }
3353 }
3354 color = *PERCPU_GET(start_color);
3355 head = tail = NULL;
3356
3357 vm_page_free_count -= pages_to_steal;
3358 clump_end = sub_count = 0;
3359
3360 while (pages_to_steal--) {
3361 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead)) {
3362 color = (color + 1) & vm_color_mask;
3363 }
3364#if defined(__x86_64__)
3365 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
3366 mem, clump_end);
3367#else
3368 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
3369 mem, vmp_pageq);
3370#endif
3371
3372 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_Q);
3373
3374 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3375
3376#if defined(__arm64__)
3377 color = (color + 1) & vm_color_mask;
3378#else
3379
3380#if DEVELOPMENT || DEBUG
3381
3382 sub_count++;
3383 if (clump_end) {
3384 vm_clump_update_stats(sub_count);
3385 sub_count = 0;
3386 color = (color + 1) & vm_color_mask;
3387 }
3388#else
3389 if (clump_end) {
3390 color = (color + 1) & vm_color_mask;
3391 }
3392
3393#endif /* if DEVELOPMENT || DEBUG */
3394
3395#endif /* if defined(__arm64__) */
3396
3397 if (head == NULL) {
3398 head = mem;
3399 } else {
3400 tail->vmp_snext = mem;
3401 }
3402 tail = mem;
3403
3404 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3405 assert(mem->vmp_tabled == FALSE);
3406 assert(mem->vmp_object == 0);
3407 assert(!mem->vmp_laundry);
3408
3409 mem->vmp_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
3410
3411 assert(mem->vmp_busy);
3412 assert(!mem->vmp_pmapped);
3413 assert(!mem->vmp_wpmapped);
3414 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
3415 assert(!mem->vmp_realtime);
3416
3417 vm_page_validate_no_references(mem);
3418 }
3419#if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
3420 vm_clump_update_stats(sub_count);
3421#endif
3422
3423#if HIBERNATION
3424 if (hibernate_rebuild_needed) {
3425 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3426 }
3427#endif /* HIBERNATION */
3428 vm_offset_t pcpu_base = current_percpu_base();
3429 *PERCPU_GET_WITH_BASE(pcpu_base, free_pages) = head;
3430 *PERCPU_GET_WITH_BASE(pcpu_base, start_color) = color;
3431
3432 vm_free_page_unlock();
3433 enable_preemption();
3434 goto restart;
3435 }
3436
3437 /*
3438 * Decide if we should poke the pageout daemon.
3439 * We do this if the free count is less than the low
3440 * water mark. VM Pageout Scan will keep running till
3441 * the free_count > free_target (& hence above free_min).
3442 * This wakeup is to catch the possibility of the counts
3443 * dropping between VM Pageout Scan parking and this check.
3444 *
3445 * We don't have the counts locked ... if they change a little,
3446 * it doesn't really matter.
3447 */
3448 if (vm_page_free_count < vm_page_free_min) {
3449 vm_free_page_lock();
3450 if (vm_pageout_running == FALSE) {
3451 vm_free_page_unlock();
3452 thread_wakeup((event_t) &vm_page_free_wanted);
3453 } else {
3454 vm_free_page_unlock();
3455 }
3456 }
3457
3458 VM_CHECK_MEMORYSTATUS;
3459
3460 if (mem) {
3461 assert(!mem->vmp_realtime);
3462// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
3463
3464 task_t cur_task = current_task_early();
3465 if (cur_task && cur_task != kernel_task) {
3466 if (cur_task->donates_own_pages) {
3467 vm_page_assign_special_state(mem, VM_PAGE_SPECIAL_Q_DONATE);
3468 } else {
3469 vm_page_assign_special_state(mem, VM_PAGE_SPECIAL_Q_BG);
3470 }
3471 }
3472 }
3473 return mem;
3474}
3475
3476#if CONFIG_SECLUDED_MEMORY
3477vm_page_t
3478vm_page_grab_secluded(void)
3479{
3480 vm_page_t mem;
3481 vm_object_t object;
3482 int refmod_state;
3483
3484 if (vm_page_secluded_count == 0) {
3485 /* no secluded pages to grab... */
3486 return VM_PAGE_NULL;
3487 }
3488
3489 /* secluded queue is protected by the VM page queue lock */
3490 vm_page_lock_queues();
3491
3492 if (vm_page_secluded_count == 0) {
3493 /* no secluded pages to grab... */
3494 vm_page_unlock_queues();
3495 return VM_PAGE_NULL;
3496 }
3497
3498#if 00
3499 /* can we grab from the secluded queue? */
3500 if (vm_page_secluded_count > vm_page_secluded_target ||
3501 (vm_page_secluded_count > 0 &&
3502 task_can_use_secluded_mem(current_task(), TRUE))) {
3503 /* OK */
3504 } else {
3505 /* can't grab from secluded queue... */
3506 vm_page_unlock_queues();
3507 return VM_PAGE_NULL;
3508 }
3509#endif
3510
3511 /* we can grab a page from secluded queue! */
3512 assert((vm_page_secluded_count_free +
3513 vm_page_secluded_count_inuse) ==
3514 vm_page_secluded_count);
3515 if (current_task()->task_can_use_secluded_mem) {
3516 assert(num_tasks_can_use_secluded_mem > 0);
3517 }
3518 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
3519 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3520 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
3521 assert(mem->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
3522 vm_page_queues_remove(mem, TRUE);
3523
3524 object = VM_PAGE_OBJECT(mem);
3525
3526 assert(!mem->vmp_fictitious);
3527 assert(!VM_PAGE_WIRED(mem));
3528 if (object == VM_OBJECT_NULL) {
3529 /* free for grab! */
3530 vm_page_unlock_queues();
3531 vm_page_secluded.grab_success_free++;
3532
3533 assert(mem->vmp_busy);
3534 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3535 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3536 assert(mem->vmp_pageq.next == 0);
3537 assert(mem->vmp_pageq.prev == 0);
3538 assert(mem->vmp_listq.next == 0);
3539 assert(mem->vmp_listq.prev == 0);
3540 assert(mem->vmp_on_specialq == VM_PAGE_SPECIAL_Q_EMPTY);
3541 assert(mem->vmp_specialq.next == 0);
3542 assert(mem->vmp_specialq.prev == 0);
3543 return mem;
3544 }
3545
3546 assert(!object->internal);
3547// vm_page_pageable_external_count--;
3548
3549 if (!vm_object_lock_try(object)) {
3550// printf("SECLUDED: page %p: object %p locked\n", mem, object);
3551 vm_page_secluded.grab_failure_locked++;
3552reactivate_secluded_page:
3553 vm_page_activate(mem);
3554 vm_page_unlock_queues();
3555 return VM_PAGE_NULL;
3556 }
3557 if (mem->vmp_busy ||
3558 mem->vmp_cleaning ||
3559 mem->vmp_laundry) {
3560 /* can't steal page in this state... */
3561 vm_object_unlock(object);
3562 vm_page_secluded.grab_failure_state++;
3563 goto reactivate_secluded_page;
3564 }
3565 if (mem->vmp_realtime) {
3566 /* don't steal pages used by realtime threads... */
3567 vm_object_unlock(object);
3568 vm_page_secluded.grab_failure_realtime++;
3569 goto reactivate_secluded_page;
3570 }
3571
3572 mem->vmp_busy = TRUE;
3573 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3574 if (refmod_state & VM_MEM_REFERENCED) {
3575 mem->vmp_reference = TRUE;
3576 }
3577 if (refmod_state & VM_MEM_MODIFIED) {
3578 SET_PAGE_DIRTY(mem, FALSE);
3579 }
3580 if (mem->vmp_dirty || mem->vmp_precious) {
3581 /* can't grab a dirty page; re-activate */
3582// printf("SECLUDED: dirty page %p\n", mem);
3583 PAGE_WAKEUP_DONE(mem);
3584 vm_page_secluded.grab_failure_dirty++;
3585 vm_object_unlock(object);
3586 goto reactivate_secluded_page;
3587 }
3588 if (mem->vmp_reference) {
3589 /* it's been used but we do need to grab a page... */
3590 }
3591
3592 vm_page_unlock_queues();
3593
3594
3595 /* finish what vm_page_free() would have done... */
3596 vm_page_free_prepare_object(mem, TRUE);
3597 vm_object_unlock(object);
3598 object = VM_OBJECT_NULL;
3599
3600 vm_page_validate_no_references(mem);
3601
3602 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3603 vm_page_secluded.grab_success_other++;
3604
3605 assert(mem->vmp_busy);
3606 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3607 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3608 assert(mem->vmp_pageq.next == 0);
3609 assert(mem->vmp_pageq.prev == 0);
3610 assert(mem->vmp_listq.next == 0);
3611 assert(mem->vmp_listq.prev == 0);
3612 assert(mem->vmp_on_specialq == VM_PAGE_SPECIAL_Q_EMPTY);
3613 assert(mem->vmp_specialq.next == 0);
3614 assert(mem->vmp_specialq.prev == 0);
3615
3616 return mem;
3617}
3618
3619uint64_t
3620vm_page_secluded_drain(void)
3621{
3622 vm_page_t local_freeq;
3623 int local_freed;
3624 uint64_t num_reclaimed;
3625 unsigned int saved_secluded_count, saved_secluded_target;
3626
3627 num_reclaimed = 0;
3628 local_freeq = NULL;
3629 local_freed = 0;
3630
3631 vm_page_lock_queues();
3632
3633 saved_secluded_count = vm_page_secluded_count;
3634 saved_secluded_target = vm_page_secluded_target;
3635 vm_page_secluded_target = 0;
3636 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3637 while (vm_page_secluded_count) {
3638 vm_page_t secluded_page;
3639
3640 assert((vm_page_secluded_count_free +
3641 vm_page_secluded_count_inuse) ==
3642 vm_page_secluded_count);
3643 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
3644 assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
3645
3646 vm_page_queues_remove(secluded_page, FALSE);
3647 assert(!secluded_page->vmp_fictitious);
3648 assert(!VM_PAGE_WIRED(secluded_page));
3649
3650 if (secluded_page->vmp_object == 0) {
3651 /* transfer to free queue */
3652 assert(secluded_page->vmp_busy);
3653 secluded_page->vmp_snext = local_freeq;
3654 local_freeq = secluded_page;
3655 local_freed += 1;
3656 } else {
3657 /* transfer to head of active queue */
3658 vm_page_enqueue_active(secluded_page, FALSE);
3659 secluded_page = VM_PAGE_NULL;
3660 }
3661 num_reclaimed++;
3662 }
3663 vm_page_secluded_target = saved_secluded_target;
3664 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3665
3666// printf("FBDP %s:%d secluded_count %d->%d, target %d, reclaimed %lld\n", __FUNCTION__, __LINE__, saved_secluded_count, vm_page_secluded_count, vm_page_secluded_target, num_reclaimed);
3667
3668 vm_page_unlock_queues();
3669
3670 if (local_freed) {
3671 vm_page_free_list(local_freeq, TRUE);
3672 local_freeq = NULL;
3673 local_freed = 0;
3674 }
3675
3676 return num_reclaimed;
3677}
3678#endif /* CONFIG_SECLUDED_MEMORY */
3679
3680static inline void
3681vm_page_grab_diags(void)
3682{
3683#if DEVELOPMENT || DEBUG
3684 task_t task = current_task_early();
3685 if (task == NULL) {
3686 return;
3687 }
3688
3689 ledger_credit(task->ledger, task_ledgers.pages_grabbed, 1);
3690#endif /* DEVELOPMENT || DEBUG */
3691}
3692
3693/*
3694 * vm_page_release:
3695 *
3696 * Return a page to the free list.
3697 */
3698
3699void
3700vm_page_release(
3701 vm_page_t mem,
3702 boolean_t page_queues_locked)
3703{
3704 unsigned int color;
3705 int need_wakeup = 0;
3706 int need_priv_wakeup = 0;
3707#if CONFIG_SECLUDED_MEMORY
3708 int need_secluded_wakeup = 0;
3709#endif /* CONFIG_SECLUDED_MEMORY */
3710 event_t wakeup_event = NULL;
3711
3712 if (page_queues_locked) {
3713 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3714 } else {
3715 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3716 }
3717
3718 assert(!mem->vmp_private && !mem->vmp_fictitious);
3719
3720#if MACH_ASSERT
3721 if (vm_check_refs_on_free) {
3722 vm_page_validate_no_references(mem);
3723 }
3724#endif /* MACH_ASSERT */
3725
3726// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3727
3728 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3729
3730 if (__improbable(mem->vmp_realtime)) {
3731 if (!page_queues_locked) {
3732 vm_page_lock_queues();
3733 }
3734 if (mem->vmp_realtime) {
3735 mem->vmp_realtime = false;
3736 vm_page_realtime_count--;
3737 }
3738 if (!page_queues_locked) {
3739 vm_page_unlock_queues();
3740 }
3741 }
3742
3743 vm_free_page_lock_spin();
3744
3745 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3746 assert(mem->vmp_busy);
3747 assert(!mem->vmp_laundry);
3748 assert(mem->vmp_object == 0);
3749 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
3750 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3751 assert(mem->vmp_specialq.next == 0 && mem->vmp_specialq.prev == 0);
3752
3753 /* Clear any specialQ hints before releasing the page to the free pool */
3754 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
3755
3756 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
3757 vm_lopage_free_count < vm_lopage_free_limit &&
3758 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3759 /*
3760 * this exists to support hardware controllers
3761 * incapable of generating DMAs with more than 32 bits
3762 * of address on platforms with physical memory > 4G...
3763 */
3764 vm_page_queue_enter_first(&vm_lopage_queue_free, mem, vmp_pageq);
3765 vm_lopage_free_count++;
3766
3767 if (vm_lopage_free_count >= vm_lopage_free_limit) {
3768 vm_lopage_refill = FALSE;
3769 }
3770
3771 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3772 mem->vmp_lopage = TRUE;
3773#if CONFIG_SECLUDED_MEMORY
3774 } else if (vm_page_free_count > vm_page_free_reserved &&
3775 vm_page_secluded_count < vm_page_secluded_target &&
3776 num_tasks_can_use_secluded_mem == 0) {
3777 /*
3778 * XXX FBDP TODO: also avoid refilling secluded queue
3779 * when some IOKit objects are already grabbing from it...
3780 */
3781 if (!page_queues_locked) {
3782 if (!vm_page_trylock_queues()) {
3783 /* take locks in right order */
3784 vm_free_page_unlock();
3785 vm_page_lock_queues();
3786 vm_free_page_lock_spin();
3787 }
3788 }
3789 mem->vmp_lopage = FALSE;
3790 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3791 vm_page_queue_enter_first(&vm_page_queue_secluded, mem, vmp_pageq);
3792 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3793 vm_page_secluded_count++;
3794 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3795 vm_page_secluded_count_free++;
3796 if (!page_queues_locked) {
3797 vm_page_unlock_queues();
3798 }
3799 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3800 if (vm_page_free_wanted_secluded > 0) {
3801 vm_page_free_wanted_secluded--;
3802 need_secluded_wakeup = 1;
3803 }
3804#endif /* CONFIG_SECLUDED_MEMORY */
3805 } else {
3806 mem->vmp_lopage = FALSE;
3807 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3808
3809 color = VM_PAGE_GET_COLOR(mem);
3810#if defined(__x86_64__)
3811 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
3812#else
3813 vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
3814#endif
3815 vm_page_free_count++;
3816 /*
3817 * Check if we should wake up someone waiting for page.
3818 * But don't bother waking them unless they can allocate.
3819 *
3820 * We wakeup only one thread, to prevent starvation.
3821 * Because the scheduling system handles wait queues FIFO,
3822 * if we wakeup all waiting threads, one greedy thread
3823 * can starve multiple niceguy threads. When the threads
3824 * all wake up, the greedy thread runs first, grabs the page,
3825 * and waits for another page. It will be the first to run
3826 * when the next page is freed.
3827 *
3828 * However, there is a slight danger here.
3829 * The thread we wake might not use the free page.
3830 * Then the other threads could wait indefinitely
3831 * while the page goes unused. To forestall this,
3832 * the pageout daemon will keep making free pages
3833 * as long as vm_page_free_wanted is non-zero.
3834 */
3835
3836 assert(vm_page_free_count > 0);
3837 if (vm_page_free_wanted_privileged > 0) {
3838 vm_page_free_wanted_privileged--;
3839 need_priv_wakeup = 1;
3840#if CONFIG_SECLUDED_MEMORY
3841 } else if (vm_page_free_wanted_secluded > 0 &&
3842 vm_page_free_count > vm_page_free_reserved) {
3843 vm_page_free_wanted_secluded--;
3844 need_secluded_wakeup = 1;
3845#endif /* CONFIG_SECLUDED_MEMORY */
3846 } else if (vm_page_free_wanted > 0 &&
3847 vm_page_free_count > vm_page_free_reserved) {
3848 vm_page_free_wanted--;
3849 need_wakeup = 1;
3850 }
3851 }
3852 vm_pageout_vminfo.vm_page_pages_freed++;
3853
3854 vm_free_page_unlock();
3855
3856 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, 1, 0, 0, 0);
3857
3858 if (need_priv_wakeup) {
3859 wakeup_event = &vm_page_free_wanted_privileged;
3860 }
3861#if CONFIG_SECLUDED_MEMORY
3862 else if (need_secluded_wakeup) {
3863 wakeup_event = &vm_page_free_wanted_secluded;
3864 }
3865#endif /* CONFIG_SECLUDED_MEMORY */
3866 else if (need_wakeup) {
3867 wakeup_event = &vm_page_free_count;
3868 }
3869
3870 if (wakeup_event) {
3871 if (vps_dynamic_priority_enabled) {
3872 wakeup_one_with_inheritor((event_t) wakeup_event,
3873 THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH,
3874 NULL);
3875 } else {
3876 thread_wakeup_one((event_t) wakeup_event);
3877 }
3878 }
3879
3880 VM_CHECK_MEMORYSTATUS;
3881}
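/*
 * Usage sketch (illustrative only, mirroring the vm_page_free() path
 * further below): the caller first strips the page's queue and object
 * state, leaving vmp_busy set, then hands it to vm_page_release() with
 * page_queues_locked reflecting what it actually holds.
 *
 *	vm_page_free_prepare(mem);
 *	vm_page_release(mem, TRUE);	// TRUE: page queues lock is held
 */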
3882
3883/*
3884 * This version of vm_page_release() is used only at startup
3885 * when we are single-threaded and pages are being released
3886 * for the first time. Hence, no locking is needed and redundant checks are skipped.
3887 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3888 */
3889void
3890vm_page_release_startup(
3891 vm_page_t mem)
3892{
3893 vm_page_queue_t queue_free;
3894
3895 if (vm_lopage_free_count < vm_lopage_free_limit &&
3896 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3897 mem->vmp_lopage = TRUE;
3898 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3899 vm_lopage_free_count++;
3900 queue_free = &vm_lopage_queue_free;
3901#if CONFIG_SECLUDED_MEMORY
3902 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3903 mem->vmp_lopage = FALSE;
3904 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3905 vm_page_secluded_count++;
3906 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3907 vm_page_secluded_count_free++;
3908 queue_free = &vm_page_queue_secluded;
3909#endif /* CONFIG_SECLUDED_MEMORY */
3910 } else {
3911 mem->vmp_lopage = FALSE;
3912 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3913 vm_page_free_count++;
3914 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3915 }
3916 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
3917#if defined(__x86_64__)
3918 vm_page_queue_enter_clump(queue_free, mem);
3919#else
3920 vm_page_queue_enter(queue_free, mem, vmp_pageq);
3921#endif
3922 } else {
3923 vm_page_queue_enter_first(queue_free, mem, vmp_pageq);
3924 }
3925}
3926
3927/*
3928 * vm_page_wait:
3929 *
3930 * Wait for a page to become available.
3931 * If there are plenty of free pages, then we don't sleep.
3932 *
3933 * Returns:
3934 * TRUE: There may be another page, try again
3935 * FALSE: We were interrupted out of our wait, don't try again
3936 */
3937
3938boolean_t
3939vm_page_wait(
3940 int interruptible )
3941{
3942 /*
3943 * We can't use vm_page_free_reserved to make this
3944 * determination. Consider: some thread might
3945 * need to allocate two pages. The first allocation
3946 * succeeds, the second fails. After the first page is freed,
3947 * a call to vm_page_wait must really block.
3948 */
3949 kern_return_t wait_result;
3950 int need_wakeup = 0;
3951 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3952 event_t wait_event = NULL;
3953
3954 vm_free_page_lock_spin();
3955
3956 if (is_privileged && vm_page_free_count) {
3957 vm_free_page_unlock();
3958 return TRUE;
3959 }
3960
3961 if (vm_page_free_count >= vm_page_free_target) {
3962 vm_free_page_unlock();
3963 return TRUE;
3964 }
3965
3966 if (is_privileged) {
3967 if (vm_page_free_wanted_privileged++ == 0) {
3968 need_wakeup = 1;
3969 }
3970 wait_event = (event_t)&vm_page_free_wanted_privileged;
3971#if CONFIG_SECLUDED_MEMORY
3972 } else if (secluded_for_apps &&
3973 task_can_use_secluded_mem(current_task(), FALSE)) {
3974#if 00
3975 /* XXX FBDP: need pageq lock for this... */
3976 /* XXX FBDP: might wait even if pages available, */
3977 /* XXX FBDP: hopefully not for too long... */
3978 if (vm_page_secluded_count > 0) {
3979 vm_free_page_unlock();
3980 return TRUE;
3981 }
3982#endif
3983 if (vm_page_free_wanted_secluded++ == 0) {
3984 need_wakeup = 1;
3985 }
3986 wait_event = (event_t)&vm_page_free_wanted_secluded;
3987#endif /* CONFIG_SECLUDED_MEMORY */
3988 } else {
3989 if (vm_page_free_wanted++ == 0) {
3990 need_wakeup = 1;
3991 }
3992 wait_event = (event_t)&vm_page_free_count;
3993 }
3994
3995 /*
3996 * We don't do a vm_pageout_scan wakeup if we already have
3997 * some waiters because vm_pageout_scan checks for waiters
3998 * before it returns and does so behind the vm_page_queue_free_lock,
3999 * which we own when we bump the waiter counts.
4000 */
4001
4002 if (vps_dynamic_priority_enabled) {
4003 /*
4004 * We are waking up vm_pageout_scan here. If it needs
4005 * the vm_page_queue_free_lock before we unlock it
4006 * we'll end up just blocking and incur an extra
4007 * context switch. Could be a perf. issue.
4008 */
4009
4010 if (need_wakeup) {
4011 thread_wakeup((event_t)&vm_page_free_wanted);
4012 }
4013
4014 /*
4015 * LD: This event is going to get recorded every time because
4016 * we don't get back THREAD_WAITING from lck_mtx_sleep_with_inheritor.
4017 * We just block in that routine.
4018 */
4019 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
4020 vm_page_free_wanted_privileged,
4021 vm_page_free_wanted,
4022#if CONFIG_SECLUDED_MEMORY
4023 vm_page_free_wanted_secluded,
4024#else /* CONFIG_SECLUDED_MEMORY */
4025 0,
4026#endif /* CONFIG_SECLUDED_MEMORY */
4027 0);
4028 wait_result = lck_mtx_sleep_with_inheritor(&vm_page_queue_free_lock,
4029 LCK_SLEEP_UNLOCK,
4030 wait_event,
4031 vm_pageout_scan_thread,
4032 interruptible,
4033 0);
4034 } else {
4035 wait_result = assert_wait(wait_event, interruptible);
4036
4037 vm_free_page_unlock();
4038
4039 if (need_wakeup) {
4040 thread_wakeup((event_t)&vm_page_free_wanted);
4041 }
4042
4043 if (wait_result == THREAD_WAITING) {
4044 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
4045 vm_page_free_wanted_privileged,
4046 vm_page_free_wanted,
4047#if CONFIG_SECLUDED_MEMORY
4048 vm_page_free_wanted_secluded,
4049#else /* CONFIG_SECLUDED_MEMORY */
4050 0,
4051#endif /* CONFIG_SECLUDED_MEMORY */
4052 0);
4053 wait_result = thread_block(THREAD_CONTINUE_NULL);
4054 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block,
4055 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
4056 }
4057 }
4058
4059 return (wait_result == THREAD_AWAKENED) || (wait_result == THREAD_NOT_WAITING);
4060}
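/*
 * Typical caller pattern (illustrative sketch; the same retry loop
 * appears in vm_page_part_zero_fill() later in this file). An
 * interruptible caller would instead check for a FALSE return from
 * vm_page_wait() and bail out.
 *
 *	vm_page_t mem;
 *
 *	while ((mem = vm_page_grab()) == VM_PAGE_NULL) {
 *		vm_page_wait(THREAD_UNINT);
 *	}
 */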
4061
4062/*
4063 * vm_page_alloc:
4064 *
4065 * Allocate and return a memory cell associated
4066 * with this VM object/offset pair.
4067 *
4068 * Object must be locked.
4069 */
4070
4071vm_page_t
4072vm_page_alloc(
4073 vm_object_t object,
4074 vm_object_offset_t offset)
4075{
4076 vm_page_t mem;
4077 int grab_options;
4078
4079 vm_object_lock_assert_exclusive(object);
4080 grab_options = 0;
4081#if CONFIG_SECLUDED_MEMORY
4082 if (object->can_grab_secluded) {
4083 grab_options |= VM_PAGE_GRAB_SECLUDED;
4084 }
4085#endif /* CONFIG_SECLUDED_MEMORY */
4086 mem = vm_page_grab_options(grab_options);
4087 if (mem == VM_PAGE_NULL) {
4088 return VM_PAGE_NULL;
4089 }
4090
4091 vm_page_insert(mem, object, offset);
4092
4093 return mem;
4094}
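/*
 * Caller sketch (illustrative only): the object must be locked
 * exclusively across the allocation and insertion, and a NULL return
 * typically means dropping the lock, waiting for memory, and retrying.
 *
 *	vm_object_lock(object);
 *	mem = vm_page_alloc(object, offset);
 *	if (mem == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		vm_page_wait(THREAD_UNINT);
 *		// re-take the object lock and retry (caller-specific)
 *	}
 */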
4095
4096/*
4097 * vm_page_free_prepare:
4098 *
4099 * Removes page from any queue it may be on
4100 * and disassociates it from its VM object.
4101 *
4102 * Object and page queues must be locked prior to entry.
4103 */
4104static void
4105vm_page_free_prepare(
4106 vm_page_t mem)
4107{
4108#if CONFIG_SPTM
4109 /**
4110 * SPTM TODO: The pmap should retype frames automatically as mappings to them are
4111 * created and destroyed. In order to catch potential cases where this
4112 * does not happen, add an appropriate assert here. This code should be
4113 * executed on every frame that is about to be released to the VM.
4114 */
4115 const sptm_paddr_t paddr = ((uint64_t)VM_PAGE_GET_PHYS_PAGE(mem)) << PAGE_SHIFT;
4116 __unused const sptm_frame_type_t frame_type = sptm_get_frame_type(paddr);
4117
4118 assert(frame_type == XNU_DEFAULT);
4119#endif /* CONFIG_SPTM */
4120
4121 vm_page_free_prepare_queues(mem);
4122 vm_page_free_prepare_object(mem, TRUE);
4123}
4124
4125
4126void
4127vm_page_free_prepare_queues(
4128 vm_page_t mem)
4129{
4130 vm_object_t m_object;
4131
4132 VM_PAGE_CHECK(mem);
4133
4134 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
4135 assert(!mem->vmp_cleaning);
4136 m_object = VM_PAGE_OBJECT(mem);
4137
4138 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4139 if (m_object) {
4140 vm_object_lock_assert_exclusive(m_object);
4141 }
4142 if (mem->vmp_laundry) {
4143 /*
4144 * We may have to free a page while it's being laundered
4145 * if we lost its pager (due to a forced unmount, for example).
4146 * We need to call vm_pageout_steal_laundry() before removing
4147 * the page from its VM object, so that we can remove it
4148 * from its pageout queue and adjust the laundry accounting
4149 */
4150 vm_pageout_steal_laundry(mem, TRUE);
4151 }
4152
4153 vm_page_queues_remove(mem, TRUE);
4154
4155 if (__improbable(mem->vmp_realtime)) {
4156 mem->vmp_realtime = false;
4157 vm_page_realtime_count--;
4158 }
4159
4160 if (VM_PAGE_WIRED(mem)) {
4161 assert(mem->vmp_wire_count > 0);
4162
4163 if (m_object) {
4164 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4165 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4166 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
4167
4168 assert(m_object->resident_page_count >=
4169 m_object->wired_page_count);
4170
4171 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4172 OSAddAtomic(+1, &vm_page_purgeable_count);
4173 assert(vm_page_purgeable_wired_count > 0);
4174 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4175 }
4176 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4177 m_object->purgable == VM_PURGABLE_EMPTY) &&
4178 m_object->vo_owner != TASK_NULL) {
4179 task_t owner;
4180 int ledger_idx_volatile;
4181 int ledger_idx_nonvolatile;
4182 int ledger_idx_volatile_compressed;
4183 int ledger_idx_nonvolatile_compressed;
4184 boolean_t do_footprint;
4185
4186 owner = VM_OBJECT_OWNER(m_object);
4187 vm_object_ledger_tag_ledgers(
4188 m_object,
4189 &ledger_idx_volatile,
4190 &ledger_idx_nonvolatile,
4191 &ledger_idx_volatile_compressed,
4192 &ledger_idx_nonvolatile_compressed,
4193 &do_footprint);
4194 /*
4195 * While wired, this page was accounted
4196 * as "non-volatile" but it should now
4197 * be accounted as "volatile".
4198 */
4199 /* one less "non-volatile"... */
4200 ledger_debit(owner->ledger,
4201 ledger_idx_nonvolatile,
4202 PAGE_SIZE);
4203 if (do_footprint) {
4204 /* ... and "phys_footprint" */
4205 ledger_debit(owner->ledger,
4206 task_ledgers.phys_footprint,
4207 PAGE_SIZE);
4208 }
4209 /* one more "volatile" */
4210 ledger_credit(owner->ledger,
4211 ledger_idx_volatile,
4212 PAGE_SIZE);
4213 }
4214 }
4215 if (!mem->vmp_private && !mem->vmp_fictitious) {
4216 vm_page_wire_count--;
4217 }
4218
4219 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
4220 mem->vmp_wire_count = 0;
4221 assert(!mem->vmp_gobbled);
4222 } else if (mem->vmp_gobbled) {
4223 if (!mem->vmp_private && !mem->vmp_fictitious) {
4224 vm_page_wire_count--;
4225 }
4226 vm_page_gobble_count--;
4227 }
4228}
4229
4230
4231void
4232vm_page_free_prepare_object(
4233 vm_page_t mem,
4234 boolean_t remove_from_hash)
4235{
4236 assert(!mem->vmp_realtime);
4237 if (mem->vmp_tabled) {
4238 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
4239 }
4240 PAGE_WAKEUP(mem); /* clears wanted */
4241
4242 if (mem->vmp_private) {
4243 mem->vmp_private = FALSE;
4244 mem->vmp_fictitious = TRUE;
4245 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
4246 }
4247 if (!mem->vmp_fictitious) {
4248 assert(mem->vmp_pageq.next == 0);
4249 assert(mem->vmp_pageq.prev == 0);
4250 assert(mem->vmp_listq.next == 0);
4251 assert(mem->vmp_listq.prev == 0);
4252 assert(mem->vmp_specialq.next == 0);
4253 assert(mem->vmp_specialq.prev == 0);
4254 assert(mem->vmp_next_m == 0);
4255
4256#if MACH_ASSERT
4257 if (vm_check_refs_on_free) {
4258 vm_page_validate_no_references(mem);
4259 }
4260#endif /* MACH_ASSERT */
4261
4262 {
4263 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->vmp_lopage);
4264 }
4265 }
4266}
4267
4268/*
4269 * vm_page_free:
4270 *
4271 * Returns the given page to the free list,
4272 * disassociating it with any VM object.
4273 *
4274 * Object and page queues must be locked prior to entry.
4275 */
4276void
4277vm_page_free(
4278 vm_page_t mem)
4279{
4280 vm_page_free_prepare(mem);
4281
4282 if (mem->vmp_fictitious) {
4283 vm_page_release_fictitious(mem);
4284 } else {
4285 vm_page_release(mem, TRUE); /* page queues are locked */
4286 }
4287}
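/*
 * Locking sketch for this single-page free path (illustrative only;
 * the exact lock choreography is caller-specific):
 *
 *	vm_object_lock(object);		// the page's object
 *	vm_page_lockspin_queues();
 *	vm_page_free(mem);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */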
4288
4289
4290void
4291vm_page_free_unlocked(
4292 vm_page_t mem,
4293 boolean_t remove_from_hash)
4294{
4295 vm_page_lockspin_queues();
4296 vm_page_free_prepare_queues(mem);
4297 vm_page_unlock_queues();
4298
4299 vm_page_free_prepare_object(mem, remove_from_hash);
4300
4301 if (mem->vmp_fictitious) {
4302 vm_page_release_fictitious(mem);
4303 } else {
4304 vm_page_release(mem, FALSE); /* page queues are not locked */
4305 }
4306}
4307
4308
4309/*
4310 * Free a list of pages. The list can be up to several hundred pages,
4311 * as blocked up by vm_pageout_scan().
4312 * The big win is not having to take the free list lock once
4313 * per page.
4314 *
4315 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
4316 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
4317 */
4318void
4319vm_page_free_list(
4320 vm_page_t freeq,
4321 boolean_t prepare_object)
4322{
4323 vm_page_t mem;
4324 vm_page_t nxt;
4325 vm_page_t local_freeq;
4326 int pg_count;
4327
4328 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
4329 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
4330
4331 while (freeq) {
4332 pg_count = 0;
4333 local_freeq = VM_PAGE_NULL;
4334 mem = freeq;
4335
4336 /*
4337 * break up the processing into smaller chunks so
4338 * that we can 'pipeline' the pages onto the
4339 * free list w/o introducing too much
4340 * contention on the global free queue lock
4341 */
4342 while (mem && pg_count < 64) {
4343 assert((mem->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
4344 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
4345 assert(mem->vmp_specialq.next == 0 &&
4346 mem->vmp_specialq.prev == 0);
4347 /*
4348 * &&
4349 * mem->vmp_on_specialq == VM_PAGE_SPECIAL_Q_EMPTY);
4350 */
4351 nxt = mem->vmp_snext;
4352 mem->vmp_snext = NULL;
4353 assert(mem->vmp_pageq.prev == 0);
4354
4355#if MACH_ASSERT
4356 if (vm_check_refs_on_free) {
4357 if (!mem->vmp_fictitious && !mem->vmp_private) {
4358 vm_page_validate_no_references(mem);
4359 }
4360 }
4361#endif /* MACH_ASSERT */
4362
4363 if (__improbable(mem->vmp_realtime)) {
4364 vm_page_lock_queues();
4365 if (mem->vmp_realtime) {
4366 mem->vmp_realtime = false;
4367 vm_page_realtime_count--;
4368 }
4369 vm_page_unlock_queues();
4370 }
4371
4372 if (prepare_object == TRUE) {
4373 vm_page_free_prepare_object(mem, TRUE);
4374 }
4375
4376 if (!mem->vmp_fictitious) {
4377 assert(mem->vmp_busy);
4378
4379 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
4380 vm_lopage_free_count < vm_lopage_free_limit &&
4381 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
4382 vm_page_release(mem, FALSE); /* page queues are not locked */
4383#if CONFIG_SECLUDED_MEMORY
4384 } else if (vm_page_secluded_count < vm_page_secluded_target &&
4385 num_tasks_can_use_secluded_mem == 0) {
4386 vm_page_release(mem,
4387 FALSE); /* page queues are not locked */
4388#endif /* CONFIG_SECLUDED_MEMORY */
4389 } else {
4390 /*
4391 * IMPORTANT: we can't set the page "free" here
4392 * because that would make the page eligible for
4393 * a physically-contiguous allocation (see
4394 * vm_page_find_contiguous()) right away (we don't
4395 * hold the vm_page_queue_free lock). That would
4396 * cause trouble because the page is not actually
4397 * in the free queue yet...
4398 */
4399 mem->vmp_snext = local_freeq;
4400 local_freeq = mem;
4401 pg_count++;
4402
4403 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
4404 }
4405 } else {
4406 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
4407 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
4408 vm_page_release_fictitious(mem);
4409 }
4410 mem = nxt;
4411 }
4412 freeq = mem;
4413
4414 if ((mem = local_freeq)) {
4415 unsigned int avail_free_count;
4416 unsigned int need_wakeup = 0;
4417 unsigned int need_priv_wakeup = 0;
4418#if CONFIG_SECLUDED_MEMORY
4419 unsigned int need_wakeup_secluded = 0;
4420#endif /* CONFIG_SECLUDED_MEMORY */
4421 event_t priv_wakeup_event, secluded_wakeup_event, normal_wakeup_event;
4422 boolean_t priv_wakeup_all, secluded_wakeup_all, normal_wakeup_all;
4423
4424 vm_free_page_lock_spin();
4425
4426 while (mem) {
4427 int color;
4428
4429 nxt = mem->vmp_snext;
4430
4431 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
4432 assert(mem->vmp_busy);
4433 assert(!mem->vmp_realtime);
4434 mem->vmp_lopage = FALSE;
4435 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
4436
4437 color = VM_PAGE_GET_COLOR(mem);
4438#if defined(__x86_64__)
4439 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
4440#else
4441 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
4442 mem, vmp_pageq);
4443#endif
4444 mem = nxt;
4445 }
4446 vm_pageout_vminfo.vm_page_pages_freed += pg_count;
4447 vm_page_free_count += pg_count;
4448 avail_free_count = vm_page_free_count;
4449
4450 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, pg_count, 0, 0, 0);
4451
4452 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
4453 if (avail_free_count < vm_page_free_wanted_privileged) {
4454 need_priv_wakeup = avail_free_count;
4455 vm_page_free_wanted_privileged -= avail_free_count;
4456 avail_free_count = 0;
4457 } else {
4458 need_priv_wakeup = vm_page_free_wanted_privileged;
4459 avail_free_count -= vm_page_free_wanted_privileged;
4460 vm_page_free_wanted_privileged = 0;
4461 }
4462 }
4463#if CONFIG_SECLUDED_MEMORY
4464 if (vm_page_free_wanted_secluded > 0 &&
4465 avail_free_count > vm_page_free_reserved) {
4466 unsigned int available_pages;
4467 available_pages = (avail_free_count -
4468 vm_page_free_reserved);
4469 if (available_pages <
4470 vm_page_free_wanted_secluded) {
4471 need_wakeup_secluded = available_pages;
4472 vm_page_free_wanted_secluded -=
4473 available_pages;
4474 avail_free_count -= available_pages;
4475 } else {
4476 need_wakeup_secluded =
4477 vm_page_free_wanted_secluded;
4478 avail_free_count -=
4479 vm_page_free_wanted_secluded;
4480 vm_page_free_wanted_secluded = 0;
4481 }
4482 }
4483#endif /* CONFIG_SECLUDED_MEMORY */
4484 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
4485 unsigned int available_pages;
4486
4487 available_pages = avail_free_count - vm_page_free_reserved;
4488
4489 if (available_pages >= vm_page_free_wanted) {
4490 need_wakeup = vm_page_free_wanted;
4491 vm_page_free_wanted = 0;
4492 } else {
4493 need_wakeup = available_pages;
4494 vm_page_free_wanted -= available_pages;
4495 }
4496 }
4497 vm_free_page_unlock();
4498
4499 priv_wakeup_event = NULL;
4500 secluded_wakeup_event = NULL;
4501 normal_wakeup_event = NULL;
4502
4503 priv_wakeup_all = FALSE;
4504 secluded_wakeup_all = FALSE;
4505 normal_wakeup_all = FALSE;
4506
4507
4508 if (need_priv_wakeup != 0) {
4509 /*
4510 * There shouldn't be that many VM-privileged threads,
4511 * so let's wake them all up, even if we don't quite
4512 * have enough pages to satisfy them all.
4513 */
4514 priv_wakeup_event = (event_t)&vm_page_free_wanted_privileged;
4515 priv_wakeup_all = TRUE;
4516 }
4517#if CONFIG_SECLUDED_MEMORY
4518 if (need_wakeup_secluded != 0 &&
4519 vm_page_free_wanted_secluded == 0) {
4520 secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
4521 secluded_wakeup_all = TRUE;
4522 need_wakeup_secluded = 0;
4523 } else {
4524 secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
4525 }
4526#endif /* CONFIG_SECLUDED_MEMORY */
4527 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
4528 /*
4529 * We don't expect to have any more waiters
4530 * after this, so let's wake them all up at
4531 * once.
4532 */
4533 normal_wakeup_event = (event_t) &vm_page_free_count;
4534 normal_wakeup_all = TRUE;
4535 need_wakeup = 0;
4536 } else {
4537 normal_wakeup_event = (event_t) &vm_page_free_count;
4538 }
4539
4540 if (priv_wakeup_event ||
4541#if CONFIG_SECLUDED_MEMORY
4542 secluded_wakeup_event ||
4543#endif /* CONFIG_SECLUDED_MEMORY */
4544 normal_wakeup_event) {
4545 if (vps_dynamic_priority_enabled) {
4546 if (priv_wakeup_all == TRUE) {
4547 wakeup_all_with_inheritor(priv_wakeup_event, THREAD_AWAKENED);
4548 }
4549
4550#if CONFIG_SECLUDED_MEMORY
4551 if (secluded_wakeup_all == TRUE) {
4552 wakeup_all_with_inheritor(secluded_wakeup_event, THREAD_AWAKENED);
4553 }
4554
4555 while (need_wakeup_secluded-- != 0) {
4556 /*
4557 * Wake up one waiter per page we just released.
4558 */
4559 wakeup_one_with_inheritor(secluded_wakeup_event,
4560 THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, NULL);
4561 }
4562#endif /* CONFIG_SECLUDED_MEMORY */
4563
4564 if (normal_wakeup_all == TRUE) {
4565 wakeup_all_with_inheritor(normal_wakeup_event, THREAD_AWAKENED);
4566 }
4567
4568 while (need_wakeup-- != 0) {
4569 /*
4570 * Wake up one waiter per page we just released.
4571 */
4572 wakeup_one_with_inheritor(normal_wakeup_event,
4573 THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH,
4574 NULL);
4575 }
4576 } else {
4577 /*
4578 * Non-priority-aware wakeups.
4579 */
4580
4581 if (priv_wakeup_all == TRUE) {
4582 thread_wakeup(priv_wakeup_event);
4583 }
4584
4585#if CONFIG_SECLUDED_MEMORY
4586 if (secluded_wakeup_all == TRUE) {
4587 thread_wakeup(secluded_wakeup_event);
4588 }
4589
4590 while (need_wakeup_secluded-- != 0) {
4591 /*
4592 * Wake up one waiter per page we just released.
4593 */
4594 thread_wakeup_one(secluded_wakeup_event);
4595 }
4596
4597#endif /* CONFIG_SECLUDED_MEMORY */
4598 if (normal_wakeup_all == TRUE) {
4599 thread_wakeup(normal_wakeup_event);
4600 }
4601
4602 while (need_wakeup-- != 0) {
4603 /*
4604 * Wake up one waiter per page we just released.
4605 */
4606 thread_wakeup_one(normal_wakeup_event);
4607 }
4608 }
4609 }
4610
4611 VM_CHECK_MEMORYSTATUS;
4612 }
4613 }
4614}
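/*
 * Illustrative sketch of the batching pattern this function expects
 * (the same vmp_snext chaining is used by vm_page_secluded_drain()
 * earlier in this file): pages are pushed onto a singly-linked local
 * list and then freed in one call, with neither queue lock held.
 *
 *	vm_page_t local_freeq = VM_PAGE_NULL;
 *
 *	mem->vmp_snext = local_freeq;	// repeat for each prepared page
 *	local_freeq = mem;
 *	...
 *	vm_page_free_list(local_freeq, TRUE);
 */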
4615
4616
4617/*
4618 * vm_page_wire:
4619 *
4620 * Mark this page as wired down by yet
4621 * another map, removing it from paging queues
4622 * as necessary.
4623 *
4624 * The page's object and the page queues must be locked.
4625 */
4626
4627
4628void
4629vm_page_wire(
4630 vm_page_t mem,
4631 vm_tag_t tag,
4632 boolean_t check_memorystatus)
4633{
4634 vm_object_t m_object;
4635
4636 m_object = VM_PAGE_OBJECT(mem);
4637
4638// dbgLog(current_thread(), mem->vmp_offset, m_object, 1); /* (TEST/DEBUG) */
4639
4640 VM_PAGE_CHECK(mem);
4641 if (m_object) {
4642 vm_object_lock_assert_exclusive(m_object);
4643 } else {
4644 /*
4645 * In theory, the page should be in an object before it
4646 * gets wired, since we need to hold the object lock
4647 * to update some fields in the page structure.
4648 * However, some code (i386 pmap, for example) might want
4649 * to wire a page before it gets inserted into an object.
4650 * That's somewhat OK, as long as nobody else can get to
4651 * that page and update it at the same time.
4652 */
4653 }
4654 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4655 if (!VM_PAGE_WIRED(mem)) {
4656 if (mem->vmp_laundry) {
4657 vm_pageout_steal_laundry(mem, TRUE);
4658 }
4659
4660 vm_page_queues_remove(mem, TRUE);
4661
4662 assert(mem->vmp_wire_count == 0);
4663 mem->vmp_q_state = VM_PAGE_IS_WIRED;
4664
4665#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
4666 if (mem->vmp_unmodified_ro == true) {
4667 /* Object and PageQ locks are held*/
4668 mem->vmp_unmodified_ro = false;
4669 os_atomic_dec(&compressor_ro_uncompressed, relaxed);
4670 VM_COMPRESSOR_PAGER_STATE_CLR(VM_PAGE_OBJECT(mem), mem->vmp_offset);
4671 }
4672#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
4673
4674 if (m_object) {
4675 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4676 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
4677 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
4678
4679 assert(m_object->resident_page_count >=
4680 m_object->wired_page_count);
4681 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4682 assert(vm_page_purgeable_count > 0);
4683 OSAddAtomic(-1, &vm_page_purgeable_count);
4684 OSAddAtomic(1, &vm_page_purgeable_wired_count);
4685 }
4686 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4687 m_object->purgable == VM_PURGABLE_EMPTY) &&
4688 m_object->vo_owner != TASK_NULL) {
4689 task_t owner;
4690 int ledger_idx_volatile;
4691 int ledger_idx_nonvolatile;
4692 int ledger_idx_volatile_compressed;
4693 int ledger_idx_nonvolatile_compressed;
4694 boolean_t do_footprint;
4695
4696 owner = VM_OBJECT_OWNER(m_object);
4697 vm_object_ledger_tag_ledgers(
4698 m_object,
4699 &ledger_idx_volatile,
4700 &ledger_idx_nonvolatile,
4701 &ledger_idx_volatile_compressed,
4702 &ledger_idx_nonvolatile_compressed,
4703 &do_footprint);
4704 /* less volatile bytes */
4705 ledger_debit(owner->ledger,
4706 ledger_idx_volatile,
4707 PAGE_SIZE);
4708 /* more not-quite-volatile bytes */
4709 ledger_credit(owner->ledger,
4710 ledger_idx_nonvolatile,
4711 PAGE_SIZE);
4712 if (do_footprint) {
4713 /* more footprint */
4714 ledger_credit(owner->ledger,
4715 task_ledgers.phys_footprint,
4716 PAGE_SIZE);
4717 }
4718 }
4719 if (m_object->all_reusable) {
4720 /*
4721 * Wired pages are not counted as "re-usable"
4722 * in "all_reusable" VM objects, so nothing
4723 * to do here.
4724 */
4725 } else if (mem->vmp_reusable) {
4726 /*
4727 * This page is not "re-usable" when it's
4728 * wired, so adjust its state and the
4729 * accounting.
4730 */
4731 vm_page_lockconvert_queues();
4732 vm_object_reuse_pages(m_object,
4733 mem->vmp_offset,
4734 mem->vmp_offset + PAGE_SIZE_64,
4735 FALSE);
4736 }
4737 }
4738 assert(!mem->vmp_reusable);
4739
4740 if (!mem->vmp_private && !mem->vmp_fictitious && !mem->vmp_gobbled) {
4741 vm_page_wire_count++;
4742 }
4743 if (mem->vmp_gobbled) {
4744 vm_page_gobble_count--;
4745 }
4746 mem->vmp_gobbled = FALSE;
4747
4748 if (check_memorystatus == TRUE) {
4749 VM_CHECK_MEMORYSTATUS;
4750 }
4751 }
4752 assert(!mem->vmp_gobbled);
4753 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
4754 mem->vmp_wire_count++;
4755 if (__improbable(mem->vmp_wire_count == 0)) {
4756 panic("vm_page_wire(%p): wire_count overflow", mem);
4757 }
4758 VM_PAGE_CHECK(mem);
4759}
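/*
 * Illustrative wire/unwire pairing (a sketch, not a specific call site;
 * VM_KERN_MEMORY_OSFMK is just an example tag): both the object lock
 * and the page queues lock are held across the calls.
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(mem, VM_KERN_MEMORY_OSFMK, TRUE);
 *	...
 *	vm_page_unwire(mem, TRUE);	// requeues the page when the last wiring drops
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */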
4760
4761/*
4762 * vm_page_unwire:
4763 *
4764 * Release one wiring of this page, potentially
4765 * enabling it to be paged again.
4766 *
4767 * The page's object and the page queues must be locked.
4768 */
4769void
4770vm_page_unwire(
4771 vm_page_t mem,
4772 boolean_t queueit)
4773{
4774 vm_object_t m_object;
4775
4776 m_object = VM_PAGE_OBJECT(mem);
4777
4778// dbgLog(current_thread(), mem->vmp_offset, m_object, 0); /* (TEST/DEBUG) */
4779
4780 VM_PAGE_CHECK(mem);
4781 assert(VM_PAGE_WIRED(mem));
4782 assert(mem->vmp_wire_count > 0);
4783 assert(!mem->vmp_gobbled);
4784 assert(m_object != VM_OBJECT_NULL);
4785 vm_object_lock_assert_exclusive(m_object);
4786 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4787 if (--mem->vmp_wire_count == 0) {
4788 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
4789
4790 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4791 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4792 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
4793 if (!mem->vmp_private && !mem->vmp_fictitious) {
4794 vm_page_wire_count--;
4795 }
4796
4797 assert(m_object->resident_page_count >=
4798 m_object->wired_page_count);
4799 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4800 OSAddAtomic(+1, &vm_page_purgeable_count);
4801 assert(vm_page_purgeable_wired_count > 0);
4802 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4803 }
4804 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4805 m_object->purgable == VM_PURGABLE_EMPTY) &&
4806 m_object->vo_owner != TASK_NULL) {
4807 task_t owner;
4808 int ledger_idx_volatile;
4809 int ledger_idx_nonvolatile;
4810 int ledger_idx_volatile_compressed;
4811 int ledger_idx_nonvolatile_compressed;
4812 boolean_t do_footprint;
4813
4814 owner = VM_OBJECT_OWNER(m_object);
4815 vm_object_ledger_tag_ledgers(
4816 m_object,
4817 &ledger_idx_volatile,
4818 &ledger_idx_nonvolatile,
4819 &ledger_idx_volatile_compressed,
4820 &ledger_idx_nonvolatile_compressed,
4821 &do_footprint);
4822 /* more volatile bytes */
4823 ledger_credit(owner->ledger,
4824 ledger_idx_volatile,
4825 PAGE_SIZE);
4826 /* less not-quite-volatile bytes */
4827 ledger_debit(owner->ledger,
4828 ledger_idx_nonvolatile,
4829 PAGE_SIZE);
4830 if (do_footprint) {
4831 /* less footprint */
4832 ledger_debit(owner->ledger,
4833 task_ledgers.phys_footprint,
4834 PAGE_SIZE);
4835 }
4836 }
4837 assert(!is_kernel_object(m_object));
4838 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
4839
4840 if (queueit == TRUE) {
4841 if (m_object->purgable == VM_PURGABLE_EMPTY) {
4842 vm_page_deactivate(mem);
4843 } else {
4844 vm_page_activate(mem);
4845 }
4846 }
4847
4848 VM_CHECK_MEMORYSTATUS;
4849 }
4850 VM_PAGE_CHECK(mem);
4851}
4852
4853/*
4854 * vm_page_deactivate:
4855 *
4856 * Returns the given page to the inactive list,
4857 * indicating that no physical maps have access
4858 * to this page. [Used by the physical mapping system.]
4859 *
4860 * The page queues must be locked.
4861 */
4862void
4863vm_page_deactivate(
4864 vm_page_t m)
4865{
4866 vm_page_deactivate_internal(m, TRUE);
4867}
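/*
 * Caller sketch (illustrative): the physical mapping system invokes
 * this with the page queues locked, e.g.
 *
 *	vm_page_lock_queues();
 *	vm_page_deactivate(m);
 *	vm_page_unlock_queues();
 */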
4868
4869
4870void
4871vm_page_deactivate_internal(
4872 vm_page_t m,
4873 boolean_t clear_hw_reference)
4874{
4875 vm_object_t m_object;
4876
4877 m_object = VM_PAGE_OBJECT(m);
4878
4879 VM_PAGE_CHECK(m);
4880 assert(!is_kernel_object(m_object));
4881 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4882
4883// dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4884 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4885 /*
4886 * This page is no longer very interesting. If it was
4887 * interesting (active or inactive/referenced), then we
4888 * clear the reference bit and (re)enter it in the
4889 * inactive queue. Note wired pages should not have
4890 * their reference bit cleared.
4891 */
4892 assert( !(m->vmp_absent && !m->vmp_unusual));
4893
4894 if (m->vmp_gobbled) { /* can this happen? */
4895 assert( !VM_PAGE_WIRED(m));
4896
4897 if (!m->vmp_private && !m->vmp_fictitious) {
4898 vm_page_wire_count--;
4899 }
4900 vm_page_gobble_count--;
4901 m->vmp_gobbled = FALSE;
4902 }
4903 /*
4904 * if this page is currently on the pageout queue, we can't do the
4905 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4906 * and we can't remove it manually since we would need the object lock
4907 * (which is not required here) to decrement the activity_in_progress
4908 * reference which is held on the object while the page is in the pageout queue...
4909 * just let the normal laundry processing proceed
4910 */
4911 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4912 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4913 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4914 VM_PAGE_WIRED(m)) {
4915 return;
4916 }
4917 if (!m->vmp_absent && clear_hw_reference == TRUE) {
4918 vm_page_lockconvert_queues();
4919 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4920 }
4921
4922 m->vmp_reference = FALSE;
4923 m->vmp_no_cache = FALSE;
4924
4925 if (!VM_PAGE_INACTIVE(m)) {
4926 vm_page_queues_remove(m, FALSE);
4927
4928 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4929 m->vmp_dirty && m_object->internal &&
4930 (m_object->purgable == VM_PURGABLE_DENY ||
4931 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4932 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4933 vm_page_check_pageable_safe(m);
4934 vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
4935 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
4936 vm_page_throttled_count++;
4937 } else {
4938 if (m_object->named && m_object->ref_count == 1) {
4939 vm_page_speculate(m, FALSE);
4940#if DEVELOPMENT || DEBUG
4941 vm_page_speculative_recreated++;
4942#endif
4943 } else {
4944 vm_page_enqueue_inactive(m, FALSE);
4945 }
4946 }
4947 }
4948}
4949
4950/*
4951 * vm_page_enqueue_cleaned
4952 *
4953 * Put the page on the cleaned queue, mark it cleaned, etc.
4954 * Being on the cleaned queue (and having m->clean_queue set)
4955 * does ** NOT ** guarantee that the page is clean!
4956 *
4957 * Call with the queues lock held.
4958 */
4959
4960void
4961vm_page_enqueue_cleaned(vm_page_t m)
4962{
4963 vm_object_t m_object;
4964
4965 m_object = VM_PAGE_OBJECT(m);
4966
4967 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4968 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4969 assert( !(m->vmp_absent && !m->vmp_unusual));
4970
4971 if (VM_PAGE_WIRED(m)) {
4972 return;
4973 }
4974
4975 if (m->vmp_gobbled) {
4976 if (!m->vmp_private && !m->vmp_fictitious) {
4977 vm_page_wire_count--;
4978 }
4979 vm_page_gobble_count--;
4980 m->vmp_gobbled = FALSE;
4981 }
4982 /*
4983 * if this page is currently on the pageout queue, we can't do the
4984 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4985 * and we can't remove it manually since we would need the object lock
4986 * (which is not required here) to decrement the activity_in_progress
4987 * reference which is held on the object while the page is in the pageout queue...
4988 * just let the normal laundry processing proceed
4989 */
4990 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4991 (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4992 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4993 return;
4994 }
4995 vm_page_queues_remove(m, FALSE);
4996
4997 vm_page_check_pageable_safe(m);
4998 vm_page_queue_enter(&vm_page_queue_cleaned, m, vmp_pageq);
4999 m->vmp_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
5000 vm_page_cleaned_count++;
5001
5002 vm_page_inactive_count++;
5003 if (m_object->internal) {
5004 vm_page_pageable_internal_count++;
5005 } else {
5006 vm_page_pageable_external_count++;
5007 }
5008 vm_page_add_to_specialq(m, TRUE);
5009 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
5010}
5011
5012/*
5013 * vm_page_activate:
5014 *
5015 * Put the specified page on the active list (if appropriate).
5016 *
5017 * The page queues must be locked.
5018 */
5019
5020void
5021vm_page_activate(
5022 vm_page_t m)
5023{
5024 vm_object_t m_object;
5025
5026 m_object = VM_PAGE_OBJECT(m);
5027
5028 VM_PAGE_CHECK(m);
5029#ifdef FIXME_4778297
5030 assert(!is_kernel_object(m_object));
5031#endif
5032 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
5033 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
5034 assert( !(m->vmp_absent && !m->vmp_unusual));
5035
5036 if (m->vmp_gobbled) {
5037 assert( !VM_PAGE_WIRED(m));
5038 if (!m->vmp_private && !m->vmp_fictitious) {
5039 vm_page_wire_count--;
5040 }
5041 vm_page_gobble_count--;
5042 m->vmp_gobbled = FALSE;
5043 }
5044 /*
5045 * if this page is currently on the pageout queue, we can't do the
5046 * vm_page_queues_remove (which doesn't handle the pageout queue case)
5047 * and we can't remove it manually since we would need the object lock
5048 * (which is not required here) to decrement the activity_in_progress
5049 * reference which is held on the object while the page is in the pageout queue...
5050 * just let the normal laundry processing proceed
5051 */
5052 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
5053 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
5054 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5055 return;
5056 }
5057
5058#if DEBUG
5059 if (m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q) {
5060 panic("vm_page_activate: already active");
5061 }
5062#endif
5063
5064 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
5065 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
5066 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
5067 }
5068
5069 /*
5070 * A freshly activated page should be promoted in the donation queue.
5071 * So we remove it here while preserving its hint and we will enqueue
5072 * it again in vm_page_enqueue_active.
5073 */
5074 vm_page_queues_remove(m, ((m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE) ? TRUE : FALSE));
5075
5076 if (!VM_PAGE_WIRED(m)) {
5077 vm_page_check_pageable_safe(m);
5078 if (!VM_DYNAMIC_PAGING_ENABLED() &&
5079 m->vmp_dirty && m_object->internal &&
5080 (m_object->purgable == VM_PURGABLE_DENY ||
5081 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
5082 m_object->purgable == VM_PURGABLE_VOLATILE)) {
5083 vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
5084 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
5085 vm_page_throttled_count++;
5086 } else {
5087#if CONFIG_SECLUDED_MEMORY
5088 if (secluded_for_filecache &&
5089 vm_page_secluded_target != 0 &&
5090 num_tasks_can_use_secluded_mem == 0 &&
5091 m_object->eligible_for_secluded &&
5092 !m->vmp_realtime) {
5093 vm_page_queue_enter(&vm_page_queue_secluded, m, vmp_pageq);
5094 m->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
5095 vm_page_secluded_count++;
5096 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
5097 vm_page_secluded_count_inuse++;
5098 assert(!m_object->internal);
5099// vm_page_pageable_external_count++;
5100 } else
5101#endif /* CONFIG_SECLUDED_MEMORY */
5102 vm_page_enqueue_active(m, FALSE);
5103 }
5104 m->vmp_reference = TRUE;
5105 m->vmp_no_cache = FALSE;
5106 }
5107 VM_PAGE_CHECK(m);
5108}
5109
5110
5111/*
5112 * vm_page_speculate:
5113 *
5114 * Put the specified page on the speculative list (if appropriate).
5115 *
5116 * The page queues must be locked.
5117 */
5118void
5119vm_page_speculate(
5120 vm_page_t m,
5121 boolean_t new)
5122{
5123 struct vm_speculative_age_q *aq;
5124 vm_object_t m_object;
5125
5126 m_object = VM_PAGE_OBJECT(m);
5127
5128 VM_PAGE_CHECK(m);
5129 vm_page_check_pageable_safe(m);
5130
5131 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
5132 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
5133 assert( !(m->vmp_absent && !m->vmp_unusual));
5134 assert(m_object->internal == FALSE);
5135
5136 /*
5137 * if this page is currently on the pageout queue, we can't do the
5138 * vm_page_queues_remove (which doesn't handle the pageout queue case)
5139 * and we can't remove it manually since we would need the object lock
5140 * (which is not required here) to decrement the activity_in_progress
5141 * reference which is held on the object while the page is in the pageout queue...
5142 * just let the normal laundry processing proceed
5143 */
5144 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
5145 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
5146 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5147 return;
5148 }
5149
5150 vm_page_queues_remove(m, FALSE);
5151
5152 if (!VM_PAGE_WIRED(m)) {
5153 mach_timespec_t ts;
5154 clock_sec_t sec;
5155 clock_nsec_t nsec;
5156
5157 clock_get_system_nanotime(&sec, &nsec);
5158 ts.tv_sec = (unsigned int) sec;
5159 ts.tv_nsec = nsec;
5160
5161 if (vm_page_speculative_count == 0) {
5162 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5163 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5164
5165 aq = &vm_page_queue_speculative[speculative_age_index];
5166
5167 /*
5168 * set the timer to begin a new group
5169 */
5170 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
5171 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
5172
5173 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
5174 } else {
5175 aq = &vm_page_queue_speculative[speculative_age_index];
5176
5177 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
5178 speculative_age_index++;
5179
5180 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
5181 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5182 }
5183 if (speculative_age_index == speculative_steal_index) {
5184 speculative_steal_index = speculative_age_index + 1;
5185
5186 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
5187 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5188 }
5189 }
5190 aq = &vm_page_queue_speculative[speculative_age_index];
5191
5192 if (!vm_page_queue_empty(&aq->age_q)) {
5193 vm_page_speculate_ageit(aq);
5194 }
5195
5196 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
5197 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
5198
5199 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
5200 }
5201 }
5202 vm_page_enqueue_tail(&aq->age_q, &m->vmp_pageq);
5203 m->vmp_q_state = VM_PAGE_ON_SPECULATIVE_Q;
5204 vm_page_speculative_count++;
5205 vm_page_pageable_external_count++;
5206
5207 if (new == TRUE) {
5208 vm_object_lock_assert_exclusive(m_object);
5209
5210 m_object->pages_created++;
5211#if DEVELOPMENT || DEBUG
5212 vm_page_speculative_created++;
5213#endif
5214 }
5215 }
5216 VM_PAGE_CHECK(m);
5217}
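/*
 * Worked example for the age_ts computation above (illustrative): with
 * vm_page_speculative_q_age_ms == 1500, age_ts gets tv_sec == 1 and
 * tv_nsec == 500 * 1000 * NSEC_PER_USEC (i.e. 500ms), so after
 * ADD_MACH_TIMESPEC() the bin ages out 1.5 seconds past the current
 * system time.
 */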
5218
5219
5220/*
5221 * move pages from the specified aging bin to
5222 * the speculative bin that pageout_scan claims from
5223 *
5224 * The page queues must be locked.
5225 */
5226void
5227vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
5228{
5229 struct vm_speculative_age_q *sq;
5230 vm_page_t t;
5231
5232 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
5233
5234 if (vm_page_queue_empty(&sq->age_q)) {
5235 sq->age_q.next = aq->age_q.next;
5236 sq->age_q.prev = aq->age_q.prev;
5237
5238 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
5239 t->vmp_pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
5240
5241 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
5242 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
5243 } else {
5244 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
5245 t->vmp_pageq.next = aq->age_q.next;
5246
5247 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
5248 t->vmp_pageq.prev = sq->age_q.prev;
5249
5250 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
5251 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
5252
5253 sq->age_q.prev = aq->age_q.prev;
5254 }
5255 vm_page_queue_init(&aq->age_q);
5256}
5257
5258
5259void
5260vm_page_lru(
5261 vm_page_t m)
5262{
5263 VM_PAGE_CHECK(m);
5264 assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
5265 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
5266
5267 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
5268
5269 if (m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q) {
5270 /*
5271 * we don't need to do all the other work that
5272 * vm_page_queues_remove and vm_page_enqueue_inactive
5273 * bring along for the ride
5274 */
5275 assert(!m->vmp_laundry);
5276 assert(!m->vmp_private);
5277
5278 m->vmp_no_cache = FALSE;
5279
5280 vm_page_queue_remove(&vm_page_queue_inactive, m, vmp_pageq);
5281 vm_page_queue_enter(&vm_page_queue_inactive, m, vmp_pageq);
5282
5283 return;
5284 }
5285 /*
5286 * if this page is currently on the pageout queue, we can't do the
5287 * vm_page_queues_remove (which doesn't handle the pageout queue case)
5288 * and we can't remove it manually since we would need the object lock
5289 * (which is not required here) to decrement the activity_in_progress
5290 * reference which is held on the object while the page is in the pageout queue...
5291 * just let the normal laundry processing proceed
5292 */
5293 if (m->vmp_laundry || m->vmp_private ||
5294 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
5295 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
5296 VM_PAGE_WIRED(m)) {
5297 return;
5298 }
5299
5300 m->vmp_no_cache = FALSE;
5301
5302 vm_page_queues_remove(m, FALSE);
5303
5304 vm_page_enqueue_inactive(m, FALSE);
5305}
5306
5307
5308void
5309vm_page_reactivate_all_throttled(void)
5310{
5311 vm_page_t first_throttled, last_throttled;
5312 vm_page_t first_active;
5313 vm_page_t m;
5314 int extra_active_count;
5315 int extra_internal_count, extra_external_count;
5316 vm_object_t m_object;
5317
5318 if (!VM_DYNAMIC_PAGING_ENABLED()) {
5319 return;
5320 }
5321
5322 extra_active_count = 0;
5323 extra_internal_count = 0;
5324 extra_external_count = 0;
5325 vm_page_lock_queues();
5326 if (!vm_page_queue_empty(&vm_page_queue_throttled)) {
5327 /*
5328 * Switch "throttled" pages to "active".
5329 */
5330 vm_page_queue_iterate(&vm_page_queue_throttled, m, vmp_pageq) {
5331 VM_PAGE_CHECK(m);
5332 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
5333
5334 m_object = VM_PAGE_OBJECT(m);
5335
5336 extra_active_count++;
5337 if (m_object->internal) {
5338 extra_internal_count++;
5339 } else {
5340 extra_external_count++;
5341 }
5342
5343 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
5344 VM_PAGE_CHECK(m);
5345 vm_page_add_to_specialq(m, FALSE);
5346 }
5347
5348 /*
5349 * Transfer the entire throttled queue to the regular LRU page queues.
5350 * We insert it at the head of the active queue, so that these pages
5351 * get re-evaluated by the LRU algorithm first, since they've been
5352 * completely out of it until now.
5353 */
5354 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
5355 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
5356 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
5357 if (vm_page_queue_empty(&vm_page_queue_active)) {
5358 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
5359 } else {
5360 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
5361 }
5362 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
5363 first_throttled->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
5364 last_throttled->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
5365
5366#if DEBUG
5367 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
5368#endif
5369 vm_page_queue_init(&vm_page_queue_throttled);
5370 /*
5371 * Adjust the global page counts.
5372 */
5373 vm_page_active_count += extra_active_count;
5374 vm_page_pageable_internal_count += extra_internal_count;
5375 vm_page_pageable_external_count += extra_external_count;
5376 vm_page_throttled_count = 0;
5377 }
5378 assert(vm_page_throttled_count == 0);
5379 assert(vm_page_queue_empty(&vm_page_queue_throttled));
5380 vm_page_unlock_queues();
5381}
5382
5383
5384/*
5385 * move pages from the indicated local queue to the global active queue
5386 * it's OK to fail if we're below the hard limit and force == FALSE.
5387 * The nolocks == TRUE case is to allow this function to be run on
5388 * the hibernate path.
5389 */
5390
5391void
5392vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
5393{
5394 struct vpl *lq;
5395 vm_page_t first_local, last_local;
5396 vm_page_t first_active;
5397 vm_page_t m;
5398 uint32_t count = 0;
5399
5400 if (vm_page_local_q == NULL) {
5401 return;
5402 }
5403
5404 lq = zpercpu_get_cpu(vm_page_local_q, lid);
5405
5406 if (nolocks == FALSE) {
5407 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
5408 if (!vm_page_trylockspin_queues()) {
5409 return;
5410 }
5411 } else {
5412 vm_page_lockspin_queues();
5413 }
5414
5415 VPL_LOCK(&lq->vpl_lock);
5416 }
5417 if (lq->vpl_count) {
5418 /*
5419 * Switch "local" pages to "active".
5420 */
5421 assert(!vm_page_queue_empty(&lq->vpl_queue));
5422
5423 vm_page_queue_iterate(&lq->vpl_queue, m, vmp_pageq) {
5424 VM_PAGE_CHECK(m);
5425 vm_page_check_pageable_safe(m);
5426 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
5427 assert(!m->vmp_fictitious);
5428
5429 if (m->vmp_local_id != lid) {
5430 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
5431 }
5432
5433 m->vmp_local_id = 0;
5434 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
5435 VM_PAGE_CHECK(m);
5436 vm_page_add_to_specialq(m, FALSE);
5437 count++;
5438 }
5439 if (count != lq->vpl_count) {
5440 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d", count, lq->vpl_count);
5441 }
5442
5443 /*
5444 * Transfer the entire local queue to the regular LRU page queues.
5445 */
5446 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
5447 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
5448 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
5449
5450 if (vm_page_queue_empty(&vm_page_queue_active)) {
5451 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
5452 } else {
5453 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
5454 }
5455 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
5456 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
5457 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
5458
5459 vm_page_queue_init(&lq->vpl_queue);
5460 /*
5461 * Adjust the global page counts.
5462 */
5463 vm_page_active_count += lq->vpl_count;
5464 vm_page_pageable_internal_count += lq->vpl_internal_count;
5465 vm_page_pageable_external_count += lq->vpl_external_count;
5466 lq->vpl_count = 0;
5467 lq->vpl_internal_count = 0;
5468 lq->vpl_external_count = 0;
5469 }
5470 assert(vm_page_queue_empty(&lq->vpl_queue));
5471
5472 if (nolocks == FALSE) {
5473 VPL_UNLOCK(&lq->vpl_lock);
5474
5475 vm_page_balance_inactive(count / 4);
5476 vm_page_unlock_queues();
5477 }
5478}
5479
5480/*
5481 * vm_page_part_zero_fill:
5482 *
5483 * Zero-fill a part of the page.
5484 */
5485#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
5486void
5487vm_page_part_zero_fill(
5488 vm_page_t m,
5489 vm_offset_t m_pa,
5490 vm_size_t len)
5491{
5492#if 0
5493 /*
5494 * we don't hold the page queue lock
5495 * so this check isn't safe to make
5496 */
5497 VM_PAGE_CHECK(m);
5498#endif
5499
5500#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
5501	pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
5502#else
5503 vm_page_t tmp;
5504 while (1) {
5505 tmp = vm_page_grab();
5506 if (tmp == VM_PAGE_NULL) {
5507 vm_page_wait(THREAD_UNINT);
5508 continue;
5509 }
5510 break;
5511 }
5512 vm_page_zero_fill(tmp);
5513 if (m_pa != 0) {
5514 vm_page_part_copy(m, 0, tmp, 0, m_pa);
5515 }
5516 if ((m_pa + len) < PAGE_SIZE) {
5517 vm_page_part_copy(m, m_pa + len, tmp,
5518 m_pa + len, PAGE_SIZE - (m_pa + len));
5519 }
5520 vm_page_copy(tmp, m);
5521 VM_PAGE_FREE(tmp);
5522#endif
5523}
5524
5525/*
5526 * vm_page_zero_fill:
5527 *
5528 * Zero-fill the specified page.
5529 */
5530void
5531vm_page_zero_fill(
5532 vm_page_t m)
5533{
5534#if 0
5535 /*
5536 * we don't hold the page queue lock
5537 * so this check isn't safe to make
5538 */
5539 VM_PAGE_CHECK(m);
5540#endif
5541
5542// dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
5543	pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
5544}
5545
5546/*
5547 * vm_page_part_copy:
5548 *
5549 * copy part of one page to another
5550 */
5551
5552void
5553vm_page_part_copy(
5554 vm_page_t src_m,
5555 vm_offset_t src_pa,
5556 vm_page_t dst_m,
5557 vm_offset_t dst_pa,
5558 vm_size_t len)
5559{
5560#if 0
5561 /*
5562 * we don't hold the page queue lock
5563 * so this check isn't safe to make
5564 */
5565 VM_PAGE_CHECK(src_m);
5566 VM_PAGE_CHECK(dst_m);
5567#endif
5568	pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
5569	    VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
5570}
5571
5572/*
5573 * vm_page_copy:
5574 *
5575 * Copy one page to another
5576 */
5577
5578int vm_page_copy_cs_validations = 0;
5579int vm_page_copy_cs_tainted = 0;
5580
5581void
5582vm_page_copy(
5583 vm_page_t src_m,
5584 vm_page_t dest_m)
5585{
5586 vm_object_t src_m_object;
5587
5588 src_m_object = VM_PAGE_OBJECT(src_m);
5589
5590#if 0
5591 /*
5592 * we don't hold the page queue lock
5593 * so this check isn't safe to make
5594 */
5595 VM_PAGE_CHECK(src_m);
5596 VM_PAGE_CHECK(dest_m);
5597#endif
5598 vm_object_lock_assert_held(src_m_object);
5599
5600 if (src_m_object != VM_OBJECT_NULL &&
5601 src_m_object->code_signed) {
5602 /*
5603 * We're copying a page from a code-signed object.
5604 * Whoever ends up mapping the copy page might care about
5605 * the original page's integrity, so let's validate the
5606 * source page now.
5607 */
5608 vm_page_copy_cs_validations++;
5609		vm_page_validate_cs(src_m, PAGE_SIZE, 0);
5610#if DEVELOPMENT || DEBUG
5611 DTRACE_VM4(codesigned_copy,
5612 vm_object_t, src_m_object,
5613 vm_object_offset_t, src_m->vmp_offset,
5614 int, src_m->vmp_cs_validated,
5615 int, src_m->vmp_cs_tainted);
5616#endif /* DEVELOPMENT || DEBUG */
5617 }
5618
5619 /*
5620 * Propagate the cs_tainted bit to the copy page. Do not propagate
5621 * the cs_validated bit.
5622 */
5623 dest_m->vmp_cs_tainted = src_m->vmp_cs_tainted;
5624 dest_m->vmp_cs_nx = src_m->vmp_cs_nx;
5625 if (dest_m->vmp_cs_tainted) {
5626 vm_page_copy_cs_tainted++;
5627 }
5628 dest_m->vmp_error = VMP_ERROR_GET(src_m); /* sliding src_m might have failed... */
5629	pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
5630}
5631
5632#if MACH_ASSERT
5633static void
5634_vm_page_print(
5635 vm_page_t p)
5636{
5637 printf("vm_page %p: \n", p);
5638 printf(" pageq: next=%p prev=%p\n",
5639 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next),
5640 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev));
5641 printf(" listq: next=%p prev=%p\n",
5642 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.next)),
5643 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.prev)));
5644 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m)));
5645 printf(" object=%p offset=0x%llx\n", VM_PAGE_OBJECT(p), p->vmp_offset);
5646 printf(" wire_count=%u\n", p->vmp_wire_count);
5647 printf(" q_state=%u\n", p->vmp_q_state);
5648
5649 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
5650 (p->vmp_laundry ? "" : "!"),
5651 (p->vmp_reference ? "" : "!"),
5652 (p->vmp_gobbled ? "" : "!"),
5653 (p->vmp_private ? "" : "!"));
5654 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
5655 (p->vmp_busy ? "" : "!"),
5656 (p->vmp_wanted ? "" : "!"),
5657 (p->vmp_tabled ? "" : "!"),
5658 (p->vmp_fictitious ? "" : "!"),
5659 (p->vmp_pmapped ? "" : "!"),
5660 (p->vmp_wpmapped ? "" : "!"));
5661 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
5662 (p->vmp_free_when_done ? "" : "!"),
5663 (p->vmp_absent ? "" : "!"),
5664 (VMP_ERROR_GET(p) ? "" : "!"),
5665 (p->vmp_dirty ? "" : "!"),
5666 (p->vmp_cleaning ? "" : "!"),
5667 (p->vmp_precious ? "" : "!"),
5668 (p->vmp_clustered ? "" : "!"));
5669 printf(" %soverwriting, %srestart, %sunusual\n",
5670 (p->vmp_overwriting ? "" : "!"),
5671 (p->vmp_restart ? "" : "!"),
5672 (p->vmp_unusual ? "" : "!"));
5673 printf(" cs_validated=%d, cs_tainted=%d, cs_nx=%d, %sno_cache\n",
5674 p->vmp_cs_validated,
5675 p->vmp_cs_tainted,
5676 p->vmp_cs_nx,
5677 (p->vmp_no_cache ? "" : "!"));
5678
5679 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
5680}
5681
5682/*
5683 * Check that the list of pages is ordered by
5684 * ascending physical address and has no holes.
5685 */
5686static int
5687vm_page_verify_contiguous(
5688 vm_page_t pages,
5689 unsigned int npages)
5690{
5691 vm_page_t m;
5692 unsigned int page_count;
5693 vm_offset_t prev_addr;
5694
5695 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
5696 page_count = 1;
5697 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
5698 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5699 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
5700 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
5701 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
5702 panic("vm_page_verify_contiguous: not contiguous!");
5703 }
5704 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5705 ++page_count;
5706 }
5707 if (page_count != npages) {
5708 printf("pages %p actual count 0x%x but requested 0x%x\n",
5709 pages, page_count, npages);
5710 panic("vm_page_verify_contiguous: count error");
5711 }
5712 return 1;
5713}
5714
5715
5716/*
5717 * Check the free lists for proper length etc.
5718 */
5719static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
5720static unsigned int
5721vm_page_verify_free_list(
5722 vm_page_queue_head_t *vm_page_queue,
5723 unsigned int color,
5724 vm_page_t look_for_page,
5725 boolean_t expect_page)
5726{
5727 unsigned int npages;
5728 vm_page_t m;
5729 vm_page_t prev_m;
5730 boolean_t found_page;
5731
5732 if (!vm_page_verify_this_free_list_enabled) {
5733 return 0;
5734 }
5735
5736 found_page = FALSE;
5737 npages = 0;
5738 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
5739
5740 vm_page_queue_iterate(vm_page_queue, m, vmp_pageq) {
5741 if (m == look_for_page) {
5742 found_page = TRUE;
5743 }
5744 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev) != prev_m) {
5745 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p",
5746 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev), prev_m);
5747 }
5748 if (!m->vmp_busy) {
5749 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy",
5750 color, npages, m);
5751 }
5752 if (color != (unsigned int) -1) {
5753 if (VM_PAGE_GET_COLOR(m) != color) {
5754 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u",
5755 color, npages, m, VM_PAGE_GET_COLOR(m), color);
5756 }
5757 if (m->vmp_q_state != VM_PAGE_ON_FREE_Q) {
5758 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d",
5759 color, npages, m, m->vmp_q_state);
5760 }
5761 } else {
5762 if (m->vmp_q_state != VM_PAGE_ON_FREE_LOCAL_Q) {
5763 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d",
5764 npages, m, m->vmp_q_state);
5765 }
5766 }
5767 ++npages;
5768 prev_m = m;
5769 }
5770 if (look_for_page != VM_PAGE_NULL) {
5771 unsigned int other_color;
5772
5773 if (expect_page && !found_page) {
5774 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
5775 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5776 _vm_page_print(look_for_page);
5777 for (other_color = 0;
5778 other_color < vm_colors;
5779 other_color++) {
5780 if (other_color == color) {
5781 continue;
5782 }
5783 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
5784 other_color, look_for_page, FALSE);
5785 }
5786 if (color == (unsigned int) -1) {
5787 vm_page_verify_free_list(&vm_lopage_queue_free,
5788 (unsigned int) -1, look_for_page, FALSE);
5789 }
5790 panic("vm_page_verify_free_list(color=%u)", color);
5791 }
5792 if (!expect_page && found_page) {
5793 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
5794 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5795 }
5796 }
5797 return npages;
5798}
5799
5800static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
5801static void
5802vm_page_verify_free_lists( void )
5803{
5804 unsigned int color, npages, nlopages;
5805 boolean_t toggle = TRUE;
5806
5807 if (!vm_page_verify_all_free_lists_enabled) {
5808 return;
5809 }
5810
5811 npages = 0;
5812
5813 vm_free_page_lock();
5814
5815 if (vm_page_verify_this_free_list_enabled == TRUE) {
5816 /*
5817 * This variable has been set globally for extra checking of
5818 * each free list Q. Since we didn't set it, we don't own it
5819 * and we shouldn't toggle it.
5820 */
5821 toggle = FALSE;
5822 }
5823
5824 if (toggle == TRUE) {
5825 vm_page_verify_this_free_list_enabled = TRUE;
5826 }
5827
5828 for (color = 0; color < vm_colors; color++) {
5829 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
5830 color, VM_PAGE_NULL, FALSE);
5831 }
5832 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
5833 (unsigned int) -1,
5834 VM_PAGE_NULL, FALSE);
5835 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count) {
5836 panic("vm_page_verify_free_lists: "
5837 "npages %u free_count %d nlopages %u lo_free_count %u",
5838 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
5839 }
5840
5841 if (toggle == TRUE) {
5842 vm_page_verify_this_free_list_enabled = FALSE;
5843 }
5844
5845 vm_free_page_unlock();
5846}
5847
5848#endif /* MACH_ASSERT */
5849
5850/*
5851 * wrapper for pmap_enter()
5852 */
5853kern_return_t
5854pmap_enter_check(
5855 pmap_t pmap,
5856 vm_map_address_t virtual_address,
5857 vm_page_t page,
5858 vm_prot_t protection,
5859 vm_prot_t fault_type,
5860 unsigned int flags,
5861 boolean_t wired)
5862{
5863 int options = 0;
5864 vm_object_t obj;
5865
5866 if (VMP_ERROR_GET(page)) {
5867 return KERN_MEMORY_FAILURE;
5868 }
5869 obj = VM_PAGE_OBJECT(page);
5870 if (obj->internal) {
5871 options |= PMAP_OPTIONS_INTERNAL;
5872 }
5873 if (page->vmp_reusable || obj->all_reusable) {
5874 options |= PMAP_OPTIONS_REUSABLE;
5875 }
5876 return pmap_enter_options(pmap,
5877	    virtual_address,
5878	    VM_PAGE_GET_PHYS_PAGE(page),
5879	    protection,
5880	    fault_type,
5881	    flags,
5882	    wired,
5883	    options,
5884	    NULL,
5885	    PMAP_MAPPING_TYPE_INFER);
5886}
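
/*
 * Usage sketch (illustrative only, not compiled): entering a page into the
 * kernel pmap through this wrapper, mirroring the compressor-substitution
 * case in vm_page_find_contiguous() below.  The wrapper derives
 * PMAP_OPTIONS_INTERNAL / PMAP_OPTIONS_REUSABLE from the page and object
 * state so callers don't have to.
 */
#if 0
	kern_return_t kr;

	kr = pmap_enter_check(kernel_pmap, (vm_map_offset_t)m->vmp_offset, m,
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
	assert(kr == KERN_SUCCESS);
#endif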
5887
5888
5889extern boolean_t(*volatile consider_buffer_cache_collect)(int);
5890
5891/*
5892 * CONTIGUOUS PAGE ALLOCATION
5893 *
5894 * Find a region large enough to contain at least n pages
5895 * of contiguous physical memory.
5896 *
5897 * This is done by traversing the vm_page_t array in a linear fashion.
5898 * We assume that the vm_page_t array has the available physical pages in an
5899 * ordered, ascending list... this is currently true of all our implementations
5900 * and must remain so... there can be 'holes' in the array... we also can
5901 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
5902 * which used to happen via 'vm_page_convert'... that function was no longer
5903 * being called and was removed...
5904 *
5905 * The basic flow consists of stabilizing some of the interesting state of
5906 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5907 * sweep at the beginning of the array looking for pages that meet our criteria
5908 * for a 'stealable' page... currently we are pretty conservative... if the page
5909 * meets these criteria and is physically contiguous to the previous page in the 'run'
5910 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5911 * and start to develop a new run... if at this point we've already considered
5912 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5913 * and mutex_pause (which will yield the processor), to keep the latency low w/r
5914 * to other threads trying to acquire free pages (or move pages from q to q),
5915 * and then continue from the spot we left off... we only make 1 pass through the
5916 * array. Once we have a 'run' that is long enough, we'll go into the loop
5917 * which steals the pages from the queues they're currently on... pages on the free
5918 * queue can be stolen directly... pages that are on any of the other queues
5919 * must be removed from the object they are tabled on... this requires taking the
5920 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5921 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5922 * dump the pages we've currently stolen back to the free list, and pick up our
5923 * scan from the point where we aborted the 'current' run.
5924 *
5925 *
5926 * Requirements:
5927 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5928 *
5929 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5930 *
5931 * Algorithm:
5932 */
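
/*
 * Simplified sketch (illustrative only, not compiled) of just the run-detection
 * idea described above: walk an ascending array of physical page numbers and
 * look for 'contig_pages' consecutive, mask-aligned entries.  The helper name
 * and its arguments are hypothetical; the real scan below also has to yield its
 * locks periodically, honor max_pnum, steal in-use pages, and handle wrapping.
 */
#if 0
static int
find_contiguous_run_sketch(const ppnum_t *pnums, unsigned int count,
    unsigned int contig_pages, ppnum_t pnum_mask, unsigned int *run_start)
{
	unsigned int npages = 0;
	ppnum_t prevcontaddr = (ppnum_t)-2;

	for (unsigned int i = 0; i < count; i++) {
		if (pnums[i] != prevcontaddr + 1) {
			/*
			 * not contiguous with the current run... start a new
			 * run, but only on a properly aligned page
			 */
			npages = ((pnums[i] & pnum_mask) == 0) ? 1 : 0;
			if (npages) {
				*run_start = i;
			}
		} else {
			npages++;
		}
		prevcontaddr = pnums[i];

		if (npages == contig_pages) {
			return 1;	/* run found starting at *run_start */
		}
	}
	return 0;			/* no run long enough */
}
#endif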
5933
5934#define MAX_CONSIDERED_BEFORE_YIELD 1000
5935
5936
5937#define RESET_STATE_OF_RUN() \
5938 MACRO_BEGIN \
5939 prevcontaddr = -2; \
5940 start_pnum = -1; \
5941 free_considered = 0; \
5942 substitute_needed = 0; \
5943 npages = 0; \
5944 MACRO_END
5945
5946/*
5947 * Can we steal in-use (i.e. not free) pages when searching for
5948 * physically-contiguous pages ?
5949 */
5950#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5951
5952static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5953#if DEBUG
5954int vm_page_find_contig_debug = 0;
5955#endif
5956
5957static vm_page_t
5958vm_page_find_contiguous(
5959 unsigned int contig_pages,
5960 ppnum_t max_pnum,
5961 ppnum_t pnum_mask,
5962 boolean_t wire,
5963 int flags)
5964{
5965 vm_page_t m = NULL;
5966 ppnum_t prevcontaddr = 0;
5967 ppnum_t start_pnum = 0;
5968 unsigned int npages = 0, considered = 0, scanned = 0;
5969 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5970 unsigned int idx_last_contig_page_found = 0;
5971 int free_considered = 0, free_available = 0;
5972 int substitute_needed = 0;
5973 int zone_gc_called = 0;
5974 boolean_t wrapped;
5975 kern_return_t kr;
5976#if DEBUG
5977 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5978 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5979#endif
5980
5981 int yielded = 0;
5982 int dumped_run = 0;
5983 int stolen_pages = 0;
5984 int compressed_pages = 0;
5985
5986
5987 if (contig_pages == 0) {
5988 return VM_PAGE_NULL;
5989 }
5990
5991full_scan_again:
5992
5993#if MACH_ASSERT
5994 vm_page_verify_free_lists();
5995#endif
5996#if DEBUG
5997 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5998#endif
5999 PAGE_REPLACEMENT_ALLOWED(TRUE);
6000
6001 /*
6002 * If there are still delayed pages, try to free up some that match.
6003 */
6004 if (__improbable(vm_delayed_count != 0 && contig_pages != 0)) {
6005		vm_free_delayed_pages_contig(contig_pages, max_pnum, pnum_mask);
6006 }
6007
6008 vm_page_lock_queues();
6009 vm_free_page_lock();
6010
6011 RESET_STATE_OF_RUN();
6012
6013 scanned = 0;
6014 considered = 0;
6015 free_available = vm_page_free_count - vm_page_free_reserved;
6016
6017 wrapped = FALSE;
6018
6019 if (flags & KMA_LOMEM) {
6020 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
6021 } else {
6022 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
6023 }
6024
6025 orig_last_idx = idx_last_contig_page_found;
6026 last_idx = orig_last_idx;
6027
6028 for (page_idx = last_idx, start_idx = last_idx;
6029 npages < contig_pages && page_idx < vm_pages_count;
6030 page_idx++) {
6031retry:
6032 if (wrapped &&
6033 npages == 0 &&
6034 page_idx >= orig_last_idx) {
6035 /*
6036 * We're back where we started and we haven't
6037 * found any suitable contiguous range. Let's
6038 * give up.
6039 */
6040 break;
6041 }
6042 scanned++;
6043 m = &vm_pages[page_idx];
6044
6045 assert(!m->vmp_fictitious);
6046 assert(!m->vmp_private);
6047
6048 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
6049 /* no more low pages... */
6050 break;
6051 }
6052		if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
6053 /*
6054 * not aligned
6055 */
6056 RESET_STATE_OF_RUN();
6057 } else if (VM_PAGE_WIRED(m) || m->vmp_gobbled ||
6058 m->vmp_laundry || m->vmp_wanted ||
6059 m->vmp_cleaning || m->vmp_overwriting || m->vmp_free_when_done) {
6060 /*
6061 * page is in a transient state
6062 * or a state we don't want to deal
6063 * with, so don't consider it which
6064 * means starting a new run
6065 */
6066 RESET_STATE_OF_RUN();
6067 } else if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
6068 (m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
6069 (m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
6070 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
6071 /*
6072			 * page needs to be on one of our queues (other than the pageout or special free queues)
6073 * or it needs to belong to the compressor pool (which is now indicated
6074 * by vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
6075 * from the check for VM_PAGE_NOT_ON_Q)
6076 * in order for it to be stable behind the
6077 * locks we hold at this point...
6078 * if not, don't consider it which
6079 * means starting a new run
6080 */
6081 RESET_STATE_OF_RUN();
6082 } else if ((m->vmp_q_state != VM_PAGE_ON_FREE_Q) && (!m->vmp_tabled || m->vmp_busy)) {
6083 /*
6084 * pages on the free list are always 'busy'
6085 * so we couldn't test for 'busy' in the check
6086 * for the transient states... pages that are
6087 * 'free' are never 'tabled', so we also couldn't
6088 * test for 'tabled'. So we check here to make
6089 * sure that a non-free page is not busy and is
6090 * tabled on an object...
6091 * if not, don't consider it which
6092 * means starting a new run
6093 */
6094 RESET_STATE_OF_RUN();
6095 } else {
6096 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
6097 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
6098 RESET_STATE_OF_RUN();
6099 goto did_consider;
6100 } else {
6101 npages = 1;
6102 start_idx = page_idx;
6103 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
6104 }
6105 } else {
6106 npages++;
6107 }
6108 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
6109
6110 VM_PAGE_CHECK(m);
6111 if (m->vmp_q_state == VM_PAGE_ON_FREE_Q) {
6112 free_considered++;
6113 } else {
6114 /*
6115 * This page is not free.
6116 * If we can't steal used pages,
6117 * we have to give up this run
6118 * and keep looking.
6119 * Otherwise, we might need to
6120 * move the contents of this page
6121 * into a substitute page.
6122 */
6123#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
6124 if (m->vmp_pmapped || m->vmp_dirty || m->vmp_precious) {
6125 substitute_needed++;
6126 }
6127#else
6128 RESET_STATE_OF_RUN();
6129#endif
6130 }
6131
6132 if ((free_considered + substitute_needed) > free_available) {
6133 /*
6134 * if we let this run continue
6135 * we will end up dropping the vm_page_free_count
6136 * below the reserve limit... we need to abort
6137 * this run, but we can at least re-consider this
6138 * page... thus the jump back to 'retry'
6139 */
6140 RESET_STATE_OF_RUN();
6141
6142 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
6143 considered++;
6144 goto retry;
6145 }
6146 /*
6147 * free_available == 0
6148 * so can't consider any free pages... if
6149 * we went to retry in this case, we'd
6150 * get stuck looking at the same page
6151 * w/o making any forward progress
6152 * we also want to take this path if we've already
6153 * reached our limit that controls the lock latency
6154 */
6155 }
6156 }
6157did_consider:
6158 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
6159 PAGE_REPLACEMENT_ALLOWED(FALSE);
6160
6161 vm_free_page_unlock();
6162 vm_page_unlock_queues();
6163
6164 mutex_pause(0);
6165
6166 PAGE_REPLACEMENT_ALLOWED(TRUE);
6167
6168 vm_page_lock_queues();
6169 vm_free_page_lock();
6170
6171 RESET_STATE_OF_RUN();
6172 /*
6173 * reset our free page limit since we
6174 * dropped the lock protecting the vm_page_free_queue
6175 */
6176 free_available = vm_page_free_count - vm_page_free_reserved;
6177 considered = 0;
6178
6179 yielded++;
6180
6181 goto retry;
6182 }
6183 considered++;
6184 }
6185 m = VM_PAGE_NULL;
6186
6187 if (npages != contig_pages) {
6188 if (!wrapped) {
6189 /*
6190 * We didn't find a contiguous range but we didn't
6191 * start from the very first page.
6192 * Start again from the very first page.
6193 */
6194 RESET_STATE_OF_RUN();
6195 if (flags & KMA_LOMEM) {
6196 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
6197 } else {
6198 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
6199 }
6200 last_idx = 0;
6201 page_idx = last_idx;
6202 wrapped = TRUE;
6203 goto retry;
6204 }
6205 vm_free_page_unlock();
6206 } else {
6207 vm_page_t m1;
6208 vm_page_t m2;
6209 unsigned int cur_idx;
6210 unsigned int tmp_start_idx;
6211 vm_object_t locked_object = VM_OBJECT_NULL;
6212 boolean_t abort_run = FALSE;
6213
6214 assert(page_idx - start_idx == contig_pages);
6215
6216 tmp_start_idx = start_idx;
6217
6218 /*
6219 * first pass through to pull the free pages
6220 * off of the free queue so that in case we
6221 * need substitute pages, we won't grab any
6222 * of the free pages in the run... we'll clear
6223 * the 'free' bit in the 2nd pass, and even in
6224 * an abort_run case, we'll collect all of the
6225 * free pages in this run and return them to the free list
6226 */
6227 while (start_idx < page_idx) {
6228 m1 = &vm_pages[start_idx++];
6229
6230#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
6231 assert(m1->vmp_q_state == VM_PAGE_ON_FREE_Q);
6232#endif
6233
6234 if (m1->vmp_q_state == VM_PAGE_ON_FREE_Q) {
6235 unsigned int color;
6236
6237 color = VM_PAGE_GET_COLOR(m1);
6238#if MACH_ASSERT
6239 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
6240#endif
6241 vm_page_queue_remove(&vm_page_queue_free[color].qhead, m1, vmp_pageq);
6242
6243 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
6244#if MACH_ASSERT
6245 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
6246#endif
6247 /*
6248 * Clear the "free" bit so that this page
6249 * does not get considered for another
6250 * concurrent physically-contiguous allocation.
6251 */
6252 m1->vmp_q_state = VM_PAGE_NOT_ON_Q;
6253 assert(m1->vmp_busy);
6254
6255 vm_page_free_count--;
6256 }
6257 }
6258 if (flags & KMA_LOMEM) {
6259 vm_page_lomem_find_contiguous_last_idx = page_idx;
6260 } else {
6261 vm_page_find_contiguous_last_idx = page_idx;
6262 }
6263
6264 /*
6265 * we can drop the free queue lock at this point since
6266 * we've pulled any 'free' candidates off of the list
6267 * we need it dropped so that we can do a vm_page_grab
6268		 * when substituting for pmapped/dirty pages
6269 */
6270 vm_free_page_unlock();
6271
6272 start_idx = tmp_start_idx;
6273 cur_idx = page_idx - 1;
6274
6275 while (start_idx++ < page_idx) {
6276 /*
6277 * must go through the list from back to front
6278 * so that the page list is created in the
6279 * correct order - low -> high phys addresses
6280 */
6281 m1 = &vm_pages[cur_idx--];
6282
6283 if (m1->vmp_object == 0) {
6284 /*
6285 * page has already been removed from
6286 * the free list in the 1st pass
6287 */
6288 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
6289 assert(m1->vmp_offset == (vm_object_offset_t) -1);
6290 assert(m1->vmp_busy);
6291 assert(!m1->vmp_wanted);
6292 assert(!m1->vmp_laundry);
6293 } else {
6294 vm_object_t object;
6295 int refmod;
6296 boolean_t disconnected, reusable;
6297
6298 if (abort_run == TRUE) {
6299 continue;
6300 }
6301
6302 assert(m1->vmp_q_state != VM_PAGE_NOT_ON_Q);
6303
6304 object = VM_PAGE_OBJECT(m1);
6305
6306 if (object != locked_object) {
6307 if (locked_object) {
6308 vm_object_unlock(locked_object);
6309 locked_object = VM_OBJECT_NULL;
6310 }
6311 if (vm_object_lock_try(object)) {
6312 locked_object = object;
6313 }
6314 }
6315 if (locked_object == VM_OBJECT_NULL ||
6316 (VM_PAGE_WIRED(m1) || m1->vmp_gobbled ||
6317 m1->vmp_laundry || m1->vmp_wanted ||
6318 m1->vmp_cleaning || m1->vmp_overwriting || m1->vmp_free_when_done || m1->vmp_busy) ||
6319 (m1->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
6320 if (locked_object) {
6321 vm_object_unlock(locked_object);
6322 locked_object = VM_OBJECT_NULL;
6323 }
6324 tmp_start_idx = cur_idx;
6325 abort_run = TRUE;
6326 continue;
6327 }
6328
6329 disconnected = FALSE;
6330 reusable = FALSE;
6331
6332 if ((m1->vmp_reusable ||
6333 object->all_reusable) &&
6334 (m1->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
6335 !m1->vmp_dirty &&
6336 !m1->vmp_reference) {
6337 /* reusable page... */
6338					refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
6339 disconnected = TRUE;
6340 if (refmod == 0) {
6341 /*
6342 * ... not reused: can steal
6343 * without relocating contents.
6344 */
6345 reusable = TRUE;
6346 }
6347 }
6348
6349 if ((m1->vmp_pmapped &&
6350 !reusable) ||
6351 m1->vmp_dirty ||
6352 m1->vmp_precious) {
6353 vm_object_offset_t offset;
6354
6355 m2 = vm_page_grab_options(VM_PAGE_GRAB_Q_LOCK_HELD);
6356
6357 if (m2 == VM_PAGE_NULL) {
6358 if (locked_object) {
6359 vm_object_unlock(locked_object);
6360 locked_object = VM_OBJECT_NULL;
6361 }
6362 tmp_start_idx = cur_idx;
6363 abort_run = TRUE;
6364 continue;
6365 }
6366 if (!disconnected) {
6367 if (m1->vmp_pmapped) {
6368							refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
6369 } else {
6370 refmod = 0;
6371 }
6372 }
6373
6374 /* copy the page's contents */
6375					pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
6376 /* copy the page's state */
6377 assert(!VM_PAGE_WIRED(m1));
6378 assert(m1->vmp_q_state != VM_PAGE_ON_FREE_Q);
6379 assert(m1->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q);
6380 assert(!m1->vmp_laundry);
6381 m2->vmp_reference = m1->vmp_reference;
6382 assert(!m1->vmp_gobbled);
6383 assert(!m1->vmp_private);
6384 m2->vmp_no_cache = m1->vmp_no_cache;
6385 m2->vmp_xpmapped = 0;
6386 assert(!m1->vmp_busy);
6387 assert(!m1->vmp_wanted);
6388 assert(!m1->vmp_fictitious);
6389 m2->vmp_pmapped = m1->vmp_pmapped; /* should flush cache ? */
6390 m2->vmp_wpmapped = m1->vmp_wpmapped;
6391 assert(!m1->vmp_free_when_done);
6392 m2->vmp_absent = m1->vmp_absent;
6393 m2->vmp_error = VMP_ERROR_GET(m1);
6394 m2->vmp_dirty = m1->vmp_dirty;
6395 assert(!m1->vmp_cleaning);
6396 m2->vmp_precious = m1->vmp_precious;
6397 m2->vmp_clustered = m1->vmp_clustered;
6398 assert(!m1->vmp_overwriting);
6399 m2->vmp_restart = m1->vmp_restart;
6400 m2->vmp_unusual = m1->vmp_unusual;
6401 m2->vmp_cs_validated = m1->vmp_cs_validated;
6402 m2->vmp_cs_tainted = m1->vmp_cs_tainted;
6403 m2->vmp_cs_nx = m1->vmp_cs_nx;
6404
6405 m2->vmp_realtime = m1->vmp_realtime;
6406 m1->vmp_realtime = false;
6407
6408 /*
6409 * If m1 had really been reusable,
6410 * we would have just stolen it, so
6411					 * let's not propagate its "reusable"
6412 * bit and assert that m2 is not
6413 * marked as "reusable".
6414 */
6415 // m2->vmp_reusable = m1->vmp_reusable;
6416 assert(!m2->vmp_reusable);
6417
6418 // assert(!m1->vmp_lopage);
6419
6420 if (m1->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6421 m2->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
6422 /*
6423 * We just grabbed m2 up above and so it isn't
6424 * going to be on any special Q as yet and so
6425 * we don't need to 'remove' it from the special
6426 * queues. Just resetting the state should be enough.
6427 */
6428 m2->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
6429 }
6430
6431 /*
6432 * page may need to be flushed if
6433 * it is marshalled into a UPL
6434 * that is going to be used by a device
6435 * that doesn't support coherency
6436 */
6437 m2->vmp_written_by_kernel = TRUE;
6438
6439 /*
6440 * make sure we clear the ref/mod state
6441 * from the pmap layer... else we risk
6442 * inheriting state from the last time
6443 * this page was used...
6444 */
6445					pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
6446
6447 if (refmod & VM_MEM_REFERENCED) {
6448 m2->vmp_reference = TRUE;
6449 }
6450 if (refmod & VM_MEM_MODIFIED) {
6451 SET_PAGE_DIRTY(m2, TRUE);
6452 }
6453 offset = m1->vmp_offset;
6454
6455 /*
6456 * completely cleans up the state
6457 * of the page so that it is ready
6458 * to be put onto the free list, or
6459 * for this purpose it looks like it
6460 * just came off of the free list
6461 */
6462					vm_page_free_prepare(m1);
6463
6464 /*
6465 * now put the substitute page
6466 * on the object
6467 */
6468					vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
6469
6470 if (m2->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6471 m2->vmp_pmapped = TRUE;
6472 m2->vmp_wpmapped = TRUE;
6473
6474						kr = pmap_enter_check(kernel_pmap, (vm_map_offset_t)m2->vmp_offset, m2,
6475						    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
6476
6477 assert(kr == KERN_SUCCESS);
6478
6479 compressed_pages++;
6480 } else {
6481 if (m2->vmp_reference) {
6482							vm_page_activate(m2);
6483						} else {
6484							vm_page_deactivate(m2);
6485 }
6486 }
6487 PAGE_WAKEUP_DONE(m2);
6488 } else {
6489 assert(m1->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6490
6491 /*
6492 * completely cleans up the state
6493 * of the page so that it is ready
6494 * to be put onto the free list, or
6495 * for this purpose it looks like it
6496 * just came off of the free list
6497 */
6498					vm_page_free_prepare(m1);
6499 }
6500
6501 stolen_pages++;
6502 }
6503 if (m1->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
6504 /*
6505 * The Q state is preserved on m1 because vm_page_queues_remove doesn't
6506 * change it for pages marked as used-by-compressor.
6507 */
6508				vm_page_assign_special_state(m1, VM_PAGE_SPECIAL_Q_BG);
6509 }
6510 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
6511 m1->vmp_snext = m;
6512 m = m1;
6513 }
6514 if (locked_object) {
6515 vm_object_unlock(locked_object);
6516 locked_object = VM_OBJECT_NULL;
6517 }
6518
6519 if (abort_run == TRUE) {
6520 /*
6521 * want the index of the last
6522 * page in this run that was
6523 * successfully 'stolen', so back
6524 * it up 1 for the auto-decrement on use
6525 * and 1 more to bump back over this page
6526 */
6527 page_idx = tmp_start_idx + 2;
6528 if (page_idx >= vm_pages_count) {
6529 if (wrapped) {
6530 if (m != VM_PAGE_NULL) {
6531 vm_page_unlock_queues();
6532						vm_page_free_list(m, FALSE);
6533 vm_page_lock_queues();
6534 m = VM_PAGE_NULL;
6535 }
6536 dumped_run++;
6537 goto done_scanning;
6538 }
6539 page_idx = last_idx = 0;
6540 wrapped = TRUE;
6541 }
6542 abort_run = FALSE;
6543
6544			/*
6545			 * We had to abort this run... resume the scan just
6546			 * past the page that made us abort, after returning
6547			 * the partially stolen run to the free list below.
6548			 */
6549 RESET_STATE_OF_RUN();
6550
6551 if (flags & KMA_LOMEM) {
6552 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
6553 } else {
6554 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
6555 }
6556
6557 last_idx = page_idx;
6558
6559 if (m != VM_PAGE_NULL) {
6560 vm_page_unlock_queues();
6561				vm_page_free_list(m, FALSE);
6562 vm_page_lock_queues();
6563 m = VM_PAGE_NULL;
6564 }
6565 dumped_run++;
6566
6567 vm_free_page_lock();
6568 /*
6569 * reset our free page limit since we
6570 * dropped the lock protecting the vm_page_free_queue
6571 */
6572 free_available = vm_page_free_count - vm_page_free_reserved;
6573 goto retry;
6574 }
6575
6576 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
6577 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
6578 assert(m1->vmp_wire_count == 0);
6579
6580 if (wire == TRUE) {
6581 m1->vmp_wire_count++;
6582 m1->vmp_q_state = VM_PAGE_IS_WIRED;
6583 } else {
6584 m1->vmp_gobbled = TRUE;
6585 }
6586 }
6587 if (wire == FALSE) {
6588 vm_page_gobble_count += npages;
6589 }
6590
6591 /*
6592 * gobbled pages are also counted as wired pages
6593 */
6594 vm_page_wire_count += npages;
6595
6596 assert(vm_page_verify_contiguous(m, npages));
6597 }
6598done_scanning:
6599 PAGE_REPLACEMENT_ALLOWED(FALSE);
6600
6601 vm_page_unlock_queues();
6602
6603#if DEBUG
6604 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
6605
6606 tv_end_sec -= tv_start_sec;
6607 if (tv_end_usec < tv_start_usec) {
6608 tv_end_sec--;
6609 tv_end_usec += 1000000;
6610 }
6611 tv_end_usec -= tv_start_usec;
6612 if (tv_end_usec >= 1000000) {
6613 tv_end_sec++;
6614		tv_end_usec -= 1000000;
6615 }
6616 if (vm_page_find_contig_debug) {
6617 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
6618 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
6619 (long)tv_end_sec, tv_end_usec, orig_last_idx,
6620 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
6621 }
6622
6623#endif
6624#if MACH_ASSERT
6625 vm_page_verify_free_lists();
6626#endif
6627 if (m == NULL && zone_gc_called < 2) {
6628		printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
6629 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
6630 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
6631
6632 if (consider_buffer_cache_collect != NULL) {
6633 (void)(*consider_buffer_cache_collect)(1);
6634 }
6635
6636		zone_gc(zone_gc_called ? ZONE_GC_DRAIN : ZONE_GC_TRIM);
6637
6638 zone_gc_called++;
6639
6640		printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
6641 goto full_scan_again;
6642 }
6643
6644 return m;
6645}
6646
6647/*
6648 * Allocate a list of contiguous, wired pages.
6649 */
6650kern_return_t
6651cpm_allocate(
6652 vm_size_t size,
6653 vm_page_t *list,
6654 ppnum_t max_pnum,
6655 ppnum_t pnum_mask,
6656 boolean_t wire,
6657 int flags)
6658{
6659 vm_page_t pages;
6660 unsigned int npages;
6661
6662 if (size % PAGE_SIZE != 0) {
6663 return KERN_INVALID_ARGUMENT;
6664 }
6665
6666 npages = (unsigned int) (size / PAGE_SIZE);
6667 if (npages != size / PAGE_SIZE) {
6668 /* 32-bit overflow */
6669 return KERN_INVALID_ARGUMENT;
6670 }
6671
6672 /*
6673 * Obtain a pointer to a subset of the free
6674 * list large enough to satisfy the request;
6675 * the region will be physically contiguous.
6676 */
6677	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
6678
6679 if (pages == VM_PAGE_NULL) {
6680 return KERN_NO_SPACE;
6681 }
6682 /*
6683 * determine need for wakeups
6684 */
6685 if (vm_page_free_count < vm_page_free_min) {
6686 vm_free_page_lock();
6687 if (vm_pageout_running == FALSE) {
6688 vm_free_page_unlock();
6689 thread_wakeup((event_t) &vm_page_free_wanted);
6690 } else {
6691 vm_free_page_unlock();
6692 }
6693 }
6694
6695 VM_CHECK_MEMORYSTATUS;
6696
6697 /*
6698 * The CPM pages should now be available and
6699 * ordered by ascending physical address.
6700 */
6701 assert(vm_page_verify_contiguous(pages, npages));
6702
6703 if (flags & KMA_ZERO) {
6704 for (vm_page_t m = pages; m; m = NEXT_PAGE(m)) {
6705 vm_page_zero_fill(m);
6706 }
6707 }
6708
6709 *list = pages;
6710 return KERN_SUCCESS;
6711}
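
/*
 * Example (illustrative only, not compiled): requesting one megabyte of wired,
 * physically contiguous, zero-filled memory below a hypothetical DMA limit.
 * Per the requirements above, the caller must not hold the page queue or free
 * list locks; the 'dma_max_pnum' value here is purely illustrative.
 */
#if 0
	vm_page_t	pages;
	ppnum_t		dma_max_pnum = (ppnum_t)(0xFFFFFFFFULL >> PAGE_SHIFT);	/* hypothetical 32-bit limit */
	kern_return_t	kr;

	kr = cpm_allocate(1024 * 1024, &pages, dma_max_pnum, 0 /* no alignment mask */,
	    TRUE /* wire */, KMA_ZERO);
	if (kr != KERN_SUCCESS) {
		/* KERN_NO_SPACE: no suitable contiguous run could be found */
	}
#endif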
6712
6713
6714unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
6715
6716/*
6717 * when working on a 'run' of pages, it is necessary to hold
6718 * the vm_page_queue_lock (a hot global lock) for certain operations
6719 * on the page... however, the majority of the work can be done
6720 * while merely holding the object lock... in fact there are certain
6721 * collections of pages that don't require any work brokered by the
6722 * vm_page_queue_lock... to mitigate the time spent behind the global
6723 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
6724 * while doing all of the work that doesn't require the vm_page_queue_lock...
6725 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
6726 * necessary work for each page... we will grab the busy bit on the page
6727 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
6728 * if it can't immediately take the vm_page_queue_lock in order to compete
6729 * for the locks in the same order that vm_pageout_scan takes them.
6730 * the operation names are modeled after the names of the routines that
6731 * need to be called in order to make the changes very obvious in the
6732 * original loop
6733 */
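
/*
 * Caller sketch (illustrative only, not compiled) of the two-pass pattern
 * described above: collect per-page work while holding only the object lock,
 * then hand the batch to vm_page_do_delayed_work(), which takes the
 * vm_page_queue lock once for the whole batch.  The iteration over
 * object->memq / vmp_listq and the use of DEFAULT_DELAYED_WORK_LIMIT as an
 * array bound are assumptions made for illustration; real callers use the
 * delayed-work helpers in vm_pageout.h.
 */
#if 0
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	vm_object_lock(object);

	vm_page_queue_iterate(&object->memq, m, vmp_listq) {
		/* pass 1: work that needs only the object lock */
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_activate | DW_clear_busy | DW_PAGE_WAKEUP;
		dwp++;

		if (++dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
			/* pass 2: one trip through the vm_page_queue lock */
			vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE,
			    &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count) {
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE,
		    &dw_array[0], dw_count);
	}
	vm_object_unlock(object);
#endif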
6734
6735void
6736vm_page_do_delayed_work(
6737 vm_object_t object,
6738 vm_tag_t tag,
6739 struct vm_page_delayed_work *dwp,
6740 int dw_count)
6741{
6742 int j;
6743 vm_page_t m;
6744 vm_page_t local_free_q = VM_PAGE_NULL;
6745
6746 /*
6747 * pageout_scan takes the vm_page_lock_queues first
6748 * then tries for the object lock... to avoid what
6749 * is effectively a lock inversion, we'll go to the
6750 * trouble of taking them in that same order... otherwise
6751 * if this object contains the majority of the pages resident
6752 * in the UBC (or a small set of large objects actively being
6753 * worked on contain the majority of the pages), we could
6754 * cause the pageout_scan thread to 'starve' in its attempt
6755 * to find pages to move to the free queue, since it has to
6756 * successfully acquire the object lock of any candidate page
6757 * before it can steal/clean it.
6758 */
6759 if (!vm_page_trylock_queues()) {
6760 vm_object_unlock(object);
6761
6762 /*
6763 * "Turnstile enabled vm_pageout_scan" can be runnable
6764 * for a very long time without getting on a core.
6765 * If this is a higher priority thread it could be
6766 * waiting here for a very long time respecting the fact
6767 * that pageout_scan would like its object after VPS does
6768 * a mutex_pause(0).
6769 * So we cap the number of yields in the vm_object_lock_avoid()
6770 * case to a single mutex_pause(0) which will give vm_pageout_scan
6771 * 10us to run and grab the object if needed.
6772 */
6773 vm_page_lock_queues();
6774
6775 for (j = 0;; j++) {
6776 if ((!vm_object_lock_avoid(object) ||
6777 (vps_dynamic_priority_enabled && (j > 0))) &&
6778 _vm_object_lock_try(object)) {
6779 break;
6780 }
6781 vm_page_unlock_queues();
6782 mutex_pause(j);
6783 vm_page_lock_queues();
6784 }
6785 }
6786 for (j = 0; j < dw_count; j++, dwp++) {
6787 m = dwp->dw_m;
6788
6789 if (dwp->dw_mask & DW_vm_pageout_throttle_up) {
6790			vm_pageout_throttle_up(m);
6791 }
6792#if CONFIG_PHANTOM_CACHE
6793 if (dwp->dw_mask & DW_vm_phantom_cache_update) {
6794 vm_phantom_cache_update(m);
6795 }
6796#endif
6797 if (dwp->dw_mask & DW_vm_page_wire) {
6798			vm_page_wire(m, tag, FALSE);
6799 } else if (dwp->dw_mask & DW_vm_page_unwire) {
6800 boolean_t queueit;
6801
6802 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
6803
6804			vm_page_unwire(m, queueit);
6805 }
6806 if (dwp->dw_mask & DW_vm_page_free) {
6807			vm_page_free_prepare_queues(m);
6808
6809 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6810 /*
6811 * Add this page to our list of reclaimed pages,
6812 * to be freed later.
6813 */
6814 m->vmp_snext = local_free_q;
6815 local_free_q = m;
6816 } else {
6817 if (dwp->dw_mask & DW_vm_page_deactivate_internal) {
6818 vm_page_deactivate_internal(m, FALSE);
6819 } else if (dwp->dw_mask & DW_vm_page_activate) {
6820 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6821 vm_page_activate(m);
6822 }
6823 } else if (dwp->dw_mask & DW_vm_page_speculate) {
6824 vm_page_speculate(m, TRUE);
6825 } else if (dwp->dw_mask & DW_enqueue_cleaned) {
6826 /*
6827 * if we didn't hold the object lock and did this,
6828 * we might disconnect the page, then someone might
6829 * soft fault it back in, then we would put it on the
6830 * cleaned queue, and so we would have a referenced (maybe even dirty)
6831 * page on that queue, which we don't want
6832 */
6833				int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6834
6835 if ((refmod_state & VM_MEM_REFERENCED)) {
6836 /*
6837 * this page has been touched since it got cleaned; let's activate it
6838 * if it hasn't already been
6839 */
6840 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
6841 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
6842
6843 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6844 vm_page_activate(m);
6845 }
6846 } else {
6847 m->vmp_reference = FALSE;
6848 vm_page_enqueue_cleaned(m);
6849 }
6850 } else if (dwp->dw_mask & DW_vm_page_lru) {
6851 vm_page_lru(m);
6852 } else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
6853 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6854					vm_page_queues_remove(m, TRUE);
6855 }
6856 }
6857 if (dwp->dw_mask & DW_set_reference) {
6858 m->vmp_reference = TRUE;
6859 } else if (dwp->dw_mask & DW_clear_reference) {
6860 m->vmp_reference = FALSE;
6861 }
6862
6863 if (dwp->dw_mask & DW_move_page) {
6864 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6865					vm_page_queues_remove(m, FALSE);
6866
6867					assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
6868
6869					vm_page_enqueue_inactive(m, FALSE);
6870 }
6871 }
6872 if (dwp->dw_mask & DW_clear_busy) {
6873 m->vmp_busy = FALSE;
6874 }
6875
6876 if (dwp->dw_mask & DW_PAGE_WAKEUP) {
6877 PAGE_WAKEUP(m);
6878 }
6879 }
6880 }
6881 vm_page_unlock_queues();
6882
6883 if (local_free_q) {
6884		vm_page_free_list(local_free_q, TRUE);
6885 }
6886
6887 VM_CHECK_MEMORYSTATUS;
6888}
6889
6890__abortlike
6891static void
6892__vm_page_alloc_list_failed_panic(
6893 vm_size_t page_count,
6894 kma_flags_t flags,
6895 kern_return_t kr)
6896{
6897 panic("vm_page_alloc_list(%zd, 0x%x) failed unexpectedly with %d",
6898 (size_t)page_count, flags, kr);
6899}
6900
6901kern_return_t
6902vm_page_alloc_list(
6903 vm_size_t page_count,
6904 kma_flags_t flags,
6905 vm_page_t *list)
6906{
6907 vm_page_t page_list = VM_PAGE_NULL;
6908 vm_page_t mem;
6909 kern_return_t kr = KERN_SUCCESS;
6910 int page_grab_count = 0;
6911#if DEVELOPMENT || DEBUG
6912 task_t task;
6913#endif /* DEVELOPMENT || DEBUG */
6914
6915 for (vm_size_t i = 0; i < page_count; i++) {
6916 for (;;) {
6917 if (flags & KMA_LOMEM) {
6918 mem = vm_page_grablo();
6919 } else {
6920 mem = vm_page_grab();
6921 }
6922
6923 if (mem != VM_PAGE_NULL) {
6924 break;
6925 }
6926
6927 if (flags & KMA_NOPAGEWAIT) {
6928 kr = KERN_RESOURCE_SHORTAGE;
6929 goto out;
6930 }
6931 if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
6932 kr = KERN_RESOURCE_SHORTAGE;
6933 goto out;
6934 }
6935
6936 /* VM privileged threads should have waited in vm_page_grab() and not get here. */
6937 assert(!(current_thread()->options & TH_OPT_VMPRIV));
6938
6939 if ((flags & KMA_NOFAIL) == 0) {
6940 uint64_t unavailable = ptoa_64(vm_page_wire_count + vm_page_free_target);
6941 if (unavailable > max_mem || ptoa_64(page_count) > (max_mem - unavailable)) {
6942 kr = KERN_RESOURCE_SHORTAGE;
6943 goto out;
6944 }
6945 }
6946 VM_PAGE_WAIT();
6947 }
6948
6949 page_grab_count++;
6950 mem->vmp_snext = page_list;
6951 page_list = mem;
6952 }
6953
6954 if ((KMA_ZERO | KMA_NOENCRYPT) & flags) {
6955 for (mem = page_list; mem; mem = mem->vmp_snext) {
6956			vm_page_zero_fill(mem);
6957 }
6958 }
6959
6960out:
6961#if DEBUG || DEVELOPMENT
6962 task = current_task_early();
6963 if (task != NULL) {
6964 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
6965 }
6966#endif
6967
6968 if (kr == KERN_SUCCESS) {
6969 *list = page_list;
6970 } else if (flags & KMA_NOFAIL) {
6971 __vm_page_alloc_list_failed_panic(page_count, flags, kr);
6972 } else {
6973		vm_page_free_list(page_list, FALSE);
6974 }
6975
6976 return kr;
6977}
6978
6979void
6980vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6981{
6982 page->vmp_offset = offset;
6983}
6984
6985vm_page_t
6986vm_page_get_next(vm_page_t page)
6987{
6988 return page->vmp_snext;
6989}
6990
6991vm_object_offset_t
6992vm_page_get_offset(vm_page_t page)
6993{
6994 return page->vmp_offset;
6995}
6996
6997ppnum_t
6998vm_page_get_phys_page(vm_page_t page)
6999{
7000	return VM_PAGE_GET_PHYS_PAGE(page);
7001}
7002
7003
7004/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7005
7006#if HIBERNATION
7007
7008static vm_page_t hibernate_gobble_queue;
7009
7010static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
7011static int hibernate_flush_dirty_pages(int);
7012static int hibernate_flush_queue(vm_page_queue_head_t *, int);
7013
7014void hibernate_flush_wait(void);
7015void hibernate_mark_in_progress(void);
7016void hibernate_clear_in_progress(void);
7017
7018void hibernate_free_range(int, int);
7019void hibernate_hash_insert_page(vm_page_t);
7020uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
7021uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
7022ppnum_t hibernate_lookup_paddr(unsigned int);
7023
7024struct hibernate_statistics {
7025 int hibernate_considered;
7026 int hibernate_reentered_on_q;
7027 int hibernate_found_dirty;
7028 int hibernate_skipped_cleaning;
7029 int hibernate_skipped_transient;
7030 int hibernate_skipped_precious;
7031 int hibernate_skipped_external;
7032 int hibernate_queue_nolock;
7033 int hibernate_queue_paused;
7034 int hibernate_throttled;
7035 int hibernate_throttle_timeout;
7036 int hibernate_drained;
7037 int hibernate_drain_timeout;
7038 int cd_lock_failed;
7039 int cd_found_precious;
7040 int cd_found_wired;
7041 int cd_found_busy;
7042 int cd_found_unusual;
7043 int cd_found_cleaning;
7044 int cd_found_laundry;
7045 int cd_found_dirty;
7046 int cd_found_xpmapped;
7047 int cd_skipped_xpmapped;
7048 int cd_local_free;
7049 int cd_total_free;
7050 int cd_vm_page_wire_count;
7051 int cd_vm_struct_pages_unneeded;
7052 int cd_pages;
7053 int cd_discarded;
7054 int cd_count_wire;
7055} hibernate_stats;
7056
7057
7058/*
7059 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
7060 * so that we don't overrun the estimated image size, which would
7061 * result in a hibernation failure.
7062 *
7063 * We use a size value instead of pages because we don't want to take up more space
7064 * on disk if the system has a 16K page size vs 4K. Also, we are not guaranteed
7065 * to have that additional space available.
7066 *
7067 * Since this was set at 40000 pages on X86 we are going to use 160MB as our
7068 * xpmapped size.
7069 */
7070#define HIBERNATE_XPMAPPED_LIMIT ((160 * 1024 * 1024ULL) / PAGE_SIZE)
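
/*
 * Worked example: a 160MB budget is 160 * 1024 * 1024 / 4096 = 40960 pages
 * with a 4K page size, but only 10240 pages with a 16K page size, so the
 * page-count clamp shrinks as PAGE_SIZE grows while the on-disk budget
 * stays fixed.
 */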
7071
7072
7073static int
7074hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
7075{
7076 wait_result_t wait_result;
7077
7078 vm_page_lock_queues();
7079
7080 while (!vm_page_queue_empty(&q->pgo_pending)) {
7081 q->pgo_draining = TRUE;
7082
7083 assert_wait_timeout((event_t) (&q->pgo_laundry + 1), THREAD_INTERRUPTIBLE, 5000, 1000 * NSEC_PER_USEC);
7084
7085 vm_page_unlock_queues();
7086
7087 wait_result = thread_block(THREAD_CONTINUE_NULL);
7088
7089 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
7090 hibernate_stats.hibernate_drain_timeout++;
7091
7092 if (q == &vm_pageout_queue_external) {
7093 return 0;
7094 }
7095
7096 return 1;
7097 }
7098 vm_page_lock_queues();
7099
7100 hibernate_stats.hibernate_drained++;
7101 }
7102 vm_page_unlock_queues();
7103
7104 return 0;
7105}
7106
7107
7108boolean_t hibernate_skip_external = FALSE;
7109
7110static int
7111hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
7112{
7113 vm_page_t m;
7114 vm_object_t l_object = NULL;
7115 vm_object_t m_object = NULL;
7116 int refmod_state = 0;
7117 int try_failed_count = 0;
7118 int retval = 0;
7119 int current_run = 0;
7120 struct vm_pageout_queue *iq;
7121 struct vm_pageout_queue *eq;
7122 struct vm_pageout_queue *tq;
7123
7124 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
7125 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
7126
7127 iq = &vm_pageout_queue_internal;
7128 eq = &vm_pageout_queue_external;
7129
7130 vm_page_lock_queues();
7131
7132 while (qcount && !vm_page_queue_empty(q)) {
7133 if (current_run++ == 1000) {
7134 if (hibernate_should_abort()) {
7135 retval = 1;
7136 break;
7137 }
7138 current_run = 0;
7139 }
7140
7141 m = (vm_page_t) vm_page_queue_first(q);
7142 m_object = VM_PAGE_OBJECT(m);
7143
7144 /*
7145 * check to see if we currently are working
7146 * with the same object... if so, we've
7147 * already got the lock
7148 */
7149 if (m_object != l_object) {
7150 /*
7151 * the object associated with candidate page is
7152 * different from the one we were just working
7153 * with... dump the lock if we still own it
7154 */
7155 if (l_object != NULL) {
7156 vm_object_unlock(l_object);
7157 l_object = NULL;
7158 }
7159 /*
7160			 * Try to lock object; since we've already got the
7161 * page queues lock, we can only 'try' for this one.
7162 * if the 'try' fails, we need to do a mutex_pause
7163 * to allow the owner of the object lock a chance to
7164 * run...
7165 */
7166 if (!vm_object_lock_try_scan(m_object)) {
7167 if (try_failed_count > 20) {
7168 hibernate_stats.hibernate_queue_nolock++;
7169
7170 goto reenter_pg_on_q;
7171 }
7172
7173 vm_page_unlock_queues();
7174 mutex_pause(try_failed_count++);
7175 vm_page_lock_queues();
7176
7177 hibernate_stats.hibernate_queue_paused++;
7178 continue;
7179 } else {
7180 l_object = m_object;
7181 }
7182 }
7183 if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || VMP_ERROR_GET(m)) {
7184 /*
7185 * page is not to be cleaned
7186 * put it back on the head of its queue
7187 */
7188 if (m->vmp_cleaning) {
7189 hibernate_stats.hibernate_skipped_cleaning++;
7190 } else {
7191 hibernate_stats.hibernate_skipped_transient++;
7192 }
7193
7194 goto reenter_pg_on_q;
7195 }
7196 if (m_object->vo_copy == VM_OBJECT_NULL) {
7197 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
7198 /*
7199 * let the normal hibernate image path
7200 * deal with these
7201 */
7202 goto reenter_pg_on_q;
7203 }
7204 }
7205 if (!m->vmp_dirty && m->vmp_pmapped) {
7206 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
7207
7208 if ((refmod_state & VM_MEM_MODIFIED)) {
7209 SET_PAGE_DIRTY(m, FALSE);
7210 }
7211 } else {
7212 refmod_state = 0;
7213 }
7214
7215 if (!m->vmp_dirty) {
7216 /*
7217 * page is not to be cleaned
7218 * put it back on the head of its queue
7219 */
7220 if (m->vmp_precious) {
7221 hibernate_stats.hibernate_skipped_precious++;
7222 }
7223
7224 goto reenter_pg_on_q;
7225 }
7226
7227 if (hibernate_skip_external == TRUE && !m_object->internal) {
7228 hibernate_stats.hibernate_skipped_external++;
7229
7230 goto reenter_pg_on_q;
7231 }
7232 tq = NULL;
7233
7234 if (m_object->internal) {
7235 if (VM_PAGE_Q_THROTTLED(iq)) {
7236 tq = iq;
7237 }
7238 } else if (VM_PAGE_Q_THROTTLED(eq)) {
7239 tq = eq;
7240 }
7241
7242 if (tq != NULL) {
7243 wait_result_t wait_result;
7244 int wait_count = 5;
7245
7246 if (l_object != NULL) {
7247 vm_object_unlock(l_object);
7248 l_object = NULL;
7249 }
7250
7251 while (retval == 0) {
7252 tq->pgo_throttled = TRUE;
7253
7254 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
7255
7256 vm_page_unlock_queues();
7257
7258 wait_result = thread_block(THREAD_CONTINUE_NULL);
7259
7260 vm_page_lock_queues();
7261
7262 if (wait_result != THREAD_TIMED_OUT) {
7263 break;
7264 }
7265 if (!VM_PAGE_Q_THROTTLED(tq)) {
7266 break;
7267 }
7268
7269 if (hibernate_should_abort()) {
7270 retval = 1;
7271 }
7272
7273 if (--wait_count == 0) {
7274 hibernate_stats.hibernate_throttle_timeout++;
7275
7276 if (tq == eq) {
7277 hibernate_skip_external = TRUE;
7278 break;
7279 }
7280 retval = 1;
7281 }
7282 }
7283 if (retval) {
7284 break;
7285 }
7286
7287 hibernate_stats.hibernate_throttled++;
7288
7289 continue;
7290 }
7291 /*
7292 * we've already factored out pages in the laundry which
7293 * means this page can't be on the pageout queue so it's
7294 * safe to do the vm_page_queues_remove
7295 */
7296 vm_page_queues_remove(m, TRUE);
7297
7298 if (m_object->internal == TRUE) {
7299 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
7300 }
7301
7302 vm_pageout_cluster(m);
7303
7304 hibernate_stats.hibernate_found_dirty++;
7305
7306 goto next_pg;
7307
7308reenter_pg_on_q:
7309 vm_page_queue_remove(q, m, vmp_pageq);
7310 vm_page_queue_enter(q, m, vmp_pageq);
7311
7312 hibernate_stats.hibernate_reentered_on_q++;
7313next_pg:
7314 hibernate_stats.hibernate_considered++;
7315
7316 qcount--;
7317 try_failed_count = 0;
7318 }
7319 if (l_object != NULL) {
7320 vm_object_unlock(l_object);
7321 l_object = NULL;
7322 }
7323
7324 vm_page_unlock_queues();
7325
7326 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
7327
7328 return retval;
7329}
7330
7331
7332static int
7333hibernate_flush_dirty_pages(int pass)
7334{
7335 struct vm_speculative_age_q *aq;
7336 uint32_t i;
7337
7338 if (vm_page_local_q) {
7339 zpercpu_foreach_cpu(lid) {
7340 vm_page_reactivate_local(lid, TRUE, FALSE);
7341 }
7342 }
7343
7344 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
7345 int qcount;
7346 vm_page_t m;
7347
7348 aq = &vm_page_queue_speculative[i];
7349
7350 if (vm_page_queue_empty(&aq->age_q)) {
7351 continue;
7352 }
7353 qcount = 0;
7354
7355 vm_page_lockspin_queues();
7356
7357 vm_page_queue_iterate(&aq->age_q, m, vmp_pageq) {
7358 qcount++;
7359 }
7360 vm_page_unlock_queues();
7361
7362 if (qcount) {
7363 if (hibernate_flush_queue(&aq->age_q, qcount)) {
7364 return 1;
7365 }
7366 }
7367 }
7368 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count)) {
7369 return 1;
7370 }
7371 /* XXX FBDP TODO: flush secluded queue */
7372 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count)) {
7373 return 1;
7374 }
7375 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count)) {
7376 return 1;
7377 }
7378 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
7379 return 1;
7380 }
7381
7382 if (pass == 1) {
7383 vm_compressor_record_warmup_start();
7384 }
7385
7386 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
7387 if (pass == 1) {
7388 vm_compressor_record_warmup_end();
7389 }
7390 return 1;
7391 }
7392 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
7393 if (pass == 1) {
7394 vm_compressor_record_warmup_end();
7395 }
7396 return 1;
7397 }
7398 if (pass == 1) {
7399 vm_compressor_record_warmup_end();
7400 }
7401
7402 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external)) {
7403 return 1;
7404 }
7405
7406 return 0;
7407}
7408
7409
7410void
7411hibernate_reset_stats()
7412{
7413 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
7414}
7415
7416
7417int
7418hibernate_flush_memory()
7419{
7420 int retval;
7421
7422 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
7423
7424 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
7425
7426 hibernate_cleaning_in_progress = TRUE;
7427 hibernate_skip_external = FALSE;
7428
7429 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
7430 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
7431
7432 vm_compressor_flush();
7433
7434 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
7435
7436 if (consider_buffer_cache_collect != NULL) {
7437 unsigned int orig_wire_count;
7438
7439 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
7440 orig_wire_count = vm_page_wire_count;
7441
7442 (void)(*consider_buffer_cache_collect)(1);
7443 zone_gc(ZONE_GC_DRAIN);
7444
7445 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
7446
7447 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
7448 }
7449 }
7450 hibernate_cleaning_in_progress = FALSE;
7451
7452 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
7453
7454 if (retval) {
7455 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
7456 }
7457
7458
7459 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
7460 hibernate_stats.hibernate_considered,
7461 hibernate_stats.hibernate_reentered_on_q,
7462 hibernate_stats.hibernate_found_dirty);
7463 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
7464 hibernate_stats.hibernate_skipped_cleaning,
7465 hibernate_stats.hibernate_skipped_transient,
7466 hibernate_stats.hibernate_skipped_precious,
7467 hibernate_stats.hibernate_skipped_external,
7468 hibernate_stats.hibernate_queue_nolock);
7469 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
7470 hibernate_stats.hibernate_queue_paused,
7471 hibernate_stats.hibernate_throttled,
7472 hibernate_stats.hibernate_throttle_timeout,
7473 hibernate_stats.hibernate_drained,
7474 hibernate_stats.hibernate_drain_timeout);
7475
7476 return retval;
7477}
7478
7479
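/*
 * Zero every bank bitmap in the list (a zero bit means "save this page") and
 * then set the bits past each bank's last page, so the unused tail of the
 * final bitmap word is never treated as pages that need saving.
 */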
7480static void
7481hibernate_page_list_zero(hibernate_page_list_t *list)
7482{
7483 uint32_t bank;
7484 hibernate_bitmap_t * bitmap;
7485
7486 bitmap = &list->bank_bitmap[0];
7487 for (bank = 0; bank < list->bank_count; bank++) {
7488 uint32_t last_bit;
7489
7490 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
7491 // set out-of-bound bits at end of bitmap.
7492 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
7493 if (last_bit) {
7494 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
7495 }
7496
7497 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
7498 }
7499}
7500
7501void
7502hibernate_free_gobble_pages(void)
7503{
7504 vm_page_t m, next;
7505 uint32_t count = 0;
7506
7507 m = (vm_page_t) hibernate_gobble_queue;
7508 while (m) {
7509 next = m->vmp_snext;
7510 vm_page_free(m);
7511 count++;
7512 m = next;
7513 }
7514 hibernate_gobble_queue = VM_PAGE_NULL;
7515
7516 if (count) {
7517 HIBLOG("Freed %d pages\n", count);
7518 }
7519}
7520
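/*
 * Decide whether a page can be dropped from the hibernation image instead of
 * being saved.  A page is discardable if it is clean (after folding in the
 * pmap referenced/modified bits) or belongs to a volatile or empty purgeable
 * object, and is not wired, busy, precious, absent, in laundry or being
 * cleaned.  Clean, referenced xpmapped pages of external objects are kept
 * (not discarded) until HIBERNATE_XPMAPPED_LIMIT of them have been seen.
 * With 'preflight' set, only the decision is made; no statistics are updated.
 */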
7521static boolean_t
7522hibernate_consider_discard(vm_page_t m, boolean_t preflight)
7523{
7524 vm_object_t object = NULL;
7525 int refmod_state;
7526 boolean_t discard = FALSE;
7527
        do {
7529 if (m->vmp_private) {
7530 panic("hibernate_consider_discard: private");
7531 }
7532
7533 object = VM_PAGE_OBJECT(m);
7534
7535 if (!vm_object_lock_try(object)) {
7536 object = NULL;
7537 if (!preflight) {
7538 hibernate_stats.cd_lock_failed++;
7539 }
7540 break;
7541 }
7542 if (VM_PAGE_WIRED(m)) {
7543 if (!preflight) {
7544 hibernate_stats.cd_found_wired++;
7545 }
7546 break;
7547 }
7548 if (m->vmp_precious) {
7549 if (!preflight) {
7550 hibernate_stats.cd_found_precious++;
7551 }
7552 break;
7553 }
7554 if (m->vmp_busy || !object->alive) {
7555 /*
7556 * Somebody is playing with this page.
7557 */
7558 if (!preflight) {
7559 hibernate_stats.cd_found_busy++;
7560 }
7561 break;
7562 }
7563 if (m->vmp_absent || m->vmp_unusual || VMP_ERROR_GET(m)) {
7564 /*
                         * If it's unusual in any way, ignore it.
7566 */
7567 if (!preflight) {
7568 hibernate_stats.cd_found_unusual++;
7569 }
7570 break;
7571 }
7572 if (m->vmp_cleaning) {
7573 if (!preflight) {
7574 hibernate_stats.cd_found_cleaning++;
7575 }
7576 break;
7577 }
7578 if (m->vmp_laundry) {
7579 if (!preflight) {
7580 hibernate_stats.cd_found_laundry++;
7581 }
7582 break;
7583 }
7584 if (!m->vmp_dirty) {
7585 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
7586
7587 if (refmod_state & VM_MEM_REFERENCED) {
7588 m->vmp_reference = TRUE;
7589 }
7590 if (refmod_state & VM_MEM_MODIFIED) {
7591 SET_PAGE_DIRTY(m, FALSE);
7592 }
7593 }
7594
7595 /*
7596 * If it's clean or purgeable we can discard the page on wakeup.
7597 */
7598 discard = (!m->vmp_dirty)
7599 || (VM_PURGABLE_VOLATILE == object->purgable)
7600 || (VM_PURGABLE_EMPTY == object->purgable);
7601
7602
7603 if (discard == FALSE) {
7604 if (!preflight) {
7605 hibernate_stats.cd_found_dirty++;
7606 }
7607 } else if (m->vmp_xpmapped && m->vmp_reference && !object->internal) {
7608 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
7609 if (!preflight) {
7610 hibernate_stats.cd_found_xpmapped++;
7611 }
7612 discard = FALSE;
7613 } else {
7614 if (!preflight) {
7615 hibernate_stats.cd_skipped_xpmapped++;
7616 }
7617 }
7618 }
        } while (FALSE);
7620
7621 if (object) {
7622 vm_object_unlock(object);
7623 }
7624
7625 return discard;
7626}
7627
7628
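/*
 * Free a page that hibernate_consider_discard() said we could drop.  If the
 * owning object is volatile purgeable, the object is effectively purged here
 * as well: it is removed from its purgeable queue, its token is released if
 * appropriate, it is marked VM_PURGABLE_EMPTY, and vm_page_purgeable_count
 * is adjusted for the pages being given up.
 */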
7629static void
7630hibernate_discard_page(vm_page_t m)
7631{
7632 vm_object_t m_object;
7633
7634 if (m->vmp_absent || m->vmp_unusual || VMP_ERROR_GET(m)) {
7635 /*
                 * If it's unusual in any way, ignore it.
7637 */
7638 return;
7639 }
7640
7641 m_object = VM_PAGE_OBJECT(m);
7642
7643#if MACH_ASSERT || DEBUG
7644 if (!vm_object_lock_try(m_object)) {
7645 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
7646 }
7647#else
7648 /* No need to lock page queue for token delete, hibernate_vm_unlock()
7649 * makes sure these locks are uncontended before sleep */
7650#endif /* MACH_ASSERT || DEBUG */
7651
7652 if (m->vmp_pmapped == TRUE) {
7653 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7654 }
7655
7656 if (m->vmp_laundry) {
7657 panic("hibernate_discard_page(%p) laundry", m);
7658 }
7659 if (m->vmp_private) {
7660 panic("hibernate_discard_page(%p) private", m);
7661 }
7662 if (m->vmp_fictitious) {
7663 panic("hibernate_discard_page(%p) fictitious", m);
7664 }
7665
7666 if (VM_PURGABLE_VOLATILE == m_object->purgable) {
7667 /* object should be on a queue */
7668 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
7669 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
7670 assert(old_queue);
7671 if (m_object->purgeable_when_ripe) {
7672 vm_purgeable_token_delete_first(old_queue);
7673 }
7674 vm_object_lock_assert_exclusive(m_object);
7675 VM_OBJECT_SET_PURGABLE(m_object, VM_PURGABLE_EMPTY);
7676
7677 /*
7678 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
7679 * accounted in the "volatile" ledger, so no change here.
7680 * We have to update vm_page_purgeable_count, though, since we're
7681 * effectively purging this object.
7682 */
7683 unsigned int delta;
7684 assert(m_object->resident_page_count >= m_object->wired_page_count);
7685 delta = (m_object->resident_page_count - m_object->wired_page_count);
7686 assert(vm_page_purgeable_count >= delta);
7687 assert(delta > 0);
7688 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
7689 }
7690
7691 vm_page_free(m);
7692
7693#if MACH_ASSERT || DEBUG
7694 vm_object_unlock(m_object);
7695#endif /* MACH_ASSERT || DEBUG */
7696}
7697
7698/*
7699 * Grab locks for hibernate_page_list_setall()
7700 */
7701void
7702hibernate_vm_lock_queues(void)
7703{
7704 vm_object_lock(compressor_object);
7705 vm_page_lock_queues();
7706 vm_free_page_lock();
7707 lck_mtx_lock(&vm_purgeable_queue_lock);
7708
7709 if (vm_page_local_q) {
7710 zpercpu_foreach(lq, vm_page_local_q) {
7711 VPL_LOCK(&lq->vpl_lock);
7712 }
7713 }
7714}
7715
7716void
7717hibernate_vm_unlock_queues(void)
7718{
7719 if (vm_page_local_q) {
7720 zpercpu_foreach(lq, vm_page_local_q) {
7721 VPL_UNLOCK(&lq->vpl_lock);
7722 }
7723 }
7724 lck_mtx_unlock(&vm_purgeable_queue_lock);
7725 vm_free_page_unlock();
7726 vm_page_unlock_queues();
7727 vm_object_unlock(compressor_object);
7728}
7729
7730/*
 * A zero bit in the bitmaps means the page needs to be saved. All pages default
 * to being saved; pages known to the VM not to need saving are subtracted.
 * Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
7734 */
7735
7736void
7737hibernate_page_list_setall(hibernate_page_list_t * page_list,
7738 hibernate_page_list_t * page_list_wired,
7739 hibernate_page_list_t * page_list_pal,
7740 boolean_t preflight,
7741 boolean_t will_discard,
7742 uint32_t * pagesOut)
7743{
7744 uint64_t start, end, nsec;
7745 vm_page_t m;
7746 vm_page_t next;
7747 uint32_t pages = page_list->page_count;
7748 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
7749 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
7750 uint32_t count_wire = pages;
7751 uint32_t count_discard_active = 0;
7752 uint32_t count_discard_inactive = 0;
7753 uint32_t count_retired = 0;
7754 uint32_t count_discard_cleaned = 0;
7755 uint32_t count_discard_purgeable = 0;
7756 uint32_t count_discard_speculative = 0;
7757 uint32_t count_discard_vm_struct_pages = 0;
7758 uint32_t i;
7759 uint32_t bank;
7760 hibernate_bitmap_t * bitmap;
7761 hibernate_bitmap_t * bitmap_wired;
7762 boolean_t discard_all;
7763 boolean_t discard = FALSE;
7764
7765 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
7766
7767 if (preflight) {
7768 page_list = NULL;
7769 page_list_wired = NULL;
7770 page_list_pal = NULL;
7771 discard_all = FALSE;
7772 } else {
7773 discard_all = will_discard;
7774 }
7775
7776#if MACH_ASSERT || DEBUG
7777 if (!preflight) {
7778 assert(hibernate_vm_locks_are_safe());
7779 vm_page_lock_queues();
7780 if (vm_page_local_q) {
7781 zpercpu_foreach(lq, vm_page_local_q) {
7782 VPL_LOCK(&lq->vpl_lock);
7783 }
7784 }
7785 }
7786#endif /* MACH_ASSERT || DEBUG */
7787
7788
7789 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
7790
7791 clock_get_uptime(&start);
7792
7793 if (!preflight) {
7794 hibernate_page_list_zero(page_list);
7795 hibernate_page_list_zero(page_list_wired);
7796 hibernate_page_list_zero(page_list_pal);
7797
7798 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
7799 hibernate_stats.cd_pages = pages;
7800 }
7801
7802 if (vm_page_local_q) {
7803 zpercpu_foreach_cpu(lid) {
7804 vm_page_reactivate_local(lid, TRUE, !preflight);
7805 }
7806 }
7807
7808 if (preflight) {
7809 vm_object_lock(compressor_object);
7810 vm_page_lock_queues();
7811 vm_free_page_lock();
7812 }
7813
7814 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7815
7816 hibernation_vmqueues_inspection = TRUE;
7817
7818 m = (vm_page_t) hibernate_gobble_queue;
7819 while (m) {
7820 pages--;
7821 count_wire--;
7822 if (!preflight) {
7823 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7824 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7825 }
7826 m = m->vmp_snext;
7827 }
7828
7829 if (!preflight) {
7830 percpu_foreach(free_pages_head, free_pages) {
7831 for (m = *free_pages_head; m; m = m->vmp_snext) {
7832 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
7833
7834 pages--;
7835 count_wire--;
7836 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7837 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7838
7839 hibernate_stats.cd_local_free++;
7840 hibernate_stats.cd_total_free++;
7841 }
7842 }
7843 }
7844
7845 for (i = 0; i < vm_colors; i++) {
7846 vm_page_queue_iterate(&vm_page_queue_free[i].qhead, m, vmp_pageq) {
7847 assert(m->vmp_q_state == VM_PAGE_ON_FREE_Q);
7848
7849 pages--;
7850 count_wire--;
7851 if (!preflight) {
7852 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7853 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7854
7855 hibernate_stats.cd_total_free++;
7856 }
7857 }
7858 }
7859
7860 vm_page_queue_iterate(&vm_lopage_queue_free, m, vmp_pageq) {
7861 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
7862
7863 pages--;
7864 count_wire--;
7865 if (!preflight) {
7866 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7867 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7868
7869 hibernate_stats.cd_total_free++;
7870 }
7871 }
7872
7873 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
7874 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m)) {
7875 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
7876
7877 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7878 discard = FALSE;
7879 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7880 && hibernate_consider_discard(m, preflight)) {
7881 if (!preflight) {
7882 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7883 }
7884 count_discard_inactive++;
7885 discard = discard_all;
7886 } else {
7887 count_throttled++;
7888 }
7889 count_wire--;
7890 if (!preflight) {
7891 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7892 }
7893
7894 if (discard) {
7895 hibernate_discard_page(m);
7896 }
7897 m = next;
7898 }
7899
7900 m = (vm_page_t)vm_page_queue_first(&vm_page_queue_anonymous);
7901 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m)) {
7902 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7903
7904 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7905 discard = FALSE;
7906 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7907 hibernate_consider_discard(m, preflight)) {
7908 if (!preflight) {
7909 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7910 }
7911 if (m->vmp_dirty) {
7912 count_discard_purgeable++;
7913 } else {
7914 count_discard_inactive++;
7915 }
7916 discard = discard_all;
7917 } else {
7918 count_anonymous++;
7919 }
7920 count_wire--;
7921 if (!preflight) {
7922 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7923 }
7924 if (discard) {
7925 hibernate_discard_page(m);
7926 }
7927 m = next;
7928 }
7929
7930 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7931 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m)) {
7932 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7933
7934 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7935 discard = FALSE;
7936 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7937 hibernate_consider_discard(m, preflight)) {
7938 if (!preflight) {
7939 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7940 }
7941 if (m->vmp_dirty) {
7942 count_discard_purgeable++;
7943 } else {
7944 count_discard_cleaned++;
7945 }
7946 discard = discard_all;
7947 } else {
7948 count_cleaned++;
7949 }
7950 count_wire--;
7951 if (!preflight) {
7952 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7953 }
7954 if (discard) {
7955 hibernate_discard_page(m);
7956 }
7957 m = next;
7958 }
7959
7960 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7961 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m)) {
7962 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7963
7964 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7965 discard = FALSE;
7966 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) &&
7967 hibernate_consider_discard(m, preflight)) {
7968 if (!preflight) {
7969 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7970 }
7971 if (m->vmp_dirty) {
7972 count_discard_purgeable++;
7973 } else {
7974 count_discard_active++;
7975 }
7976 discard = discard_all;
7977 } else {
7978 count_active++;
7979 }
7980 count_wire--;
7981 if (!preflight) {
7982 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7983 }
7984 if (discard) {
7985 hibernate_discard_page(m);
7986 }
7987 m = next;
7988 }
7989
7990 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7991 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m)) {
7992 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7993
7994 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7995 discard = FALSE;
7996 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7997 hibernate_consider_discard(m, preflight)) {
7998 if (!preflight) {
7999 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8000 }
8001 if (m->vmp_dirty) {
8002 count_discard_purgeable++;
8003 } else {
8004 count_discard_inactive++;
8005 }
8006 discard = discard_all;
8007 } else {
8008 count_inactive++;
8009 }
8010 count_wire--;
8011 if (!preflight) {
8012 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8013 }
8014 if (discard) {
8015 hibernate_discard_page(m);
8016 }
8017 m = next;
8018 }
8019 /* XXX FBDP TODO: secluded queue */
8020
8021 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
8022 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
8023 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m)) {
8024 assertf(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q,
8025 "Bad page: %p (0x%x:0x%x) on queue %d has state: %d (Discard: %d, Preflight: %d)",
8026 m, m->vmp_pageq.next, m->vmp_pageq.prev, i, m->vmp_q_state, discard, preflight);
8027
8028 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8029 discard = FALSE;
8030 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
8031 hibernate_consider_discard(m, preflight)) {
8032 if (!preflight) {
8033 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8034 }
8035 count_discard_speculative++;
8036 discard = discard_all;
8037 } else {
8038 count_speculative++;
8039 }
8040 count_wire--;
8041 if (!preflight) {
8042 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8043 }
8044 if (discard) {
8045 hibernate_discard_page(m);
8046 }
8047 m = next;
8048 }
8049 }
8050
8051 vm_page_queue_iterate(&compressor_object->memq, m, vmp_listq) {
8052 assert(m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
8053
8054 count_compressor++;
8055 count_wire--;
8056 if (!preflight) {
8057 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8058 }
8059 }
8060
8061
8062 if (preflight == FALSE && discard_all == TRUE) {
8063 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
8064
8065 HIBLOG("hibernate_teardown started\n");
8066 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
8067 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
8068
8069 pages -= count_discard_vm_struct_pages;
8070 count_wire -= count_discard_vm_struct_pages;
8071
8072 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
8073
8074 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
8075 }
8076
8077 if (!preflight) {
8078 // pull wired from hibernate_bitmap
8079 bitmap = &page_list->bank_bitmap[0];
8080 bitmap_wired = &page_list_wired->bank_bitmap[0];
8081 for (bank = 0; bank < page_list->bank_count; bank++) {
8082 for (i = 0; i < bitmap->bitmapwords; i++) {
8083 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
8084 }
8085 bitmap = (hibernate_bitmap_t *)&bitmap->bitmap[bitmap->bitmapwords];
8086 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
8087 }
8088 }
8089
8090 // machine dependent adjustments
8091 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
8092
8093 if (!preflight) {
8094 hibernate_stats.cd_count_wire = count_wire;
8095 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
8096 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
8097 }
8098
8099 clock_get_uptime(&end);
8100 absolutetime_to_nanoseconds(end - start, &nsec);
8101 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
8102
8103 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d retired %d\n",
8104 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
8105 discard_all ? "did" : "could",
8106 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned, count_retired);
8107
8108 if (hibernate_stats.cd_skipped_xpmapped) {
8109 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
8110 }
8111
8112 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned - count_retired;
8113
8114 if (preflight && will_discard) {
8115 *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
8116 /*
                 * We try to keep at most HIBERNATE_XPMAPPED_LIMIT xpmapped pages around in the
                 * hibernation image even if they are clean, so we need to size the image accordingly.
8119 *
8120 * NB: We have to assume all HIBERNATE_XPMAPPED_LIMIT pages might show up because 'dirty'
8121 * xpmapped pages aren't distinguishable from other 'dirty' pages in preflight. So we might
8122 * only see part of the xpmapped pages if we look at 'cd_found_xpmapped' which solely tracks
8123 * clean xpmapped pages.
8124 *
8125 * Since these pages are all cleaned by the time we are in the post-preflight phase, we might
                 * see a much larger number in 'cd_found_xpmapped' now than we did in the preflight phase.
8127 */
8128 *pagesOut += HIBERNATE_XPMAPPED_LIMIT;
8129 }
8130
8131 hibernation_vmqueues_inspection = FALSE;
8132
8133#if MACH_ASSERT || DEBUG
8134 if (!preflight) {
8135 if (vm_page_local_q) {
8136 zpercpu_foreach(lq, vm_page_local_q) {
8137 VPL_UNLOCK(&lq->vpl_lock);
8138 }
8139 }
8140 vm_page_unlock_queues();
8141 }
8142#endif /* MACH_ASSERT || DEBUG */
8143
8144 if (preflight) {
8145 vm_free_page_unlock();
8146 vm_page_unlock_queues();
8147 vm_object_unlock(compressor_object);
8148 }
8149
8150 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
8151}
8152
8153void
8154hibernate_page_list_discard(hibernate_page_list_t * page_list)
8155{
8156 uint64_t start, end, nsec;
8157 vm_page_t m;
8158 vm_page_t next;
8159 uint32_t i;
8160 uint32_t count_discard_active = 0;
8161 uint32_t count_discard_inactive = 0;
8162 uint32_t count_discard_purgeable = 0;
8163 uint32_t count_discard_cleaned = 0;
8164 uint32_t count_discard_speculative = 0;
8165
8166
8167#if MACH_ASSERT || DEBUG
8168 vm_page_lock_queues();
8169 if (vm_page_local_q) {
8170 zpercpu_foreach(lq, vm_page_local_q) {
8171 VPL_LOCK(&lq->vpl_lock);
8172 }
8173 }
8174#endif /* MACH_ASSERT || DEBUG */
8175
8176 clock_get_uptime(&start);
8177
8178 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
8179 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m)) {
8180 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
8181
8182 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8183 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
8184 if (m->vmp_dirty) {
8185 count_discard_purgeable++;
8186 } else {
8187 count_discard_inactive++;
8188 }
8189 hibernate_discard_page(m);
8190 }
8191 m = next;
8192 }
8193
8194 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
8195 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
8196 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m)) {
8197 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
8198
8199 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8200 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
8201 count_discard_speculative++;
8202 hibernate_discard_page(m);
8203 }
8204 m = next;
8205 }
8206 }
8207
8208 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
8209 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m)) {
8210 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
8211
8212 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8213 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
8214 if (m->vmp_dirty) {
8215 count_discard_purgeable++;
8216 } else {
8217 count_discard_inactive++;
8218 }
8219 hibernate_discard_page(m);
8220 }
8221 m = next;
8222 }
8223 /* XXX FBDP TODO: secluded queue */
8224
8225 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
8226 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m)) {
8227 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
8228
8229 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8230 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
8231 if (m->vmp_dirty) {
8232 count_discard_purgeable++;
8233 } else {
8234 count_discard_active++;
8235 }
8236 hibernate_discard_page(m);
8237 }
8238 m = next;
8239 }
8240
8241 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
8242 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m)) {
8243 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
8244
8245 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8246 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
8247 if (m->vmp_dirty) {
8248 count_discard_purgeable++;
8249 } else {
8250 count_discard_cleaned++;
8251 }
8252 hibernate_discard_page(m);
8253 }
8254 m = next;
8255 }
8256
8257#if MACH_ASSERT || DEBUG
8258 if (vm_page_local_q) {
8259 zpercpu_foreach(lq, vm_page_local_q) {
8260 VPL_UNLOCK(&lq->vpl_lock);
8261 }
8262 }
8263 vm_page_unlock_queues();
8264#endif /* MACH_ASSERT || DEBUG */
8265
8266 clock_get_uptime(&end);
8267 absolutetime_to_nanoseconds(end - start, &nsec);
8268 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
8269 nsec / 1000000ULL,
8270 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
8271}
8272
8273boolean_t hibernate_paddr_map_inited = FALSE;
8274unsigned int hibernate_teardown_last_valid_compact_indx = -1;
8275vm_page_t hibernate_rebuild_hash_list = NULL;
8276
8277unsigned int hibernate_teardown_found_tabled_pages = 0;
8278unsigned int hibernate_teardown_found_created_pages = 0;
8279unsigned int hibernate_teardown_found_free_pages = 0;
8280unsigned int hibernate_teardown_vm_page_free_count;
8281
8282
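/*
 * vm_pages[] covers physical memory only in runs of contiguous page numbers.
 * Each ppnum_mapping describes one run: the physical page number of its first
 * entry and the [ppnm_sindx, ppnm_eindx) range of vm_pages[] indices it
 * covers.  ppnm_last_found caches the most recently used mapping so that
 * hibernate_lookup_paddr() can usually avoid walking the whole list.
 */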
8283struct ppnum_mapping {
8284 struct ppnum_mapping *ppnm_next;
8285 ppnum_t ppnm_base_paddr;
8286 unsigned int ppnm_sindx;
8287 unsigned int ppnm_eindx;
8288};
8289
8290struct ppnum_mapping *ppnm_head;
8291struct ppnum_mapping *ppnm_last_found = NULL;
8292
8293
8294void
8295hibernate_create_paddr_map(void)
8296{
8297 unsigned int i;
8298 ppnum_t next_ppnum_in_run = 0;
8299 struct ppnum_mapping *ppnm = NULL;
8300
8301 if (hibernate_paddr_map_inited == FALSE) {
8302 for (i = 0; i < vm_pages_count; i++) {
8303 if (ppnm) {
8304 ppnm->ppnm_eindx = i;
8305 }
8306
8307 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
8308 ppnm = zalloc_permanent_type(struct ppnum_mapping);
8309
8310 ppnm->ppnm_next = ppnm_head;
8311 ppnm_head = ppnm;
8312
8313 ppnm->ppnm_sindx = i;
8314 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
8315 }
8316 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
8317 }
8318 ppnm->ppnm_eindx = vm_pages_count;
8319
8320 hibernate_paddr_map_inited = TRUE;
8321 }
8322}
8323
8324ppnum_t
8325hibernate_lookup_paddr(unsigned int indx)
8326{
8327 struct ppnum_mapping *ppnm = NULL;
8328
8329 ppnm = ppnm_last_found;
8330
8331 if (ppnm) {
8332 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
8333 goto done;
8334 }
8335 }
8336 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
8337 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
8338 ppnm_last_found = ppnm;
8339 break;
8340 }
8341 }
8342 if (ppnm == NULL) {
8343 panic("hibernate_lookup_paddr of %d failed", indx);
8344 }
8345done:
8346 return ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx);
8347}
8348
8349
8350uint32_t
8351hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
8352{
8353 addr64_t saddr_aligned;
8354 addr64_t eaddr_aligned;
8355 addr64_t addr;
8356 ppnum_t paddr;
8357 unsigned int mark_as_unneeded_pages = 0;
8358
8359 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
8360 eaddr_aligned = eaddr & ~PAGE_MASK_64;
8361
8362 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
8363 paddr = pmap_find_phys(kernel_pmap, addr);
8364
8365 assert(paddr);
8366
8367 hibernate_page_bitset(page_list, TRUE, paddr);
8368 hibernate_page_bitset(page_list_wired, TRUE, paddr);
8369
8370 mark_as_unneeded_pages++;
8371 }
8372 return mark_as_unneeded_pages;
8373}
8374
8375
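/*
 * Re-insert a page into the vm_page_buckets hash; used while rebuilding the
 * hash in hibernate_rebuild_vm_structs() after the buckets have been wiped.
 */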
8376void
8377hibernate_hash_insert_page(vm_page_t mem)
8378{
8379 vm_page_bucket_t *bucket;
8380 int hash_id;
8381 vm_object_t m_object;
8382
8383 m_object = VM_PAGE_OBJECT(mem);
8384
8385 assert(mem->vmp_hashed);
8386 assert(m_object);
8387 assert(mem->vmp_offset != (vm_object_offset_t) -1);
8388
8389 /*
         * Insert it into the object/offset hash table.
8391 */
8392 hash_id = vm_page_hash(m_object, mem->vmp_offset);
8393 bucket = &vm_page_buckets[hash_id];
8394
8395 mem->vmp_next_m = bucket->page_list;
8396 bucket->page_list = VM_PAGE_PACK_PTR(mem);
8397}
8398
8399
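/*
 * Re-initialize the vm_pages[] entries in [sindx, eindx) as free pages and
 * put them back on the per-color free queues.  This re-creates the "holes"
 * left behind by the compaction done in hibernate_teardown_vm_structs().
 */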
8400void
8401hibernate_free_range(int sindx, int eindx)
8402{
8403 vm_page_t mem;
8404 unsigned int color;
8405
8406 while (sindx < eindx) {
8407 mem = &vm_pages[sindx];
8408
8409 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
8410
8411 mem->vmp_lopage = FALSE;
8412 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
8413
8414 color = VM_PAGE_GET_COLOR(mem);
8415#if defined(__x86_64__)
8416 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
8417#else
8418 vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
8419#endif
8420 vm_page_free_count++;
8421
8422 sindx++;
8423 }
8424}
8425
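/*
 * Undo hibernate_teardown_vm_structs() on wake: move each compacted
 * vm_page_t back to the slot recorded in its vmp_next_m field, re-create the
 * freed ranges via hibernate_free_range(), and rebuild the vm_page hash,
 * including the fictitious pages parked on hibernate_rebuild_hash_list.
 */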
8426void
8427hibernate_rebuild_vm_structs(void)
8428{
8429 int i, cindx, sindx, eindx;
8430 vm_page_t mem, tmem, mem_next;
8431 AbsoluteTime startTime, endTime;
8432 uint64_t nsec;
8433
8434 if (hibernate_rebuild_needed == FALSE) {
8435 return;
8436 }
8437
8438 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
8439 HIBLOG("hibernate_rebuild started\n");
8440
8441 clock_get_uptime(&startTime);
8442
8443 pal_hib_rebuild_pmap_structs();
8444
8445 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
8446 eindx = vm_pages_count;
8447
8448 /*
8449 * Mark all the vm_pages[] that have not been initialized yet as being
         * transient. This is needed to ensure that the buddy page search is correct.
         * Without this, random data in these vm_pages[] can trip up the buddy search.
8452 */
8453 for (i = hibernate_teardown_last_valid_compact_indx + 1; i < eindx; ++i) {
8454 vm_pages[i].vmp_q_state = VM_PAGE_NOT_ON_Q;
8455 }
8456
8457 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
8458 mem = &vm_pages[cindx];
8459 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
8460 /*
8461 * hibernate_teardown_vm_structs leaves the location where
                 * this vm_page_t must be located in "vmp_next_m".
8463 */
8464 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
8465 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
8466
8467 sindx = (int)(tmem - &vm_pages[0]);
8468
8469 if (mem != tmem) {
8470 /*
8471 * this vm_page_t was moved by hibernate_teardown_vm_structs,
8472 * so move it back to its real location
8473 */
8474 *tmem = *mem;
8475 mem = tmem;
8476 }
8477 if (mem->vmp_hashed) {
8478 hibernate_hash_insert_page(mem);
8479 }
8480 /*
8481 * the 'hole' between this vm_page_t and the previous
8482 * vm_page_t we moved needs to be initialized as
8483 * a range of free vm_page_t's
8484 */
8485 hibernate_free_range(sindx + 1, eindx);
8486
8487 eindx = sindx;
8488 }
8489 if (sindx) {
8490 hibernate_free_range(0, sindx);
8491 }
8492
8493 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
8494
8495 /*
8496 * process the list of vm_page_t's that were entered in the hash,
         * but were not located in the vm_pages array... these are
8498 * vm_page_t's that were created on the fly (i.e. fictitious)
8499 */
8500 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
8501 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
8502
8503 mem->vmp_next_m = 0;
8504 hibernate_hash_insert_page(mem);
8505 }
8506 hibernate_rebuild_hash_list = NULL;
8507
8508 clock_get_uptime(&endTime);
8509 SUB_ABSOLUTETIME(&endTime, &startTime);
8510 absolutetime_to_nanoseconds(endTime, &nsec);
8511
8512 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
8513
8514 hibernate_rebuild_needed = FALSE;
8515
8516 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
8517}
8518
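/*
 * Compact vm_pages[] for the hibernation image: free pages are pulled off
 * the free queues and their slots are refilled with in-use vm_page_t's
 * copied down from higher indices.  Each in-use entry records its original
 * position in vmp_next_m so hibernate_rebuild_vm_structs() can undo the move
 * on wake.  The now-unused tail of vm_pages[], the hash buckets and any pmap
 * ranges reported by pal_hib_teardown_pmap_structs() are marked as not
 * needing to be saved.  Returns the number of pages marked unneeded.
 */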
8519uint32_t
8520hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
8521{
8522 unsigned int i;
8523 unsigned int compact_target_indx;
8524 vm_page_t mem, mem_next;
8525 vm_page_bucket_t *bucket;
8526 unsigned int mark_as_unneeded_pages = 0;
8527 unsigned int unneeded_vm_page_bucket_pages = 0;
8528 unsigned int unneeded_vm_pages_pages = 0;
8529 unsigned int unneeded_pmap_pages = 0;
8530 addr64_t start_of_unneeded = 0;
8531 addr64_t end_of_unneeded = 0;
8532
8533
8534 if (hibernate_should_abort()) {
8535 return 0;
8536 }
8537
8538 hibernate_rebuild_needed = TRUE;
8539
8540 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
8541 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
8542 vm_page_cleaned_count, compressor_object->resident_page_count);
8543
8544 for (i = 0; i < vm_page_bucket_count; i++) {
8545 bucket = &vm_page_buckets[i];
8546
8547 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
8548 assert(mem->vmp_hashed);
8549
8550 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
8551
8552 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
8553 mem->vmp_next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
8554 hibernate_rebuild_hash_list = mem;
8555 }
8556 }
8557 }
8558 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
8559 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
8560
8561 hibernate_teardown_vm_page_free_count = vm_page_free_count;
8562
8563 compact_target_indx = 0;
8564
8565 for (i = 0; i < vm_pages_count; i++) {
8566 mem = &vm_pages[i];
8567
8568 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
8569 unsigned int color;
8570
8571 assert(mem->vmp_busy);
8572 assert(!mem->vmp_lopage);
8573
8574 color = VM_PAGE_GET_COLOR(mem);
8575
8576 vm_page_queue_remove(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
8577
8578 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
8579
8580 vm_page_free_count--;
8581
8582 hibernate_teardown_found_free_pages++;
8583
8584 if (vm_pages[compact_target_indx].vmp_q_state != VM_PAGE_ON_FREE_Q) {
8585 compact_target_indx = i;
8586 }
8587 } else {
8588 /*
8589 * record this vm_page_t's original location
8590 * we need this even if it doesn't get moved
8591 * as an indicator to the rebuild function that
8592 * we don't have to move it
8593 */
8594 mem->vmp_next_m = VM_PAGE_PACK_PTR(mem);
8595
8596 if (vm_pages[compact_target_indx].vmp_q_state == VM_PAGE_ON_FREE_Q) {
8597 /*
8598 * we've got a hole to fill, so
                                 * move this vm_page_t to its new home
8600 */
8601 vm_pages[compact_target_indx] = *mem;
8602 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
8603
8604 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
8605 compact_target_indx++;
8606 } else {
8607 hibernate_teardown_last_valid_compact_indx = i;
8608 }
8609 }
8610 }
8611 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx + 1],
8612 (addr64_t)&vm_pages[vm_pages_count - 1], page_list, page_list_wired);
8613 mark_as_unneeded_pages += unneeded_vm_pages_pages;
8614
8615 pal_hib_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
8616
8617 if (start_of_unneeded) {
8618 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
8619 mark_as_unneeded_pages += unneeded_pmap_pages;
8620 }
8621 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
8622
8623 return mark_as_unneeded_pages;
8624}
8625
8626
8627#endif /* HIBERNATION */
8628
8629/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8630
8631#include <mach_vm_debug.h>
8632#if MACH_VM_DEBUG
8633
8634#include <mach_debug/hash_info.h>
8635#include <vm/vm_debug.h>
8636
8637/*
8638 * Routine: vm_page_info
8639 * Purpose:
8640 * Return information about the global VP table.
8641 * Fills the buffer with as much information as possible
8642 * and returns the desired size of the buffer.
8643 * Conditions:
8644 * Nothing locked. The caller should provide
8645 * possibly-pageable memory.
8646 */
8647
8648unsigned int
8649vm_page_info(
8650 hash_info_bucket_t *info,
8651 unsigned int count)
8652{
8653 unsigned int i;
8654 lck_spin_t *bucket_lock;
8655
8656 if (vm_page_bucket_count < count) {
8657 count = vm_page_bucket_count;
8658 }
8659
8660 for (i = 0; i < count; i++) {
8661 vm_page_bucket_t *bucket = &vm_page_buckets[i];
8662 unsigned int bucket_count = 0;
8663 vm_page_t m;
8664
8665 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
8666 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
8667
8668 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
8669 m != VM_PAGE_NULL;
8670 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->vmp_next_m))) {
8671 bucket_count++;
8672 }
8673
8674 lck_spin_unlock(bucket_lock);
8675
8676 /* don't touch pageable memory while holding locks */
8677 info[i].hib_count = bucket_count;
8678 }
8679
8680 return vm_page_bucket_count;
8681}
8682#endif /* MACH_VM_DEBUG */
8683
8684#if VM_PAGE_BUCKETS_CHECK
8685void
8686vm_page_buckets_check(void)
8687{
8688 unsigned int i;
8689 vm_page_t p;
8690 unsigned int p_hash;
8691 vm_page_bucket_t *bucket;
8692 lck_spin_t *bucket_lock;
8693
8694 if (!vm_page_buckets_check_ready) {
8695 return;
8696 }
8697
8698#if HIBERNATION
8699 if (hibernate_rebuild_needed ||
8700 hibernate_rebuild_hash_list) {
8701 panic("BUCKET_CHECK: hibernation in progress: "
8702 "rebuild_needed=%d rebuild_hash_list=%p\n",
8703 hibernate_rebuild_needed,
8704 hibernate_rebuild_hash_list);
8705 }
8706#endif /* HIBERNATION */
8707
8708#if VM_PAGE_FAKE_BUCKETS
8709 char *cp;
8710 for (cp = (char *) vm_page_fake_buckets_start;
8711 cp < (char *) vm_page_fake_buckets_end;
8712 cp++) {
8713 if (*cp != 0x5a) {
8714 panic("BUCKET_CHECK: corruption at %p in fake buckets "
8715 "[0x%llx:0x%llx]\n",
8716 cp,
8717 (uint64_t) vm_page_fake_buckets_start,
8718 (uint64_t) vm_page_fake_buckets_end);
8719 }
8720 }
8721#endif /* VM_PAGE_FAKE_BUCKETS */
8722
8723 for (i = 0; i < vm_page_bucket_count; i++) {
8724 vm_object_t p_object;
8725
8726 bucket = &vm_page_buckets[i];
8727 if (!bucket->page_list) {
8728 continue;
8729 }
8730
8731 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
8732 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
8733 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
8734
8735 while (p != VM_PAGE_NULL) {
8736 p_object = VM_PAGE_OBJECT(p);
8737
8738 if (!p->vmp_hashed) {
8739 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
8740 "hash %d in bucket %d at %p "
8741 "is not hashed\n",
8742 p, p_object, p->vmp_offset,
8743 p_hash, i, bucket);
8744 }
8745 p_hash = vm_page_hash(p_object, p->vmp_offset);
8746 if (p_hash != i) {
8747 panic("BUCKET_CHECK: corruption in bucket %d "
8748 "at %p: page %p object %p offset 0x%llx "
8749 "hash %d\n",
8750 i, bucket, p, p_object, p->vmp_offset,
8751 p_hash);
8752 }
8753 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m));
8754 }
8755 lck_spin_unlock(bucket_lock);
8756 }
8757
8758// printf("BUCKET_CHECK: checked buckets\n");
8759}
8760#endif /* VM_PAGE_BUCKETS_CHECK */
8761
8762/*
8763 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
 * local queues if they exist... it's the only spot in the system where we add pages
8765 * to those queues... once on those queues, those pages can only move to one of the
8766 * global page queues or the free queues... they NEVER move from local q to local q.
8767 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
8768 * the global vm_page_queue_lock at this point... we still need to take the local lock
 * in case this operation is being run on a different CPU than the local queue's identity,
8770 * but we don't have to worry about the page moving to a global queue or becoming wired
8771 * while we're grabbing the local lock since those operations would require the global
8772 * vm_page_queue_lock to be held, and we already own it.
8773 *
 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
8775 * 'wired' and local are ALWAYS mutually exclusive conditions.
8776 */
8777
8778void
8779vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_specialq)
8780{
8781 boolean_t was_pageable = TRUE;
8782 vm_object_t m_object;
8783
8784 m_object = VM_PAGE_OBJECT(mem);
8785
8786 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8787
8788 if (mem->vmp_q_state == VM_PAGE_NOT_ON_Q) {
8789 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8790 if (remove_from_specialq == TRUE) {
8791 vm_page_remove_from_specialq(mem);
8792 }
8793 /*if (mem->vmp_on_specialq != VM_PAGE_SPECIAL_Q_EMPTY) {
8794 * assert(mem->vmp_specialq.next != 0);
8795 * assert(mem->vmp_specialq.prev != 0);
8796 * } else {*/
8797 if (mem->vmp_on_specialq == VM_PAGE_SPECIAL_Q_EMPTY) {
8798 assert(mem->vmp_specialq.next == 0);
8799 assert(mem->vmp_specialq.prev == 0);
8800 }
8801 return;
8802 }
8803
8804 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8805 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8806 assert(mem->vmp_specialq.next == 0 &&
8807 mem->vmp_specialq.prev == 0 &&
8808 mem->vmp_on_specialq == VM_PAGE_SPECIAL_Q_EMPTY);
8809 return;
8810 }
8811 if (mem->vmp_q_state == VM_PAGE_IS_WIRED) {
8812 /*
8813 * might put these guys on a list for debugging purposes
8814 * if we do, we'll need to remove this assert
8815 */
8816 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8817 assert(mem->vmp_specialq.next == 0 &&
8818 mem->vmp_specialq.prev == 0);
8819 /*
8820 * Recall that vmp_on_specialq also means a request to put
8821 * it on the special Q. So we don't want to reset that bit
8822 * just because a wiring request came in. We might want to
8823 * put it on the special queue post-unwiring.
8824 *
8825 * &&
8826 * mem->vmp_on_specialq == VM_PAGE_SPECIAL_Q_EMPTY);
8827 */
8828 return;
8829 }
8830
8831 assert(m_object != compressor_object);
8832 assert(!is_kernel_object(m_object));
8833 assert(!mem->vmp_fictitious);
8834
8835 switch (mem->vmp_q_state) {
8836 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
8837 {
8838 struct vpl *lq;
8839
8840 lq = zpercpu_get_cpu(vm_page_local_q, mem->vmp_local_id);
8841 VPL_LOCK(&lq->vpl_lock);
8842 vm_page_queue_remove(&lq->vpl_queue, mem, vmp_pageq);
8843 mem->vmp_local_id = 0;
8844 lq->vpl_count--;
8845 if (m_object->internal) {
8846 lq->vpl_internal_count--;
8847 } else {
8848 lq->vpl_external_count--;
8849 }
8850 VPL_UNLOCK(&lq->vpl_lock);
8851 was_pageable = FALSE;
8852 break;
8853 }
8854 case VM_PAGE_ON_ACTIVE_Q:
8855 {
8856 vm_page_queue_remove(&vm_page_queue_active, mem, vmp_pageq);
8857 vm_page_active_count--;
8858 break;
8859 }
8860
8861 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
8862 {
8863 assert(m_object->internal == TRUE);
8864
8865 vm_page_inactive_count--;
8866 vm_page_queue_remove(&vm_page_queue_anonymous, mem, vmp_pageq);
8867 vm_page_anonymous_count--;
8868
8869 vm_purgeable_q_advance_all();
                vm_page_balance_inactive(3);
8871 break;
8872 }
8873
8874 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
8875 {
8876 assert(m_object->internal == FALSE);
8877
8878 vm_page_inactive_count--;
8879 vm_page_queue_remove(&vm_page_queue_inactive, mem, vmp_pageq);
8880 vm_purgeable_q_advance_all();
                vm_page_balance_inactive(3);
8882 break;
8883 }
8884
8885 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
8886 {
8887 assert(m_object->internal == FALSE);
8888
8889 vm_page_inactive_count--;
8890 vm_page_queue_remove(&vm_page_queue_cleaned, mem, vmp_pageq);
8891 vm_page_cleaned_count--;
                vm_page_balance_inactive(3);
8893 break;
8894 }
8895
8896 case VM_PAGE_ON_THROTTLED_Q:
8897 {
8898 assert(m_object->internal == TRUE);
8899
8900 vm_page_queue_remove(&vm_page_queue_throttled, mem, vmp_pageq);
8901 vm_page_throttled_count--;
8902 was_pageable = FALSE;
8903 break;
8904 }
8905
8906 case VM_PAGE_ON_SPECULATIVE_Q:
8907 {
8908 assert(m_object->internal == FALSE);
8909
                vm_page_remque(&mem->vmp_pageq);
8911 vm_page_speculative_count--;
                vm_page_balance_inactive(3);
8913 break;
8914 }
8915
8916#if CONFIG_SECLUDED_MEMORY
8917 case VM_PAGE_ON_SECLUDED_Q:
8918 {
8919 vm_page_queue_remove(&vm_page_queue_secluded, mem, vmp_pageq);
8920 vm_page_secluded_count--;
8921 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
8922 if (m_object == VM_OBJECT_NULL) {
8923 vm_page_secluded_count_free--;
8924 was_pageable = FALSE;
8925 } else {
8926 assert(!m_object->internal);
8927 vm_page_secluded_count_inuse--;
8928 was_pageable = FALSE;
8929// was_pageable = TRUE;
8930 }
8931 break;
8932 }
8933#endif /* CONFIG_SECLUDED_MEMORY */
8934
8935 default:
8936 {
8937 /*
8938 * if (mem->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
8939 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
                 * the caller is responsible for determining if the page is on that queue, and if so, must
8941 * either first remove it (it needs both the page queues lock and the object lock to do
8942 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
8943 *
8944 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
8945 * or any of the undefined states
8946 */
8947 panic("vm_page_queues_remove - bad page q_state (%p, %d)", mem, mem->vmp_q_state);
8948 break;
8949 }
8950 }
8951 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
8952 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
8953
8954 if (remove_from_specialq == TRUE) {
8955 vm_page_remove_from_specialq(mem);
8956 }
8957 if (was_pageable) {
8958 if (m_object->internal) {
8959 vm_page_pageable_internal_count--;
8960 } else {
8961 vm_page_pageable_external_count--;
8962 }
8963 }
8964}
8965
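/*
 * Remove a page from its object's memq, first repairing the object's
 * memq_hint if it pointed at this page (prefer the next page in the queue,
 * then the previous one, otherwise clear the hint).
 */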
8966void
8967vm_page_remove_internal(vm_page_t page)
8968{
8969 vm_object_t __object = VM_PAGE_OBJECT(page);
8970 if (page == __object->memq_hint) {
8971 vm_page_t __new_hint;
8972 vm_page_queue_entry_t __qe;
8973 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->vmp_listq);
8974 if (vm_page_queue_end(&__object->memq, __qe)) {
8975 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->vmp_listq);
8976 if (vm_page_queue_end(&__object->memq, __qe)) {
8977 __qe = NULL;
8978 }
8979 }
8980 __new_hint = (vm_page_t)((uintptr_t) __qe);
8981 __object->memq_hint = __new_hint;
8982 }
8983 vm_page_queue_remove(&__object->memq, page, vmp_listq);
8984#if CONFIG_SECLUDED_MEMORY
8985 if (__object->eligible_for_secluded) {
8986 vm_page_secluded.eligible_for_secluded--;
8987 }
8988#endif /* CONFIG_SECLUDED_MEMORY */
8989}
8990
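/*
 * Put a page that is currently on no queue onto the appropriate inactive
 * queue: the anonymous queue for pages of internal objects, the regular
 * inactive queue otherwise.  'first' selects head vs. tail insertion.
 */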
8991void
8992vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8993{
8994 vm_object_t m_object;
8995
8996 m_object = VM_PAGE_OBJECT(mem);
8997
8998 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8999 assert(!mem->vmp_fictitious);
9000 assert(!mem->vmp_laundry);
9001 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
        vm_page_check_pageable_safe(mem);
9003
9004 if (m_object->internal) {
9005 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
9006
9007 if (first == TRUE) {
9008 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vmp_pageq);
9009 } else {
9010 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vmp_pageq);
9011 }
9012
9013 vm_page_anonymous_count++;
9014 vm_page_pageable_internal_count++;
9015 } else {
9016 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
9017
9018 if (first == TRUE) {
9019 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vmp_pageq);
9020 } else {
9021 vm_page_queue_enter(&vm_page_queue_inactive, mem, vmp_pageq);
9022 }
9023
9024 vm_page_pageable_external_count++;
9025 }
9026 vm_page_inactive_count++;
9027 token_new_pagecount++;
9028
9029 vm_page_add_to_specialq(mem, FALSE);
9030}
9031
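/*
 * Put a page that is currently on no queue at the head or tail of the
 * active queue and update the pageable page counters.
 */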
9032void
9033vm_page_enqueue_active(vm_page_t mem, boolean_t first)
9034{
9035 vm_object_t m_object;
9036
9037 m_object = VM_PAGE_OBJECT(mem);
9038
9039 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
9040 assert(!mem->vmp_fictitious);
9041 assert(!mem->vmp_laundry);
9042 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
        vm_page_check_pageable_safe(mem);
9044
9045 mem->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
9046 if (first == TRUE) {
9047 vm_page_queue_enter_first(&vm_page_queue_active, mem, vmp_pageq);
9048 } else {
9049 vm_page_queue_enter(&vm_page_queue_active, mem, vmp_pageq);
9050 }
9051 vm_page_active_count++;
9052
9053 if (m_object->internal) {
9054 vm_page_pageable_internal_count++;
9055 } else {
9056 vm_page_pageable_external_count++;
9057 }
9058
9059 vm_page_add_to_specialq(mem, FALSE);
        vm_page_balance_inactive(3);
9061}
9062
9063/*
9064 * Pages from special kernel objects shouldn't
9065 * be placed on pageable queues.
9066 */
9067void
9068vm_page_check_pageable_safe(vm_page_t page)
9069{
9070 vm_object_t page_object;
9071
9072 page_object = VM_PAGE_OBJECT(page);
9073
9074 if (is_kernel_object(page_object)) {
                panic("vm_page_check_pageable_safe: trying to add page "
                    "from kernel object (%p) to pageable queue", page_object);
9077 }
9078
9079 if (page_object == compressor_object) {
                panic("vm_page_check_pageable_safe: trying to add page "
                    "from compressor object (%p) to pageable queue", compressor_object);
9082 }
9083}
9084
9085/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
9086* wired page diagnose
9087* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
9088
9089#include <libkern/OSKextLibPrivate.h>
9090
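/*
 * A vm_allocation_site is laid out as the site structure itself, followed by
 * 'subtotalscount' vm_allocation_total entries, followed by the
 * NUL-terminated name; the KA_SIZE/KA_NAME/KA_NAME_LEN macros encode that
 * layout.
 */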
9091#define KA_SIZE(namelen, subtotalscount) \
9092 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
9093
9094#define KA_NAME(alloc) \
9095 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
9096
9097#define KA_NAME_LEN(alloc) \
9098 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
9099
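/*
 * Derive an allocation tag from the current call chain: prefer the thread's
 * explicit allocation_name if one is set, otherwise walk the saved frame
 * pointers within the kernel stack bounds (stripping pointer authentication
 * where present) until a return address outside the core kernel text, or
 * inside the built-in kmod text range, is found, and ask
 * OSKextGetAllocationSiteForCaller() for the corresponding site.
 */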
9100vm_tag_t
9101vm_tag_bt(void)
9102{
9103 uintptr_t* frameptr;
9104 uintptr_t* frameptr_next;
9105 uintptr_t retaddr;
9106 uintptr_t kstackb, kstackt;
9107 const vm_allocation_site_t * site;
9108 thread_t cthread;
9109 kern_allocation_name_t name;
9110
9111 cthread = current_thread();
9112 if (__improbable(cthread == NULL)) {
9113 return VM_KERN_MEMORY_OSFMK;
9114 }
9115
9116 if ((name = thread_get_kernel_state(cthread)->allocation_name)) {
9117 if (!name->tag) {
                        vm_tag_alloc(name);
9119 }
9120 return name->tag;
9121 }
9122
9123 kstackb = cthread->kernel_stack;
9124 kstackt = kstackb + kernel_stack_size;
9125
9126 /* Load stack frame pointer (EBP on x86) into frameptr */
9127 frameptr = __builtin_frame_address(0);
9128 site = NULL;
9129 while (frameptr != NULL) {
9130 /* Verify thread stack bounds */
9131 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) {
9132 break;
9133 }
9134
9135 /* Next frame pointer is pointed to by the previous one */
9136 frameptr_next = (uintptr_t*) *frameptr;
9137#if defined(HAS_APPLE_PAC)
9138 frameptr_next = ptrauth_strip(frameptr_next, ptrauth_key_frame_pointer);
9139#endif
9140
9141 /* Pull return address from one spot above the frame pointer */
9142 retaddr = *(frameptr + 1);
9143
9144#if defined(HAS_APPLE_PAC)
9145 retaddr = (uintptr_t) ptrauth_strip((void *)retaddr, ptrauth_key_return_address);
9146#endif
9147
9148 if (((retaddr < vm_kernel_builtinkmod_text_end) && (retaddr >= vm_kernel_builtinkmod_text))
9149 || (retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top)) {
                        site = OSKextGetAllocationSiteForCaller(retaddr);
9151 break;
9152 }
9153 frameptr = frameptr_next;
9154 }
9155
9156 return site ? site->tag : VM_KERN_MEMORY_NONE;
9157}
9158
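/*
 * free_tag_bits is a bitmap of the dynamic tags that are currently
 * available: tag 't' corresponds to bit (63 - (t & 63)) of word (t >> 6),
 * so __builtin_clzll() on a word yields the lowest free tag in that word.
 */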
9159static uint64_t free_tag_bits[VM_MAX_TAG_VALUE / 64];
9160
9161void
9162vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
9163{
9164 vm_tag_t tag;
9165 uint64_t avail;
9166 uint32_t idx;
9167 vm_allocation_site_t * prev;
9168
9169 if (site->tag) {
9170 return;
9171 }
9172
9173 idx = 0;
9174 while (TRUE) {
9175 avail = free_tag_bits[idx];
9176 if (avail) {
9177 tag = (vm_tag_t)__builtin_clzll(avail);
9178 avail &= ~(1ULL << (63 - tag));
9179 free_tag_bits[idx] = avail;
9180 tag += (idx << 6);
9181 break;
9182 }
9183 idx++;
9184 if (idx >= ARRAY_COUNT(free_tag_bits)) {
9185 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++) {
9186 prev = vm_allocation_sites[idx];
9187 if (!prev) {
9188 continue;
9189 }
9190 if (!KA_NAME_LEN(prev)) {
9191 continue;
9192 }
9193 if (!prev->tag) {
9194 continue;
9195 }
9196 if (prev->total) {
9197 continue;
9198 }
9199 if (1 != prev->refcount) {
9200 continue;
9201 }
9202
9203 assert(idx == prev->tag);
9204 tag = (vm_tag_t)idx;
9205 prev->tag = VM_KERN_MEMORY_NONE;
9206 *releasesiteP = prev;
9207 break;
9208 }
9209 if (idx >= ARRAY_COUNT(vm_allocation_sites)) {
9210 tag = VM_KERN_MEMORY_ANY;
9211 }
9212 break;
9213 }
9214 }
9215 site->tag = tag;
9216
        OSAddAtomic16(1, &site->refcount);
9218
9219 if (VM_KERN_MEMORY_ANY != tag) {
9220 vm_allocation_sites[tag] = site;
9221 }
9222
9223 if (tag > vm_allocation_tag_highest) {
9224 vm_allocation_tag_highest = tag;
9225 }
9226}
9227
9228static void
9229vm_tag_free_locked(vm_tag_t tag)
9230{
9231 uint64_t avail;
9232 uint32_t idx;
9233 uint64_t bit;
9234
9235 if (VM_KERN_MEMORY_ANY == tag) {
9236 return;
9237 }
9238
9239 idx = (tag >> 6);
9240 avail = free_tag_bits[idx];
9241 tag &= 63;
9242 bit = (1ULL << (63 - tag));
9243 assert(!(avail & bit));
9244 free_tag_bits[idx] = (avail | bit);
9245}
9246
9247static void
9248vm_tag_init(void)
9249{
9250 vm_tag_t tag;
9251 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++) {
9252 vm_tag_free_locked(tag);
9253 }
9254
9255 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++) {
9256 vm_tag_free_locked(tag);
9257 }
9258}
9259
9260vm_tag_t
9261vm_tag_alloc(vm_allocation_site_t * site)
9262{
9263 vm_allocation_site_t * releasesite;
9264
9265 if (!site->tag) {
9266 releasesite = NULL;
                lck_ticket_lock(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket);
                vm_tag_alloc_locked(site, &releasesite);
                lck_ticket_unlock(&vm_allocation_sites_lock);
9270 if (releasesite) {
                        kern_allocation_name_release(releasesite);
9272 }
9273 }
9274
9275 return site->tag;
9276}
9277
9278#if VM_BTLOG_TAGS
9279#define VM_KERN_MEMORY_STR_MAX_LEN (32)
9280TUNABLE_STR(vmtaglog, VM_KERN_MEMORY_STR_MAX_LEN, "vmtaglog", "");
9281#define VM_TAG_BTLOG_SIZE (16u << 10)
9282
9283btlog_t vmtaglog_btlog;
9284vm_tag_t vmtaglog_tag;
9285
static void
vm_tag_log(vm_object_t object, int64_t delta, void *fp)
{
    if (is_kernel_object(object)) {
        /* kernel object backtraces are tracked in vm entries */
        return;
    }
    if (delta > 0) {
        btref_t ref = btref_get(fp, BTREF_GET_NOWAIT);
        btlog_record(vmtaglog_btlog, object, 0, ref);
    } else if (object->wired_page_count == 0) {
        btlog_erase(vmtaglog_btlog, object);
    }
}

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#endif /* ARRAY_SIZE */
#define VM_KERN_MEMORY_ELEM(name) [VM_KERN_MEMORY_##name] = #name
const char *vm_kern_memory_strs[] = {
    VM_KERN_MEMORY_ELEM(OSFMK),
    VM_KERN_MEMORY_ELEM(BSD),
    VM_KERN_MEMORY_ELEM(IOKIT),
    VM_KERN_MEMORY_ELEM(LIBKERN),
    VM_KERN_MEMORY_ELEM(OSKEXT),
    VM_KERN_MEMORY_ELEM(KEXT),
    VM_KERN_MEMORY_ELEM(IPC),
    VM_KERN_MEMORY_ELEM(STACK),
    VM_KERN_MEMORY_ELEM(CPU),
    VM_KERN_MEMORY_ELEM(PMAP),
    VM_KERN_MEMORY_ELEM(PTE),
    VM_KERN_MEMORY_ELEM(ZONE),
    VM_KERN_MEMORY_ELEM(KALLOC),
    VM_KERN_MEMORY_ELEM(COMPRESSOR),
    VM_KERN_MEMORY_ELEM(COMPRESSED_DATA),
    VM_KERN_MEMORY_ELEM(PHANTOM_CACHE),
    VM_KERN_MEMORY_ELEM(WAITQ),
    VM_KERN_MEMORY_ELEM(DIAG),
    VM_KERN_MEMORY_ELEM(LOG),
    VM_KERN_MEMORY_ELEM(FILE),
    VM_KERN_MEMORY_ELEM(MBUF),
    VM_KERN_MEMORY_ELEM(UBC),
    VM_KERN_MEMORY_ELEM(SECURITY),
    VM_KERN_MEMORY_ELEM(MLOCK),
    VM_KERN_MEMORY_ELEM(REASON),
    VM_KERN_MEMORY_ELEM(SKYWALK),
    VM_KERN_MEMORY_ELEM(LTABLE),
    VM_KERN_MEMORY_ELEM(HV),
    VM_KERN_MEMORY_ELEM(KALLOC_DATA),
    VM_KERN_MEMORY_ELEM(RETIRED),
    VM_KERN_MEMORY_ELEM(KALLOC_TYPE),
    VM_KERN_MEMORY_ELEM(TRIAGE),
    VM_KERN_MEMORY_ELEM(RECOUNT),
};

static vm_tag_t
vm_tag_str_to_idx(char tagstr[VM_KERN_MEMORY_STR_MAX_LEN])
{
    for (vm_tag_t i = VM_KERN_MEMORY_OSFMK; i < ARRAY_SIZE(vm_kern_memory_strs); i++) {
        if (!strncmp(vm_kern_memory_strs[i], tagstr, VM_KERN_MEMORY_STR_MAX_LEN)) {
            return i;
        }
    }

    printf("Unable to find vm tag %s for btlog\n", tagstr);
    return VM_KERN_MEMORY_NONE;
}

__startup_func
static void
vm_btlog_init(void)
{
    vmtaglog_tag = vm_tag_str_to_idx(vmtaglog);

    if (vmtaglog_tag != VM_KERN_MEMORY_NONE) {
        vmtaglog_btlog = btlog_create(BTLOG_HASH, VM_TAG_BTLOG_SIZE, 0);
    }
}
STARTUP(ZALLOC, STARTUP_RANK_FIRST, vm_btlog_init);
#endif /* VM_BTLOG_TAGS */

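/*
 * Adjust the wired-memory total accounted to a tag by 'delta' bytes.
 */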
void
vm_tag_update_size(vm_tag_t tag, int64_t delta, vm_object_t object)
{
    assert(VM_KERN_MEMORY_NONE != tag && tag < VM_MAX_TAG_VALUE);

    kern_allocation_update_size(vm_allocation_sites[tag], delta, object);
}

uint64_t
vm_tag_get_size(vm_tag_t tag)
{
    vm_allocation_site_t *allocation;

    assert(VM_KERN_MEMORY_NONE != tag && tag < VM_MAX_TAG_VALUE);

    allocation = vm_allocation_sites[tag];
    return allocation ? os_atomic_load(&allocation->total, relaxed) : 0;
}

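/*
 * Adjust the running total of an allocation site by 'delta' bytes,
 * tracking the peak on DEBUG/DEVELOPMENT kernels and lazily assigning
 * a tag the first time the site's total becomes non-zero.
 */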
void
kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta, __unused vm_object_t object)
{
    uint64_t value;

    value = os_atomic_add(&allocation->total, delta, relaxed);
    if (delta < 0) {
        assertf(value + (uint64_t)-delta > value,
            "tag %d, site %p", allocation->tag, allocation);
    }

#if DEBUG || DEVELOPMENT
    if (value > allocation->peak) {
        os_atomic_max(&allocation->peak, value, relaxed);
    }
#endif /* DEBUG || DEVELOPMENT */

    if (value == (uint64_t)delta && !allocation->tag) {
        vm_tag_alloc(allocation);
    }

#if VM_BTLOG_TAGS
    if (vmtaglog_tag && (allocation->tag == vmtaglog_tag) && object) {
        vm_tag_log(object, delta, __builtin_frame_address(0));
    }
#endif /* VM_BTLOG_TAGS */
}

#if VM_TAG_SIZECLASSES

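/*
 * Allocate the per-tag array of zone size-class totals, with the
 * size-class storage for the early tags carved out of the same
 * permanent allocation.
 */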
void
vm_allocation_zones_init(void)
{
    vm_offset_t addr;
    vm_size_t size;

    const vm_tag_t early_tags[] = {
        VM_KERN_MEMORY_DIAG,
        VM_KERN_MEMORY_KALLOC,
        VM_KERN_MEMORY_KALLOC_DATA,
        VM_KERN_MEMORY_KALLOC_TYPE,
        VM_KERN_MEMORY_LIBKERN,
        VM_KERN_MEMORY_OSFMK,
        VM_KERN_MEMORY_RECOUNT,
    };

    size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *)
        + ARRAY_COUNT(early_tags) * VM_TAG_SIZECLASSES * sizeof(vm_allocation_zone_total_t);

    kmem_alloc(kernel_map, &addr, round_page(size),
        KMA_NOFAIL | KMA_KOBJECT | KMA_ZERO | KMA_PERMANENT,
        VM_KERN_MEMORY_DIAG);

    vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
    addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *);

    // prepopulate early tag ranges so allocations
    // in vm_tag_update_zone_size() and early boot won't recurse
    for (size_t i = 0; i < ARRAY_COUNT(early_tags); i++) {
        vm_allocation_zone_totals[early_tags[i]] = (vm_allocation_zone_total_t *)addr;
        addr += VM_TAG_SIZECLASSES * sizeof(vm_allocation_zone_total_t);
    }
}

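/*
 * Allocate per-size-class statistics for a tag on first use.  If a
 * racing thread installed its own stats first, free ours and keep the
 * winner's.
 */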
__attribute__((noinline))
static vm_tag_t
vm_tag_zone_stats_alloc(vm_tag_t tag, zalloc_flags_t flags)
{
    vm_allocation_zone_total_t *stats;
    vm_size_t size = sizeof(*stats) * VM_TAG_SIZECLASSES;

    flags = Z_VM_TAG(Z_ZERO | flags, VM_KERN_MEMORY_DIAG);
    stats = kalloc_data(size, flags);
    if (!stats) {
        return VM_KERN_MEMORY_NONE;
    }
    if (!os_atomic_cmpxchg(&vm_allocation_zone_totals[tag], NULL, stats, release)) {
        kfree_data(stats, size);
    }
    return tag;
}

vm_tag_t
vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx, uint32_t zflags)
{
    assert(VM_KERN_MEMORY_NONE != tag);
    assert(tag < VM_MAX_TAG_VALUE);

    if (zidx >= VM_TAG_SIZECLASSES) {
        return VM_KERN_MEMORY_NONE;
    }

    if (__probable(vm_allocation_zone_totals[tag])) {
        return tag;
    }
    return vm_tag_zone_stats_alloc(tag, zflags);
}

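/*
 * Adjust the per-size-class total for a tag by 'delta' bytes, tracking
 * the per-size-class peak on growth.
 */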
void
vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, long delta)
{
    vm_allocation_zone_total_t *stats;
    vm_size_t value;

    assert(VM_KERN_MEMORY_NONE != tag);
    assert(tag < VM_MAX_TAG_VALUE);

    if (zidx >= VM_TAG_SIZECLASSES) {
        return;
    }

    stats = vm_allocation_zone_totals[tag];
    assert(stats);
    stats += zidx;

    value = os_atomic_add(&stats->vazt_total, delta, relaxed);
    if (delta < 0) {
        assertf((long)value >= 0, "zidx %d, tag %d, %p", zidx, tag, stats);
        return;
    } else if (os_atomic_load(&stats->vazt_peak, relaxed) < value) {
        os_atomic_max(&stats->vazt_peak, value, relaxed);
    }
}

#endif /* VM_TAG_SIZECLASSES */

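/*
 * Charge 'delta' bytes against one of a named allocation's subtotal
 * slots, claiming an unused slot the first time a subtag is seen, and
 * mirror the change into the 'mapped' counter of the subtag's own site
 * so the memory can be attributed consistently in process_account().
 */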
void
kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
{
    kern_allocation_name_t other;
    struct vm_allocation_total * total;
    uint32_t subidx;

    assert(VM_KERN_MEMORY_NONE != subtag);
    lck_ticket_lock(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket);
    for (subidx = 0; subidx < allocation->subtotalscount; subidx++) {
        total = &allocation->subtotals[subidx];
        if (subtag == total->tag) {
            break;
        }
    }
    if (subidx >= allocation->subtotalscount) {
        for (subidx = 0; subidx < allocation->subtotalscount; subidx++) {
            total = &allocation->subtotals[subidx];
            if ((VM_KERN_MEMORY_NONE == total->tag)
                || !total->total) {
                total->tag = (vm_tag_t)subtag;
                break;
            }
        }
    }
    assert(subidx < allocation->subtotalscount);
    if (subidx >= allocation->subtotalscount) {
        lck_ticket_unlock(&vm_allocation_sites_lock);
        return;
    }
    if (delta < 0) {
        assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
    }
    OSAddAtomic64(delta, &total->total);
    lck_ticket_unlock(&vm_allocation_sites_lock);

    other = vm_allocation_sites[subtag];
    assert(other);
    if (delta < 0) {
        assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
    }
    OSAddAtomic64(delta, &other->mapped);
}

const char *
kern_allocation_get_name(kern_allocation_name_t allocation)
{
    return KA_NAME(allocation);
}

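/*
 * Create a named allocation site with room for 'subtotalscount'
 * subtotal slots and assign it a VM tag.  A minimal usage sketch
 * (illustrative only; the name and page count shown are hypothetical):
 *
 *     kern_allocation_name_t name;
 *
 *     name = kern_allocation_name_allocate("com.example.tracking", 0);
 *     kern_allocation_update_size(name, ptoa_64(npages), NULL);
 *     ...
 *     kern_allocation_name_release(name);
 */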
kern_allocation_name_t
kern_allocation_name_allocate(const char * name, uint16_t subtotalscount)
{
    kern_allocation_name_t allocation;
    uint16_t namelen;

    namelen = (uint16_t)strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);

    allocation = kalloc_data(KA_SIZE(namelen, subtotalscount), Z_WAITOK | Z_ZERO);
    allocation->refcount = 1;
    allocation->subtotalscount = subtotalscount;
    allocation->flags = (uint16_t)(namelen << VM_TAG_NAME_LEN_SHIFT);
    strlcpy(KA_NAME(allocation), name, namelen + 1);

    vm_tag_alloc(allocation);
    return allocation;
}

void
kern_allocation_name_release(kern_allocation_name_t allocation)
{
    assert(allocation->refcount > 0);
    if (1 == OSAddAtomic16(-1, &allocation->refcount)) {
        kfree_data(allocation,
            KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
    }
}

vm_tag_t
kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
{
    return vm_tag_alloc(allocation);
}

#if !VM_TAG_ACTIVE_UPDATE
static void
vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
{
    if (!object->wired_page_count) {
        return;
    }
    if (!is_kernel_object(object)) {
        assert(object->wire_tag < num_info);
        info[object->wire_tag].size += ptoa_64(object->wired_page_count);
    }
}

typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
    unsigned int num_info, vm_object_t object);

static void
vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
    vm_page_iterate_proc proc, purgeable_q_t queue,
    int group)
{
    vm_object_t object;

    for (object = (vm_object_t) queue_first(&queue->objq[group]);
        !queue_end(&queue->objq[group], (queue_entry_t) object);
        object = (vm_object_t) queue_next(&object->objq)) {
        proc(info, num_info, object);
    }
}

static void
vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
    vm_page_iterate_proc proc)
{
    vm_object_t object;

    lck_spin_lock_grp(&vm_objects_wired_lock, &vm_page_lck_grp_bucket);
    queue_iterate(&vm_objects_wired,
        object,
        vm_object_t,
        wired_objq)
    {
        proc(info, num_info, object);
    }
    lck_spin_unlock(&vm_objects_wired_lock);
}
#endif /* ! VM_TAG_ACTIVE_UPDATE */

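/*
 * Fill mach_memory_info entries from the allocation-site table: copy
 * per-tag totals, label each entry (fixed tag, named site, kext site,
 * or kernel site), expand per-size-class zone records when available,
 * and deduct subtotal charges from the tags they were borrowed from.
 */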
static uint64_t
process_account(mach_memory_info_t * info, unsigned int num_info,
    uint64_t zones_collectable_bytes, boolean_t iterated, bool redact_info __unused)
{
    size_t namelen;
    unsigned int idx, count, nextinfo;
    vm_allocation_site_t * site;
    lck_ticket_lock(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket);

    for (idx = 0; idx <= vm_allocation_tag_highest; idx++) {
        site = vm_allocation_sites[idx];
        if (!site) {
            continue;
        }
        info[idx].mapped = site->mapped;
        info[idx].tag = site->tag;
        if (!iterated) {
            info[idx].size = site->total;
#if DEBUG || DEVELOPMENT
            info[idx].peak = site->peak;
#endif /* DEBUG || DEVELOPMENT */
        } else {
            if (!site->subtotalscount && (site->total != info[idx].size)) {
                printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
                info[idx].size = site->total;
            }
        }
        info[idx].flags |= VM_KERN_SITE_WIRED;
        if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC) {
            info[idx].site = idx;
            info[idx].flags |= VM_KERN_SITE_TAG;
            if (VM_KERN_MEMORY_ZONE == idx) {
                info[idx].flags |= VM_KERN_SITE_HIDE;
                info[idx].flags &= ~VM_KERN_SITE_WIRED;
                info[idx].collectable_bytes = zones_collectable_bytes;
            }
        } else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT)))) {
            info[idx].site = 0;
            info[idx].flags |= VM_KERN_SITE_NAMED;
            if (namelen > sizeof(info[idx].name)) {
                namelen = sizeof(info[idx].name);
            }
            strncpy(&info[idx].name[0], KA_NAME(site), namelen);
        } else if (VM_TAG_KMOD & site->flags) {
            info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
            info[idx].flags |= VM_KERN_SITE_KMOD;
        } else {
            info[idx].site = VM_KERNEL_UNSLIDE(site);
            info[idx].flags |= VM_KERN_SITE_KERNEL;
        }
    }

    nextinfo = (vm_allocation_tag_highest + 1);
    count = nextinfo;
    if (count >= num_info) {
        count = num_info;
    }

    for (idx = 0; idx < count; idx++) {
        site = vm_allocation_sites[idx];
        if (!site) {
            continue;
        }
#if VM_TAG_SIZECLASSES
        vm_allocation_zone_total_t * zone;
        unsigned int zidx;

        if (!redact_info
            && vm_allocation_zone_totals
            && (zone = vm_allocation_zone_totals[idx])
            && (nextinfo < num_info)) {
            for (zidx = 0; zidx < VM_TAG_SIZECLASSES; zidx++) {
                if (!zone[zidx].vazt_peak) {
                    continue;
                }
                info[nextinfo] = info[idx];
                info[nextinfo].zone = zone_index_from_tag_index(zidx);
                info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
                info[nextinfo].flags |= VM_KERN_SITE_ZONE;
                info[nextinfo].flags |= VM_KERN_SITE_KALLOC;
                info[nextinfo].size = zone[zidx].vazt_total;
                info[nextinfo].peak = zone[zidx].vazt_peak;
                info[nextinfo].mapped = 0;
                nextinfo++;
            }
        }
#endif /* VM_TAG_SIZECLASSES */
        if (site->subtotalscount) {
            uint64_t mapped, mapcost, take;
            uint32_t sub;
            vm_tag_t alloctag;

            info[idx].size = site->total;
            mapped = info[idx].size;
            info[idx].mapped = mapped;
            mapcost = 0;
            for (sub = 0; sub < site->subtotalscount; sub++) {
                alloctag = site->subtotals[sub].tag;
                assert(alloctag < num_info);
                if (info[alloctag].name[0]) {
                    continue;
                }
                take = site->subtotals[sub].total;
                if (take > info[alloctag].size) {
                    take = info[alloctag].size;
                }
                if (take > mapped) {
                    take = mapped;
                }
                info[alloctag].mapped -= take;
                info[alloctag].size -= take;
                mapped -= take;
                mapcost += take;
            }
            info[idx].size = mapcost;
        }
    }
    lck_ticket_unlock(&vm_allocation_sites_lock);

    return 0;
}

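/*
 * Upper-bound estimate of how many mach_memory_info entries
 * vm_page_diagnose() may produce: one per allocation site, plus zone
 * views, per-size-class records, the fixed counters, and some slop for
 * tags created after the estimate.
 */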
uint32_t
vm_page_diagnose_estimate(void)
{
    vm_allocation_site_t * site;
    uint32_t count = zone_view_count;
    uint32_t idx;

    lck_ticket_lock(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket);
    for (idx = 0; idx < VM_MAX_TAG_VALUE; idx++) {
        site = vm_allocation_sites[idx];
        if (!site) {
            continue;
        }
        count++;
#if VM_TAG_SIZECLASSES
        if (vm_allocation_zone_totals) {
            vm_allocation_zone_total_t * zone;
            zone = vm_allocation_zone_totals[idx];
            if (!zone) {
                continue;
            }
            for (uint32_t zidx = 0; zidx < VM_TAG_SIZECLASSES; zidx++) {
                count += (zone[zidx].vazt_peak != 0);
            }
        }
#endif
    }
    lck_ticket_unlock(&vm_allocation_sites_lock);

    /* some slop for new tags created */
    count += 8;
    count += VM_KERN_COUNTER_COUNT;

    return count;
}

static void
vm_page_diagnose_zone_stats(mach_memory_info_t *info, zone_stats_t zstats,
    bool percpu)
{
    zpercpu_foreach(zs, zstats) {
        info->size += zs->zs_mem_allocated - zs->zs_mem_freed;
    }
    if (percpu) {
        info->size *= zpercpu_count();
    }
    info->flags |= VM_KERN_SITE_NAMED | VM_KERN_SITE_ZONE_VIEW;
}

static void
vm_page_add_info(
    mach_memory_info_t *info,
    zone_stats_t stats,
    bool per_cpu,
    const char *parent_heap_name,
    const char *parent_zone_name,
    const char *view_name)
{
    vm_page_diagnose_zone_stats(info, stats, per_cpu);
    snprintf(info->name, sizeof(info->name),
        "%s%s[%s]", parent_heap_name, parent_zone_name, view_name);
}

static void
vm_page_diagnose_zone(mach_memory_info_t *info, zone_t z)
{
    vm_page_add_info(info, z->z_stats, z->z_percpu, zone_heap_name(z),
        z->z_name, "raw");
}

static void
vm_page_add_view(
    mach_memory_info_t *info,
    zone_stats_t stats,
    const char *parent_heap_name,
    const char *parent_zone_name,
    const char *view_name)
{
    vm_page_add_info(info, stats, false, parent_heap_name, parent_zone_name,
        view_name);
}

static uint32_t
vm_page_diagnose_heap_views(
    mach_memory_info_t *info,
    kalloc_heap_t kh,
    const char *parent_heap_name,
    const char *parent_zone_name)
{
    uint32_t i = 0;

    while (kh) {
        vm_page_add_view(info + i, kh->kh_stats, parent_heap_name,
            parent_zone_name, kh->kh_name);
        kh = kh->kh_views;
        i++;
    }
    return i;
}

static uint32_t
vm_page_diagnose_heap(mach_memory_info_t *info, kalloc_heap_t kheap)
{
    uint32_t i = 0;

    for (; i < KHEAP_NUM_ZONES; i++) {
        vm_page_diagnose_zone(info + i, zone_by_id(kheap->kh_zstart + i));
    }

    i += vm_page_diagnose_heap_views(info + i, kheap->kh_views, kheap->kh_name,
        NULL);
    return i;
}

static int
vm_page_diagnose_kt_heaps(mach_memory_info_t *info)
{
    uint32_t idx = 0;
    vm_page_add_view(info + idx, KHEAP_KT_VAR->kh_stats, KHEAP_KT_VAR->kh_name,
        "", "raw");
    idx++;

    for (uint32_t i = 0; i < KT_VAR_MAX_HEAPS; i++) {
        struct kheap_info heap = kalloc_type_heap_array[i];
        char heap_num_tmp[MAX_ZONE_NAME] = "";
        const char *heap_num;

        snprintf(&heap_num_tmp[0], MAX_ZONE_NAME, "%u", i);
        heap_num = &heap_num_tmp[0];

        for (kalloc_type_var_view_t ktv = heap.kt_views; ktv;
            ktv = (kalloc_type_var_view_t) ktv->kt_next) {
            if (ktv->kt_stats && ktv->kt_stats != KHEAP_KT_VAR->kh_stats) {
                vm_page_add_view(info + idx, ktv->kt_stats, KHEAP_KT_VAR->kh_name,
                    heap_num, ktv->kt_name);
                idx++;
            }
        }

        idx += vm_page_diagnose_heap_views(info + idx, heap.kh_views,
            KHEAP_KT_VAR->kh_name, heap_num);
    }

    return idx;
}

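/*
 * Populate the mach_memory_info array reported to user space: fixed
 * counters and map sizes at the tail, zone and heap views next, and
 * per-tag wired totals via process_account(), optionally walking the
 * kernel map when tags are not actively updated.
 */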
kern_return_t
vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, bool redact_info)
{
    uint64_t wired_size;
    uint64_t wired_managed_size;
    uint64_t wired_reserved_size;
    boolean_t iterate;
    mach_memory_info_t * counts;
    uint32_t i;

    bzero(info, num_info * sizeof(mach_memory_info_t));

    if (!vm_page_wire_count_initial) {
        return KERN_ABORTED;
    }

#if !XNU_TARGET_OS_OSX
    wired_size = ptoa_64(vm_page_wire_count);
    wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
#else /* !XNU_TARGET_OS_OSX */
    wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
    wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
#endif /* !XNU_TARGET_OS_OSX */
    wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);

    wired_size += booter_size;

    assert(num_info >= VM_KERN_COUNTER_COUNT);
    num_info -= VM_KERN_COUNTER_COUNT;
    counts = &info[num_info];

#define SET_COUNT(xcount, xsize, xflags) \
    counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
    counts[xcount].site = (xcount); \
    counts[xcount].size = (xsize); \
    counts[xcount].mapped = (xsize); \
    counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;

    SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
    SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
    SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
    SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
    SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
    SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
    SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
    SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
    SET_COUNT(VM_KERN_COUNT_WIRED_STATIC_KERNELCACHE, ptoa_64(vm_page_kernelcache_count), 0);

#define SET_MAP(xcount, xsize, xfree, xlargest) \
    counts[xcount].site = (xcount); \
    counts[xcount].size = (xsize); \
    counts[xcount].mapped = (xsize); \
    counts[xcount].free = (xfree); \
    counts[xcount].largest = (xlargest); \
    counts[xcount].flags = VM_KERN_SITE_COUNTER;

    vm_map_size_t map_size, map_free, map_largest;

    vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
    SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);

    zone_map_sizes(&map_size, &map_free, &map_largest);
    SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);

    assert(num_info >= zone_view_count);
    num_info -= zone_view_count;
    counts = &info[num_info];
    i = 0;

    if (!redact_info) {
        if (KHEAP_DATA_BUFFERS->kh_heap_id == KHEAP_ID_DATA_BUFFERS) {
            i += vm_page_diagnose_heap(counts + i, KHEAP_DATA_BUFFERS);
        }
        if (KHEAP_KT_VAR->kh_heap_id == KHEAP_ID_KT_VAR) {
            i += vm_page_diagnose_kt_heaps(counts + i);
        }
        assert(i <= zone_view_count);

        zone_index_foreach(zidx) {
            zone_t z = &zone_array[zidx];
            zone_security_flags_t zsflags = zone_security_array[zidx];
            zone_view_t zv = z->z_views;

            if (zv == NULL) {
                continue;
            }

            zone_stats_t zv_stats_head = z->z_stats;
            bool has_raw_view = false;

            for (; zv; zv = zv->zv_next) {
                /*
                 * kalloc_types that allocate from the same zone are linked
                 * as views. Only print the ones that have their own stats.
                 */
                if (zv->zv_stats == zv_stats_head) {
                    continue;
                }
                has_raw_view = true;
                vm_page_diagnose_zone_stats(counts + i, zv->zv_stats,
                    z->z_percpu);
                snprintf(counts[i].name, sizeof(counts[i].name), "%s%s[%s]",
                    zone_heap_name(z), z->z_name, zv->zv_name);
                i++;
                assert(i <= zone_view_count);
            }

            /*
             * Print raw views for non kalloc or kalloc_type zones
             */
            bool kalloc_type = zsflags.z_kalloc_type;
            if ((zsflags.z_kheap_id == KHEAP_ID_NONE && !kalloc_type) ||
                (kalloc_type && has_raw_view)) {
                vm_page_diagnose_zone(counts + i, z);
                i++;
                assert(i <= zone_view_count);
            }
        }
    }

    iterate = !VM_TAG_ACTIVE_UPDATE;
    if (iterate) {
        enum { kMaxKernelDepth = 1 };
        vm_map_t maps[kMaxKernelDepth];
        vm_map_entry_t entries[kMaxKernelDepth];
        vm_map_t map;
        vm_map_entry_t entry;
        vm_object_offset_t offset;
        vm_page_t page;
        int stackIdx, count;

#if !VM_TAG_ACTIVE_UPDATE
        vm_page_iterate_objects(info, num_info, &vm_page_count_object);
#endif /* ! VM_TAG_ACTIVE_UPDATE */

        map = kernel_map;
        stackIdx = 0;
        while (map) {
            vm_map_lock(map);
            for (entry = map->hdr.links.next; map; entry = entry->vme_next) {
                if (entry->is_sub_map) {
                    assert(stackIdx < kMaxKernelDepth);
                    maps[stackIdx] = map;
                    entries[stackIdx] = entry;
                    stackIdx++;
                    map = VME_SUBMAP(entry);
                    entry = NULL;
                    break;
                }
                if (is_kernel_object(VME_OBJECT(entry))) {
                    count = 0;
                    vm_object_lock(VME_OBJECT(entry));
                    for (offset = entry->vme_start; offset < entry->vme_end; offset += page_size) {
                        page = vm_page_lookup(VME_OBJECT(entry), offset);
                        if (page && VM_PAGE_WIRED(page)) {
                            count++;
                        }
                    }
                    vm_object_unlock(VME_OBJECT(entry));

                    if (count) {
                        assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
                        assert(VME_ALIAS(entry) < num_info);
                        info[VME_ALIAS(entry)].size += ptoa_64(count);
                    }
                }
                while (map && (entry == vm_map_last_entry(map))) {
                    vm_map_unlock(map);
                    if (!stackIdx) {
                        map = NULL;
                    } else {
                        --stackIdx;
                        map = maps[stackIdx];
                        entry = entries[stackIdx];
                    }
                }
            }
        }
    }

    process_account(info, num_info, zones_collectable_bytes, iterate, redact_info);

    return KERN_SUCCESS;
}

#if DEBUG || DEVELOPMENT

kern_return_t
vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
{
    kern_return_t ret;
    vm_size_t zsize;
    vm_map_t map;
    vm_map_entry_t entry;

    zsize = zone_element_info((void *) addr, tag);
    if (zsize) {
        *zone_size = *size = zsize;
        return KERN_SUCCESS;
    }

    *zone_size = 0;
    ret = KERN_INVALID_ADDRESS;
    for (map = kernel_map; map;) {
        vm_map_lock(map);
        if (!vm_map_lookup_entry_allow_pgz(map, addr, &entry)) {
            break;
        }
        if (entry->is_sub_map) {
            if (map != kernel_map) {
                break;
            }
            map = VME_SUBMAP(entry);
            continue;
        }
        if (entry->vme_start != addr) {
            break;
        }
        *tag = (vm_tag_t)VME_ALIAS(entry);
        *size = (entry->vme_end - addr);
        ret = KERN_SUCCESS;
        break;
    }
    if (map != kernel_map) {
        vm_map_unlock(map);
    }
    vm_map_unlock(kernel_map);

    return ret;
}

#endif /* DEBUG || DEVELOPMENT */

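/*
 * If the tag belongs to a kext allocation site, return its kmod id and
 * copy its name into the supplied buffer; otherwise return 0.
 */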
uint32_t
vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
{
    vm_allocation_site_t * site;
    uint32_t kmodId;

    kmodId = 0;
    lck_ticket_lock(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket);
    if ((site = vm_allocation_sites[tag])) {
        if (VM_TAG_KMOD & site->flags) {
            kmodId = OSKextGetKmodIDForSite(site, name, namelen);
        }
    }
    lck_ticket_unlock(&vm_allocation_sites_lock);

    return kmodId;
}


#if CONFIG_SECLUDED_MEMORY
/*
 * Note that there is no locking around other accesses to vm_page_secluded_target.
 * That should be OK, since these are the only places where it can be changed after
 * initialization. Other users (like vm_pageout) may see the wrong value briefly,
 * but will eventually get the correct value. This brief mismatch is OK because
 * pageout and page freeing will auto-adjust vm_page_secluded_count to match the
 * target over time.
 */
unsigned int vm_page_secluded_suppress_cnt = 0;
unsigned int vm_page_secluded_save_target;

LCK_GRP_DECLARE(secluded_suppress_slock_grp, "secluded_suppress_slock");
LCK_SPIN_DECLARE(secluded_suppress_slock, &secluded_suppress_slock_grp);

void
start_secluded_suppression(task_t task)
{
    if (task->task_suppressed_secluded) {
        return;
    }
    lck_spin_lock(&secluded_suppress_slock);
    if (!task->task_suppressed_secluded && vm_page_secluded_suppress_cnt++ == 0) {
        task->task_suppressed_secluded = TRUE;
        vm_page_secluded_save_target = vm_page_secluded_target;
        vm_page_secluded_target = 0;
        VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
    }
    lck_spin_unlock(&secluded_suppress_slock);
}

void
stop_secluded_suppression(task_t task)
{
    lck_spin_lock(&secluded_suppress_slock);
    if (task->task_suppressed_secluded && --vm_page_secluded_suppress_cnt == 0) {
        task->task_suppressed_secluded = FALSE;
        vm_page_secluded_target = vm_page_secluded_save_target;
        VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
    }
    lck_spin_unlock(&secluded_suppress_slock);
}

#endif /* CONFIG_SECLUDED_MEMORY */

/*
 * Move the list of retired pages on the vm_page_queue_retired to
 * their final resting place on retired_pages_object.
 */
void
vm_retire_boot_pages(void)
{
}

/*
 * This holds the reported physical address if an ECC error leads to a panic.
 * SMC will store it in PMU SRAM under the 'sECC' key.
 */
uint64_t ecc_panic_physical_address = 0;


boolean_t
vm_page_created(vm_page_t page)
{
    return (page < &vm_pages[0]) || (page >= &vm_pages[vm_pages_count]);
}
