1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67#include <libkern/OSDebug.h>
68
69#include <mach/clock_types.h>
70#include <mach/vm_prot.h>
71#include <mach/vm_statistics.h>
72#include <mach/sdt.h>
73#include <kern/counters.h>
74#include <kern/sched_prim.h>
75#include <kern/policy_internal.h>
76#include <kern/task.h>
77#include <kern/thread.h>
78#include <kern/kalloc.h>
79#include <kern/zalloc.h>
80#include <kern/xpr.h>
81#include <kern/ledger.h>
82#include <vm/pmap.h>
83#include <vm/vm_init.h>
84#include <vm/vm_map.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pageout.h>
87#include <vm/vm_kern.h> /* kernel_memory_allocate() */
88#include <kern/misc_protos.h>
89#include <zone_debug.h>
90#include <mach_debug/zone_info.h>
91#include <vm/cpm.h>
92#include <pexpert/pexpert.h>
93#include <san/kasan.h>
94
95#include <vm/vm_protos.h>
96#include <vm/memory_object.h>
97#include <vm/vm_purgeable_internal.h>
98#include <vm/vm_compressor.h>
99
100#if CONFIG_PHANTOM_CACHE
101#include <vm/vm_phantom_cache.h>
102#endif
103
104#include <IOKit/IOHibernatePrivate.h>
105
106#include <sys/kdebug.h>
107
108
109
110char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
113char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
114
115#if CONFIG_SECLUDED_MEMORY
116struct vm_page_secluded_data vm_page_secluded;
117void secluded_suppression_init(void);
118#endif /* CONFIG_SECLUDED_MEMORY */
119
120boolean_t hibernate_cleaning_in_progress = FALSE;
121boolean_t vm_page_free_verify = TRUE;
122
123uint32_t vm_lopage_free_count = 0;
124uint32_t vm_lopage_free_limit = 0;
125uint32_t vm_lopage_lowater = 0;
126boolean_t vm_lopage_refill = FALSE;
127boolean_t vm_lopage_needed = FALSE;
128
129lck_mtx_ext_t vm_page_queue_lock_ext;
130lck_mtx_ext_t vm_page_queue_free_lock_ext;
131lck_mtx_ext_t vm_purgeable_queue_lock_ext;
132
133int speculative_age_index = 0;
134int speculative_steal_index = 0;
135struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
136
137
138__private_extern__ void vm_page_init_lck_grp(void);
139
140static void vm_page_free_prepare(vm_page_t page);
141static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
142
143static void vm_tag_init(void);
144
145uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
146uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
147uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
148
149/*
150 * Each page of user-allocatable memory has an associated
151 * page structure.
152 */
153
154/*
155 * These variables record the values returned by vm_page_bootstrap,
156 * for debugging purposes. The implementation of pmap_steal_memory
157 * and pmap_startup here also uses them internally.
158 */
159
160vm_offset_t virtual_space_start;
161vm_offset_t virtual_space_end;
162uint32_t vm_page_pages;
163
164/*
165 * The vm_page_lookup() routine, which provides for fast
166 * (virtual memory object, offset) to page lookup, employs
167 * the following hash table. The vm_page_{insert,remove}
168 * routines install and remove associations in the table.
169 * [This table is often called the virtual-to-physical,
170 * or VP, table.]
171 */
172typedef struct {
173 vm_page_packed_t page_list;
174#if MACH_PAGE_HASH_STATS
175 int cur_count; /* current count */
176 int hi_count; /* high water mark */
177#endif /* MACH_PAGE_HASH_STATS */
178} vm_page_bucket_t;
179
180
181#define BUCKETS_PER_LOCK 16
182
183vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
184unsigned int vm_page_bucket_count = 0; /* How big is array? */
185unsigned int vm_page_hash_mask; /* Mask for hash function */
186unsigned int vm_page_hash_shift; /* Shift for hash function */
187uint32_t vm_page_bucket_hash; /* Basic bucket hash */
188unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
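/*
 * Bucket locking is coarser-grained than the buckets themselves: each
 * lck_spin_t in vm_page_bucket_locks covers BUCKETS_PER_LOCK consecutive
 * buckets, so the lock guarding bucket "hash_id" is
 * vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK] (see
 * vm_page_insert_internal() and vm_page_replace() below).
 */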
189
190#ifndef VM_TAG_ACTIVE_UPDATE
191#error VM_TAG_ACTIVE_UPDATE
192#endif
193#ifndef VM_MAX_TAG_ZONES
194#error VM_MAX_TAG_ZONES
195#endif
196
197boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
198lck_spin_t *vm_page_bucket_locks;
199lck_spin_t vm_objects_wired_lock;
200lck_spin_t vm_allocation_sites_lock;
201
202vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
203vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
204#if VM_MAX_TAG_ZONES
205vm_allocation_zone_total_t ** vm_allocation_zone_totals;
206#endif /* VM_MAX_TAG_ZONES */
207
208vm_tag_t vm_allocation_tag_highest;
209
210#if VM_PAGE_BUCKETS_CHECK
211boolean_t vm_page_buckets_check_ready = FALSE;
212#if VM_PAGE_FAKE_BUCKETS
213vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
214vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
215#endif /* VM_PAGE_FAKE_BUCKETS */
216#endif /* VM_PAGE_BUCKETS_CHECK */
217
218
219
220#if MACH_PAGE_HASH_STATS
221/* This routine is for debugging only. It is intended to be called by
222 * hand by a developer using a kernel debugger. This routine prints
223 * out vm_page_hash table statistics to the kernel debug console.
224 */
225void
226hash_debug(void)
227{
228 int i;
229 int numbuckets = 0;
230 int highsum = 0;
231 int maxdepth = 0;
232
233 for (i = 0; i < vm_page_bucket_count; i++) {
234 if (vm_page_buckets[i].hi_count) {
235 numbuckets++;
236 highsum += vm_page_buckets[i].hi_count;
237 if (vm_page_buckets[i].hi_count > maxdepth)
238 maxdepth = vm_page_buckets[i].hi_count;
239 }
240 }
241 printf("Total number of buckets: %d\n", vm_page_bucket_count);
242 printf("Number used buckets: %d = %d%%\n",
243 numbuckets, 100*numbuckets/vm_page_bucket_count);
244 printf("Number unused buckets: %d = %d%%\n",
245 vm_page_bucket_count - numbuckets,
246 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
247 printf("Sum of bucket max depth: %d\n", highsum);
248 printf("Average bucket depth: %d.%2d\n",
249 highsum/vm_page_bucket_count,
250 highsum%vm_page_bucket_count);
251 printf("Maximum bucket depth: %d\n", maxdepth);
252}
253#endif /* MACH_PAGE_HASH_STATS */
254
255/*
256 * The virtual page size is currently implemented as a runtime
257 * variable, but is constant once initialized using vm_set_page_size.
258 * This initialization must be done in the machine-dependent
259 * bootstrap sequence, before calling other machine-independent
260 * initializations.
261 *
262 * All references to the virtual page size outside this
263 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
264 * constants.
265 */
266#if defined(__arm__) || defined(__arm64__)
267vm_size_t page_size;
268vm_size_t page_mask;
269int page_shift;
270#else
271vm_size_t page_size = PAGE_SIZE;
272vm_size_t page_mask = PAGE_MASK;
273int page_shift = PAGE_SHIFT;
274#endif
275
276/*
277 * Resident page structures are initialized from
278 * a template (see vm_page_alloc).
279 *
280 * When adding a new field to the virtual memory
281 * object structure, be sure to add initialization
282 * (see vm_page_bootstrap).
283 */
284struct vm_page vm_page_template;
285
286vm_page_t vm_pages = VM_PAGE_NULL;
287vm_page_t vm_page_array_beginning_addr;
288vm_page_t vm_page_array_ending_addr;
289vm_page_t vm_page_array_boundary;
290
291unsigned int vm_pages_count = 0;
292ppnum_t vm_page_lowest = 0;
293
294/*
295 * Resident pages that represent real memory
296 * are allocated from a set of free lists,
297 * one per color.
298 */
299unsigned int vm_colors;
300unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
301unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
302unsigned int vm_free_magazine_refill_limit = 0;
303
304
305struct vm_page_queue_free_head {
306 vm_page_queue_head_t qhead;
307} __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
308
309struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
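/*
 * Free pages are binned by cache color (vm_color_mask == vm_colors - 1)
 * so that allocations can be spread across cache sets; vm_page_set_colors()
 * below sizes the bins from the detected cache geometry, or from the
 * "colors" boot-arg when one is supplied.
 */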
310
311
312unsigned int vm_page_free_wanted;
313unsigned int vm_page_free_wanted_privileged;
314#if CONFIG_SECLUDED_MEMORY
315unsigned int vm_page_free_wanted_secluded;
316#endif /* CONFIG_SECLUDED_MEMORY */
317unsigned int vm_page_free_count;
318
319/*
320 * Occasionally, the virtual memory system uses
321 * resident page structures that do not refer to
322 * real pages, for example to leave a page with
323 * important state information in the VP table.
324 *
325 * These page structures are allocated the way
326 * most other kernel structures are.
327 */
328zone_t vm_page_array_zone;
329zone_t vm_page_zone;
330vm_locks_array_t vm_page_locks;
331decl_lck_mtx_data(,vm_page_alloc_lock)
332lck_mtx_ext_t vm_page_alloc_lock_ext;
333
334unsigned int vm_page_local_q_count = 0;
335unsigned int vm_page_local_q_soft_limit = 250;
336unsigned int vm_page_local_q_hard_limit = 500;
337struct vplq *vm_page_local_q = NULL;
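/*
 * Per-CPU page queues: vm_page_init_local_q() below allocates one
 * struct vplq per CPU on multi-processor systems. The soft and hard
 * limits appear to bound how many pages may accumulate on a local
 * queue before it is drained to the global queues.
 */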
338
339/* N.B. Guard and fictitious pages must not
340 * be assigned a zero phys_page value.
341 */
342/*
343 * Fictitious pages don't have a physical address,
344 * but we must initialize phys_page to something.
345 * For debugging, this should be a strange value
346 * that the pmap module can recognize in assertions.
347 */
348const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
349
350/*
351 * Guard pages are not accessible so they don't
352 * need a physical address, but we need to enter
353 * one in the pmap.
354 * Let's make it recognizable and make sure that
355 * we don't use a real physical page with that
356 * physical address.
357 */
358const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
359
360/*
361 * Resident page structures are also chained on
362 * queues that are used by the page replacement
363 * system (pageout daemon). These queues are
364 * defined here, but are shared by the pageout
365 * module. The inactive queue is broken into
366 * file-backed and anonymous queues for convenience, as the
367 * pageout daemon often assigns a higher
368 * importance to anonymous pages (making them less likely to be picked).
369 */
370vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
371vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
372#if CONFIG_SECLUDED_MEMORY
373vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
374#endif /* CONFIG_SECLUDED_MEMORY */
375vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
376vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
377
378queue_head_t vm_objects_wired;
379
380void vm_update_darkwake_mode(boolean_t);
381
382#if CONFIG_BACKGROUND_QUEUE
383vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
384uint32_t vm_page_background_target;
385uint32_t vm_page_background_target_snapshot;
386uint32_t vm_page_background_count;
387uint64_t vm_page_background_promoted_count;
388
389uint32_t vm_page_background_internal_count;
390uint32_t vm_page_background_external_count;
391
392uint32_t vm_page_background_mode;
393uint32_t vm_page_background_exclude_external;
394#endif
395
396unsigned int vm_page_active_count;
397unsigned int vm_page_inactive_count;
398#if CONFIG_SECLUDED_MEMORY
399unsigned int vm_page_secluded_count;
400unsigned int vm_page_secluded_count_free;
401unsigned int vm_page_secluded_count_inuse;
402#endif /* CONFIG_SECLUDED_MEMORY */
403unsigned int vm_page_anonymous_count;
404unsigned int vm_page_throttled_count;
405unsigned int vm_page_speculative_count;
406
407unsigned int vm_page_wire_count;
408unsigned int vm_page_wire_count_on_boot = 0;
409unsigned int vm_page_stolen_count;
410unsigned int vm_page_wire_count_initial;
411unsigned int vm_page_pages_initial;
412unsigned int vm_page_gobble_count = 0;
413
414#define VM_PAGE_WIRE_COUNT_WARNING 0
415#define VM_PAGE_GOBBLE_COUNT_WARNING 0
416
417unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
418unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
419uint64_t vm_page_purged_count = 0; /* total count of purged pages */
420
421unsigned int vm_page_xpmapped_external_count = 0;
422unsigned int vm_page_external_count = 0;
423unsigned int vm_page_internal_count = 0;
424unsigned int vm_page_pageable_external_count = 0;
425unsigned int vm_page_pageable_internal_count = 0;
426
427#if DEVELOPMENT || DEBUG
428unsigned int vm_page_speculative_recreated = 0;
429unsigned int vm_page_speculative_created = 0;
430unsigned int vm_page_speculative_used = 0;
431#endif
432
433vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
434
435unsigned int vm_page_cleaned_count = 0;
436
437uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
438ppnum_t max_valid_low_ppnum = 0xffffffff;
439
440
441/*
442 * Several page replacement parameters are also
443 * shared with this module, so that page allocation
444 * (done here in vm_page_alloc) can trigger the
445 * pageout daemon.
446 */
447unsigned int vm_page_free_target = 0;
448unsigned int vm_page_free_min = 0;
449unsigned int vm_page_throttle_limit = 0;
450unsigned int vm_page_inactive_target = 0;
451#if CONFIG_SECLUDED_MEMORY
452unsigned int vm_page_secluded_target = 0;
453#endif /* CONFIG_SECLUDED_MEMORY */
454unsigned int vm_page_anonymous_min = 0;
455unsigned int vm_page_free_reserved = 0;
456
457
458/*
459 * The VM system has a couple of heuristics for deciding
460 * that pages are "uninteresting" and should be placed
461 * on the inactive queue as likely candidates for replacement.
462 * These variables let the heuristics be controlled at run-time
463 * to make experimentation easier.
464 */
465
466boolean_t vm_page_deactivate_hint = TRUE;
467
468struct vm_page_stats_reusable vm_page_stats_reusable;
469
470/*
471 * vm_set_page_size:
472 *
473 * Sets the page size, perhaps based upon the memory
474 * size. Must be called before any use of page-size
475 * dependent functions.
476 *
477 * Sets page_shift and page_mask from page_size.
478 */
479void
480vm_set_page_size(void)
481{
482 page_size = PAGE_SIZE;
483 page_mask = PAGE_MASK;
484 page_shift = PAGE_SHIFT;
485
486 if ((page_mask & page_size) != 0)
487 panic("vm_set_page_size: page size not a power of two");
488
489 for (page_shift = 0; ; page_shift++)
490 if ((1U << page_shift) == page_size)
491 break;
492}
493
494#if defined (__x86_64__)
495
496#define MAX_CLUMP_SIZE 16
497#define DEFAULT_CLUMP_SIZE 4
498
499unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
500
501#if DEVELOPMENT || DEBUG
502unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
503unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
504
505static inline void vm_clump_update_stats(unsigned int c) {
506 assert(c<=vm_clump_size);
507 if(c>0 && c<=vm_clump_size) vm_clump_stats[c]+=c;
508 vm_clump_allocs+=c;
509}
510#endif /* if DEVELOPMENT || DEBUG */
511
512/* Called once to setup the VM clump knobs */
513static void
514vm_page_setup_clump( void )
515{
516 unsigned int override, n;
517
518 vm_clump_size = DEFAULT_CLUMP_SIZE;
519 if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;
520
521 if(vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
522 if(vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
523 if((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");
524
525 vm_clump_promote_threshold = vm_clump_size;
526 vm_clump_mask = vm_clump_size - 1;
527 for(vm_clump_shift=0, n=vm_clump_size; n>1; n>>=1, vm_clump_shift++);
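 /*
  * e.g. with the default clump_size of 4 this yields vm_clump_mask == 0x3,
  * vm_clump_shift == 2 and vm_clump_promote_threshold == 4.
  */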
528
529#if DEVELOPMENT || DEBUG
530 bzero(vm_clump_stats, sizeof(vm_clump_stats));
531 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
532#endif /* if DEVELOPMENT || DEBUG */
533}
534
535#endif /* #if defined (__x86_64__) */
536
537#define COLOR_GROUPS_TO_STEAL 4
538
539/* Called once during startup, once the cache geometry is known.
540 */
541static void
542vm_page_set_colors( void )
543{
544 unsigned int n, override;
545
546#if defined (__x86_64__)
547 /* adjust #colors because we need to color outside the clump boundary */
548 vm_cache_geometry_colors >>= vm_clump_shift;
549#endif
550 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
551 n = override;
552 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
553 n = vm_cache_geometry_colors;
554 else n = DEFAULT_COLORS; /* use default if all else fails */
555
556 if ( n == 0 )
557 n = 1;
558 if ( n > MAX_COLORS )
559 n = MAX_COLORS;
560
561 /* the count must be a power of 2 */
562 if ( ( n & (n - 1)) != 0 )
563 n = DEFAULT_COLORS; /* use default if all else fails */
564
565 vm_colors = n;
566 vm_color_mask = n - 1;
567
568 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
569
570#if defined (__x86_64__)
571 /* adjust for reduction in colors due to clumping and multiple cores */
572 if (real_ncpus)
573 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
574#endif
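 /*
  * Illustrative numbers: with 8 colors after the clump adjustment,
  * COLOR_GROUPS_TO_STEAL gives a base limit of 32; on a 4-core x86_64
  * machine with clump_size 4 that is scaled to 32 * (4 * 4) = 512
  * pages per magazine refill.
  */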
575}
576
577
578lck_grp_t vm_page_lck_grp_free;
579lck_grp_t vm_page_lck_grp_queue;
580lck_grp_t vm_page_lck_grp_local;
581lck_grp_t vm_page_lck_grp_purge;
582lck_grp_t vm_page_lck_grp_alloc;
583lck_grp_t vm_page_lck_grp_bucket;
584lck_grp_attr_t vm_page_lck_grp_attr;
585lck_attr_t vm_page_lck_attr;
586
587
588__private_extern__ void
589vm_page_init_lck_grp(void)
590{
591 /*
592 * initialize the vm_page lock world
593 */
594 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
595 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
596 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
597 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
598 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
599 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
600 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
601 lck_attr_setdefault(&vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
603
604 vm_compressor_init_locks();
605}
606
607#define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
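/*
 * ROUNDUP_NEXTP2 rounds X up to the next power of two; an exact power of
 * two maps to itself (e.g. ROUNDUP_NEXTP2(48) == 64, ROUNDUP_NEXTP2(64) == 64).
 * X must be at least 2, since __builtin_clz(0) is undefined.
 */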
608
609void
610vm_page_init_local_q()
611{
612 unsigned int num_cpus;
613 unsigned int i;
614 struct vplq *t_local_q;
615
616 num_cpus = ml_get_max_cpus();
617
618 /*
619 * no point in this for a uni-processor system
620 */
621 if (num_cpus >= 2) {
622#if KASAN
623 /* KASAN breaks the expectation of a size-aligned object by adding a
624 * redzone, so explicitly align. */
625 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
626 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
627#else
628 /* round the size up to the nearest power of two */
629 t_local_q = (struct vplq *)kalloc(ROUNDUP_NEXTP2(num_cpus * sizeof(struct vplq)));
630#endif
631
632 for (i = 0; i < num_cpus; i++) {
633 struct vpl *lq;
634
635 lq = &t_local_q[i].vpl_un.vpl;
636 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
637 vm_page_queue_init(&lq->vpl_queue);
638 lq->vpl_count = 0;
639 lq->vpl_internal_count = 0;
640 lq->vpl_external_count = 0;
641 }
642 vm_page_local_q_count = num_cpus;
643
644 vm_page_local_q = (struct vplq *)t_local_q;
645 }
646}
647
648/*
649 * vm_init_before_launchd
650 *
651 * This should be called right before launchd is loaded.
652 */
653void
654vm_init_before_launchd()
655{
656 vm_page_wire_count_on_boot = vm_page_wire_count;
657}
658
659
660/*
661 * vm_page_bootstrap:
662 *
663 * Initializes the resident memory module.
664 *
665 * Allocates memory for the page cells, and
666 * for the object/offset-to-page hash table headers.
667 * Each page cell is initialized and placed on the free list.
668 * Returns the range of available kernel virtual memory.
669 */
670
671void
672vm_page_bootstrap(
673 vm_offset_t *startp,
674 vm_offset_t *endp)
675{
676 vm_page_t m;
677 unsigned int i;
678 unsigned int log1;
679 unsigned int log2;
680 unsigned int size;
681
682 /*
683 * Initialize the vm_page template.
684 */
685
686 m = &vm_page_template;
687 bzero(m, sizeof (*m));
688
689#if CONFIG_BACKGROUND_QUEUE
690 m->vmp_backgroundq.next = 0;
691 m->vmp_backgroundq.prev = 0;
692 m->vmp_in_background = FALSE;
693 m->vmp_on_backgroundq = FALSE;
694#endif
695
696 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
697 m->vmp_listq.next = 0;
698 m->vmp_listq.prev = 0;
699 m->vmp_next_m = 0;
700
701 m->vmp_object = 0; /* reset later */
702 m->vmp_offset = (vm_object_offset_t) -1; /* reset later */
703
704 m->vmp_wire_count = 0;
705 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
706 m->vmp_laundry = FALSE;
707 m->vmp_reference = FALSE;
708 m->vmp_gobbled = FALSE;
709 m->vmp_private = FALSE;
710 m->vmp_unused_page_bits = 0;
711
712#if !defined(__arm__) && !defined(__arm64__)
713 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
714#endif
715 m->vmp_busy = TRUE;
716 m->vmp_wanted = FALSE;
717 m->vmp_tabled = FALSE;
718 m->vmp_hashed = FALSE;
719 m->vmp_fictitious = FALSE;
720 m->vmp_pmapped = FALSE;
721 m->vmp_wpmapped = FALSE;
722 m->vmp_free_when_done = FALSE;
723 m->vmp_absent = FALSE;
724 m->vmp_error = FALSE;
725 m->vmp_dirty = FALSE;
726 m->vmp_cleaning = FALSE;
727 m->vmp_precious = FALSE;
728 m->vmp_clustered = FALSE;
729 m->vmp_overwriting = FALSE;
730 m->vmp_restart = FALSE;
731 m->vmp_unusual = FALSE;
732 m->vmp_cs_validated = FALSE;
733 m->vmp_cs_tainted = FALSE;
734 m->vmp_cs_nx = FALSE;
735 m->vmp_no_cache = FALSE;
736 m->vmp_reusable = FALSE;
737 m->vmp_xpmapped = FALSE;
738 m->vmp_written_by_kernel = FALSE;
739 m->vmp_unused_object_bits = 0;
740
741 /*
742 * Initialize the page queues.
743 */
744 vm_page_init_lck_grp();
745
746 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
747 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
748 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
749
750 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
751 int group;
752
753 purgeable_queues[i].token_q_head = 0;
754 purgeable_queues[i].token_q_tail = 0;
755 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
756 queue_init(&purgeable_queues[i].objq[group]);
757
758 purgeable_queues[i].type = i;
759 purgeable_queues[i].new_pages = 0;
760#if MACH_ASSERT
761 purgeable_queues[i].debug_count_tokens = 0;
762 purgeable_queues[i].debug_count_objects = 0;
763#endif
764 }
765 purgeable_nonvolatile_count = 0;
766 queue_init(&purgeable_nonvolatile_queue);
767
768 for (i = 0; i < MAX_COLORS; i++ )
769 vm_page_queue_init(&vm_page_queue_free[i].qhead);
770
771 vm_page_queue_init(&vm_lopage_queue_free);
772 vm_page_queue_init(&vm_page_queue_active);
773 vm_page_queue_init(&vm_page_queue_inactive);
774#if CONFIG_SECLUDED_MEMORY
775 vm_page_queue_init(&vm_page_queue_secluded);
776#endif /* CONFIG_SECLUDED_MEMORY */
777 vm_page_queue_init(&vm_page_queue_cleaned);
778 vm_page_queue_init(&vm_page_queue_throttled);
779 vm_page_queue_init(&vm_page_queue_anonymous);
780 queue_init(&vm_objects_wired);
781
782 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
783 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
784
785 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
786 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
787 }
788#if CONFIG_BACKGROUND_QUEUE
789 vm_page_queue_init(&vm_page_queue_background);
790
791 vm_page_background_count = 0;
792 vm_page_background_internal_count = 0;
793 vm_page_background_external_count = 0;
794 vm_page_background_promoted_count = 0;
795
796 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
797
798 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
799 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
800
801 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
802 vm_page_background_exclude_external = 0;
803
804 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
805 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
806 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
807
808 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
809 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
810#endif
811 vm_page_free_wanted = 0;
812 vm_page_free_wanted_privileged = 0;
813#if CONFIG_SECLUDED_MEMORY
814 vm_page_free_wanted_secluded = 0;
815#endif /* CONFIG_SECLUDED_MEMORY */
816
817#if defined (__x86_64__)
818 /* this must be called before vm_page_set_colors() */
819 vm_page_setup_clump();
820#endif
821
822 vm_page_set_colors();
823
824 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
825 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
826 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
827 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
828
829 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
830 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
831 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
832 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
833 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
834 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
835 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
836#if CONFIG_SECLUDED_MEMORY
837 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
838#endif /* CONFIG_SECLUDED_MEMORY */
839
840 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
841 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
842 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
843 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
844 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
845 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
846#if CONFIG_SECLUDED_MEMORY
847 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
848#endif /* CONFIG_SECLUDED_MEMORY */
849
850 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
851 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
852 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
853 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
854 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
855#if CONFIG_SECLUDED_MEMORY
856 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
857#endif /* CONFIG_SECLUDED_MEMORY */
858
859 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
860 {
861 vm_allocation_sites_static[i].refcount = 2;
862 vm_allocation_sites_static[i].tag = i;
863 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
864 }
865 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
866 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
867 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
868
869 /*
870 * Steal memory for the map and zone subsystems.
871 */
872#if CONFIG_GZALLOC
873 gzalloc_configure();
874#endif
875 kernel_debug_string_early("vm_map_steal_memory");
876 vm_map_steal_memory();
877
878 /*
879 * Allocate (and initialize) the virtual-to-physical
880 * table hash buckets.
881 *
882 * The number of buckets should be a power of two to
883 * get a good hash function. The following computation
884 * chooses the smallest power of two that is at least
885 * the number of physical pages in the system.
886 */
887
888 if (vm_page_bucket_count == 0) {
889 unsigned int npages = pmap_free_pages();
890
891 vm_page_bucket_count = 1;
892 while (vm_page_bucket_count < npages)
893 vm_page_bucket_count <<= 1;
894 }
895 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
896
897 vm_page_hash_mask = vm_page_bucket_count - 1;
898
899 /*
900 * Calculate object shift value for hashing algorithm:
901 * O = log2(sizeof(struct vm_object))
902 * B = log2(vm_page_bucket_count)
903 * hash shifts the object left by
904 * B/2 - O + 1
905 */
906 size = vm_page_bucket_count;
907 for (log1 = 0; size > 1; log1++)
908 size /= 2;
909 size = sizeof(struct vm_object);
910 for (log2 = 0; size > 1; log2++)
911 size /= 2;
912 vm_page_hash_shift = log1/2 - log2 + 1;
913
914 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
915 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
916 vm_page_bucket_hash |= 1; /* Set the low bit - it must always be 1 to ensure a unique series */
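 /*
  * For example, with 2^20 buckets (log1 == 20) this yields
  * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421.
  */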
917
918 if (vm_page_hash_mask & vm_page_bucket_count)
919 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
920
921#if VM_PAGE_BUCKETS_CHECK
922#if VM_PAGE_FAKE_BUCKETS
923 /*
924 * Allocate a decoy set of page buckets, to detect
925 * any stomping there.
926 */
927 vm_page_fake_buckets = (vm_page_bucket_t *)
928 pmap_steal_memory(vm_page_bucket_count *
929 sizeof(vm_page_bucket_t));
930 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
931 vm_page_fake_buckets_end =
932 vm_map_round_page((vm_page_fake_buckets_start +
933 (vm_page_bucket_count *
934 sizeof (vm_page_bucket_t))),
935 PAGE_MASK);
936 char *cp;
937 for (cp = (char *)vm_page_fake_buckets_start;
938 cp < (char *)vm_page_fake_buckets_end;
939 cp++) {
940 *cp = 0x5a;
941 }
942#endif /* VM_PAGE_FAKE_BUCKETS */
943#endif /* VM_PAGE_BUCKETS_CHECK */
944
945 kernel_debug_string_early("vm_page_buckets");
946 vm_page_buckets = (vm_page_bucket_t *)
947 pmap_steal_memory(vm_page_bucket_count *
948 sizeof(vm_page_bucket_t));
949
950 kernel_debug_string_early("vm_page_bucket_locks");
951 vm_page_bucket_locks = (lck_spin_t *)
952 pmap_steal_memory(vm_page_bucket_lock_count *
953 sizeof(lck_spin_t));
954
955 for (i = 0; i < vm_page_bucket_count; i++) {
956 vm_page_bucket_t *bucket = &vm_page_buckets[i];
957
958 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
959#if MACH_PAGE_HASH_STATS
960 bucket->cur_count = 0;
961 bucket->hi_count = 0;
962#endif /* MACH_PAGE_HASH_STATS */
963 }
964
965 for (i = 0; i < vm_page_bucket_lock_count; i++)
966 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
967
968 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
969 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
970 vm_tag_init();
971
972#if VM_PAGE_BUCKETS_CHECK
973 vm_page_buckets_check_ready = TRUE;
974#endif /* VM_PAGE_BUCKETS_CHECK */
975
976 /*
977 * Machine-dependent code allocates the resident page table.
978 * It uses vm_page_init to initialize the page frames.
979 * The code also returns to us the virtual space available
980 * to the kernel. We don't trust the pmap module
981 * to get the alignment right.
982 */
983
984 kernel_debug_string_early("pmap_startup");
985 pmap_startup(&virtual_space_start, &virtual_space_end);
986 virtual_space_start = round_page(virtual_space_start);
987 virtual_space_end = trunc_page(virtual_space_end);
988
989 *startp = virtual_space_start;
990 *endp = virtual_space_end;
991
992 /*
993 * Compute the initial "wire" count.
994 * Up until now, the pages which have been set aside are not under
995 * the VM system's control, so although they aren't explicitly
996 * wired, they nonetheless can't be moved. At this moment,
997 * all VM managed pages are "free", courtesy of pmap_startup.
998 */
999 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
1000 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
1001#if CONFIG_SECLUDED_MEMORY
1002 vm_page_wire_count -= vm_page_secluded_count;
1003#endif
1004 vm_page_wire_count_initial = vm_page_wire_count;
1005 vm_page_pages_initial = vm_page_pages;
1006
1007 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
1008 vm_page_free_count, vm_page_wire_count);
1009
1010 kernel_debug_string_early("vm_page_bootstrap complete");
1011 simple_lock_init(&vm_paging_lock, 0);
1012}
1013
1014#ifndef MACHINE_PAGES
1015/*
1016 * We implement pmap_steal_memory and pmap_startup with the help
1017 * of two simpler functions, pmap_virtual_space and pmap_next_page.
1018 */
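/*
 * Memory grabbed by pmap_steal_memory is permanently removed from the
 * free pool: each backing page is counted as wired and "stolen" (see the
 * accounting at the end of pmap_steal_memory) and is not returned to the
 * VM system's normal control afterwards.
 */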
1019
1020void *
1021pmap_steal_memory(
1022 vm_size_t size)
1023{
1024 kern_return_t kr;
1025 vm_offset_t addr, vaddr;
1026 ppnum_t phys_page;
1027
1028 /*
1029 * Round the requested size up to a multiple of the pointer size.
1030 */
1031
1032 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
1033
1034 /*
1035 * If this is the first call to pmap_steal_memory,
1036 * we have to initialize ourselves.
1037 */
1038
1039 if (virtual_space_start == virtual_space_end) {
1040 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1041
1042 /*
1043 * The initial values must be aligned properly, and
1044 * we don't trust the pmap module to do it right.
1045 */
1046
1047 virtual_space_start = round_page(virtual_space_start);
1048 virtual_space_end = trunc_page(virtual_space_end);
1049 }
1050
1051 /*
1052 * Allocate virtual memory for this request.
1053 */
1054
1055 addr = virtual_space_start;
1056 virtual_space_start += size;
1057
1058 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1059
1060 /*
1061 * Allocate and map physical pages to back new virtual pages.
1062 */
1063
1064 for (vaddr = round_page(addr);
1065 vaddr < addr + size;
1066 vaddr += PAGE_SIZE) {
1067
1068 if (!pmap_next_page_hi(&phys_page))
1069 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1070
1071 /*
1072 * XXX Logically, these mappings should be wired,
1073 * but some pmap modules barf if they are.
1074 */
1075#if defined(__LP64__)
1076#ifdef __arm64__
1077 /* ARM64_TODO: verify that we really don't need this */
1078#else
1079 pmap_pre_expand(kernel_pmap, vaddr);
1080#endif
1081#endif
1082
1083 kr = pmap_enter(kernel_pmap, vaddr, phys_page,
1084 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
1085 VM_WIMG_USE_DEFAULT, FALSE);
1086
1087 if (kr != KERN_SUCCESS) {
1088 panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
1089 (unsigned long)vaddr, phys_page);
1090 }
1091
1092 /*
1093 * Account for newly stolen memory
1094 */
1095 vm_page_wire_count++;
1096 vm_page_stolen_count++;
1097 }
1098
1099#if KASAN
1100 kasan_notify_address(round_page(addr), size);
1101#endif
1102 return (void *) addr;
1103}
1104
1105#if CONFIG_SECLUDED_MEMORY
1106/* boot-args to control secluded memory */
1107unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1108int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1109int secluded_for_apps = 1; /* apps can use secluded memory */
1110int secluded_for_filecache = 2; /* filecache can use secluded memory */
1111#if 11
1112int secluded_for_fbdp = 0;
1113#endif
1114uint64_t secluded_shutoff_trigger = 0;
1115#endif /* CONFIG_SECLUDED_MEMORY */
1116
1117
1118#if defined(__arm__) || defined(__arm64__)
1119extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1120unsigned int vm_first_phys_ppnum = 0;
1121#endif
1122
1123
1124void vm_page_release_startup(vm_page_t mem);
1125void
1126pmap_startup(
1127 vm_offset_t *startp,
1128 vm_offset_t *endp)
1129{
1130 unsigned int i, npages, pages_initialized, fill, fillval;
1131 ppnum_t phys_page;
1132 addr64_t tmpaddr;
1133
1134#if defined(__LP64__)
1135 /*
1136 * make sure we are aligned on a 64 byte boundary
1137 * for VM_PAGE_PACK_PTR (it clips off the low-order
1138 * 6 bits of the pointer)
1139 */
1140 if (virtual_space_start != virtual_space_end)
1141 virtual_space_start = round_page(virtual_space_start);
1142#endif
1143
1144 /*
1145 * We calculate how many page frames we will have
1146 * and then allocate the page structures in one chunk.
1147 */
1148
1149 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
1150 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
1151 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages fit, counting the vm_page_t needed for each one */
1152
1153 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1154
1155 /*
1156 * Initialize the page frames.
1157 */
1158 kernel_debug_string_early("Initialize the page frames");
1159
1160 vm_page_array_beginning_addr = &vm_pages[0];
1161 vm_page_array_ending_addr = &vm_pages[npages];
1162
1163 for (i = 0, pages_initialized = 0; i < npages; i++) {
1164 if (!pmap_next_page(&phys_page))
1165 break;
1166#if defined(__arm__) || defined(__arm64__)
1167 if (pages_initialized == 0) {
1168 vm_first_phys_ppnum = phys_page;
1169 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1170 }
1171 assert((i + vm_first_phys_ppnum) == phys_page);
1172#endif
1173 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1174 vm_page_lowest = phys_page;
1175
1176 vm_page_init(&vm_pages[i], phys_page, FALSE);
1177 vm_page_pages++;
1178 pages_initialized++;
1179 }
1180 vm_pages_count = pages_initialized;
1181 vm_page_array_boundary = &vm_pages[pages_initialized];
1182
1183#if defined(__LP64__)
1184
1185 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1186 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1187
1188 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1189 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1190#endif
1191 kernel_debug_string_early("page fill/release");
1192 /*
1193 * Check if we want to initialize pages to a known value
1194 */
1195 fill = 0; /* Assume no fill */
1196 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1197#if DEBUG
1198 /* This slows down booting the DEBUG kernel, particularly on
1199 * large memory systems, but is worthwhile in deterministically
1200 * trapping uninitialized memory usage.
1201 */
1202 if (fill == 0) {
1203 fill = 1;
1204 fillval = 0xDEB8F177;
1205 }
1206#endif
1207 if (fill)
1208 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1209
1210#if CONFIG_SECLUDED_MEMORY
1211 /* default: no secluded mem */
1212 secluded_mem_mb = 0;
1213 if (max_mem > 1*1024*1024*1024) {
1214 /* default to 90MB for devices with > 1GB of RAM */
1215 secluded_mem_mb = 90;
1216 }
1217 /* override with value from device tree, if provided */
1218 PE_get_default("kern.secluded_mem_mb",
1219 &secluded_mem_mb, sizeof(secluded_mem_mb));
1220 /* override with value from boot-args, if provided */
1221 PE_parse_boot_argn("secluded_mem_mb",
1222 &secluded_mem_mb,
1223 sizeof (secluded_mem_mb));
1224
1225 vm_page_secluded_target = (unsigned int)
1226 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
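 /*
  * e.g. the 90MB default works out to a target of 5760 secluded pages
  * with 16KB pages, or 23040 with 4KB pages.
  */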
1227 PE_parse_boot_argn("secluded_for_iokit",
1228 &secluded_for_iokit,
1229 sizeof (secluded_for_iokit));
1230 PE_parse_boot_argn("secluded_for_apps",
1231 &secluded_for_apps,
1232 sizeof (secluded_for_apps));
1233 PE_parse_boot_argn("secluded_for_filecache",
1234 &secluded_for_filecache,
1235 sizeof (secluded_for_filecache));
1236#if 11
1237 PE_parse_boot_argn("secluded_for_fbdp",
1238 &secluded_for_fbdp,
1239 sizeof (secluded_for_fbdp));
1240#endif
1241
1242 /*
1243 * On small devices, allow a large app to effectively suppress
1244 * secluded memory until it exits.
1245 */
1246 if (max_mem <= 1 * 1024 * 1024 * 1024 && vm_page_secluded_target != 0) {
1247
1248 /*
1249 * Get an amount from boot-args, else use 500MB.
1250 * 500MB was chosen from a Peace daemon tentpole test which used munch
1251 * to induce jetsam thrashing of false idle daemons.
1252 */
1253 int secluded_shutoff_mb;
1254 if (PE_parse_boot_argn("secluded_shutoff_mb", &secluded_shutoff_mb,
1255 sizeof (secluded_shutoff_mb)))
1256 secluded_shutoff_trigger = (uint64_t)secluded_shutoff_mb * 1024 * 1024;
1257 else
1258 secluded_shutoff_trigger = 500 * 1024 * 1024;
1259
1260 if (secluded_shutoff_trigger != 0)
1261 secluded_suppression_init();
1262 }
1263
1264#endif /* CONFIG_SECLUDED_MEMORY */
1265
1266 /*
1267 * By default release pages in reverse order so that physical pages
1268 * initially get allocated in ascending addresses. This keeps
1269 * the devices (which must address physical memory) happy if
1270 * they require several consecutive pages.
1271 *
1272 * For debugging, you can reverse this ordering and/or fill
1273 * all pages with a known value.
1274 */
1275 if (vm_himemory_mode == 2) {
1276 for (i = 0; i < pages_initialized; i++) {
1277 if (fill)
1278 fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]), fillval);
1279 vm_page_release_startup(&vm_pages[i]);
1280 }
1281 } else {
1282 for (i = pages_initialized; i-- > 0; ) {
1283 if (fill)
1284 fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]), fillval);
1285 vm_page_release_startup(&vm_pages[i]);
1286 }
1287 }
1288
1289 VM_CHECK_MEMORYSTATUS;
1290
1291#if 0
1292 {
1293 vm_page_t xx, xxo, xxl;
1294 int i, j, k, l;
1295
1296 j = 0; /* (BRINGUP) */
1297 xxl = 0;
1298
1299 for( i = 0; i < vm_colors; i++ ) {
1300 queue_iterate(&vm_page_queue_free[i].qhead,
1301 xx,
1302 vm_page_t,
1303 vmp_pageq) { /* BRINGUP */
1304 j++; /* (BRINGUP) */
1305 if(j > vm_page_free_count) { /* (BRINGUP) */
1306 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
1307 }
1308
1309 l = vm_page_free_count - j; /* (BRINGUP) */
1310 k = 0; /* (BRINGUP) */
1311
1312 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1313
1314 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1315 k++;
1316 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1317 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1318 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1319 }
1320 }
1321
1322 xxl = xx;
1323 }
1324 }
1325
1326 if(j != vm_page_free_count) { /* (BRINGUP) */
1327 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1328 }
1329 }
1330#endif
1331
1332
1333 /*
1334 * We have to re-align virtual_space_start,
1335 * because pmap_steal_memory has been using it.
1336 */
1337
1338 virtual_space_start = round_page(virtual_space_start);
1339
1340 *startp = virtual_space_start;
1341 *endp = virtual_space_end;
1342}
1343#endif /* MACHINE_PAGES */
1344
1345/*
1346 * Routine: vm_page_module_init
1347 * Purpose:
1348 * Second initialization pass, to be done after
1349 * the basic VM system is ready.
1350 */
1351void
1352vm_page_module_init(void)
1353{
1354 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1355 vm_size_t vm_page_with_ppnum_size;
1356
1357 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1358 0, PAGE_SIZE, "vm pages array");
1359
1360 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1361 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1362 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1363 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1364 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1365 /*
1366 * Adjust zone statistics to account for the real pages allocated
1367 * in vm_page_create(). [Q: is this really what we want?]
1368 */
1369 vm_page_array_zone->count += vm_page_pages;
1370 vm_page_array_zone->sum_count += vm_page_pages;
1371 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1372 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1373 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1374 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1375 /* since zone accounts for these, take them out of stolen */
1376 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1377
1378 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
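 /*
  * The element size is rounded up so that every vm_page allocated from
  * this zone lands on a VM_PACKED_POINTER_ALIGNMENT boundary, as packed
  * page pointers require (Z_ALIGNMENT_REQUIRED is also set on the zone
  * below for this reason).
  */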
1379
1380 vm_page_zone = zinit(vm_page_with_ppnum_size,
1381 0, PAGE_SIZE, "vm pages");
1382
1383 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1384 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1385 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1386 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1387 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1388 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1389}
1390
1391/*
1392 * Routine: vm_page_create
1393 * Purpose:
1394 * After the VM system is up, machine-dependent code
1395 * may stumble across more physical memory. For example,
1396 * memory that it was reserving for a frame buffer.
1397 * vm_page_create turns this memory into available pages.
1398 */
1399
1400void
1401vm_page_create(
1402 ppnum_t start,
1403 ppnum_t end)
1404{
1405 ppnum_t phys_page;
1406 vm_page_t m;
1407
1408 for (phys_page = start;
1409 phys_page < end;
1410 phys_page++) {
1411 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1412 == VM_PAGE_NULL)
1413 vm_page_more_fictitious();
1414
1415 m->vmp_fictitious = FALSE;
1416 pmap_clear_noencrypt(phys_page);
1417
1418 vm_page_pages++;
1419 vm_page_release(m, FALSE);
1420 }
1421}
1422
1423/*
1424 * vm_page_hash:
1425 *
1426 * Distributes the object/offset key pair among hash buckets.
1427 *
1428 * NOTE: The bucket count must be a power of 2
1429 */
1430#define vm_page_hash(object, offset) (\
1431 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1432 & vm_page_hash_mask)
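/*
 * A lookup or insertion first computes the bucket and the spin lock that
 * guards it, e.g.:
 *
 *	hash_id = vm_page_hash(object, offset);
 *	bucket = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 *
 * (see vm_page_insert_internal() and vm_page_replace() below)
 */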
1433
1434
1435/*
1436 * vm_page_insert: [ internal use only ]
1437 *
1438 * Inserts the given mem entry into the object/object-page
1439 * table and object list.
1440 *
1441 * The object must be locked.
1442 */
1443void
1444vm_page_insert(
1445 vm_page_t mem,
1446 vm_object_t object,
1447 vm_object_offset_t offset)
1448{
1449 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1450}
1451
1452void
1453vm_page_insert_wired(
1454 vm_page_t mem,
1455 vm_object_t object,
1456 vm_object_offset_t offset,
1457 vm_tag_t tag)
1458{
1459 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1460}
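/*
 * vm_page_insert_internal is the workhorse behind the wrappers above.
 * Its boolean arguments indicate whether the caller already holds the
 * page-queues lock, whether the page should be entered in the
 * object/offset hash, and whether pmap cache-attribute updates and the
 * internal/external page accounting are batched by the caller; the
 * optional delayed_ledger_update pointer lets ledger credits be
 * deferred to the caller as well.
 */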
1461
1462void
1463vm_page_insert_internal(
1464 vm_page_t mem,
1465 vm_object_t object,
1466 vm_object_offset_t offset,
1467 vm_tag_t tag,
1468 boolean_t queues_lock_held,
1469 boolean_t insert_in_hash,
1470 boolean_t batch_pmap_op,
1471 boolean_t batch_accounting,
1472 uint64_t *delayed_ledger_update)
1473{
1474 vm_page_bucket_t *bucket;
1475 lck_spin_t *bucket_lock;
1476 int hash_id;
1477 task_t owner;
1478 int ledger_idx_volatile;
1479 int ledger_idx_nonvolatile;
1480 int ledger_idx_volatile_compressed;
1481 int ledger_idx_nonvolatile_compressed;
1482 boolean_t do_footprint;
1483
1484 XPR(XPR_VM_PAGE,
1485 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1486 object, offset, mem, 0,0);
1487#if 0
1488 /*
1489 * we may not hold the page queue lock
1490 * so this check isn't safe to make
1491 */
1492 VM_PAGE_CHECK(mem);
1493#endif
1494
1495 assert(page_aligned(offset));
1496
1497 assert(!VM_PAGE_WIRED(mem) || mem->vmp_private || mem->vmp_fictitious || (tag != VM_KERN_MEMORY_NONE));
1498
1499 /* the vm_submap_object is only a placeholder for submaps */
1500 assert(object != vm_submap_object);
1501
1502 vm_object_lock_assert_exclusive(object);
1503 LCK_MTX_ASSERT(&vm_page_queue_lock,
1504 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1505 : LCK_MTX_ASSERT_NOTOWNED);
1506
1507 if (queues_lock_held == FALSE)
1508 assert(!VM_PAGE_PAGEABLE(mem));
1509
1510 if (insert_in_hash == TRUE) {
1511#if DEBUG || VM_PAGE_CHECK_BUCKETS
1512 if (mem->vmp_tabled || mem->vmp_object)
1513 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1514 "already in (obj=%p,off=0x%llx)",
1515 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
1516#endif
1517 if (object->internal && (offset >= object->vo_size)) {
1518 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1519 mem, object, offset, object->vo_size);
1520 }
1521
1522 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1523
1524 /*
1525 * Record the object/offset pair in this page
1526 */
1527
1528 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1529 mem->vmp_offset = offset;
1530
1531#if CONFIG_SECLUDED_MEMORY
1532 if (object->eligible_for_secluded) {
1533 vm_page_secluded.eligible_for_secluded++;
1534 }
1535#endif /* CONFIG_SECLUDED_MEMORY */
1536
1537 /*
1538 * Insert it into the object_object/offset hash table
1539 */
1540 hash_id = vm_page_hash(object, offset);
1541 bucket = &vm_page_buckets[hash_id];
1542 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1543
1544 lck_spin_lock(bucket_lock);
1545
1546 mem->vmp_next_m = bucket->page_list;
1547 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1548 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1549
1550#if MACH_PAGE_HASH_STATS
1551 if (++bucket->cur_count > bucket->hi_count)
1552 bucket->hi_count = bucket->cur_count;
1553#endif /* MACH_PAGE_HASH_STATS */
1554 mem->vmp_hashed = TRUE;
1555 lck_spin_unlock(bucket_lock);
1556 }
1557
1558 {
1559 unsigned int cache_attr;
1560
1561 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1562
1563 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1564 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1565 }
1566 }
1567 /*
1568 * Now link into the object's list of backed pages.
1569 */
1570 vm_page_queue_enter(&object->memq, mem, vm_page_t, vmp_listq);
1571 object->memq_hint = mem;
1572 mem->vmp_tabled = TRUE;
1573
1574 /*
1575 * Show that the object has one more resident page.
1576 */
1577
1578 object->resident_page_count++;
1579 if (VM_PAGE_WIRED(mem)) {
1580 assert(mem->vmp_wire_count > 0);
1581 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1582 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1583 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1584 }
1585 assert(object->resident_page_count >= object->wired_page_count);
1586
1587 if (batch_accounting == FALSE) {
1588 if (object->internal) {
1589 OSAddAtomic(1, &vm_page_internal_count);
1590 } else {
1591 OSAddAtomic(1, &vm_page_external_count);
1592 }
1593 }
1594
1595 /*
1596 * It wouldn't make sense to insert a "reusable" page in
1597 * an object (the page would have been marked "reusable" only
1598 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1599 * in the object at that time).
1600 * But a page could be inserted in an "all_reusable" object, if
1601 * something faults it in (a vm_read() from another task or a
1602 * "use-after-free" issue in user space, for example). It can
1603 * also happen if we're relocating a page from that object to
1604 * a different physical page during a physically-contiguous
1605 * allocation.
1606 */
1607 assert(!mem->vmp_reusable);
1608 if (object->all_reusable) {
1609 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1610 }
1611
1612 if (object->purgable == VM_PURGABLE_DENY &&
1613 ! object->vo_ledger_tag) {
1614 owner = TASK_NULL;
1615 } else {
1616 owner = VM_OBJECT_OWNER(object);
1617 vm_object_ledger_tag_ledgers(object,
1618 &ledger_idx_volatile,
1619 &ledger_idx_nonvolatile,
1620 &ledger_idx_volatile_compressed,
1621 &ledger_idx_nonvolatile_compressed,
1622 &do_footprint);
1623 }
1624 if (owner &&
1625 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1626 object->purgable == VM_PURGABLE_DENY ||
1627 VM_PAGE_WIRED(mem))) {
1628
1629 if (delayed_ledger_update)
1630 *delayed_ledger_update += PAGE_SIZE;
1631 else {
1632 /* more non-volatile bytes */
1633 ledger_credit(owner->ledger,
1634 ledger_idx_nonvolatile,
1635 PAGE_SIZE);
1636 if (do_footprint) {
1637 /* more footprint */
1638 ledger_credit(owner->ledger,
1639 task_ledgers.phys_footprint,
1640 PAGE_SIZE);
1641 }
1642 }
1643
1644 } else if (owner &&
1645 (object->purgable == VM_PURGABLE_VOLATILE ||
1646 object->purgable == VM_PURGABLE_EMPTY)) {
1647 assert(! VM_PAGE_WIRED(mem));
1648 /* more volatile bytes */
1649 ledger_credit(owner->ledger,
1650 ledger_idx_volatile,
1651 PAGE_SIZE);
1652 }
1653
1654 if (object->purgable == VM_PURGABLE_VOLATILE) {
1655 if (VM_PAGE_WIRED(mem)) {
1656 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1657 } else {
1658 OSAddAtomic(+1, &vm_page_purgeable_count);
1659 }
1660 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1661 mem->vmp_q_state == VM_PAGE_ON_THROTTLED_Q) {
1662 /*
1663 * This page belongs to a purged VM object but hasn't
1664 * been purged (because it was "busy").
1665 * It's in the "throttled" queue and hence not
1666 * visible to vm_pageout_scan(). Move it to a pageable
1667 * queue, so that it can eventually be reclaimed, instead
1668 * of lingering in the "empty" object.
1669 */
1670 if (queues_lock_held == FALSE)
1671 vm_page_lockspin_queues();
1672 vm_page_deactivate(mem);
1673 if (queues_lock_held == FALSE)
1674 vm_page_unlock_queues();
1675 }
1676
1677#if VM_OBJECT_TRACKING_OP_MODIFIED
1678 if (vm_object_tracking_inited &&
1679 object->internal &&
1680 object->resident_page_count == 0 &&
1681 object->pager == NULL &&
1682 object->shadow != NULL &&
1683 object->shadow->copy == object) {
1684 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1685 int numsaved = 0;
1686
1687 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1688 btlog_add_entry(vm_object_tracking_btlog,
1689 object,
1690 VM_OBJECT_TRACKING_OP_MODIFIED,
1691 bt,
1692 numsaved);
1693 }
1694#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1695}
1696
1697/*
1698 * vm_page_replace:
1699 *
1700 * Exactly like vm_page_insert, except that we first
1701 * remove any existing page at the given offset in object.
1702 *
1703 * The object must be locked.
1704 */
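/*
 * Caller-side sketch (illustrative only, based on the locking and
 * free-back behaviour visible below):
 *
 *	vm_object_lock(object);
 *	vm_page_replace(new_mem, object, offset);
 *	vm_object_unlock(object);
 *
 * Any page previously resident at "offset" is removed from the hash
 * table and the object and freed back to the free list.
 */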
1705void
1706vm_page_replace(
1707 vm_page_t mem,
1708 vm_object_t object,
1709 vm_object_offset_t offset)
1710{
1711 vm_page_bucket_t *bucket;
1712 vm_page_t found_m = VM_PAGE_NULL;
1713 lck_spin_t *bucket_lock;
1714 int hash_id;
1715
1716#if 0
1717 /*
1718 * we don't hold the page queue lock
1719 * so this check isn't safe to make
1720 */
1721 VM_PAGE_CHECK(mem);
1722#endif
1723 vm_object_lock_assert_exclusive(object);
1724#if DEBUG || VM_PAGE_CHECK_BUCKETS
1725 if (mem->vmp_tabled || mem->vmp_object)
1726 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1727 "already in (obj=%p,off=0x%llx)",
1728 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
1729#endif
1730 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1731
1732 assert(!VM_PAGE_PAGEABLE(mem));
1733
1734 /*
1735 * Record the object/offset pair in this page
1736 */
1737 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1738 mem->vmp_offset = offset;
1739
1740 /*
	 * Insert it into the object/offset hash table,
1742 * replacing any page that might have been there.
1743 */
1744
1745 hash_id = vm_page_hash(object, offset);
1746 bucket = &vm_page_buckets[hash_id];
1747 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1748
1749 lck_spin_lock(bucket_lock);
1750
1751 if (bucket->page_list) {
1752 vm_page_packed_t *mp = &bucket->page_list;
1753 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
1754
1755 do {
1756 /*
1757 * compare packed object pointers
1758 */
1759 if (m->vmp_object == mem->vmp_object && m->vmp_offset == offset) {
1760 /*
1761 * Remove old page from hash list
1762 */
1763 *mp = m->vmp_next_m;
1764 m->vmp_hashed = FALSE;
1765 m->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1766
1767 found_m = m;
1768 break;
1769 }
1770 mp = &m->vmp_next_m;
1771 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
1772
1773 mem->vmp_next_m = bucket->page_list;
1774 } else {
1775 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1776 }
1777 /*
1778 * insert new page at head of hash list
1779 */
1780 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1781 mem->vmp_hashed = TRUE;
1782
1783 lck_spin_unlock(bucket_lock);
1784
1785 if (found_m) {
1786 /*
1787 * there was already a page at the specified
1788 * offset for this object... remove it from
1789 * the object and free it back to the free list
1790 */
1791 vm_page_free_unlocked(found_m, FALSE);
1792 }
1793 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1794}
1795
1796/*
1797 * vm_page_remove: [ internal use only ]
1798 *
1799 * Removes the given mem entry from the object/offset-page
1800 * table and the object page list.
1801 *
1802 * The object must be locked.
1803 */
1804
1805void
1806vm_page_remove(
1807 vm_page_t mem,
1808 boolean_t remove_from_hash)
1809{
1810 vm_page_bucket_t *bucket;
1811 vm_page_t this;
1812 lck_spin_t *bucket_lock;
1813 int hash_id;
1814 task_t owner;
1815 vm_object_t m_object;
1816 int ledger_idx_volatile;
1817 int ledger_idx_nonvolatile;
1818 int ledger_idx_volatile_compressed;
1819 int ledger_idx_nonvolatile_compressed;
1820 int do_footprint;
1821
1822 m_object = VM_PAGE_OBJECT(mem);
1823
1824 XPR(XPR_VM_PAGE,
1825 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1826 m_object, mem->vmp_offset,
1827 mem, 0,0);
1828
1829 vm_object_lock_assert_exclusive(m_object);
1830 assert(mem->vmp_tabled);
1831 assert(!mem->vmp_cleaning);
1832 assert(!mem->vmp_laundry);
1833
1834 if (VM_PAGE_PAGEABLE(mem)) {
1835 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1836 }
1837#if 0
1838 /*
1839 * we don't hold the page queue lock
1840 * so this check isn't safe to make
1841 */
1842 VM_PAGE_CHECK(mem);
1843#endif
1844 if (remove_from_hash == TRUE) {
1845 /*
		 * Remove from the object/offset hash table
1847 */
1848 hash_id = vm_page_hash(m_object, mem->vmp_offset);
1849 bucket = &vm_page_buckets[hash_id];
1850 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1851
1852 lck_spin_lock(bucket_lock);
1853
1854 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
1855 /* optimize for common case */
1856
1857 bucket->page_list = mem->vmp_next_m;
1858 } else {
1859 vm_page_packed_t *prev;
1860
1861 for (prev = &this->vmp_next_m;
1862 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
1863 prev = &this->vmp_next_m)
1864 continue;
1865 *prev = this->vmp_next_m;
1866 }
1867#if MACH_PAGE_HASH_STATS
1868 bucket->cur_count--;
1869#endif /* MACH_PAGE_HASH_STATS */
1870 mem->vmp_hashed = FALSE;
1871 this->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1872 lck_spin_unlock(bucket_lock);
1873 }
1874 /*
1875 * Now remove from the object's list of backed pages.
1876 */
1877
1878 vm_page_remove_internal(mem);
1879
1880 /*
1881 * And show that the object has one fewer resident
1882 * page.
1883 */
1884
1885 assert(m_object->resident_page_count > 0);
1886 m_object->resident_page_count--;
1887
1888 if (m_object->internal) {
1889#if DEBUG
1890 assert(vm_page_internal_count);
1891#endif /* DEBUG */
1892
1893 OSAddAtomic(-1, &vm_page_internal_count);
1894 } else {
1895 assert(vm_page_external_count);
1896 OSAddAtomic(-1, &vm_page_external_count);
1897
1898 if (mem->vmp_xpmapped) {
1899 assert(vm_page_xpmapped_external_count);
1900 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1901 }
1902 }
1903 if (!m_object->internal &&
1904 m_object->cached_list.next &&
1905 m_object->cached_list.prev) {
1906 if (m_object->resident_page_count == 0)
1907 vm_object_cache_remove(m_object);
1908 }
1909
1910 if (VM_PAGE_WIRED(mem)) {
1911 assert(mem->vmp_wire_count > 0);
1912 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
1913 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
1914 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
1915 }
1916 assert(m_object->resident_page_count >=
1917 m_object->wired_page_count);
1918 if (mem->vmp_reusable) {
1919 assert(m_object->reusable_page_count > 0);
1920 m_object->reusable_page_count--;
1921 assert(m_object->reusable_page_count <=
1922 m_object->resident_page_count);
1923 mem->vmp_reusable = FALSE;
1924 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1925 vm_page_stats_reusable.reused_remove++;
1926 } else if (m_object->all_reusable) {
1927 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1928 vm_page_stats_reusable.reused_remove++;
1929 }
1930
1931 if (m_object->purgable == VM_PURGABLE_DENY &&
1932 ! m_object->vo_ledger_tag) {
1933 owner = TASK_NULL;
1934 } else {
1935 owner = VM_OBJECT_OWNER(m_object);
1936 vm_object_ledger_tag_ledgers(m_object,
1937 &ledger_idx_volatile,
1938 &ledger_idx_nonvolatile,
1939 &ledger_idx_volatile_compressed,
1940 &ledger_idx_nonvolatile_compressed,
1941 &do_footprint);
1942 }
1943 if (owner &&
1944 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
1945 m_object->purgable == VM_PURGABLE_DENY ||
1946 VM_PAGE_WIRED(mem))) {
1947 /* less non-volatile bytes */
1948 ledger_debit(owner->ledger,
1949 ledger_idx_nonvolatile,
1950 PAGE_SIZE);
1951 if (do_footprint) {
1952 /* less footprint */
1953 ledger_debit(owner->ledger,
1954 task_ledgers.phys_footprint,
1955 PAGE_SIZE);
1956 }
1957 } else if (owner &&
1958 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1959 m_object->purgable == VM_PURGABLE_EMPTY)) {
1960 assert(! VM_PAGE_WIRED(mem));
1961 /* less volatile bytes */
1962 ledger_debit(owner->ledger,
1963 ledger_idx_volatile,
1964 PAGE_SIZE);
1965 }
1966 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
1967 if (VM_PAGE_WIRED(mem)) {
1968 assert(vm_page_purgeable_wired_count > 0);
1969 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1970 } else {
1971 assert(vm_page_purgeable_count > 0);
1972 OSAddAtomic(-1, &vm_page_purgeable_count);
1973 }
1974 }
1975
1976 if (m_object->set_cache_attr == TRUE)
1977 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
1978
1979 mem->vmp_tabled = FALSE;
1980 mem->vmp_object = 0;
1981 mem->vmp_offset = (vm_object_offset_t) -1;
1982}
1983
1984
1985/*
1986 * vm_page_lookup:
1987 *
1988 * Returns the page associated with the object/offset
1989 * pair specified; if none is found, VM_PAGE_NULL is returned.
1990 *
1991 * The object must be locked. No side effects.
1992 */
1993
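/*
 * Objects with at most this many resident pages are looked up by
 * walking the object's memq directly; larger objects go through the
 * vm_page hash buckets instead (see vm_page_lookup() below).
 */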
1994#define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1995
1996#if DEBUG_VM_PAGE_LOOKUP
1997
1998struct {
1999 uint64_t vpl_total;
2000 uint64_t vpl_empty_obj;
2001 uint64_t vpl_bucket_NULL;
2002 uint64_t vpl_hit_hint;
2003 uint64_t vpl_hit_hint_next;
2004 uint64_t vpl_hit_hint_prev;
2005 uint64_t vpl_fast;
2006 uint64_t vpl_slow;
2007 uint64_t vpl_hit;
2008 uint64_t vpl_miss;
2009
2010 uint64_t vpl_fast_elapsed;
2011 uint64_t vpl_slow_elapsed;
2012} vm_page_lookup_stats __attribute__((aligned(8)));
2013
2014#endif
2015
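/*
 * Upper bound on the number of memq entries kdp_vm_page_lookup() will
 * walk, so that the debugger path never traverses an unbounded (or
 * possibly damaged) list while the system is stopped.
 */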
2016#define KDP_VM_PAGE_WALK_MAX 1000
2017
2018vm_page_t
2019kdp_vm_page_lookup(
2020 vm_object_t object,
2021 vm_object_offset_t offset)
2022{
2023 vm_page_t cur_page;
2024 int num_traversed = 0;
2025
2026 if (not_in_kdp) {
		panic("kdp_vm_page_lookup done outside of kernel debugger");
2028 }
2029
2030 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, vmp_listq) {
2031 if (cur_page->vmp_offset == offset) {
2032 return cur_page;
2033 }
2034 num_traversed++;
2035
2036 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
2037 return VM_PAGE_NULL;
2038 }
2039 }
2040
2041 return VM_PAGE_NULL;
2042}
2043
2044vm_page_t
2045vm_page_lookup(
2046 vm_object_t object,
2047 vm_object_offset_t offset)
2048{
2049 vm_page_t mem;
2050 vm_page_bucket_t *bucket;
2051 vm_page_queue_entry_t qe;
2052 lck_spin_t *bucket_lock = NULL;
2053 int hash_id;
2054#if DEBUG_VM_PAGE_LOOKUP
2055 uint64_t start, elapsed;
2056
2057 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2058#endif
2059 vm_object_lock_assert_held(object);
2060
2061 if (object->resident_page_count == 0) {
2062#if DEBUG_VM_PAGE_LOOKUP
2063 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2064#endif
2065 return (VM_PAGE_NULL);
2066 }
2067
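	/*
	 * Fast path: check the object's "memq_hint" (typically the page
	 * found by the most recent lookup in this object) and its
	 * immediate neighbours in the memq before falling back to the
	 * hash table.
	 */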
2068 mem = object->memq_hint;
2069
2070 if (mem != VM_PAGE_NULL) {
2071 assert(VM_PAGE_OBJECT(mem) == object);
2072
2073 if (mem->vmp_offset == offset) {
2074#if DEBUG_VM_PAGE_LOOKUP
2075 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2076#endif
2077 return (mem);
2078 }
2079 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->vmp_listq);
2080
2081 if (! vm_page_queue_end(&object->memq, qe)) {
2082 vm_page_t next_page;
2083
2084 next_page = (vm_page_t)((uintptr_t)qe);
2085 assert(VM_PAGE_OBJECT(next_page) == object);
2086
2087 if (next_page->vmp_offset == offset) {
2088 object->memq_hint = next_page; /* new hint */
2089#if DEBUG_VM_PAGE_LOOKUP
2090 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2091#endif
2092 return (next_page);
2093 }
2094 }
2095 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->vmp_listq);
2096
2097 if (! vm_page_queue_end(&object->memq, qe)) {
2098 vm_page_t prev_page;
2099
2100 prev_page = (vm_page_t)((uintptr_t)qe);
2101 assert(VM_PAGE_OBJECT(prev_page) == object);
2102
2103 if (prev_page->vmp_offset == offset) {
2104 object->memq_hint = prev_page; /* new hint */
2105#if DEBUG_VM_PAGE_LOOKUP
2106 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2107#endif
2108 return (prev_page);
2109 }
2110 }
2111 }
2112 /*
2113 * Search the hash table for this object/offset pair
2114 */
2115 hash_id = vm_page_hash(object, offset);
2116 bucket = &vm_page_buckets[hash_id];
2117
	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
2126 if (!bucket->page_list) {
2127#if DEBUG_VM_PAGE_LOOKUP
2128 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2129#endif
2130 return (VM_PAGE_NULL);
2131 }
2132
2133#if DEBUG_VM_PAGE_LOOKUP
2134 start = mach_absolute_time();
2135#endif
2136 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
		/*
		 * on average, it's roughly 3 times faster to walk a short memq list
		 * than to take the spin lock and go through the hash list
		 */
2141 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2142
2143 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2144
2145 if (mem->vmp_offset == offset)
2146 break;
2147
2148 mem = (vm_page_t)vm_page_queue_next(&mem->vmp_listq);
2149 }
2150 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
2151 mem = NULL;
2152 } else {
2153 vm_page_object_t packed_object;
2154
2155 packed_object = VM_PAGE_PACK_OBJECT(object);
2156
2157 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2158
2159 lck_spin_lock(bucket_lock);
2160
2161 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2162 mem != VM_PAGE_NULL;
2163 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m))) {
2164#if 0
2165 /*
2166 * we don't hold the page queue lock
2167 * so this check isn't safe to make
2168 */
2169 VM_PAGE_CHECK(mem);
2170#endif
2171 if ((mem->vmp_object == packed_object) && (mem->vmp_offset == offset))
2172 break;
2173 }
2174 lck_spin_unlock(bucket_lock);
2175 }
2176
2177#if DEBUG_VM_PAGE_LOOKUP
2178 elapsed = mach_absolute_time() - start;
2179
2180 if (bucket_lock) {
2181 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2182 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2183 } else {
2184 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2185 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2186 }
2187 if (mem != VM_PAGE_NULL)
2188 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2189 else
2190 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2191#endif
2192 if (mem != VM_PAGE_NULL) {
2193 assert(VM_PAGE_OBJECT(mem) == object);
2194
2195 object->memq_hint = mem;
2196 }
2197 return (mem);
2198}
2199
2200
2201/*
2202 * vm_page_rename:
2203 *
2204 * Move the given memory entry from its
2205 * current object to the specified target object/offset.
2206 *
2207 * The object must be locked.
2208 */
2209void
2210vm_page_rename(
2211 vm_page_t mem,
2212 vm_object_t new_object,
2213 vm_object_offset_t new_offset)
2214{
2215 boolean_t internal_to_external, external_to_internal;
2216 vm_tag_t tag;
2217 vm_object_t m_object;
2218
2219 m_object = VM_PAGE_OBJECT(mem);
2220
2221 assert(m_object != new_object);
2222 assert(m_object);
2223
2224 XPR(XPR_VM_PAGE,
2225 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2226 new_object, new_offset,
2227 mem, 0,0);
2228
	/*
	 * Changes to mem->vmp_object require the page queues lock
	 * because the pageout daemon uses that lock to get the object.
	 */
2233 vm_page_lockspin_queues();
2234
2235 internal_to_external = FALSE;
2236 external_to_internal = FALSE;
2237
2238 if (mem->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2239 /*
2240 * it's much easier to get the vm_page_pageable_xxx accounting correct
2241 * if we first move the page to the active queue... it's going to end
2242 * up there anyway, and we don't do vm_page_rename's frequently enough
2243 * for this to matter.
2244 */
2245 vm_page_queues_remove(mem, FALSE);
2246 vm_page_activate(mem);
2247 }
2248 if (VM_PAGE_PAGEABLE(mem)) {
2249 if (m_object->internal && !new_object->internal) {
2250 internal_to_external = TRUE;
2251 }
2252 if (!m_object->internal && new_object->internal) {
2253 external_to_internal = TRUE;
2254 }
2255 }
2256
2257 tag = m_object->wire_tag;
2258 vm_page_remove(mem, TRUE);
2259 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2260
2261 if (internal_to_external) {
2262 vm_page_pageable_internal_count--;
2263 vm_page_pageable_external_count++;
2264 } else if (external_to_internal) {
2265 vm_page_pageable_external_count--;
2266 vm_page_pageable_internal_count++;
2267 }
2268
2269 vm_page_unlock_queues();
2270}
2271
2272/*
2273 * vm_page_init:
2274 *
2275 * Initialize the fields in a new page.
 * This takes a structure whose fields may contain arbitrary
 * (garbage) values and initializes it so that it can be given
 * to vm_page_release or vm_page_insert.
2278 */
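/*
 * Concretely: the page structure is overwritten with the global
 * vm_page_template, after which only the physical page number and the
 * "lopage" attribute are filled in.
 */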
2279void
2280vm_page_init(
2281 vm_page_t mem,
2282 ppnum_t phys_page,
2283 boolean_t lopage)
2284{
2285 assert(phys_page);
2286
2287#if DEBUG
2288 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2289 if (!(pmap_valid_page(phys_page))) {
2290 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2291 }
2292 }
2293#endif
2294 *mem = vm_page_template;
2295
2296 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2297#if 0
2298 /*
2299 * we're leaving this turned off for now... currently pages
2300 * come off the free list and are either immediately dirtied/referenced
2301 * due to zero-fill or COW faults, or are used to read or write files...
2302 * in the file I/O case, the UPL mechanism takes care of clearing
2303 * the state of the HW ref/mod bits in a somewhat fragile way.
2304 * Since we may change the way this works in the future (to toughen it up),
2305 * I'm leaving this as a reminder of where these bits could get cleared
2306 */
2307
2308 /*
2309 * make sure both the h/w referenced and modified bits are
2310 * clear at this point... we are especially dependent on
2311 * not finding a 'stale' h/w modified in a number of spots
2312 * once this page goes back into use
2313 */
2314 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2315#endif
2316 mem->vmp_lopage = lopage;
2317}
2318
2319/*
2320 * vm_page_grab_fictitious:
2321 *
 * Remove a fictitious page from the zone's free list.
 * Returns VM_PAGE_NULL if there are no free elements.
2324 */
2325int c_vm_page_grab_fictitious = 0;
2326int c_vm_page_grab_fictitious_failed = 0;
2327int c_vm_page_release_fictitious = 0;
2328int c_vm_page_more_fictitious = 0;
2329
2330vm_page_t
2331vm_page_grab_fictitious_common(
2332 ppnum_t phys_addr)
2333{
2334 vm_page_t m;
2335
2336 if ((m = (vm_page_t)zget(vm_page_zone))) {
2337
2338 vm_page_init(m, phys_addr, FALSE);
2339 m->vmp_fictitious = TRUE;
2340
2341 c_vm_page_grab_fictitious++;
2342 } else
2343 c_vm_page_grab_fictitious_failed++;
2344
2345 return m;
2346}
2347
2348vm_page_t
2349vm_page_grab_fictitious(void)
2350{
2351 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2352}
2353
2354int vm_guard_count;
2355
2356
2357vm_page_t
2358vm_page_grab_guard(void)
2359{
2360 vm_page_t page;
2361 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2362 if (page) OSAddAtomic(1, &vm_guard_count);
2363 return page;
2364}
2365
2366
2367/*
2368 * vm_page_release_fictitious:
2369 *
2370 * Release a fictitious page to the zone pool
2371 */
2372void
2373vm_page_release_fictitious(
2374 vm_page_t m)
2375{
2376 assert((m->vmp_q_state == VM_PAGE_NOT_ON_Q) || (m->vmp_q_state == VM_PAGE_IS_WIRED));
2377 assert(m->vmp_fictitious);
2378 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2379 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);

	if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr)
		OSAddAtomic(-1, &vm_guard_count);

2384 c_vm_page_release_fictitious++;
2385
2386 zfree(vm_page_zone, m);
2387}
2388
2389/*
2390 * vm_page_more_fictitious:
2391 *
2392 * Add more fictitious pages to the zone.
 * Allowed to block.  This routine is intimately tied to
 * the zones code, for several reasons:
2395 * 1. we need to carve some page structures out of physical
2396 * memory before zones work, so they _cannot_ come from
2397 * the zone_map.
2398 * 2. the zone needs to be collectable in order to prevent
2399 * growth without bound. These structures are used by
2400 * the device pager (by the hundreds and thousands), as
2401 * private pages for pageout, and as blocking pages for
2402 * pagein. Temporary bursts in demand should not result in
2403 * permanent allocation of a resource.
2404 * 3. To smooth allocation humps, we allocate single pages
2405 * with kernel_memory_allocate(), and cram them into the
2406 * zone.
2407 */
2408
2409void vm_page_more_fictitious(void)
2410{
2411 vm_offset_t addr;
2412 kern_return_t retval;
2413
2414 c_vm_page_more_fictitious++;
2415
	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent multiple simultaneous
	 * requests from piling up on the zone_map lock. Exactly one (of our)
	 * threads should be potentially waiting on the map lock. If the
	 * winner is not vm-privileged, the page allocation will fail and the
	 * thread will temporarily block here in vm_page_wait().
	 */
2425 lck_mtx_lock(&vm_page_alloc_lock);
2426 /*
2427 * If another thread allocated space, just bail out now.
2428 */
2429 if (zone_free_count(vm_page_zone) > 5) {
2430 /*
2431 * The number "5" is a small number that is larger than the
2432 * number of fictitious pages that any single caller will
2433 * attempt to allocate. Otherwise, a thread will attempt to
2434 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2435 * release all of the resources and locks already acquired,
		 * and then call this routine.  This routine finds the pages
		 * that the caller just released, and so declines to allocate
		 * new space.
2438 * The process repeats infinitely. The largest known number
2439 * of fictitious pages required in this manner is 2. 5 is
2440 * simply a somewhat larger number.
2441 */
2442 lck_mtx_unlock(&vm_page_alloc_lock);
2443 return;
2444 }
2445
2446 retval = kernel_memory_allocate(zone_map,
2447 &addr, PAGE_SIZE, 0,
2448 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2449 if (retval != KERN_SUCCESS) {
2450 /*
2451 * No page was available. Drop the
2452 * lock to give another thread a chance at it, and
2453 * wait for the pageout daemon to make progress.
2454 */
2455 lck_mtx_unlock(&vm_page_alloc_lock);
2456 vm_page_wait(THREAD_UNINT);
2457 return;
2458 }
2459
2460 zcram(vm_page_zone, addr, PAGE_SIZE);
2461
2462 lck_mtx_unlock(&vm_page_alloc_lock);
2463}
2464
2465
2466/*
2467 * vm_pool_low():
2468 *
2469 * Return true if it is not likely that a non-vm_privileged thread
2470 * can get memory without blocking. Advisory only, since the
2471 * situation may change under us.
2472 */
2473int
2474vm_pool_low(void)
2475{
2476 /* No locking, at worst we will fib. */
2477 return( vm_page_free_count <= vm_page_free_reserved );
2478}
2479
2480boolean_t vm_darkwake_mode = FALSE;
2481
2482/*
2483 * vm_update_darkwake_mode():
2484 *
2485 * Tells the VM that the system is in / out of darkwake.
2486 *
 * Today, the VM only lowers/raises the background queue target
 * so as to favor consuming more background pages when darkwake
 * is ON and fewer when it is OFF.
2490 *
2491 * We might need to do more things in the future.
2492 */
2493
2494void
2495vm_update_darkwake_mode(boolean_t darkwake_mode)
2496{
2497 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2498
2499 vm_page_lockspin_queues();
2500
2501 if (vm_darkwake_mode == darkwake_mode) {
2502 /*
2503 * No change.
2504 */
2505 vm_page_unlock_queues();
2506 return;
2507 }
2508
2509 vm_darkwake_mode = darkwake_mode;
2510
2511 if (vm_darkwake_mode == TRUE) {
2512#if CONFIG_BACKGROUND_QUEUE
2513
2514 /* save background target to restore later */
2515 vm_page_background_target_snapshot = vm_page_background_target;
2516
2517 /* target is set to 0...no protection for background pages */
2518 vm_page_background_target = 0;
2519
2520#endif /* CONFIG_BACKGROUND_QUEUE */
2521
2522 } else if (vm_darkwake_mode == FALSE) {
2523#if CONFIG_BACKGROUND_QUEUE
2524
2525 if (vm_page_background_target_snapshot) {
2526 vm_page_background_target = vm_page_background_target_snapshot;
2527 }
2528#endif /* CONFIG_BACKGROUND_QUEUE */
2529 }
2530 vm_page_unlock_queues();
2531}
2532
2533#if CONFIG_BACKGROUND_QUEUE
2534
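/*
 * vm_page_update_background_state:
 *
 * Re-evaluate a page's "background" tag against the current task and
 * thread: if the page is currently tagged as background but is being
 * used by a non-background, non-darkwake task, promote it off the
 * background queue.
 */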
2535void
2536vm_page_update_background_state(vm_page_t mem)
2537{
2538 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2539 return;
2540
2541 if (mem->vmp_in_background == FALSE)
2542 return;
2543
2544 task_t my_task = current_task();
2545
2546 if (my_task) {
2547 if (task_get_darkwake_mode(my_task)) {
2548 return;
2549 }
2550 }
2551
2552#if BACKGROUNDQ_BASED_ON_QOS
2553 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2554 return;
2555#else
2556 if (my_task) {
2557 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2558 return;
2559 }
2560#endif
2561 vm_page_lockspin_queues();
2562
2563 mem->vmp_in_background = FALSE;
2564 vm_page_background_promoted_count++;
2565
2566 vm_page_remove_from_backgroundq(mem);
2567
2568 vm_page_unlock_queues();
2569}
2570
2571
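/*
 * vm_page_assign_background_state:
 *
 * Decide whether a freshly grabbed page should be tagged as a
 * "background" page, based on the current task's darkwake state and
 * its DARWIN_BG (or thread QoS) policy.
 */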
2572void
2573vm_page_assign_background_state(vm_page_t mem)
2574{
2575 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2576 return;
2577
2578 task_t my_task = current_task();
2579
2580 if (my_task) {
2581 if (task_get_darkwake_mode(my_task)) {
2582 mem->vmp_in_background = TRUE;
2583 return;
2584 }
2585 }
2586
2587#if BACKGROUNDQ_BASED_ON_QOS
2588 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2589 mem->vmp_in_background = TRUE;
2590 else
2591 mem->vmp_in_background = FALSE;
2592#else
2593 if (my_task)
2594 mem->vmp_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2595#endif
2596}
2597
2598
2599void
2600vm_page_remove_from_backgroundq(
2601 vm_page_t mem)
2602{
2603 vm_object_t m_object;
2604
2605 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2606
2607 if (mem->vmp_on_backgroundq) {
2608 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
2609
2610 mem->vmp_backgroundq.next = 0;
2611 mem->vmp_backgroundq.prev = 0;
2612 mem->vmp_on_backgroundq = FALSE;
2613
2614 vm_page_background_count--;
2615
2616 m_object = VM_PAGE_OBJECT(mem);
2617
2618 if (m_object->internal)
2619 vm_page_background_internal_count--;
2620 else
2621 vm_page_background_external_count--;
2622 } else {
2623 assert(VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.next) == (uintptr_t)NULL &&
2624 VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.prev) == (uintptr_t)NULL);
2625 }
2626}
2627
2628
2629void
2630vm_page_add_to_backgroundq(
2631 vm_page_t mem,
2632 boolean_t first)
2633{
2634 vm_object_t m_object;
2635
2636 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2637
2638 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2639 return;
2640
2641 if (mem->vmp_on_backgroundq == FALSE) {
2642
2643 m_object = VM_PAGE_OBJECT(mem);
2644
2645 if (vm_page_background_exclude_external && !m_object->internal)
2646 return;
2647
2648 if (first == TRUE)
2649 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
2650 else
2651 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
2652 mem->vmp_on_backgroundq = TRUE;
2653
2654 vm_page_background_count++;
2655
2656 if (m_object->internal)
2657 vm_page_background_internal_count++;
2658 else
2659 vm_page_background_external_count++;
2660 }
2661}
2662
2663#endif /* CONFIG_BACKGROUND_QUEUE */
2664
2665/*
2666 * this is an interface to support bring-up of drivers
2667 * on platforms with physical memory > 4G...
2668 */
2669int vm_himemory_mode = 2;
2670
2671
2672/*
2673 * this interface exists to support hardware controllers
2674 * incapable of generating DMAs with more than 32 bits
2675 * of address on platforms with physical memory > 4G...
2676 */
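/*
 * vm_page_grablo() first tries the dedicated low-memory free queue;
 * if that is empty, it falls back to cpm_allocate() with a 4GB
 * physical ceiling (atop(0xffffffff)) to obtain a suitable page.
 */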
2677unsigned int vm_lopages_allocated_q = 0;
2678unsigned int vm_lopages_allocated_cpm_success = 0;
2679unsigned int vm_lopages_allocated_cpm_failed = 0;
2680vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2681
2682vm_page_t
2683vm_page_grablo(void)
2684{
2685 vm_page_t mem;
2686
2687 if (vm_lopage_needed == FALSE)
2688 return (vm_page_grab());
2689
2690 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2691
2692 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2693 vm_page_queue_remove_first(&vm_lopage_queue_free,
2694 mem,
2695 vm_page_t,
2696 vmp_pageq);
2697 assert(vm_lopage_free_count);
2698 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2699 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
2700
2701 vm_lopage_free_count--;
2702 vm_lopages_allocated_q++;
2703
2704 if (vm_lopage_free_count < vm_lopage_lowater)
2705 vm_lopage_refill = TRUE;
2706
2707 lck_mtx_unlock(&vm_page_queue_free_lock);
2708
2709#if CONFIG_BACKGROUND_QUEUE
2710 vm_page_assign_background_state(mem);
2711#endif
2712 } else {
2713 lck_mtx_unlock(&vm_page_queue_free_lock);
2714
2715 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2716
2717 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2718 vm_lopages_allocated_cpm_failed++;
2719 lck_mtx_unlock(&vm_page_queue_free_lock);
2720
2721 return (VM_PAGE_NULL);
2722 }
2723 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
2724
2725 mem->vmp_busy = TRUE;
2726
2727 vm_page_lockspin_queues();
2728
2729 mem->vmp_gobbled = FALSE;
2730 vm_page_gobble_count--;
2731 vm_page_wire_count--;
2732
2733 vm_lopages_allocated_cpm_success++;
2734 vm_page_unlock_queues();
2735 }
2736 assert(mem->vmp_busy);
2737 assert(!mem->vmp_pmapped);
2738 assert(!mem->vmp_wpmapped);
2739 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2740
2741 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2742
2743 disable_preemption();
2744 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2745 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
2746 enable_preemption();
2747
2748 return (mem);
2749}
2750
2751
/*
 * vm_page_grab:
 *
 * first try to grab a page from the per-cpu free list...
 * this must be done while preemption is disabled... if
 * a page is available, we're done...
 * if no page is available, grab the vm_page_queue_free_lock
 * and see if the current number of free pages would allow us
 * to grab at least 1... if not, return VM_PAGE_NULL as before...
 * if there are pages available, disable preemption and
 * recheck the state of the per-cpu free list... we could
 * have been preempted and moved to a different cpu, or
 * some other thread could have re-filled it... if still
 * empty, figure out how many pages we can steal from the
 * global free queue and move them to the per-cpu queue...
 * return 1 of these pages when done... only wake up the
 * pageout_scan thread if we moved pages from the global
 * list... no need for the wakeup if we've satisfied the
 * request from the per-cpu queue.
 */
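/*
 * A minimal caller-side sketch (illustrative only; the retry loop
 * mirrors the VM_PAGE_WAIT() pattern used elsewhere in this file):
 *
 *	for (;;) {
 *		mem = vm_page_grab();
 *		if (mem != VM_PAGE_NULL)
 *			break;
 *		VM_PAGE_WAIT();
 *	}
 *
 * The page comes back busy and not yet associated with any object;
 * it is typically handed to vm_page_insert() next.
 */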
2772
2773#if CONFIG_SECLUDED_MEMORY
2774vm_page_t vm_page_grab_secluded(void);
2775#endif /* CONFIG_SECLUDED_MEMORY */
2776
2777vm_page_t
2778vm_page_grab(void)
2779{
2780 return vm_page_grab_options(0);
2781}
2782
2783#if HIBERNATION
2784boolean_t hibernate_rebuild_needed = FALSE;
2785#endif /* HIBERNATION */
2786
2787vm_page_t
2788vm_page_grab_options(
2789 int grab_options)
2790{
2791 vm_page_t mem;
2792
2793 disable_preemption();
2794
2795 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2796return_page_from_cpu_list:
2797 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2798
2799#if HIBERNATION
2800 if (hibernate_rebuild_needed) {
2801 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2802 }
2803#endif /* HIBERNATION */
2804 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2805 PROCESSOR_DATA(current_processor(), free_pages) = mem->vmp_snext;
2806 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2807
2808 enable_preemption();
2809 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2810 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
2811
2812 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
2813 assert(mem->vmp_tabled == FALSE);
2814 assert(mem->vmp_object == 0);
2815 assert(!mem->vmp_laundry);
2816 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
2817 assert(mem->vmp_busy);
2818 assert(!mem->vmp_pmapped);
2819 assert(!mem->vmp_wpmapped);
2820 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2821
2822#if CONFIG_BACKGROUND_QUEUE
2823 vm_page_assign_background_state(mem);
2824#endif
2825 return mem;
2826 }
2827 enable_preemption();
2828
2829
2830 /*
2831 * Optionally produce warnings if the wire or gobble
2832 * counts exceed some threshold.
2833 */
2834#if VM_PAGE_WIRE_COUNT_WARNING
2835 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2836 printf("mk: vm_page_grab(): high wired page count of %d\n",
2837 vm_page_wire_count);
2838 }
2839#endif
2840#if VM_PAGE_GOBBLE_COUNT_WARNING
2841 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2842 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2843 vm_page_gobble_count);
2844 }
2845#endif
2846
2847 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2848
2849 /*
2850 * Only let privileged threads (involved in pageout)
2851 * dip into the reserved pool.
2852 */
2853 if ((vm_page_free_count < vm_page_free_reserved) &&
2854 !(current_thread()->options & TH_OPT_VMPRIV)) {
2855 /* no page for us in the free queue... */
2856 lck_mtx_unlock(&vm_page_queue_free_lock);
2857 mem = VM_PAGE_NULL;
2858
2859#if CONFIG_SECLUDED_MEMORY
2860 /* ... but can we try and grab from the secluded queue? */
2861 if (vm_page_secluded_count > 0 &&
2862 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2863 task_can_use_secluded_mem(current_task(), TRUE))) {
2864 mem = vm_page_grab_secluded();
2865 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2866 vm_page_secluded.grab_for_iokit++;
2867 if (mem) {
2868 vm_page_secluded.grab_for_iokit_success++;
2869 }
2870 }
2871 if (mem) {
2872 VM_CHECK_MEMORYSTATUS;
2873
2874 disable_preemption();
2875 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2876 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2877 enable_preemption();
2878
2879 return mem;
2880 }
2881 }
2882#else /* CONFIG_SECLUDED_MEMORY */
2883 (void) grab_options;
2884#endif /* CONFIG_SECLUDED_MEMORY */
2885 }
2886 else {
2887 vm_page_t head;
2888 vm_page_t tail;
2889 unsigned int pages_to_steal;
2890 unsigned int color;
2891 unsigned int clump_end, sub_count;
2892
2893 while ( vm_page_free_count == 0 ) {
2894
2895 lck_mtx_unlock(&vm_page_queue_free_lock);
2896 /*
2897 * must be a privileged thread to be
2898 * in this state since a non-privileged
2899 * thread would have bailed if we were
2900 * under the vm_page_free_reserved mark
2901 */
2902 VM_PAGE_WAIT();
2903 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2904 }
2905
2906 disable_preemption();
2907
2908 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2909 lck_mtx_unlock(&vm_page_queue_free_lock);
2910
2911 /*
2912 * we got preempted and moved to another processor
2913 * or we got preempted and someone else ran and filled the cache
2914 */
2915 goto return_page_from_cpu_list;
2916 }
2917 if (vm_page_free_count <= vm_page_free_reserved)
2918 pages_to_steal = 1;
2919 else {
2920 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2921 pages_to_steal = vm_free_magazine_refill_limit;
2922 else
2923 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2924 }
2925 color = PROCESSOR_DATA(current_processor(), start_color);
2926 head = tail = NULL;
2927
2928 vm_page_free_count -= pages_to_steal;
2929 clump_end = sub_count = 0;
2930
2931 while (pages_to_steal--) {
2932
2933 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2934 color = (color + 1) & vm_color_mask;
2935#if defined(__x86_64__)
2936 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
2937 mem,
2938 vm_page_t,
2939 vmp_pageq,
2940 clump_end);
2941#else
2942 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2943 mem,
2944 vm_page_t,
2945 vmp_pageq);
2946#endif
2947
2948 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_Q);
2949
2950 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2951
2952#if defined(__arm__) || defined(__arm64__)
2953 color = (color + 1) & vm_color_mask;
2954#else
2955
2956#if DEVELOPMENT || DEBUG
2957
2958 sub_count++;
2959 if (clump_end) {
2960 vm_clump_update_stats(sub_count);
2961 sub_count = 0;
2962 color = (color + 1) & vm_color_mask;
2963 }
2964#else
2965 if (clump_end) color = (color + 1) & vm_color_mask;
2966
2967#endif /* if DEVELOPMENT || DEBUG */
2968
2969#endif /* if defined(__arm__) || defined(__arm64__) */
2970
2971 if (head == NULL)
2972 head = mem;
2973 else
2974 tail->vmp_snext = mem;
2975 tail = mem;
2976
2977 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
2978 assert(mem->vmp_tabled == FALSE);
2979 assert(mem->vmp_object == 0);
2980 assert(!mem->vmp_laundry);
2981
2982 mem->vmp_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2983
2984 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
2985 assert(mem->vmp_busy);
2986 assert(!mem->vmp_pmapped);
2987 assert(!mem->vmp_wpmapped);
2988 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2989 }
2990#if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
2991 vm_clump_update_stats(sub_count);
2992#endif
2993 lck_mtx_unlock(&vm_page_queue_free_lock);
2994
2995#if HIBERNATION
2996 if (hibernate_rebuild_needed) {
2997 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2998 }
2999#endif /* HIBERNATION */
3000 PROCESSOR_DATA(current_processor(), free_pages) = head->vmp_snext;
3001 PROCESSOR_DATA(current_processor(), start_color) = color;
3002
3003 /*
3004 * satisfy this request
3005 */
3006 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
3007 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
3008 mem = head;
3009 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
3010
3011 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3012 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3013
3014 enable_preemption();
3015 }
3016 /*
3017 * Decide if we should poke the pageout daemon.
3018 * We do this if the free count is less than the low
3019 * water mark, or if the free count is less than the high
3020 * water mark (but above the low water mark) and the inactive
3021 * count is less than its target.
3022 *
3023 * We don't have the counts locked ... if they change a little,
3024 * it doesn't really matter.
3025 */
3026 if (vm_page_free_count < vm_page_free_min)
3027 thread_wakeup((event_t) &vm_page_free_wanted);
3028
3029 VM_CHECK_MEMORYSTATUS;
3030
3031 if (mem) {
3032// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
3033
3034#if CONFIG_BACKGROUND_QUEUE
3035 vm_page_assign_background_state(mem);
3036#endif
3037 }
3038 return mem;
3039}
3040
3041#if CONFIG_SECLUDED_MEMORY
3042vm_page_t
3043vm_page_grab_secluded(void)
3044{
3045 vm_page_t mem;
3046 vm_object_t object;
3047 int refmod_state;
3048
3049 if (vm_page_secluded_count == 0) {
3050 /* no secluded pages to grab... */
3051 return VM_PAGE_NULL;
3052 }
3053
3054 /* secluded queue is protected by the VM page queue lock */
3055 vm_page_lock_queues();
3056
3057 if (vm_page_secluded_count == 0) {
3058 /* no secluded pages to grab... */
3059 vm_page_unlock_queues();
3060 return VM_PAGE_NULL;
3061 }
3062
3063#if 00
3064 /* can we grab from the secluded queue? */
3065 if (vm_page_secluded_count > vm_page_secluded_target ||
3066 (vm_page_secluded_count > 0 &&
3067 task_can_use_secluded_mem(current_task(), TRUE))) {
3068 /* OK */
3069 } else {
3070 /* can't grab from secluded queue... */
3071 vm_page_unlock_queues();
3072 return VM_PAGE_NULL;
3073 }
3074#endif
3075
3076 /* we can grab a page from secluded queue! */
3077 assert((vm_page_secluded_count_free +
3078 vm_page_secluded_count_inuse) ==
3079 vm_page_secluded_count);
3080 if (current_task()->task_can_use_secluded_mem) {
3081 assert(num_tasks_can_use_secluded_mem > 0);
3082 }
3083 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
3084 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3085 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
3086 assert(mem->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
3087 vm_page_queues_remove(mem, TRUE);
3088
3089 object = VM_PAGE_OBJECT(mem);
3090
3091 assert(!mem->vmp_fictitious);
3092 assert(!VM_PAGE_WIRED(mem));
3093 if (object == VM_OBJECT_NULL) {
3094 /* free for grab! */
3095 vm_page_unlock_queues();
3096 vm_page_secluded.grab_success_free++;
3097
3098 assert(mem->vmp_busy);
3099 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3100 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3101 assert(mem->vmp_pageq.next == 0);
3102 assert(mem->vmp_pageq.prev == 0);
3103 assert(mem->vmp_listq.next == 0);
3104 assert(mem->vmp_listq.prev == 0);
3105#if CONFIG_BACKGROUND_QUEUE
3106 assert(mem->vmp_on_backgroundq == 0);
3107 assert(mem->vmp_backgroundq.next == 0);
3108 assert(mem->vmp_backgroundq.prev == 0);
3109#endif /* CONFIG_BACKGROUND_QUEUE */
3110 return mem;
3111 }
3112
3113 assert(!object->internal);
3114// vm_page_pageable_external_count--;
3115
3116 if (!vm_object_lock_try(object)) {
3117// printf("SECLUDED: page %p: object %p locked\n", mem, object);
3118 vm_page_secluded.grab_failure_locked++;
3119 reactivate_secluded_page:
3120 vm_page_activate(mem);
3121 vm_page_unlock_queues();
3122 return VM_PAGE_NULL;
3123 }
3124 if (mem->vmp_busy ||
3125 mem->vmp_cleaning ||
3126 mem->vmp_laundry) {
3127 /* can't steal page in this state... */
3128 vm_object_unlock(object);
3129 vm_page_secluded.grab_failure_state++;
3130 goto reactivate_secluded_page;
3131 }
3132
3133 mem->vmp_busy = TRUE;
3134 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3135 if (refmod_state & VM_MEM_REFERENCED) {
3136 mem->vmp_reference = TRUE;
3137 }
3138 if (refmod_state & VM_MEM_MODIFIED) {
3139 SET_PAGE_DIRTY(mem, FALSE);
3140 }
3141 if (mem->vmp_dirty || mem->vmp_precious) {
3142 /* can't grab a dirty page; re-activate */
3143// printf("SECLUDED: dirty page %p\n", mem);
3144 PAGE_WAKEUP_DONE(mem);
3145 vm_page_secluded.grab_failure_dirty++;
3146 vm_object_unlock(object);
3147 goto reactivate_secluded_page;
3148 }
3149 if (mem->vmp_reference) {
3150 /* it's been used but we do need to grab a page... */
3151 }
3152
3153 vm_page_unlock_queues();
3154
3155 /* finish what vm_page_free() would have done... */
3156 vm_page_free_prepare_object(mem, TRUE);
3157 vm_object_unlock(object);
3158 object = VM_OBJECT_NULL;
3159 if (vm_page_free_verify) {
3160 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
3161 }
3162 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3163 vm_page_secluded.grab_success_other++;
3164
3165 assert(mem->vmp_busy);
3166 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3167 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3168 assert(mem->vmp_pageq.next == 0);
3169 assert(mem->vmp_pageq.prev == 0);
3170 assert(mem->vmp_listq.next == 0);
3171 assert(mem->vmp_listq.prev == 0);
3172#if CONFIG_BACKGROUND_QUEUE
3173 assert(mem->vmp_on_backgroundq == 0);
3174 assert(mem->vmp_backgroundq.next == 0);
3175 assert(mem->vmp_backgroundq.prev == 0);
3176#endif /* CONFIG_BACKGROUND_QUEUE */
3177
3178 return mem;
3179}
3180#endif /* CONFIG_SECLUDED_MEMORY */
3181
3182/*
3183 * vm_page_release:
3184 *
3185 * Return a page to the free list.
3186 */
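/*
 * Depending on the page and current policy, the page may instead go
 * to the low-memory free queue (32-bit DMA support) or, with
 * CONFIG_SECLUDED_MEMORY, to the secluded queue; threads waiting on
 * the various "free wanted" counters are woken as appropriate.
 */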
3187
3188void
3189vm_page_release(
3190 vm_page_t mem,
3191 boolean_t page_queues_locked)
3192{
3193 unsigned int color;
3194 int need_wakeup = 0;
3195 int need_priv_wakeup = 0;
3196#if CONFIG_SECLUDED_MEMORY
3197 int need_secluded_wakeup = 0;
3198#endif /* CONFIG_SECLUDED_MEMORY */
3199
3200 if (page_queues_locked) {
3201 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3202 } else {
3203 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3204 }
3205
3206 assert(!mem->vmp_private && !mem->vmp_fictitious);
3207 if (vm_page_free_verify) {
3208 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
3209 }
3210// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3211
3212 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3213
3214 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3215
3216 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3217 assert(mem->vmp_busy);
3218 assert(!mem->vmp_laundry);
3219 assert(mem->vmp_object == 0);
3220 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
3221 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3222#if CONFIG_BACKGROUND_QUEUE
3223 assert(mem->vmp_backgroundq.next == 0 &&
3224 mem->vmp_backgroundq.prev == 0 &&
3225 mem->vmp_on_backgroundq == FALSE);
3226#endif
3227 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
3228 vm_lopage_free_count < vm_lopage_free_limit &&
3229 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3230 /*
3231 * this exists to support hardware controllers
3232 * incapable of generating DMAs with more than 32 bits
3233 * of address on platforms with physical memory > 4G...
3234 */
3235 vm_page_queue_enter_first(&vm_lopage_queue_free,
3236 mem,
3237 vm_page_t,
3238 vmp_pageq);
3239 vm_lopage_free_count++;
3240
3241 if (vm_lopage_free_count >= vm_lopage_free_limit)
3242 vm_lopage_refill = FALSE;
3243
3244 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3245 mem->vmp_lopage = TRUE;
3246#if CONFIG_SECLUDED_MEMORY
3247 } else if (vm_page_free_count > vm_page_free_reserved &&
3248 vm_page_secluded_count < vm_page_secluded_target &&
3249 num_tasks_can_use_secluded_mem == 0) {
3250 /*
3251 * XXX FBDP TODO: also avoid refilling secluded queue
3252 * when some IOKit objects are already grabbing from it...
3253 */
3254 if (!page_queues_locked) {
3255 if (!vm_page_trylock_queues()) {
3256 /* take locks in right order */
3257 lck_mtx_unlock(&vm_page_queue_free_lock);
3258 vm_page_lock_queues();
3259 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3260 }
3261 }
3262 mem->vmp_lopage = FALSE;
3263 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3264 vm_page_queue_enter_first(&vm_page_queue_secluded,
3265 mem,
3266 vm_page_t,
3267 vmp_pageq);
3268 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3269 vm_page_secluded_count++;
3270 vm_page_secluded_count_free++;
3271 if (!page_queues_locked) {
3272 vm_page_unlock_queues();
3273 }
3274 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3275 if (vm_page_free_wanted_secluded > 0) {
3276 vm_page_free_wanted_secluded--;
3277 need_secluded_wakeup = 1;
3278 }
3279#endif /* CONFIG_SECLUDED_MEMORY */
3280 } else {
3281 mem->vmp_lopage = FALSE;
3282 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3283
3284 color = VM_PAGE_GET_COLOR(mem);
3285#if defined(__x86_64__)
3286 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3287 mem,
3288 vm_page_t,
3289 vmp_pageq);
3290#else
3291 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3292 mem,
3293 vm_page_t,
3294 vmp_pageq);
3295#endif
3296 vm_page_free_count++;
		/*
		 * Check if we should wake up someone waiting for a page.
		 * But don't bother waking them unless they can allocate.
		 *
		 * We wake up only one thread, to prevent starvation.
		 * Because the scheduling system handles wait queues FIFO,
		 * if we wake up all waiting threads, one greedy thread
		 * can starve multiple niceguy threads. When the threads
		 * all wake up, the greedy thread runs first, grabs the page,
		 * and waits for another page. It will be the first to run
		 * when the next page is freed.
3308 *
3309 * However, there is a slight danger here.
3310 * The thread we wake might not use the free page.
3311 * Then the other threads could wait indefinitely
3312 * while the page goes unused. To forestall this,
3313 * the pageout daemon will keep making free pages
3314 * as long as vm_page_free_wanted is non-zero.
3315 */
3316
3317 assert(vm_page_free_count > 0);
3318 if (vm_page_free_wanted_privileged > 0) {
3319 vm_page_free_wanted_privileged--;
3320 need_priv_wakeup = 1;
3321#if CONFIG_SECLUDED_MEMORY
3322 } else if (vm_page_free_wanted_secluded > 0 &&
3323 vm_page_free_count > vm_page_free_reserved) {
3324 vm_page_free_wanted_secluded--;
3325 need_secluded_wakeup = 1;
3326#endif /* CONFIG_SECLUDED_MEMORY */
3327 } else if (vm_page_free_wanted > 0 &&
3328 vm_page_free_count > vm_page_free_reserved) {
3329 vm_page_free_wanted--;
3330 need_wakeup = 1;
3331 }
3332 }
3333 vm_pageout_vminfo.vm_page_pages_freed++;
3334
3335 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, 1, 0, 0, 0);
3336
3337 lck_mtx_unlock(&vm_page_queue_free_lock);
3338
3339 if (need_priv_wakeup)
3340 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
3341#if CONFIG_SECLUDED_MEMORY
3342 else if (need_secluded_wakeup)
3343 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3344#endif /* CONFIG_SECLUDED_MEMORY */
3345 else if (need_wakeup)
3346 thread_wakeup_one((event_t) &vm_page_free_count);
3347
3348 VM_CHECK_MEMORYSTATUS;
3349}
3350
3351/*
3352 * This version of vm_page_release() is used only at startup
3353 * when we are single-threaded and pages are being released
 * for the first time.  Hence, no locking is done and checks that
 * would be redundant in this context are skipped.
 * Note: VM_CHECK_MEMORYSTATUS is invoked by the caller.
3356 */
3357void
3358vm_page_release_startup(
3359 vm_page_t mem)
3360{
3361 vm_page_queue_t queue_free;
3362
3363 if (vm_lopage_free_count < vm_lopage_free_limit &&
3364 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3365 mem->vmp_lopage = TRUE;
3366 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3367 vm_lopage_free_count++;
3368 queue_free = &vm_lopage_queue_free;
3369#if CONFIG_SECLUDED_MEMORY
3370 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3371 mem->vmp_lopage = FALSE;
3372 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3373 vm_page_secluded_count++;
3374 vm_page_secluded_count_free++;
3375 queue_free = &vm_page_queue_secluded;
3376#endif /* CONFIG_SECLUDED_MEMORY */
3377 } else {
3378 mem->vmp_lopage = FALSE;
3379 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3380 vm_page_free_count++;
3381 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3382 }
3383 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
3384#if defined(__x86_64__)
3385 vm_page_queue_enter_clump(queue_free, mem, vm_page_t, vmp_pageq);
3386#else
3387 vm_page_queue_enter(queue_free, mem, vm_page_t, vmp_pageq);
3388#endif
3389 } else
3390 vm_page_queue_enter_first(queue_free, mem, vm_page_t, vmp_pageq);
3391}
3392
3393/*
3394 * vm_page_wait:
3395 *
3396 * Wait for a page to become available.
3397 * If there are plenty of free pages, then we don't sleep.
3398 *
3399 * Returns:
3400 * TRUE: There may be another page, try again
3401 * FALSE: We were interrupted out of our wait, don't try again
3402 */
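/*
 * Note: the first thread to start waiting on a given "wanted" counter
 * also wakes the pageout daemon (via the vm_page_free_wanted event)
 * so that free pages start being produced.
 */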
3403
3404boolean_t
3405vm_page_wait(
3406 int interruptible )
3407{
3408 /*
3409 * We can't use vm_page_free_reserved to make this
3410 * determination. Consider: some thread might
3411 * need to allocate two pages. The first allocation
3412 * succeeds, the second fails. After the first page is freed,
3413 * a call to vm_page_wait must really block.
3414 */
3415 kern_return_t wait_result;
3416 int need_wakeup = 0;
3417 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3418
3419 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3420
3421 if (is_privileged && vm_page_free_count) {
3422 lck_mtx_unlock(&vm_page_queue_free_lock);
3423 return TRUE;
3424 }
3425
3426 if (vm_page_free_count >= vm_page_free_target) {
3427 lck_mtx_unlock(&vm_page_queue_free_lock);
3428 return TRUE;
3429 }
3430
3431 if (is_privileged) {
3432 if (vm_page_free_wanted_privileged++ == 0)
3433 need_wakeup = 1;
3434 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3435#if CONFIG_SECLUDED_MEMORY
3436 } else if (secluded_for_apps &&
3437 task_can_use_secluded_mem(current_task(), FALSE)) {
3438#if 00
3439 /* XXX FBDP: need pageq lock for this... */
3440 /* XXX FBDP: might wait even if pages available, */
3441 /* XXX FBDP: hopefully not for too long... */
3442 if (vm_page_secluded_count > 0) {
3443 lck_mtx_unlock(&vm_page_queue_free_lock);
3444 return TRUE;
3445 }
3446#endif
3447 if (vm_page_free_wanted_secluded++ == 0) {
3448 need_wakeup = 1;
3449 }
3450 wait_result = assert_wait(
3451 (event_t)&vm_page_free_wanted_secluded,
3452 interruptible);
3453#endif /* CONFIG_SECLUDED_MEMORY */
3454 } else {
3455 if (vm_page_free_wanted++ == 0)
3456 need_wakeup = 1;
3457 wait_result = assert_wait((event_t)&vm_page_free_count,
3458 interruptible);
3459 }
3460 lck_mtx_unlock(&vm_page_queue_free_lock);
3461 counter(c_vm_page_wait_block++);
3462
3463 if (need_wakeup)
3464 thread_wakeup((event_t)&vm_page_free_wanted);
3465
3466 if (wait_result == THREAD_WAITING) {
3467 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3468 vm_page_free_wanted_privileged,
3469 vm_page_free_wanted,
3470#if CONFIG_SECLUDED_MEMORY
3471 vm_page_free_wanted_secluded,
3472#else /* CONFIG_SECLUDED_MEMORY */
3473 0,
3474#endif /* CONFIG_SECLUDED_MEMORY */
3475 0);
3476 wait_result = thread_block(THREAD_CONTINUE_NULL);
3477 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block,
3478 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3479 }
3480
3481 return (wait_result == THREAD_AWAKENED);
3482}
3483
3484/*
3485 * vm_page_alloc:
3486 *
3487 * Allocate and return a memory cell associated
3488 * with this VM object/offset pair.
3489 *
3490 * Object must be locked.
3491 */
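/*
 * Unlike vm_page_grab(), the returned page is already inserted into
 * "object" at "offset".  It is still marked busy; the caller typically
 * clears that (e.g. via PAGE_WAKEUP_DONE()) once the page contents
 * are valid.
 */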
3492
3493vm_page_t
3494vm_page_alloc(
3495 vm_object_t object,
3496 vm_object_offset_t offset)
3497{
3498 vm_page_t mem;
3499 int grab_options;
3500
3501 vm_object_lock_assert_exclusive(object);
3502 grab_options = 0;
3503#if CONFIG_SECLUDED_MEMORY
3504 if (object->can_grab_secluded) {
3505 grab_options |= VM_PAGE_GRAB_SECLUDED;
3506 }
3507#endif /* CONFIG_SECLUDED_MEMORY */
3508 mem = vm_page_grab_options(grab_options);
3509 if (mem == VM_PAGE_NULL)
3510 return VM_PAGE_NULL;
3511
3512 vm_page_insert(mem, object, offset);
3513
3514 return(mem);
3515}
3516
3517/*
3518 * vm_page_alloc_guard:
3519 *
3520 * Allocate a fictitious page which will be used
3521 * as a guard page. The page will be inserted into
3522 * the object and returned to the caller.
3523 */
3524
3525vm_page_t
3526vm_page_alloc_guard(
3527 vm_object_t object,
3528 vm_object_offset_t offset)
3529{
3530 vm_page_t mem;
3531
3532 vm_object_lock_assert_exclusive(object);
3533 mem = vm_page_grab_guard();
3534 if (mem == VM_PAGE_NULL)
3535 return VM_PAGE_NULL;
3536
3537 vm_page_insert(mem, object, offset);
3538
3539 return(mem);
3540}
3541
3542
3543counter(unsigned int c_laundry_pages_freed = 0;)
3544
3545/*
3546 * vm_page_free_prepare:
3547 *
3548 * Removes page from any queue it may be on
3549 * and disassociates it from its VM object.
3550 *
3551 * Object and page queues must be locked prior to entry.
3552 */
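/*
 * The preparation is split into vm_page_free_prepare_queues() and
 * vm_page_free_prepare_object() so that callers such as
 * vm_page_free_unlocked() can do the queue-side work while holding
 * the page queues lock and the object-side work after dropping it.
 */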
3553static void
3554vm_page_free_prepare(
3555 vm_page_t mem)
3556{
3557 vm_page_free_prepare_queues(mem);
3558 vm_page_free_prepare_object(mem, TRUE);
3559}
3560
3561
3562void
3563vm_page_free_prepare_queues(
3564 vm_page_t mem)
3565{
3566 vm_object_t m_object;
3567
3568 VM_PAGE_CHECK(mem);
3569
3570 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
3571 assert(!mem->vmp_cleaning);
3572 m_object = VM_PAGE_OBJECT(mem);
3573
3574 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3575 if (m_object) {
3576 vm_object_lock_assert_exclusive(m_object);
3577 }
3578 if (mem->vmp_laundry) {
3579 /*
3580 * We may have to free a page while it's being laundered
3581 * if we lost its pager (due to a forced unmount, for example).
3582 * We need to call vm_pageout_steal_laundry() before removing
3583 * the page from its VM object, so that we can remove it
3584 * from its pageout queue and adjust the laundry accounting
		 * from its pageout queue and adjust the laundry accounting.
3586 vm_pageout_steal_laundry(mem, TRUE);
3587 counter(++c_laundry_pages_freed);
3588 }
3589
3590 vm_page_queues_remove(mem, TRUE);
3591
3592 if (VM_PAGE_WIRED(mem)) {
3593 assert(mem->vmp_wire_count > 0);
3594
3595 if (m_object) {
3596
3597 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3598 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3599 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3600
3601 assert(m_object->resident_page_count >=
3602 m_object->wired_page_count);
3603
3604 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3605 OSAddAtomic(+1, &vm_page_purgeable_count);
3606 assert(vm_page_purgeable_wired_count > 0);
3607 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3608 }
3609 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3610 m_object->purgable == VM_PURGABLE_EMPTY) &&
3611 m_object->vo_owner != TASK_NULL) {
3612 task_t owner;
3613 int ledger_idx_volatile;
3614 int ledger_idx_nonvolatile;
3615 int ledger_idx_volatile_compressed;
3616 int ledger_idx_nonvolatile_compressed;
3617 boolean_t do_footprint;
3618
3619 owner = VM_OBJECT_OWNER(m_object);
3620 vm_object_ledger_tag_ledgers(
3621 m_object,
3622 &ledger_idx_volatile,
3623 &ledger_idx_nonvolatile,
3624 &ledger_idx_volatile_compressed,
3625 &ledger_idx_nonvolatile_compressed,
3626 &do_footprint);
3627 /*
3628 * While wired, this page was accounted
3629 * as "non-volatile" but it should now
3630 * be accounted as "volatile".
3631 */
3632 /* one less "non-volatile"... */
3633 ledger_debit(owner->ledger,
3634 ledger_idx_nonvolatile,
3635 PAGE_SIZE);
3636 if (do_footprint) {
3637 /* ... and "phys_footprint" */
3638 ledger_debit(owner->ledger,
3639 task_ledgers.phys_footprint,
3640 PAGE_SIZE);
3641 }
3642 /* one more "volatile" */
3643 ledger_credit(owner->ledger,
3644 ledger_idx_volatile,
3645 PAGE_SIZE);
3646 }
3647 }
3648 if (!mem->vmp_private && !mem->vmp_fictitious)
3649 vm_page_wire_count--;
3650
3651 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3652 mem->vmp_wire_count = 0;
3653 assert(!mem->vmp_gobbled);
3654 } else if (mem->vmp_gobbled) {
3655 if (!mem->vmp_private && !mem->vmp_fictitious)
3656 vm_page_wire_count--;
3657 vm_page_gobble_count--;
3658 }
3659}
3660
3661
3662void
3663vm_page_free_prepare_object(
3664 vm_page_t mem,
3665 boolean_t remove_from_hash)
3666{
3667 if (mem->vmp_tabled)
3668 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
3669
3670 PAGE_WAKEUP(mem); /* clears wanted */
3671
3672 if (mem->vmp_private) {
3673 mem->vmp_private = FALSE;
3674 mem->vmp_fictitious = TRUE;
3675 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
3676 }
3677 if ( !mem->vmp_fictitious) {
3678 assert(mem->vmp_pageq.next == 0);
3679 assert(mem->vmp_pageq.prev == 0);
3680 assert(mem->vmp_listq.next == 0);
3681 assert(mem->vmp_listq.prev == 0);
3682#if CONFIG_BACKGROUND_QUEUE
3683 assert(mem->vmp_backgroundq.next == 0);
3684 assert(mem->vmp_backgroundq.prev == 0);
3685#endif /* CONFIG_BACKGROUND_QUEUE */
3686 assert(mem->vmp_next_m == 0);
3687 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->vmp_lopage);
3688 }
3689}
3690
3691
3692/*
3693 * vm_page_free:
3694 *
3695 * Returns the given page to the free list,
3696 *	disassociating it from any VM object.
3697 *
3698 * Object and page queues must be locked prior to entry.
3699 */
3700void
3701vm_page_free(
3702 vm_page_t mem)
3703{
3704 vm_page_free_prepare(mem);
3705
3706 if (mem->vmp_fictitious) {
3707 vm_page_release_fictitious(mem);
3708 } else {
3709 vm_page_release(mem,
3710 TRUE); /* page queues are locked */
3711 }
3712}
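
/*
 * Example of the locking discipline described above for vm_page_free()
 * -- an illustrative sketch only, not part of the original code.  The
 * helper name is hypothetical; vm_page_lookup() and the vm_object lock
 * calls are assumed from the standard VM interfaces, and the page is
 * assumed to be exclusively held (busy) by the caller.
 */
#if 0 /* hedged sketch, not compiled */
static void
example_free_one_page(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;

	vm_object_lock(object);
	mem = vm_page_lookup(object, offset);
	if (mem != VM_PAGE_NULL) {
		/*
		 * vm_page_free() wants both the object lock and
		 * the page queues lock held by the caller
		 */
		vm_page_lockspin_queues();
		vm_page_free(mem);
		vm_page_unlock_queues();
	}
	vm_object_unlock(object);
}
#endif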
3713
3714
3715void
3716vm_page_free_unlocked(
3717 vm_page_t mem,
3718 boolean_t remove_from_hash)
3719{
3720 vm_page_lockspin_queues();
3721 vm_page_free_prepare_queues(mem);
3722 vm_page_unlock_queues();
3723
3724 vm_page_free_prepare_object(mem, remove_from_hash);
3725
3726 if (mem->vmp_fictitious) {
3727 vm_page_release_fictitious(mem);
3728 } else {
3729 vm_page_release(mem, FALSE); /* page queues are not locked */
3730 }
3731}
3732
3733
3734/*
3735 * Free a list of pages. The list can be up to several hundred pages,
3736 * as batched up by vm_pageout_scan().
3737 * The big win is not having to take the free list lock once
3738 * per page.
3739 *
3740 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3741 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
3742 */
3743void
3744vm_page_free_list(
3745 vm_page_t freeq,
3746 boolean_t prepare_object)
3747{
3748 vm_page_t mem;
3749 vm_page_t nxt;
3750 vm_page_t local_freeq;
3751 int pg_count;
3752
3753 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3754 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3755
3756 while (freeq) {
3757
3758 pg_count = 0;
3759 local_freeq = VM_PAGE_NULL;
3760 mem = freeq;
3761
3762 /*
3763 * break up the processing into smaller chunks so
3764 * that we can 'pipeline' the pages onto the
3765		 * free list without introducing too much
3766 * contention on the global free queue lock
3767 */
3768 while (mem && pg_count < 64) {
3769
3770 assert((mem->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
3771 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
3772#if CONFIG_BACKGROUND_QUEUE
3773 assert(mem->vmp_backgroundq.next == 0 &&
3774 mem->vmp_backgroundq.prev == 0 &&
3775 mem->vmp_on_backgroundq == FALSE);
3776#endif
3777 nxt = mem->vmp_snext;
3778 mem->vmp_snext = NULL;
3779 assert(mem->vmp_pageq.prev == 0);
3780
3781 if (vm_page_free_verify && !mem->vmp_fictitious && !mem->vmp_private) {
3782 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
3783 }
3784 if (prepare_object == TRUE)
3785 vm_page_free_prepare_object(mem, TRUE);
3786
3787 if (!mem->vmp_fictitious) {
3788 assert(mem->vmp_busy);
3789
3790 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
3791 vm_lopage_free_count < vm_lopage_free_limit &&
3792 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3793 vm_page_release(mem, FALSE); /* page queues are not locked */
3794#if CONFIG_SECLUDED_MEMORY
3795 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3796 num_tasks_can_use_secluded_mem == 0) {
3797 vm_page_release(mem,
3798 FALSE); /* page queues are not locked */
3799#endif /* CONFIG_SECLUDED_MEMORY */
3800 } else {
3801 /*
3802 * IMPORTANT: we can't set the page "free" here
3803 * because that would make the page eligible for
3804 * a physically-contiguous allocation (see
3805 * vm_page_find_contiguous()) right away (we don't
3806 * hold the vm_page_queue_free lock). That would
3807 * cause trouble because the page is not actually
3808 * in the free queue yet...
3809 */
3810 mem->vmp_snext = local_freeq;
3811 local_freeq = mem;
3812 pg_count++;
3813
3814 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3815 }
3816 } else {
3817 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3818 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
3819 vm_page_release_fictitious(mem);
3820 }
3821 mem = nxt;
3822 }
3823 freeq = mem;
3824
3825 if ( (mem = local_freeq) ) {
3826 unsigned int avail_free_count;
3827 unsigned int need_wakeup = 0;
3828 unsigned int need_priv_wakeup = 0;
3829#if CONFIG_SECLUDED_MEMORY
3830 unsigned int need_wakeup_secluded = 0;
3831#endif /* CONFIG_SECLUDED_MEMORY */
3832
3833 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3834
3835 while (mem) {
3836 int color;
3837
3838 nxt = mem->vmp_snext;
3839
3840 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3841 assert(mem->vmp_busy);
3842 mem->vmp_lopage = FALSE;
3843 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3844
3845 color = VM_PAGE_GET_COLOR(mem);
3846#if defined(__x86_64__)
3847 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3848 mem,
3849 vm_page_t,
3850 vmp_pageq);
3851#else
3852 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3853 mem,
3854 vm_page_t,
3855 vmp_pageq);
3856#endif
3857 mem = nxt;
3858 }
3859 vm_pageout_vminfo.vm_page_pages_freed += pg_count;
3860 vm_page_free_count += pg_count;
3861 avail_free_count = vm_page_free_count;
3862
3863 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, pg_count, 0, 0, 0);
3864
3865 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3866
3867 if (avail_free_count < vm_page_free_wanted_privileged) {
3868 need_priv_wakeup = avail_free_count;
3869 vm_page_free_wanted_privileged -= avail_free_count;
3870 avail_free_count = 0;
3871 } else {
3872 need_priv_wakeup = vm_page_free_wanted_privileged;
3873 avail_free_count -= vm_page_free_wanted_privileged;
3874 vm_page_free_wanted_privileged = 0;
3875 }
3876 }
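			/*
			 * Worked example of the accounting above (editor's note):
			 * with avail_free_count == 100 and
			 * vm_page_free_wanted_privileged == 30, all 30 privileged
			 * waiters are woken and 70 pages remain available for the
			 * secluded/normal waiters below; with only 20 pages
			 * available, all 20 go to the privileged waiters and
			 * avail_free_count drops to 0.
			 */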
3877#if CONFIG_SECLUDED_MEMORY
3878 if (vm_page_free_wanted_secluded > 0 &&
3879 avail_free_count > vm_page_free_reserved) {
3880 unsigned int available_pages;
3881 available_pages = (avail_free_count -
3882 vm_page_free_reserved);
3883 if (available_pages <
3884 vm_page_free_wanted_secluded) {
3885 need_wakeup_secluded = available_pages;
3886 vm_page_free_wanted_secluded -=
3887 available_pages;
3888 avail_free_count -= available_pages;
3889 } else {
3890 need_wakeup_secluded =
3891 vm_page_free_wanted_secluded;
3892 avail_free_count -=
3893 vm_page_free_wanted_secluded;
3894 vm_page_free_wanted_secluded = 0;
3895 }
3896 }
3897#endif /* CONFIG_SECLUDED_MEMORY */
3898 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3899 unsigned int available_pages;
3900
3901 available_pages = avail_free_count - vm_page_free_reserved;
3902
3903 if (available_pages >= vm_page_free_wanted) {
3904 need_wakeup = vm_page_free_wanted;
3905 vm_page_free_wanted = 0;
3906 } else {
3907 need_wakeup = available_pages;
3908 vm_page_free_wanted -= available_pages;
3909 }
3910 }
3911 lck_mtx_unlock(&vm_page_queue_free_lock);
3912
3913 if (need_priv_wakeup != 0) {
3914 /*
3915 * There shouldn't be that many VM-privileged threads,
3916 * so let's wake them all up, even if we don't quite
3917 * have enough pages to satisfy them all.
3918 */
3919 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3920 }
3921#if CONFIG_SECLUDED_MEMORY
3922 if (need_wakeup_secluded != 0 &&
3923 vm_page_free_wanted_secluded == 0) {
3924 thread_wakeup((event_t)
3925 &vm_page_free_wanted_secluded);
3926 } else {
3927 for (;
3928 need_wakeup_secluded != 0;
3929 need_wakeup_secluded--) {
3930 thread_wakeup_one(
3931 (event_t)
3932 &vm_page_free_wanted_secluded);
3933 }
3934 }
3935#endif /* CONFIG_SECLUDED_MEMORY */
3936 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3937 /*
3938 * We don't expect to have any more waiters
3939 * after this, so let's wake them all up at
3940 * once.
3941 */
3942 thread_wakeup((event_t) &vm_page_free_count);
3943 } else for (; need_wakeup != 0; need_wakeup--) {
3944 /*
3945 * Wake up one waiter per page we just released.
3946 */
3947 thread_wakeup_one((event_t) &vm_page_free_count);
3948 }
3949
3950 VM_CHECK_MEMORYSTATUS;
3951 }
3952 }
3953}
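
/*
 * Example of handing a batch of pages to vm_page_free_list() -- an
 * illustrative sketch only, not part of the original code.  The pages
 * are chained through vmp_snext, which is the linkage the loop above
 * consumes; the helper name is hypothetical and the caller is assumed
 * to have already made each page safe to free (busy, off its queues,
 * or still tabled if prepare_object == TRUE).
 */
#if 0 /* hedged sketch, not compiled */
static void
example_free_page_batch(
	vm_page_t	*page_array,
	unsigned int	page_count)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	unsigned int	i;

	for (i = 0; i < page_count; i++) {
		page_array[i]->vmp_snext = local_freeq;
		local_freeq = page_array[i];
	}
	/*
	 * neither the page queues lock nor the free queue lock may be
	 * held here... vm_page_free_list() takes the free queue lock
	 * itself, in chunks of up to 64 pages
	 */
	vm_page_free_list(local_freeq, TRUE);
}
#endif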
3954
3955
3956/*
3957 * vm_page_wire:
3958 *
3959 * Mark this page as wired down by yet
3960 * another map, removing it from paging queues
3961 * as necessary.
3962 *
3963 * The page's object and the page queues must be locked.
3964 */
3965
3966
3967void
3968vm_page_wire(
3969 vm_page_t mem,
3970 vm_tag_t tag,
3971 boolean_t check_memorystatus)
3972{
3973 vm_object_t m_object;
3974
3975 m_object = VM_PAGE_OBJECT(mem);
3976
3977// dbgLog(current_thread(), mem->vmp_offset, m_object, 1); /* (TEST/DEBUG) */
3978
3979 VM_PAGE_CHECK(mem);
3980 if (m_object) {
3981 vm_object_lock_assert_exclusive(m_object);
3982 } else {
3983 /*
3984 * In theory, the page should be in an object before it
3985 * gets wired, since we need to hold the object lock
3986 * to update some fields in the page structure.
3987 * However, some code (i386 pmap, for example) might want
3988 * to wire a page before it gets inserted into an object.
3989 * That's somewhat OK, as long as nobody else can get to
3990 * that page and update it at the same time.
3991 */
3992 }
3993 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3994 if ( !VM_PAGE_WIRED(mem)) {
3995
3996 if (mem->vmp_laundry)
3997 vm_pageout_steal_laundry(mem, TRUE);
3998
3999 vm_page_queues_remove(mem, TRUE);
4000
4001 assert(mem->vmp_wire_count == 0);
4002 mem->vmp_q_state = VM_PAGE_IS_WIRED;
4003
4004 if (m_object) {
4005
4006 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4007 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
4008 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
4009
4010 assert(m_object->resident_page_count >=
4011 m_object->wired_page_count);
4012 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4013 assert(vm_page_purgeable_count > 0);
4014 OSAddAtomic(-1, &vm_page_purgeable_count);
4015 OSAddAtomic(1, &vm_page_purgeable_wired_count);
4016 }
4017 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4018 m_object->purgable == VM_PURGABLE_EMPTY) &&
4019 m_object->vo_owner != TASK_NULL) {
4020 task_t owner;
4021 int ledger_idx_volatile;
4022 int ledger_idx_nonvolatile;
4023 int ledger_idx_volatile_compressed;
4024 int ledger_idx_nonvolatile_compressed;
4025 boolean_t do_footprint;
4026
4027 owner = VM_OBJECT_OWNER(m_object);
4028 vm_object_ledger_tag_ledgers(
4029 m_object,
4030 &ledger_idx_volatile,
4031 &ledger_idx_nonvolatile,
4032 &ledger_idx_volatile_compressed,
4033 &ledger_idx_nonvolatile_compressed,
4034 &do_footprint);
4035 /* less volatile bytes */
4036 ledger_debit(owner->ledger,
4037 ledger_idx_volatile,
4038 PAGE_SIZE);
4039 /* more not-quite-volatile bytes */
4040 ledger_credit(owner->ledger,
4041 ledger_idx_nonvolatile,
4042 PAGE_SIZE);
4043 if (do_footprint) {
4044 /* more footprint */
4045 ledger_credit(owner->ledger,
4046 task_ledgers.phys_footprint,
4047 PAGE_SIZE);
4048 }
4049 }
4050 if (m_object->all_reusable) {
4051 /*
4052 * Wired pages are not counted as "re-usable"
4053 * in "all_reusable" VM objects, so nothing
4054 * to do here.
4055 */
4056 } else if (mem->vmp_reusable) {
4057 /*
4058 * This page is not "re-usable" when it's
4059 * wired, so adjust its state and the
4060 * accounting.
4061 */
4062 vm_object_reuse_pages(m_object,
4063 mem->vmp_offset,
4064 mem->vmp_offset+PAGE_SIZE_64,
4065 FALSE);
4066 }
4067 }
4068 assert(!mem->vmp_reusable);
4069
4070 if (!mem->vmp_private && !mem->vmp_fictitious && !mem->vmp_gobbled)
4071 vm_page_wire_count++;
4072 if (mem->vmp_gobbled)
4073 vm_page_gobble_count--;
4074 mem->vmp_gobbled = FALSE;
4075
4076 if (check_memorystatus == TRUE) {
4077 VM_CHECK_MEMORYSTATUS;
4078 }
4079 }
4080 assert(!mem->vmp_gobbled);
4081 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
4082 mem->vmp_wire_count++;
4083 if (__improbable(mem->vmp_wire_count == 0)) {
4084 panic("vm_page_wire(%p): wire_count overflow", mem);
4085 }
4086 VM_PAGE_CHECK(mem);
4087}
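
/*
 * Example of pairing vm_page_wire() with vm_page_unwire() -- an
 * illustrative sketch only, not part of the original code.  The helper
 * name is hypothetical and the vm_object lock calls are assumed from
 * the standard VM interfaces; both routines require the object lock
 * and the page queues lock, as noted above.
 */
#if 0 /* hedged sketch, not compiled */
static void
example_wire_then_unwire(
	vm_object_t	object,
	vm_page_t	mem,
	vm_tag_t	tag)
{
	vm_object_lock(object);
	vm_page_lockspin_queues();
	vm_page_wire(mem, tag, TRUE);		/* take one wiring */
	vm_page_unlock_queues();
	vm_object_unlock(object);

	/* ... the page can't be paged out while it remains wired ... */

	vm_object_lock(object);
	vm_page_lockspin_queues();
	vm_page_unwire(mem, TRUE);		/* drop the wiring and requeue */
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif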
4088
4089/*
4090 * vm_page_unwire:
4091 *
4092 * Release one wiring of this page, potentially
4093 * enabling it to be paged again.
4094 *
4095 * The page's object and the page queues must be locked.
4096 */
4097void
4098vm_page_unwire(
4099 vm_page_t mem,
4100 boolean_t queueit)
4101{
4102 vm_object_t m_object;
4103
4104 m_object = VM_PAGE_OBJECT(mem);
4105
4106// dbgLog(current_thread(), mem->vmp_offset, m_object, 0); /* (TEST/DEBUG) */
4107
4108 VM_PAGE_CHECK(mem);
4109 assert(VM_PAGE_WIRED(mem));
4110 assert(mem->vmp_wire_count > 0);
4111 assert(!mem->vmp_gobbled);
4112 assert(m_object != VM_OBJECT_NULL);
4113 vm_object_lock_assert_exclusive(m_object);
4114 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4115 if (--mem->vmp_wire_count == 0) {
4116
4117 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
4118
4119 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4120 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4121 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
4122 if (!mem->vmp_private && !mem->vmp_fictitious) {
4123 vm_page_wire_count--;
4124 }
4125
4126 assert(m_object->resident_page_count >=
4127 m_object->wired_page_count);
4128 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4129 OSAddAtomic(+1, &vm_page_purgeable_count);
4130 assert(vm_page_purgeable_wired_count > 0);
4131 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4132 }
4133 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4134 m_object->purgable == VM_PURGABLE_EMPTY) &&
4135 m_object->vo_owner != TASK_NULL) {
4136 task_t owner;
4137 int ledger_idx_volatile;
4138 int ledger_idx_nonvolatile;
4139 int ledger_idx_volatile_compressed;
4140 int ledger_idx_nonvolatile_compressed;
4141 boolean_t do_footprint;
4142
4143 owner = VM_OBJECT_OWNER(m_object);
4144 vm_object_ledger_tag_ledgers(
4145 m_object,
4146 &ledger_idx_volatile,
4147 &ledger_idx_nonvolatile,
4148 &ledger_idx_volatile_compressed,
4149 &ledger_idx_nonvolatile_compressed,
4150 &do_footprint);
4151 /* more volatile bytes */
4152 ledger_credit(owner->ledger,
4153 ledger_idx_volatile,
4154 PAGE_SIZE);
4155 /* less not-quite-volatile bytes */
4156 ledger_debit(owner->ledger,
4157 ledger_idx_nonvolatile,
4158 PAGE_SIZE);
4159 if (do_footprint) {
4160 /* less footprint */
4161 ledger_debit(owner->ledger,
4162 task_ledgers.phys_footprint,
4163 PAGE_SIZE);
4164 }
4165 }
4166 assert(m_object != kernel_object);
4167 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
4168
4169 if (queueit == TRUE) {
4170 if (m_object->purgable == VM_PURGABLE_EMPTY) {
4171 vm_page_deactivate(mem);
4172 } else {
4173 vm_page_activate(mem);
4174 }
4175 }
4176
4177 VM_CHECK_MEMORYSTATUS;
4178
4179 }
4180 VM_PAGE_CHECK(mem);
4181}
4182
4183/*
4184 * vm_page_deactivate:
4185 *
4186 * Returns the given page to the inactive list,
4187 * indicating that no physical maps have access
4188 * to this page. [Used by the physical mapping system.]
4189 *
4190 * The page queues must be locked.
4191 */
4192void
4193vm_page_deactivate(
4194 vm_page_t m)
4195{
4196 vm_page_deactivate_internal(m, TRUE);
4197}
4198
4199
4200void
4201vm_page_deactivate_internal(
4202 vm_page_t m,
4203 boolean_t clear_hw_reference)
4204{
4205 vm_object_t m_object;
4206
4207 m_object = VM_PAGE_OBJECT(m);
4208
4209 VM_PAGE_CHECK(m);
4210 assert(m_object != kernel_object);
4211 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4212
4213// dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4214 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4215 /*
4216 * This page is no longer very interesting. If it was
4217 * interesting (active or inactive/referenced), then we
4218 * clear the reference bit and (re)enter it in the
4219 * inactive queue. Note wired pages should not have
4220 * their reference bit cleared.
4221 */
4222 assert ( !(m->vmp_absent && !m->vmp_unusual));
4223
4224 if (m->vmp_gobbled) { /* can this happen? */
4225 assert( !VM_PAGE_WIRED(m));
4226
4227 if (!m->vmp_private && !m->vmp_fictitious)
4228 vm_page_wire_count--;
4229 vm_page_gobble_count--;
4230 m->vmp_gobbled = FALSE;
4231 }
4232 /*
4233 * if this page is currently on the pageout queue, we can't do the
4234 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4235 * and we can't remove it manually since we would need the object lock
4236 * (which is not required here) to decrement the activity_in_progress
4237 * reference which is held on the object while the page is in the pageout queue...
4238 * just let the normal laundry processing proceed
4239 */
4240 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4241 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4242 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4243 VM_PAGE_WIRED(m)) {
4244 return;
4245 }
4246 if (!m->vmp_absent && clear_hw_reference == TRUE)
4247 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4248
4249 m->vmp_reference = FALSE;
4250 m->vmp_no_cache = FALSE;
4251
4252 if ( !VM_PAGE_INACTIVE(m)) {
4253 vm_page_queues_remove(m, FALSE);
4254
4255 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4256 m->vmp_dirty && m_object->internal &&
4257 (m_object->purgable == VM_PURGABLE_DENY ||
4258 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4259 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4260 vm_page_check_pageable_safe(m);
4261 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq);
4262 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
4263 vm_page_throttled_count++;
4264 } else {
4265 if (m_object->named && m_object->ref_count == 1) {
4266 vm_page_speculate(m, FALSE);
4267#if DEVELOPMENT || DEBUG
4268 vm_page_speculative_recreated++;
4269#endif
4270 } else {
4271 vm_page_enqueue_inactive(m, FALSE);
4272 }
4273 }
4274 }
4275}
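
/*
 * Example of the clear_hw_reference distinction above -- an
 * illustrative sketch only, not part of the original code; the helper
 * name is hypothetical.
 */
#if 0 /* hedged sketch, not compiled */
static void
example_deactivate(
	vm_page_t	m,
	boolean_t	keep_hw_reference)
{
	vm_page_lockspin_queues();
	if (keep_hw_reference == TRUE) {
		/* leave the pmap reference bit alone */
		vm_page_deactivate_internal(m, FALSE);
	} else {
		/* also clears the pmap reference bit */
		vm_page_deactivate(m);
	}
	vm_page_unlock_queues();
}
#endif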
4276
4277/*
4278 * vm_page_enqueue_cleaned
4279 *
4280 * Put the page on the cleaned queue, mark it cleaned, etc.
4281 * Being on the cleaned queue (i.e. having vmp_q_state set to
4282 * VM_PAGE_ON_INACTIVE_CLEANED_Q) does ** NOT ** guarantee that the page is clean!
4283 *
4284 * Call with the queues lock held.
4285 */
4286
4287void vm_page_enqueue_cleaned(vm_page_t m)
4288{
4289 vm_object_t m_object;
4290
4291 m_object = VM_PAGE_OBJECT(m);
4292
4293 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4294 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4295 assert( !(m->vmp_absent && !m->vmp_unusual));
4296
4297 if (VM_PAGE_WIRED(m)) {
4298 return;
4299 }
4300
4301 if (m->vmp_gobbled) {
4302 if (!m->vmp_private && !m->vmp_fictitious)
4303 vm_page_wire_count--;
4304 vm_page_gobble_count--;
4305 m->vmp_gobbled = FALSE;
4306 }
4307 /*
4308 * if this page is currently on the pageout queue, we can't do the
4309 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4310 * and we can't remove it manually since we would need the object lock
4311 * (which is not required here) to decrement the activity_in_progress
4312 * reference which is held on the object while the page is in the pageout queue...
4313 * just let the normal laundry processing proceed
4314 */
4315 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4316 (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4317 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4318 return;
4319 }
4320 vm_page_queues_remove(m, FALSE);
4321
4322 vm_page_check_pageable_safe(m);
4323 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, vmp_pageq);
4324 m->vmp_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
4325 vm_page_cleaned_count++;
4326
4327 vm_page_inactive_count++;
4328 if (m_object->internal) {
4329 vm_page_pageable_internal_count++;
4330 } else {
4331 vm_page_pageable_external_count++;
4332 }
4333#if CONFIG_BACKGROUND_QUEUE
4334 if (m->vmp_in_background)
4335 vm_page_add_to_backgroundq(m, TRUE);
4336#endif
4337 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
4338}
4339
4340/*
4341 * vm_page_activate:
4342 *
4343 * Put the specified page on the active list (if appropriate).
4344 *
4345 * The page queues must be locked.
4346 */
4347
4348void
4349vm_page_activate(
4350 vm_page_t m)
4351{
4352 vm_object_t m_object;
4353
4354 m_object = VM_PAGE_OBJECT(m);
4355
4356 VM_PAGE_CHECK(m);
4357#ifdef FIXME_4778297
4358 assert(m_object != kernel_object);
4359#endif
4360 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4361 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4362 assert( !(m->vmp_absent && !m->vmp_unusual));
4363
4364 if (m->vmp_gobbled) {
4365 assert( !VM_PAGE_WIRED(m));
4366 if (!m->vmp_private && !m->vmp_fictitious)
4367 vm_page_wire_count--;
4368 vm_page_gobble_count--;
4369 m->vmp_gobbled = FALSE;
4370 }
4371 /*
4372 * if this page is currently on the pageout queue, we can't do the
4373 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4374 * and we can't remove it manually since we would need the object lock
4375 * (which is not required here) to decrement the activity_in_progress
4376 * reference which is held on the object while the page is in the pageout queue...
4377 * just let the normal laundry processing proceed
4378 */
4379 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4380 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4381 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q))
4382 return;
4383
4384#if DEBUG
4385 if (m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q)
4386 panic("vm_page_activate: already active");
4387#endif
4388
4389 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4390 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4391 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4392 }
4393
4394 vm_page_queues_remove(m, FALSE);
4395
4396 if ( !VM_PAGE_WIRED(m)) {
4397 vm_page_check_pageable_safe(m);
4398 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4399 m->vmp_dirty && m_object->internal &&
4400 (m_object->purgable == VM_PURGABLE_DENY ||
4401 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4402 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4403 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq);
4404 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
4405 vm_page_throttled_count++;
4406 } else {
4407#if CONFIG_SECLUDED_MEMORY
4408 if (secluded_for_filecache &&
4409 vm_page_secluded_target != 0 &&
4410 num_tasks_can_use_secluded_mem == 0 &&
4411 m_object->eligible_for_secluded) {
4412 vm_page_queue_enter(&vm_page_queue_secluded, m,
4413 vm_page_t, vmp_pageq);
4414 m->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
4415 vm_page_secluded_count++;
4416 vm_page_secluded_count_inuse++;
4417 assert(!m_object->internal);
4418// vm_page_pageable_external_count++;
4419 } else
4420#endif /* CONFIG_SECLUDED_MEMORY */
4421 vm_page_enqueue_active(m, FALSE);
4422 }
4423 m->vmp_reference = TRUE;
4424 m->vmp_no_cache = FALSE;
4425 }
4426 VM_PAGE_CHECK(m);
4427}
4428
4429
4430/*
4431 * vm_page_speculate:
4432 *
4433 * Put the specified page on the speculative list (if appropriate).
4434 *
4435 * The page queues must be locked.
4436 */
4437void
4438vm_page_speculate(
4439 vm_page_t m,
4440 boolean_t new)
4441{
4442 struct vm_speculative_age_q *aq;
4443 vm_object_t m_object;
4444
4445 m_object = VM_PAGE_OBJECT(m);
4446
4447 VM_PAGE_CHECK(m);
4448 vm_page_check_pageable_safe(m);
4449
4450 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4451 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4452 assert( !(m->vmp_absent && !m->vmp_unusual));
4453 assert(m_object->internal == FALSE);
4454
4455 /*
4456 * if this page is currently on the pageout queue, we can't do the
4457 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4458 * and we can't remove it manually since we would need the object lock
4459 * (which is not required here) to decrement the activity_in_progress
4460 * reference which is held on the object while the page is in the pageout queue...
4461 * just let the normal laundry processing proceed
4462 */
4463 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4464 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4465 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q))
4466 return;
4467
4468 vm_page_queues_remove(m, FALSE);
4469
4470 if ( !VM_PAGE_WIRED(m)) {
4471 mach_timespec_t ts;
4472 clock_sec_t sec;
4473 clock_nsec_t nsec;
4474
4475 clock_get_system_nanotime(&sec, &nsec);
4476 ts.tv_sec = (unsigned int) sec;
4477 ts.tv_nsec = nsec;
4478
4479 if (vm_page_speculative_count == 0) {
4480
4481 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4482 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4483
4484 aq = &vm_page_queue_speculative[speculative_age_index];
4485
4486 /*
4487 * set the timer to begin a new group
4488 */
4489 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
4490 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4491 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4492 } else {
4493 aq = &vm_page_queue_speculative[speculative_age_index];
4494
4495 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4496
4497 speculative_age_index++;
4498
4499 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4500 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4501 if (speculative_age_index == speculative_steal_index) {
4502 speculative_steal_index = speculative_age_index + 1;
4503
4504 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4505 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4506 }
4507 aq = &vm_page_queue_speculative[speculative_age_index];
4508
4509 if (!vm_page_queue_empty(&aq->age_q))
4510 vm_page_speculate_ageit(aq);
4511
4512 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
4513 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4514 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4515 }
4516 }
4517 vm_page_enqueue_tail(&aq->age_q, &m->vmp_pageq);
4518 m->vmp_q_state = VM_PAGE_ON_SPECULATIVE_Q;
4519 vm_page_speculative_count++;
4520 vm_page_pageable_external_count++;
4521
4522 if (new == TRUE) {
4523 vm_object_lock_assert_exclusive(m_object);
4524
4525 m_object->pages_created++;
4526#if DEVELOPMENT || DEBUG
4527 vm_page_speculative_created++;
4528#endif
4529 }
4530 }
4531 VM_PAGE_CHECK(m);
4532}
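
/*
 * Worked example of the aging timestamp computed above (editor's note,
 * using a hypothetical setting): with
 * vm_page_speculative_q_age_ms == 1500, the expiry offset becomes
 *	age_ts.tv_sec  = 1500 / 1000 = 1 second
 *	age_ts.tv_nsec = (1500 % 1000) * 1000 * NSEC_PER_USEC = 500000000 ns
 * which is then added to the current system time; once
 * CMP_MACH_TIMESPEC() reports that this time has passed, newly
 * speculated pages start landing in the next age bin.
 */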
4533
4534
4535/*
4536 * move pages from the specified aging bin to
4537 * the speculative bin that pageout_scan claims from
4538 *
4539 * The page queues must be locked.
4540 */
4541void
4542vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4543{
4544 struct vm_speculative_age_q *sq;
4545 vm_page_t t;
4546
4547 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4548
4549 if (vm_page_queue_empty(&sq->age_q)) {
4550 sq->age_q.next = aq->age_q.next;
4551 sq->age_q.prev = aq->age_q.prev;
4552
4553 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
4554 t->vmp_pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
4555
4556 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4557 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4558 } else {
4559 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4560 t->vmp_pageq.next = aq->age_q.next;
4561
4562 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
4563 t->vmp_pageq.prev = sq->age_q.prev;
4564
4565 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
4566 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4567
4568 sq->age_q.prev = aq->age_q.prev;
4569 }
4570 vm_page_queue_init(&aq->age_q);
4571}
4572
4573
4574void
4575vm_page_lru(
4576 vm_page_t m)
4577{
4578 VM_PAGE_CHECK(m);
4579 assert(VM_PAGE_OBJECT(m) != kernel_object);
4580 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4581
4582 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4583
4584 if (m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q) {
4585 /*
4586 * we don't need to do all the other work that
4587 * vm_page_queues_remove and vm_page_enqueue_inactive
4588 * bring along for the ride
4589 */
4590 assert(!m->vmp_laundry);
4591 assert(!m->vmp_private);
4592
4593 m->vmp_no_cache = FALSE;
4594
4595 vm_page_queue_remove(&vm_page_queue_inactive, m, vm_page_t, vmp_pageq);
4596 vm_page_queue_enter(&vm_page_queue_inactive, m, vm_page_t, vmp_pageq);
4597
4598 return;
4599 }
4600 /*
4601 * if this page is currently on the pageout queue, we can't do the
4602 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4603 * and we can't remove it manually since we would need the object lock
4604 * (which is not required here) to decrement the activity_in_progress
4605 * reference which is held on the object while the page is in the pageout queue...
4606 * just let the normal laundry processing proceed
4607 */
4608 if (m->vmp_laundry || m->vmp_private ||
4609 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4610 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4611 VM_PAGE_WIRED(m))
4612 return;
4613
4614 m->vmp_no_cache = FALSE;
4615
4616 vm_page_queues_remove(m, FALSE);
4617
4618 vm_page_enqueue_inactive(m, FALSE);
4619}
4620
4621
4622void
4623vm_page_reactivate_all_throttled(void)
4624{
4625 vm_page_t first_throttled, last_throttled;
4626 vm_page_t first_active;
4627 vm_page_t m;
4628 int extra_active_count;
4629 int extra_internal_count, extra_external_count;
4630 vm_object_t m_object;
4631
4632 if (!VM_DYNAMIC_PAGING_ENABLED())
4633 return;
4634
4635 extra_active_count = 0;
4636 extra_internal_count = 0;
4637 extra_external_count = 0;
4638 vm_page_lock_queues();
4639 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
4640 /*
4641 * Switch "throttled" pages to "active".
4642 */
4643 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq) {
4644 VM_PAGE_CHECK(m);
4645 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
4646
4647 m_object = VM_PAGE_OBJECT(m);
4648
4649 extra_active_count++;
4650 if (m_object->internal) {
4651 extra_internal_count++;
4652 } else {
4653 extra_external_count++;
4654 }
4655
4656 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
4657 VM_PAGE_CHECK(m);
4658#if CONFIG_BACKGROUND_QUEUE
4659 if (m->vmp_in_background)
4660 vm_page_add_to_backgroundq(m, FALSE);
4661#endif
4662 }
4663
4664 /*
4665		 * Transfer the entire throttled queue to the regular LRU page queues.
4666 * We insert it at the head of the active queue, so that these pages
4667 * get re-evaluated by the LRU algorithm first, since they've been
4668 * completely out of it until now.
4669 */
4670 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4671 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4672 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4673 if (vm_page_queue_empty(&vm_page_queue_active)) {
4674 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4675 } else {
4676 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4677 }
4678 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
4679 first_throttled->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4680 last_throttled->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4681
4682#if DEBUG
4683 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4684#endif
4685 vm_page_queue_init(&vm_page_queue_throttled);
4686 /*
4687 * Adjust the global page counts.
4688 */
4689 vm_page_active_count += extra_active_count;
4690 vm_page_pageable_internal_count += extra_internal_count;
4691 vm_page_pageable_external_count += extra_external_count;
4692 vm_page_throttled_count = 0;
4693 }
4694 assert(vm_page_throttled_count == 0);
4695 assert(vm_page_queue_empty(&vm_page_queue_throttled));
4696 vm_page_unlock_queues();
4697}
4698
4699
4700/*
4701 * Move pages from the indicated local queue to the global active queue.
4702 * It's OK to fail if we're below the hard limit and force == FALSE;
4703 * the nolocks == TRUE case is to allow this function to be run on
4704 * the hibernate path.
4705 */
4706
4707void
4708vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4709{
4710 struct vpl *lq;
4711 vm_page_t first_local, last_local;
4712 vm_page_t first_active;
4713 vm_page_t m;
4714 uint32_t count = 0;
4715
4716 if (vm_page_local_q == NULL)
4717 return;
4718
4719 lq = &vm_page_local_q[lid].vpl_un.vpl;
4720
4721 if (nolocks == FALSE) {
4722 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4723 if ( !vm_page_trylockspin_queues())
4724 return;
4725 } else
4726 vm_page_lockspin_queues();
4727
4728 VPL_LOCK(&lq->vpl_lock);
4729 }
4730 if (lq->vpl_count) {
4731 /*
4732 * Switch "local" pages to "active".
4733 */
4734 assert(!vm_page_queue_empty(&lq->vpl_queue));
4735
4736 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, vmp_pageq) {
4737 VM_PAGE_CHECK(m);
4738 vm_page_check_pageable_safe(m);
4739 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4740 assert(!m->vmp_fictitious);
4741
4742 if (m->vmp_local_id != lid)
4743 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4744
4745 m->vmp_local_id = 0;
4746 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
4747 VM_PAGE_CHECK(m);
4748#if CONFIG_BACKGROUND_QUEUE
4749 if (m->vmp_in_background)
4750 vm_page_add_to_backgroundq(m, FALSE);
4751#endif
4752 count++;
4753 }
4754 if (count != lq->vpl_count)
4755 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4756
4757 /*
4758		 * Transfer the entire local queue to the regular LRU page queues.
4759 */
4760 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4761 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4762 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4763
4764 if (vm_page_queue_empty(&vm_page_queue_active)) {
4765 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4766 } else {
4767 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4768 }
4769 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
4770 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4771 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4772
4773 vm_page_queue_init(&lq->vpl_queue);
4774 /*
4775 * Adjust the global page counts.
4776 */
4777 vm_page_active_count += lq->vpl_count;
4778 vm_page_pageable_internal_count += lq->vpl_internal_count;
4779 vm_page_pageable_external_count += lq->vpl_external_count;
4780 lq->vpl_count = 0;
4781 lq->vpl_internal_count = 0;
4782 lq->vpl_external_count = 0;
4783 }
4784 assert(vm_page_queue_empty(&lq->vpl_queue));
4785
4786 if (nolocks == FALSE) {
4787 VPL_UNLOCK(&lq->vpl_lock);
4788
4789 vm_page_balance_inactive(count / 4);
4790 vm_page_unlock_queues();
4791 }
4792}
4793
4794/*
4795 * vm_page_part_zero_fill:
4796 *
4797 * Zero-fill a part of the page.
4798 */
4799#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4800void
4801vm_page_part_zero_fill(
4802 vm_page_t m,
4803 vm_offset_t m_pa,
4804 vm_size_t len)
4805{
4806
4807#if 0
4808 /*
4809 * we don't hold the page queue lock
4810 * so this check isn't safe to make
4811 */
4812 VM_PAGE_CHECK(m);
4813#endif
4814
4815#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4816 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
4817#else
4818 vm_page_t tmp;
4819 while (1) {
4820 tmp = vm_page_grab();
4821 if (tmp == VM_PAGE_NULL) {
4822 vm_page_wait(THREAD_UNINT);
4823 continue;
4824 }
4825 break;
4826 }
4827 vm_page_zero_fill(tmp);
4828	if (m_pa != 0) {
4829		vm_page_part_copy(m, 0, tmp, 0, m_pa);
4830	}
4831	if ((m_pa + len) < PAGE_SIZE) {
4832		vm_page_part_copy(m, m_pa + len, tmp,
4833		    m_pa + len, PAGE_SIZE - (m_pa + len));
4834	}
4835	vm_page_copy(tmp, m);
4836 VM_PAGE_FREE(tmp);
4837#endif
4838
4839}
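
/*
 * Example of using vm_page_part_zero_fill() to zero the invalid tail of
 * a page -- an illustrative sketch only, not part of the original code.
 * The 'valid_bytes' parameter and the helper name are hypothetical.
 */
#if 0 /* hedged sketch, not compiled */
static void
example_zero_page_tail(
	vm_page_t	m,
	vm_size_t	valid_bytes)
{
	if (valid_bytes < PAGE_SIZE) {
		/* zero everything past the valid data */
		vm_page_part_zero_fill(m, valid_bytes, PAGE_SIZE - valid_bytes);
	}
}
#endif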
4840
4841/*
4842 * vm_page_zero_fill:
4843 *
4844 * Zero-fill the specified page.
4845 */
4846void
4847vm_page_zero_fill(
4848 vm_page_t m)
4849{
4850 XPR(XPR_VM_PAGE,
4851 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4852 VM_PAGE_OBJECT(m), m->vmp_offset, m, 0,0);
4853#if 0
4854 /*
4855 * we don't hold the page queue lock
4856 * so this check isn't safe to make
4857 */
4858 VM_PAGE_CHECK(m);
4859#endif
4860
4861// dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4862 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
4863}
4864
4865/*
4866 * vm_page_part_copy:
4867 *
4868 * copy part of one page to another
4869 */
4870
4871void
4872vm_page_part_copy(
4873 vm_page_t src_m,
4874 vm_offset_t src_pa,
4875 vm_page_t dst_m,
4876 vm_offset_t dst_pa,
4877 vm_size_t len)
4878{
4879#if 0
4880 /*
4881 * we don't hold the page queue lock
4882 * so this check isn't safe to make
4883 */
4884 VM_PAGE_CHECK(src_m);
4885 VM_PAGE_CHECK(dst_m);
4886#endif
4887 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4888 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
4889}
4890
4891/*
4892 * vm_page_copy:
4893 *
4894 * Copy one page to another
4895 */
4896
4897int vm_page_copy_cs_validations = 0;
4898int vm_page_copy_cs_tainted = 0;
4899
4900void
4901vm_page_copy(
4902 vm_page_t src_m,
4903 vm_page_t dest_m)
4904{
4905 vm_object_t src_m_object;
4906
4907 src_m_object = VM_PAGE_OBJECT(src_m);
4908
4909 XPR(XPR_VM_PAGE,
4910 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
4911 src_m_object, src_m->vmp_offset,
4912 VM_PAGE_OBJECT(dest_m), dest_m->vmp_offset,
4913 0);
4914#if 0
4915 /*
4916 * we don't hold the page queue lock
4917 * so this check isn't safe to make
4918 */
4919 VM_PAGE_CHECK(src_m);
4920 VM_PAGE_CHECK(dest_m);
4921#endif
4922 vm_object_lock_assert_held(src_m_object);
4923
4924 if (src_m_object != VM_OBJECT_NULL &&
4925 src_m_object->code_signed) {
4926 /*
4927 * We're copying a page from a code-signed object.
4928 * Whoever ends up mapping the copy page might care about
4929 * the original page's integrity, so let's validate the
4930 * source page now.
4931 */
4932 vm_page_copy_cs_validations++;
4933 vm_page_validate_cs(src_m);
4934#if DEVELOPMENT || DEBUG
4935 DTRACE_VM4(codesigned_copy,
4936 vm_object_t, src_m_object,
4937 vm_object_offset_t, src_m->vmp_offset,
4938 int, src_m->vmp_cs_validated,
4939 int, src_m->vmp_cs_tainted);
4940#endif /* DEVELOPMENT || DEBUG */
4941
4942 }
4943
4944 /*
4945 * Propagate the cs_tainted bit to the copy page. Do not propagate
4946 * the cs_validated bit.
4947 */
4948 dest_m->vmp_cs_tainted = src_m->vmp_cs_tainted;
4949 if (dest_m->vmp_cs_tainted) {
4950 vm_page_copy_cs_tainted++;
4951 }
4952 dest_m->vmp_error = src_m->vmp_error; /* sliding src_m might have failed... */
4953 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
4954}
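
/*
 * Example of duplicating a resident page with vm_page_copy() -- an
 * illustrative sketch only, not part of the original code.  The helper
 * name is hypothetical; the source page's object must be locked, as the
 * assertion in vm_page_copy() requires, and VM_PAGE_WAIT() is assumed
 * from the VM headers.
 */
#if 0 /* hedged sketch, not compiled */
static vm_page_t
example_duplicate_page(
	vm_page_t	src_m)
{
	vm_page_t	dst_m;

	while ((dst_m = vm_page_grab()) == VM_PAGE_NULL) {
		VM_PAGE_WAIT();
	}
	/* copies the contents and propagates vmp_cs_tainted / vmp_error */
	vm_page_copy(src_m, dst_m);

	return (dst_m);
}
#endif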
4955
4956#if MACH_ASSERT
4957static void
4958_vm_page_print(
4959 vm_page_t p)
4960{
4961 printf("vm_page %p: \n", p);
4962 printf(" pageq: next=%p prev=%p\n",
4963 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next),
4964 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev));
4965 printf(" listq: next=%p prev=%p\n",
4966 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.next)),
4967 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.prev)));
4968 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m)));
4969 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->vmp_offset);
4970 printf(" wire_count=%u\n", p->vmp_wire_count);
4971 printf(" q_state=%u\n", p->vmp_q_state);
4972
4973 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
4974 (p->vmp_laundry ? "" : "!"),
4975 (p->vmp_reference ? "" : "!"),
4976 (p->vmp_gobbled ? "" : "!"),
4977 (p->vmp_private ? "" : "!"));
4978 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
4979 (p->vmp_busy ? "" : "!"),
4980 (p->vmp_wanted ? "" : "!"),
4981 (p->vmp_tabled ? "" : "!"),
4982 (p->vmp_fictitious ? "" : "!"),
4983 (p->vmp_pmapped ? "" : "!"),
4984 (p->vmp_wpmapped ? "" : "!"));
4985 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
4986 (p->vmp_free_when_done ? "" : "!"),
4987 (p->vmp_absent ? "" : "!"),
4988 (p->vmp_error ? "" : "!"),
4989 (p->vmp_dirty ? "" : "!"),
4990 (p->vmp_cleaning ? "" : "!"),
4991 (p->vmp_precious ? "" : "!"),
4992 (p->vmp_clustered ? "" : "!"));
4993 printf(" %soverwriting, %srestart, %sunusual\n",
4994 (p->vmp_overwriting ? "" : "!"),
4995 (p->vmp_restart ? "" : "!"),
4996 (p->vmp_unusual ? "" : "!"));
4997 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
4998 (p->vmp_cs_validated ? "" : "!"),
4999 (p->vmp_cs_tainted ? "" : "!"),
5000 (p->vmp_cs_nx ? "" : "!"),
5001 (p->vmp_no_cache ? "" : "!"));
5002
5003 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
5004}
5005
5006/*
5007 * Check that the list of pages is ordered by
5008 * ascending physical address and has no holes.
5009 */
5010static int
5011vm_page_verify_contiguous(
5012 vm_page_t pages,
5013 unsigned int npages)
5014{
5015 vm_page_t m;
5016 unsigned int page_count;
5017 vm_offset_t prev_addr;
5018
5019 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
5020 page_count = 1;
5021 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
5022 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5023 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
5024 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
5025 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
5026 panic("vm_page_verify_contiguous: not contiguous!");
5027 }
5028 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5029 ++page_count;
5030 }
5031 if (page_count != npages) {
5032 printf("pages %p actual count 0x%x but requested 0x%x\n",
5033 pages, page_count, npages);
5034 panic("vm_page_verify_contiguous: count error");
5035 }
5036 return 1;
5037}
5038
5039
5040/*
5041 * Check the free lists for proper length etc.
5042 */
5043static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
5044static unsigned int
5045vm_page_verify_free_list(
5046 vm_page_queue_head_t *vm_page_queue,
5047 unsigned int color,
5048 vm_page_t look_for_page,
5049 boolean_t expect_page)
5050{
5051 unsigned int npages;
5052 vm_page_t m;
5053 vm_page_t prev_m;
5054 boolean_t found_page;
5055
5056 if (! vm_page_verify_this_free_list_enabled)
5057 return 0;
5058
5059 found_page = FALSE;
5060 npages = 0;
5061 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
5062
5063 vm_page_queue_iterate(vm_page_queue,
5064 m,
5065 vm_page_t,
5066 vmp_pageq) {
5067
5068 if (m == look_for_page) {
5069 found_page = TRUE;
5070 }
5071 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev) != prev_m)
5072 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
5073 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev), prev_m);
5074 if ( ! m->vmp_busy )
5075 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
5076 color, npages, m);
5077 if (color != (unsigned int) -1) {
5078 if (VM_PAGE_GET_COLOR(m) != color)
5079 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
5080 color, npages, m, VM_PAGE_GET_COLOR(m), color);
5081 if (m->vmp_q_state != VM_PAGE_ON_FREE_Q)
5082 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
5083 color, npages, m, m->vmp_q_state);
5084 } else {
5085 if (m->vmp_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
5086 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
5087 npages, m, m->vmp_q_state);
5088 }
5089 ++npages;
5090 prev_m = m;
5091 }
5092 if (look_for_page != VM_PAGE_NULL) {
5093 unsigned int other_color;
5094
5095 if (expect_page && !found_page) {
5096 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
5097 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5098 _vm_page_print(look_for_page);
5099 for (other_color = 0;
5100 other_color < vm_colors;
5101 other_color++) {
5102 if (other_color == color)
5103 continue;
5104 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
5105 other_color, look_for_page, FALSE);
5106 }
5107 if (color == (unsigned int) -1) {
5108 vm_page_verify_free_list(&vm_lopage_queue_free,
5109 (unsigned int) -1, look_for_page, FALSE);
5110 }
5111 panic("vm_page_verify_free_list(color=%u)\n", color);
5112 }
5113 if (!expect_page && found_page) {
5114 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
5115 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5116 }
5117 }
5118 return npages;
5119}
5120
5121static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
5122static void
5123vm_page_verify_free_lists( void )
5124{
5125 unsigned int color, npages, nlopages;
5126 boolean_t toggle = TRUE;
5127
5128 if (! vm_page_verify_all_free_lists_enabled)
5129 return;
5130
5131 npages = 0;
5132
5133 lck_mtx_lock(&vm_page_queue_free_lock);
5134
5135 if (vm_page_verify_this_free_list_enabled == TRUE) {
5136 /*
5137 * This variable has been set globally for extra checking of
5138 * each free list Q. Since we didn't set it, we don't own it
5139 * and we shouldn't toggle it.
5140 */
5141 toggle = FALSE;
5142 }
5143
5144 if (toggle == TRUE) {
5145 vm_page_verify_this_free_list_enabled = TRUE;
5146 }
5147
5148	for (color = 0; color < vm_colors; color++) {
5149 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
5150 color, VM_PAGE_NULL, FALSE);
5151 }
5152 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
5153 (unsigned int) -1,
5154 VM_PAGE_NULL, FALSE);
5155 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
5156 panic("vm_page_verify_free_lists: "
5157 "npages %u free_count %d nlopages %u lo_free_count %u",
5158 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
5159
5160 if (toggle == TRUE) {
5161 vm_page_verify_this_free_list_enabled = FALSE;
5162 }
5163
5164 lck_mtx_unlock(&vm_page_queue_free_lock);
5165}
5166
5167#endif /* MACH_ASSERT */
5168
5169
5170
5171#if __arm64__
5172/*
5173 * One or more clients (currently only SEP) ask for a large contiguous chunk of memory
5174 * after the system has 'aged'. To ensure that other allocation requests don't reduce
5175 * the chances of that request being satisfied, we pre-allocate a single contiguous
5176 * 10MB buffer and hand it out to the first request of >= 4MB.
5177 */
5178
5179kern_return_t cpm_preallocate_early(void);
5180
5181vm_page_t cpm_preallocated_pages_list = NULL;
5182boolean_t preallocated_buffer_available = FALSE;
5183
5184#define PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT ((10 * 1024 * 1024) / PAGE_SIZE_64) /* 10 MB */
5185#define MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER ((4 * 1024 * 1024) / PAGE_SIZE_64) /* 4 MB */
5186
5187kern_return_t
5188cpm_preallocate_early(void)
5189{
5190
5191 kern_return_t kr = KERN_SUCCESS;
5192 vm_map_size_t prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);
5193
5194 printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);
5195
5196 kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);
5197
5198 if (kr != KERN_SUCCESS) {
5199 printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
5200 } else {
5201 preallocated_buffer_available = TRUE;
5202 }
5203
5204 return kr;
5205}
5206#endif /* __arm64__ */
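
/*
 * Worked example of the constants above (editor's note): with 16KB
 * pages, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT is 10MB / 16KB = 640
 * pages and the 4MB threshold is 256 pages; with 4KB pages the same
 * macros yield 2560 and 1024 pages respectively.  Only the first
 * qualifying request consumes the preallocated buffer -- any unused
 * remainder is returned to the free list and the buffer is not
 * replenished.
 */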
5207
5208
5209extern boolean_t (* volatile consider_buffer_cache_collect)(int);
5210
5211/*
5212 * CONTIGUOUS PAGE ALLOCATION
5213 *
5214 * Find a region large enough to contain at least n pages
5215 * of contiguous physical memory.
5216 *
5217 * This is done by traversing the vm_page_t array in a linear fashion.
5218 * We assume that the vm_page_t array has the available physical pages in an
5219 * ordered, ascending list... this is currently true of all our implementations
5220 * and must remain so... there can be 'holes' in the array... we also can
5221 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
5222 * which used to happen via 'vm_page_convert'... that function was no longer
5223 * being called and was removed...
5224 *
5225 * The basic flow consists of stabilizing some of the interesting state of
5226 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5227 * sweep at the beginning of the array looking for pages that meet our criteria
5228 * for a 'stealable' page... currently we are pretty conservative... if the page
5229 * meets these criteria and is physically contiguous to the previous page in the 'run'
5230 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5231 * and start to develop a new run... if at this point we've already considered
5232 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5233 * and mutex_pause (which will yield the processor), to keep the latency low with
5234 * respect to other threads trying to acquire free pages (or move pages from q to q),
5235 * and then continue from the spot we left off... we only make 1 pass through the
5236 * array. Once we have a 'run' that is long enough, we'll go into the loop
5237 * which steals the pages from the queues they're currently on... pages on the free
5238 * queue can be stolen directly... pages that are on any of the other queues
5239 * must be removed from the object they are tabled on... this requires taking the
5240 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5241 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5242 * dump the pages we've currently stolen back to the free list, and pick up our
5243 * scan from the point where we aborted the 'current' run.
5244 *
5245 *
5246 * Requirements:
5247 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5248 *
5249 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5250 *
5251 * Algorithm:
5252 */
5253
5254#define MAX_CONSIDERED_BEFORE_YIELD 1000
5255
5256
5257#define RESET_STATE_OF_RUN() \
5258 MACRO_BEGIN \
5259 prevcontaddr = -2; \
5260 start_pnum = -1; \
5261 free_considered = 0; \
5262 substitute_needed = 0; \
5263 npages = 0; \
5264 MACRO_END
5265
5266/*
5267 * Can we steal in-use (i.e. not free) pages when searching for
5268 * physically-contiguous pages ?
5269 */
5270#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5271
5272static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5273#if DEBUG
5274int vm_page_find_contig_debug = 0;
5275#endif
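
/*
 * Example call into vm_page_find_contiguous() -- an illustrative sketch
 * only, not part of the original code.  The wrapper name is
 * hypothetical; in-kernel callers normally reach this routine through
 * higher-level interfaces such as cpm_allocate() rather than calling it
 * directly.
 */
#if 0 /* hedged sketch, not compiled */
static vm_page_t
example_grab_contiguous_run(void)
{
	/*
	 * ask for 16 contiguous pages, anywhere in physical memory
	 * (max_pnum == 0 means no upper bound), with the first physical
	 * page number aligned to a multiple of 16 (pnum_mask == 0xF),
	 * wired, and with no KMA_LOMEM restriction... the result is a
	 * list of gobbled/wired pages or VM_PAGE_NULL
	 */
	return vm_page_find_contiguous(16, 0, 0xF, TRUE, 0);
}
#endif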
5276
5277static vm_page_t
5278vm_page_find_contiguous(
5279 unsigned int contig_pages,
5280 ppnum_t max_pnum,
5281 ppnum_t pnum_mask,
5282 boolean_t wire,
5283 int flags)
5284{
5285 vm_page_t m = NULL;
5286 ppnum_t prevcontaddr = 0;
5287 ppnum_t start_pnum = 0;
5288 unsigned int npages = 0, considered = 0, scanned = 0;
5289 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5290 unsigned int idx_last_contig_page_found = 0;
5291 int free_considered = 0, free_available = 0;
5292 int substitute_needed = 0;
5293 boolean_t wrapped, zone_gc_called = FALSE;
5294 kern_return_t kr;
5295#if DEBUG
5296 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5297 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5298#endif
5299
5300 int yielded = 0;
5301 int dumped_run = 0;
5302 int stolen_pages = 0;
5303 int compressed_pages = 0;
5304
5305
5306 if (contig_pages == 0)
5307 return VM_PAGE_NULL;
5308
5309full_scan_again:
5310
5311#if MACH_ASSERT
5312 vm_page_verify_free_lists();
5313#endif
5314#if DEBUG
5315 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5316#endif
5317 PAGE_REPLACEMENT_ALLOWED(TRUE);
5318
5319 vm_page_lock_queues();
5320
5321#if __arm64__
5322 if (preallocated_buffer_available) {
5323
5324 if ((contig_pages >= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER) && (contig_pages <= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT)) {
5325
5326 m = cpm_preallocated_pages_list;
5327
5328 start_idx = (unsigned int) (m - &vm_pages[0]);
5329
5330 if (wire == FALSE) {
5331
5332 last_idx = start_idx;
5333
5334				for (npages = 0; npages < contig_pages; npages++, last_idx++) {
5335
5336 assert(vm_pages[last_idx].vmp_gobbled == FALSE);
5337
5338 vm_pages[last_idx].vmp_gobbled = TRUE;
5339 vm_page_gobble_count++;
5340
5341 assert(1 == vm_pages[last_idx].vmp_wire_count);
5342 /*
5343 * Gobbled pages are counted as wired pages. So no need to drop
5344 * the global wired page count. Just the page's wire count is fine.
5345 */
5346 vm_pages[last_idx].vmp_wire_count--;
5347 vm_pages[last_idx].vmp_q_state = VM_PAGE_NOT_ON_Q;
5348 }
5349
5350 }
5351
5352 last_idx = start_idx + contig_pages - 1;
5353
5354 vm_pages[last_idx].vmp_snext = NULL;
5355
5356			printf("Using preallocated buffer: Requested size (pages): %d... index range: %d-%d... freeing %llu pages\n", contig_pages, start_idx, last_idx, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT - contig_pages);
5357
5358 last_idx += 1;
5359			for (npages = contig_pages; npages < PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT; npages++, last_idx++) {
5360
5361 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages[last_idx]);
5362 vm_page_free(&vm_pages[last_idx]);
5363 }
5364
5365 cpm_preallocated_pages_list = NULL;
5366 preallocated_buffer_available = FALSE;
5367
5368 goto done_scanning;
5369 }
5370 }
5371#endif /* __arm64__ */
5372
5373 lck_mtx_lock(&vm_page_queue_free_lock);
5374
5375 RESET_STATE_OF_RUN();
5376
5377 scanned = 0;
5378 considered = 0;
5379 free_available = vm_page_free_count - vm_page_free_reserved;
5380
5381 wrapped = FALSE;
5382
5383	if (flags & KMA_LOMEM)
5384 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5385 else
5386 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5387
5388 orig_last_idx = idx_last_contig_page_found;
5389 last_idx = orig_last_idx;
5390
5391 for (page_idx = last_idx, start_idx = last_idx;
5392 npages < contig_pages && page_idx < vm_pages_count;
5393 page_idx++) {
5394retry:
5395 if (wrapped &&
5396 npages == 0 &&
5397 page_idx >= orig_last_idx) {
5398 /*
5399 * We're back where we started and we haven't
5400 * found any suitable contiguous range. Let's
5401 * give up.
5402 */
5403 break;
5404 }
5405 scanned++;
5406 m = &vm_pages[page_idx];
5407
5408 assert(!m->vmp_fictitious);
5409 assert(!m->vmp_private);
5410
5411 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
5412 /* no more low pages... */
5413 break;
5414 }
5415		if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
5416 /*
5417 * not aligned
5418 */
5419 RESET_STATE_OF_RUN();
5420
5421 } else if (VM_PAGE_WIRED(m) || m->vmp_gobbled ||
5422 m->vmp_laundry || m->vmp_wanted ||
5423 m->vmp_cleaning || m->vmp_overwriting || m->vmp_free_when_done) {
5424 /*
5425 * page is in a transient state
5426 * or a state we don't want to deal
5427 * with, so don't consider it which
5428 * means starting a new run
5429 */
5430 RESET_STATE_OF_RUN();
5431
5432 } else if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
5433 (m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5434 (m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5435 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5436 /*
5437			 * page needs to be on one of our queues (other than the pageout or special free queues)
5438 * or it needs to belong to the compressor pool (which is now indicated
5439 * by vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5440 * from the check for VM_PAGE_NOT_ON_Q)
5441 * in order for it to be stable behind the
5442 * locks we hold at this point...
5443 * if not, don't consider it which
5444 * means starting a new run
5445 */
5446 RESET_STATE_OF_RUN();
5447
5448 } else if ((m->vmp_q_state != VM_PAGE_ON_FREE_Q) && (!m->vmp_tabled || m->vmp_busy)) {
5449 /*
5450 * pages on the free list are always 'busy'
5451 * so we couldn't test for 'busy' in the check
5452 * for the transient states... pages that are
5453 * 'free' are never 'tabled', so we also couldn't
5454 * test for 'tabled'. So we check here to make
5455 * sure that a non-free page is not busy and is
5456 * tabled on an object...
5457 * if not, don't consider it which
5458 * means starting a new run
5459 */
5460 RESET_STATE_OF_RUN();
5461
5462 } else {
5463 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5464 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5465 RESET_STATE_OF_RUN();
5466 goto did_consider;
5467 } else {
5468 npages = 1;
5469 start_idx = page_idx;
5470 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5471 }
5472 } else {
5473 npages++;
5474 }
5475 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5476
5477 VM_PAGE_CHECK(m);
5478 if (m->vmp_q_state == VM_PAGE_ON_FREE_Q) {
5479 free_considered++;
5480 } else {
5481 /*
5482 * This page is not free.
5483 * If we can't steal used pages,
5484 * we have to give up this run
5485 * and keep looking.
5486 * Otherwise, we might need to
5487 * move the contents of this page
5488 * into a substitute page.
5489 */
5490#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5491 if (m->vmp_pmapped || m->vmp_dirty || m->vmp_precious) {
5492 substitute_needed++;
5493 }
5494#else
5495 RESET_STATE_OF_RUN();
5496#endif
5497 }
5498
5499 if ((free_considered + substitute_needed) > free_available) {
5500 /*
5501 * if we let this run continue
5502 * we will end up dropping the vm_page_free_count
5503 * below the reserve limit... we need to abort
5504 * this run, but we can at least re-consider this
5505 * page... thus the jump back to 'retry'
5506 */
5507 RESET_STATE_OF_RUN();
5508
5509 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5510 considered++;
5511 goto retry;
5512 }
5513 /*
5514 * free_available == 0, so we can't consider
5515 * any free pages... if we went to retry in
5516 * this case, we'd get stuck looking at the
5517 * same page without making any forward progress...
5518 * we also want to take this path if we've already
5519 * reached the limit that bounds how long we
5520 * hold these locks (the lock latency)
5521 */
5522 }
5523 }
5524did_consider:
5525 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5526
5527 PAGE_REPLACEMENT_ALLOWED(FALSE);
5528
5529 lck_mtx_unlock(&vm_page_queue_free_lock);
5530 vm_page_unlock_queues();
5531
5532 mutex_pause(0);
5533
5534 PAGE_REPLACEMENT_ALLOWED(TRUE);
5535
5536 vm_page_lock_queues();
5537 lck_mtx_lock(&vm_page_queue_free_lock);
5538
5539 RESET_STATE_OF_RUN();
5540 /*
5541 * reset our free page limit since we
5542 * dropped the lock protecting the vm_page_free_queue
5543 */
5544 free_available = vm_page_free_count - vm_page_free_reserved;
5545 considered = 0;
5546
5547 yielded++;
5548
5549 goto retry;
5550 }
5551 considered++;
5552 }
5553 m = VM_PAGE_NULL;
5554
5555 if (npages != contig_pages) {
5556 if (!wrapped) {
5557 /*
5558 * We didn't find a contiguous range but we didn't
5559 * start from the very first page.
5560 * Start again from the very first page.
5561 */
5562 RESET_STATE_OF_RUN();
5563 if( flags & KMA_LOMEM)
5564 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5565 else
5566 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5567 last_idx = 0;
5568 page_idx = last_idx;
5569 wrapped = TRUE;
5570 goto retry;
5571 }
5572 lck_mtx_unlock(&vm_page_queue_free_lock);
5573 } else {
5574 vm_page_t m1;
5575 vm_page_t m2;
5576 unsigned int cur_idx;
5577 unsigned int tmp_start_idx;
5578 vm_object_t locked_object = VM_OBJECT_NULL;
5579 boolean_t abort_run = FALSE;
5580
5581 assert(page_idx - start_idx == contig_pages);
5582
5583 tmp_start_idx = start_idx;
5584
5585 /*
5586 * first pass through to pull the free pages
5587 * off of the free queue so that in case we
5588 * need substitute pages, we won't grab any
5589 * of the free pages in the run... we'll clear
5590 * the 'free' bit in the 2nd pass, and even in
5591 * an abort_run case, we'll collect all of the
5592 * free pages in this run and return them to the free list
5593 */
5594 while (start_idx < page_idx) {
5595
5596 m1 = &vm_pages[start_idx++];
5597
5598#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5599 assert(m1->vmp_q_state == VM_PAGE_ON_FREE_Q);
5600#endif
5601
5602 if (m1->vmp_q_state == VM_PAGE_ON_FREE_Q) {
5603 unsigned int color;
5604
5605 color = VM_PAGE_GET_COLOR(m1);
5606#if MACH_ASSERT
5607 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
5608#endif
5609 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5610 m1,
5611 vm_page_t,
5612 vmp_pageq);
5613
5614 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5615#if MACH_ASSERT
5616 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
5617#endif
5618 /*
5619 * Clear the "free" bit so that this page
5620 * does not get considered for another
5621 * concurrent physically-contiguous allocation.
5622 */
5623 m1->vmp_q_state = VM_PAGE_NOT_ON_Q;
5624 assert(m1->vmp_busy);
5625
5626 vm_page_free_count--;
5627 }
5628 }
5629 if( flags & KMA_LOMEM)
5630 vm_page_lomem_find_contiguous_last_idx = page_idx;
5631 else
5632 vm_page_find_contiguous_last_idx = page_idx;
5633
5634 /*
5635 * we can drop the free queue lock at this point since
5636 * we've pulled any 'free' candidates off of the list
5637 * we need it dropped so that we can do a vm_page_grab
5638 * when substituting for pmapped/dirty pages
5639 */
5640 lck_mtx_unlock(&vm_page_queue_free_lock);
5641
5642 start_idx = tmp_start_idx;
5643 cur_idx = page_idx - 1;
5644
5645 while (start_idx++ < page_idx) {
5646 /*
5647 * must go through the list from back to front
5648 * so that the page list is created in the
5649 * correct order - low -> high phys addresses
5650 */
5651 m1 = &vm_pages[cur_idx--];
5652
5653 if (m1->vmp_object == 0) {
5654 /*
5655 * page has already been removed from
5656 * the free list in the 1st pass
5657 */
5658 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
5659 assert(m1->vmp_offset == (vm_object_offset_t) -1);
5660 assert(m1->vmp_busy);
5661 assert(!m1->vmp_wanted);
5662 assert(!m1->vmp_laundry);
5663 } else {
5664 vm_object_t object;
5665 int refmod;
5666 boolean_t disconnected, reusable;
5667
5668 if (abort_run == TRUE)
5669 continue;
5670
5671 assert(m1->vmp_q_state != VM_PAGE_NOT_ON_Q);
5672
5673 object = VM_PAGE_OBJECT(m1);
5674
5675 if (object != locked_object) {
5676 if (locked_object) {
5677 vm_object_unlock(locked_object);
5678 locked_object = VM_OBJECT_NULL;
5679 }
5680 if (vm_object_lock_try(object))
5681 locked_object = object;
5682 }
5683 if (locked_object == VM_OBJECT_NULL ||
5684 (VM_PAGE_WIRED(m1) || m1->vmp_gobbled ||
5685 m1->vmp_laundry || m1->vmp_wanted ||
5686 m1->vmp_cleaning || m1->vmp_overwriting || m1->vmp_free_when_done || m1->vmp_busy) ||
5687 (m1->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5688
5689 if (locked_object) {
5690 vm_object_unlock(locked_object);
5691 locked_object = VM_OBJECT_NULL;
5692 }
5693 tmp_start_idx = cur_idx;
5694 abort_run = TRUE;
5695 continue;
5696 }
5697
5698 disconnected = FALSE;
5699 reusable = FALSE;
5700
5701 if ((m1->vmp_reusable ||
5702 object->all_reusable) &&
5703 (m1->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5704 !m1->vmp_dirty &&
5705 !m1->vmp_reference) {
5706 /* reusable page... */
5707 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5708 disconnected = TRUE;
5709 if (refmod == 0) {
5710 /*
5711 * ... not reused: can steal
5712 * without relocating contents.
5713 */
5714 reusable = TRUE;
5715 }
5716 }
5717
5718 if ((m1->vmp_pmapped &&
5719 ! reusable) ||
5720 m1->vmp_dirty ||
5721 m1->vmp_precious) {
5722 vm_object_offset_t offset;
5723
5724 m2 = vm_page_grab();
5725
5726 if (m2 == VM_PAGE_NULL) {
5727 if (locked_object) {
5728 vm_object_unlock(locked_object);
5729 locked_object = VM_OBJECT_NULL;
5730 }
5731 tmp_start_idx = cur_idx;
5732 abort_run = TRUE;
5733 continue;
5734 }
5735 if (! disconnected) {
5736 if (m1->vmp_pmapped)
5737 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5738 else
5739 refmod = 0;
5740 }
5741
5742 /* copy the page's contents */
5743 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
5744 /* copy the page's state */
5745 assert(!VM_PAGE_WIRED(m1));
5746 assert(m1->vmp_q_state != VM_PAGE_ON_FREE_Q);
5747 assert(m1->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q);
5748 assert(!m1->vmp_laundry);
5749 m2->vmp_reference = m1->vmp_reference;
5750 assert(!m1->vmp_gobbled);
5751 assert(!m1->vmp_private);
5752 m2->vmp_no_cache = m1->vmp_no_cache;
5753 m2->vmp_xpmapped = 0;
5754 assert(!m1->vmp_busy);
5755 assert(!m1->vmp_wanted);
5756 assert(!m1->vmp_fictitious);
5757 m2->vmp_pmapped = m1->vmp_pmapped; /* should flush cache ? */
5758 m2->vmp_wpmapped = m1->vmp_wpmapped;
5759 assert(!m1->vmp_free_when_done);
5760 m2->vmp_absent = m1->vmp_absent;
5761 m2->vmp_error = m1->vmp_error;
5762 m2->vmp_dirty = m1->vmp_dirty;
5763 assert(!m1->vmp_cleaning);
5764 m2->vmp_precious = m1->vmp_precious;
5765 m2->vmp_clustered = m1->vmp_clustered;
5766 assert(!m1->vmp_overwriting);
5767 m2->vmp_restart = m1->vmp_restart;
5768 m2->vmp_unusual = m1->vmp_unusual;
5769 m2->vmp_cs_validated = m1->vmp_cs_validated;
5770 m2->vmp_cs_tainted = m1->vmp_cs_tainted;
5771 m2->vmp_cs_nx = m1->vmp_cs_nx;
5772
5773 /*
5774 * If m1 had really been reusable,
5775 * we would have just stolen it, so
5776 * let's not propagate its "reusable"
5777 * bit and assert that m2 is not
5778 * marked as "reusable".
5779 */
5780 // m2->vmp_reusable = m1->vmp_reusable;
5781 assert(!m2->vmp_reusable);
5782
5783 // assert(!m1->vmp_lopage);
5784
5785 if (m1->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5786 m2->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
5787
5788 /*
5789 * page may need to be flushed if
5790 * it is marshalled into a UPL
5791 * that is going to be used by a device
5792 * that doesn't support coherency
5793 */
5794 m2->vmp_written_by_kernel = TRUE;
5795
5796 /*
5797 * make sure we clear the ref/mod state
5798 * from the pmap layer... else we risk
5799 * inheriting state from the last time
5800 * this page was used...
5801 */
5802 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5803
5804 if (refmod & VM_MEM_REFERENCED)
5805 m2->vmp_reference = TRUE;
5806 if (refmod & VM_MEM_MODIFIED) {
5807 SET_PAGE_DIRTY(m2, TRUE);
5808 }
5809 offset = m1->vmp_offset;
5810
5811 /*
5812 * completely cleans up the state
5813 * of the page so that it is ready
5814 * to be put onto the free list, or
5815 * for this purpose it looks like it
5816 * just came off of the free list
5817 */
5818 vm_page_free_prepare(m1);
5819
5820 /*
5821 * now put the substitute page
5822 * on the object
5823 */
5824 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
5825
5826 if (m2->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5827 m2->vmp_pmapped = TRUE;
5828 m2->vmp_wpmapped = TRUE;
5829
5830 PMAP_ENTER(kernel_pmap, m2->vmp_offset, m2,
5831 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5832
5833 assert(kr == KERN_SUCCESS);
5834
5835 compressed_pages++;
5836
5837 } else {
5838 if (m2->vmp_reference)
5839 vm_page_activate(m2);
5840 else
5841 vm_page_deactivate(m2);
5842 }
5843 PAGE_WAKEUP_DONE(m2);
5844
5845 } else {
5846 assert(m1->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
5847
5848 /*
5849 * completely cleans up the state
5850 * of the page so that it is ready
5851 * to be put onto the free list, or
5852 * for this purpose it looks like it
5853 * just came off of the free list
5854 */
5855 vm_page_free_prepare(m1);
5856 }
5857
5858 stolen_pages++;
5859
5860 }
5861#if CONFIG_BACKGROUND_QUEUE
5862 vm_page_assign_background_state(m1);
5863#endif
5864 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5865 m1->vmp_snext = m;
5866 m = m1;
5867 }
5868 if (locked_object) {
5869 vm_object_unlock(locked_object);
5870 locked_object = VM_OBJECT_NULL;
5871 }
5872
5873 if (abort_run == TRUE) {
5874 /*
5875 * we want to resume the scan just past the page
5876 * that caused the abort... tmp_start_idx was left
5877 * one below that page (cur_idx is auto-decremented
5878 * on use), so add 1 to get back to it and 1 more
5879 * to step over it
5880 */
5881 page_idx = tmp_start_idx + 2;
5882 if (page_idx >= vm_pages_count) {
5883 if (wrapped) {
5884 if (m != VM_PAGE_NULL) {
5885 vm_page_unlock_queues();
5886 vm_page_free_list(m, FALSE);
5887 vm_page_lock_queues();
5888 m = VM_PAGE_NULL;
5889 }
5890 dumped_run++;
5891 goto done_scanning;
5892 }
5893 page_idx = last_idx = 0;
5894 wrapped = TRUE;
5895 }
5896 abort_run = FALSE;
5897
5898 /*
5899 * We couldn't complete this run; reset the run state and
5900 * resume scanning at page_idx, which is just past the page
5901 * that caused the abort (or index 0 if we just wrapped).
5902 */
5903 RESET_STATE_OF_RUN();
5904
5905 if( flags & KMA_LOMEM)
5906 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5907 else
5908 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5909
5910 last_idx = page_idx;
5911
5912 if (m != VM_PAGE_NULL) {
5913 vm_page_unlock_queues();
5914 vm_page_free_list(m, FALSE);
5915 vm_page_lock_queues();
5916 m = VM_PAGE_NULL;
5917 }
5918 dumped_run++;
5919
5920 lck_mtx_lock(&vm_page_queue_free_lock);
5921 /*
5922 * reset our free page limit since we
5923 * dropped the lock protecting the vm_page_free_queue
5924 */
5925 free_available = vm_page_free_count - vm_page_free_reserved;
5926 goto retry;
5927 }
5928
5929 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
5930
5931 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
5932 assert(m1->vmp_wire_count == 0);
5933
5934 if (wire == TRUE) {
5935 m1->vmp_wire_count++;
5936 m1->vmp_q_state = VM_PAGE_IS_WIRED;
5937 } else
5938 m1->vmp_gobbled = TRUE;
5939 }
5940 if (wire == FALSE)
5941 vm_page_gobble_count += npages;
5942
5943 /*
5944 * gobbled pages are also counted as wired pages
5945 */
5946 vm_page_wire_count += npages;
5947
5948 assert(vm_page_verify_contiguous(m, npages));
5949 }
5950done_scanning:
5951 PAGE_REPLACEMENT_ALLOWED(FALSE);
5952
5953 vm_page_unlock_queues();
5954
5955#if DEBUG
5956 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5957
5958 tv_end_sec -= tv_start_sec;
5959 if (tv_end_usec < tv_start_usec) {
5960 tv_end_sec--;
5961 tv_end_usec += 1000000;
5962 }
5963 tv_end_usec -= tv_start_usec;
5964 if (tv_end_usec >= 1000000) {
5965 tv_end_sec++;
5966 tv_end_usec -= 1000000;
5967 }
5968 if (vm_page_find_contig_debug) {
5969 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5970 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5971 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5972 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
5973 }
5974
5975#endif
5976#if MACH_ASSERT
5977 vm_page_verify_free_lists();
5978#endif
5979 if (m == NULL && zone_gc_called == FALSE) {
5980 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5981 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5982 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5983
5984 if (consider_buffer_cache_collect != NULL) {
5985 (void)(*consider_buffer_cache_collect)(1);
5986 }
5987
5988 consider_zone_gc(FALSE);
5989
5990 zone_gc_called = TRUE;
5991
5992 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5993 goto full_scan_again;
5994 }
5995
5996 return m;
5997}
5998
5999/*
6000 * Allocate a list of contiguous, wired pages.
6001 */
6002kern_return_t
6003cpm_allocate(
6004 vm_size_t size,
6005 vm_page_t *list,
6006 ppnum_t max_pnum,
6007 ppnum_t pnum_mask,
6008 boolean_t wire,
6009 int flags)
6010{
6011 vm_page_t pages;
6012 unsigned int npages;
6013
6014 if (size % PAGE_SIZE != 0)
6015 return KERN_INVALID_ARGUMENT;
6016
6017 npages = (unsigned int) (size / PAGE_SIZE);
6018 if (npages != size / PAGE_SIZE) {
6019 /* 32-bit overflow */
6020 return KERN_INVALID_ARGUMENT;
6021 }
6022
6023 /*
6024 * Obtain a pointer to a subset of the free
6025 * list large enough to satisfy the request;
6026 * the region will be physically contiguous.
6027 */
6028 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
6029
6030 if (pages == VM_PAGE_NULL)
6031 return KERN_NO_SPACE;
6032 /*
6033 * determine need for wakeups
6034 */
6035 if (vm_page_free_count < vm_page_free_min)
6036 thread_wakeup((event_t) &vm_page_free_wanted);
6037
6038 VM_CHECK_MEMORYSTATUS;
6039
6040 /*
6041 * The CPM pages should now be available and
6042 * ordered by ascending physical address.
6043 */
6044 assert(vm_page_verify_contiguous(pages, npages));
6045
6046 *list = pages;
6047 return KERN_SUCCESS;
6048}
6049
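/*
 * Illustrative (hypothetical) call, e.g. for a caller that needs a wired,
 * physically contiguous, 64KB-aligned buffer that lives below 4GB -- the
 * specific values below are assumptions, not taken from any real caller:
 *
 *	vm_page_t	pages;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(16 * PAGE_SIZE,		// size: must be a page multiple
 *	    &pages,					// returned list, ascending phys order
 *	    atop(0xFFFFFFFFULL),			// max_pnum: highest acceptable page
 *	    (0x10000 >> PAGE_SHIFT) - 1,		// pnum_mask: 64KB alignment of the first page
 *	    TRUE,					// wire the pages
 *	    0);						// flags (KMA_LOMEM to restrict to low pages)
 */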
6050
6051unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
6052
6053/*
6054 * when working on a 'run' of pages, it is necessary to hold
6055 * the vm_page_queue_lock (a hot global lock) for certain operations
6056 * on the page... however, the majority of the work can be done
6057 * while merely holding the object lock... in fact there are certain
6058 * collections of pages that don't require any work brokered by the
6059 * vm_page_queue_lock... to mitigate the time spent behind the global
6060 * lock, use a 2-pass algorithm: collect pages up to DELAYED_WORK_LIMIT
6061 * while doing all of the work that doesn't require the vm_page_queue_lock,
6062 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and
6063 * do the remaining work for each page... the caller grabs the busy bit on
6064 * the page, if it's not already held, so that vm_page_do_delayed_work can
6065 * safely drop the object lock when it can't immediately take the
6066 * vm_page_queue_lock and must compete for the locks in the same order
6067 * that vm_pageout_scan takes them.
6068 * the dw_mask operation names mirror the names of the routines that would
6069 * have been called in the original loop, to make the changes obvious.
6070 */
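/*
 * A minimal sketch of the intended calling pattern (illustrative only --
 * the exact batching macros and limits used by real callers elsewhere in
 * the VM code may differ):
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *
 *	// pass 1: with only the object lock held, record the queue work
 *	dwp->dw_m = m;
 *	dwp->dw_mask = DW_vm_page_free | DW_clear_busy | DW_PAGE_WAKEUP;
 *	dwp++;
 *	dw_count++;
 *
 *	// pass 2: broker all of the queue-locked work in one shot
 *	if (dw_count >= vm_max_delayed_work_limit) {
 *		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 */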
6071
6072void
6073vm_page_do_delayed_work(
6074 vm_object_t object,
6075 vm_tag_t tag,
6076 struct vm_page_delayed_work *dwp,
6077 int dw_count)
6078{
6079 int j;
6080 vm_page_t m;
6081 vm_page_t local_free_q = VM_PAGE_NULL;
6082
6083 /*
6084 * pageout_scan takes the vm_page_lock_queues first
6085 * then tries for the object lock... to avoid what
6086 * is effectively a lock inversion, we'll go to the
6087 * trouble of taking them in that same order... otherwise
6088 * if this object contains the majority of the pages resident
6089 * in the UBC (or a small set of large objects actively being
6090 * worked on contain the majority of the pages), we could
6091 * cause the pageout_scan thread to 'starve' in its attempt
6092 * to find pages to move to the free queue, since it has to
6093 * successfully acquire the object lock of any candidate page
6094 * before it can steal/clean it.
6095 */
6096 if (!vm_page_trylockspin_queues()) {
6097 vm_object_unlock(object);
6098
6099 vm_page_lockspin_queues();
6100
6101 for (j = 0; ; j++) {
6102 if (!vm_object_lock_avoid(object) &&
6103 _vm_object_lock_try(object))
6104 break;
6105 vm_page_unlock_queues();
6106 mutex_pause(j);
6107 vm_page_lockspin_queues();
6108 }
6109 }
6110 for (j = 0; j < dw_count; j++, dwp++) {
6111
6112 m = dwp->dw_m;
6113
6114 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
6115 vm_pageout_throttle_up(m);
6116#if CONFIG_PHANTOM_CACHE
6117 if (dwp->dw_mask & DW_vm_phantom_cache_update)
6118 vm_phantom_cache_update(m);
6119#endif
6120 if (dwp->dw_mask & DW_vm_page_wire)
6121 vm_page_wire(m, tag, FALSE);
6122 else if (dwp->dw_mask & DW_vm_page_unwire) {
6123 boolean_t queueit;
6124
6125 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
6126
6127 vm_page_unwire(m, queueit);
6128 }
6129 if (dwp->dw_mask & DW_vm_page_free) {
6130 vm_page_free_prepare_queues(m);
6131
6132 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6133 /*
6134 * Add this page to our list of reclaimed pages,
6135 * to be freed later.
6136 */
6137 m->vmp_snext = local_free_q;
6138 local_free_q = m;
6139 } else {
6140 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
6141 vm_page_deactivate_internal(m, FALSE);
6142 else if (dwp->dw_mask & DW_vm_page_activate) {
6143 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6144 vm_page_activate(m);
6145 }
6146 }
6147 else if (dwp->dw_mask & DW_vm_page_speculate)
6148 vm_page_speculate(m, TRUE);
6149 else if (dwp->dw_mask & DW_enqueue_cleaned) {
6150 /*
6151 * if we didn't hold the object lock and did this,
6152 * we might disconnect the page, then someone might
6153 * soft fault it back in, then we would put it on the
6154 * cleaned queue, and so we would have a referenced (maybe even dirty)
6155 * page on that queue, which we don't want
6156 */
6157 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6158
6159 if ((refmod_state & VM_MEM_REFERENCED)) {
6160 /*
6161 * this page has been touched since it got cleaned; let's activate it
6162 * if it hasn't already been
6163 */
6164 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
6165 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
6166
6167 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q)
6168 vm_page_activate(m);
6169 } else {
6170 m->vmp_reference = FALSE;
6171 vm_page_enqueue_cleaned(m);
6172 }
6173 }
6174 else if (dwp->dw_mask & DW_vm_page_lru)
6175 vm_page_lru(m);
6176 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
6177 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q)
6178 vm_page_queues_remove(m, TRUE);
6179 }
6180 if (dwp->dw_mask & DW_set_reference)
6181 m->vmp_reference = TRUE;
6182 else if (dwp->dw_mask & DW_clear_reference)
6183 m->vmp_reference = FALSE;
6184
6185 if (dwp->dw_mask & DW_move_page) {
6186 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6187 vm_page_queues_remove(m, FALSE);
6188
6189 assert(VM_PAGE_OBJECT(m) != kernel_object);
6190
6191 vm_page_enqueue_inactive(m, FALSE);
6192 }
6193 }
6194 if (dwp->dw_mask & DW_clear_busy)
6195 m->vmp_busy = FALSE;
6196
6197 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6198 PAGE_WAKEUP(m);
6199 }
6200 }
6201 vm_page_unlock_queues();
6202
6203 if (local_free_q)
6204 vm_page_free_list(local_free_q, TRUE);
6205
6206 VM_CHECK_MEMORYSTATUS;
6207
6208}
6209
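/*
 * Allocate 'page_count' pages from the low-memory pool, chained through
 * vmp_snext; only KMA_LOMEM callers are supported.  On shortage the
 * partial list is freed and KERN_RESOURCE_SHORTAGE is returned.
 */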
6210kern_return_t
6211vm_page_alloc_list(
6212 int page_count,
6213 int flags,
6214 vm_page_t *list)
6215{
6216 vm_page_t lo_page_list = VM_PAGE_NULL;
6217 vm_page_t mem;
6218 int i;
6219
6220 if ( !(flags & KMA_LOMEM))
6221 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6222
6223 for (i = 0; i < page_count; i++) {
6224
6225 mem = vm_page_grablo();
6226
6227 if (mem == VM_PAGE_NULL) {
6228 if (lo_page_list)
6229 vm_page_free_list(lo_page_list, FALSE);
6230
6231 *list = VM_PAGE_NULL;
6232
6233 return (KERN_RESOURCE_SHORTAGE);
6234 }
6235 mem->vmp_snext = lo_page_list;
6236 lo_page_list = mem;
6237 }
6238 *list = lo_page_list;
6239
6240 return (KERN_SUCCESS);
6241}
6242
6243void
6244vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6245{
6246 page->vmp_offset = offset;
6247}
6248
6249vm_page_t
6250vm_page_get_next(vm_page_t page)
6251{
6252 return (page->vmp_snext);
6253}
6254
6255vm_object_offset_t
6256vm_page_get_offset(vm_page_t page)
6257{
6258 return (page->vmp_offset);
6259}
6260
6261ppnum_t
6262vm_page_get_phys_page(vm_page_t page)
6263{
6264 return (VM_PAGE_GET_PHYS_PAGE(page));
6265}
6266
6267
6268/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6269
6270#if HIBERNATION
6271
6272static vm_page_t hibernate_gobble_queue;
6273
6274static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
6275static int hibernate_flush_dirty_pages(int);
6276static int hibernate_flush_queue(vm_page_queue_head_t *, int);
6277
6278void hibernate_flush_wait(void);
6279void hibernate_mark_in_progress(void);
6280void hibernate_clear_in_progress(void);
6281
6282void hibernate_free_range(int, int);
6283void hibernate_hash_insert_page(vm_page_t);
6284uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6285void hibernate_rebuild_vm_structs(void);
6286uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6287ppnum_t hibernate_lookup_paddr(unsigned int);
6288
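/*
 * Counters gathered while preparing for hibernation: the hibernate_*
 * fields track the dirty-page flush, the cd_* fields track the
 * consider/discard pass over the page queues.
 */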
6289struct hibernate_statistics {
6290 int hibernate_considered;
6291 int hibernate_reentered_on_q;
6292 int hibernate_found_dirty;
6293 int hibernate_skipped_cleaning;
6294 int hibernate_skipped_transient;
6295 int hibernate_skipped_precious;
6296 int hibernate_skipped_external;
6297 int hibernate_queue_nolock;
6298 int hibernate_queue_paused;
6299 int hibernate_throttled;
6300 int hibernate_throttle_timeout;
6301 int hibernate_drained;
6302 int hibernate_drain_timeout;
6303 int cd_lock_failed;
6304 int cd_found_precious;
6305 int cd_found_wired;
6306 int cd_found_busy;
6307 int cd_found_unusual;
6308 int cd_found_cleaning;
6309 int cd_found_laundry;
6310 int cd_found_dirty;
6311 int cd_found_xpmapped;
6312 int cd_skipped_xpmapped;
6313 int cd_local_free;
6314 int cd_total_free;
6315 int cd_vm_page_wire_count;
6316 int cd_vm_struct_pages_unneeded;
6317 int cd_pages;
6318 int cd_discarded;
6319 int cd_count_wire;
6320} hibernate_stats;
6321
6322
6323/*
6324 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6325 * so that we don't overrun the estimated image size, which would
6326 * result in a hibernation failure.
6327 */
6328#define HIBERNATE_XPMAPPED_LIMIT 40000
6329
6330
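/*
 * Wait, in 5 second slices, for the given pageout queue to drain.
 * Returns 1 if the wait times out on any queue other than the external
 * one (a timeout on the external queue is tolerated), 0 otherwise.
 */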
6331static int
6332hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6333{
6334 wait_result_t wait_result;
6335
6336 vm_page_lock_queues();
6337
6338 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
6339
6340 q->pgo_draining = TRUE;
6341
6342 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
6343
6344 vm_page_unlock_queues();
6345
6346 wait_result = thread_block(THREAD_CONTINUE_NULL);
6347
6348 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
6349 hibernate_stats.hibernate_drain_timeout++;
6350
6351 if (q == &vm_pageout_queue_external)
6352 return (0);
6353
6354 return (1);
6355 }
6356 vm_page_lock_queues();
6357
6358 hibernate_stats.hibernate_drained++;
6359 }
6360 vm_page_unlock_queues();
6361
6362 return (0);
6363}
6364
6365
6366boolean_t hibernate_skip_external = FALSE;
6367
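/*
 * Walk up to 'qcount' pages on queue 'q', pushing dirty pages to the
 * pageout/compressor path via vm_pageout_cluster() and requeueing pages
 * that can't be cleaned right now.  Honors the internal/external pageout
 * queue throttles and, once hibernate_skip_external is set, leaves
 * external (file-backed) pages alone.  Returns non-zero on abort.
 */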
6368static int
6369hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
6370{
6371 vm_page_t m;
6372 vm_object_t l_object = NULL;
6373 vm_object_t m_object = NULL;
6374 int refmod_state = 0;
6375 int try_failed_count = 0;
6376 int retval = 0;
6377 int current_run = 0;
6378 struct vm_pageout_queue *iq;
6379 struct vm_pageout_queue *eq;
6380 struct vm_pageout_queue *tq;
6381
6382 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6383 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
6384
6385 iq = &vm_pageout_queue_internal;
6386 eq = &vm_pageout_queue_external;
6387
6388 vm_page_lock_queues();
6389
6390 while (qcount && !vm_page_queue_empty(q)) {
6391
6392 if (current_run++ == 1000) {
6393 if (hibernate_should_abort()) {
6394 retval = 1;
6395 break;
6396 }
6397 current_run = 0;
6398 }
6399
6400 m = (vm_page_t) vm_page_queue_first(q);
6401 m_object = VM_PAGE_OBJECT(m);
6402
6403 /*
6404 * check to see if we currently are working
6405 * with the same object... if so, we've
6406 * already got the lock
6407 */
6408 if (m_object != l_object) {
6409 /*
6410 * the object associated with candidate page is
6411 * different from the one we were just working
6412 * with... dump the lock if we still own it
6413 */
6414 if (l_object != NULL) {
6415 vm_object_unlock(l_object);
6416 l_object = NULL;
6417 }
6418 /*
6419 * Try to lock object; since we've already got the
6420 * page queues lock, we can only 'try' for this one.
6421 * if the 'try' fails, we need to do a mutex_pause
6422 * to allow the owner of the object lock a chance to
6423 * run...
6424 */
6425 if ( !vm_object_lock_try_scan(m_object)) {
6426
6427 if (try_failed_count > 20) {
6428 hibernate_stats.hibernate_queue_nolock++;
6429
6430 goto reenter_pg_on_q;
6431 }
6432
6433 vm_page_unlock_queues();
6434 mutex_pause(try_failed_count++);
6435 vm_page_lock_queues();
6436
6437 hibernate_stats.hibernate_queue_paused++;
6438 continue;
6439 } else {
6440 l_object = m_object;
6441 }
6442 }
6443 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error) {
6444 /*
6445 * page is not to be cleaned...
6446 * put it back on its queue and move on
6447 */
6448 if (m->vmp_cleaning)
6449 hibernate_stats.hibernate_skipped_cleaning++;
6450 else
6451 hibernate_stats.hibernate_skipped_transient++;
6452
6453 goto reenter_pg_on_q;
6454 }
6455 if (m_object->copy == VM_OBJECT_NULL) {
6456 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6457 /*
6458 * let the normal hibernate image path
6459 * deal with these
6460 */
6461 goto reenter_pg_on_q;
6462 }
6463 }
6464 if ( !m->vmp_dirty && m->vmp_pmapped) {
6465 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6466
6467 if ((refmod_state & VM_MEM_MODIFIED)) {
6468 SET_PAGE_DIRTY(m, FALSE);
6469 }
6470 } else
6471 refmod_state = 0;
6472
6473 if ( !m->vmp_dirty) {
6474 /*
6475 * page is not to be cleaned...
6476 * put it back on its queue and move on
6477 */
6478 if (m->vmp_precious)
6479 hibernate_stats.hibernate_skipped_precious++;
6480
6481 goto reenter_pg_on_q;
6482 }
6483
6484 if (hibernate_skip_external == TRUE && !m_object->internal) {
6485
6486 hibernate_stats.hibernate_skipped_external++;
6487
6488 goto reenter_pg_on_q;
6489 }
6490 tq = NULL;
6491
6492 if (m_object->internal) {
6493 if (VM_PAGE_Q_THROTTLED(iq))
6494 tq = iq;
6495 } else if (VM_PAGE_Q_THROTTLED(eq))
6496 tq = eq;
6497
6498 if (tq != NULL) {
6499 wait_result_t wait_result;
6500 int wait_count = 5;
6501
6502 if (l_object != NULL) {
6503 vm_object_unlock(l_object);
6504 l_object = NULL;
6505 }
6506
6507 while (retval == 0) {
6508
6509 tq->pgo_throttled = TRUE;
6510
6511 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6512
6513 vm_page_unlock_queues();
6514
6515 wait_result = thread_block(THREAD_CONTINUE_NULL);
6516
6517 vm_page_lock_queues();
6518
6519 if (wait_result != THREAD_TIMED_OUT)
6520 break;
6521 if (!VM_PAGE_Q_THROTTLED(tq))
6522 break;
6523
6524 if (hibernate_should_abort())
6525 retval = 1;
6526
6527 if (--wait_count == 0) {
6528
6529 hibernate_stats.hibernate_throttle_timeout++;
6530
6531 if (tq == eq) {
6532 hibernate_skip_external = TRUE;
6533 break;
6534 }
6535 retval = 1;
6536 }
6537 }
6538 if (retval)
6539 break;
6540
6541 hibernate_stats.hibernate_throttled++;
6542
6543 continue;
6544 }
6545 /*
6546 * we've already factored out pages in the laundry which
6547 * means this page can't be on the pageout queue so it's
6548 * safe to do the vm_page_queues_remove
6549 */
6550 vm_page_queues_remove(m, TRUE);
6551
6552 if (m_object->internal == TRUE)
6553 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
6554
6555 vm_pageout_cluster(m);
6556
6557 hibernate_stats.hibernate_found_dirty++;
6558
6559 goto next_pg;
6560
6561reenter_pg_on_q:
6562 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
6563 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
6564
6565 hibernate_stats.hibernate_reentered_on_q++;
6566next_pg:
6567 hibernate_stats.hibernate_considered++;
6568
6569 qcount--;
6570 try_failed_count = 0;
6571 }
6572 if (l_object != NULL) {
6573 vm_object_unlock(l_object);
6574 l_object = NULL;
6575 }
6576
6577 vm_page_unlock_queues();
6578
6579 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6580
6581 return (retval);
6582}
6583
6584
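/*
 * Flush dirty pages ahead of hibernation: reactivate any locally queued
 * pages, then run hibernate_flush_queue() over the speculative, inactive,
 * anonymous, cleaned and active queues, draining the internal (and, unless
 * skipped, external) pageout queues along the way.  Returns non-zero if
 * any step fails or the hibernate is aborted.
 */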
6585static int
6586hibernate_flush_dirty_pages(int pass)
6587{
6588 struct vm_speculative_age_q *aq;
6589 uint32_t i;
6590
6591 if (vm_page_local_q) {
6592 for (i = 0; i < vm_page_local_q_count; i++)
6593 vm_page_reactivate_local(i, TRUE, FALSE);
6594 }
6595
6596 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6597 int qcount;
6598 vm_page_t m;
6599
6600 aq = &vm_page_queue_speculative[i];
6601
6602 if (vm_page_queue_empty(&aq->age_q))
6603 continue;
6604 qcount = 0;
6605
6606 vm_page_lockspin_queues();
6607
6608 vm_page_queue_iterate(&aq->age_q,
6609 m,
6610 vm_page_t,
6611 vmp_pageq)
6612 {
6613 qcount++;
6614 }
6615 vm_page_unlock_queues();
6616
6617 if (qcount) {
6618 if (hibernate_flush_queue(&aq->age_q, qcount))
6619 return (1);
6620 }
6621 }
6622 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
6623 return (1);
6624 /* XXX FBDP TODO: flush secluded queue */
6625 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6626 return (1);
6627 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
6628 return (1);
6629 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6630 return (1);
6631
6632 if (pass == 1)
6633 vm_compressor_record_warmup_start();
6634
6635 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
6636 if (pass == 1)
6637 vm_compressor_record_warmup_end();
6638 return (1);
6639 }
6640 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
6641 if (pass == 1)
6642 vm_compressor_record_warmup_end();
6643 return (1);
6644 }
6645 if (pass == 1)
6646 vm_compressor_record_warmup_end();
6647
6648 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6649 return (1);
6650
6651 return (0);
6652}
6653
6654
6655void
6656hibernate_reset_stats()
6657{
6658 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6659}
6660
6661
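/*
 * Top-level memory flush for hibernation: push dirty pages into the
 * compressor, flush the compressor itself, then try to shrink the wired
 * footprint via the buffer cache collector and a zone GC.
 */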
6662int
6663hibernate_flush_memory()
6664{
6665 int retval;
6666
6667 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6668
6669 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6670
6671 hibernate_cleaning_in_progress = TRUE;
6672 hibernate_skip_external = FALSE;
6673
6674 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6675
6676 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6677
6678 vm_compressor_flush();
6679
6680 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6681
6682 if (consider_buffer_cache_collect != NULL) {
6683 unsigned int orig_wire_count;
6684
6685 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6686 orig_wire_count = vm_page_wire_count;
6687
6688 (void)(*consider_buffer_cache_collect)(1);
6689 consider_zone_gc(FALSE);
6690
6691 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6692
6693 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
6694 }
6695 }
6696 hibernate_cleaning_in_progress = FALSE;
6697
6698 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6699
6700 if (retval)
6701 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6702
6703
6704 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6705 hibernate_stats.hibernate_considered,
6706 hibernate_stats.hibernate_reentered_on_q,
6707 hibernate_stats.hibernate_found_dirty);
6708 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6709 hibernate_stats.hibernate_skipped_cleaning,
6710 hibernate_stats.hibernate_skipped_transient,
6711 hibernate_stats.hibernate_skipped_precious,
6712 hibernate_stats.hibernate_skipped_external,
6713 hibernate_stats.hibernate_queue_nolock);
6714 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6715 hibernate_stats.hibernate_queue_paused,
6716 hibernate_stats.hibernate_throttled,
6717 hibernate_stats.hibernate_throttle_timeout,
6718 hibernate_stats.hibernate_drained,
6719 hibernate_stats.hibernate_drain_timeout);
6720
6721 return (retval);
6722}
6723
6724
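/*
 * Clear every bank bitmap in the list (all pages default to "needs to be
 * saved") and set the out-of-range bits at the tail of each bank so that
 * page numbers beyond last_page are never treated as needing to be saved.
 */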
6725static void
6726hibernate_page_list_zero(hibernate_page_list_t *list)
6727{
6728 uint32_t bank;
6729 hibernate_bitmap_t * bitmap;
6730
6731 bitmap = &list->bank_bitmap[0];
6732 for (bank = 0; bank < list->bank_count; bank++)
6733 {
6734 uint32_t last_bit;
6735
6736 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6737 // set out-of-bound bits at end of bitmap.
6738 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6739 if (last_bit)
6740 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
6741
6742 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6743 }
6744}
6745
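/*
 * Return any pages previously gobbled for hibernation to the free list.
 */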
6746void
6747hibernate_free_gobble_pages(void)
6748{
6749 vm_page_t m, next;
6750 uint32_t count = 0;
6751
6752 m = (vm_page_t) hibernate_gobble_queue;
6753 while(m)
6754 {
6755 next = m->vmp_snext;
6756 vm_page_free(m);
6757 count++;
6758 m = next;
6759 }
6760 hibernate_gobble_queue = VM_PAGE_NULL;
6761
6762 if (count)
6763 HIBLOG("Freed %d pages\n", count);
6764}
6765
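/*
 * Decide whether a page can be discarded rather than written into the
 * hibernation image.  Wired, precious, busy, unusual, cleaning and laundry
 * pages are always kept; otherwise a page is discardable if it is clean or
 * belongs to a volatile/empty purgeable object.  Up to
 * HIBERNATE_XPMAPPED_LIMIT clean, referenced, executable-mapped file pages
 * are kept in the image anyway.
 */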
6766static boolean_t
6767hibernate_consider_discard(vm_page_t m, boolean_t preflight)
6768{
6769 vm_object_t object = NULL;
6770 int refmod_state;
6771 boolean_t discard = FALSE;
6772
6773 do
6774 {
6775 if (m->vmp_private)
6776 panic("hibernate_consider_discard: private");
6777
6778 object = VM_PAGE_OBJECT(m);
6779
6780 if (!vm_object_lock_try(object)) {
6781 object = NULL;
6782 if (!preflight) hibernate_stats.cd_lock_failed++;
6783 break;
6784 }
6785 if (VM_PAGE_WIRED(m)) {
6786 if (!preflight) hibernate_stats.cd_found_wired++;
6787 break;
6788 }
6789 if (m->vmp_precious) {
6790 if (!preflight) hibernate_stats.cd_found_precious++;
6791 break;
6792 }
6793 if (m->vmp_busy || !object->alive) {
6794 /*
6795 * Somebody is playing with this page.
6796 */
6797 if (!preflight) hibernate_stats.cd_found_busy++;
6798 break;
6799 }
6800 if (m->vmp_absent || m->vmp_unusual || m->vmp_error) {
6801 /*
6802 * If it's unusual in any way, ignore it
6803 */
6804 if (!preflight) hibernate_stats.cd_found_unusual++;
6805 break;
6806 }
6807 if (m->vmp_cleaning) {
6808 if (!preflight) hibernate_stats.cd_found_cleaning++;
6809 break;
6810 }
6811 if (m->vmp_laundry) {
6812 if (!preflight) hibernate_stats.cd_found_laundry++;
6813 break;
6814 }
6815 if (!m->vmp_dirty)
6816 {
6817 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6818
6819 if (refmod_state & VM_MEM_REFERENCED)
6820 m->vmp_reference = TRUE;
6821 if (refmod_state & VM_MEM_MODIFIED) {
6822 SET_PAGE_DIRTY(m, FALSE);
6823 }
6824 }
6825
6826 /*
6827 * If it's clean or purgeable we can discard the page on wakeup.
6828 */
6829 discard = (!m->vmp_dirty)
6830 || (VM_PURGABLE_VOLATILE == object->purgable)
6831 || (VM_PURGABLE_EMPTY == object->purgable);
6832
6833
6834 if (discard == FALSE) {
6835 if (!preflight)
6836 hibernate_stats.cd_found_dirty++;
6837 } else if (m->vmp_xpmapped && m->vmp_reference && !object->internal) {
6838 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6839 if (!preflight)
6840 hibernate_stats.cd_found_xpmapped++;
6841 discard = FALSE;
6842 } else {
6843 if (!preflight)
6844 hibernate_stats.cd_skipped_xpmapped++;
6845 }
6846 }
6847 }
6848 while (FALSE);
6849
6850 if (object)
6851 vm_object_unlock(object);
6852
6853 return (discard);
6854}
6855
6856
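/*
 * Throw away a page that hibernate_consider_discard() approved: disconnect
 * it from any pmaps and, if its object is volatile purgeable, mark the
 * object empty and adjust vm_page_purgeable_count before freeing the page.
 */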
6857static void
6858hibernate_discard_page(vm_page_t m)
6859{
6860 vm_object_t m_object;
6861
6862 if (m->vmp_absent || m->vmp_unusual || m->vmp_error)
6863 /*
6864 * If it's unusual in any way, ignore it
6865 */
6866 return;
6867
6868 m_object = VM_PAGE_OBJECT(m);
6869
6870#if MACH_ASSERT || DEBUG
6871 if (!vm_object_lock_try(m_object))
6872 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6873#else
6874 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6875 makes sure these locks are uncontended before sleep */
6876#endif /* MACH_ASSERT || DEBUG */
6877
6878 if (m->vmp_pmapped == TRUE)
6879 {
6880 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6881 }
6882
6883 if (m->vmp_laundry)
6884 panic("hibernate_discard_page(%p) laundry", m);
6885 if (m->vmp_private)
6886 panic("hibernate_discard_page(%p) private", m);
6887 if (m->vmp_fictitious)
6888 panic("hibernate_discard_page(%p) fictitious", m);
6889
6890 if (VM_PURGABLE_VOLATILE == m_object->purgable)
6891 {
6892 /* object should be on a queue */
6893 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6894 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
6895 assert(old_queue);
6896 if (m_object->purgeable_when_ripe) {
6897 vm_purgeable_token_delete_first(old_queue);
6898 }
6899 vm_object_lock_assert_exclusive(m_object);
6900 m_object->purgable = VM_PURGABLE_EMPTY;
6901
6902 /*
6903 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6904 * accounted in the "volatile" ledger, so no change here.
6905 * We have to update vm_page_purgeable_count, though, since we're
6906 * effectively purging this object.
6907 */
6908 unsigned int delta;
6909 assert(m_object->resident_page_count >= m_object->wired_page_count);
6910 delta = (m_object->resident_page_count - m_object->wired_page_count);
6911 assert(vm_page_purgeable_count >= delta);
6912 assert(delta > 0);
6913 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
6914 }
6915
6916 vm_page_free(m);
6917
6918#if MACH_ASSERT || DEBUG
6919 vm_object_unlock(m_object);
6920#endif /* MACH_ASSERT || DEBUG */
6921}
6922
6923/*
6924 Grab locks for hibernate_page_list_setall()
6925*/
6926void
6927hibernate_vm_lock_queues(void)
6928{
6929 vm_object_lock(compressor_object);
6930 vm_page_lock_queues();
6931 lck_mtx_lock(&vm_page_queue_free_lock);
6932 lck_mtx_lock(&vm_purgeable_queue_lock);
6933
6934 if (vm_page_local_q) {
6935 uint32_t i;
6936 for (i = 0; i < vm_page_local_q_count; i++) {
6937 struct vpl *lq;
6938 lq = &vm_page_local_q[i].vpl_un.vpl;
6939 VPL_LOCK(&lq->vpl_lock);
6940 }
6941 }
6942}
6943
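/*
 Release the locks taken by hibernate_vm_lock_queues()
*/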
6944void
6945hibernate_vm_unlock_queues(void)
6946{
6947 if (vm_page_local_q) {
6948 uint32_t i;
6949 for (i = 0; i < vm_page_local_q_count; i++) {
6950 struct vpl *lq;
6951 lq = &vm_page_local_q[i].vpl_un.vpl;
6952 VPL_UNLOCK(&lq->vpl_lock);
6953 }
6954 }
6955 lck_mtx_unlock(&vm_purgeable_queue_lock);
6956 lck_mtx_unlock(&vm_page_queue_free_lock);
6957 vm_page_unlock_queues();
6958 vm_object_unlock(compressor_object);
6959}
6960
6961/*
6962 A zero bit in the bitmaps => the page needs to be saved. All pages default to
6963 being saved; pages known to the VM not to need saving are then subtracted.
6964 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
6965*/
6966
6967void
6968hibernate_page_list_setall(hibernate_page_list_t * page_list,
6969 hibernate_page_list_t * page_list_wired,
6970 hibernate_page_list_t * page_list_pal,
6971 boolean_t preflight,
6972 boolean_t will_discard,
6973 uint32_t * pagesOut)
6974{
6975 uint64_t start, end, nsec;
6976 vm_page_t m;
6977 vm_page_t next;
6978 uint32_t pages = page_list->page_count;
6979 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
6980 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
6981 uint32_t count_wire = pages;
6982 uint32_t count_discard_active = 0;
6983 uint32_t count_discard_inactive = 0;
6984 uint32_t count_discard_cleaned = 0;
6985 uint32_t count_discard_purgeable = 0;
6986 uint32_t count_discard_speculative = 0;
6987 uint32_t count_discard_vm_struct_pages = 0;
6988 uint32_t i;
6989 uint32_t bank;
6990 hibernate_bitmap_t * bitmap;
6991 hibernate_bitmap_t * bitmap_wired;
6992 boolean_t discard_all;
6993 boolean_t discard;
6994
6995 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
6996
6997 if (preflight) {
6998 page_list = NULL;
6999 page_list_wired = NULL;
7000 page_list_pal = NULL;
7001 discard_all = FALSE;
7002 } else {
7003 discard_all = will_discard;
7004 }
7005
7006#if MACH_ASSERT || DEBUG
7007 if (!preflight)
7008 {
7009 assert(hibernate_vm_locks_are_safe());
7010 vm_page_lock_queues();
7011 if (vm_page_local_q) {
7012 for (i = 0; i < vm_page_local_q_count; i++) {
7013 struct vpl *lq;
7014 lq = &vm_page_local_q[i].vpl_un.vpl;
7015 VPL_LOCK(&lq->vpl_lock);
7016 }
7017 }
7018 }
7019#endif /* MACH_ASSERT || DEBUG */
7020
7021
7022 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
7023
7024 clock_get_uptime(&start);
7025
7026 if (!preflight) {
7027 hibernate_page_list_zero(page_list);
7028 hibernate_page_list_zero(page_list_wired);
7029 hibernate_page_list_zero(page_list_pal);
7030
7031 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
7032 hibernate_stats.cd_pages = pages;
7033 }
7034
7035 if (vm_page_local_q) {
7036 for (i = 0; i < vm_page_local_q_count; i++)
7037 vm_page_reactivate_local(i, TRUE, !preflight);
7038 }
7039
7040 if (preflight) {
7041 vm_object_lock(compressor_object);
7042 vm_page_lock_queues();
7043 lck_mtx_lock(&vm_page_queue_free_lock);
7044 }
7045
7046 m = (vm_page_t) hibernate_gobble_queue;
7047 while (m)
7048 {
7049 pages--;
7050 count_wire--;
7051 if (!preflight) {
7052 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7053 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7054 }
7055 m = m->vmp_snext;
7056 }
7057
7058 if (!preflight) for( i = 0; i < real_ncpus; i++ )
7059 {
7060 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
7061 {
7062 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->vmp_snext)
7063 {
7064 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
7065
7066 pages--;
7067 count_wire--;
7068 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7069 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7070
7071 hibernate_stats.cd_local_free++;
7072 hibernate_stats.cd_total_free++;
7073 }
7074 }
7075 }
7076
7077 for( i = 0; i < vm_colors; i++ )
7078 {
7079 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
7080 m,
7081 vm_page_t,
7082 vmp_pageq)
7083 {
7084 assert(m->vmp_q_state == VM_PAGE_ON_FREE_Q);
7085
7086 pages--;
7087 count_wire--;
7088 if (!preflight) {
7089 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7090 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7091
7092 hibernate_stats.cd_total_free++;
7093 }
7094 }
7095 }
7096
7097 vm_page_queue_iterate(&vm_lopage_queue_free,
7098 m,
7099 vm_page_t,
7100 vmp_pageq)
7101 {
7102 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
7103
7104 pages--;
7105 count_wire--;
7106 if (!preflight) {
7107 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7108 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7109
7110 hibernate_stats.cd_total_free++;
7111 }
7112 }
7113
7114 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
7115 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
7116 {
7117 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
7118
7119 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7120 discard = FALSE;
7121 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7122 && hibernate_consider_discard(m, preflight))
7123 {
7124 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7125 count_discard_inactive++;
7126 discard = discard_all;
7127 }
7128 else
7129 count_throttled++;
7130 count_wire--;
7131 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7132
7133 if (discard) hibernate_discard_page(m);
7134 m = next;
7135 }
7136
7137 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7138 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7139 {
7140 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7141
7142 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7143 discard = FALSE;
7144 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7145 && hibernate_consider_discard(m, preflight))
7146 {
7147 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7148 if (m->vmp_dirty)
7149 count_discard_purgeable++;
7150 else
7151 count_discard_inactive++;
7152 discard = discard_all;
7153 }
7154 else
7155 count_anonymous++;
7156 count_wire--;
7157 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7158 if (discard) hibernate_discard_page(m);
7159 m = next;
7160 }
7161
7162 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7163 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7164 {
7165 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7166
7167 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7168 discard = FALSE;
7169 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7170 && hibernate_consider_discard(m, preflight))
7171 {
7172 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7173 if (m->vmp_dirty)
7174 count_discard_purgeable++;
7175 else
7176 count_discard_cleaned++;
7177 discard = discard_all;
7178 }
7179 else
7180 count_cleaned++;
7181 count_wire--;
7182 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7183 if (discard) hibernate_discard_page(m);
7184 m = next;
7185 }
7186
7187 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7188 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7189 {
7190 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7191
7192 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7193 discard = FALSE;
7194 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
7195 && hibernate_consider_discard(m, preflight))
7196 {
7197 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7198 if (m->vmp_dirty)
7199 count_discard_purgeable++;
7200 else
7201 count_discard_active++;
7202 discard = discard_all;
7203 }
7204 else
7205 count_active++;
7206 count_wire--;
7207 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7208 if (discard) hibernate_discard_page(m);
7209 m = next;
7210 }
7211
7212 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7213 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7214 {
7215 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7216
7217 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7218 discard = FALSE;
7219 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7220 && hibernate_consider_discard(m, preflight))
7221 {
7222 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7223 if (m->vmp_dirty)
7224 count_discard_purgeable++;
7225 else
7226 count_discard_inactive++;
7227 discard = discard_all;
7228 }
7229 else
7230 count_inactive++;
7231 count_wire--;
7232 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7233 if (discard) hibernate_discard_page(m);
7234 m = next;
7235 }
7236 /* XXX FBDP TODO: secluded queue */
7237
7238 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7239 {
7240 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7241 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7242 {
7243 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7244 assertf(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q,
7245 "Bad page: %p (0x%x:0x%x) on queue %d has state: %d (Discard: %d, Preflight: %d)",
7246 m, m->vmp_pageq.next, m->vmp_pageq.prev, i, m->vmp_q_state, discard, preflight);
7247
7248 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7249 discard = FALSE;
7250 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7251 && hibernate_consider_discard(m, preflight))
7252 {
7253 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7254 count_discard_speculative++;
7255 discard = discard_all;
7256 }
7257 else
7258 count_speculative++;
7259 count_wire--;
7260 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7261 if (discard) hibernate_discard_page(m);
7262 m = next;
7263 }
7264 }
7265
7266 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, vmp_listq)
7267 {
7268 assert(m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
7269
7270 count_compressor++;
7271 count_wire--;
7272 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7273 }
7274
7275 if (preflight == FALSE && discard_all == TRUE) {
7276 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
7277
7278 HIBLOG("hibernate_teardown started\n");
7279 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7280 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7281
7282 pages -= count_discard_vm_struct_pages;
7283 count_wire -= count_discard_vm_struct_pages;
7284
7285 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7286
7287 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7288 }
7289
7290 if (!preflight) {
7291 // pull wired from hibernate_bitmap
7292 bitmap = &page_list->bank_bitmap[0];
7293 bitmap_wired = &page_list_wired->bank_bitmap[0];
7294 for (bank = 0; bank < page_list->bank_count; bank++)
7295 {
7296 for (i = 0; i < bitmap->bitmapwords; i++)
7297 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7298 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
7299 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7300 }
7301 }
7302
7303 // machine dependent adjustments
7304 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
7305
7306 if (!preflight) {
7307 hibernate_stats.cd_count_wire = count_wire;
7308 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7309 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
7310 }
7311
7312 clock_get_uptime(&end);
7313 absolutetime_to_nanoseconds(end - start, &nsec);
7314 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7315
7316 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7317 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7318 discard_all ? "did" : "could",
7319 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7320
7321 if (hibernate_stats.cd_skipped_xpmapped)
7322 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7323
7324 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7325
7326 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7327
7328#if MACH_ASSERT || DEBUG
7329 if (!preflight)
7330 {
7331 if (vm_page_local_q) {
7332 for (i = 0; i < vm_page_local_q_count; i++) {
7333 struct vpl *lq;
7334 lq = &vm_page_local_q[i].vpl_un.vpl;
7335 VPL_UNLOCK(&lq->vpl_lock);
7336 }
7337 }
7338 vm_page_unlock_queues();
7339 }
7340#endif /* MACH_ASSERT || DEBUG */
7341
7342 if (preflight) {
7343 lck_mtx_unlock(&vm_page_queue_free_lock);
7344 vm_page_unlock_queues();
7345 vm_object_unlock(compressor_object);
7346 }
7347
7348 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
7349}
7350
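/*
 * Free every page whose bit is set in page_list, walking the anonymous,
 * speculative, inactive, active and cleaned queues.
 */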
7351void
7352hibernate_page_list_discard(hibernate_page_list_t * page_list)
7353{
7354 uint64_t start, end, nsec;
7355 vm_page_t m;
7356 vm_page_t next;
7357 uint32_t i;
7358 uint32_t count_discard_active = 0;
7359 uint32_t count_discard_inactive = 0;
7360 uint32_t count_discard_purgeable = 0;
7361 uint32_t count_discard_cleaned = 0;
7362 uint32_t count_discard_speculative = 0;
7363
7364
7365#if MACH_ASSERT || DEBUG
7366 vm_page_lock_queues();
7367 if (vm_page_local_q) {
7368 for (i = 0; i < vm_page_local_q_count; i++) {
7369 struct vpl *lq;
7370 lq = &vm_page_local_q[i].vpl_un.vpl;
7371 VPL_LOCK(&lq->vpl_lock);
7372 }
7373 }
7374#endif /* MACH_ASSERT || DEBUG */
7375
7376 clock_get_uptime(&start);
7377
7378 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7379 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7380 {
7381 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7382
7383 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7384 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7385 {
7386 if (m->vmp_dirty)
7387 count_discard_purgeable++;
7388 else
7389 count_discard_inactive++;
7390 hibernate_discard_page(m);
7391 }
7392 m = next;
7393 }
7394
7395 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7396 {
7397 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7398 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7399 {
7400 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7401
7402 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7403 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7404 {
7405 count_discard_speculative++;
7406 hibernate_discard_page(m);
7407 }
7408 m = next;
7409 }
7410 }
7411
7412 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7413 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7414 {
7415 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7416
7417 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7418 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7419 {
7420 if (m->vmp_dirty)
7421 count_discard_purgeable++;
7422 else
7423 count_discard_inactive++;
7424 hibernate_discard_page(m);
7425 }
7426 m = next;
7427 }
7428 /* XXX FBDP TODO: secluded queue */
7429
7430 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7431 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7432 {
7433 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7434
7435 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7436 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7437 {
7438 if (m->vmp_dirty)
7439 count_discard_purgeable++;
7440 else
7441 count_discard_active++;
7442 hibernate_discard_page(m);
7443 }
7444 m = next;
7445 }
7446
7447 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7448 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7449 {
7450 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7451
7452 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7453 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7454 {
7455 if (m->vmp_dirty)
7456 count_discard_purgeable++;
7457 else
7458 count_discard_cleaned++;
7459 hibernate_discard_page(m);
7460 }
7461 m = next;
7462 }
7463
7464#if MACH_ASSERT || DEBUG
7465 if (vm_page_local_q) {
7466 for (i = 0; i < vm_page_local_q_count; i++) {
7467 struct vpl *lq;
7468 lq = &vm_page_local_q[i].vpl_un.vpl;
7469 VPL_UNLOCK(&lq->vpl_lock);
7470 }
7471 }
7472 vm_page_unlock_queues();
7473#endif /* MACH_ASSERT || DEBUG */
7474
7475 clock_get_uptime(&end);
7476 absolutetime_to_nanoseconds(end - start, &nsec);
7477 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7478 nsec / 1000000ULL,
7479 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7480}
7481
7482boolean_t hibernate_paddr_map_inited = FALSE;
7483unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7484vm_page_t hibernate_rebuild_hash_list = NULL;
7485
7486unsigned int hibernate_teardown_found_tabled_pages = 0;
7487unsigned int hibernate_teardown_found_created_pages = 0;
7488unsigned int hibernate_teardown_found_free_pages = 0;
7489unsigned int hibernate_teardown_vm_page_free_count;
7490
7491
7492struct ppnum_mapping {
7493 struct ppnum_mapping *ppnm_next;
7494 ppnum_t ppnm_base_paddr;
7495 unsigned int ppnm_sindx;
7496 unsigned int ppnm_eindx;
7497};
7498
7499struct ppnum_mapping *ppnm_head;
7500struct ppnum_mapping *ppnm_last_found = NULL;
7501
7502
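/*
 * Build a singly linked list of ppnum_mapping entries, one per run of
 * physically contiguous pages in vm_pages[], so a vm_pages[] index can later
 * be translated back to its physical page number by hibernate_lookup_paddr().
 * Guarded by the hibernate_paddr_map_inited flag so the map is built only once.
 */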
7503void
7504hibernate_create_paddr_map()
7505{
7506 unsigned int i;
7507 ppnum_t next_ppnum_in_run = 0;
7508 struct ppnum_mapping *ppnm = NULL;
7509
7510 if (hibernate_paddr_map_inited == FALSE) {
7511
7512 for (i = 0; i < vm_pages_count; i++) {
7513
7514 if (ppnm)
7515 ppnm->ppnm_eindx = i;
7516
7517 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
7518
7519 ppnm = kalloc(sizeof(struct ppnum_mapping));
7520
7521 ppnm->ppnm_next = ppnm_head;
7522 ppnm_head = ppnm;
7523
7524 ppnm->ppnm_sindx = i;
7525 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
7526 }
7527 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
7528 }
7529 ppnm->ppnm_eindx++;
7530
7531 hibernate_paddr_map_inited = TRUE;
7532 }
7533}
7534
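/*
 * Translate a vm_pages[] index into its physical page number using the run
 * list built by hibernate_create_paddr_map(), caching the last run that
 * matched in ppnm_last_found to short-circuit repeated lookups in the same
 * run.  Panics if the index is not covered by any run.
 */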
7535ppnum_t
7536hibernate_lookup_paddr(unsigned int indx)
7537{
7538 struct ppnum_mapping *ppnm = NULL;
7539
7540 ppnm = ppnm_last_found;
7541
7542 if (ppnm) {
7543 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7544 goto done;
7545 }
7546 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7547
7548 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7549 ppnm_last_found = ppnm;
7550 break;
7551 }
7552 }
7553 if (ppnm == NULL)
7554 panic("hibernate_lookup_paddr of %d failed\n", indx);
7555done:
7556 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7557}
7558
7559
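/*
 * Mark the physical pages backing the page-aligned portion of the kernel
 * virtual range [saddr, eaddr) in both the regular and the wired bitmaps,
 * so they are treated as not needing to be saved in the hibernation image.
 * Returns the number of pages marked.
 */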
7560uint32_t
7561hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7562{
7563 addr64_t saddr_aligned;
7564 addr64_t eaddr_aligned;
7565 addr64_t addr;
7566 ppnum_t paddr;
7567 unsigned int mark_as_unneeded_pages = 0;
7568
7569 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7570 eaddr_aligned = eaddr & ~PAGE_MASK_64;
7571
7572 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7573
7574 paddr = pmap_find_phys(kernel_pmap, addr);
7575
7576 assert(paddr);
7577
7578 hibernate_page_bitset(page_list, TRUE, paddr);
7579 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7580
7581 mark_as_unneeded_pages++;
7582 }
7583 return (mark_as_unneeded_pages);
7584}
7585
7586
7587void
7588hibernate_hash_insert_page(vm_page_t mem)
7589{
7590 vm_page_bucket_t *bucket;
7591 int hash_id;
7592 vm_object_t m_object;
7593
7594 m_object = VM_PAGE_OBJECT(mem);
7595
7596 assert(mem->vmp_hashed);
7597 assert(m_object);
7598 assert(mem->vmp_offset != (vm_object_offset_t) -1);
7599
7600 /*
7601	 * Insert it into the object/offset hash table
7602 */
7603 hash_id = vm_page_hash(m_object, mem->vmp_offset);
7604 bucket = &vm_page_buckets[hash_id];
7605
7606 mem->vmp_next_m = bucket->page_list;
7607 bucket->page_list = VM_PAGE_PACK_PTR(mem);
7608}
7609
7610
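/*
 * Re-initialize vm_pages[sindx..eindx) as free pages and put each one back
 * on its per-color free queue, bumping vm_page_free_count as we go.
 */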
7611void
7612hibernate_free_range(int sindx, int eindx)
7613{
7614 vm_page_t mem;
7615 unsigned int color;
7616
7617 while (sindx < eindx) {
7618 mem = &vm_pages[sindx];
7619
7620 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7621
7622 mem->vmp_lopage = FALSE;
7623 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
7624
7625 color = VM_PAGE_GET_COLOR(mem);
7626#if defined(__x86_64__)
7627 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
7628 mem,
7629 vm_page_t,
7630 vmp_pageq);
7631#else
7632 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
7633 mem,
7634 vm_page_t,
7635 vmp_pageq);
7636#endif
7637 vm_page_free_count++;
7638
7639 sindx++;
7640 }
7641}
7642
7643
7644extern void hibernate_rebuild_pmap_structs(void);
7645
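/*
 * Undo hibernate_teardown_vm_structs() after wake: move each compacted
 * vm_page_t back to its original slot in vm_pages[], re-create the ranges of
 * free pages that were squeezed out, re-hash any pages that were hashed, and
 * re-insert the fictitious pages saved on hibernate_rebuild_hash_list.
 */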
7646void
7647hibernate_rebuild_vm_structs(void)
7648{
7649 int i, cindx, sindx, eindx;
7650 vm_page_t mem, tmem, mem_next;
7651 AbsoluteTime startTime, endTime;
7652 uint64_t nsec;
7653
7654 if (hibernate_rebuild_needed == FALSE)
7655 return;
7656
7657 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
7658 HIBLOG("hibernate_rebuild started\n");
7659
7660 clock_get_uptime(&startTime);
7661
7662 hibernate_rebuild_pmap_structs();
7663
7664 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7665 eindx = vm_pages_count;
7666
7667 /*
7668 * Mark all the vm_pages[] that have not been initialized yet as being
7669	 * transient. This is needed to ensure that the buddy page search is correct.
7670	 * Without this, random data in these vm_pages[] entries can trip up the buddy search.
7671 */
7672 for (i = hibernate_teardown_last_valid_compact_indx+1; i < eindx; ++i)
7673 vm_pages[i].vmp_q_state = VM_PAGE_NOT_ON_Q;
7674
7675 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7676
7677 mem = &vm_pages[cindx];
7678 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
7679 /*
7680 * hibernate_teardown_vm_structs leaves the location where
7681		 * this vm_page_t must be located in "vmp_next_m".
7682 */
7683 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
7684 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
7685
7686 sindx = (int)(tmem - &vm_pages[0]);
7687
7688 if (mem != tmem) {
7689 /*
7690 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7691 * so move it back to its real location
7692 */
7693 *tmem = *mem;
7694 mem = tmem;
7695 }
7696 if (mem->vmp_hashed)
7697 hibernate_hash_insert_page(mem);
7698 /*
7699 * the 'hole' between this vm_page_t and the previous
7700 * vm_page_t we moved needs to be initialized as
7701 * a range of free vm_page_t's
7702 */
7703 hibernate_free_range(sindx + 1, eindx);
7704
7705 eindx = sindx;
7706 }
7707 if (sindx)
7708 hibernate_free_range(0, sindx);
7709
7710 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7711
7712 /*
7713 * process the list of vm_page_t's that were entered in the hash,
7714	 * but were not located in the vm_pages array... these are
7715 * vm_page_t's that were created on the fly (i.e. fictitious)
7716 */
7717 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
7718 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
7719
7720 mem->vmp_next_m = 0;
7721 hibernate_hash_insert_page(mem);
7722 }
7723 hibernate_rebuild_hash_list = NULL;
7724
7725 clock_get_uptime(&endTime);
7726 SUB_ABSOLUTETIME(&endTime, &startTime);
7727 absolutetime_to_nanoseconds(endTime, &nsec);
7728
7729 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7730
7731 hibernate_rebuild_needed = FALSE;
7732
7733 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
7734}
7735
7736
7737extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7738
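/*
 * Compact vm_pages[] for hibernation: pull free pages off their queues and
 * slide the remaining in-use entries down into the holes, recording each
 * page's original location in vmp_next_m so the rebuild pass can restore it.
 * Hashed pages that live outside vm_pages[] (fictitious) are parked on
 * hibernate_rebuild_hash_list.  The now-unused tail of vm_pages[], the page
 * hash buckets and some pmap data are then marked as unneeded for the image;
 * returns the total number of pages marked.
 */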
7739uint32_t
7740hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7741{
7742 unsigned int i;
7743 unsigned int compact_target_indx;
7744 vm_page_t mem, mem_next;
7745 vm_page_bucket_t *bucket;
7746 unsigned int mark_as_unneeded_pages = 0;
7747 unsigned int unneeded_vm_page_bucket_pages = 0;
7748 unsigned int unneeded_vm_pages_pages = 0;
7749 unsigned int unneeded_pmap_pages = 0;
7750 addr64_t start_of_unneeded = 0;
7751 addr64_t end_of_unneeded = 0;
7752
7753
7754 if (hibernate_should_abort())
7755 return (0);
7756
7757 hibernate_rebuild_needed = TRUE;
7758
7759 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7760 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7761 vm_page_cleaned_count, compressor_object->resident_page_count);
7762
7763 for (i = 0; i < vm_page_bucket_count; i++) {
7764
7765 bucket = &vm_page_buckets[i];
7766
7767 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
7768 assert(mem->vmp_hashed);
7769
7770 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
7771
7772 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
7773 mem->vmp_next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
7774 hibernate_rebuild_hash_list = mem;
7775 }
7776 }
7777 }
7778 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7779 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7780
7781 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7782
7783 compact_target_indx = 0;
7784
7785 for (i = 0; i < vm_pages_count; i++) {
7786
7787 mem = &vm_pages[i];
7788
7789 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
7790 unsigned int color;
7791
7792 assert(mem->vmp_busy);
7793 assert(!mem->vmp_lopage);
7794
7795 color = VM_PAGE_GET_COLOR(mem);
7796
7797 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7798 mem,
7799 vm_page_t,
7800 vmp_pageq);
7801
7802 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7803
7804 vm_page_free_count--;
7805
7806 hibernate_teardown_found_free_pages++;
7807
7808 if (vm_pages[compact_target_indx].vmp_q_state != VM_PAGE_ON_FREE_Q)
7809 compact_target_indx = i;
7810 } else {
7811 /*
7812 * record this vm_page_t's original location
7813 * we need this even if it doesn't get moved
7814 * as an indicator to the rebuild function that
7815 * we don't have to move it
7816 */
7817 mem->vmp_next_m = VM_PAGE_PACK_PTR(mem);
7818
7819 if (vm_pages[compact_target_indx].vmp_q_state == VM_PAGE_ON_FREE_Q) {
7820 /*
7821 * we've got a hole to fill, so
7822				 * move this vm_page_t to its new home
7823 */
7824 vm_pages[compact_target_indx] = *mem;
7825 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
7826
7827 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7828 compact_target_indx++;
7829 } else
7830 hibernate_teardown_last_valid_compact_indx = i;
7831 }
7832 }
7833 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7834 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7835 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7836
7837 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7838
7839 if (start_of_unneeded) {
7840 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7841 mark_as_unneeded_pages += unneeded_pmap_pages;
7842 }
7843 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7844
7845 return (mark_as_unneeded_pages);
7846}
7847
7848
7849#endif /* HIBERNATION */
7850
7851/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7852
7853#include <mach_vm_debug.h>
7854#if MACH_VM_DEBUG
7855
7856#include <mach_debug/hash_info.h>
7857#include <vm/vm_debug.h>
7858
7859/*
7860 * Routine: vm_page_info
7861 * Purpose:
7862 * Return information about the global VP table.
7863 * Fills the buffer with as much information as possible
7864 * and returns the desired size of the buffer.
7865 * Conditions:
7866 * Nothing locked. The caller should provide
7867 * possibly-pageable memory.
7868 */
7869
7870unsigned int
7871vm_page_info(
7872 hash_info_bucket_t *info,
7873 unsigned int count)
7874{
7875 unsigned int i;
7876 lck_spin_t *bucket_lock;
7877
7878 if (vm_page_bucket_count < count)
7879 count = vm_page_bucket_count;
7880
7881 for (i = 0; i < count; i++) {
7882 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7883 unsigned int bucket_count = 0;
7884 vm_page_t m;
7885
7886 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7887 lck_spin_lock(bucket_lock);
7888
7889 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7890 m != VM_PAGE_NULL;
7891 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->vmp_next_m)))
7892 bucket_count++;
7893
7894 lck_spin_unlock(bucket_lock);
7895
7896 /* don't touch pageable memory while holding locks */
7897 info[i].hib_count = bucket_count;
7898 }
7899
7900 return vm_page_bucket_count;
7901}
7902#endif /* MACH_VM_DEBUG */
7903
7904#if VM_PAGE_BUCKETS_CHECK
7905void
7906vm_page_buckets_check(void)
7907{
7908 unsigned int i;
7909 vm_page_t p;
7910 unsigned int p_hash;
7911 vm_page_bucket_t *bucket;
7912 lck_spin_t *bucket_lock;
7913
7914 if (!vm_page_buckets_check_ready) {
7915 return;
7916 }
7917
7918#if HIBERNATION
7919 if (hibernate_rebuild_needed ||
7920 hibernate_rebuild_hash_list) {
7921 panic("BUCKET_CHECK: hibernation in progress: "
7922 "rebuild_needed=%d rebuild_hash_list=%p\n",
7923 hibernate_rebuild_needed,
7924 hibernate_rebuild_hash_list);
7925 }
7926#endif /* HIBERNATION */
7927
7928#if VM_PAGE_FAKE_BUCKETS
7929 char *cp;
7930 for (cp = (char *) vm_page_fake_buckets_start;
7931 cp < (char *) vm_page_fake_buckets_end;
7932 cp++) {
7933 if (*cp != 0x5a) {
7934 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7935 "[0x%llx:0x%llx]\n",
7936 cp,
7937 (uint64_t) vm_page_fake_buckets_start,
7938 (uint64_t) vm_page_fake_buckets_end);
7939 }
7940 }
7941#endif /* VM_PAGE_FAKE_BUCKETS */
7942
7943 for (i = 0; i < vm_page_bucket_count; i++) {
7944 vm_object_t p_object;
7945
7946 bucket = &vm_page_buckets[i];
7947 if (!bucket->page_list) {
7948 continue;
7949 }
7950
7951 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7952 lck_spin_lock(bucket_lock);
7953 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7954
7955 while (p != VM_PAGE_NULL) {
7956 p_object = VM_PAGE_OBJECT(p);
7957
7958			p_hash = vm_page_hash(p_object, p->vmp_offset); /* compute before use in the panic below */
7959			if (!p->vmp_hashed) {
7960				panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7961				      "hash %d in bucket %d at %p "
7962				      "is not hashed\n",
7963				      p, p_object, p->vmp_offset,
7964				      p_hash, i, bucket);
7965			}
7966 if (p_hash != i) {
7967 panic("BUCKET_CHECK: corruption in bucket %d "
7968 "at %p: page %p object %p offset 0x%llx "
7969 "hash %d\n",
7970 i, bucket, p, p_object, p->vmp_offset,
7971 p_hash);
7972 }
7973 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m));
7974 }
7975 lck_spin_unlock(bucket_lock);
7976 }
7977
7978// printf("BUCKET_CHECK: checked buckets\n");
7979}
7980#endif /* VM_PAGE_BUCKETS_CHECK */
7981
7982/*
7983 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7984 * local queues if they exist... it's the only spot in the system where we add pages
7985 * to those queues... once on those queues, those pages can only move to one of the
7986 * global page queues or the free queues... they NEVER move from local q to local q.
7987 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7988 * the global vm_page_queue_lock at this point... we still need to take the local lock
7989 * in case this operation is being run on a different CPU than the one that owns the local queue,
7990 * but we don't have to worry about the page moving to a global queue or becoming wired
7991 * while we're grabbing the local lock since those operations would require the global
7992 * vm_page_queue_lock to be held, and we already own it.
7993 *
7994 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
7995 * 'wired' and local are ALWAYS mutually exclusive conditions.
7996 */
7997
7998#if CONFIG_BACKGROUND_QUEUE
7999void
8000vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
8001#else
8002void
8003vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
8004#endif
8005{
8006 boolean_t was_pageable = TRUE;
8007 vm_object_t m_object;
8008
8009 m_object = VM_PAGE_OBJECT(mem);
8010
8011 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8012
8013 if (mem->vmp_q_state == VM_PAGE_NOT_ON_Q)
8014 {
8015 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8016#if CONFIG_BACKGROUND_QUEUE
8017 if (remove_from_backgroundq == TRUE) {
8018 vm_page_remove_from_backgroundq(mem);
8019 }
8020 if (mem->vmp_on_backgroundq) {
8021 assert(mem->vmp_backgroundq.next != 0);
8022 assert(mem->vmp_backgroundq.prev != 0);
8023 } else {
8024 assert(mem->vmp_backgroundq.next == 0);
8025 assert(mem->vmp_backgroundq.prev == 0);
8026 }
8027#endif /* CONFIG_BACKGROUND_QUEUE */
8028 return;
8029 }
8030
8031 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR)
8032 {
8033 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8034#if CONFIG_BACKGROUND_QUEUE
8035 assert(mem->vmp_backgroundq.next == 0 &&
8036 mem->vmp_backgroundq.prev == 0 &&
8037 mem->vmp_on_backgroundq == FALSE);
8038#endif
8039 return;
8040 }
8041 if (mem->vmp_q_state == VM_PAGE_IS_WIRED) {
8042 /*
8043 * might put these guys on a list for debugging purposes
8044 * if we do, we'll need to remove this assert
8045 */
8046 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8047#if CONFIG_BACKGROUND_QUEUE
8048 assert(mem->vmp_backgroundq.next == 0 &&
8049 mem->vmp_backgroundq.prev == 0 &&
8050 mem->vmp_on_backgroundq == FALSE);
8051#endif
8052 return;
8053 }
8054
8055 assert(m_object != compressor_object);
8056 assert(m_object != kernel_object);
8057 assert(m_object != vm_submap_object);
8058 assert(!mem->vmp_fictitious);
8059
8060 switch(mem->vmp_q_state) {
8061
8062 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
8063 {
8064 struct vpl *lq;
8065
8066 lq = &vm_page_local_q[mem->vmp_local_id].vpl_un.vpl;
8067 VPL_LOCK(&lq->vpl_lock);
8068 vm_page_queue_remove(&lq->vpl_queue,
8069 mem, vm_page_t, vmp_pageq);
8070 mem->vmp_local_id = 0;
8071 lq->vpl_count--;
8072 if (m_object->internal) {
8073 lq->vpl_internal_count--;
8074 } else {
8075 lq->vpl_external_count--;
8076 }
8077 VPL_UNLOCK(&lq->vpl_lock);
8078 was_pageable = FALSE;
8079 break;
8080 }
8081 case VM_PAGE_ON_ACTIVE_Q:
8082 {
8083 vm_page_queue_remove(&vm_page_queue_active,
8084 mem, vm_page_t, vmp_pageq);
8085 vm_page_active_count--;
8086 break;
8087 }
8088
8089 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
8090 {
8091 assert(m_object->internal == TRUE);
8092
8093 vm_page_inactive_count--;
8094 vm_page_queue_remove(&vm_page_queue_anonymous,
8095 mem, vm_page_t, vmp_pageq);
8096 vm_page_anonymous_count--;
8097
8098 vm_purgeable_q_advance_all();
8099 vm_page_balance_inactive(3);
8100 break;
8101 }
8102
8103 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
8104 {
8105 assert(m_object->internal == FALSE);
8106
8107 vm_page_inactive_count--;
8108 vm_page_queue_remove(&vm_page_queue_inactive,
8109 mem, vm_page_t, vmp_pageq);
8110 vm_purgeable_q_advance_all();
8111 vm_page_balance_inactive(3);
8112 break;
8113 }
8114
8115 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
8116 {
8117 assert(m_object->internal == FALSE);
8118
8119 vm_page_inactive_count--;
8120 vm_page_queue_remove(&vm_page_queue_cleaned,
8121 mem, vm_page_t, vmp_pageq);
8122 vm_page_cleaned_count--;
8123 vm_page_balance_inactive(3);
8124 break;
8125 }
8126
8127 case VM_PAGE_ON_THROTTLED_Q:
8128 {
8129 assert(m_object->internal == TRUE);
8130
8131 vm_page_queue_remove(&vm_page_queue_throttled,
8132 mem, vm_page_t, vmp_pageq);
8133 vm_page_throttled_count--;
8134 was_pageable = FALSE;
8135 break;
8136 }
8137
8138 case VM_PAGE_ON_SPECULATIVE_Q:
8139 {
8140 assert(m_object->internal == FALSE);
8141
8142 vm_page_remque(&mem->vmp_pageq);
8143 vm_page_speculative_count--;
8144 vm_page_balance_inactive(3);
8145 break;
8146 }
8147
8148#if CONFIG_SECLUDED_MEMORY
8149 case VM_PAGE_ON_SECLUDED_Q:
8150 {
8151 vm_page_queue_remove(&vm_page_queue_secluded,
8152 mem, vm_page_t, vmp_pageq);
8153 vm_page_secluded_count--;
8154 if (m_object == VM_OBJECT_NULL) {
8155 vm_page_secluded_count_free--;
8156 was_pageable = FALSE;
8157 } else {
8158 assert(!m_object->internal);
8159 vm_page_secluded_count_inuse--;
8160 was_pageable = FALSE;
8161// was_pageable = TRUE;
8162 }
8163 break;
8164 }
8165#endif /* CONFIG_SECLUDED_MEMORY */
8166
8167 default:
8168 {
8169 /*
8170 * if (mem->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
8171 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
8172		 * the caller is responsible for determining whether the page is on that queue, and if so, must
8173 * either first remove it (it needs both the page queues lock and the object lock to do
8174 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
8175 *
8176 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
8177 * or any of the undefined states
8178 */
8179 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vmp_q_state);
8180 break;
8181 }
8182
8183 }
8184 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
8185 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
8186
8187#if CONFIG_BACKGROUND_QUEUE
8188 if (remove_from_backgroundq == TRUE)
8189 vm_page_remove_from_backgroundq(mem);
8190#endif
8191 if (was_pageable) {
8192 if (m_object->internal) {
8193 vm_page_pageable_internal_count--;
8194 } else {
8195 vm_page_pageable_external_count--;
8196 }
8197 }
8198}
8199
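/*
 * Remove 'page' from its object's resident page list (memq), first moving
 * the object's memq_hint off of it if the hint currently points here.
 */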
8200void
8201vm_page_remove_internal(vm_page_t page)
8202{
8203 vm_object_t __object = VM_PAGE_OBJECT(page);
8204 if (page == __object->memq_hint) {
8205 vm_page_t __new_hint;
8206 vm_page_queue_entry_t __qe;
8207 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->vmp_listq);
8208 if (vm_page_queue_end(&__object->memq, __qe)) {
8209 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->vmp_listq);
8210 if (vm_page_queue_end(&__object->memq, __qe)) {
8211 __qe = NULL;
8212 }
8213 }
8214 __new_hint = (vm_page_t)((uintptr_t) __qe);
8215 __object->memq_hint = __new_hint;
8216 }
8217 vm_page_queue_remove(&__object->memq, page, vm_page_t, vmp_listq);
8218#if CONFIG_SECLUDED_MEMORY
8219 if (__object->eligible_for_secluded) {
8220 vm_page_secluded.eligible_for_secluded--;
8221 }
8222#endif /* CONFIG_SECLUDED_MEMORY */
8223}
8224
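/*
 * Place a page that is currently on no queue onto the inactive list: the
 * anonymous queue if its object is internal, otherwise the external inactive
 * queue.  'first' selects head vs. tail insertion.  Page queues lock held.
 */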
8225void
8226vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8227{
8228 vm_object_t m_object;
8229
8230 m_object = VM_PAGE_OBJECT(mem);
8231
8232 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8233 assert(!mem->vmp_fictitious);
8234 assert(!mem->vmp_laundry);
8235 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
8236 vm_page_check_pageable_safe(mem);
8237
8238 if (m_object->internal) {
8239 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
8240
8241 if (first == TRUE)
8242 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, vmp_pageq);
8243 else
8244 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, vmp_pageq);
8245
8246 vm_page_anonymous_count++;
8247 vm_page_pageable_internal_count++;
8248 } else {
8249 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
8250
8251 if (first == TRUE)
8252 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, vmp_pageq);
8253 else
8254 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, vmp_pageq);
8255
8256 vm_page_pageable_external_count++;
8257 }
8258 vm_page_inactive_count++;
8259 token_new_pagecount++;
8260
8261#if CONFIG_BACKGROUND_QUEUE
8262 if (mem->vmp_in_background)
8263 vm_page_add_to_backgroundq(mem, FALSE);
8264#endif
8265}
8266
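/*
 * Place a page that is currently on no queue onto the active queue ('first'
 * selects head vs. tail insertion) and rebalance the inactive queue.
 * Page queues lock held.
 */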
8267void
8268vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8269{
8270 vm_object_t m_object;
8271
8272 m_object = VM_PAGE_OBJECT(mem);
8273
8274 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8275 assert(!mem->vmp_fictitious);
8276 assert(!mem->vmp_laundry);
8277 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
8278 vm_page_check_pageable_safe(mem);
8279
8280 mem->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
8281 if (first == TRUE)
8282 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, vmp_pageq);
8283 else
8284 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, vmp_pageq);
8285 vm_page_active_count++;
8286
8287 if (m_object->internal) {
8288 vm_page_pageable_internal_count++;
8289 } else {
8290 vm_page_pageable_external_count++;
8291 }
8292
8293#if CONFIG_BACKGROUND_QUEUE
8294 if (mem->vmp_in_background)
8295 vm_page_add_to_backgroundq(mem, FALSE);
8296#endif
8297 vm_page_balance_inactive(3);
8298}
8299
8300/*
8301 * Pages from special kernel objects shouldn't
8302 * be placed on pageable queues.
8303 */
8304void
8305vm_page_check_pageable_safe(vm_page_t page)
8306{
8307 vm_object_t page_object;
8308
8309 page_object = VM_PAGE_OBJECT(page);
8310
8311 if (page_object == kernel_object) {
8312		panic("vm_page_check_pageable_safe: trying to add page "
8313		      "from kernel object (%p) to pageable queue", kernel_object);
8314 }
8315
8316 if (page_object == compressor_object) {
8317		panic("vm_page_check_pageable_safe: trying to add page "
8318		      "from compressor object (%p) to pageable queue", compressor_object);
8319 }
8320
8321 if (page_object == vm_submap_object) {
8322		panic("vm_page_check_pageable_safe: trying to add page "
8323		      "from submap object (%p) to pageable queue", vm_submap_object);
8324 }
8325}
8326
8327/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8328 * wired page diagnose
8329 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8330
8331#include <libkern/OSKextLibPrivate.h>
8332
8333#define KA_SIZE(namelen, subtotalscount) \
8334 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8335
8336#define KA_NAME(alloc) \
8337 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8338
8339#define KA_NAME_LEN(alloc) \
8340 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
8341
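/*
 * Determine the allocation tag for the current allocation by walking the
 * kernel stack's frame pointers: the first return address that lies outside
 * the core kernel text (or inside a built-in kmod) is handed to
 * OSKextGetAllocationSiteForCaller() to find the owning site.  If the thread
 * has an active allocation_name, that name's tag is used instead.
 */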
8342vm_tag_t
8343vm_tag_bt(void)
8344{
8345 uintptr_t* frameptr;
8346 uintptr_t* frameptr_next;
8347 uintptr_t retaddr;
8348 uintptr_t kstackb, kstackt;
8349 const vm_allocation_site_t * site;
8350 thread_t cthread;
8351 kern_allocation_name_t name;
8352
8353 cthread = current_thread();
8354 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
8355
8356 if ((name = thread_get_kernel_state(cthread)->allocation_name))
8357 {
8358 if (!name->tag) vm_tag_alloc(name);
8359 return name->tag;
8360 }
8361
8362 kstackb = cthread->kernel_stack;
8363 kstackt = kstackb + kernel_stack_size;
8364
8365 /* Load stack frame pointer (EBP on x86) into frameptr */
8366 frameptr = __builtin_frame_address(0);
8367 site = NULL;
8368 while (frameptr != NULL)
8369 {
8370 /* Verify thread stack bounds */
8371 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
8372
8373 /* Next frame pointer is pointed to by the previous one */
8374 frameptr_next = (uintptr_t*) *frameptr;
8375
8376 /* Pull return address from one spot above the frame pointer */
8377 retaddr = *(frameptr + 1);
8378
8379
8380 if (((retaddr < vm_kernel_builtinkmod_text_end) && (retaddr >= vm_kernel_builtinkmod_text))
8381 || (retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
8382 {
8383 site = OSKextGetAllocationSiteForCaller(retaddr);
8384 break;
8385 }
8386 frameptr = frameptr_next;
8387 }
8388
8389 return (site ? site->tag : VM_KERN_MEMORY_NONE);
8390}
8391
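/*
 * Bitmap of currently unused dynamic tags: tag 't' is free when bit
 * (63 - (t & 63)) of free_tag_bits[t >> 6] is set, which lets
 * vm_tag_alloc_locked() find the lowest free tag with __builtin_clzll().
 */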
8392static uint64_t free_tag_bits[VM_MAX_TAG_VALUE/64];
8393
8394void
8395vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
8396{
8397 vm_tag_t tag;
8398 uint64_t avail;
8399 uint32_t idx;
8400 vm_allocation_site_t * prev;
8401
8402 if (site->tag) return;
8403
8404 idx = 0;
8405 while (TRUE)
8406 {
8407 avail = free_tag_bits[idx];
8408 if (avail)
8409 {
8410 tag = __builtin_clzll(avail);
8411 avail &= ~(1ULL << (63 - tag));
8412 free_tag_bits[idx] = avail;
8413 tag += (idx << 6);
8414 break;
8415 }
8416 idx++;
8417 if (idx >= ARRAY_COUNT(free_tag_bits))
8418 {
8419 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++)
8420 {
8421 prev = vm_allocation_sites[idx];
8422 if (!prev) continue;
8423 if (!KA_NAME_LEN(prev)) continue;
8424 if (!prev->tag) continue;
8425 if (prev->total) continue;
8426 if (1 != prev->refcount) continue;
8427
8428 assert(idx == prev->tag);
8429 tag = idx;
8430 prev->tag = VM_KERN_MEMORY_NONE;
8431 *releasesiteP = prev;
8432 break;
8433 }
8434 if (idx >= ARRAY_COUNT(vm_allocation_sites))
8435 {
8436 tag = VM_KERN_MEMORY_ANY;
8437 }
8438 break;
8439 }
8440 }
8441 site->tag = tag;
8442
8443 OSAddAtomic16(1, &site->refcount);
8444
8445 if (VM_KERN_MEMORY_ANY != tag) vm_allocation_sites[tag] = site;
8446
8447 if (tag > vm_allocation_tag_highest) vm_allocation_tag_highest = tag;
8448}
8449
8450static void
8451vm_tag_free_locked(vm_tag_t tag)
8452{
8453 uint64_t avail;
8454 uint32_t idx;
8455 uint64_t bit;
8456
8457 if (VM_KERN_MEMORY_ANY == tag) return;
8458
8459 idx = (tag >> 6);
8460 avail = free_tag_bits[idx];
8461 tag &= 63;
8462 bit = (1ULL << (63 - tag));
8463 assert(!(avail & bit));
8464 free_tag_bits[idx] = (avail | bit);
8465}
8466
8467static void
8468vm_tag_init(void)
8469{
8470 vm_tag_t tag;
8471 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8472 {
8473 vm_tag_free_locked(tag);
8474 }
8475
8476 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++)
8477 {
8478 vm_tag_free_locked(tag);
8479 }
8480}
8481
8482vm_tag_t
8483vm_tag_alloc(vm_allocation_site_t * site)
8484{
8485 vm_tag_t tag;
8486 vm_allocation_site_t * releasesite;
8487
8488 if (VM_TAG_BT & site->flags)
8489 {
8490 tag = vm_tag_bt();
8491 if (VM_KERN_MEMORY_NONE != tag) return (tag);
8492 }
8493
8494 if (!site->tag)
8495 {
8496 releasesite = NULL;
8497 lck_spin_lock(&vm_allocation_sites_lock);
8498 vm_tag_alloc_locked(site, &releasesite);
8499 lck_spin_unlock(&vm_allocation_sites_lock);
8500 if (releasesite) kern_allocation_name_release(releasesite);
8501 }
8502
8503 return (site->tag);
8504}
8505
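/*
 * Adjust the running byte total charged to 'tag' by 'delta' (which may be
 * negative), tracking the peak on DEBUG/DEVELOPMENT kernels and, for
 * dynamically named sites, lazily allocating a tag slot on first growth.
 */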
8506void
8507vm_tag_update_size(vm_tag_t tag, int64_t delta)
8508{
8509 vm_allocation_site_t * allocation;
8510 uint64_t prior;
8511
8512 assert(VM_KERN_MEMORY_NONE != tag);
8513 assert(tag < VM_MAX_TAG_VALUE);
8514
8515 allocation = vm_allocation_sites[tag];
8516 assert(allocation);
8517
8518 if (delta < 0) {
8519 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
8520 }
8521 prior = OSAddAtomic64(delta, &allocation->total);
8522
8523#if DEBUG || DEVELOPMENT
8524
8525 uint64_t new, peak;
8526 new = prior + delta;
8527 do
8528 {
8529 peak = allocation->peak;
8530 if (new <= peak) break;
8531 }
8532 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8533
8534#endif /* DEBUG || DEVELOPMENT */
8535
8536 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) return;
8537
8538 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8539}
8540
8541void
8542kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
8543{
8544 uint64_t prior;
8545
8546 if (delta < 0) {
8547 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
8548 }
8549 prior = OSAddAtomic64(delta, &allocation->total);
8550
8551#if DEBUG || DEVELOPMENT
8552
8553 uint64_t new, peak;
8554 new = prior + delta;
8555 do
8556 {
8557 peak = allocation->peak;
8558 if (new <= peak) break;
8559 }
8560 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8561
8562#endif /* DEBUG || DEVELOPMENT */
8563
8564 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8565}
8566
8567#if VM_MAX_TAG_ZONES
8568
8569void
8570vm_allocation_zones_init(void)
8571{
8572 kern_return_t ret;
8573 vm_offset_t addr;
8574 vm_size_t size;
8575
8576 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **)
8577 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8578
8579 ret = kernel_memory_allocate(kernel_map,
8580 &addr, round_page(size), 0,
8581 KMA_ZERO, VM_KERN_MEMORY_DIAG);
8582 assert(KERN_SUCCESS == ret);
8583
8584 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
8585 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **);
8586
8587 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8588 // in vm_tag_update_zone_size() won't recurse
8589 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
8590 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8591 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
8592}
8593
8594void
8595vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
8596{
8597 vm_allocation_zone_total_t * zone;
8598
8599 assert(VM_KERN_MEMORY_NONE != tag);
8600 assert(tag < VM_MAX_TAG_VALUE);
8601
8602 if (zidx >= VM_MAX_TAG_ZONES) return;
8603
8604 zone = vm_allocation_zone_totals[tag];
8605 if (!zone)
8606 {
8607 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
8608 if (!zone) return;
8609 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8610 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag]))
8611 {
8612 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8613 }
8614 }
8615}
8616
8617void
8618vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
8619{
8620 vm_allocation_zone_total_t * zone;
8621 uint32_t new;
8622
8623 assert(VM_KERN_MEMORY_NONE != tag);
8624 assert(tag < VM_MAX_TAG_VALUE);
8625
8626 if (zidx >= VM_MAX_TAG_ZONES) return;
8627
8628 zone = vm_allocation_zone_totals[tag];
8629 assert(zone);
8630 zone += zidx;
8631
8632 /* the zone is locked */
8633 if (delta < 0)
8634 {
8635 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
8636 zone->total += delta;
8637 }
8638 else
8639 {
8640 zone->total += delta;
8641 if (zone->total > zone->peak) zone->peak = zone->total;
8642 if (dwaste)
8643 {
8644 new = zone->waste;
8645 if (zone->wastediv < 65536) zone->wastediv++;
8646 else new -= (new >> 16);
8647 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
8648 assert(!ov);
8649 zone->waste = new;
8650 }
8651 }
8652}
8653
8654#endif /* VM_MAX_TAG_ZONES */
8655
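/*
 * Account 'delta' bytes against one of 'allocation's subtotal slots, keyed
 * by 'subtag' (claiming a free slot on first use), and mirror the same delta
 * into the subtag's own site as 'mapped' bytes.
 */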
8656void
8657kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
8658{
8659 kern_allocation_name_t other;
8660 struct vm_allocation_total * total;
8661 uint32_t subidx;
8662
8663 subidx = 0;
8664 assert(VM_KERN_MEMORY_NONE != subtag);
8665 for (; subidx < allocation->subtotalscount; subidx++)
8666 {
8667 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag)
8668 {
8669 allocation->subtotals[subidx].tag = subtag;
8670 break;
8671 }
8672 if (subtag == allocation->subtotals[subidx].tag) break;
8673 }
8674 assert(subidx < allocation->subtotalscount);
8675 if (subidx >= allocation->subtotalscount) return;
8676
8677 total = &allocation->subtotals[subidx];
8678 other = vm_allocation_sites[subtag];
8679 assert(other);
8680
8681 if (delta < 0)
8682 {
8683 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
8684 OSAddAtomic64(delta, &total->total);
8685 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
8686 OSAddAtomic64(delta, &other->mapped);
8687 }
8688 else
8689 {
8690 OSAddAtomic64(delta, &other->mapped);
8691 OSAddAtomic64(delta, &total->total);
8692 }
8693}
8694
8695const char *
8696kern_allocation_get_name(kern_allocation_name_t allocation)
8697{
8698 return (KA_NAME(allocation));
8699}
8700
8701kern_allocation_name_t
8702kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
8703{
8704 uint32_t namelen;
8705
8706 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
8707
8708 kern_allocation_name_t allocation;
8709 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
8710 bzero(allocation, KA_SIZE(namelen, subtotalscount));
8711
8712 allocation->refcount = 1;
8713 allocation->subtotalscount = subtotalscount;
8714 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
8715 strlcpy(KA_NAME(allocation), name, namelen + 1);
8716
8717 return (allocation);
8718}
8719
8720void
8721kern_allocation_name_release(kern_allocation_name_t allocation)
8722{
8723 assert(allocation->refcount > 0);
8724 if (1 == OSAddAtomic16(-1, &allocation->refcount))
8725 {
8726 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
8727 }
8728}
8729
8730vm_tag_t
8731kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
8732{
8733 return (vm_tag_alloc(allocation));
8734}
8735
8736#if ! VM_TAG_ACTIVE_UPDATE
8737static void
8738vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
8739{
8740 if (!object->wired_page_count) return;
8741 if (object != kernel_object)
8742 {
8743 assert(object->wire_tag < num_info);
8744 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
8745 }
8746}
8747
8748typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
8749 unsigned int num_info, vm_object_t object);
8750
8751static void
8752vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
8753 vm_page_iterate_proc proc, purgeable_q_t queue,
8754 int group)
8755{
8756 vm_object_t object;
8757
8758 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8759 !queue_end(&queue->objq[group], (queue_entry_t) object);
8760 object = (vm_object_t) queue_next(&object->objq))
8761 {
8762 proc(info, num_info, object);
8763 }
8764}
8765
8766static void
8767vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
8768 vm_page_iterate_proc proc)
8769{
8770 vm_object_t object;
8771
8772 lck_spin_lock(&vm_objects_wired_lock);
8773 queue_iterate(&vm_objects_wired,
8774 object,
8775 vm_object_t,
8776 wired_objq)
8777 {
8778 proc(info, num_info, object);
8779 }
8780 lck_spin_unlock(&vm_objects_wired_lock);
8781}
8782#endif /* ! VM_TAG_ACTIVE_UPDATE */
8783
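/*
 * Flatten the vm_allocation_sites[] accounting into the caller's
 * mach_memory_info array: copy each site's size/mapped totals, label the
 * entry (fixed tag, named site, kmod, or kernel return address), append a
 * per-zone breakdown entry for each tagged zone with a recorded peak, and,
 * for sites that track subtotals, shift the overlapping bytes from the
 * per-subtag entries onto the site itself.
 */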
8784static uint64_t
8785process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
8786{
8787 size_t namelen;
8788 unsigned int idx, count, nextinfo;
8789 vm_allocation_site_t * site;
8790 lck_spin_lock(&vm_allocation_sites_lock);
8791
8792 for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
8793 {
8794 site = vm_allocation_sites[idx];
8795 if (!site) continue;
8796 info[idx].mapped = site->mapped;
8797 info[idx].tag = site->tag;
8798 if (!iterated)
8799 {
8800 info[idx].size = site->total;
8801#if DEBUG || DEVELOPMENT
8802 info[idx].peak = site->peak;
8803#endif /* DEBUG || DEVELOPMENT */
8804 }
8805 else
8806 {
8807 if (!site->subtotalscount && (site->total != info[idx].size))
8808 {
8809 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
8810 info[idx].size = site->total;
8811 }
8812 }
8813 }
8814
8815 nextinfo = (vm_allocation_tag_highest + 1);
8816 count = nextinfo;
8817 if (count >= num_info) count = num_info;
8818
8819 for (idx = 0; idx < count; idx++)
8820 {
8821 site = vm_allocation_sites[idx];
8822 if (!site) continue;
8823 info[idx].flags |= VM_KERN_SITE_WIRED;
8824 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8825 {
8826 info[idx].site = idx;
8827 info[idx].flags |= VM_KERN_SITE_TAG;
8828 if (VM_KERN_MEMORY_ZONE == idx)
8829 {
8830 info[idx].flags |= VM_KERN_SITE_HIDE;
8831 info[idx].flags &= ~VM_KERN_SITE_WIRED;
8832 info[idx].collectable_bytes = zones_collectable_bytes;
8833 }
8834 }
8835 else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
8836 {
8837 info[idx].site = 0;
8838 info[idx].flags |= VM_KERN_SITE_NAMED;
8839 if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
8840 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
8841 }
8842 else if (VM_TAG_KMOD & site->flags)
8843 {
8844 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8845 info[idx].flags |= VM_KERN_SITE_KMOD;
8846 }
8847 else
8848 {
8849 info[idx].site = VM_KERNEL_UNSLIDE(site);
8850 info[idx].flags |= VM_KERN_SITE_KERNEL;
8851 }
8852#if VM_MAX_TAG_ZONES
8853 vm_allocation_zone_total_t * zone;
8854 unsigned int zidx;
8855 vm_size_t elem_size;
8856
8857 if (vm_allocation_zone_totals
8858 && (zone = vm_allocation_zone_totals[idx])
8859 && (nextinfo < num_info))
8860 {
8861 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
8862 {
8863 if (!zone[zidx].peak) continue;
8864 info[nextinfo] = info[idx];
8865 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
8866 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
8867 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
8868 info[nextinfo].size = zone[zidx].total;
8869 info[nextinfo].peak = zone[zidx].peak;
8870 info[nextinfo].mapped = 0;
8871 if (zone[zidx].wastediv)
8872 {
8873 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
8874 }
8875 nextinfo++;
8876 }
8877 }
8878#endif /* VM_MAX_TAG_ZONES */
8879 if (site->subtotalscount)
8880 {
8881 uint64_t mapped, mapcost, take;
8882 uint32_t sub;
8883 vm_tag_t alloctag;
8884
8885 info[idx].size = site->total;
8886 mapped = info[idx].size;
8887 info[idx].mapped = mapped;
8888 mapcost = 0;
8889 for (sub = 0; sub < site->subtotalscount; sub++)
8890 {
8891 alloctag = site->subtotals[sub].tag;
8892 assert(alloctag < num_info);
8893 if (info[alloctag].name[0]) continue;
8894 take = info[alloctag].mapped;
8895 if (take > info[alloctag].size) take = info[alloctag].size;
8896 if (take > mapped) take = mapped;
8897 info[alloctag].mapped -= take;
8898 info[alloctag].size -= take;
8899 mapped -= take;
8900 mapcost += take;
8901 }
8902 info[idx].size = mapcost;
8903 }
8904 }
8905 lck_spin_unlock(&vm_allocation_sites_lock);
8906
8907 return (0);
8908}
8909
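/*
 * Return an upper bound on the number of mach_memory_info entries that
 * vm_page_diagnose() will need: one per live allocation site, one per
 * tagged zone with a non-zero peak, the fixed counters, plus a little slop
 * for tags created in the meantime.
 */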
8910uint32_t
8911vm_page_diagnose_estimate(void)
8912{
8913 vm_allocation_site_t * site;
8914 uint32_t count;
8915 uint32_t idx;
8916
8917 lck_spin_lock(&vm_allocation_sites_lock);
8918 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
8919 {
8920 site = vm_allocation_sites[idx];
8921 if (!site) continue;
8922 count++;
8923#if VM_MAX_TAG_ZONES
8924 if (vm_allocation_zone_totals)
8925 {
8926 vm_allocation_zone_total_t * zone;
8927 zone = vm_allocation_zone_totals[idx];
8928 if (!zone) continue;
8929 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
8930 }
8931#endif
8932 }
8933 lck_spin_unlock(&vm_allocation_sites_lock);
8934
8935 /* some slop for new tags created */
8936 count += 8;
8937 count += VM_KERN_COUNTER_COUNT;
8938
8939 return (count);
8940}
8941
8942
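/*
 * Populate 'info' with the kernel's wired-memory accounting: the global
 * counters (managed, wired, reserved, stolen, boot and per-map sizes) go at
 * the tail of the array, per-tag data comes from process_account(), and when
 * tags are not actively updated we also walk kernel_map (one level of
 * submaps deep) counting wired pages resident in kernel_object entries.
 */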
8943kern_return_t
8944vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
8945{
8946 uint64_t wired_size;
8947 uint64_t wired_managed_size;
8948 uint64_t wired_reserved_size;
8949 uint64_t booter_size;
8950 boolean_t iterate;
8951 mach_memory_info_t * counts;
8952
8953 bzero(info, num_info * sizeof(mach_memory_info_t));
8954
8955 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8956
8957#if CONFIG_EMBEDDED
8958 wired_size = ptoa_64(vm_page_wire_count);
8959 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
8960#else
8961 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8962 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
8963#endif
8964 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8965
8966 booter_size = ml_get_booter_memory_size();
8967 wired_size += booter_size;
8968
8969 assert(num_info >= VM_KERN_COUNTER_COUNT);
8970 num_info -= VM_KERN_COUNTER_COUNT;
8971 counts = &info[num_info];
8972
8973#define SET_COUNT(xcount, xsize, xflags) \
8974 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
8975 counts[xcount].site = (xcount); \
8976 counts[xcount].size = (xsize); \
8977 counts[xcount].mapped = (xsize); \
8978 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8979
8980 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8981 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8982 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8983 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8984 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8985 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
8986 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
8987 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
8988
8989#define SET_MAP(xcount, xsize, xfree, xlargest) \
8990 counts[xcount].site = (xcount); \
8991 counts[xcount].size = (xsize); \
8992 counts[xcount].mapped = (xsize); \
8993 counts[xcount].free = (xfree); \
8994 counts[xcount].largest = (xlargest); \
8995 counts[xcount].flags = VM_KERN_SITE_COUNTER;
8996
8997 vm_map_size_t map_size, map_free, map_largest;
8998
8999 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
9000 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
9001
9002 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
9003 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
9004
9005 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
9006 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
9007
9008 iterate = !VM_TAG_ACTIVE_UPDATE;
9009 if (iterate)
9010 {
9011 enum { kMaxKernelDepth = 1 };
9012 vm_map_t maps [kMaxKernelDepth];
9013 vm_map_entry_t entries[kMaxKernelDepth];
9014 vm_map_t map;
9015 vm_map_entry_t entry;
9016 vm_object_offset_t offset;
9017 vm_page_t page;
9018 int stackIdx, count;
9019
9020#if ! VM_TAG_ACTIVE_UPDATE
9021 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
9022#endif /* ! VM_TAG_ACTIVE_UPDATE */
9023
9024 map = kernel_map;
9025 stackIdx = 0;
9026 while (map)
9027 {
9028 vm_map_lock(map);
9029 for (entry = map->hdr.links.next; map; entry = entry->links.next)
9030 {
9031 if (entry->is_sub_map)
9032 {
9033 assert(stackIdx < kMaxKernelDepth);
9034 maps[stackIdx] = map;
9035 entries[stackIdx] = entry;
9036 stackIdx++;
9037 map = VME_SUBMAP(entry);
9038 entry = NULL;
9039 break;
9040 }
9041 if (VME_OBJECT(entry) == kernel_object)
9042 {
9043 count = 0;
9044 vm_object_lock(VME_OBJECT(entry));
9045 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
9046 {
9047 page = vm_page_lookup(VME_OBJECT(entry), offset);
9048 if (page && VM_PAGE_WIRED(page)) count++;
9049 }
9050 vm_object_unlock(VME_OBJECT(entry));
9051
9052 if (count)
9053 {
9054 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
9055 assert(VME_ALIAS(entry) < num_info);
9056 info[VME_ALIAS(entry)].size += ptoa_64(count);
9057 }
9058 }
9059 while (map && (entry == vm_map_last_entry(map)))
9060 {
9061 vm_map_unlock(map);
9062 if (!stackIdx) map = NULL;
9063 else
9064 {
9065 --stackIdx;
9066 map = maps[stackIdx];
9067 entry = entries[stackIdx];
9068 }
9069 }
9070 }
9071 }
9072 }
9073
9074 process_account(info, num_info, zones_collectable_bytes, iterate);
9075
9076 return (KERN_SUCCESS);
9077}
9078
9079#if DEBUG || DEVELOPMENT
9080
9081kern_return_t
9082vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
9083{
9084 kern_return_t ret;
9085 vm_size_t zsize;
9086 vm_map_t map;
9087 vm_map_entry_t entry;
9088
9089 zsize = zone_element_info((void *) addr, tag);
9090 if (zsize)
9091 {
9092 *zone_size = *size = zsize;
9093 return (KERN_SUCCESS);
9094 }
9095
9096 *zone_size = 0;
9097 ret = KERN_INVALID_ADDRESS;
9098 for (map = kernel_map; map; )
9099 {
9100 vm_map_lock(map);
9101 if (!vm_map_lookup_entry(map, addr, &entry)) break;
9102 if (entry->is_sub_map)
9103 {
9104 if (map != kernel_map) break;
9105 map = VME_SUBMAP(entry);
9106 continue;
9107 }
9108 if (entry->vme_start != addr) break;
9109 *tag = VME_ALIAS(entry);
9110 *size = (entry->vme_end - addr);
9111 ret = KERN_SUCCESS;
9112 break;
9113 }
9114 if (map != kernel_map) vm_map_unlock(map);
9115 vm_map_unlock(kernel_map);
9116
9117 return (ret);
9118}
9119
9120#endif /* DEBUG || DEVELOPMENT */
9121
9122uint32_t
9123vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
9124{
9125 vm_allocation_site_t * site;
9126 uint32_t kmodId;
9127
9128 kmodId = 0;
9129 lck_spin_lock(&vm_allocation_sites_lock);
9130 if ((site = vm_allocation_sites[tag]))
9131 {
9132 if (VM_TAG_KMOD & site->flags)
9133 {
9134 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
9135 }
9136 }
9137 lck_spin_unlock(&vm_allocation_sites_lock);
9138
9139 return (kmodId);
9140}
9141
9142
9143#if CONFIG_SECLUDED_MEMORY
9144/*
9145 * Note that there's no locking around other accesses to vm_page_secluded_target.
9146 * That should be OK, since these are the only places where it can be changed after
9147 * initialization. Other users (like vm_pageout) may see the wrong value briefly,
9148 * but will eventually get the correct value. This brief mismatch is OK as pageout
9149 * and page freeing will auto-adjust the vm_page_secluded_count to match the target
9150 * over time.
9151 */
9152unsigned int vm_page_secluded_suppress_cnt = 0;
9153unsigned int vm_page_secluded_save_target;
9154
9155
9156lck_grp_attr_t secluded_suppress_slock_grp_attr;
9157lck_grp_t secluded_suppress_slock_grp;
9158lck_attr_t secluded_suppress_slock_attr;
9159lck_spin_t secluded_suppress_slock;
9160
9161void
9162secluded_suppression_init(void)
9163{
9164 lck_grp_attr_setdefault(&secluded_suppress_slock_grp_attr);
9165 lck_grp_init(&secluded_suppress_slock_grp,
9166 "secluded_suppress_slock", &secluded_suppress_slock_grp_attr);
9167 lck_attr_setdefault(&secluded_suppress_slock_attr);
9168 lck_spin_init(&secluded_suppress_slock,
9169 &secluded_suppress_slock_grp, &secluded_suppress_slock_attr);
9170}
9171
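/*
 * While at least one task has secluded-memory use suppressed, force
 * vm_page_secluded_target to zero; the previous target is saved and restored
 * when the last such task calls stop_secluded_suppression().
 */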
9172void
9173start_secluded_suppression(task_t task)
9174{
9175 if (task->task_suppressed_secluded)
9176 return;
9177 lck_spin_lock(&secluded_suppress_slock);
9178 if (!task->task_suppressed_secluded && vm_page_secluded_suppress_cnt++ == 0) {
9179 task->task_suppressed_secluded = TRUE;
9180 vm_page_secluded_save_target = vm_page_secluded_target;
9181 vm_page_secluded_target = 0;
9182 }
9183 lck_spin_unlock(&secluded_suppress_slock);
9184}
9185
9186void
9187stop_secluded_suppression(task_t task)
9188{
9189 lck_spin_lock(&secluded_suppress_slock);
9190 if (task->task_suppressed_secluded && --vm_page_secluded_suppress_cnt == 0) {
9191 task->task_suppressed_secluded = FALSE;
9192 vm_page_secluded_target = vm_page_secluded_save_target;
9193 }
9194 lck_spin_unlock(&secluded_suppress_slock);
9195}
9196
9197#endif /* CONFIG_SECLUDED_MEMORY */
9198